1919from codesectools .utils import USER_CACHE_DIR
2020
2121if TYPE_CHECKING :
22- from typing import Self
23-
2422 from codesectools .sasts .core .parser import AnalysisResult , Defect
2523 from codesectools .shared .cwe import CWE
2624
@@ -44,7 +42,7 @@ class Dataset(ABC):
4442 license : str
4543 license_url : str
4644
47- def __init__ (self , lang : str | None = None ) -> None :
45+ def __init__ (self , lang : str = "" ) -> None :
4846 """Initialize the Dataset instance.
4947
5048 Set up paths and load the dataset if a language is specified.
@@ -168,6 +166,7 @@ class PrebuiltDatasetMixin:
168166
169167 """
170168
169+ name : str
171170 build_command : str
172171 prebuilt_expected : tuple [Path , str ]
173172 artifacts_arg : str
@@ -223,14 +222,13 @@ class File(DatasetUnit):
223222 """
224223
225224 def __init__ (
226- self , filepath : Path , content : str | bytes , cwes : list [CWE ], has_vuln : bool
225+ self , filepath : Path , content : bytes , cwes : list [CWE ], has_vuln : bool
227226 ) -> None :
228227 """Initialize a File instance.
229228
230229 Args:
231230 filepath: The relative path of the file.
232- content: The content of the file, as a string or bytes. It will be
233- converted to bytes if provided as a string.
231+ content: The content of the file, as bytes.
234232 cwes: A list of CWEs associated with the file.
235233 has_vuln: True if the vulnerability is real, False if it's
236234 intended to be a false positive test case.
@@ -242,9 +240,6 @@ def __init__(
242240 self .cwes = cwes
243241 self .has_vuln = has_vuln
244242
245- if isinstance (content , str ):
246- self .content = content .encode ()
247-
248243 def __repr__ (self ) -> str :
249244 """Return a developer-friendly string representation of the File.
250245
@@ -257,7 +252,7 @@ def __repr__(self) -> str:
257252 cwes: \t { self .cwes }
258253)"""
259254
260- def __eq__ (self , other : str | Path | Self ) -> bool :
255+ def __eq__ (self , other : object ) -> bool :
261256 """Compare this File with another object for equality based on filepath.
262257
263258 Args:
@@ -515,7 +510,7 @@ def __repr__(self) -> str:
515510 files: \t { self .files }
516511)"""
517512
518- def __eq__ (self , other : str | Self ) -> bool :
513+ def __eq__ (self , other : object ) -> bool :
519514 """Compare this GitRepo with another object for equality based on name.
520515
521516 Args:
@@ -589,7 +584,7 @@ def validate(self, analysis_results: list[AnalysisResult]) -> GitRepoDatasetData
589584 validated_repos = []
590585
591586 for analysis_result in analysis_results :
592- repo = self .repos [self .repos .index (analysis_result .name )]
587+ repo = self .repos [self .repos .index (analysis_result .name )] # ty:ignore[invalid-argument-type]
593588
594589 # 1. Process reported defects to get unique (file, cwe) pairs
595590 # and keep one original Defect object for each to retain metadata.
@@ -663,8 +658,7 @@ class GitRepoDatasetData(BenchmarkData):
663658
664659 Attributes:
665660 dataset (GitRepoDataset): The dataset used for the benchmark.
666- validated_repos (list[dict]): A list of dictionaries, each containing
667- the validation results for a single repository.
661+ validated_repos (list[dict]): A list of validation results per repository.
668662 total_repo_number (int): The total number of repositories in the dataset.
669663 defect_numbers (int): The total number of defects found across all repos.
670664
0 commit comments