OPPIDA
diff --git a/codesectools/datasets/BenchmarkJava/dataset.py b/codesectools/datasets/BenchmarkJava/dataset.py
Lines changed: 6 additions & 6 deletions
diff --git a/codesectools/datasets/CVEfixes/dataset.py b/codesectools/datasets/CVEfixes/dataset.py
Lines changed: 4 additions & 4 deletions
diff --git a/codesectools/datasets/JulietTestSuiteC/dataset.py b/codesectools/datasets/JulietTestSuiteC/dataset.py
Lines changed: 5 additions & 5 deletions
diff --git a/codesectools/datasets/__init__.py b/codesectools/datasets/__init__.py
Lines changed: 2 additions & 2 deletions
diff --git a/codesectools/datasets/core/dataset.py b/codesectools/datasets/core/dataset.py
Lines changed: 8 additions & 14 deletions
diff --git a/codesectools/sasts/core/parser.py b/codesectools/sasts/core/parser.py
Lines changed: 6 additions & 6 deletions
diff --git a/codesectools/sasts/core/sast/__init__.py b/codesectools/sasts/core/sast/__init__.py
Lines changed: 3 additions & 1 deletion
diff --git a/codesectools/sasts/core/sast/requirements.py b/codesectools/sasts/core/sast/requirements.py
Lines changed: 12 additions & 5 deletions
@@ -25,15 +25,15 @@ class TestCode(File):
     def __init__(
         self,
         filepath: Path,
-        content: str | bytes,
+        content: bytes,
         cwes: list[CWE],
         has_vuln: bool,
     ) -> None:
         """Initialize a TestCode instance.
 
         Args:
             filepath: The path to the file.
-            content: The content of the file, as a string or bytes.
+            content: The content of the file, as bytes.
             cwes: A list of CWEs associated with the file.
             has_vuln: A boolean indicating if the vulnerability is real or a false positive test case.
 
@@ -69,7 +69,7 @@ class BenchmarkJava(PrebuiltFileDataset):
     prebuilt_expected = (Path("target/classes/org/owasp/benchmark/testcode"), "*.class")
     artifacts_arg = "."
 
-    def __init__(self, lang: None | str = None) -> None:
+    def __init__(self, lang: str = "") -> None:
         """Initialize the BenchmarkJava dataset.
 
         Args:
@@ -79,7 +79,7 @@ def __init__(self, lang: None | str = None) -> None:
         """
         super().__init__(lang)
 
-    def __eq__(self, other: str | Self) -> bool:
+    def __eq__(self, other: object) -> bool:
         """Compare this dataset with another object for equality.
 
         Args:
@@ -121,7 +121,7 @@ def download_files(self: Self, test: bool = False) -> None:
             for to_delete_testcode in random.sample(testcodes, k=len(testcodes) - 50):
                 to_delete_testcode.unlink()
 
-    def load_dataset(self) -> list[TestCode]:
+    def load_dataset(self) -> list[File]:
         """Load the BenchmarkJava dataset from its source files.
 
         Reads a CSV file for vulnerability metadata and the corresponding Java
@@ -149,7 +149,7 @@ def load_dataset(self) -> list[TestCode]:
             filename = f"{row[0]}.java"
             filepath = testcode_dir / filename
             if filepath.is_file():
-                content = filepath.read_text()
+                content = filepath.read_bytes()
                 cwes = [CWEs.from_id(int(row[3]))]
                 has_vuln = True if row[2] == "true" else False
                 files.append(
 
@@ -8,7 +8,7 @@
 import csv
 from typing import Self
 
-from codesectools.datasets.core.dataset import GitRepo, GitRepoDataset
+from codesectools.datasets.core.dataset import File, GitRepo, GitRepoDataset
 from codesectools.shared.cwe import CWEs
 from codesectools.utils import DATA_DIR
 
@@ -32,14 +32,14 @@ class CVEfixes(GitRepoDataset):
     license = "CC BY 4.0"
     license_url = "https://creativecommons.org/licenses/by/4.0/"
 
-    def __init__(self, lang: str | None = None) -> None:
+    def __init__(self, lang: str = "") -> None:
         """Initialize the CVEfixes dataset.
 
         Args:
             lang: The programming language of the dataset to load.
 
         """
-        self.max_repo_size = 100e6
+        self.max_repo_size = 100 * 10**6
         super().__init__(lang)
 
     def download_files(self: Self, test: bool = False) -> None:
@@ -53,7 +53,7 @@ def download_files(self: Self, test: bool = False) -> None:
 
     def load_dataset(
         self,
-    ) -> list[GitRepo]:
+    ) -> list[File]:
         """Load the CVEfixes dataset from its source CSV file.
 
         Parses a CSV file containing information about CVEs, repositories,
 
@@ -24,15 +24,15 @@ class TestCode(File):
     def __init__(
         self,
         filepath: Path,
-        content: str | bytes,
+        content: bytes,
         cwes: list[CWE],
         has_vuln: bool,
     ) -> None:
         """Initialize a TestCode instance.
 
         Args:
             filepath: The path to the file.
-            content: The content of the file, as a string or bytes.
+            content: The content of the file, as bytes.
             cwes: A list of CWEs associated with the file.
             has_vuln: A boolean indicating if the vulnerability is real or a false positive test case.
 
@@ -58,7 +58,7 @@ class JulietTestSuiteC(PrebuiltFileDataset):
     prebuilt_expected = (Path("."), "compile_commands.json")
     artifacts_arg = "compile_commands.json"
 
-    def __init__(self, lang: None | str = None) -> None:
+    def __init__(self, lang: str = "") -> None:
         """Initialize the JulietTestSuiteC dataset.
 
         Args:
@@ -68,7 +68,7 @@ def __init__(self, lang: None | str = None) -> None:
         """
         super().__init__(lang)
 
-    def __eq__(self, other: str | Self) -> bool:
+    def __eq__(self, other: object) -> bool:
         """Compare this dataset with another object for equality.
 
         Args:
@@ -118,7 +118,7 @@ def download_files(self: Self, test: bool = False) -> None:
                 if not cwe_dir.name.startswith("CWE835"):
                     shutil.rmtree(cwe_dir)
 
-    def load_dataset(self) -> list[TestCode]:
+    def load_dataset(self) -> list[File]:
         """Load the JulietTestSuiteC dataset from the source files.
 
         Parses the `manifest.xml` file to identify vulnerabilities in the C/C++
 
@@ -12,7 +12,7 @@
 """
 
 import importlib
-from typing import Any
+from typing import Any, Type
 
 from codesectools.datasets.core.dataset import Dataset
 from codesectools.utils import DATASETS_DIR
@@ -37,7 +37,7 @@ def _load(self) -> None:
             self.dataset_module = importlib.import_module(
                 f"codesectools.datasets.{self.name}.dataset"
             )
-            self.dataset: Dataset = getattr(self.dataset_module, self.name)
+            self.dataset: Type[Dataset] = getattr(self.dataset_module, self.name)
 
             self.loaded = True
 
 
@@ -19,8 +19,6 @@
 from codesectools.utils import USER_CACHE_DIR
 
 if TYPE_CHECKING:
-    from typing import Self
-
     from codesectools.sasts.core.parser import AnalysisResult, Defect
     from codesectools.shared.cwe import CWE
 
@@ -44,7 +42,7 @@ class Dataset(ABC):
     license: str
     license_url: str
 
-    def __init__(self, lang: str | None = None) -> None:
+    def __init__(self, lang: str = "") -> None:
         """Initialize the Dataset instance.
 
         Set up paths and load the dataset if a language is specified.
@@ -168,6 +166,7 @@ class PrebuiltDatasetMixin:
 
     """
 
+    name: str
     build_command: str
     prebuilt_expected: tuple[Path, str]
     artifacts_arg: str
@@ -223,14 +222,13 @@ class File(DatasetUnit):
     """
 
     def __init__(
-        self, filepath: Path, content: str | bytes, cwes: list[CWE], has_vuln: bool
+        self, filepath: Path, content: bytes, cwes: list[CWE], has_vuln: bool
     ) -> None:
         """Initialize a File instance.
 
         Args:
             filepath: The relative path of the file.
-            content: The content of the file, as a string or bytes. It will be
-                converted to bytes if provided as a string.
+            content: The content of the file, as bytes.
             cwes: A list of CWEs associated with the file.
             has_vuln: True if the vulnerability is real, False if it's
                 intended to be a false positive test case.
@@ -242,9 +240,6 @@ def __init__(
         self.cwes = cwes
         self.has_vuln = has_vuln
 
-        if isinstance(content, str):
-            self.content = content.encode()
-
     def __repr__(self) -> str:
         """Return a developer-friendly string representation of the File.
 
@@ -257,7 +252,7 @@ def __repr__(self) -> str:
     cwes: \t{self.cwes}
 )"""
 
-    def __eq__(self, other: str | Path | Self) -> bool:
+    def __eq__(self, other: object) -> bool:
         """Compare this File with another object for equality based on filepath.
 
         Args:
@@ -515,7 +510,7 @@ def __repr__(self) -> str:
     files: \t{self.files}
 )"""
 
-    def __eq__(self, other: str | Self) -> bool:
+    def __eq__(self, other: object) -> bool:
         """Compare this GitRepo with another object for equality based on name.
 
         Args:
@@ -589,7 +584,7 @@ def validate(self, analysis_results: list[AnalysisResult]) -> GitRepoDatasetData
         validated_repos = []
 
         for analysis_result in analysis_results:
-            repo = self.repos[self.repos.index(analysis_result.name)]
+            repo = self.repos[self.repos.index(analysis_result.name)]  # ty:ignore[invalid-argument-type]
 
             # 1. Process reported defects to get unique (file, cwe) pairs
             # and keep one original Defect object for each to retain metadata.
@@ -663,8 +658,7 @@ class GitRepoDatasetData(BenchmarkData):
 
     Attributes:
         dataset (GitRepoDataset): The dataset used for the benchmark.
-        validated_repos (list[dict]): A list of dictionaries, each containing
-            the validation results for a single repository.
+        validated_repos (list[dict]): A list of validation results per repository.
         total_repo_number (int): The total number of repositories in the dataset.
         defect_numbers (int): The total number of defects found across all repos.
 
 
@@ -8,7 +8,7 @@
 
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Any, Self
+from typing import Self
 
 from codesectools.shared.cwe import CWE
 
@@ -26,7 +26,7 @@ class Defect:
         cwe (CWE): The CWE associated with the defect.
         message (str): The description of the defect.
         location (tuple[int, int] | None): A tuple with the start and end line numbers of the defect.
-        data (tuple[Any]): Raw data from the SAST tool for this defect.
+        data (dict): Raw data from the SAST tool for this defect.
 
     """
 
@@ -40,7 +40,7 @@ def __init__(
         cwe: CWE,
         message: str,
         lines: list[int] | None,
-        data: tuple[Any],
+        data: dict,
     ) -> None:
         """Initialize a Defect instance.
 
@@ -90,7 +90,7 @@ class AnalysisResult(ABC):
         defects (list[Defect]): A list of `Defect` objects found.
         time (float): The duration of the analysis in seconds.
         loc (int): The number of lines of code analyzed.
-        data (tuple[Any]): Raw data from the SAST tool's output.
+        data (tuple): Raw data from the SAST tool's output.
 
     """
 
@@ -103,7 +103,7 @@ def __init__(
         defects: list[Defect],
         time: float,
         loc: int,
-        data: tuple[Any],
+        data: tuple,
     ) -> None:
         """Initialize an AnalysisResult instance.
 
@@ -161,7 +161,7 @@ def load_from_output_dir(cls, output_dir: Path) -> Self:
         pass
 
     @classmethod
-    def load_from_output_dirs(cls, output_dirs: list[str]) -> list[Self]:
+    def load_from_output_dirs(cls, output_dirs: list[Path]) -> list[Self]:
         """Load and parse analysis results from multiple directories.
 
         Args:
 
@@ -342,7 +342,7 @@ class PrebuiltSAST(SAST):
 
     def analyze_files(
         self,
-        dataset: PrebuiltFileDataset,
+        dataset: FileDataset,
         overwrite: bool = False,
         testing: bool = False,
     ) -> None:
@@ -359,6 +359,8 @@ def analyze_files(
         """
         from rich.panel import Panel
 
+        assert isinstance(dataset, PrebuiltFileDataset)
+
         if not dataset.is_built():
             prebuilt_dir, prebuilt_glob = dataset.prebuilt_expected
             panel = Panel(
 
@@ -1,23 +1,27 @@
 """Define requirements for SAST tools and their fulfillment status."""
 
+from __future__ import annotations
+
 import shutil
 from abc import ABC, abstractmethod
-from pathlib import Path
-from typing import Any, Literal, Self
+from typing import TYPE_CHECKING, Any, Literal
 
 import typer
 from rich import print
 
 from codesectools.utils import USER_CACHE_DIR, USER_CONFIG_DIR
 
+if TYPE_CHECKING:
+    from pathlib import Path
+
 
 class SASTRequirement(ABC):
     """Represent a single requirement for a SAST tool to be functional."""
 
     def __init__(
         self,
         name: str,
-        depends_on: list[Self] | None = None,
+        depends_on: list[SASTRequirement] | None = None,
         instruction: str | None = None,
         url: str | None = None,
         doc: bool = False,
@@ -94,6 +98,7 @@ class Config(SASTRequirement):
     def __init__(
         self,
         name: str,
+        sast_name: str,
         depends_on: list[SASTRequirement] | None = None,
         instruction: str | None = None,
         url: str | None = None,
@@ -103,19 +108,21 @@ def __init__(
 
         Args:
             name: The name of the requirement.
+            sast_name: The name of the SAST tool this config belongs to.
             depends_on: A list of other requirements that must be fulfilled first.
             instruction: A short instruction on how to download the requirement.
             url: A URL for more detailed instructions.
             doc: A flag indicating if the instruction is available in the documentation.
 
         """
+        self.sast_name = sast_name
         super().__init__(
             name=name, depends_on=depends_on, instruction=instruction, url=url, doc=doc
         )
 
-    def is_fulfilled(self, sast_name: str, **kwargs: Any) -> bool:
+    def is_fulfilled(self, **kwargs: Any) -> bool:
         """Check if the configuration file exists for the given SAST tool."""
-        return (USER_CONFIG_DIR / sast_name / self.name).is_file()
+        return (USER_CONFIG_DIR / self.sast_name / self.name).is_file()
 
 
 class Binary(SASTRequirement):