-
Notifications
You must be signed in to change notification settings - Fork 699
Fix: Eval hash mismatch due to parameter truncation in DB storage #1523
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -113,6 +113,7 @@ class ComponentIdentifier: | |
| KEY_CLASS_NAME: ClassVar[str] = "class_name" | ||
| KEY_CLASS_MODULE: ClassVar[str] = "class_module" | ||
| KEY_HASH: ClassVar[str] = "hash" | ||
| KEY_EVAL_HASH: ClassVar[str] = "eval_hash" | ||
| KEY_PYRIT_VERSION: ClassVar[str] = "pyrit_version" | ||
| KEY_CHILDREN: ClassVar[str] = "children" | ||
| LEGACY_KEY_TYPE: ClassVar[str] = "__type__" | ||
|
|
@@ -130,6 +131,10 @@ class ComponentIdentifier: | |
| hash: str = field(init=False, compare=False) | ||
| #: Version tag for storage. Not included in hash. | ||
| pyrit_version: str = field(default_factory=lambda: pyrit.__version__, compare=False) | ||
| #: Evaluation hash preserved from DB round-trip. Computed before truncation and | ||
| #: stored alongside the identity so that EvaluationIdentifier can use it directly | ||
| #: instead of recomputing from potentially truncated params. | ||
| stored_eval_hash: Optional[str] = field(default=None, init=False, compare=False) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. want to rename to eval_hash |
||
|
|
||
| def __post_init__(self) -> None: | ||
| """Compute the content-addressed hash at creation time.""" | ||
|
|
@@ -231,7 +236,7 @@ def normalize(cls, value: Union[ComponentIdentifier, dict[str, Any]]) -> Compone | |
| return cls.from_dict(value) | ||
| raise TypeError(f"Expected ComponentIdentifier or dict, got {type(value).__name__}") | ||
|
|
||
| def to_dict(self, *, max_value_length: Optional[int] = None) -> dict[str, Any]: | ||
| def to_dict(self, *, max_value_length: Optional[int] = None, eval_hash: Optional[str] = None) -> dict[str, Any]: | ||
| """ | ||
| Serialize to a JSON-compatible dictionary for DB/JSONL storage. | ||
|
|
||
|
|
@@ -246,6 +251,10 @@ def to_dict(self, *, max_value_length: Optional[int] = None) -> dict[str, Any]: | |
| DB storage where column sizes may be limited. The truncation applies | ||
| only to param values, not to structural keys like class_name or hash. | ||
| The limit is propagated to children. Defaults to None (no truncation). | ||
| eval_hash (Optional[str]): If provided, the evaluation hash is included in | ||
| the serialized dict. This should be computed before truncation so that | ||
| it can be recovered via ``from_dict()`` even when param values are | ||
| truncated. Defaults to None (no eval_hash stored). | ||
|
|
||
| Returns: | ||
| Dict[str, Any]: JSON-serializable dictionary suitable for database storage | ||
|
|
@@ -258,6 +267,11 @@ def to_dict(self, *, max_value_length: Optional[int] = None) -> dict[str, Any]: | |
| self.KEY_PYRIT_VERSION: self.pyrit_version, | ||
| } | ||
|
|
||
| # Include eval_hash if explicitly provided or if preserved from a prior round-trip | ||
| effective_eval_hash = eval_hash if eval_hash is not None else self.stored_eval_hash | ||
| if effective_eval_hash is not None: | ||
| result[self.KEY_EVAL_HASH] = effective_eval_hash | ||
|
|
||
| for key, value in self.params.items(): | ||
| result[key] = self._truncate_value(value=value, max_length=max_value_length) | ||
|
|
||
|
|
@@ -324,6 +338,7 @@ def from_dict(cls, data: dict[str, Any]) -> ComponentIdentifier: | |
| class_module = data.pop(cls.KEY_CLASS_MODULE, None) or data.pop(cls.LEGACY_KEY_MODULE, None) or "unknown" | ||
|
|
||
| stored_hash = data.pop(cls.KEY_HASH, None) | ||
| stored_eval_hash = data.pop(cls.KEY_EVAL_HASH, None) | ||
| pyrit_version = data.pop(cls.KEY_PYRIT_VERSION, pyrit.__version__) | ||
|
|
||
| # Reconstruct children | ||
|
|
@@ -346,6 +361,11 @@ def from_dict(cls, data: dict[str, Any]) -> ComponentIdentifier: | |
| if stored_hash: | ||
| object.__setattr__(identifier, "hash", stored_hash) | ||
|
|
||
| # Preserve stored eval_hash if available — computed before truncation | ||
| # so that EvaluationIdentifier can use it directly. | ||
| if stored_eval_hash: | ||
| object.__setattr__(identifier, "stored_eval_hash", stored_eval_hash) | ||
|
|
||
| return identifier | ||
|
|
||
| def get_child(self, key: str) -> Optional[ComponentIdentifier]: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,6 +70,25 @@ def __init__(self, *, validator: ScorerPromptValidator): | |
| """ | ||
| self._validator = validator | ||
|
|
||
| def get_identifier(self) -> ComponentIdentifier: | ||
| """ | ||
| Get the scorer's identifier with eval_hash always attached. | ||
|
|
||
| Overrides the base ``Identifiable.get_identifier()`` so that | ||
| ``to_dict()`` always emits the ``eval_hash`` key. This lets consumers | ||
| see at a glance whether the scorer matches a registry entry. | ||
|
|
||
| Returns: | ||
| ComponentIdentifier: The identity with ``stored_eval_hash`` set. | ||
| """ | ||
| identifier = super().get_identifier() | ||
| if identifier.stored_eval_hash is None: | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can make this better |
||
| from pyrit.identifiers.evaluation_identifier import ScorerEvaluationIdentifier | ||
|
|
||
| eval_hash = ScorerEvaluationIdentifier(identifier).eval_hash | ||
| object.__setattr__(identifier, "stored_eval_hash", eval_hash) | ||
| return identifier | ||
|
|
||
| def get_eval_hash(self) -> str: | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should get rid of get_eval_hash |
||
| """ | ||
| Compute a behavioral equivalence hash for evaluation grouping. | ||
|
|
@@ -81,10 +100,7 @@ def get_eval_hash(self) -> str: | |
| Returns: | ||
| str: A hex-encoded SHA256 hash suitable for eval registry keying. | ||
| """ | ||
| # Deferred import to avoid circular dependency (evaluation_identifier → identifiers → …) | ||
| from pyrit.identifiers.evaluation_identifier import ScorerEvaluationIdentifier | ||
|
|
||
| return ScorerEvaluationIdentifier(self.get_identifier()).eval_hash | ||
| return self.get_identifier().stored_eval_hash | ||
|
|
||
| @property | ||
| def scorer_type(self) -> ScoreType: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you re-run the configuring scenarios notebook?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is another bug because scenarios aren't setting the underlying model so it's defaulting to "gpt-4o" and the hash is different. I want scenarios to grab this from the registry so there isn't a mismatch. But for now the notebook doesn't update. I'd like to tackle with a future PR