sourcegraph
diff --git a/benchmarks/ccb_mcp_compliance/ccx-compliance-051/tests/oracle_checks.py b/benchmarks/ccb_mcp_compliance/ccx-compliance-051/tests/oracle_checks.py
Lines changed: 22 additions & 0 deletions
diff --git a/benchmarks/ccb_mcp_compliance/ccx-compliance-052/tests/oracle_checks.py b/benchmarks/ccb_mcp_compliance/ccx-compliance-052/tests/oracle_checks.py
Lines changed: 22 additions & 0 deletions
diff --git a/benchmarks/ccb_mcp_compliance/ccx-compliance-053/tests/oracle_checks.py b/benchmarks/ccb_mcp_compliance/ccx-compliance-053/tests/oracle_checks.py
Lines changed: 22 additions & 0 deletions
diff --git a/benchmarks/ccb_mcp_compliance/ccx-compliance-057/tests/oracle_checks.py b/benchmarks/ccb_mcp_compliance/ccx-compliance-057/tests/oracle_checks.py
Lines changed: 22 additions & 0 deletions
diff --git a/benchmarks/ccb_mcp_compliance/ccx-compliance-057/tests/task_spec.json b/benchmarks/ccb_mcp_compliance/ccx-compliance-057/tests/task_spec.json
Lines changed: 1 addition & 9 deletions
diff --git a/benchmarks/ccb_mcp_compliance/ccx-compliance-115/tests/oracle_checks.py b/benchmarks/ccb_mcp_compliance/ccx-compliance-115/tests/oracle_checks.py
Lines changed: 22 additions & 0 deletions
diff --git a/benchmarks/ccb_mcp_compliance/ccx-compliance-115/tests/task_spec.json b/benchmarks/ccb_mcp_compliance/ccx-compliance-115/tests/task_spec.json
Lines changed: 65 additions & 1 deletion
diff --git a/benchmarks/ccb_mcp_compliance/ccx-compliance-118/tests/oracle_checks.py b/benchmarks/ccb_mcp_compliance/ccx-compliance-118/tests/oracle_checks.py
Lines changed: 22 additions & 0 deletions
diff --git a/benchmarks/ccb_mcp_compliance/ccx-compliance-118/tests/task_spec.json b/benchmarks/ccb_mcp_compliance/ccx-compliance-118/tests/task_spec.json
Lines changed: 10 additions & 1 deletion
diff --git a/benchmarks/ccb_mcp_compliance/ccx-compliance-124/tests/oracle_checks.py b/benchmarks/ccb_mcp_compliance/ccx-compliance-124/tests/oracle_checks.py
Lines changed: 22 additions & 0 deletions
@@ -38,6 +38,24 @@
 _MIRROR_HASH_RE = re.compile(r"--[0-9a-f]{6,}$")
 
 
+def _coerce_file_entry(entry) -> Dict[str, str]:
+    """Coerce a file entry to {"repo": ..., "path": ...} dict format.
+
+    Dicts are returned as-is. A string such as
+    "sg-evals/kubernetes--v1.32.0/pkg/file.go" uses its first two components
+    as the repo, or three when it starts with "github.com/"; "a/b" splits into
+    repo/path. Anything else becomes {"repo": "", "path": str(entry)}.
+    """
+    if isinstance(entry, dict):
+        return entry
+    if not isinstance(entry, str) or "/" not in entry:
+        return {"repo": "", "path": str(entry)}
+    # Host-qualified repos ("github.com/org/repo") carry one extra component.
+    hosted = entry.startswith("github.com/") and entry.count("/") >= 3
+    parts = entry.split("/", 3 if hosted else 2)
+    return {"repo": "/".join(parts[:-1]), "path": parts[-1]}
+
+
 def _normalize_repo(name: str) -> str:
     """Reduce a repo identifier to its base name for fuzzy comparison.
 
@@ -180,6 +198,10 @@ def check_file_set_match(
     >>> result["weighted_recall"]  # matched required(2) / total(3) = 0.6667
     0.6667
     """
+    # Coerce string entries to dicts (handles legacy oracle format)
+    oracle_files = [_coerce_file_entry(f) for f in oracle_files]
+    answer_files = [_coerce_file_entry(f) for f in answer_files]
+
     matched, missing, extra = _match_items(answer_files, oracle_files, ["repo", "path"])
 
     n_oracle = len({(f.get("repo", ""), f.get("path", "")) for f in oracle_files})
 
@@ -38,6 +38,24 @@
 _MIRROR_HASH_RE = re.compile(r"--[0-9a-f]{6,}$")
 
 
+def _coerce_file_entry(entry) -> Dict[str, str]:
+    """Coerce a file entry to {"repo": ..., "path": ...} dict format.
+
+    Dicts are returned as-is. A string such as
+    "sg-evals/kubernetes--v1.32.0/pkg/file.go" uses its first two components
+    as the repo, or three when it starts with "github.com/"; "a/b" splits into
+    repo/path. Anything else becomes {"repo": "", "path": str(entry)}.
+    """
+    if isinstance(entry, dict):
+        return entry
+    if not isinstance(entry, str) or "/" not in entry:
+        return {"repo": "", "path": str(entry)}
+    # Host-qualified repos ("github.com/org/repo") carry one extra component.
+    hosted = entry.startswith("github.com/") and entry.count("/") >= 3
+    parts = entry.split("/", 3 if hosted else 2)
+    return {"repo": "/".join(parts[:-1]), "path": parts[-1]}
+
+
 def _normalize_repo(name: str) -> str:
     """Reduce a repo identifier to its base name for fuzzy comparison.
 
@@ -180,6 +198,10 @@ def check_file_set_match(
     >>> result["weighted_recall"]  # matched required(2) / total(3) = 0.6667
     0.6667
     """
+    # Coerce string entries to dicts (handles legacy oracle format)
+    oracle_files = [_coerce_file_entry(f) for f in oracle_files]
+    answer_files = [_coerce_file_entry(f) for f in answer_files]
+
     matched, missing, extra = _match_items(answer_files, oracle_files, ["repo", "path"])
 
     n_oracle = len({(f.get("repo", ""), f.get("path", "")) for f in oracle_files})
 
@@ -38,6 +38,24 @@
 _MIRROR_HASH_RE = re.compile(r"--[0-9a-f]{6,}$")
 
 
+def _coerce_file_entry(entry) -> Dict[str, str]:
+    """Coerce a file entry to {"repo": ..., "path": ...} dict format.
+
+    Dicts are returned as-is. A string such as
+    "sg-evals/kubernetes--v1.32.0/pkg/file.go" uses its first two components
+    as the repo, or three when it starts with "github.com/"; "a/b" splits into
+    repo/path. Anything else becomes {"repo": "", "path": str(entry)}.
+    """
+    if isinstance(entry, dict):
+        return entry
+    if not isinstance(entry, str) or "/" not in entry:
+        return {"repo": "", "path": str(entry)}
+    # Host-qualified repos ("github.com/org/repo") carry one extra component.
+    hosted = entry.startswith("github.com/") and entry.count("/") >= 3
+    parts = entry.split("/", 3 if hosted else 2)
+    return {"repo": "/".join(parts[:-1]), "path": parts[-1]}
+
+
 def _normalize_repo(name: str) -> str:
     """Reduce a repo identifier to its base name for fuzzy comparison.
 
@@ -180,6 +198,10 @@ def check_file_set_match(
     >>> result["weighted_recall"]  # matched required(2) / total(3) = 0.6667
     0.6667
     """
+    # Coerce string entries to dicts (handles legacy oracle format)
+    oracle_files = [_coerce_file_entry(f) for f in oracle_files]
+    answer_files = [_coerce_file_entry(f) for f in answer_files]
+
     matched, missing, extra = _match_items(answer_files, oracle_files, ["repo", "path"])
 
     n_oracle = len({(f.get("repo", ""), f.get("path", "")) for f in oracle_files})
 
@@ -38,6 +38,24 @@
 _MIRROR_HASH_RE = re.compile(r"--[0-9a-f]{6,}$")
 
 
+def _coerce_file_entry(entry) -> Dict[str, str]:
+    """Coerce a file entry to {"repo": ..., "path": ...} dict format.
+
+    Dicts are returned as-is. A string such as
+    "sg-evals/kubernetes--v1.32.0/pkg/file.go" uses its first two components
+    as the repo, or three when it starts with "github.com/"; "a/b" splits into
+    repo/path. Anything else becomes {"repo": "", "path": str(entry)}.
+    """
+    if isinstance(entry, dict):
+        return entry
+    if not isinstance(entry, str) or "/" not in entry:
+        return {"repo": "", "path": str(entry)}
+    # Host-qualified repos ("github.com/org/repo") carry one extra component.
+    hosted = entry.startswith("github.com/") and entry.count("/") >= 3
+    parts = entry.split("/", 3 if hosted else 2)
+    return {"repo": "/".join(parts[:-1]), "path": parts[-1]}
+
+
 def _normalize_repo(name: str) -> str:
     """Reduce a repo identifier to its base name for fuzzy comparison.
 
@@ -180,6 +198,10 @@ def check_file_set_match(
     >>> result["weighted_recall"]  # matched required(2) / total(3) = 0.6667
     0.6667
     """
+    # Coerce string entries to dicts (handles legacy oracle format)
+    oracle_files = [_coerce_file_entry(f) for f in oracle_files]
+    answer_files = [_coerce_file_entry(f) for f in answer_files]
+
     matched, missing, extra = _match_items(answer_files, oracle_files, ["repo", "path"])
 
     n_oracle = len({(f.get("repo", ""), f.get("path", "")) for f in oracle_files})
 
@@ -26,15 +26,7 @@
     "modes": [
       "deterministic"
     ],
-    "checks": [
-      {
-        "type": "file_set_match",
-        "params": {
-          "search_pattern": "SocialService OR ProvideService",
-          "file_filter": ""
-        }
-      }
-    ],
+    "checks": [],
     "eval_script": "/tests/eval.sh",
     "pass_exit_code": 0
   },
 
@@ -38,6 +38,24 @@
 _MIRROR_HASH_RE = re.compile(r"--[0-9a-f]{6,}$")
 
 
+def _coerce_file_entry(entry) -> Dict[str, str]:
+    """Coerce a file entry to {"repo": ..., "path": ...} dict format.
+
+    Dicts are returned as-is. A string such as
+    "sg-evals/kubernetes--v1.32.0/pkg/file.go" uses its first two components
+    as the repo, or three when it starts with "github.com/"; "a/b" splits into
+    repo/path. Anything else becomes {"repo": "", "path": str(entry)}.
+    """
+    if isinstance(entry, dict):
+        return entry
+    if not isinstance(entry, str) or "/" not in entry:
+        return {"repo": "", "path": str(entry)}
+    # Host-qualified repos ("github.com/org/repo") carry one extra component.
+    hosted = entry.startswith("github.com/") and entry.count("/") >= 3
+    parts = entry.split("/", 3 if hosted else 2)
+    return {"repo": "/".join(parts[:-1]), "path": parts[-1]}
+
+
 def _normalize_repo(name: str) -> str:
     """Reduce a repo identifier to its base name for fuzzy comparison.
 
@@ -180,6 +198,10 @@ def check_file_set_match(
     >>> result["weighted_recall"]  # matched required(2) / total(3) = 0.6667
     0.6667
     """
+    # Coerce string entries to dicts (handles legacy oracle format)
+    oracle_files = [_coerce_file_entry(f) for f in oracle_files]
+    answer_files = [_coerce_file_entry(f) for f in answer_files]
+
     matched, missing, extra = _match_items(answer_files, oracle_files, ["repo", "path"])
 
     n_oracle = len({(f.get("repo", ""), f.get("path", "")) for f in oracle_files})
 
@@ -28,6 +28,66 @@
         {
           "repo": "django/django",
           "path": "django/contrib/auth/__init__.py"
+        },
+        {
+          "repo": "github.com/sg-evals/django--674eda1c",
+          "path": "django/contrib/admin/static/admin/img/README.md",
+          "tier": "required"
+        },
+        {
+          "repo": "github.com/sg-evals/django--674eda1c",
+          "path": "django/contrib/auth/migrations/0011_update_proxy_permissions.py",
+          "tier": "required"
+        },
+        {
+          "repo": "github.com/sg-evals/django--674eda1c",
+          "path": "docs/faq/general.txt",
+          "tier": "required"
+        },
+        {
+          "repo": "github.com/sg-evals/django--674eda1c",
+          "path": "docs/internals/contributing/accessibility.txt",
+          "tier": "required"
+        },
+        {
+          "repo": "github.com/sg-evals/django--674eda1c",
+          "path": "docs/releases/1.3.5.txt",
+          "tier": "required"
+        },
+        {
+          "repo": "github.com/sg-evals/django--674eda1c",
+          "path": "docs/releases/1.4.18.txt",
+          "tier": "required"
+        },
+        {
+          "repo": "github.com/sg-evals/django--674eda1c",
+          "path": "docs/releases/1.4.3.txt",
+          "tier": "required"
+        },
+        {
+          "repo": "github.com/sg-evals/django--674eda1c",
+          "path": "docs/releases/1.6.10.txt",
+          "tier": "required"
+        },
+        {
+          "repo": "github.com/sg-evals/django--674eda1c",
+          "path": "docs/releases/1.7.1.txt",
+          "tier": "required"
+        },
+        {
+          "repo": "github.com/sg-evals/django--674eda1c",
+          "path": "docs/releases/1.7.3.txt",
+          "tier": "required"
+        },
+        {
+          "repo": "github.com/sg-evals/django--674eda1c",
+          "path": "docs/releases/2.2.txt",
+          "tier": "required"
+        },
+        {
+          "repo": "github.com/sg-evals/django--674eda1c",
+          "path": "tests/model_inheritance_regress/models.py",
+          "tier": "required"
         }
       ],
       "required_symbols": [],
@@ -56,9 +116,13 @@
       {
         "type": "file_set_match",
         "params": {
-          "search_pattern": "Compliance OR Audit OR Django",
+          "search_pattern": "",
           "file_filter": ""
         }
+      },
+      {
+        "type": "dependency_chain",
+        "params": {}
       }
     ],
     "eval_script": "/tests/eval.sh",
 
@@ -38,6 +38,24 @@
 _MIRROR_HASH_RE = re.compile(r"--[0-9a-f]{6,}$")
 
 
+def _coerce_file_entry(entry) -> Dict[str, str]:
+    """Coerce a file entry to {"repo": ..., "path": ...} dict format.
+
+    Dicts are returned as-is. A string such as
+    "sg-evals/kubernetes--v1.32.0/pkg/file.go" uses its first two components
+    as the repo, or three when it starts with "github.com/"; "a/b" splits into
+    repo/path. Anything else becomes {"repo": "", "path": str(entry)}.
+    """
+    if isinstance(entry, dict):
+        return entry
+    if not isinstance(entry, str) or "/" not in entry:
+        return {"repo": "", "path": str(entry)}
+    # Host-qualified repos ("github.com/org/repo") carry one extra component.
+    hosted = entry.startswith("github.com/") and entry.count("/") >= 3
+    parts = entry.split("/", 3 if hosted else 2)
+    return {"repo": "/".join(parts[:-1]), "path": parts[-1]}
+
+
 def _normalize_repo(name: str) -> str:
     """Reduce a repo identifier to its base name for fuzzy comparison.
 
@@ -180,6 +198,10 @@ def check_file_set_match(
     >>> result["weighted_recall"]  # matched required(2) / total(3) = 0.6667
     0.6667
     """
+    # Coerce string entries to dicts (handles legacy oracle format)
+    oracle_files = [_coerce_file_entry(f) for f in oracle_files]
+    answer_files = [_coerce_file_entry(f) for f in answer_files]
+
     matched, missing, extra = _match_items(answer_files, oracle_files, ["repo", "path"])
 
     n_oracle = len({(f.get("repo", ""), f.get("path", "")) for f in oracle_files})
 
@@ -36,6 +36,11 @@
         {
           "repo": "django/django",
           "path": "django/contrib/admin/templates/admin/filter.html"
+        },
+        {
+          "repo": "github.com/sg-evals/django--674eda1c",
+          "path": "tests/admin_filters/tests.py",
+          "tier": "required"
         }
       ],
       "required_symbols": [],
@@ -68,9 +73,13 @@
       {
         "type": "file_set_match",
         "params": {
-          "search_pattern": "RelatedFieldListFilter OR ForeignKey OR ListFilter OR ChangeList",
+          "search_pattern": "",
           "file_filter": ""
         }
+      },
+      {
+        "type": "dependency_chain",
+        "params": {}
       }
     ],
     "eval_script": "/tests/eval.sh",
 
@@ -38,6 +38,24 @@
 _MIRROR_HASH_RE = re.compile(r"--[0-9a-f]{6,}$")
 
 
+def _coerce_file_entry(entry) -> Dict[str, str]:
+    """Coerce a file entry to {"repo": ..., "path": ...} dict format.
+
+    Dicts are returned as-is. A string such as
+    "sg-evals/kubernetes--v1.32.0/pkg/file.go" uses its first two components
+    as the repo, or three when it starts with "github.com/"; "a/b" splits into
+    repo/path. Anything else becomes {"repo": "", "path": str(entry)}.
+    """
+    if isinstance(entry, dict):
+        return entry
+    if not isinstance(entry, str) or "/" not in entry:
+        return {"repo": "", "path": str(entry)}
+    # Host-qualified repos ("github.com/org/repo") carry one extra component.
+    hosted = entry.startswith("github.com/") and entry.count("/") >= 3
+    parts = entry.split("/", 3 if hosted else 2)
+    return {"repo": "/".join(parts[:-1]), "path": parts[-1]}
+
+
 def _normalize_repo(name: str) -> str:
     """Reduce a repo identifier to its base name for fuzzy comparison.
 
@@ -180,6 +198,10 @@ def check_file_set_match(
     >>> result["weighted_recall"]  # matched required(2) / total(3) = 0.6667
     0.6667
     """
+    # Coerce string entries to dicts (handles legacy oracle format)
+    oracle_files = [_coerce_file_entry(f) for f in oracle_files]
+    answer_files = [_coerce_file_entry(f) for f in answer_files]
+
     matched, missing, extra = _match_items(answer_files, oracle_files, ["repo", "path"])
 
     n_oracle = len({(f.get("repo", ""), f.get("path", "")) for f in oracle_files})
Original file line number	Diff line number	Diff line change
`@@ -36,6 +36,11 @@`
`36`	`36`	`{`
`37`	`37`	`"repo": "django/django",`
`38`	`38`	`"path": "django/contrib/admin/templates/admin/filter.html"`
	`39`	`+ },`
	`40`	`+ {`
	`41`	`+ "repo": "github.com/sg-evals/django--674eda1c",`
	`42`	`+ "path": "tests/admin_filters/tests.py",`
	`43`	`+ "tier": "required"`
`39`	`44`	`}`
`40`	`45`	`],`
`41`	`46`	`"required_symbols": [],`
`@@ -68,9 +73,13 @@`
`68`	`73`	`{`
`69`	`74`	`"type": "file_set_match",`
`70`	`75`	`"params": {`
`71`		`- "search_pattern": "RelatedFieldListFilter OR ForeignKey OR ListFilter OR ChangeList",`
	`76`	`+ "search_pattern": "",`
`72`	`77`	`"file_filter": ""`
`73`	`78`	`}`
	`79`	`+ },`
	`80`	`+ {`
	`81`	`+ "type": "dependency_chain",`
	`82`	`+ "params": {}`
`74`	`83`	`}`
`75`	`84`	`],`
`76`	`85`	`"eval_script": "/tests/eval.sh",`