Commit 0c856b7
fix: oracle format mismatch — convert string entries to dicts, auto-infer checks
Root cause: 130 MCP-unique tasks had oracle_answer.json with string-format
file entries ("sg-evals/repo/path/file.go") instead of dict format
({"repo": ..., "path": ...}). oracle_checks.py crashed with
AttributeError: 'str' object has no attribute 'get'.
Additionally, ALL 211 MCP-unique tasks had 0 evaluation checks because
oracle_check_types was never populated in the selection file.
Fixes:
- hydrate_task_specs.py: add _normalize_file_entry() to convert strings
to dicts during hydration; auto-infer check types from oracle data
(files→file_set_match, symbols→symbol_resolution+keyword_presence,
chain→dependency_chain)
- oracle_checks.py: add _coerce_file_entry() as defensive normalization
in check_file_set_match() so string entries don't crash
- Re-ran hydration: 209/211 tasks now have evaluation checks
(2 tasks have genuinely empty oracles, need manual curation)
- Regenerated all 211 oracle_checks.py copies in task dirs
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 3fbd443 commit 0c856b7
File tree
380 files changed
+9475
-1113
lines changed
- benchmarks
- ccb_mcp_compliance
- ccx-compliance-051/tests
- ccx-compliance-052/tests
- ccx-compliance-053/tests
- ccx-compliance-057/tests
- ccx-compliance-115/tests
- ccx-compliance-118/tests
- ccx-compliance-124/tests
- ccx-compliance-182/tests
- ccx-compliance-183/tests
- ccx-compliance-184/tests
- ccx-compliance-185/tests
- ccx-compliance-186/tests
- ccx-compliance-187/tests
- ccx-compliance-188/tests
- ccx-compliance-189/tests
- ccx-compliance-190/tests
- ccx-compliance-191/tests
- ccx-compliance-192/tests
- ccx-compliance-193/tests
- ccx-compliance-194/tests
- ccb_mcp_crossorg
- ccx-crossorg-061/tests
- ccx-crossorg-062/tests
- ccx-crossorg-066/tests
- ccx-crossorg-121/tests
- ccx-crossorg-132/tests
- ccx-crossorg-208/tests
- ccx-crossorg-209/tests
- ccx-crossorg-210/tests
- ccx-crossorg-211/tests
- ccx-crossorg-212/tests
- ccx-crossorg-213/tests
- ccx-crossorg-214/tests
- ccx-crossorg-215/tests
- ccx-crossorg-216/tests
- ccx-crossorg-217/tests
- ccx-crossorg-218/tests
- ccx-crossorg-219/tests
- ccx-crossorg-220/tests
- ccx-crossorg-221/tests
- ccx-crossorg-222/tests
- ccb_mcp_crossrepo_tracing
- ccx-config-trace-003/tests
- ccx-config-trace-010/tests
- ccx-dep-trace-001/tests
- ccx-dep-trace-002/tests
- ccx-dep-trace-004/tests
- ccx-dep-trace-102/tests
- ccx-dep-trace-116/tests
- ccx-dep-trace-123/tests
- ccx-dep-trace-133/tests
- ccx-dep-trace-171/tests
- ccx-dep-trace-172/tests
- ccx-dep-trace-173/tests
- ccx-dep-trace-174/tests
- ccx-dep-trace-175/tests
- ccx-dep-trace-176/tests
- ccx-dep-trace-177/tests
- ccx-dep-trace-178/tests
- ccx-dep-trace-179/tests
- ccx-dep-trace-180/tests
- ccx-dep-trace-181/tests
- ccb_mcp_crossrepo
- ccx-dep-trace-106/tests
- ccx-dep-trace-253/tests
- ccx-dep-trace-254/tests
- ccx-dep-trace-255/tests
- ccx-dep-trace-256/tests
- ccx-dep-trace-257/tests
- ccx-dep-trace-258/tests
- ccx-dep-trace-259/tests
- ccx-dep-trace-260/tests
- ccx-dep-trace-261/tests
- ccx-dep-trace-262/tests
- ccx-dep-trace-263/tests
- ccx-dep-trace-264/tests
- ccx-dep-trace-265/tests
- ccx-dep-trace-266/tests
- ccx-dep-trace-267/tests
- ccx-dep-trace-268/tests
- ccx-dep-trace-269/tests
- ccx-dep-trace-270/tests
- ccx-dep-trace-271/tests
- ccb_mcp_domain
- ccx-domain-071/tests
- ccx-domain-072/tests
- ccx-domain-073/tests
- ccx-domain-074/tests
- ccx-domain-101/tests
- ccx-domain-112/tests
- ccx-domain-120/tests
- ccx-domain-129/tests
- ccx-domain-137/tests
- ccx-domain-140/tests
- ccx-domain-151/tests
- ccx-domain-152/tests
- ccx-domain-153/tests
- ccx-domain-154/tests
- ccx-domain-155/tests
- ccx-domain-156/tests
- ccx-domain-157/tests
- ccx-domain-158/tests
- ccx-domain-159/tests
- ccx-domain-160/tests
- ccb_mcp_incident
- ccx-incident-031/tests
- ccx-incident-032/tests
- ccx-incident-033/tests
- ccx-incident-034/tests
- ccx-incident-037/tests
- ccx-incident-108/tests
- ccx-incident-110/tests
- ccx-incident-113/tests
- ccx-incident-125/tests
- ccx-incident-131/tests
- ccx-incident-139/tests
- ccx-incident-142/tests
- ccx-incident-143/tests
- ccx-incident-144/tests
- ccx-incident-145/tests
- ccx-incident-146/tests
- ccx-incident-147/tests
- ccx-incident-148/tests
- ccx-incident-149/tests
- ccx-incident-150/tests
- ccb_mcp_migration
- ccx-migration-022/tests
- ccx-migration-025/tests
- ccx-migration-026/tests
- ccx-migration-027/tests
- ccx-migration-107/tests
- ccx-migration-114/tests
- ccx-migration-117/tests
- ccx-migration-195/tests
- ccx-migration-196/tests
- ccx-migration-197/tests
- ccx-migration-198/tests
- ccx-migration-199/tests
- ccx-migration-200/tests
- ccx-migration-201/tests
- ccx-migration-202/tests
- ccx-migration-203/tests
- ccx-migration-204/tests
- ccx-migration-205/tests
- ccx-migration-206/tests
- ccx-migration-207/tests
- ccb_mcp_onboarding
- ccx-onboard-041/tests
- ccx-onboard-042/tests
- ccx-onboard-043/tests
- ccx-onboard-044/tests
- ccx-onboard-050/tests
- ccx-onboard-103/tests
- ccx-onboard-109/tests
- ccx-onboard-128/tests
- ccx-onboard-134/tests
- ccx-onboard-136/tests
- ccx-onboard-138/tests
- ccb_mcp_org
- ccx-agentic-081/tests
- ccx-agentic-082/tests
- ccx-agentic-083/tests
- ccx-agentic-122/tests
- ccx-agentic-127/tests
- ccx-agentic-223/tests
- ccx-agentic-224/tests
- ccx-agentic-225/tests
- ccx-agentic-226/tests
- ccx-agentic-227/tests
- ccx-agentic-228/tests
- ccx-agentic-229/tests
- ccx-agentic-230/tests
- ccx-agentic-231/tests
- ccx-agentic-232/tests
- ccx-agentic-233/tests
- ccx-agentic-234/tests
- ccx-agentic-235/tests
- ccx-agentic-236/tests
- ccx-agentic-237/tests
- ccb_mcp_platform
- ccx-platform-091/tests
- ccx-platform-094/tests
- ccx-platform-100/tests
- ccx-platform-104/tests
- ccx-platform-119/tests
- ccx-platform-238/tests
- ccx-platform-239/tests
- ccx-platform-240/tests
- ccx-platform-241/tests
- ccx-platform-242/tests
- ccx-platform-243/tests
- ccx-platform-244/tests
- ccx-platform-245/tests
- ccx-platform-246/tests
- ccx-platform-247/tests
- ccx-platform-248/tests
- ccx-platform-249/tests
- ccx-platform-250/tests
- ccx-platform-251/tests
- ccx-platform-252/tests
- ccb_mcp_security
- ccx-vuln-remed-011/tests
- ccx-vuln-remed-012/tests
- ccx-vuln-remed-013/tests
- ccx-vuln-remed-014/tests
- ccx-vuln-remed-105/tests
- ccx-vuln-remed-111/tests
- ccx-vuln-remed-126/tests
- ccx-vuln-remed-130/tests
- ccx-vuln-remed-135/tests
- ccx-vuln-remed-141/tests
- ccx-vuln-remed-161/tests
- ccx-vuln-remed-162/tests
- ccx-vuln-remed-163/tests
- ccx-vuln-remed-164/tests
- ccx-vuln-remed-165/tests
- ccx-vuln-remed-166/tests
- ccx-vuln-remed-167/tests
- ccx-vuln-remed-168/tests
- ccx-vuln-remed-169/tests
- ccx-vuln-remed-170/tests
- scripts
- ccb_metrics
Some content is hidden
Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
380 files changed
+9475
-1113
lines changed
Lines changed: 22 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
38 | 38 | | |
39 | 39 | | |
40 | 40 | | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
41 | 59 | | |
42 | 60 | | |
43 | 61 | | |
| |||
180 | 198 | | |
181 | 199 | | |
182 | 200 | | |
| 201 | + | |
| 202 | + | |
| 203 | + | |
| 204 | + | |
183 | 205 | | |
184 | 206 | | |
185 | 207 | | |
| |||
Lines changed: 22 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
38 | 38 | | |
39 | 39 | | |
40 | 40 | | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
41 | 59 | | |
42 | 60 | | |
43 | 61 | | |
| |||
180 | 198 | | |
181 | 199 | | |
182 | 200 | | |
| 201 | + | |
| 202 | + | |
| 203 | + | |
| 204 | + | |
183 | 205 | | |
184 | 206 | | |
185 | 207 | | |
| |||
Lines changed: 22 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
38 | 38 | | |
39 | 39 | | |
40 | 40 | | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
41 | 59 | | |
42 | 60 | | |
43 | 61 | | |
| |||
180 | 198 | | |
181 | 199 | | |
182 | 200 | | |
| 201 | + | |
| 202 | + | |
| 203 | + | |
| 204 | + | |
183 | 205 | | |
184 | 206 | | |
185 | 207 | | |
| |||
Lines changed: 22 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
38 | 38 | | |
39 | 39 | | |
40 | 40 | | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
41 | 59 | | |
42 | 60 | | |
43 | 61 | | |
| |||
180 | 198 | | |
181 | 199 | | |
182 | 200 | | |
| 201 | + | |
| 202 | + | |
| 203 | + | |
| 204 | + | |
183 | 205 | | |
184 | 206 | | |
185 | 207 | | |
| |||
Lines changed: 1 addition & 9 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
26 | 26 | | |
27 | 27 | | |
28 | 28 | | |
29 | | - | |
30 | | - | |
31 | | - | |
32 | | - | |
33 | | - | |
34 | | - | |
35 | | - | |
36 | | - | |
37 | | - | |
| 29 | + | |
38 | 30 | | |
39 | 31 | | |
40 | 32 | | |
| |||
Lines changed: 22 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
38 | 38 | | |
39 | 39 | | |
40 | 40 | | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
41 | 59 | | |
42 | 60 | | |
43 | 61 | | |
| |||
180 | 198 | | |
181 | 199 | | |
182 | 200 | | |
| 201 | + | |
| 202 | + | |
| 203 | + | |
| 204 | + | |
183 | 205 | | |
184 | 206 | | |
185 | 207 | | |
| |||
Lines changed: 65 additions & 1 deletion
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
28 | 28 | | |
29 | 29 | | |
30 | 30 | | |
| 31 | + | |
| 32 | + | |
| 33 | + | |
| 34 | + | |
| 35 | + | |
| 36 | + | |
| 37 | + | |
| 38 | + | |
| 39 | + | |
| 40 | + | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
| 59 | + | |
| 60 | + | |
| 61 | + | |
| 62 | + | |
| 63 | + | |
| 64 | + | |
| 65 | + | |
| 66 | + | |
| 67 | + | |
| 68 | + | |
| 69 | + | |
| 70 | + | |
| 71 | + | |
| 72 | + | |
| 73 | + | |
| 74 | + | |
| 75 | + | |
| 76 | + | |
| 77 | + | |
| 78 | + | |
| 79 | + | |
| 80 | + | |
| 81 | + | |
| 82 | + | |
| 83 | + | |
| 84 | + | |
| 85 | + | |
| 86 | + | |
| 87 | + | |
| 88 | + | |
| 89 | + | |
| 90 | + | |
31 | 91 | | |
32 | 92 | | |
33 | 93 | | |
| |||
56 | 116 | | |
57 | 117 | | |
58 | 118 | | |
59 | | - | |
| 119 | + | |
60 | 120 | | |
61 | 121 | | |
| 122 | + | |
| 123 | + | |
| 124 | + | |
| 125 | + | |
62 | 126 | | |
63 | 127 | | |
64 | 128 | | |
| |||
Lines changed: 22 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
38 | 38 | | |
39 | 39 | | |
40 | 40 | | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
41 | 59 | | |
42 | 60 | | |
43 | 61 | | |
| |||
180 | 198 | | |
181 | 199 | | |
182 | 200 | | |
| 201 | + | |
| 202 | + | |
| 203 | + | |
| 204 | + | |
183 | 205 | | |
184 | 206 | | |
185 | 207 | | |
| |||
Lines changed: 10 additions & 1 deletion
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
36 | 36 | | |
37 | 37 | | |
38 | 38 | | |
| 39 | + | |
| 40 | + | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
39 | 44 | | |
40 | 45 | | |
41 | 46 | | |
| |||
68 | 73 | | |
69 | 74 | | |
70 | 75 | | |
71 | | - | |
| 76 | + | |
72 | 77 | | |
73 | 78 | | |
| 79 | + | |
| 80 | + | |
| 81 | + | |
| 82 | + | |
74 | 83 | | |
75 | 84 | | |
76 | 85 | | |
| |||
Lines changed: 22 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
38 | 38 | | |
39 | 39 | | |
40 | 40 | | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
41 | 59 | | |
42 | 60 | | |
43 | 61 | | |
| |||
180 | 198 | | |
181 | 199 | | |
182 | 200 | | |
| 201 | + | |
| 202 | + | |
| 203 | + | |
| 204 | + | |
183 | 205 | | |
184 | 206 | | |
185 | 207 | | |
| |||
0 commit comments