Skip to content

Commit 60ea357

Browse files
sjarmak and claude committed
Complete SDLC ground truth coverage (160/160) and fix curator runner bugs
Add ground_truth.json for all remaining SDLC tasks:
- 7 code-review tasks: files extracted directly from instructions
- 3 debug/understand tasks: files identified via codebase investigation
- 1 feature task (bustub): files from instruction
- Remaining tasks generated via Daytona curator (Opus, phase1 prompt)

Fix critical bugs in daytona_curator_runner.py:
- Fix TimeoutError catch: Python 3.10 concurrent.futures.TimeoutError is NOT the builtin TimeoutError — import and catch FuturesTimeoutError
- Add JSON rescue mechanism: haiku fallback + regex extraction when curator outputs prose instead of JSON
- Add rate limit detection and early abort
- Add clone retry with verification (3 attempts, backoff)

Also adds Strategies 4/4b/5 to _resolve_repos() in context_retrieval_agent.py for SWEAP FROM tag and TAC image parsing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 0447be5 commit 60ea357

File tree

50 files changed

+1200
-35
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+1200
-35
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"files": [
3+
"lib/ansible/galaxy/collection/__init__.py",
4+
"lib/ansible/galaxy/collection/concrete_artifact_manager.py"
5+
],
6+
"text": "Tar directory extraction fragility: _extract_tar_dir, _extract_tar_file, _get_tar_file_member in collection/__init__.py use tar.getmember() with custom cache that strips trailing separators. _tarfile_extract in concrete_artifact_manager.py."
7+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"model": "manual",
3+
"backend": "codebase-investigation",
4+
"prompt_version": "manual",
5+
"cost_usd": 0.0,
6+
"elapsed_sec": 0,
7+
"timestamp": "2026-03-03T22:25:04Z",
8+
"tool_calls": 0,
9+
"generator": "manual_codebase_investigation"
10+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"files": [
3+
"qutebrowser/components/braveadblock.py",
4+
"tests/unit/components/test_braveadblock.py"
5+
],
6+
"symbols": [
7+
{
8+
"file": "qutebrowser/components/braveadblock.py",
9+
"symbol": "read_cache",
10+
"repo": null
11+
},
12+
{
13+
"file": "qutebrowser/components/braveadblock.py",
14+
"symbol": "_map_exceptions",
15+
"repo": null
16+
},
17+
{
18+
"file": "qutebrowser/components/braveadblock.py",
19+
"symbol": "DeserializationError",
20+
"repo": null
21+
},
22+
{
23+
"file": "qutebrowser/components/braveadblock.py",
24+
"symbol": "BraveAdBlocker",
25+
"repo": null
26+
}
27+
]
28+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"has_ground_truth": true,
3+
"has_chunk_ground_truth": false,
4+
"ground_truth_source": "curator_agent",
5+
"ground_truth_confidence": "medium",
6+
"task_name": "qutebrowser-adblock-cache-regression-prove-001",
7+
"curator_agent_version": "2.0",
8+
"model": "claude-opus-4-6",
9+
"backend": "hybrid",
10+
"timestamp": "2026-03-03T18:03:58Z",
11+
"files_count": 2,
12+
"edit_files_count": 0,
13+
"chunks_count": 0,
14+
"symbols_count": 4,
15+
"cost_usd": 2.35366845,
16+
"elapsed_sec": 449.7,
17+
"exploration_notes": "The bug is in `braveadblock.py:read_cache()` (lines 249-254). When `adblock.Engine.deserialize_from_file()` is called on a cache file that exists but cannot be read (e.g., permission denied after system crash/disk corruption), it raises `OSError`. The current code only catches `DeserializationError`, so `OSError` propagates as an unhandled exception, crashing the application. The fix (commit a6bb26d13, not yet on HEAD) adds `except OSError as e:` after the `DeserializationError` handler. The reg"
18+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
{
2+
"files": [
3+
"qutebrowser/browser/webengine/darkmode.py",
4+
"tests/unit/browser/webengine/test_darkmode.py",
5+
"qutebrowser/utils/version.py",
6+
"qutebrowser/utils/utils.py"
7+
],
8+
"symbols": [
9+
{
10+
"file": "qutebrowser/browser/webengine/darkmode.py",
11+
"symbol": "Variant",
12+
"repo": null
13+
},
14+
{
15+
"file": "qutebrowser/browser/webengine/darkmode.py",
16+
"symbol": "_DEFINITIONS",
17+
"repo": null
18+
},
19+
{
20+
"file": "qutebrowser/browser/webengine/darkmode.py",
21+
"symbol": "_variant",
22+
"repo": null
23+
},
24+
{
25+
"file": "qutebrowser/browser/webengine/darkmode.py",
26+
"symbol": "_Setting",
27+
"repo": null
28+
},
29+
{
30+
"file": "qutebrowser/browser/webengine/darkmode.py",
31+
"symbol": "_Definition",
32+
"repo": null
33+
},
34+
{
35+
"file": "qutebrowser/browser/webengine/darkmode.py",
36+
"symbol": "copy_replace_setting",
37+
"repo": null
38+
},
39+
{
40+
"file": "qutebrowser/browser/webengine/darkmode.py",
41+
"symbol": "settings",
42+
"repo": null
43+
},
44+
{
45+
"file": "qutebrowser/utils/version.py",
46+
"symbol": "WebEngineVersions",
47+
"repo": null
48+
},
49+
{
50+
"file": "qutebrowser/utils/utils.py",
51+
"symbol": "VersionNumber",
52+
"repo": null
53+
}
54+
]
55+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"has_ground_truth": true,
3+
"has_chunk_ground_truth": false,
4+
"ground_truth_source": "curator_agent",
5+
"ground_truth_confidence": "medium",
6+
"task_name": "qutebrowser-darkmode-threshold-regression-prove-001",
7+
"curator_agent_version": "2.0",
8+
"model": "claude-opus-4-6",
9+
"backend": "hybrid",
10+
"timestamp": "2026-03-03T21:34:52Z",
11+
"files_count": 4,
12+
"edit_files_count": 0,
13+
"chunks_count": 0,
14+
"symbols_count": 9,
15+
"cost_usd": 3.1905410000000005,
16+
"elapsed_sec": 625.6,
17+
"exploration_notes": "The bug is in qutebrowser/browser/webengine/darkmode.py. Qt 6.4's Chromium renamed 'TextBrightnessThreshold' to 'ForegroundBrightnessThreshold'. The fix (commit 50efac08f) added: (1) a Variant.qt_64 enum member, (2) a copy_replace_setting method on _Definition, (3) a Qt 6.4 entry in _DEFINITIONS that maps threshold.text to ForegroundBrightnessThreshold, and (4) version detection in _variant() for Qt >= 6.4. Before the fix, Qt 6.4 was treated as Qt 6.3 and used the old TextBrightnessThreshold key"
18+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
{
2+
"files": [
3+
"qutebrowser/utils/urlutils.py",
4+
"qutebrowser/browser/navigate.py",
5+
"tests/unit/utils/test_urlutils.py"
6+
],
7+
"symbols": [
8+
{
9+
"file": "qutebrowser/utils/urlutils.py",
10+
"symbol": "incdec_number",
11+
"repo": null
12+
},
13+
{
14+
"file": "qutebrowser/utils/urlutils.py",
15+
"symbol": "_get_incdec_value",
16+
"repo": null
17+
},
18+
{
19+
"file": "qutebrowser/utils/urlutils.py",
20+
"symbol": "_URL_SEGMENTS",
21+
"repo": null
22+
},
23+
{
24+
"file": "qutebrowser/utils/urlutils.py",
25+
"symbol": "IncDecError",
26+
"repo": null
27+
}
28+
]
29+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"has_ground_truth": true,
3+
"has_chunk_ground_truth": false,
4+
"ground_truth_source": "curator_agent",
5+
"ground_truth_confidence": "medium",
6+
"task_name": "qutebrowser-url-regression-prove-001",
7+
"curator_agent_version": "2.0",
8+
"model": "claude-opus-4-6",
9+
"backend": "hybrid",
10+
"timestamp": "2026-03-03T18:06:21Z",
11+
"files_count": 3,
12+
"edit_files_count": 0,
13+
"chunks_count": 0,
14+
"symbols_count": 4,
15+
"cost_usd": 2.5924995,
16+
"elapsed_sec": 592.7,
17+
"exploration_notes": "The root cause is in urlutils.py lines 559-624. The _URL_SEGMENTS table uses QUrl.PrettyDecoded for getters and QUrl.StrictMode for setters. PrettyDecoded decodes percent-encoded characters that don't need encoding in their context (e.g., %20\u2192space). When the modified string is written back via StrictMode, the decoded characters (like literal spaces) make the URL invalid. The regression test at /workspace/regression_test.py demonstrates: (1) %20 in path/query causes URLs to become invalid after "
18+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"files": [
3+
"src/common/api/worker/rest/EntityRestClient.ts",
4+
"src/common/api/common/EntityClient.ts",
5+
"src/common/api/worker/crypto/CryptoFacade.ts",
6+
"src/common/api/worker/rest/DefaultEntityRestCache.ts",
7+
"src/mail-app/workerUtils/index/MailIndexer.ts"
8+
],
9+
"text": "Session key propagation gap: EntityRestClient.loadMultiple/resolveSessionKey does not pass ownerEncSessionKeyProvider through to decryption layer. CryptoFacade.resolveSessionKey cannot decrypt without the key. DefaultEntityRestCache forwards to EntityRestClient without propagating keys. MailIndexer fails silently on non-legacy mails."
10+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"model": "manual",
3+
"backend": "codebase-investigation",
4+
"prompt_version": "manual",
5+
"cost_usd": 0.0,
6+
"elapsed_sec": 0,
7+
"timestamp": "2026-03-03T22:25:04Z",
8+
"tool_calls": 0,
9+
"generator": "manual_codebase_investigation"
10+
}

0 commit comments

Comments
 (0)