Skip to content

Commit c0c0139

Browse files
committed
test(eval): add fix-loop regression fixtures
1 parent 19d6fb3 commit c0c0139

4 files changed

Lines changed: 109 additions & 1 deletion

File tree

TODO.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ This roadmap is derived from deep research into Greptile's public docs, blog, MC
5757
27. [x] Add "challenge the finding" verification loops where a validator tries to falsify a suspected issue before keeping it.
5858
28. [ ] Add caching between iterations so repeated codebase retrieval and verification runs are cheaper.
5959
29. [x] Allow loop policies to differ by profile: conservative auditor, high-autonomy fixer, or report-only.
60-
30. [ ] Add eval fixtures specifically for loop convergence and reopened-issue regressions.
60+
30. [x] Add eval fixtures specifically for loop convergence and reopened-issue regressions.
6161

6262
## 4. Code Graph and Repository Intelligence
6363

eval/fixtures/repo_regressions/fix_loop_premature_convergence.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
name: repo regression - fix loop premature convergence
2+
repo_path: ../../..
3+
diff: |
4+
diff --git a/src/server/api/gh.rs b/src/server/api/gh.rs
5+
index eaf02e9..deadbeef 100644
6+
--- a/src/server/api/gh.rs
7+
+++ b/src/server/api/gh.rs
8+
@@ -1949,8 +1949,7 @@ pub(crate) async fn run_gh_pr_fix_loop(
9+
})?;
10+
11+
if latest_summary_ref.merge_readiness == MergeReadiness::Ready
12+
&& latest_summary_ref.open_blockers == 0
13+
- && latest_summary_ref.open_comments == 0
14+
{
15+
return Ok(Json(build_pr_fix_loop_response(PrFixLoopResponseArgs {
16+
repo: request.repo.clone(),
17+
expect:
18+
must_find:
19+
- file: src/server/api/gh.rs
20+
contains_any:
21+
- prematurely marks the fix loop as converged
22+
- unresolved findings can still remain when the loop stops
23+
- missing open_comments check in the convergence guard
24+
rule_id: bug.fix-loop.premature-convergence
25+
must_not_find:
26+
- contains: style
27+
min_total: 1
28+
max_total: 8
eval/fixtures/repo_regressions/fix_loop_reopened_findings.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
name: repo regression - reopened finding telemetry broken
2+
repo_path: ../../..
3+
diff: |
4+
diff --git a/src/server/api/gh.rs b/src/server/api/gh.rs
5+
index eaf02e9..cafef00d 100644
6+
--- a/src/server/api/gh.rs
7+
+++ b/src/server/api/gh.rs
8+
@@ -1075,8 +1075,8 @@ pub(crate) fn build_fix_loop_telemetry(
9+
findings_cleared += previous_findings.difference(&current_findings).count();
10+
findings_reopened += current_findings
11+
.difference(&previous_findings)
12+
- .filter(|finding_id| historical_findings.contains(*finding_id))
13+
+ .filter(|finding_id| previous_findings.contains(*finding_id))
14+
.count();
15+
16+
historical_findings.extend(current_findings);
17+
expect:
18+
must_find:
19+
- file: src/server/api/gh.rs
20+
contains_any:
21+
- reopened findings will never be counted correctly
22+
- logic bug in reopened finding telemetry
23+
- a finding in current minus previous can never also be in previous_findings
24+
rule_id: bug.fix-loop.reopened-finding-telemetry
25+
must_not_find:
26+
- contains: style
27+
min_total: 1
28+
max_total: 8

src/commands/eval/fixtures.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,58 @@ expect:
172172
.any(|phrase| phrase.contains("trait implementation")));
173173
}
174174

175+
#[test]
176+
fn test_checked_in_fix_loop_convergence_fixture_loads_expected_fields() {
177+
let fixture_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
178+
.join("eval/fixtures/repo_regressions/fix_loop_premature_convergence.yml");
179+
180+
let fixtures = load_eval_fixtures_from_path(&fixture_path).unwrap();
181+
182+
assert_eq!(fixtures.len(), 1);
183+
assert_eq!(
184+
fixtures[0].fixture.name.as_deref(),
185+
Some("repo regression - fix loop premature convergence")
186+
);
187+
assert_eq!(
188+
fixtures[0].fixture.repo_path,
189+
Some(std::path::PathBuf::from("../../.."))
190+
);
191+
assert_eq!(
192+
fixtures[0].fixture.expect.must_find[0].file.as_deref(),
193+
Some("src/server/api/gh.rs")
194+
);
195+
assert_eq!(
196+
fixtures[0].fixture.expect.must_find[0].rule_id.as_deref(),
197+
Some("bug.fix-loop.premature-convergence")
198+
);
199+
}
200+
201+
#[test]
202+
fn test_checked_in_fix_loop_reopened_fixture_loads_expected_fields() {
203+
let fixture_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
204+
.join("eval/fixtures/repo_regressions/fix_loop_reopened_findings.yml");
205+
206+
let fixtures = load_eval_fixtures_from_path(&fixture_path).unwrap();
207+
208+
assert_eq!(fixtures.len(), 1);
209+
assert_eq!(
210+
fixtures[0].fixture.name.as_deref(),
211+
Some("repo regression - reopened finding telemetry broken")
212+
);
213+
assert_eq!(
214+
fixtures[0].fixture.repo_path,
215+
Some(std::path::PathBuf::from("../../.."))
216+
);
217+
assert_eq!(
218+
fixtures[0].fixture.expect.must_find[0].file.as_deref(),
219+
Some("src/server/api/gh.rs")
220+
);
221+
assert_eq!(
222+
fixtures[0].fixture.expect.must_find[0].rule_id.as_deref(),
223+
Some("bug.fix-loop.reopened-finding-telemetry")
224+
);
225+
}
226+
175227
#[test]
176228
fn test_collect_eval_fixtures_expands_pack_entries_in_sorted_order() {
177229
let dir = tempdir().unwrap();

0 commit comments

Comments
 (0)