Skip to content

Commit e96aef4

Browse files
committed
feat: expand deep review fixture coverage
Broaden the frontier eval suite with new authz, async, and CI-injection regressions so live runs cover more high-signal failure modes. Harden semantic matching around tenant-isolation, async-await, and GitHub Actions phrasing so benchmarks reward the substance of correct findings across frontier models. Made-with: Cursor
1 parent 9e7480c commit e96aef4

File tree

5 files changed

+218
-0
lines changed

5 files changed

+218
-0
lines changed

eval/fixtures/deep_review_suite/review_depth_async.json

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,46 @@
115115
"max_total": 8,
116116
"description": "Awaiting async permission checks is required before destructive actions.",
117117
"source": "deep-review-suite"
118+
},
119+
{
120+
"name": "typescript-async-foreach-not-awaited",
121+
"category": "bug",
122+
"language": "typescript",
123+
"difficulty": "Hard",
124+
"diff_content": "diff --git a/src/notify.ts b/src/notify.ts\nindex 1111111..2222222 100644\n--- a/src/notify.ts\n+++ b/src/notify.ts\n@@ -1,5 +1,7 @@\n export async function sendNotifications(users) {\n- await Promise.all(users.map((user) => sendNotification(user)));\n+ users.forEach(async (user) => {\n+ await sendNotification(user);\n+ });\n return { sent: true };\n }\n",
125+
"expected_findings": [
126+
{
127+
"description": "Async work launched inside forEach is not awaited before the function returns.",
128+
"severity": "Warning",
129+
"category": "Bug",
130+
"file_pattern": "src/notify.ts",
131+
"line_hint": 2,
132+
"contains_any": [
133+
"foreach does not await",
134+
"async callbacks are not awaited",
135+
"returns before sendnotification completes",
136+
"fire and forget",
137+
"promise is ignored",
138+
"map result is ignored",
139+
"does not await promises",
140+
"return before all notifications complete",
141+
"breaks the async contract"
142+
],
143+
"tags_any": [
144+
"async"
145+
]
146+
}
147+
],
148+
"negative_findings": [
149+
{
150+
"description": "Avoid style-only comments.",
151+
"contains": "style"
152+
}
153+
],
154+
"min_total": 1,
155+
"max_total": 8,
156+
"description": "Notification fan-out should keep awaiting all spawned async work.",
157+
"source": "deep-review-suite"
118158
}
119159
]
120160
}

eval/fixtures/deep_review_suite/review_depth_authz.json

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,46 @@
9393
"max_total": 8,
9494
"description": "Privilege-gated destructive operations must keep their role checks.",
9595
"source": "deep-review-suite"
96+
},
97+
{
98+
"name": "python-tenant-scope-removed-from-invoice-lookup",
99+
"category": "security",
100+
"language": "python",
101+
"difficulty": "Hard",
102+
"diff_content": "diff --git a/billing.py b/billing.py\nindex 1111111..2222222 100644\n--- a/billing.py\n+++ b/billing.py\n@@ -1,5 +1,5 @@\n def mark_invoice_paid(request, db):\n- invoice = db.invoices.find_one({\"id\": request.args[\"invoice_id\"], \"tenant_id\": request.user.tenant_id})\n+ invoice = db.invoices.find_one({\"id\": request.args[\"invoice_id\"]})\n invoice[\"status\"] = \"paid\"\n db.invoices.save(invoice)\n return {\"ok\": True}\n",
103+
"expected_findings": [
104+
{
105+
"description": "Invoice lookup is no longer constrained to the caller's tenant.",
106+
"severity": "Error",
107+
"category": "Security",
108+
"file_pattern": "billing.py",
109+
"line_hint": 2,
110+
"contains_any": [
111+
"tenant isolation",
112+
"cross-tenant access",
113+
"missing tenant filter",
114+
"authorization bypass",
115+
"idor",
116+
"invoice lookup is not scoped",
117+
"removed tenant_id check",
118+
"other tenants",
119+
"broken authorization"
120+
],
121+
"tags_any": [
122+
"authorization"
123+
]
124+
}
125+
],
126+
"negative_findings": [
127+
{
128+
"description": "Avoid style-only comments.",
129+
"contains": "style"
130+
}
131+
],
132+
"min_total": 1,
133+
"max_total": 8,
134+
"description": "Multi-tenant invoice access must stay scoped to the caller's tenant.",
135+
"source": "deep-review-suite"
96136
}
97137
]
98138
}

eval/fixtures/deep_review_suite/review_depth_supply_chain.json

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,61 @@
108108
"max_total": 8,
109109
"description": "Build pipelines should not execute unverified remote scripts.",
110110
"source": "deep-review-suite"
111+
},
112+
{
113+
"name": "yaml-pull-request-target-script-injection",
114+
"category": "security",
115+
"language": "yaml",
116+
"difficulty": "Expert",
117+
"diff_content": "diff --git a/.github/workflows/pr-check.yml b/.github/workflows/pr-check.yml\nindex 1111111..2222222 100644\n--- a/.github/workflows/pr-check.yml\n+++ b/.github/workflows/pr-check.yml\n@@ -1,5 +1,8 @@\n name: PR Check\n-on: pull_request\n+on: pull_request_target\n jobs:\n test:\n runs-on: ubuntu-latest\n+ steps:\n+ - run: echo \"${{ github.event.pull_request.title }}\" | bash\n",
118+
"expected_findings": [
119+
{
120+
"description": "pull_request_target can expose write-scoped credentials to untrusted PR code.",
121+
"severity": "Warning",
122+
"category": "Security",
123+
"file_pattern": ".github/workflows/pr-check.yml",
124+
"line_hint": 2,
125+
"contains_any": [
126+
"pull_request_target",
127+
"untrusted code with write permissions",
128+
"elevated github token",
129+
"ci injection"
130+
],
131+
"tags_any": [
132+
"supply-chain"
133+
],
134+
"rule_id": "sec.supply-chain.ci-injection"
135+
},
136+
{
137+
"description": "Attacker-controlled GitHub event data is piped into bash.",
138+
"severity": "Error",
139+
"category": "Security",
140+
"file_pattern": ".github/workflows/pr-check.yml",
141+
"line_hint": 7,
142+
"contains_any": [
143+
"script injection via github.event",
144+
"attacker-controlled pr title",
145+
"piped into bash",
146+
"untrusted github event context",
147+
"pr title is piped directly to bash",
148+
"arbitrary command execution"
149+
],
150+
"tags_any": [
151+
"supply-chain"
152+
],
153+
"rule_id": "sec.supply-chain.ci-injection"
154+
}
155+
],
156+
"negative_findings": [
157+
{
158+
"description": "Avoid style-only comments.",
159+
"contains": "style"
160+
}
161+
],
162+
"min_total": 1,
163+
"max_total": 8,
164+
"description": "CI workflows should not combine pull_request_target with attacker-controlled shell input.",
165+
"source": "deep-review-suite"
111166
}
112167
]
113168
}

src/commands/eval/pattern/matching/predicates.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,11 @@ fn category_aliases(expected: &str) -> &'static [&'static str] {
239239
"secret",
240240
"forbidden",
241241
"unauthorized",
242+
"tenant isolation",
243+
"cross tenant",
244+
"cross tenant access",
245+
"multi tenant",
246+
"other tenants",
242247
],
243248
"bug" => &[
244249
"bug",
@@ -251,8 +256,11 @@ fn category_aliases(expected: &str) -> &'static [&'static str] {
251256
"background task",
252257
"spawned task",
253258
"not awaited",
259+
"does not await",
254260
"missing await",
255261
"promise is always truthy",
262+
"async contract",
263+
"unhandled promise",
256264
"swallowed error",
257265
"logic error",
258266
"race condition",
@@ -289,6 +297,10 @@ fn canonicalize_semantic_text(text: &str) -> String {
289297
("authorisation", "authorization"),
290298
("access control", "authorization"),
291299
("broken access control", "authorization bypass"),
300+
("cross-tenant", "cross tenant"),
301+
("multi-tenancy", "tenant isolation"),
302+
("multi tenancy", "tenant isolation"),
303+
("tenant_id", "tenant"),
292304
("verbose-error", "information disclosure"),
293305
("verbose error", "information disclosure"),
294306
("debug-details", "information disclosure"),
@@ -313,6 +325,8 @@ fn canonicalize_semantic_text(text: &str) -> String {
313325
("attack vector", "risk"),
314326
("silently discarded", "swallowed error"),
315327
("silent failure", "swallowed error"),
328+
("piped directly to bash", "piped into bash"),
329+
("piped to bash", "piped into bash"),
316330
("sqli", "sql injection"),
317331
("xss", "cross site scripting"),
318332
("ssrf", "server side request forgery"),
@@ -359,6 +373,9 @@ fn semantic_tokens(text: &str) -> Vec<String> {
359373
"auth" | "authz" => "authorization".to_string(),
360374
"authn" => "authentication".to_string(),
361375
"access" => "authorization".to_string(),
376+
"tenants" => "tenant".to_string(),
377+
"promises" => "promise".to_string(),
378+
"callbacks" => "callback".to_string(),
362379
"piping" | "piped" | "pipes" => "pipe".to_string(),
363380
"bash" | "sh" => "shell".to_string(),
364381
"downloaded" | "downloading" | "downloads" => "download".to_string(),

src/commands/eval/pattern/matching/run.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,72 @@ mod tests {
243243
assert!(pattern.matches(&comment));
244244
}
245245

246+
#[test]
247+
fn test_eval_pattern_matches_bug_category_from_async_foreach_signals() {
248+
let comment = core::Comment {
249+
id: "comment-3f".to_string(),
250+
file_path: PathBuf::from("src/notify.ts"),
251+
line_number: 2,
252+
content: "forEach with async callback does not await promises, so the function returns before all notifications complete and breaks the async contract."
253+
.to_string(),
254+
rule_id: Some("async.foreach-no-await".to_string()),
255+
severity: Severity::Error,
256+
category: Category::BestPractice,
257+
suggestion: None,
258+
confidence: 0.9,
259+
code_suggestion: None,
260+
tags: vec!["async-await".to_string(), "promise".to_string()],
261+
fix_effort: FixEffort::Low,
262+
feedback: None,
263+
};
264+
265+
let pattern = EvalPattern {
266+
category: Some("bug".to_string()),
267+
contains_any: vec![
268+
"does not await promises".to_string(),
269+
"returns before all notifications complete".to_string(),
270+
],
271+
severity: Some("warning".to_string()),
272+
tags_any: vec!["async".to_string()],
273+
..Default::default()
274+
};
275+
276+
assert!(pattern.matches(&comment));
277+
}
278+
279+
#[test]
280+
fn test_eval_pattern_matches_security_category_from_tenant_isolation_signals() {
281+
let comment = core::Comment {
282+
id: "comment-3g".to_string(),
283+
file_path: PathBuf::from("billing.py"),
284+
line_number: 2,
285+
content: "Removed tenant_id check from invoice query, allowing users to access invoices from other tenants."
286+
.to_string(),
287+
rule_id: Some("sec.authz.missing-tenant-check".to_string()),
288+
severity: Severity::Error,
289+
category: Category::Bug,
290+
suggestion: None,
291+
confidence: 0.95,
292+
code_suggestion: None,
293+
tags: vec!["multi-tenancy".to_string(), "authorization".to_string()],
294+
fix_effort: FixEffort::Low,
295+
feedback: None,
296+
};
297+
298+
let pattern = EvalPattern {
299+
category: Some("security".to_string()),
300+
contains_any: vec![
301+
"other tenants".to_string(),
302+
"removed tenant_id check".to_string(),
303+
],
304+
severity: Some("error".to_string()),
305+
tags_any: vec!["authorization".to_string()],
306+
..Default::default()
307+
};
308+
309+
assert!(pattern.matches(&comment));
310+
}
311+
246312
#[test]
247313
fn test_eval_pattern_matches_adjacent_line_hint() {
248314
let comment = core::Comment {

0 commit comments

Comments
 (0)