Skip to content

Commit c8d0745

Browse files
committed
refactor: split review verification parser helpers
Separate verification schema construction, JSON decoding, regex fallback parsing, and auto-zero reconciliation so parser changes stay localized without changing verifier behavior. Made-with: Cursor
1 parent d4b6973 commit c8d0745

6 files changed

Lines changed: 267 additions & 201 deletions

File tree

TODO.md

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,6 @@
1010

1111
## Review Backlog
1212

13-
- [ ] `src/review/verification/parser.rs`
14-
- Split response schema construction from response parsing.
15-
- Split fenced-JSON extraction / JSON decoding from regex fallback parsing.
16-
- Split auto-zero detection from verification-result reconciliation.
17-
- Keep `parse_verification_response()` as a thin coordinator.
1813
- [ ] `src/review/verification/prompt.rs`
1914
- Split diff evidence extraction, source context extraction, and supporting-context gathering.
2015
- Split supporting-context scoring/truncation from final formatting.

src/review/verification/parser.rs

Lines changed: 17 additions & 196 deletions
Original file line numberDiff line numberDiff line change
@@ -1,212 +1,33 @@
1-
use once_cell::sync::Lazy;
2-
use regex::Regex;
3-
use serde_json::Value;
4-
51
use crate::adapters::llm::StructuredOutputSchema;
62
use crate::core::Comment;
73

84
use super::VerificationResult;
95

10-
const AUTO_ZERO_PATTERNS: &[&str] = &[
11-
"docstring",
12-
"doc comment",
13-
"documentation comment",
14-
"type hint",
15-
"type annotation",
16-
"import order",
17-
"import sorting",
18-
"unused import",
19-
"trailing whitespace",
20-
"trailing newline",
21-
];
6+
#[path = "parser/auto_zero.rs"]
7+
mod auto_zero;
8+
#[path = "parser/json.rs"]
9+
mod json;
10+
#[path = "parser/schema.rs"]
11+
mod schema;
12+
#[path = "parser/text.rs"]
13+
mod text;
2214

23-
pub fn is_auto_zero(content: &str) -> bool {
24-
let lower = content.to_lowercase();
25-
AUTO_ZERO_PATTERNS.iter().any(|p| lower.contains(p))
26-
}
15+
use auto_zero::apply_auto_zero;
16+
use json::parse_verification_json;
17+
use text::parse_verification_text;
18+
19+
#[allow(unused_imports)]
20+
pub use auto_zero::is_auto_zero;
2721

2822
pub(super) fn verification_response_schema() -> StructuredOutputSchema {
29-
StructuredOutputSchema::json_schema(
30-
"verification_results",
31-
serde_json::json!({
32-
"type": "array",
33-
"items": {
34-
"type": "object",
35-
"additionalProperties": false,
36-
"required": [
37-
"index",
38-
"accurate",
39-
"line_correct",
40-
"suggestion_sound",
41-
"score",
42-
"reason"
43-
],
44-
"properties": {
45-
"index": {"type": "integer", "minimum": 1},
46-
"accurate": {"type": "boolean"},
47-
"line_correct": {"type": "boolean"},
48-
"suggestion_sound": {"type": "boolean"},
49-
"score": {"type": "integer", "minimum": 0, "maximum": 10},
50-
"reason": {"type": "string"}
51-
}
52-
}
53-
}),
54-
)
23+
schema::verification_response_schema()
5524
}
5625

5726
pub(super) fn parse_verification_response(
5827
content: &str,
5928
comments: &[Comment],
6029
) -> Vec<VerificationResult> {
61-
static FINDING_PATTERN: Lazy<Regex> = Lazy::new(|| {
62-
Regex::new(r"(?i)FINDING\s+(\d+)\s*:\s*score\s*=\s*(\d+)\s+accurate\s*=\s*(true|false)(?:\s+line_correct\s*=\s*(true|false))?(?:\s+suggestion_sound\s*=\s*(true|false))?\s+reason\s*=\s*(.+)")
63-
.unwrap()
64-
});
65-
66-
if let Some(results) = parse_verification_json(content, comments) {
67-
return apply_auto_zero(results, comments);
68-
}
69-
70-
let mut results = Vec::new();
71-
72-
for line in content.lines() {
73-
if let Some(caps) = FINDING_PATTERN.captures(line) {
74-
let index: usize = caps
75-
.get(1)
76-
.expect("capture group 1 (index) must exist after regex match")
77-
.as_str()
78-
.parse()
79-
.unwrap_or(0);
80-
let score: u8 = caps
81-
.get(2)
82-
.expect("capture group 2 (score) must exist after regex match")
83-
.as_str()
84-
.parse()
85-
.unwrap_or(0);
86-
let accurate = caps
87-
.get(3)
88-
.expect("capture group 3 (accurate) must exist after regex match")
89-
.as_str()
90-
.to_lowercase()
91-
== "true";
92-
let line_correct = caps
93-
.get(4)
94-
.map(|value| value.as_str().eq_ignore_ascii_case("true"))
95-
.unwrap_or(accurate);
96-
let suggestion_sound = caps
97-
.get(5)
98-
.map(|value| value.as_str().eq_ignore_ascii_case("true"))
99-
.unwrap_or(true);
100-
let reason = caps
101-
.get(6)
102-
.expect("capture group 6 (reason) must exist after regex match")
103-
.as_str()
104-
.trim()
105-
.to_string();
106-
107-
if index > 0 && index <= comments.len() {
108-
results.push(VerificationResult {
109-
comment_id: comments[index - 1].id.clone(),
110-
accurate,
111-
line_correct,
112-
suggestion_sound,
113-
score: score.min(10),
114-
reason,
115-
});
116-
}
117-
}
118-
}
119-
30+
let results = parse_verification_json(content, comments)
31+
.unwrap_or_else(|| parse_verification_text(content, comments));
12032
apply_auto_zero(results, comments)
12133
}
122-
123-
fn parse_verification_json(content: &str, comments: &[Comment]) -> Option<Vec<VerificationResult>> {
124-
let trimmed = content.trim();
125-
let candidate = if trimmed.starts_with("```") {
126-
trimmed
127-
.lines()
128-
.skip_while(|line| line.trim_start().starts_with("```"))
129-
.take_while(|line| !line.trim_start().starts_with("```"))
130-
.collect::<Vec<_>>()
131-
.join("\n")
132-
} else {
133-
trimmed.to_string()
134-
};
135-
136-
let value = serde_json::from_str::<Value>(&candidate).ok()?;
137-
let items = if let Some(array) = value.as_array() {
138-
array.clone()
139-
} else {
140-
value
141-
.get("results")
142-
.and_then(|results| results.as_array())
143-
.cloned()?
144-
};
145-
146-
let mut results = Vec::new();
147-
for item in items {
148-
let index = item.get("index").and_then(|value| value.as_u64())? as usize;
149-
if index == 0 || index > comments.len() {
150-
continue;
151-
}
152-
let accurate = item
153-
.get("accurate")
154-
.and_then(|value| value.as_bool())
155-
.unwrap_or(false);
156-
let line_correct = item
157-
.get("line_correct")
158-
.and_then(|value| value.as_bool())
159-
.unwrap_or(accurate);
160-
let suggestion_sound = item
161-
.get("suggestion_sound")
162-
.and_then(|value| value.as_bool())
163-
.unwrap_or(true);
164-
let score = item
165-
.get("score")
166-
.and_then(|value| value.as_u64())
167-
.unwrap_or(0)
168-
.min(10) as u8;
169-
let reason = item
170-
.get("reason")
171-
.and_then(|value| value.as_str())
172-
.unwrap_or("No reason provided")
173-
.to_string();
174-
175-
results.push(VerificationResult {
176-
comment_id: comments[index - 1].id.clone(),
177-
accurate,
178-
line_correct,
179-
suggestion_sound,
180-
score,
181-
reason,
182-
});
183-
}
184-
Some(results)
185-
}
186-
187-
fn apply_auto_zero(
188-
mut results: Vec<VerificationResult>,
189-
comments: &[Comment],
190-
) -> Vec<VerificationResult> {
191-
for comment in comments {
192-
if is_auto_zero(&comment.content) {
193-
if let Some(existing) = results.iter_mut().find(|r| r.comment_id == comment.id) {
194-
existing.accurate = false;
195-
existing.line_correct = false;
196-
existing.score = 0;
197-
existing.reason = "Auto-zero: noise category".to_string();
198-
} else {
199-
results.push(VerificationResult {
200-
comment_id: comment.id.clone(),
201-
accurate: false,
202-
line_correct: false,
203-
suggestion_sound: false,
204-
score: 0,
205-
reason: "Auto-zero: noise category".to_string(),
206-
});
207-
}
208-
}
209-
}
210-
211-
results
212-
}
src/review/verification/parser/auto_zero.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
use crate::core::Comment;
2+
3+
use super::super::VerificationResult;
4+
5+
const AUTO_ZERO_PATTERNS: &[&str] = &[
6+
"docstring",
7+
"doc comment",
8+
"documentation comment",
9+
"type hint",
10+
"type annotation",
11+
"import order",
12+
"import sorting",
13+
"unused import",
14+
"trailing whitespace",
15+
"trailing newline",
16+
];
17+
18+
pub fn is_auto_zero(content: &str) -> bool {
19+
let lower = content.to_lowercase();
20+
AUTO_ZERO_PATTERNS
21+
.iter()
22+
.any(|pattern| lower.contains(pattern))
23+
}
24+
25+
pub(super) fn apply_auto_zero(
26+
mut results: Vec<VerificationResult>,
27+
comments: &[Comment],
28+
) -> Vec<VerificationResult> {
29+
for comment in comments {
30+
if !is_auto_zero(&comment.content) {
31+
continue;
32+
}
33+
34+
if let Some(existing) = results
35+
.iter_mut()
36+
.find(|result| result.comment_id == comment.id)
37+
{
38+
existing.accurate = false;
39+
existing.line_correct = false;
40+
existing.score = 0;
41+
existing.reason = "Auto-zero: noise category".to_string();
42+
} else {
43+
results.push(auto_zero_result(comment));
44+
}
45+
}
46+
47+
results
48+
}
49+
50+
fn auto_zero_result(comment: &Comment) -> VerificationResult {
51+
VerificationResult {
52+
comment_id: comment.id.clone(),
53+
accurate: false,
54+
line_correct: false,
55+
suggestion_sound: false,
56+
score: 0,
57+
reason: "Auto-zero: noise category".to_string(),
58+
}
59+
}
src/review/verification/parser/json.rs

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
use serde_json::Value;
2+
3+
use crate::core::Comment;
4+
5+
use super::super::VerificationResult;
6+
7+
pub(super) fn parse_verification_json(
8+
content: &str,
9+
comments: &[Comment],
10+
) -> Option<Vec<VerificationResult>> {
11+
let candidate = extract_json_candidate(content);
12+
let value = serde_json::from_str::<Value>(&candidate).ok()?;
13+
let items = value_items(value)?;
14+
15+
let mut results = Vec::new();
16+
for item in items {
17+
let index = item_index(&item)?;
18+
if index == 0 || index > comments.len() {
19+
continue;
20+
}
21+
results.push(verification_result_from_json_item(
22+
&item,
23+
&comments[index - 1],
24+
));
25+
}
26+
Some(results)
27+
}
28+
29+
fn extract_json_candidate(content: &str) -> String {
30+
let trimmed = content.trim();
31+
if trimmed.starts_with("```") {
32+
trimmed
33+
.lines()
34+
.skip_while(|line| line.trim_start().starts_with("```"))
35+
.take_while(|line| !line.trim_start().starts_with("```"))
36+
.collect::<Vec<_>>()
37+
.join("\n")
38+
} else {
39+
trimmed.to_string()
40+
}
41+
}
42+
43+
fn value_items(value: Value) -> Option<Vec<Value>> {
44+
if let Some(array) = value.as_array() {
45+
Some(array.clone())
46+
} else {
47+
value
48+
.get("results")
49+
.and_then(|results| results.as_array())
50+
.cloned()
51+
}
52+
}
53+
54+
fn item_index(item: &Value) -> Option<usize> {
55+
item.get("index")
56+
.and_then(|value| value.as_u64())
57+
.map(|value| value as usize)
58+
}
59+
60+
fn verification_result_from_json_item(item: &Value, comment: &Comment) -> VerificationResult {
61+
let accurate = item
62+
.get("accurate")
63+
.and_then(|value| value.as_bool())
64+
.unwrap_or(false);
65+
let line_correct = item
66+
.get("line_correct")
67+
.and_then(|value| value.as_bool())
68+
.unwrap_or(accurate);
69+
let suggestion_sound = item
70+
.get("suggestion_sound")
71+
.and_then(|value| value.as_bool())
72+
.unwrap_or(true);
73+
let score = item
74+
.get("score")
75+
.and_then(|value| value.as_u64())
76+
.unwrap_or(0)
77+
.min(10) as u8;
78+
let reason = item
79+
.get("reason")
80+
.and_then(|value| value.as_str())
81+
.unwrap_or("No reason provided")
82+
.to_string();
83+
84+
VerificationResult {
85+
comment_id: comment.id.clone(),
86+
accurate,
87+
line_correct,
88+
suggestion_sound,
89+
score,
90+
reason,
91+
}
92+
}

0 commit comments

Comments
 (0)