|
1 | | -use once_cell::sync::Lazy; |
2 | | -use regex::Regex; |
3 | | -use serde_json::Value; |
4 | | - |
5 | 1 | use crate::adapters::llm::StructuredOutputSchema; |
6 | 2 | use crate::core::Comment; |
7 | 3 |
|
8 | 4 | use super::VerificationResult; |
9 | 5 |
|
10 | | -const AUTO_ZERO_PATTERNS: &[&str] = &[ |
11 | | - "docstring", |
12 | | - "doc comment", |
13 | | - "documentation comment", |
14 | | - "type hint", |
15 | | - "type annotation", |
16 | | - "import order", |
17 | | - "import sorting", |
18 | | - "unused import", |
19 | | - "trailing whitespace", |
20 | | - "trailing newline", |
21 | | -]; |
| 6 | +#[path = "parser/auto_zero.rs"] |
| 7 | +mod auto_zero; |
| 8 | +#[path = "parser/json.rs"] |
| 9 | +mod json; |
| 10 | +#[path = "parser/schema.rs"] |
| 11 | +mod schema; |
| 12 | +#[path = "parser/text.rs"] |
| 13 | +mod text; |
22 | 14 |
|
23 | | -pub fn is_auto_zero(content: &str) -> bool { |
24 | | - let lower = content.to_lowercase(); |
25 | | - AUTO_ZERO_PATTERNS.iter().any(|p| lower.contains(p)) |
26 | | -} |
| 15 | +use auto_zero::apply_auto_zero; |
| 16 | +use json::parse_verification_json; |
| 17 | +use text::parse_verification_text; |
| 18 | + |
| 19 | +#[allow(unused_imports)] |
| 20 | +pub use auto_zero::is_auto_zero; |
27 | 21 |
|
28 | 22 | pub(super) fn verification_response_schema() -> StructuredOutputSchema { |
29 | | - StructuredOutputSchema::json_schema( |
30 | | - "verification_results", |
31 | | - serde_json::json!({ |
32 | | - "type": "array", |
33 | | - "items": { |
34 | | - "type": "object", |
35 | | - "additionalProperties": false, |
36 | | - "required": [ |
37 | | - "index", |
38 | | - "accurate", |
39 | | - "line_correct", |
40 | | - "suggestion_sound", |
41 | | - "score", |
42 | | - "reason" |
43 | | - ], |
44 | | - "properties": { |
45 | | - "index": {"type": "integer", "minimum": 1}, |
46 | | - "accurate": {"type": "boolean"}, |
47 | | - "line_correct": {"type": "boolean"}, |
48 | | - "suggestion_sound": {"type": "boolean"}, |
49 | | - "score": {"type": "integer", "minimum": 0, "maximum": 10}, |
50 | | - "reason": {"type": "string"} |
51 | | - } |
52 | | - } |
53 | | - }), |
54 | | - ) |
| 23 | + schema::verification_response_schema() |
55 | 24 | } |
56 | 25 |
|
57 | 26 | pub(super) fn parse_verification_response( |
58 | 27 | content: &str, |
59 | 28 | comments: &[Comment], |
60 | 29 | ) -> Vec<VerificationResult> { |
61 | | - static FINDING_PATTERN: Lazy<Regex> = Lazy::new(|| { |
62 | | - Regex::new(r"(?i)FINDING\s+(\d+)\s*:\s*score\s*=\s*(\d+)\s+accurate\s*=\s*(true|false)(?:\s+line_correct\s*=\s*(true|false))?(?:\s+suggestion_sound\s*=\s*(true|false))?\s+reason\s*=\s*(.+)") |
63 | | - .unwrap() |
64 | | - }); |
65 | | - |
66 | | - if let Some(results) = parse_verification_json(content, comments) { |
67 | | - return apply_auto_zero(results, comments); |
68 | | - } |
69 | | - |
70 | | - let mut results = Vec::new(); |
71 | | - |
72 | | - for line in content.lines() { |
73 | | - if let Some(caps) = FINDING_PATTERN.captures(line) { |
74 | | - let index: usize = caps |
75 | | - .get(1) |
76 | | - .expect("capture group 1 (index) must exist after regex match") |
77 | | - .as_str() |
78 | | - .parse() |
79 | | - .unwrap_or(0); |
80 | | - let score: u8 = caps |
81 | | - .get(2) |
82 | | - .expect("capture group 2 (score) must exist after regex match") |
83 | | - .as_str() |
84 | | - .parse() |
85 | | - .unwrap_or(0); |
86 | | - let accurate = caps |
87 | | - .get(3) |
88 | | - .expect("capture group 3 (accurate) must exist after regex match") |
89 | | - .as_str() |
90 | | - .to_lowercase() |
91 | | - == "true"; |
92 | | - let line_correct = caps |
93 | | - .get(4) |
94 | | - .map(|value| value.as_str().eq_ignore_ascii_case("true")) |
95 | | - .unwrap_or(accurate); |
96 | | - let suggestion_sound = caps |
97 | | - .get(5) |
98 | | - .map(|value| value.as_str().eq_ignore_ascii_case("true")) |
99 | | - .unwrap_or(true); |
100 | | - let reason = caps |
101 | | - .get(6) |
102 | | - .expect("capture group 6 (reason) must exist after regex match") |
103 | | - .as_str() |
104 | | - .trim() |
105 | | - .to_string(); |
106 | | - |
107 | | - if index > 0 && index <= comments.len() { |
108 | | - results.push(VerificationResult { |
109 | | - comment_id: comments[index - 1].id.clone(), |
110 | | - accurate, |
111 | | - line_correct, |
112 | | - suggestion_sound, |
113 | | - score: score.min(10), |
114 | | - reason, |
115 | | - }); |
116 | | - } |
117 | | - } |
118 | | - } |
119 | | - |
| 30 | + let results = parse_verification_json(content, comments) |
| 31 | + .unwrap_or_else(|| parse_verification_text(content, comments)); |
120 | 32 | apply_auto_zero(results, comments) |
121 | 33 | } |
122 | | - |
123 | | -fn parse_verification_json(content: &str, comments: &[Comment]) -> Option<Vec<VerificationResult>> { |
124 | | - let trimmed = content.trim(); |
125 | | - let candidate = if trimmed.starts_with("```") { |
126 | | - trimmed |
127 | | - .lines() |
128 | | - .skip_while(|line| line.trim_start().starts_with("```")) |
129 | | - .take_while(|line| !line.trim_start().starts_with("```")) |
130 | | - .collect::<Vec<_>>() |
131 | | - .join("\n") |
132 | | - } else { |
133 | | - trimmed.to_string() |
134 | | - }; |
135 | | - |
136 | | - let value = serde_json::from_str::<Value>(&candidate).ok()?; |
137 | | - let items = if let Some(array) = value.as_array() { |
138 | | - array.clone() |
139 | | - } else { |
140 | | - value |
141 | | - .get("results") |
142 | | - .and_then(|results| results.as_array()) |
143 | | - .cloned()? |
144 | | - }; |
145 | | - |
146 | | - let mut results = Vec::new(); |
147 | | - for item in items { |
148 | | - let index = item.get("index").and_then(|value| value.as_u64())? as usize; |
149 | | - if index == 0 || index > comments.len() { |
150 | | - continue; |
151 | | - } |
152 | | - let accurate = item |
153 | | - .get("accurate") |
154 | | - .and_then(|value| value.as_bool()) |
155 | | - .unwrap_or(false); |
156 | | - let line_correct = item |
157 | | - .get("line_correct") |
158 | | - .and_then(|value| value.as_bool()) |
159 | | - .unwrap_or(accurate); |
160 | | - let suggestion_sound = item |
161 | | - .get("suggestion_sound") |
162 | | - .and_then(|value| value.as_bool()) |
163 | | - .unwrap_or(true); |
164 | | - let score = item |
165 | | - .get("score") |
166 | | - .and_then(|value| value.as_u64()) |
167 | | - .unwrap_or(0) |
168 | | - .min(10) as u8; |
169 | | - let reason = item |
170 | | - .get("reason") |
171 | | - .and_then(|value| value.as_str()) |
172 | | - .unwrap_or("No reason provided") |
173 | | - .to_string(); |
174 | | - |
175 | | - results.push(VerificationResult { |
176 | | - comment_id: comments[index - 1].id.clone(), |
177 | | - accurate, |
178 | | - line_correct, |
179 | | - suggestion_sound, |
180 | | - score, |
181 | | - reason, |
182 | | - }); |
183 | | - } |
184 | | - Some(results) |
185 | | -} |
186 | | - |
187 | | -fn apply_auto_zero( |
188 | | - mut results: Vec<VerificationResult>, |
189 | | - comments: &[Comment], |
190 | | -) -> Vec<VerificationResult> { |
191 | | - for comment in comments { |
192 | | - if is_auto_zero(&comment.content) { |
193 | | - if let Some(existing) = results.iter_mut().find(|r| r.comment_id == comment.id) { |
194 | | - existing.accurate = false; |
195 | | - existing.line_correct = false; |
196 | | - existing.score = 0; |
197 | | - existing.reason = "Auto-zero: noise category".to_string(); |
198 | | - } else { |
199 | | - results.push(VerificationResult { |
200 | | - comment_id: comment.id.clone(), |
201 | | - accurate: false, |
202 | | - line_correct: false, |
203 | | - suggestion_sound: false, |
204 | | - score: 0, |
205 | | - reason: "Auto-zero: noise category".to_string(), |
206 | | - }); |
207 | | - } |
208 | | - } |
209 | | - } |
210 | | - |
211 | | - results |
212 | | -} |
0 commit comments