|
1 | 1 | use anyhow::Result; |
2 | | -use tracing::{info, warn}; |
| 2 | +use tracing::info; |
| 3 | + |
| 4 | +#[path = "postprocess/dedup.rs"] |
| 5 | +mod dedup; |
| 6 | +#[path = "postprocess/feedback.rs"] |
| 7 | +mod feedback; |
| 8 | +#[path = "postprocess/suppression.rs"] |
| 9 | +mod suppression; |
| 10 | +#[path = "postprocess/verification.rs"] |
| 11 | +mod verification; |
3 | 12 |
|
4 | | -use crate::adapters; |
5 | | -use crate::config; |
6 | 13 | use crate::core; |
7 | 14 |
|
8 | | -use super::super::feedback::derive_file_patterns; |
| 15 | +use dedup::deduplicate_specialized_comments; |
| 16 | +use feedback::apply_semantic_feedback_adjustment; |
| 17 | +use suppression::apply_convention_suppression; |
| 18 | +use verification::apply_verification_pass; |
| 19 | + |
9 | 20 | use super::super::filters::{apply_feedback_confidence_adjustment, apply_review_filters}; |
10 | | -use super::comments::is_analyzer_comment; |
11 | 21 | use super::contracts::ExecutionSummary; |
12 | 22 | use super::repo_support::save_convention_store; |
13 | 23 | use super::services::PipelineServices; |
@@ -48,48 +58,7 @@ pub(super) async fn run_postprocess( |
48 | 58 | .plugin_manager |
49 | 59 | .run_post_processors(all_comments, &repo_path_str) |
50 | 60 | .await?; |
51 | | - |
52 | | - let (analyzer_comments, llm_comments): (Vec<_>, Vec<_>) = processed_comments |
53 | | - .into_iter() |
54 | | - .partition(is_analyzer_comment); |
55 | | - |
56 | | - let verified_llm_comments = if services.config.verification_pass |
57 | | - && !llm_comments.is_empty() |
58 | | - && llm_comments.len() <= services.config.verification_max_comments |
59 | | - { |
60 | | - let comment_count_before = llm_comments.len(); |
61 | | - match super::super::verification::verify_comments( |
62 | | - llm_comments, |
63 | | - &session.diffs, |
64 | | - &session.source_files, |
65 | | - &session.verification_context, |
66 | | - services.verification_adapter.as_ref(), |
67 | | - services.config.verification_min_score, |
68 | | - ) |
69 | | - .await |
70 | | - { |
71 | | - Ok(verified) => { |
72 | | - info!( |
73 | | - "Verification pass: {}/{} comments passed", |
74 | | - verified.len(), |
75 | | - comment_count_before |
76 | | - ); |
77 | | - verified |
78 | | - } |
79 | | - Err(error) => { |
80 | | - warn!( |
81 | | - "Verification pass failed, dropping unverified LLM comments: {}", |
82 | | - error |
83 | | - ); |
84 | | - Vec::new() |
85 | | - } |
86 | | - } |
87 | | - } else { |
88 | | - llm_comments |
89 | | - }; |
90 | | - |
91 | | - let mut processed_comments = analyzer_comments; |
92 | | - processed_comments.extend(verified_llm_comments); |
| 61 | + let processed_comments = apply_verification_pass(processed_comments, services, session).await; |
93 | 62 |
|
94 | 63 | let processed_comments = if services.config.semantic_feedback { |
95 | 64 | apply_semantic_feedback_adjustment( |
@@ -136,222 +105,3 @@ pub(super) async fn run_postprocess( |
136 | 105 | agent_activity, |
137 | 106 | }) |
138 | 107 | } |
139 | | - |
140 | | -pub(super) fn deduplicate_specialized_comments( |
141 | | - mut comments: Vec<core::Comment>, |
142 | | -) -> Vec<core::Comment> { |
143 | | - if comments.len() <= 1 { |
144 | | - return comments; |
145 | | - } |
146 | | - |
147 | | - comments.sort_by(|a, b| { |
148 | | - a.file_path |
149 | | - .cmp(&b.file_path) |
150 | | - .then(a.line_number.cmp(&b.line_number)) |
151 | | - }); |
152 | | - |
153 | | - let mut deduped: Vec<core::Comment> = Vec::with_capacity(comments.len()); |
154 | | - for comment in comments { |
155 | | - let dominated = deduped.iter_mut().find(|existing| { |
156 | | - existing.file_path == comment.file_path |
157 | | - && existing.line_number == comment.line_number |
158 | | - && core::multi_pass::content_similarity(&existing.content, &comment.content) > 0.6 |
159 | | - }); |
160 | | - if let Some(existing) = dominated { |
161 | | - if comment.confidence > existing.confidence { |
162 | | - existing.content = comment.content; |
163 | | - existing.confidence = comment.confidence; |
164 | | - existing.severity = comment.severity; |
165 | | - } |
166 | | - for tag in &comment.tags { |
167 | | - if !existing.tags.contains(tag) { |
168 | | - existing.tags.push(tag.clone()); |
169 | | - } |
170 | | - } |
171 | | - } else { |
172 | | - deduped.push(comment); |
173 | | - } |
174 | | - } |
175 | | - |
176 | | - deduped |
177 | | -} |
178 | | - |
179 | | -pub(super) async fn apply_semantic_feedback_adjustment( |
180 | | - comments: Vec<core::Comment>, |
181 | | - store: Option<&core::SemanticFeedbackStore>, |
182 | | - embedding_adapter: Option<&dyn adapters::llm::LLMAdapter>, |
183 | | - config: &config::Config, |
184 | | -) -> Vec<core::Comment> { |
185 | | - let Some(store) = store else { |
186 | | - return comments; |
187 | | - }; |
188 | | - if store.examples.len() < config.semantic_feedback_min_examples { |
189 | | - return comments; |
190 | | - } |
191 | | - |
192 | | - let embedding_texts = comments |
193 | | - .iter() |
194 | | - .map(|comment| { |
195 | | - core::build_feedback_embedding_text(&comment.content, comment.category.as_str()) |
196 | | - }) |
197 | | - .collect::<Vec<_>>(); |
198 | | - let embeddings = core::embed_texts_with_fallback(embedding_adapter, &embedding_texts).await; |
199 | | - |
200 | | - comments |
201 | | - .into_iter() |
202 | | - .zip(embeddings) |
203 | | - .map(|(mut comment, embedding)| { |
204 | | - if is_analyzer_comment(&comment) { |
205 | | - return comment; |
206 | | - } |
207 | | - |
208 | | - let file_patterns = derive_file_patterns(&comment.file_path); |
209 | | - let matches = core::find_similar_feedback_examples( |
210 | | - store, |
211 | | - &embedding, |
212 | | - comment.category.as_str(), |
213 | | - &file_patterns, |
214 | | - config.semantic_feedback_similarity, |
215 | | - config.semantic_feedback_max_neighbors, |
216 | | - ); |
217 | | - let accepted = matches |
218 | | - .iter() |
219 | | - .filter(|(example, _)| example.accepted) |
220 | | - .count(); |
221 | | - let rejected = matches |
222 | | - .iter() |
223 | | - .filter(|(example, _)| !example.accepted) |
224 | | - .count(); |
225 | | - let observations = accepted + rejected; |
226 | | - |
227 | | - if observations < config.semantic_feedback_min_examples { |
228 | | - return comment; |
229 | | - } |
230 | | - |
231 | | - if rejected > accepted { |
232 | | - let delta = ((rejected - accepted) as f32 * 0.15).min(0.45); |
233 | | - comment.confidence = (comment.confidence - delta).clamp(0.0, 1.0); |
234 | | - if !comment |
235 | | - .tags |
236 | | - .iter() |
237 | | - .any(|tag| tag == "semantic-feedback:rejected") |
238 | | - { |
239 | | - comment.tags.push("semantic-feedback:rejected".to_string()); |
240 | | - } |
241 | | - } else if accepted > rejected { |
242 | | - let delta = ((accepted - rejected) as f32 * 0.10).min(0.25); |
243 | | - comment.confidence = (comment.confidence + delta).clamp(0.0, 1.0); |
244 | | - if !comment |
245 | | - .tags |
246 | | - .iter() |
247 | | - .any(|tag| tag == "semantic-feedback:accepted") |
248 | | - { |
249 | | - comment.tags.push("semantic-feedback:accepted".to_string()); |
250 | | - } |
251 | | - } |
252 | | - |
253 | | - comment |
254 | | - }) |
255 | | - .collect() |
256 | | -} |
257 | | - |
258 | | -pub(super) fn apply_convention_suppression( |
259 | | - comments: Vec<core::Comment>, |
260 | | - convention_store: &core::convention_learner::ConventionStore, |
261 | | -) -> (Vec<core::Comment>, usize) { |
262 | | - let suppression_patterns = convention_store.suppression_patterns(); |
263 | | - if suppression_patterns.is_empty() { |
264 | | - return (comments, 0); |
265 | | - } |
266 | | - |
267 | | - let before_count = comments.len(); |
268 | | - let filtered: Vec<core::Comment> = comments |
269 | | - .into_iter() |
270 | | - .filter(|comment| { |
271 | | - let category_str = comment.category.to_string(); |
272 | | - let score = convention_store.score_comment(&comment.content, &category_str); |
273 | | - score > -0.25 |
274 | | - }) |
275 | | - .collect(); |
276 | | - |
277 | | - let suppressed = before_count.saturating_sub(filtered.len()); |
278 | | - if suppressed > 0 { |
279 | | - info!( |
280 | | - "Convention learning suppressed {} comment(s) based on team feedback patterns", |
281 | | - suppressed |
282 | | - ); |
283 | | - } |
284 | | - |
285 | | - (filtered, suppressed) |
286 | | -} |
287 | | - |
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Builds a warning-level, best-practice comment with confidence `0.7`
    /// at `file:line`, carrying `content` and the single tag `tag`.
    fn make_comment(file: &str, line: usize, content: &str, tag: &str) -> core::Comment {
        core::Comment {
            id: format!("cmt_{line}"),
            file_path: PathBuf::from(file),
            line_number: line,
            content: content.to_owned(),
            rule_id: None,
            severity: core::comment::Severity::Warning,
            category: core::comment::Category::BestPractice,
            suggestion: None,
            confidence: 0.7,
            code_suggestion: None,
            tags: vec![String::from(tag)],
            fix_effort: core::comment::FixEffort::Medium,
            feedback: None,
        }
    }

    /// Two similarly worded comments on one line collapse into the first one.
    #[test]
    fn dedup_removes_similar_comments_on_same_line() {
        let first = make_comment("a.rs", 10, "Missing null check on input", "security-pass");
        let second = make_comment(
            "a.rs",
            10,
            "Missing null check on user input",
            "correctness-pass",
        );

        let deduped = deduplicate_specialized_comments(vec![first, second]);

        assert_eq!(deduped.len(), 1);
        assert!(deduped[0].tags.iter().any(|tag| tag == "security-pass"));
    }

    /// Unrelated findings on one line are both preserved.
    #[test]
    fn dedup_keeps_different_comments_on_same_line() {
        let first = make_comment("a.rs", 10, "SQL injection vulnerability", "security-pass");
        let second = make_comment("a.rs", 10, "Off-by-one error in loop", "correctness-pass");

        let deduped = deduplicate_specialized_comments(vec![first, second]);

        assert_eq!(deduped.len(), 2);
    }

    /// Identical wording on different lines is not treated as a duplicate.
    #[test]
    fn dedup_keeps_similar_comments_on_different_lines() {
        let first = make_comment("a.rs", 10, "Missing null check on input", "security-pass");
        let second = make_comment(
            "a.rs",
            20,
            "Missing null check on input",
            "correctness-pass",
        );

        let deduped = deduplicate_specialized_comments(vec![first, second]);

        assert_eq!(deduped.len(), 2);
    }

    /// An empty input yields an empty output.
    #[test]
    fn dedup_handles_empty_input() {
        let deduped = deduplicate_specialized_comments(Vec::new());
        assert!(deduped.is_empty());
    }
}
0 commit comments