Skip to content

Commit 0fb9ca5

Browse files
committed
refactor: split feedback stats helpers
Separate feedback bucket primitives from threshold scoring so the report builder can evolve bucket shaping and confusion-matrix logic independently.

Made-with: Cursor
1 parent 06f7b72 commit 0fb9ca5

4 files changed

Lines changed: 186 additions & 101 deletions

File tree

TODO.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@
6767
- [x] `src/commands/eval/pattern/matching.rs`: split normalized rule-id helpers, matcher predicates, and focused matcher tests.
6868
- [x] `src/commands/eval/metrics/rules.rs`: separate aggregate math, rule counting, and summary reduction helpers.
6969
- [x] `src/commands/doctor/endpoint/inference.rs`: split request building, HTTP execution/error handling, and response parsing.
70-
- [ ] `src/commands/feedback_eval/report/build/stats.rs`: split threshold confusion-matrix scoring from bucket primitives.
70+
- [x] `src/commands/feedback_eval/report/build/stats.rs`: split threshold confusion-matrix scoring from bucket primitives.
7171
- [ ] `src/commands/doctor/command/display.rs`: separate header/config output, endpoint listing, and inference result rendering.
7272
- [ ] `src/commands/doctor/command/run.rs`: separate endpoint discovery, recommendation flow, and test helpers.
7373
- [ ] `src/commands/eval/runner/matching.rs`: split required-match search, unexpected-match detection, and rule metric assembly.
Lines changed: 6 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -1,101 +1,7 @@
1-
use std::collections::HashMap;
1+
#[path = "stats/buckets.rs"]
2+
mod buckets;
3+
#[path = "stats/thresholds.rs"]
4+
mod thresholds;
25

3-
use super::super::super::{FeedbackEvalBucket, FeedbackEvalComment, FeedbackThresholdMetrics};
4-
5-
pub(super) fn build_threshold_metrics(
6-
comments: &[FeedbackEvalComment],
7-
confidence_threshold: f32,
8-
) -> Option<FeedbackThresholdMetrics> {
9-
let scored_comments = comments
10-
.iter()
11-
.filter_map(|comment| comment.confidence.map(|confidence| (comment, confidence)))
12-
.collect::<Vec<_>>();
13-
if scored_comments.is_empty() {
14-
return None;
15-
}
16-
17-
let mut metrics = FeedbackThresholdMetrics {
18-
total_scored: scored_comments.len(),
19-
..Default::default()
20-
};
21-
22-
for (comment, confidence) in scored_comments {
23-
let predicted_accepted = confidence >= confidence_threshold;
24-
match (predicted_accepted, comment.accepted) {
25-
(true, true) => metrics.true_positive += 1,
26-
(true, false) => metrics.false_positive += 1,
27-
(false, false) => metrics.true_negative += 1,
28-
(false, true) => metrics.false_negative += 1,
29-
}
30-
}
31-
32-
metrics.precision = ratio(
33-
metrics.true_positive,
34-
metrics.true_positive + metrics.false_positive,
35-
);
36-
metrics.recall = ratio(
37-
metrics.true_positive,
38-
metrics.true_positive + metrics.false_negative,
39-
);
40-
metrics.f1 = harmonic_mean(metrics.precision, metrics.recall);
41-
metrics.agreement_rate = ratio(
42-
metrics.true_positive + metrics.true_negative,
43-
metrics.total_scored,
44-
);
45-
Some(metrics)
46-
}
47-
48-
pub(super) fn add_bucket_count(
49-
counts: &mut HashMap<String, (usize, usize)>,
50-
name: &str,
51-
accepted: bool,
52-
) {
53-
let entry = counts.entry(name.to_string()).or_default();
54-
if accepted {
55-
entry.0 += 1;
56-
} else {
57-
entry.1 += 1;
58-
}
59-
}
60-
61-
pub(super) fn buckets_from_counts(
62-
counts: HashMap<String, (usize, usize)>,
63-
) -> Vec<FeedbackEvalBucket> {
64-
let mut buckets = counts
65-
.into_iter()
66-
.map(|(name, (accepted, rejected))| build_bucket(name, accepted + rejected, accepted))
67-
.collect::<Vec<_>>();
68-
buckets.sort_by(|left, right| {
69-
right
70-
.total
71-
.cmp(&left.total)
72-
.then_with(|| left.name.cmp(&right.name))
73-
});
74-
buckets
75-
}
76-
77-
pub(super) fn build_bucket(name: String, total: usize, accepted: usize) -> FeedbackEvalBucket {
78-
FeedbackEvalBucket {
79-
name,
80-
total,
81-
accepted,
82-
rejected: total.saturating_sub(accepted),
83-
acceptance_rate: ratio(accepted, total),
84-
}
85-
}
86-
87-
pub(super) fn ratio(numerator: usize, denominator: usize) -> f32 {
88-
if denominator == 0 {
89-
0.0
90-
} else {
91-
numerator as f32 / denominator as f32
92-
}
93-
}
94-
95-
/// Returns the harmonic mean of `left` and `right`.
///
/// Yields `0.0` when the sum of the two inputs is at most `f32::EPSILON`,
/// which avoids dividing by a (near-)zero sum.
fn harmonic_mean(left: f32, right: f32) -> f32 {
    let sum = left + right;
    if sum <= f32::EPSILON {
        return 0.0;
    }
    2.0 * left * right / sum
}
6+
pub(super) use buckets::{add_bucket_count, buckets_from_counts, build_bucket, ratio};
7+
pub(super) use thresholds::build_threshold_metrics;
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
use std::collections::HashMap;
2+
3+
use super::super::super::super::FeedbackEvalBucket;
4+
5+
pub(in super::super) fn add_bucket_count(
6+
counts: &mut HashMap<String, (usize, usize)>,
7+
name: &str,
8+
accepted: bool,
9+
) {
10+
let entry = counts.entry(name.to_string()).or_default();
11+
if accepted {
12+
entry.0 += 1;
13+
} else {
14+
entry.1 += 1;
15+
}
16+
}
17+
18+
pub(in super::super) fn buckets_from_counts(
19+
counts: HashMap<String, (usize, usize)>,
20+
) -> Vec<FeedbackEvalBucket> {
21+
let mut buckets = counts
22+
.into_iter()
23+
.map(|(name, (accepted, rejected))| build_bucket(name, accepted + rejected, accepted))
24+
.collect::<Vec<_>>();
25+
buckets.sort_by(|left, right| {
26+
right
27+
.total
28+
.cmp(&left.total)
29+
.then_with(|| left.name.cmp(&right.name))
30+
});
31+
buckets
32+
}
33+
34+
pub(in super::super) fn build_bucket(
35+
name: String,
36+
total: usize,
37+
accepted: usize,
38+
) -> FeedbackEvalBucket {
39+
FeedbackEvalBucket {
40+
name,
41+
total,
42+
accepted,
43+
rejected: total.saturating_sub(accepted),
44+
acceptance_rate: ratio(accepted, total),
45+
}
46+
}
47+
48+
pub(in super::super) fn ratio(numerator: usize, denominator: usize) -> f32 {
49+
if denominator == 0 {
50+
0.0
51+
} else {
52+
numerator as f32 / denominator as f32
53+
}
54+
}
55+
56+
#[cfg(test)]
mod tests {
    use super::*;

    /// Larger totals come first; equal totals are ordered alphabetically.
    #[test]
    fn buckets_from_counts_orders_by_total_then_name() {
        let mut counts: HashMap<String, (usize, usize)> = HashMap::new();
        counts.insert("zeta".to_string(), (2, 1));
        counts.insert("alpha".to_string(), (2, 1));
        counts.insert("beta".to_string(), (1, 0));

        let buckets = buckets_from_counts(counts);
        let names: Vec<&str> = buckets.iter().map(|bucket| bucket.name.as_str()).collect();

        assert_eq!(names, ["alpha", "zeta", "beta"]);
    }
}
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
use super::super::super::super::{FeedbackEvalComment, FeedbackThresholdMetrics};
2+
use super::buckets::ratio;
3+
4+
pub(in super::super) fn build_threshold_metrics(
5+
comments: &[FeedbackEvalComment],
6+
confidence_threshold: f32,
7+
) -> Option<FeedbackThresholdMetrics> {
8+
let scored_comments = comments
9+
.iter()
10+
.filter_map(|comment| comment.confidence.map(|confidence| (comment, confidence)))
11+
.collect::<Vec<_>>();
12+
if scored_comments.is_empty() {
13+
return None;
14+
}
15+
16+
let mut metrics = FeedbackThresholdMetrics {
17+
total_scored: scored_comments.len(),
18+
..Default::default()
19+
};
20+
21+
for (comment, confidence) in scored_comments {
22+
let predicted_accepted = confidence >= confidence_threshold;
23+
match (predicted_accepted, comment.accepted) {
24+
(true, true) => metrics.true_positive += 1,
25+
(true, false) => metrics.false_positive += 1,
26+
(false, false) => metrics.true_negative += 1,
27+
(false, true) => metrics.false_negative += 1,
28+
}
29+
}
30+
31+
metrics.precision = ratio(
32+
metrics.true_positive,
33+
metrics.true_positive + metrics.false_positive,
34+
);
35+
metrics.recall = ratio(
36+
metrics.true_positive,
37+
metrics.true_positive + metrics.false_negative,
38+
);
39+
metrics.f1 = harmonic_mean(metrics.precision, metrics.recall);
40+
metrics.agreement_rate = ratio(
41+
metrics.true_positive + metrics.true_negative,
42+
metrics.total_scored,
43+
);
44+
Some(metrics)
45+
}
46+
47+
/// Computes the harmonic mean of two values.
///
/// When `left + right` is at most `f32::EPSILON` the result is `0.0`, so the
/// division is never performed on a (near-)zero sum.
fn harmonic_mean(left: f32, right: f32) -> f32 {
    let denominator = left + right;
    if denominator <= f32::EPSILON {
        0.0
    } else {
        2.0 * left * right / denominator
    }
}
54+
55+
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Creates a fixture comment in which only the acceptance flag and the
    /// confidence vary; every other field holds a fixed sample value.
    fn build_comment(accepted: bool, confidence: Option<f32>) -> FeedbackEvalComment {
        FeedbackEvalComment {
            source_kind: String::from("review-session"),
            review_id: Some(String::from("review-1")),
            repo: Some(String::from("owner/repo")),
            pr_number: Some(12),
            title: Some(String::from("Fix query path")),
            file_path: Some(PathBuf::from("src/lib.rs")),
            line_number: Some(10),
            file_patterns: vec![String::from("*.rs")],
            content: String::from("User-controlled SQL is interpolated into the query string."),
            category: String::from("Security"),
            severity: Some(String::from("Warning")),
            confidence,
            accepted,
        }
    }

    /// One comment per confusion-matrix cell at a 0.5 threshold, so every
    /// count is 1 and every derived rate is 0.5.
    #[test]
    fn build_threshold_metrics_scores_confusion_matrix_counts() {
        let outcomes: [(bool, f32); 4] = [(true, 0.9), (false, 0.8), (false, 0.2), (true, 0.1)];
        let comments: Vec<FeedbackEvalComment> = outcomes
            .iter()
            .map(|&(accepted, confidence)| build_comment(accepted, Some(confidence)))
            .collect();

        let metrics = build_threshold_metrics(&comments, 0.5).unwrap();

        assert_eq!(metrics.total_scored, 4);
        assert_eq!(metrics.true_positive, 1);
        assert_eq!(metrics.false_positive, 1);
        assert_eq!(metrics.true_negative, 1);
        assert_eq!(metrics.false_negative, 1);
        for rate in [
            metrics.precision,
            metrics.recall,
            metrics.f1,
            metrics.agreement_rate,
        ] {
            assert!((rate - 0.5).abs() < f32::EPSILON);
        }
    }

    /// A comment without a confidence cannot be scored, so no metrics exist.
    #[test]
    fn build_threshold_metrics_returns_none_without_scored_comments() {
        let unscored = vec![build_comment(true, None)];
        let metrics = build_threshold_metrics(&unscored, 0.5);
        assert!(metrics.is_none());
    }
}

0 commit comments

Comments
 (0)