Skip to content

Commit ba63f1e

Browse files
committed
refactor: type review context provenance
1 parent 174d002 commit ba63f1e

16 files changed

Lines changed: 385 additions & 197 deletions

src/core/context.rs

Lines changed: 82 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,16 @@ use std::path::Path;
66
use std::path::PathBuf;
77

88
use crate::core::function_chunker::find_enclosing_boundary_line;
9-
use crate::core::SymbolIndex;
9+
use crate::core::{ContextProvenance, SymbolIndex};
10+
1011
#[derive(Debug, Clone, Serialize, Deserialize)]
1112
pub struct LLMContextChunk {
1213
pub file_path: PathBuf,
1314
pub content: String,
1415
pub context_type: ContextType,
1516
pub line_range: Option<(usize, usize)>,
1617
#[serde(default, skip_serializing_if = "Option::is_none")]
17-
pub provenance: Option<String>,
18+
pub provenance: Option<ContextProvenance>,
1819
}
1920

2021
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
@@ -25,6 +26,52 @@ pub enum ContextType {
2526
Documentation,
2627
}
2728

29+
impl LLMContextChunk {
30+
pub fn new(
31+
file_path: impl Into<PathBuf>,
32+
content: impl Into<String>,
33+
context_type: ContextType,
34+
) -> Self {
35+
Self {
36+
file_path: file_path.into(),
37+
content: content.into(),
38+
context_type,
39+
line_range: None,
40+
provenance: None,
41+
}
42+
}
43+
44+
pub fn file_content(file_path: impl Into<PathBuf>, content: impl Into<String>) -> Self {
45+
Self::new(file_path, content, ContextType::FileContent)
46+
}
47+
48+
pub fn definition(file_path: impl Into<PathBuf>, content: impl Into<String>) -> Self {
49+
Self::new(file_path, content, ContextType::Definition)
50+
}
51+
52+
pub fn reference(file_path: impl Into<PathBuf>, content: impl Into<String>) -> Self {
53+
Self::new(file_path, content, ContextType::Reference)
54+
}
55+
56+
pub fn documentation(file_path: impl Into<PathBuf>, content: impl Into<String>) -> Self {
57+
Self::new(file_path, content, ContextType::Documentation)
58+
}
59+
60+
pub fn with_line_range(mut self, line_range: (usize, usize)) -> Self {
61+
self.line_range = Some(line_range);
62+
self
63+
}
64+
65+
pub fn with_provenance(mut self, provenance: ContextProvenance) -> Self {
66+
self.provenance = Some(provenance);
67+
self
68+
}
69+
70+
pub fn provenance_label(&self) -> Option<String> {
71+
self.provenance.as_ref().map(ToString::to_string)
72+
}
73+
}
74+
2875
pub struct ContextFetcher {
2976
repo_path: PathBuf,
3077
}
@@ -71,13 +118,10 @@ impl ContextFetcher {
71118
file_lines[start_idx..end_idx].join("\n"),
72119
MAX_CONTEXT_CHARS,
73120
);
74-
chunks.push(LLMContextChunk {
75-
file_path: file_path.clone(),
76-
content: chunk_content,
77-
context_type: ContextType::FileContent,
78-
line_range: Some((expanded_start, expanded_end)),
79-
provenance: None,
80-
});
121+
chunks.push(
122+
LLMContextChunk::file_content(file_path.clone(), chunk_content)
123+
.with_line_range((expanded_start, expanded_end)),
124+
);
81125
}
82126
}
83127
}
@@ -135,13 +179,10 @@ impl ContextFetcher {
135179
continue;
136180
}
137181

138-
chunks.push(LLMContextChunk {
139-
file_path: relative_path.to_path_buf(),
140-
content: snippet,
141-
context_type: ContextType::Reference,
142-
line_range: None,
143-
provenance: None,
144-
});
182+
chunks.push(LLMContextChunk::reference(
183+
relative_path.to_path_buf(),
184+
snippet,
185+
));
145186
}
146187

147188
Ok(chunks)
@@ -184,13 +225,10 @@ impl ContextFetcher {
184225
MAX_CONTEXT_CHARS,
185226
);
186227

187-
chunks.push(LLMContextChunk {
188-
file_path: file_path.clone(),
189-
content: definition_content,
190-
context_type: ContextType::Definition,
191-
line_range: Some((start_line + 1, end_line)),
192-
provenance: None,
193-
});
228+
chunks.push(
229+
LLMContextChunk::definition(file_path.clone(), definition_content)
230+
.with_line_range((start_line + 1, end_line)),
231+
);
194232
}
195233
}
196234
}
@@ -222,13 +260,13 @@ impl ContextFetcher {
222260
continue;
223261
}
224262
let snippet = truncate_with_notice(location.snippet.clone(), MAX_CONTEXT_CHARS);
225-
chunks.push(LLMContextChunk {
226-
file_path: location.file_path.clone(),
227-
content: snippet,
228-
context_type: ContextType::Definition,
229-
line_range: Some(location.line_range),
230-
provenance: location.provenance.clone(),
231-
});
263+
let mut chunk =
264+
LLMContextChunk::definition(location.file_path.clone(), snippet)
265+
.with_line_range(location.line_range);
266+
if let Some(provenance) = location.provenance.clone() {
267+
chunk = chunk.with_provenance(provenance);
268+
}
269+
chunks.push(chunk);
232270
}
233271
}
234272
}
@@ -244,13 +282,12 @@ impl ContextFetcher {
244282
continue;
245283
}
246284
let snippet = truncate_with_notice(location.snippet, MAX_CONTEXT_CHARS);
247-
chunks.push(LLMContextChunk {
248-
file_path: location.file_path,
249-
content: snippet,
250-
context_type: ContextType::Definition,
251-
line_range: Some(location.line_range),
252-
provenance: location.provenance,
253-
});
285+
let mut chunk = LLMContextChunk::definition(location.file_path, snippet)
286+
.with_line_range(location.line_range);
287+
if let Some(provenance) = location.provenance {
288+
chunk = chunk.with_provenance(provenance);
289+
}
290+
chunks.push(chunk);
254291
}
255292

256293
for location in index.multi_hop_locations(
@@ -264,13 +301,12 @@ impl ContextFetcher {
264301
continue;
265302
}
266303
let snippet = truncate_with_notice(location.snippet, MAX_CONTEXT_CHARS);
267-
chunks.push(LLMContextChunk {
268-
file_path: location.file_path,
269-
content: snippet,
270-
context_type: ContextType::Reference,
271-
line_range: Some(location.line_range),
272-
provenance: location.provenance,
273-
});
304+
let mut chunk = LLMContextChunk::reference(location.file_path, snippet)
305+
.with_line_range(location.line_range);
306+
if let Some(provenance) = location.provenance {
307+
chunk = chunk.with_provenance(provenance);
308+
}
309+
chunks.push(chunk);
274310
}
275311

276312
Ok(chunks)
@@ -441,12 +477,12 @@ mod tests {
441477
assert_eq!(graph_chunk.context_type, ContextType::Definition);
442478
assert!(graph_chunk.content.contains("validate_token"));
443479
assert!(graph_chunk
444-
.provenance
480+
.provenance_label()
445481
.as_deref()
446482
.unwrap_or_default()
447483
.contains("symbol graph"));
448484
assert!(graph_chunk
449-
.provenance
485+
.provenance_label()
450486
.as_deref()
451487
.unwrap_or_default()
452488
.contains("calls"));

src/core/context_provenance.rs

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
use std::fmt;
2+
3+
use serde::{Deserialize, Serialize};
4+
5+
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
6+
#[serde(tag = "kind", rename_all = "snake_case")]
7+
pub enum ContextProvenance {
8+
ActiveReviewRules,
9+
Analyzer {
10+
name: String,
11+
},
12+
CustomContextNotes,
13+
DependencyGraphNeighborhood,
14+
PathSpecificFocusAreas,
15+
PatternRepositoryContext {
16+
source: String,
17+
},
18+
PatternRepositorySource {
19+
source: String,
20+
},
21+
RelatedTestFile,
22+
ReverseDependencySummary,
23+
SemanticRetrieval {
24+
similarity: f32,
25+
symbol_name: String,
26+
},
27+
SymbolGraphPath {
28+
relation_path: Vec<String>,
29+
hops: usize,
30+
relevance: f32,
31+
},
32+
}
33+
34+
impl ContextProvenance {
35+
pub fn analyzer(name: impl Into<String>) -> Self {
36+
Self::Analyzer { name: name.into() }
37+
}
38+
39+
pub fn pattern_repository_context(source: impl Into<String>) -> Self {
40+
Self::PatternRepositoryContext {
41+
source: source.into(),
42+
}
43+
}
44+
45+
pub fn pattern_repository_source(source: impl Into<String>) -> Self {
46+
Self::PatternRepositorySource {
47+
source: source.into(),
48+
}
49+
}
50+
51+
pub fn semantic_retrieval(similarity: f32, symbol_name: impl Into<String>) -> Self {
52+
Self::SemanticRetrieval {
53+
similarity,
54+
symbol_name: symbol_name.into(),
55+
}
56+
}
57+
58+
pub fn symbol_graph_path(relation_path: Vec<String>, hops: usize, relevance: f32) -> Self {
59+
Self::SymbolGraphPath {
60+
relation_path,
61+
hops,
62+
relevance,
63+
}
64+
}
65+
66+
pub fn ranking_bonus(&self) -> i32 {
67+
match self {
68+
Self::ActiveReviewRules => 120,
69+
Self::PatternRepositorySource { .. } => 40,
70+
Self::PatternRepositoryContext { .. } => 35,
71+
Self::SemanticRetrieval { .. } => 25,
72+
Self::SymbolGraphPath {
73+
relation_path,
74+
hops,
75+
..
76+
} => {
77+
let mut bonus = 50;
78+
if *hops == 1 {
79+
bonus += 15;
80+
}
81+
if relation_path.iter().any(|step| {
82+
step.eq_ignore_ascii_case("calls") || step.eq_ignore_ascii_case("called-by")
83+
}) {
84+
bonus += 10;
85+
}
86+
bonus
87+
}
88+
Self::Analyzer { .. }
89+
| Self::CustomContextNotes
90+
| Self::DependencyGraphNeighborhood
91+
| Self::PathSpecificFocusAreas
92+
| Self::RelatedTestFile
93+
| Self::ReverseDependencySummary => 0,
94+
}
95+
}
96+
97+
pub fn verification_bonus(&self) -> i32 {
98+
match self {
99+
Self::SymbolGraphPath { .. } => 80,
100+
Self::SemanticRetrieval { .. } => 30,
101+
_ => 0,
102+
}
103+
}
104+
105+
fn label(&self) -> String {
106+
match self {
107+
Self::ActiveReviewRules => "active review rules".to_string(),
108+
Self::Analyzer { name } => format!("{} analyzer", name),
109+
Self::CustomContextNotes => "custom context notes".to_string(),
110+
Self::DependencyGraphNeighborhood => "dependency graph neighborhood".to_string(),
111+
Self::PathSpecificFocusAreas => "path-specific focus areas".to_string(),
112+
Self::PatternRepositoryContext { source } => {
113+
format!("pattern repository: {}", source)
114+
}
115+
Self::PatternRepositorySource { source } => {
116+
format!("pattern repository source: {}", source)
117+
}
118+
Self::RelatedTestFile => "related test file".to_string(),
119+
Self::ReverseDependencySummary => "reverse dependency summary".to_string(),
120+
Self::SemanticRetrieval {
121+
similarity,
122+
symbol_name,
123+
} => format!(
124+
"semantic retrieval (similarity={:.2}, symbol={})",
125+
similarity, symbol_name
126+
),
127+
Self::SymbolGraphPath {
128+
relation_path,
129+
hops,
130+
relevance,
131+
} => format!(
132+
"symbol graph path: {} (hops={}, relevance={:.2})",
133+
if relation_path.is_empty() {
134+
"seed".to_string()
135+
} else {
136+
relation_path.join(" -> ")
137+
},
138+
hops,
139+
relevance
140+
),
141+
}
142+
}
143+
}
144+
145+
impl fmt::Display for ContextProvenance {
146+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
147+
write!(f, "{}", self.label())
148+
}
149+
}
150+
151+
#[cfg(test)]
mod tests {
    use super::ContextProvenance;

    /// Builds a symbol-graph provenance from borrowed relation steps.
    fn graph_path(steps: &[&str], hops: usize, relevance: f32) -> ContextProvenance {
        ContextProvenance::symbol_graph_path(
            steps.iter().copied().map(String::from).collect(),
            hops,
            relevance,
        )
    }

    #[test]
    fn symbol_graph_path_formats_and_scores() {
        let provenance = graph_path(&["calls", "uses"], 1, 0.42);

        assert_eq!(
            provenance.to_string(),
            "symbol graph path: calls -> uses (hops=1, relevance=0.42)"
        );
        // 50 base + 15 for a single hop + 10 for a call edge.
        assert_eq!(provenance.ranking_bonus(), 75);
        assert_eq!(provenance.verification_bonus(), 80);
    }

    #[test]
    fn empty_symbol_graph_path_is_labelled_seed() {
        let provenance = graph_path(&[], 0, 1.0);

        assert_eq!(
            provenance.to_string(),
            "symbol graph path: seed (hops=0, relevance=1.00)"
        );
        // Neither the single-hop nor the call-edge bonus applies.
        assert_eq!(provenance.ranking_bonus(), 50);
    }

    #[test]
    fn call_edge_bonus_ignores_ascii_case() {
        assert_eq!(graph_path(&["Called-By"], 2, 0.5).ranking_bonus(), 60);
        assert_eq!(graph_path(&["uses"], 2, 0.5).ranking_bonus(), 50);
    }

    #[test]
    fn active_rules_and_pattern_repository_have_stable_labels() {
        let cases = [
            (ContextProvenance::ActiveReviewRules, "active review rules"),
            (
                ContextProvenance::pattern_repository_source("org/repo"),
                "pattern repository source: org/repo",
            ),
            (
                ContextProvenance::pattern_repository_context("org/repo"),
                "pattern repository: org/repo",
            ),
        ];

        for (provenance, expected) in cases.iter() {
            assert_eq!(provenance.to_string(), *expected);
        }
    }

    #[test]
    fn active_rules_rank_high_but_add_no_verification_bonus() {
        assert_eq!(ContextProvenance::ActiveReviewRules.ranking_bonus(), 120);
        assert_eq!(ContextProvenance::ActiveReviewRules.verification_bonus(), 0);
    }
}

0 commit comments

Comments
 (0)