Skip to content

Commit 44367cd

Browse files
committed
feat(retention): prune artifacts and trend history
1 parent 4011b64 commit 44367cd

File tree

18 files changed

+420
-61
lines changed

18 files changed

+420
-61
lines changed

TODO.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ This roadmap is derived from deep research into Greptile's public docs, blog, MC
129129
81. [x] Split `src/server/api.rs` by domain so the growing platform API stays maintainable.
130130
82. [x] Split `src/server/state.rs` into session lifecycle, persistence, progress, and GitHub coordination modules.
131131
83. [x] Add queue depth and worker saturation metrics for long-running review and eval jobs.
132-
84. [ ] Add retention policies for review artifacts, eval artifacts, and trend histories.
132+
84. [x] Add retention policies for review artifacts, eval artifacts, and trend histories.
133133
85. [x] Add storage migrations for richer comment lifecycle and reinforcement schemas.
134134
86. [ ] Add deployment docs for self-hosted review + analytics + trend retention setups.
135135
87. [ ] Add secret-management guidance and validation for multi-provider enterprise installs.

src/commands/eval/command.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@ mod report;
1010
use anyhow::Result;
1111
use std::collections::HashSet;
1212
use std::path::{Path, PathBuf};
13+
use tracing::info;
1314

1415
use crate::config;
1516

17+
use super::runner::prune_eval_artifacts;
1618
use super::{EvalRunFilters, EvalRunMetadata, EvalRunOptions};
1719
use batch::run_eval_batch;
1820
use fixtures::run_eval_fixtures;
@@ -29,13 +31,25 @@ pub async fn eval_command(
2931
if options.trend_file.is_none() {
3032
options.trend_file = Some(config.eval_trend_path.clone());
3133
}
34+
if let Some(artifact_dir) = options.artifact_dir.as_deref() {
35+
let pruned =
36+
prune_eval_artifacts(artifact_dir, config.retention.eval_artifact_max_age_days).await?;
37+
if pruned > 0 {
38+
info!(
39+
artifact_dir = %artifact_dir.display(),
40+
pruned,
41+
"Pruned stale eval artifacts"
42+
);
43+
}
44+
}
3245
ensure_frontier_eval_models(&config, &options)?;
3346
if options.repeat > 1 || !options.matrix_models.is_empty() {
3447
return run_eval_batch(config, &fixtures_dir, output_path.as_deref(), &options).await;
3548
}
3649

3750
let execution = run_eval_fixtures(&config, &fixtures_dir, &options).await?;
38-
let prepared_options = prepare_eval_options(&options)?;
51+
let prepared_options =
52+
prepare_eval_options(&options, config.retention.trend_history_max_entries)?;
3953
let report_output_path = output_path.clone().or_else(|| {
4054
options
4155
.artifact_dir

src/commands/eval/command/batch.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ pub(super) async fn run_eval_batch(
3939
options: &EvalRunOptions,
4040
) -> Result<()> {
4141
config.verification.fail_open = true;
42-
let prepared_options = prepare_eval_options(options)?;
42+
let prepared_options =
43+
prepare_eval_options(options, config.retention.trend_history_max_entries)?;
4344
let models = matrix_models(&config, options);
4445
let repeat_total = options.repeat.max(1);
4546
let multi_model = models.len() > 1;

src/commands/eval/command/options.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,13 @@ pub(super) struct PreparedEvalOptions {
1212
pub(super) baseline: Option<EvalReport>,
1313
pub(super) threshold_options: EvalThresholdOptions,
1414
pub(super) trend_path: Option<std::path::PathBuf>,
15+
pub(super) trend_max_entries: usize,
1516
}
1617

17-
pub(super) fn prepare_eval_options(options: &EvalRunOptions) -> Result<PreparedEvalOptions> {
18+
pub(super) fn prepare_eval_options(
19+
options: &EvalRunOptions,
20+
trend_max_entries: usize,
21+
) -> Result<PreparedEvalOptions> {
1822
let baseline = match options.baseline_report.as_deref() {
1923
Some(path) => Some(load_eval_report(path)?),
2024
None => None,
@@ -37,6 +41,7 @@ pub(super) fn prepare_eval_options(options: &EvalRunOptions) -> Result<PreparedE
3741
max_rule_f1_drop: max_rule_drop_thresholds,
3842
},
3943
trend_path: options.trend_file.clone(),
44+
trend_max_entries,
4045
})
4146
}
4247

src/commands/eval/command/report.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ pub(super) async fn materialize_eval_report(
4545
write_eval_report(&report, path).await?;
4646
}
4747
if let Some(path) = prepared_options.trend_path.as_deref() {
48-
update_eval_quality_trend(&report, path).await?;
48+
update_eval_quality_trend(&report, path, prepared_options.trend_max_entries).await?;
4949
}
5050

5151
Ok(report)

src/commands/eval/report/trend.rs

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use super::super::EvalReport;
88
pub(in super::super) async fn update_eval_quality_trend(
99
report: &EvalReport,
1010
path: &Path,
11+
max_entries: usize,
1112
) -> Result<()> {
1213
let Some(entry) = trend_entry_for_report(report) else {
1314
return Ok(());
@@ -23,6 +24,7 @@ pub(in super::super) async fn update_eval_quality_trend(
2324
QualityTrend::new()
2425
};
2526
trend.entries.push(entry);
27+
trim_trend_entries(&mut trend.entries, max_entries);
2628

2729
if let Some(parent) = path.parent() {
2830
tokio::fs::create_dir_all(parent)
@@ -35,6 +37,13 @@ pub(in super::super) async fn update_eval_quality_trend(
3537
Ok(())
3638
}
3739

40+
fn trim_trend_entries(entries: &mut Vec<TrendEntry>, max_entries: usize) {
41+
if entries.len() > max_entries {
42+
let excess = entries.len() - max_entries;
43+
entries.drain(0..excess);
44+
}
45+
}
46+
3847
fn trend_entry_for_report(report: &EvalReport) -> Option<TrendEntry> {
3948
let result = benchmark_result_for_report(report)?;
4049
let verification_health = report.verification_health.as_ref();
@@ -213,12 +222,17 @@ mod tests {
213222
let dir = tempdir().unwrap();
214223
let path = dir.path().join("trend.json");
215224

216-
update_eval_quality_trend(&sample_report(Some("first"), "2026-03-13T00:00:00Z"), &path)
217-
.await
218-
.unwrap();
225+
update_eval_quality_trend(
226+
&sample_report(Some("first"), "2026-03-13T00:00:00Z"),
227+
&path,
228+
200,
229+
)
230+
.await
231+
.unwrap();
219232
update_eval_quality_trend(
220233
&sample_report(Some("second"), "2026-03-13T00:10:00Z"),
221234
&path,
235+
200,
222236
)
223237
.await
224238
.unwrap();
@@ -258,4 +272,30 @@ mod tests {
258272
0.8
259273
);
260274
}
275+
276+
#[tokio::test]
277+
async fn update_eval_quality_trend_trims_old_entries() {
278+
let dir = tempdir().unwrap();
279+
let path = dir.path().join("trend.json");
280+
281+
update_eval_quality_trend(
282+
&sample_report(Some("first"), "2026-03-13T00:00:00Z"),
283+
&path,
284+
1,
285+
)
286+
.await
287+
.unwrap();
288+
update_eval_quality_trend(
289+
&sample_report(Some("second"), "2026-03-13T00:10:00Z"),
290+
&path,
291+
1,
292+
)
293+
.await
294+
.unwrap();
295+
296+
let content = tokio::fs::read_to_string(&path).await.unwrap();
297+
let trend = QualityTrend::from_json(&content).unwrap();
298+
assert_eq!(trend.entries.len(), 1);
299+
assert_eq!(trend.entries[0].label.as_deref(), Some("second"));
300+
}
261301
}

src/commands/eval/runner.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,5 @@ mod execute;
44
mod matching;
55

66
pub(super) use execute::{
7-
describe_eval_fixture_graph, run_eval_fixture, EvalFixtureArtifactContext,
7+
describe_eval_fixture_graph, prune_eval_artifacts, run_eval_fixture, EvalFixtureArtifactContext,
88
};

src/commands/eval/runner/execute.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use self::loading::prepare_fixture_execution;
1818
use self::result::build_fixture_result;
1919
use super::super::{EvalFixtureResult, LoadedEvalFixture};
2020

21-
pub(in super::super) use self::artifact::EvalFixtureArtifactContext;
21+
pub(in super::super) use self::artifact::{prune_eval_artifacts, EvalFixtureArtifactContext};
2222

2323
pub(crate) fn describe_eval_fixture_graph(
2424
repro_validate: bool,

src/commands/eval/runner/execute/artifact.rs

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use anyhow::Result;
1+
use anyhow::{Context, Result};
22
use serde::Serialize;
33
use std::path::PathBuf;
44

@@ -100,6 +100,76 @@ pub(super) async fn maybe_write_fixture_artifact(
100100
Ok(Some(artifact_path.display().to_string()))
101101
}
102102

103+
pub(crate) async fn prune_eval_artifacts(
104+
artifact_dir: &std::path::Path,
105+
max_age_days: i64,
106+
) -> Result<usize> {
107+
let artifact_dir = artifact_dir.to_path_buf();
108+
tokio::task::spawn_blocking(move || prune_eval_artifacts_blocking(&artifact_dir, max_age_days))
109+
.await
110+
.context("eval artifact retention task failed")?
111+
}
112+
113+
fn prune_eval_artifacts_blocking(
114+
artifact_dir: &std::path::Path,
115+
max_age_days: i64,
116+
) -> Result<usize> {
117+
if !artifact_dir.exists() {
118+
return Ok(0);
119+
}
120+
121+
let cutoff = std::time::SystemTime::now()
122+
.checked_sub(std::time::Duration::from_secs(
123+
max_age_days.max(1) as u64 * 86_400,
124+
))
125+
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
126+
prune_eval_artifacts_before(artifact_dir, cutoff)
127+
}
128+
129+
/// Prunes `artifact_dir` against an explicit `cutoff` timestamp.
///
/// Starts the recursive walk at `artifact_dir` with a zeroed counter and returns
/// the total number of entries the walk reports as removed. Any error from the
/// walk is returned unchanged.
fn prune_eval_artifacts_before(
    artifact_dir: &std::path::Path,
    cutoff: std::time::SystemTime,
) -> Result<usize> {
    let mut removed_count: usize = 0;
    prune_eval_artifacts_tree(artifact_dir, cutoff, true, &mut removed_count)
        .map(|()| removed_count)
}
137+
138+
fn prune_eval_artifacts_tree(
139+
path: &std::path::Path,
140+
cutoff: std::time::SystemTime,
141+
preserve_root: bool,
142+
removed: &mut usize,
143+
) -> Result<()> {
144+
for entry in std::fs::read_dir(path)? {
145+
let entry = entry?;
146+
let entry_path = entry.path();
147+
let metadata = std::fs::symlink_metadata(&entry_path)?;
148+
149+
if metadata.is_dir() {
150+
prune_eval_artifacts_tree(&entry_path, cutoff, false, removed)?;
151+
if std::fs::read_dir(&entry_path)?.next().is_none() {
152+
std::fs::remove_dir(&entry_path)?;
153+
*removed += 1;
154+
}
155+
} else if metadata.is_file()
156+
&& metadata
157+
.modified()
158+
.map(|modified| modified < cutoff)
159+
.unwrap_or(false)
160+
{
161+
std::fs::remove_file(&entry_path)?;
162+
*removed += 1;
163+
}
164+
}
165+
166+
if !preserve_root && std::fs::read_dir(path)?.next().is_none() {
167+
return Ok(());
168+
}
169+
170+
Ok(())
171+
}
172+
103173
fn sanitize_path_segment(value: &str) -> String {
104174
let mut sanitized = value
105175
.trim()
@@ -126,3 +196,44 @@ fn sanitize_path_segment(value: &str) -> String {
126196
sanitized
127197
}
128198
}
199+
200+
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Creates `<root>/fixtures/<file_name>` holding `{}` and returns the
    /// directory path and the file path, in that order.
    fn seed_fixture_artifact(
        root: &std::path::Path,
        file_name: &str,
    ) -> (std::path::PathBuf, std::path::PathBuf) {
        let fixtures_dir = root.join("fixtures");
        std::fs::create_dir_all(&fixtures_dir).unwrap();
        let artifact_path = fixtures_dir.join(file_name);
        std::fs::write(&artifact_path, "{}").unwrap();
        (fixtures_dir, artifact_path)
    }

    #[test]
    fn prune_eval_artifacts_removes_stale_files_and_empty_dirs() {
        let workspace = tempdir().unwrap();
        let (fixtures_dir, artifact_path) = seed_fixture_artifact(workspace.path(), "old.json");

        // A cutoff one second in the future makes the freshly written file stale.
        let future_cutoff = std::time::SystemTime::now() + std::time::Duration::from_secs(1);
        let removed = prune_eval_artifacts_before(workspace.path(), future_cutoff).unwrap();

        // One file plus the directory that became empty.
        assert_eq!(removed, 2);
        assert!(!artifact_path.exists());
        assert!(!fixtures_dir.exists());
    }

    #[test]
    fn prune_eval_artifacts_keeps_recent_files() {
        let workspace = tempdir().unwrap();
        let (fixtures_dir, artifact_path) =
            seed_fixture_artifact(workspace.path(), "recent.json");

        // Nothing can be older than the epoch, so nothing qualifies for removal.
        let epoch_cutoff = std::time::SystemTime::UNIX_EPOCH;
        let removed = prune_eval_artifacts_before(workspace.path(), epoch_cutoff).unwrap();

        assert_eq!(removed, 0);
        assert!(artifact_path.exists());
        assert!(fixtures_dir.exists());
    }
}

src/commands/feedback_eval/command.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ pub async fn feedback_eval_command(
1313
input_path: PathBuf,
1414
output_path: Option<PathBuf>,
1515
trend_path: Option<PathBuf>,
16+
trend_max_entries: usize,
1617
confidence_threshold: f32,
1718
eval_report_path: Option<PathBuf>,
1819
) -> Result<()> {
@@ -25,6 +26,7 @@ pub async fn feedback_eval_command(
2526
&loaded,
2627
output_path.as_deref(),
2728
trend_path.as_deref(),
29+
trend_max_entries,
2830
confidence_threshold,
2931
eval_report.as_ref(),
3032
)

0 commit comments

Comments
 (0)