Skip to content

Commit 18b4a39

Browse files
committed
Add metadata TTL and stale-while-revalidate support
Cached metadata is now served directly within a configurable TTL window (default 5m) without contacting upstream, reducing latency and upstream load. When upstream is unreachable and the cache is past its TTL, stale content is served with a Warning: 110 header per RFC 7234. New config: `metadata_ttl` (YAML) / `PROXY_METADATA_TTL` (env). Set to "0" to always revalidate with upstream.
1 parent 23a39c3 commit 18b4a39

7 files changed

Lines changed: 303 additions & 22 deletions

File tree

cmd/proxy/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,7 @@ func runMirror() {
453453
resolver := fetch.NewResolver()
454454
proxy := handler.NewProxy(db, store, fetcher, resolver, logger)
455455
proxy.CacheMetadata = true // mirror always caches metadata
456+
proxy.MetadataTTL = cfg.ParseMetadataTTL()
456457

457458
m := mirror.New(proxy, db, store, logger, *concurrency)
458459

docs/configuration.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,20 @@ Or via environment variable: `PROXY_CACHE_METADATA=true`.
225225

226226
The `proxy mirror` command always enables metadata caching regardless of this setting.
227227

228+
### Metadata TTL
229+
230+
When metadata caching is enabled, `metadata_ttl` controls how long a cached response is considered fresh before the proxy revalidates it with upstream. During the TTL window, cached metadata is served directly without contacting upstream, reducing latency and upstream load.
231+
232+
```yaml
233+
metadata_ttl: "5m" # default
234+
```
235+
236+
Or via environment variable: `PROXY_METADATA_TTL=10m`.
237+
238+
Set to `"0"` to always revalidate with upstream (ETag-based conditional requests still avoid re-downloading unchanged content).
239+
240+
When upstream is unreachable and the cached entry is past its TTL, the proxy serves the stale cached copy with a `Warning: 110 - "Response is Stale"` header so clients can tell the data may be outdated.
241+
228242
## Mirror API
229243

230244
The `/api/mirror` endpoints are disabled by default. Enable them to allow starting mirror jobs via HTTP:

internal/config/config.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ import (
5555
"path/filepath"
5656
"strconv"
5757
"strings"
58+
"time"
5859

5960
"gopkg.in/yaml.v3"
6061
)
@@ -89,6 +90,11 @@ type Config struct {
8990
// The mirror command always enables this regardless of this setting.
9091
CacheMetadata bool `json:"cache_metadata" yaml:"cache_metadata"`
9192

93+
// MetadataTTL is how long cached metadata is considered fresh before
94+
// revalidating with upstream. Uses Go duration syntax (e.g. "5m", "1h").
95+
// Default: "5m". Set to "0" to always revalidate.
96+
MetadataTTL string `json:"metadata_ttl" yaml:"metadata_ttl"`
97+
9298
// MirrorAPI enables the /api/mirror endpoints for starting mirror jobs via HTTP.
9399
// Disabled by default to prevent unauthenticated users from triggering downloads.
94100
MirrorAPI bool `json:"mirror_api" yaml:"mirror_api"`
@@ -321,6 +327,9 @@ func (c *Config) LoadFromEnv() {
321327
if v := os.Getenv("PROXY_MIRROR_API"); v != "" {
322328
c.MirrorAPI = v == "true" || v == "1"
323329
}
330+
if v := os.Getenv("PROXY_METADATA_TTL"); v != "" {
331+
c.MetadataTTL = v
332+
}
324333
}
325334

326335
// Validate checks the configuration for errors.
@@ -370,9 +379,34 @@ func (c *Config) Validate() error {
370379
}
371380
}
372381

382+
// Validate metadata TTL if specified
383+
if c.MetadataTTL != "" && c.MetadataTTL != "0" {
384+
if _, err := time.ParseDuration(c.MetadataTTL); err != nil {
385+
return fmt.Errorf("invalid metadata_ttl %q: %w", c.MetadataTTL, err)
386+
}
387+
}
388+
373389
return nil
374390
}
375391

392+
const defaultMetadataTTL = 5 * time.Minute //nolint:mnd // sensible default
393+
394+
// ParseMetadataTTL returns the metadata TTL duration.
395+
// Returns 5 minutes if unset, 0 if explicitly disabled.
396+
func (c *Config) ParseMetadataTTL() time.Duration {
397+
if c.MetadataTTL == "" {
398+
return defaultMetadataTTL
399+
}
400+
if c.MetadataTTL == "0" {
401+
return 0
402+
}
403+
d, err := time.ParseDuration(c.MetadataTTL)
404+
if err != nil {
405+
return defaultMetadataTTL
406+
}
407+
return d
408+
}
409+
376410
// ParseSize parses a human-readable size string (e.g., "10GB", "500MB").
377411
// Returns the size in bytes.
378412
func ParseSize(s string) (int64, error) {

internal/config/config_test.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"os"
55
"path/filepath"
66
"testing"
7+
"time"
78
)
89

910
const (
@@ -301,3 +302,56 @@ func TestLoadFileNotFound(t *testing.T) {
301302
t.Error("expected error for nonexistent file")
302303
}
303304
}
305+
306+
func TestParseMetadataTTL(t *testing.T) {
307+
tests := []struct {
308+
name string
309+
ttl string
310+
want time.Duration
311+
}{
312+
{"empty defaults to 5m", "", 5 * time.Minute},
313+
{"explicit zero", "0", 0},
314+
{"10 minutes", "10m", 10 * time.Minute},
315+
{"1 hour", "1h", 1 * time.Hour},
316+
{"invalid defaults to 5m", "not-a-duration", 5 * time.Minute},
317+
}
318+
319+
for _, tt := range tests {
320+
t.Run(tt.name, func(t *testing.T) {
321+
cfg := Default()
322+
cfg.MetadataTTL = tt.ttl
323+
got := cfg.ParseMetadataTTL()
324+
if got != tt.want {
325+
t.Errorf("ParseMetadataTTL() = %v, want %v", got, tt.want)
326+
}
327+
})
328+
}
329+
}
330+
331+
func TestValidateMetadataTTL(t *testing.T) {
332+
cfg := Default()
333+
cfg.MetadataTTL = "invalid"
334+
if err := cfg.Validate(); err == nil {
335+
t.Error("expected validation error for invalid metadata_ttl")
336+
}
337+
338+
cfg.MetadataTTL = "5m"
339+
if err := cfg.Validate(); err != nil {
340+
t.Errorf("unexpected error for valid metadata_ttl: %v", err)
341+
}
342+
343+
cfg.MetadataTTL = "0"
344+
if err := cfg.Validate(); err != nil {
345+
t.Errorf("unexpected error for zero metadata_ttl: %v", err)
346+
}
347+
}
348+
349+
func TestLoadMetadataTTLFromEnv(t *testing.T) {
350+
cfg := Default()
351+
t.Setenv("PROXY_METADATA_TTL", "10m")
352+
cfg.LoadFromEnv()
353+
354+
if cfg.MetadataTTL != "10m" {
355+
t.Errorf("MetadataTTL = %q, want %q", cfg.MetadataTTL, "10m")
356+
}
357+
}

internal/handler/handler.go

Lines changed: 64 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ type Proxy struct {
5757
Logger *slog.Logger
5858
Cooldown *cooldown.Config
5959
CacheMetadata bool
60+
MetadataTTL time.Duration
6061
HTTPClient *http.Client
6162
}
6263

@@ -372,12 +373,31 @@ func (p *Proxy) FetchOrCacheMetadata(ctx context.Context, ecosystem, cacheKey, u
372373

373374
storagePath := metadataStoragePath(ecosystem, cacheKey)
374375

375-
// Check for existing cache entry (for ETag revalidation)
376+
// Check for existing cache entry (for ETag revalidation and TTL)
376377
var entry *database.MetadataCacheEntry
377378
if p.CacheMetadata && p.DB != nil {
378379
entry, _ = p.DB.GetMetadataCache(ecosystem, cacheKey)
379380
}
380381

382+
// Serve from cache if within TTL (skip upstream entirely)
383+
if entry != nil && p.MetadataTTL > 0 && entry.FetchedAt.Valid {
384+
if time.Since(entry.FetchedAt.Time) < p.MetadataTTL {
385+
cached, readErr := p.Storage.Open(ctx, entry.StoragePath)
386+
if readErr == nil {
387+
defer func() { _ = cached.Close() }()
388+
data, readErr := ReadMetadata(cached)
389+
if readErr == nil {
390+
ct := contentTypeJSON
391+
if entry.ContentType.Valid {
392+
ct = entry.ContentType.String
393+
}
394+
return data, ct, nil
395+
}
396+
}
397+
// Cache file missing/unreadable, fall through to upstream
398+
}
399+
}
400+
381401
accept := contentTypeJSON
382402
if len(acceptHeaders) > 0 && acceptHeaders[0] != "" {
383403
accept = acceptHeaders[0]
@@ -518,6 +538,37 @@ func (p *Proxy) cacheMetadataBlob(ctx context.Context, ecosystem, cacheKey, stor
518538
})
519539
}
520540

541+
// cachedMeta carries the cache validators and the freshness flag derived
// from a stored metadata cache entry. The zero value means "nothing known".
type cachedMeta struct {
	// etag is the entry's stored ETag; empty when the entry has none.
	etag string

	// lastModified is the entry's stored Last-Modified time; the zero
	// time when the entry has none.
	lastModified time.Time

	// stale reports that the entry's fetch time is older than the
	// configured metadata TTL.
	stale bool
}
547+
548+
// lookupCachedMeta retrieves cache validators for a metadata entry.
549+
func (p *Proxy) lookupCachedMeta(ecosystem, cacheKey string) cachedMeta {
550+
if p.DB == nil {
551+
return cachedMeta{}
552+
}
553+
entry, err := p.DB.GetMetadataCache(ecosystem, cacheKey)
554+
if err != nil || entry == nil {
555+
return cachedMeta{}
556+
}
557+
var cm cachedMeta
558+
if entry.ETag.Valid {
559+
cm.etag = entry.ETag.String
560+
}
561+
if entry.LastModified.Valid {
562+
cm.lastModified = entry.LastModified.Time
563+
}
564+
// If FetchedAt is older than TTL, upstream must have failed and
565+
// we served from stale cache (successful fetches update FetchedAt).
566+
if p.MetadataTTL > 0 && entry.FetchedAt.Valid && time.Since(entry.FetchedAt.Time) > p.MetadataTTL {
567+
cm.stale = true
568+
}
569+
return cm
570+
}
571+
521572
// ProxyCached fetches metadata from upstream (with optional caching for offline fallback)
522573
// and writes it to the response. Optional acceptHeaders specify the Accept header to send.
523574
// When metadata caching is disabled, the response is streamed directly to avoid buffering
@@ -540,30 +591,18 @@ func (p *Proxy) ProxyCached(w http.ResponseWriter, r *http.Request, upstreamURL,
540591
return
541592
}
542593

543-
// Look up cache entry to get ETag and upstream Last-Modified for conditional response headers
544-
var etag string
545-
var lastModified time.Time
546-
if p.DB != nil {
547-
if entry, err := p.DB.GetMetadataCache(ecosystem, cacheKey); err == nil && entry != nil {
548-
if entry.ETag.Valid {
549-
etag = entry.ETag.String
550-
}
551-
if entry.LastModified.Valid {
552-
lastModified = entry.LastModified.Time
553-
}
554-
}
555-
}
594+
cm := p.lookupCachedMeta(ecosystem, cacheKey)
556595

557596
// Honor client conditional request headers
558-
if etag != "" {
559-
if match := r.Header.Get("If-None-Match"); match != "" && match == etag {
597+
if cm.etag != "" {
598+
if match := r.Header.Get("If-None-Match"); match != "" && match == cm.etag {
560599
w.WriteHeader(http.StatusNotModified)
561600
return
562601
}
563602
}
564-
if !lastModified.IsZero() {
603+
if !cm.lastModified.IsZero() {
565604
if ims := r.Header.Get("If-Modified-Since"); ims != "" {
566-
if t, err := http.ParseTime(ims); err == nil && !lastModified.After(t) {
605+
if t, err := http.ParseTime(ims); err == nil && !cm.lastModified.After(t) {
567606
w.WriteHeader(http.StatusNotModified)
568607
return
569608
}
@@ -572,11 +611,14 @@ func (p *Proxy) ProxyCached(w http.ResponseWriter, r *http.Request, upstreamURL,
572611

573612
w.Header().Set("Content-Type", contentType)
574613
w.Header().Set("Content-Length", strconv.Itoa(len(body)))
575-
if etag != "" {
576-
w.Header().Set("ETag", etag)
614+
if cm.etag != "" {
615+
w.Header().Set("ETag", cm.etag)
616+
}
617+
if !cm.lastModified.IsZero() {
618+
w.Header().Set("Last-Modified", cm.lastModified.UTC().Format(http.TimeFormat))
577619
}
578-
if !lastModified.IsZero() {
579-
w.Header().Set("Last-Modified", lastModified.UTC().Format(http.TimeFormat))
620+
if cm.stale {
621+
w.Header().Set("Warning", `110 - "Response is Stale"`)
580622
}
581623
w.WriteHeader(http.StatusOK)
582624
_, _ = w.Write(body)

0 commit comments

Comments
 (0)