Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
a4f06d3
chore(ledger): start task task-01 for issue #245
stranske-automation-bot Jan 23, 2026
56e2d60
chore(ledger): finish task task-01 for issue #245
stranske-automation-bot Jan 23, 2026
3413be5
feat(cache): add manager query caching
Jan 23, 2026
71afef7
chore(autofix): formatting/lint
github-actions[bot] Jan 23, 2026
2f9834c
fix: resolve CI failures
Jan 23, 2026
5de9d9f
chore(autofix): formatting/lint
github-actions[bot] Jan 23, 2026
9500654
test: cover issue dedup detection
Jan 23, 2026
8d89113
fix: resolve CI failures
Jan 23, 2026
ce2ef5d
chore(codex-keepalive): apply updates (PR #387)
github-actions[bot] Jan 23, 2026
8a68adf
fix: resolve CI failures
Jan 23, 2026
88a12c0
fix: resolve CI failures
Jan 23, 2026
783ef82
chore(codex-keepalive): apply updates (PR #387)
github-actions[bot] Jan 23, 2026
005fced
chore(codex-keepalive): apply updates (PR #387)
github-actions[bot] Jan 23, 2026
24b2c92
chore(codex-keepalive): apply updates (PR #387)
github-actions[bot] Jan 23, 2026
1f297ab
fix: resolve CI failures
Jan 23, 2026
a1a9ab6
fix: resolve CI failures
Jan 23, 2026
7fb2e97
chore(codex-keepalive): apply updates (PR #387)
github-actions[bot] Jan 23, 2026
7b62d63
fix: resolve CI failures
Jan 23, 2026
cf5c955
chore(codex-keepalive): apply updates (PR #387)
github-actions[bot] Jan 23, 2026
99ca29a
fix: resolve CI failures
Jan 23, 2026
ee8aef9
chore(codex-keepalive): apply updates (PR #387)
github-actions[bot] Jan 23, 2026
a3348ed
chore(codex-keepalive): apply updates (PR #387)
github-actions[bot] Jan 23, 2026
2161894
fix: resolve CI failures
Jan 23, 2026
a68ccd3
fix: make tools a package for mypy
stranske Jan 24, 2026
5629a30
fix: add fakeredis test dependency
stranske Jan 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions .agents/issue-245-ledger.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
version: 1
issue: 245
base: main
branch: codex/issue-245
tasks:
- id: task-01
title: Set up Redis connection
status: done
started_at: '2026-01-23T20:04:23Z'
finished_at: '2026-01-23T20:04:31Z'
commit: a4f06d3f87568cf510873b33c3b021ff1a3f98d6
notes: []
- id: task-02
title: Add cache decorator for query functions
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-03
title: Implement cache invalidation on writes
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-04
title: Add cache hit/miss metrics
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-05
title: Set up Redis connection
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-06
title: Add cache decorator for query functions
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-07
title: Implement cache invalidation on writes
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-08
title: Add cache hit/miss metrics
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-09
title: Repeated reads served from cache
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-10
title: Cache invalidated on data changes
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-11
title: Metrics show cache hit rate
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-12
title: Repeated reads served from cache
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-13
title: Cache invalidated on data changes
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-14
title: Metrics show cache hit rate
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-15
title: '---'
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
- id: task-16
title: '*Test expectation: Should NOT be flagged as duplicate - completely unrelated
topic*'
status: todo
started_at: null
finished_at: null
commit: ''
notes: []
2 changes: 1 addition & 1 deletion .github/scripts/fallback_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def main() -> int:
block_start = m.end(0)
block_end = matches[idx + 1].start(2) if idx + 1 < len(matches) else len(text)
segment = text[block_start:block_end].strip()
title_line = (segment.splitlines() or [f"Topic {idx+1}"])[0][:120]
title_line = (segment.splitlines() or [f"Topic {idx + 1}"])[0][:120]
norm = re.sub(r"\s+", " ", title_line.lower())
topics.append(
{
Expand Down
2 changes: 1 addition & 1 deletion adapters/uk.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ async def list_new_filings(company_number: str, since: str):

async def download(filing: dict[str, str]):
"""Download the filing document."""
url = f"{BASE_URL}/filing-history/{filing['transaction_id']}/document" "?format=pdf"
url = f"{BASE_URL}/filing-history/{filing['transaction_id']}/document?format=pdf"
async with httpx.AsyncClient() as client:
async with tracked_call("uk", url) as log:
r = await client.get(url)
Expand Down
192 changes: 192 additions & 0 deletions api/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
"""Cache helpers with Redis/in-memory backends and Prometheus metrics."""

from __future__ import annotations

import hashlib
import json
import os
from collections.abc import Callable
from dataclasses import dataclass
from functools import wraps
from threading import Lock
from typing import Any

from cachetools import TTLCache # type: ignore[import-untyped]
from prometheus_client import Counter, Gauge

# Prometheus series, labelled per cache namespace so dashboards can track
# hit rates for each query family independently.
CACHE_HITS = Counter("cache_hits_total", "Cache hits by namespace.", ("namespace",))
CACHE_MISSES = Counter("cache_misses_total", "Cache misses by namespace.", ("namespace",))
CACHE_HIT_RATIO = Gauge("cache_hit_ratio", "Cache hit ratio by namespace.", ("namespace",))

# Process-wide backend singleton.
# NOTE(review): _CACHE_LOCK is declared but never acquired in this module —
# _get_backend() mutates _CACHE_BACKEND without it; confirm intended usage.
_CACHE_LOCK = Lock()
_CACHE_BACKEND: _CacheBackend | None = None
# In-process hit/miss tallies per namespace; guarded by _METRICS_LOCK.
_METRICS_LOCK = Lock()
_CACHE_METRICS: dict[str, dict[str, int]] = {}


def _cache_ttl_seconds() -> int:
return max(int(os.getenv("CACHE_TTL_SECONDS", "60")), 1)


def _cache_max_items() -> int:
return max(int(os.getenv("CACHE_MAX_ITEMS", "512")), 1)


def _build_redis_client(redis_url: str):
    """Construct a Redis client from *redis_url* via ``redis.Redis.from_url``.

    Raises whatever the ``redis`` package raises on a bad URL or missing
    dependency; the caller treats any exception as "Redis unavailable".
    """
    # Import lazily so Redis remains optional in environments without it.
    import redis

    return redis.Redis.from_url(redis_url)


@dataclass(frozen=True)
class _CacheBackend:
    """Minimal storage interface shared by the Redis and in-memory backends."""

    # get(key) -> stored payload (str or bytes) or None when absent.
    get: Callable[[str], Any]
    # set(key, payload, ttl_seconds) -> None.
    set: Callable[[str, str, int], None]
    # delete_prefix(prefix) -> None; removes every key starting with prefix.
    delete_prefix: Callable[[str], None]


def _get_backend() -> _CacheBackend:
    """Return the process-wide cache backend, creating it on first use.

    Prefers Redis when ``REDIS_URL`` is set and a client can be built;
    otherwise falls back to a thread-safe in-memory ``TTLCache``.

    Fix: creation is now guarded by ``_CACHE_LOCK`` (double-checked
    locking). Previously concurrent first calls could race on
    ``_CACHE_BACKEND`` and build two distinct backends — with the memory
    backend, two independent caches.
    """
    global _CACHE_BACKEND
    # Fast path: already initialized; reading the reference is atomic.
    if _CACHE_BACKEND is not None:
        return _CACHE_BACKEND

    with _CACHE_LOCK:
        # Another thread may have finished initialization while we waited.
        if _CACHE_BACKEND is not None:
            return _CACHE_BACKEND

        redis_url = os.getenv("REDIS_URL")
        if redis_url:
            try:
                client = _build_redis_client(redis_url)
            except Exception:
                # Redis is optional: any failure falls back to memory.
                client = None
            if client is not None:

                def _redis_get(key: str) -> Any:
                    return client.get(key)

                def _redis_set(key: str, payload: str, ttl: int) -> None:
                    client.setex(key, ttl, payload)

                def _redis_delete_prefix(prefix: str) -> None:
                    keys = list(client.scan_iter(f"{prefix}*"))
                    if keys:
                        client.delete(*keys)

                _CACHE_BACKEND = _CacheBackend(
                    get=_redis_get, set=_redis_set, delete_prefix=_redis_delete_prefix
                )
                return _CACHE_BACKEND

        cache = TTLCache(maxsize=_cache_max_items(), ttl=_cache_ttl_seconds())
        cache_lock = Lock()

        def _memory_get(key: str) -> Any:
            with cache_lock:
                return cache.get(key)

        def _memory_set(key: str, payload: str, ttl: int) -> None:
            # TTLCache applies the TTL fixed at construction; the per-call
            # ttl argument is accepted only for interface parity with Redis.
            with cache_lock:
                cache[key] = payload

        def _memory_delete_prefix(prefix: str) -> None:
            with cache_lock:
                stale = [key for key in cache.keys() if str(key).startswith(prefix)]
                for key in stale:
                    cache.pop(key, None)

        _CACHE_BACKEND = _CacheBackend(
            get=_memory_get, set=_memory_set, delete_prefix=_memory_delete_prefix
        )
        return _CACHE_BACKEND


def reset_cache_backend() -> None:
    """Drop the memoized backend so the next lookup rebuilds it (test hook)."""
    global _CACHE_BACKEND
    _CACHE_BACKEND = None


def reset_cache_stats() -> None:
    """Zero out the in-process hit/miss tallies (test hook)."""
    with _METRICS_LOCK:
        # clear() in place so existing references to the dict stay valid.
        _CACHE_METRICS.clear()


def _make_cache_key(namespace: str, args: tuple[Any, ...], kwargs: dict[str, Any]) -> str:
raw = json.dumps({"args": args, "kwargs": kwargs}, sort_keys=True, default=str)
digest = hashlib.sha256(raw.encode("utf-8")).hexdigest()
return f"{namespace}:{digest}"


def _record_cache_metric(namespace: str, hit: bool) -> None:
    """Record one lookup outcome in both local tallies and Prometheus."""
    with _METRICS_LOCK:
        tallies = _CACHE_METRICS.setdefault(namespace, {"hits": 0, "misses": 0})
        bucket = "hits" if hit else "misses"
        tallies[bucket] += 1
        counter = CACHE_HITS if hit else CACHE_MISSES
        counter.labels(namespace=namespace).inc()
        total = tallies["hits"] + tallies["misses"]
        if total:
            CACHE_HIT_RATIO.labels(namespace=namespace).set(tallies["hits"] / total)


def get_cache_stats(namespace: str) -> dict[str, int | float]:
    """Return cache hit/miss counts and hit ratio for a namespace."""
    with _METRICS_LOCK:
        snapshot = dict(_CACHE_METRICS.get(namespace, {"hits": 0, "misses": 0}))
    hits, misses = snapshot["hits"], snapshot["misses"]
    total = hits + misses
    return {
        "hits": hits,
        "misses": misses,
        # 0.0 before any lookups, so callers never divide by zero.
        "hit_ratio": hits / total if total else 0.0,
    }


def cache_get(namespace: str, key: str) -> Any | None:
    """Fetch *key* from the backend, recording hit/miss under *namespace*.

    Returns the JSON-decoded value, the raw string when the payload is not
    valid JSON, or ``None`` on a miss.
    """
    payload = _get_backend().get(key)
    if payload is None:
        _record_cache_metric(namespace, hit=False)
        return None
    text = payload.decode("utf-8") if isinstance(payload, bytes) else payload
    try:
        value = json.loads(text)
    except (TypeError, json.JSONDecodeError):
        # Tolerate entries written by other producers as plain strings.
        value = text
    _record_cache_metric(namespace, hit=True)
    return value


def cache_set(key: str, value: Any, *, ttl: int | None = None) -> None:
    """Serialize *value* as JSON and store it under *key*.

    A falsy ``ttl`` falls back to the configured ``CACHE_TTL_SECONDS``;
    non-JSON-serializable objects are stringified via ``default=str``.
    """
    serialized = json.dumps(value, default=str)
    _get_backend().set(key, serialized, ttl or _cache_ttl_seconds())


def invalidate_cache_prefix(prefix: str) -> None:
    """Evict every cached entry whose key starts with *prefix*."""
    _get_backend().delete_prefix(prefix)


def cache_query(
    namespace: str,
    *,
    ttl: int | None = None,
    skip_args: int = 0,
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Cache results for query-like functions.

    Keys combine *namespace* with a digest of the call arguments; the first
    ``skip_args`` positional arguments (e.g. ``self``) are excluded from the
    key. ``None`` results are not cached, so empty lookups are retried.
    """

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            key = _make_cache_key(namespace, args[skip_args:], kwargs)
            hit = cache_get(namespace, key)
            if hit is not None:
                return hit
            fresh = func(*args, **kwargs)
            if fresh is not None:
                cache_set(key, fresh, ttl=ttl)
            return fresh

        return wrapper

    return decorator
4 changes: 2 additions & 2 deletions api/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def chat(
}
},
),
)
),
):
"""Return a naive answer built from stored documents."""
# Import here to avoid loading embedding models during unrelated endpoints/tests.
Expand Down Expand Up @@ -521,7 +521,7 @@ def _run_with_retries_sync() -> None:
response_model=HealthDbResponse,
summary="Check database connectivity",
description=(
"Run a lightweight database ping and return the health status with " "observed latency."
"Run a lightweight database ping and return the health status with observed latency."
),
responses={
503: {
Expand Down
Loading
Loading