Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 23 additions & 30 deletions claude_code_log/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2573,7 +2573,6 @@ def process_projects_hierarchy(

# Process each project directory
project_summaries: list[dict[str, Any]] = []
any_cache_updated = False # Track if any project had cache updates

# Aggregated stats
total_projects = len(project_dirs)
Expand Down Expand Up @@ -2748,9 +2747,7 @@ def _rel_to_index(p: Path) -> str:
stats.files_loaded_from_cache = len(jsonl_files) - stats.files_updated
stats.sessions_regenerated = len(stale_sessions)

# Track if cache was updated (for index regeneration)
if modified_files:
any_cache_updated = True
projects_with_updates += 1

# Generate output for this project (handles cache updates internally)
Expand Down Expand Up @@ -3138,33 +3135,31 @@ def _rel_to_index(p: Path) -> str:
# Update total projects count to include archived
total_projects = len(project_dirs) + archived_project_count

# Generate index (always regenerate if outdated). Index lives at
# the root of the output destination — `output_dir` if set
# (#151), else the legacy `projects_path` location.
ext = get_file_extension(output_format)
# Generate index — always regenerated. Skipping when "nothing
# changed" would let stale links survive a variant-flag toggle
# (e.g. `--compact` / `--no-timestamps` / `--detail`), which
# produces new per-project filenames without touching the cache.
# The index is built from the already-aggregated
# `project_summaries` in memory (one template pass + one write),
# so unconditional regeneration is cheap.
index_path = index_root / get_index_filename(output_format)
renderer = get_renderer(output_format, image_export_mode)
index_regenerated = False
if renderer.is_outdated(index_path) or from_date or to_date or any_cache_updated:
# Under `--expand-paths` (Obsidian mode), both Markdown and
# HTML render the index as a nested directory hierarchy that
# mirrors the projected folder tree. JSON keeps a flat list
# (structured data — tree shape isn't meaningful) so it does
# not accept the kwarg.
index_kwargs: dict[str, Any] = {}
if expand_paths and output_format in ("md", "markdown", "html"):
index_kwargs["expand_paths_tree"] = True
index_content = renderer.generate_projects_index(
project_summaries, from_date, to_date, **index_kwargs
)
assert index_content is not None
# Ensure the index root exists when projecting into a fresh dir.
index_path.parent.mkdir(parents=True, exist_ok=True)
# See issue #139: errors="replace" for lone-surrogate safety.
index_path.write_text(index_content, encoding="utf-8", errors="replace")
index_regenerated = True
elif not silent:
print(f"Index {ext.upper()} is current, skipping regeneration")
# Under `--expand-paths` (Obsidian mode), both Markdown and HTML
# render the index as a nested directory hierarchy that mirrors
# the projected folder tree. JSON keeps a flat list (structured
# data — tree shape isn't meaningful) so it does not accept the
# kwarg.
index_kwargs: dict[str, Any] = {}
if expand_paths and output_format in ("md", "markdown", "html"):
index_kwargs["expand_paths_tree"] = True
index_content = renderer.generate_projects_index(
project_summaries, from_date, to_date, **index_kwargs
)
assert index_content is not None
# Ensure the index root exists when projecting into a fresh dir.
index_path.parent.mkdir(parents=True, exist_ok=True)
# See issue #139: errors="replace" for lone-surrogate safety.
index_path.write_text(index_content, encoding="utf-8", errors="replace")

# Count total sessions from project summaries
for summary in project_summaries:
Expand All @@ -3185,8 +3180,6 @@ def _rel_to_index(p: Path) -> str:
summary_parts.append(f"Processed {total_projects} projects in {elapsed:.1f}s")
if projects_with_updates > 0:
summary_parts.append(f" {projects_with_updates} projects updated")
if index_regenerated:
summary_parts.append(" Index regenerated")
print("\n".join(summary_parts))

# Show archived sessions note if any exist
Expand Down
54 changes: 40 additions & 14 deletions test/test_html_regeneration.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,11 @@ def test_individual_session_regeneration_on_jsonl_change(self, tmp_path):
assert "I can help you test session regeneration" in new_content

def test_projects_index_regeneration_on_jsonl_change(self, tmp_path):
"""Test that index.html is regenerated when any project's JSONL files change."""
"""The projects index is always regenerated on every run, so that
toggling variant flags (e.g. `--compact`) refreshes its links —
and so that JSONL edits are picked up too. This test pins both
behaviours: a no-op re-run still writes the file (always-regen
contract); a JSONL edit shows up in the new content."""
# Setup: Create projects hierarchy
projects_dir = tmp_path / "projects"
projects_dir.mkdir()
Expand Down Expand Up @@ -161,7 +165,7 @@ def test_projects_index_regeneration_on_jsonl_change(self, tmp_path):
index_file = process_projects_hierarchy(projects_dir)
assert index_file.exists()
original_content = index_file.read_text(encoding="utf-8")
original_mtime = index_file.stat().st_mtime
original_mtime_ns = index_file.stat().st_mtime_ns

# Verify index was generated with project data
assert "project1" in original_content
Expand All @@ -170,25 +174,47 @@ def test_projects_index_regeneration_on_jsonl_change(self, tmp_path):
# Wait to ensure different modification time
time.sleep(0.1)

# Second run: No changes, should skip regeneration
# Second run: No source changes — but per the always-regenerate
# contract, the index file is rewritten anyway. The stale
# "Index ... is current, skipping regeneration" log line is
# gone; assert its absence so the contract can't silently
# regress.
with patch("builtins.print") as mock_print:
process_projects_hierarchy(projects_dir, silent=False)
mock_print.assert_any_call("Index HTML is current, skipping regeneration")

# Verify file wasn't regenerated
assert index_file.stat().st_mtime == original_mtime

# Third run: Modify JSONL file in project1, should regenerate index
time.sleep(1.1) # Ensure > 1.0 second difference for cache detection
for call in mock_print.call_args_list:
args = call.args
if args and isinstance(args[0], str):
assert "skipping regeneration" not in args[0], (
"Index regeneration should no longer be skipped on no-op runs."
)
# File was rewritten (mtime advanced).
assert index_file.stat().st_mtime_ns > original_mtime_ns

# Third run: Modify JSONL file in project1; index picks up the
# new content.
time.sleep(1.1) # > 1.0s ensures the cache's mtime tier sees the change.
new_message = '{"type":"summary","summary":"This project now has updated content for index regeneration test.","leafUuid":"msg_011","timestamp":"2025-07-03T16:25:00Z"}\n'
with open(jsonl1, "a", encoding="utf-8") as f:
f.write(new_message)

# Should regenerate index
# Capture before re-run so we can prove the index *content*
# actually changed (not just its mtime). The projects index
# surfaces project-level metadata (counts, date ranges, session
# AI titles) — not raw message text — so a string match on the
# appended message body isn't a reliable signal. A direct
# content comparison catches a "byte-equivalent rewrite" stale
# bug that an mtime check alone would silently pass.
pre_change_content = index_file.read_text(encoding="utf-8")
post_change_mtime_ns = index_file.stat().st_mtime_ns
process_projects_hierarchy(projects_dir)

# Verify index was regenerated
assert index_file.stat().st_mtime > original_mtime
assert index_file.stat().st_mtime_ns > post_change_mtime_ns
post_change_content = index_file.read_text(encoding="utf-8")
assert post_change_content != pre_change_content, (
"Regenerated index should reflect the appended JSONL entry "
"(project metadata: message count, date range, session "
"summary). An mtime bump alone wouldn't catch a "
"stale-content regression."
)

def test_cache_update_detection(self, tmp_path):
"""Test that cache updates are properly detected and used to trigger regeneration."""
Expand Down
6 changes: 5 additions & 1 deletion test/test_integration_realistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,7 +1099,11 @@ class TestIndexHTMLRegeneration:
def test_index_regenerated_when_project_cache_updates(
self, temp_projects_copy: Path
) -> None:
"""Index HTML regenerates when any project cache changes."""
"""Index HTML is rewritten on every `process_projects_hierarchy`
call (always-regenerate semantics — see PR #168). The JSONL edit
here is incidental cargo from when index regen was gated on
project-cache changes; today the assertion holds even without it,
which is the contract we want pinned."""
# Initial processing
process_projects_hierarchy(temp_projects_copy)
index_file = temp_projects_copy / "index.html"
Expand Down
164 changes: 164 additions & 0 deletions test/test_output_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
_get_page_html_path,
convert_jsonl_to,
generate_single_session_file,
process_projects_hierarchy,
)
from claude_code_log.models import DetailLevel
from claude_code_log.utils import VARIANT_ENTRY_RE, variant_suffix
Expand Down Expand Up @@ -631,3 +632,166 @@ def test_full_and_low_pages_coexist_and_cache_hits(self, tmp_path: Path) -> None
assert (fp2_again.st_mtime_ns, fp2_again.st_ino) == full_page2_sig, (
"Second FULL render should have been a cache hit (page 2)"
)


# ---------------------------------------------------------------------------
# Projects index regeneration when variant flags toggle
# ---------------------------------------------------------------------------


def _build_one_project_projects_dir(root: Path, encoded: str = "-p") -> Path:
"""Minimal `~/.claude/projects/` shape with one project + one session.

Mirrors `test_obsidian_output._build_fake_projects_dir` but inlined
to avoid a cross-module fixture dependency."""
projects_dir = root / "projects"
projects_dir.mkdir()
proj = projects_dir / encoded
proj.mkdir()
entry = {
"parentUuid": None,
"isSidechain": False,
"userType": "external",
"cwd": "/home/joe/p",
"sessionId": "sess1",
"version": "2.1.0",
"type": "user",
"uuid": "u1",
"timestamp": "2026-05-10T10:00:00.000Z",
"message": {
"role": "user",
"content": [{"type": "text", "text": "hi"}],
},
}
(proj / "sess1.jsonl").write_text(json.dumps(entry) + "\n", encoding="utf-8")
return projects_dir


class TestProjectsIndexVariantRefresh:
    """Regression suite: the projects index must follow variant-flag toggles.

    Switching a Markdown variant flag (`--compact`, `--no-timestamps`,
    `--detail`) between runs changes the per-project output filenames
    (e.g. `combined_transcripts.compact.md`). The projects `index.md`
    has to be rewritten on that re-run so that its links target the
    filenames of the current variant rather than those of the previous
    run.

    Before the fix, index regeneration was gated on a "source JSONL
    changed" flag. A flag-only re-run (in either direction through the
    variant matrix) therefore left the index stale, even when the
    per-project files were rewritten or an already-existing variant was
    re-targeted. Regenerating the index unconditionally covers both
    directions, at the cost of one template pass plus one file write
    over the already-aggregated `project_summaries`.
    """

    def test_compact_toggle_refreshes_index_link(
        self,
        tmp_path: Path,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """Default run followed by a `compact=True` run updates the link."""
        # Use a throwaway cache database: keeps the user's real cache
        # untouched and makes the run fully deterministic.
        cache_db = tmp_path / "cache.db"
        monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(cache_db))

        projects_dir = _build_one_project_projects_dir(tmp_path)
        project_out = projects_dir / "-p"
        index_path = projects_dir / "index.md"

        # Baseline: no variant flag at all.
        process_projects_hierarchy(projects_dir, output_format="md")
        assert (project_out / "combined_transcripts.md").exists()
        assert index_path.exists()
        baseline_index = index_path.read_text(encoding="utf-8")
        assert "combined_transcripts.md" in baseline_index
        assert "combined_transcripts.compact.md" not in baseline_index

        # The regression scenario: identical sources, only `--compact`
        # switched on. No source file changed, so the legacy
        # `any_cache_updated` flag would have stayed False — yet a new
        # variant file appears and the index has to track it.
        process_projects_hierarchy(projects_dir, output_format="md", compact=True)
        assert (project_out / "combined_transcripts.compact.md").exists()
        toggled_index = index_path.read_text(encoding="utf-8")
        assert "combined_transcripts.compact.md" in toggled_index, (
            "Index should have been regenerated to point at the .compact "
            "variant; it still references the bare filename."
        )

    def test_expand_paths_combined_no_refreshes_index_link(
        self,
        tmp_path: Path,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """Per-session links pick up `.compact` under `--expand-paths`.

        `--expand-paths` mode runs with `--combined no`, i.e. only
        per-session files are written and no `combined_transcripts*`
        file exists. Toggling `--compact` must still rewrite the index
        so the per-session bullet links carry the `.compact` suffix.
        """
        cache_db = tmp_path / "cache.db"
        monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(cache_db))

        projects_dir = _build_one_project_projects_dir(tmp_path)
        out = tmp_path / "out"
        index_path = out / "index.md"

        # Keyword arguments shared by both runs; only `compact` differs.
        shared_kwargs = dict(
            output_format="md",
            output_dir=out,
            expand_paths=True,
            write_combined=False,
        )

        process_projects_hierarchy(projects_dir, **shared_kwargs)
        assert index_path.exists()
        baseline_index = index_path.read_text(encoding="utf-8")
        # The per-session bullet initially targets the bare session file.
        assert "session-sess1.md" in baseline_index
        assert "session-sess1.compact.md" not in baseline_index

        process_projects_hierarchy(projects_dir, **shared_kwargs, compact=True)
        # The per-session variant file has to be present on disk.
        compact_session = out.joinpath(
            "home", "joe", "p", "session-sess1.compact.md"
        )
        assert compact_session.exists()
        toggled_index = index_path.read_text(encoding="utf-8")
        assert "session-sess1.compact.md" in toggled_index, (
            "Index should refresh per-session links to the .compact "
            "variant when `--combined no` is in effect."
        )

    def test_toggle_back_to_default_refreshes_index_link(
        self,
        tmp_path: Path,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """Dropping `--compact` again restores the unsuffixed link.

        The mirror image of the forward toggle: after a `--compact`
        run, a run WITHOUT the flag has to point the index back at the
        unsuffixed filename, even though that bare file is already on
        disk from an earlier run (no source/cache changes, no slow-path
        entry for the project).

        Before the "always regenerate" fix this stayed stale: the slow
        path was never entered for the project, so the dirty flag never
        flipped and the index kept its `.compact` links.
        """
        cache_db = tmp_path / "cache.db"
        monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(cache_db))

        projects_dir = _build_one_project_projects_dir(tmp_path)
        index_path = projects_dir / "index.md"

        # Put both variants on disk: default first, then compact.
        process_projects_hierarchy(projects_dir, output_format="md")
        process_projects_hierarchy(projects_dir, output_format="md", compact=True)
        # The index mirrors the latest (compact) run at this point.
        compact_index = index_path.read_text(encoding="utf-8")
        assert "combined_transcripts.compact.md" in compact_index

        # Default run again. Sources are unchanged and the bare combined
        # file already exists, so the per-project loop takes the fast
        # path — the index still has to flip back to the unsuffixed link.
        process_projects_hierarchy(projects_dir, output_format="md")
        final_index = index_path.read_text(encoding="utf-8")
        assert "combined_transcripts.compact.md" not in final_index, (
            "Index should no longer reference the .compact variant "
            "after toggling back to the default."
        )
        assert "combined_transcripts.md" in final_index
Loading