From 4c017cc06af34bc87ab7dda331ce6f4b07a0436a Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Mon, 29 Jun 2026 11:02:53 +0200 Subject: [PATCH 1/3] Regenerate single-file output when source grows; fix double success message (#221 follow-up) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default single-file output path (no `-o`, detail-suffixed naming like `.low.html`) rode the version-only staleness skip: `is_outdated()` compares only the embedded tool version, not the source's freshness. So a single `.jsonl` that grew between runs (an in-progress session re-exported with the same tool version) was wrongly reported "current" and served stale HTML. #237 only forced regeneration for the explicit `-o file` case. Bug 1: add a source-mtime term to the single-file `else` branch in convert_jsonl_to — regenerate when the source file is newer than the existing output. Scoped to a file source (a directory source has no single mtime and is already handled by the cache / `cache_was_updated` path). The version-marker check is kept for tool-version bumps. Bug 2: on a skip the CLI printed "Successfully converted" on top of the converter's own "is current, skipping regeneration" line. convert_jsonl_to now reports whether it actually (re)wrote the output via a non-breaking optional `regenerated` out-parameter (the `Path` return contract that ~20 callers rely on is unchanged); the CLI prints the success line only on a real regeneration. Tests: flip test_single_file_mode_regeneration_behavior (it enshrined the stale behavior as "expected") to assert regeneration on a grown source; add TestSingleFileStaleness covering the exact CLI repro — grown source regenerates, unchanged source skips with no redundant success line. Both fail on the prior code. Co-Authored-By: Claude Opus 4.8 (1M context) --- claude_code_log/cli.py | 11 +++++++- claude_code_log/converter.py | 35 ++++++++++++++++++++++++- test/test_html_regeneration.py | 26 +++++++++++------- test/test_output_explicit.py | 48 ++++++++++++++++++++++++++++++++++ 4 files changed, 109 insertions(+), 11 deletions(-) diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index b46c6d10..0867f78e 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -1299,6 +1299,10 @@ def main( ) return + # Out-param: convert_jsonl_to appends whether it actually (re)wrote + # the combined output, so we don't print "Successfully converted" on + # top of its own "is current, skipping regeneration" line on a skip. + regenerated: list[bool] = [] output_path = convert_jsonl_to( output_format, input_path, @@ -1326,9 +1330,14 @@ def main( # same dir still regenerates), and `--all-projects` calls this # with output=None anyway, so its skip is never forced. force_regenerate=output is not None and _output_path_is_file(output), + regenerated=regenerated, ) + # On a skip the converter already printed "is current, skipping + # regeneration"; suppress the redundant/misleading success line. + was_regenerated = regenerated != [False] if input_path.is_file(): - click.echo(f"Successfully converted {input_path} to {output_path}") + if was_regenerated: + click.echo(f"Successfully converted {input_path} to {output_path}") else: jsonl_count = len(list(input_path.glob("*.jsonl"))) if write_individual: diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 99ea2e41..d201632b 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -1707,6 +1707,7 @@ def convert_jsonl_to( no_timestamps: bool = False, no_recaps: bool = False, force_regenerate: bool = False, + regenerated: Optional[list[bool]] = None, ) -> Path: """Convert JSONL transcript(s) to the specified format. @@ -1730,6 +1731,12 @@ def convert_jsonl_to( file at a user-chosen path was kept even when a different transcript was requested — silent stale content. The tool's own managed ``combined_transcripts*`` artifacts still use the skip. + regenerated: Optional out-parameter. When a list is passed, a single + bool is appended reporting whether the combined output was + actually (re)written (True) or skipped as current (False). Lets + the CLI avoid printing a misleading "Successfully converted" line + on a skip, without changing the ``Path`` return contract that + ~20 callers rely on. """ if not input_path.exists(): raise FileNotFoundError(f"Input path not found: {input_path}") @@ -1829,6 +1836,8 @@ def convert_jsonl_to( f"All HTML files are current for {input_path.name}, " "skipping regeneration" ) + if regenerated is not None: + regenerated.append(False) return output_path # Phase 2: Load messages (will use fresh cache when available) @@ -1894,6 +1903,9 @@ def convert_jsonl_to( # requested) are still produced by `_generate_individual_session_files` # below. The function still returns `output_path` for the caller's # index linking, but the file at that path is not (re-)written. + # Tracks whether the combined output was actually (re)written this call, + # reported via the `regenerated` out-parameter for the CLI's message. + did_regenerate = False if not write_combined: pass elif use_pagination: @@ -1932,6 +1944,7 @@ def convert_jsonl_to( compact=compact, no_recaps=no_recaps, ) + did_regenerate = True else: # Use single-file generation for small projects or filtered views # Use incremental regeneration via html_cache when available @@ -1949,16 +1962,33 @@ def convert_jsonl_to( or not output_path.exists() ) else: - # Fallback: old logic for single file mode or no cache + # Fallback: old logic for single file mode or no cache. + # + # is_outdated() only compares the embedded tool version, not the + # source's freshness, so a single .jsonl that grows between runs + # (e.g. an in-progress session re-exported with the same tool + # version) would be wrongly skipped as "current" and serve stale + # HTML (issue #221 follow-up). Mirror the directory path's + # source-tracking intent with an mtime check: regenerate when the + # source file is newer than the existing output. Scoped to a file + # source — a directory source has no single mtime and is handled + # by the cache branch / `cache_was_updated` above. + source_is_newer = ( + input_path.is_file() + and output_path.exists() + and input_path.stat().st_mtime > output_path.stat().st_mtime + ) should_regenerate = ( force_regenerate or renderer.is_outdated(output_path) + or source_is_newer or from_date is not None or to_date is not None or not output_path.exists() or (input_path.is_dir() and cache_was_updated) ) + did_regenerate = should_regenerate if should_regenerate: # For referenced images, pass the output directory output_dir = output_path.parent @@ -2003,6 +2033,9 @@ def convert_jsonl_to( no_recaps=no_recaps, ) + if regenerated is not None: + regenerated.append(did_regenerate) + return output_path diff --git a/test/test_html_regeneration.py b/test/test_html_regeneration.py index cecaac8b..8578185f 100644 --- a/test/test_html_regeneration.py +++ b/test/test_html_regeneration.py @@ -300,7 +300,8 @@ def test_force_regeneration_with_cache_update(self, tmp_path): assert "This should force regeneration despite same version" in new_content def test_single_file_mode_regeneration_behavior(self, tmp_path): - """Test that single file mode doesn't use cache but still respects version checks.""" + """Single file mode has no cache: it skips on an unchanged source + (version marker current) but regenerates when the source is newer.""" # Setup: Create a single JSONL file test_data_dir = Path(__file__).parent / "test_data" jsonl_file = tmp_path / "single_test.jsonl" @@ -327,21 +328,28 @@ def test_single_file_mode_regeneration_behavior(self, tmp_path): # Verify file wasn't regenerated (same mtime) assert output_file.stat().st_mtime == original_mtime - # Modify file - should NOT auto-regenerate in single file mode because there's no cache + # Modify the source: even without a cache, a single file that grows + # (e.g. an in-progress session) MUST regenerate — the output is now + # older than its source (issue #221 follow-up). Previously single + # file mode wrongly skipped on the version marker alone. time.sleep(0.1) new_message = '{"type":"user","timestamp":"2025-07-03T16:40:00Z","parentUuid":null,"isSidechain":false,"userType":"human","cwd":"/tmp","sessionId":"test_session","version":"1.0.0","uuid":"single_file_msg","message":{"role":"user","content":[{"type":"text","text":"Single file mode test."}]}}\n' with open(jsonl_file, "a", encoding="utf-8") as f: f.write(new_message) - # Single file mode doesn't have cache, so it should still skip based on version with patch("builtins.print") as mock_print: convert_jsonl_to_html(jsonl_file) - mock_print.assert_any_call( - "HTML file single_test.html is current, skipping regeneration" - ) - - # Verify file wasn't regenerated (this is expected behavior for single file mode) - assert output_file.stat().st_mtime == original_mtime + # No "skipping" line: it regenerated because the source is newer. + skip_calls = [ + c + for c in mock_print.call_args_list + if c.args and "skipping regeneration" in str(c.args[0]) + ] + assert not skip_calls, f"unexpected skip after source grew: {skip_calls}" + + # The output was rewritten (newer mtime) and now contains the new message. + assert output_file.stat().st_mtime > original_mtime + assert "Single file mode test." in output_file.read_text(encoding="utf-8") class TestIncrementalHtmlCache: diff --git a/test/test_output_explicit.py b/test/test_output_explicit.py index 7c4a6474..43d2a20b 100644 --- a/test/test_output_explicit.py +++ b/test/test_output_explicit.py @@ -205,5 +205,53 @@ def test_nonexistent_path_is_outdated(self, tmp_path: Path): assert MarkdownRenderer().is_outdated(missing) is True +class TestSingleFileStaleness: + """#221 follow-up: the *default* single-file output (no ``-o``, detail- + suffixed naming) rides the version-only skip, so a source that grows + between runs was wrongly kept stale. And a genuine skip printed BOTH the + converter's "is current, skipping" line and the CLI's "Successfully + converted" line. These pin the fixes for the exact CLI repro: + ``claude-code-log .jsonl --detail low`` run, grow source, re-run.""" + + def test_grown_source_regenerates(self, tmp_path: Path): + src = _write_jsonl(tmp_path / "session.jsonl", "FIRST_turn_marker") + runner = CliRunner() + + r1 = runner.invoke(main, [str(src), "--detail", "low"]) + assert r1.exit_code == 0, r1.output + out = next(tmp_path.glob("*.low.html")) + assert "FIRST_turn_marker" in out.read_text(encoding="utf-8") + out_mtime = out.stat().st_mtime + + # Source progresses (a real append, not a synthetic mtime bump), then + # forced strictly newer than the output so the check is robust on + # coarse-granularity filesystems. + with open(src, "a", encoding="utf-8") as f: + f.write(json.dumps(_user_entry("SECOND_turn_marker")) + "\n") + os.utime(src, (out_mtime + 2, out_mtime + 2)) + + r2 = runner.invoke(main, [str(src), "--detail", "low"]) + assert r2.exit_code == 0, r2.output + assert "skipping regeneration" not in r2.output + second = out.read_text(encoding="utf-8") + assert "SECOND_turn_marker" in second, "stale HTML served after source grew" + assert out.stat().st_mtime > out_mtime + + def test_unchanged_source_skips_without_success_line(self, tmp_path: Path): + src = _write_jsonl(tmp_path / "session.jsonl", "ONLY_turn_marker") + runner = CliRunner() + + r1 = runner.invoke(main, [str(src), "--detail", "low"]) + assert r1.exit_code == 0, r1.output + assert "Successfully converted" in r1.output + + # Re-run with the source untouched: skip announced exactly once, and + # NOT also "Successfully converted" (the misleading double message). + r2 = runner.invoke(main, [str(src), "--detail", "low"]) + assert r2.exit_code == 0, r2.output + assert "skipping regeneration" in r2.output + assert "Successfully converted" not in r2.output + + if __name__ == "__main__": pytest.main([__file__, "-v"]) From 314df4adc42cec9452f2c213da4d85739201ffec Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Tue, 30 Jun 2026 22:12:49 +0200 Subject: [PATCH 2/3] Fold in monk review notes: directory-branch symmetry, any() readability, mtime rationale MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Non-blocking review follow-ups on the #221 single-file-staleness fix: 1. Directory-branch symmetry: the skip-suppression of the success line was gated only on the single-file branch, so a directory whose combined output is current (Phase-1b early-exit) still printed "Successfully combined N transcript files ... and generated M individual session files" on a pure skip. The `regenerated` out-param already carries the bool — extend the same guard to the directory branch so skip-messaging is consistent for single-file AND directory inputs. 2. Readability: `regenerated != [False]` -> `any(regenerated)`. 3. Mtime rationale: add a comment explaining why the *output* file's mtime is the correct, sufficient, persistence-free basis in single-file mode (no cache DB to store the source mtime as the directory path does), and that both share the same filesystem-granularity limit. Test: add test_directory_skip_suppresses_success_line pinning note 1 (RED on the prior file-only guard, which printed the misleading combined line on a no-op directory re-run). Co-Authored-By: Claude Opus 4.8 (1M context) --- claude_code_log/cli.py | 17 +++++++++++------ claude_code_log/converter.py | 9 +++++++++ test/test_output_explicit.py | 18 ++++++++++++++++++ 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index 0867f78e..0a32e5d4 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -1332,12 +1332,17 @@ def main( force_regenerate=output is not None and _output_path_is_file(output), regenerated=regenerated, ) - # On a skip the converter already printed "is current, skipping - # regeneration"; suppress the redundant/misleading success line. - was_regenerated = regenerated != [False] - if input_path.is_file(): - if was_regenerated: - click.echo(f"Successfully converted {input_path} to {output_path}") + # On a skip the converter already printed its "... is current, + # skipping regeneration" line (single-file and directory paths alike), + # so suppress the redundant/misleading success line for BOTH. The + # `regenerated` out-param carries exactly one bool from + # convert_jsonl_to (it tracks the combined-output write; a directory + # whose combined is current early-exits / skips, reporting False). + was_regenerated = any(regenerated) + if not was_regenerated: + pass + elif input_path.is_file(): + click.echo(f"Successfully converted {input_path} to {output_path}") else: jsonl_count = len(list(input_path.glob("*.jsonl"))) if write_individual: diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index d201632b..1a5892e6 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -1973,6 +1973,15 @@ def convert_jsonl_to( # source file is newer than the existing output. Scoped to a file # source — a directory source has no single mtime and is handled # by the cache branch / `cache_was_updated` above. + # + # The directory cache persists the *source* mtime in its DB to + # detect change; single-file mode has no DB, so the *output* file's + # own mtime is the natural, persistence-free basis. It is sufficient + # because the output is always written after its source is read, so + # `output.mtime >= source.mtime` holds post-write; a later append + # bumps the source past it. Both approaches share the same + # filesystem-mtime granularity limit (a sub-tick append racing the + # prior write resolves on the next run) — no worse than the cache. source_is_newer = ( input_path.is_file() and output_path.exists() diff --git a/test/test_output_explicit.py b/test/test_output_explicit.py index 43d2a20b..c086417f 100644 --- a/test/test_output_explicit.py +++ b/test/test_output_explicit.py @@ -252,6 +252,24 @@ def test_unchanged_source_skips_without_success_line(self, tmp_path: Path): assert "skipping regeneration" in r2.output assert "Successfully converted" not in r2.output + def test_directory_skip_suppresses_success_line(self, tmp_path: Path): + """Directory symmetry: a no-op directory re-run early-exits with a + skip line and must NOT also print 'Successfully combined ...' (the + same double-message fix applied to the directory branch).""" + proj = tmp_path / "proj" + proj.mkdir() + _write_jsonl(proj / "s.jsonl", "DIR_turn_marker") + runner = CliRunner() + + r1 = runner.invoke(main, [str(proj)]) + assert r1.exit_code == 0, r1.output + assert "Successfully combined" in r1.output + + r2 = runner.invoke(main, [str(proj)]) + assert r2.exit_code == 0, r2.output + assert "skipping regeneration" in r2.output + assert "Successfully combined" not in r2.output + if __name__ == "__main__": pytest.main([__file__, "-v"]) From 4d7f498372f38c29abcd1ab24f9f742fc748692d Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Wed, 1 Jul 2026 00:15:32 +0200 Subject: [PATCH 3/3] Count per-session regeneration in the success-message gate (fix --combined no silent run) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The skip-suppression added earlier gated the CLI success line on the COMBINED-output write only (`did_regenerate`). But per-session generation is an independent axis: `--combined no` writes only individual session files and never a combined transcript, so `did_regenerate` stayed False and the run printed NOTHING despite producing every session file — a regression vs. the prior unconditional "Successfully combined N transcript files ... and generated M individual session files" line. The same gap silently dropped the session count whenever the combined was current but a session was (re)written. `_generate_individual_session_files` already returns a regenerated count; it was discarded. Capture it and OR it into the `regenerated` out-param so the gate means "combined OR any session file (re)written". Pure skips (single-file or a fully-current directory) still report False and stay quiet, preserving the original double-message fix. Surfaced by a self-review pass (/code-review) on the branch; not caught by the diff-scoped manual review because it only manifests under --combined no / session-only regeneration, which the review didn't execute. Test: test_combined_no_confirms_session_generation — `--combined no` writes session files AND confirms the work (RED before: the run printed only Processing/Loading with no summary line). Co-Authored-By: Claude Opus 4.8 (1M context) --- claude_code_log/cli.py | 7 ++++--- claude_code_log/converter.py | 26 ++++++++++++++++++-------- test/test_output_explicit.py | 20 ++++++++++++++++++++ 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index 0a32e5d4..3e059717 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -1332,12 +1332,13 @@ def main( force_regenerate=output is not None and _output_path_is_file(output), regenerated=regenerated, ) - # On a skip the converter already printed its "... is current, + # On a pure skip the converter already printed its "... is current, # skipping regeneration" line (single-file and directory paths alike), # so suppress the redundant/misleading success line for BOTH. The # `regenerated` out-param carries exactly one bool from - # convert_jsonl_to (it tracks the combined-output write; a directory - # whose combined is current early-exits / skips, reporting False). + # convert_jsonl_to: True if the combined transcript OR any individual + # session file was (re)written (so `--combined no`, which writes only + # session files, still confirms its work), False when all is current. was_regenerated = any(regenerated) if not was_regenerated: pass diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 1a5892e6..49602869 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -1732,11 +1732,14 @@ def convert_jsonl_to( transcript was requested — silent stale content. The tool's own managed ``combined_transcripts*`` artifacts still use the skip. regenerated: Optional out-parameter. When a list is passed, a single - bool is appended reporting whether the combined output was - actually (re)written (True) or skipped as current (False). Lets - the CLI avoid printing a misleading "Successfully converted" line - on a skip, without changing the ``Path`` return contract that - ~20 callers rely on. + bool is appended reporting whether ANY output was actually + (re)written — the combined transcript OR at least one individual + session file (True) — versus everything being current/skipped + (False). Lets the CLI avoid printing a misleading "Successfully + converted" line on a pure skip while still confirming per-session + work (e.g. ``--combined no``, which never writes a combined), + without changing the ``Path`` return contract that ~20 callers + rely on. """ if not input_path.exists(): raise FileNotFoundError(f"Input path not found: {input_path}") @@ -2022,9 +2025,16 @@ def convert_jsonl_to( f"{format.upper()} file {output_path.name} is current, skipping regeneration" ) - # Generate individual session files if requested and in directory mode + # Generate individual session files if requested and in directory mode. + # Its return count feeds `regenerated` below: per-session output is an + # independent axis from the combined write, so a run that rewrites session + # files while the combined stays current (or `--combined no`, which never + # writes a combined) still counts as work done — otherwise the CLI would + # fall silent on it (the combined-only flag is what made `--combined no` + # print nothing despite producing every session file). + sessions_regenerated = 0 if generate_individual_sessions and input_path.is_dir(): - _generate_individual_session_files( + sessions_regenerated = _generate_individual_session_files( format, messages, effective_output_dir, @@ -2043,7 +2053,7 @@ def convert_jsonl_to( ) if regenerated is not None: - regenerated.append(did_regenerate) + regenerated.append(did_regenerate or sessions_regenerated > 0) return output_path diff --git a/test/test_output_explicit.py b/test/test_output_explicit.py index c086417f..f5d01cb9 100644 --- a/test/test_output_explicit.py +++ b/test/test_output_explicit.py @@ -270,6 +270,26 @@ def test_directory_skip_suppresses_success_line(self, tmp_path: Path): assert "skipping regeneration" in r2.output assert "Successfully combined" not in r2.output + def test_combined_no_confirms_session_generation(self, tmp_path: Path): + """`--combined no` writes only per-session files (no combined). The + success line is gated on regeneration, and that gate must count + per-session output too — otherwise the run falls completely silent + despite writing every session file. Regression guard: the first + version of the skip-suppression tracked only the combined write.""" + proj = tmp_path / "proj" + proj.mkdir() + _write_jsonl(proj / "aaaaaaaa.jsonl", "SESSION_A_marker") + runner = CliRunner() + + r1 = runner.invoke(main, [str(proj), "--combined", "no"]) + assert r1.exit_code == 0, r1.output + # Per-session files were produced... + assert list(proj.glob("session-*.html")), "no session files written" + # ...and the run confirms that work rather than printing nothing. + assert "individual session files" in r1.output, ( + f"--combined no produced no confirmation; output was:\n{r1.output}" + ) + if __name__ == "__main__": pytest.main([__file__, "-v"])