diff --git a/.gitignore b/.gitignore index a4a9ce6..7b5c577 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ __pycache__ uv.lock .playwright-mcp/ +tests/.fixture_cache/ diff --git a/README.md b/README.md index 53612ee..6426ebb 100644 --- a/README.md +++ b/README.md @@ -47,14 +47,16 @@ All commands support these options: - `-o, --output DIRECTORY` - output directory (default: writes to temp dir and opens browser) - `-a, --output-auto` - auto-name output subdirectory based on session ID or filename -- `--repo OWNER/NAME` - GitHub repo for commit links (auto-detected from git push output if not specified) +- `--repo PATH|URL|OWNER/NAME` - Git repo for commit links and code viewer. Accepts a local path, GitHub URL, or owner/name format. - `--open` - open the generated `index.html` in your default browser (default if no `-o` specified) - `--gist` - upload the generated HTML files to a GitHub Gist and output a preview URL - `--json` - include the original session file in the output directory +- `--code-view` - generate an interactive code viewer showing all files modified during the session The generated output includes: - `index.html` - an index page with a timeline of prompts and commits - `page-001.html`, `page-002.html`, etc. - paginated transcript pages +- `code.html` - interactive code viewer (when `--code-view` is used) ### Local sessions @@ -106,7 +108,19 @@ Preview: https://gisthost.github.io/?abc123def456/index.html Files: /var/folders/.../session-id ``` -The preview URL uses [gisthost.github.io](https://gisthost.github.io/) to render your HTML gist. The tool automatically injects JavaScript to fix relative links when served through gisthost. +The preview URL uses [gisthost.github.io](https://gisthost.github.io/) to render your HTML gist. The tool automatically injects JavaScript to fix relative links when served through gisthost (also works with gistpreview.github.io for backward compatibility). 
+ +**Large sessions:** GitHub's gist API has size limits (~1MB). For large sessions, the tool automatically handles this: + +- **Page content**: When total page content exceeds 500KB, the tool generates separate `page-data-NNN.json` files for each page. The HTML pages are stripped of their inline content when uploaded, and JavaScript fetches the content from the JSON files on demand. This keeps each file small while preserving full functionality. + +- **Code viewer**: When using `--code-view`, large sessions may have a `code-data.json` file that also needs separate handling. + +- **Two-gist strategy**: When data files exceed 1MB total, they're uploaded to a separate "data gist", and the main gist's HTML files reference it. + +- **Batched uploads**: If files are still too large, they're automatically batched into multiple gists. + +All of this happens transparently and requires no additional options. Search continues to work by fetching from the JSON files instead of HTML. Combine with `-o` to keep a local copy: @@ -116,6 +130,36 @@ claude-code-transcripts json session.json -o ./my-transcript --gist **Requirements:** The `--gist` option requires the [GitHub CLI](https://cli.github.com/) (`gh`) to be installed and authenticated (`gh auth login`). 
+### Code viewer + +Use `--code-view` to generate an interactive three-pane code viewer that shows all files modified during the session: + +```bash +# Generate with code viewer from a local session +claude-code-transcripts --code-view + +# Point to the actual repo for full file content and blame +claude-code-transcripts --code-view --repo /path/to/repo + +# From a URL +claude-code-transcripts json https://example.com/session.jsonl --code-view +``` + +The code viewer (`code.html`) provides: +- **File tree**: Navigate all files that were written or edited during the session +- **File content**: View file contents with git blame-style annotations showing which prompt modified each line +- **Transcript pane**: Browse the full conversation with links to jump to specific file operations + +When you provide `--repo` pointing to the local git repository that was being modified, the code viewer can show the complete file content with accurate blame attribution. Without a repo path, it shows a diff-only view of the changes. + +Use `--exclude-deleted-files` to filter out files that no longer exist on disk: + +```bash +claude-code-transcripts --code-view --exclude-deleted-files +``` + +This is useful when files were deleted after the session (either manually or by commands not captured in the transcript). + ### Auto-naming output directories Use `-a/--output-auto` to automatically create a subdirectory named after the session: @@ -145,11 +189,14 @@ This is useful for archiving the source data alongside the HTML output. 
### Converting from JSON/JSONL files -Convert a specific session file directly: +Convert a specific session file or URL directly: ```bash claude-code-transcripts json session.json -o output-directory/ claude-code-transcripts json session.jsonl --open + +# Fetch and convert from a URL +claude-code-transcripts json https://example.com/session.jsonl --open ``` This works with both JSONL files in the `~/.claude/projects/` folder and JSON session files extracted from Claude Code for web. diff --git a/pyproject.toml b/pyproject.toml index e1eaed6..5de9054 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,9 +11,11 @@ requires-python = ">=3.10" dependencies = [ "click", "click-default-group", + "gitpython", "httpx", "jinja2", "markdown", + "nh3>=0.3.2", "questionary", ] diff --git a/src/claude_code_transcripts/__init__.py b/src/claude_code_transcripts/__init__.py index f2246a2..d27b57c 100644 --- a/src/claude_code_transcripts/__init__.py +++ b/src/claude_code_transcripts/__init__.py @@ -9,14 +9,19 @@ import subprocess import tempfile import webbrowser +from dataclasses import dataclass, field from datetime import datetime from pathlib import Path +from typing import Optional, List, Tuple, Dict, Any import click from click_default_group import DefaultGroup +from git import Repo +from git.exc import InvalidGitRepositoryError import httpx from jinja2 import Environment, PackageLoader import markdown +import nh3 import questionary # Set up Jinja2 environment @@ -49,6 +54,101 @@ def get_template(name): ) +# Import code viewer functionality from separate module +from claude_code_transcripts.code_view import ( + FileOperation, + FileState, + CodeViewData, + BlameRange, + OP_WRITE, + OP_EDIT, + OP_DELETE, + extract_file_operations, + filter_deleted_files, + normalize_file_paths, + find_git_repo_root, + find_commit_before_timestamp, + build_file_history_repo, + get_file_blame_ranges, + get_file_content_from_repo, + build_file_tree, + reconstruct_file_with_blame, + 
+    build_file_states,
+    render_file_tree_html,
+    file_state_to_dict,
+    generate_code_view_html,
+    build_msg_to_user_html,
+)
+
+
+def extract_github_repo_from_url(url: str) -> Optional[str]:
+    """Extract 'owner/name' from various GitHub URL formats.
+
+    Handles:
+    - https://github.com/owner/repo
+    - https://github.com/owner/repo.git
+    - git@github.com:owner/repo.git
+
+    Args:
+        url: GitHub URL or git remote URL.
+
+    Returns:
+        'owner/name' with any '.git' suffix removed (dots in names kept), or None.
+    """
+    match = re.search(r"github\.com[:/]([^/]+/[^/?#]+)", url)
+    if match:
+        repo = match.group(1)
+        return repo.removesuffix(".git")
+    return None
+
+
+def parse_repo_value(repo: Optional[str]) -> Tuple[Optional[str], Optional[Path]]:
+    """Parse --repo value to extract GitHub repo name and/or local path.
+
+    Args:
+        repo: The --repo value (could be path, URL, or owner/name).
+
+    Returns:
+        Tuple of (github_repo, local_path):
+        - github_repo: "owner/name" string for commit links, or None
+        - local_path: Path to local git repo for file history, or None
+    """
+    if not repo:
+        return None, None
+
+    # Check if it's a local path that exists
+    repo_path = Path(repo)
+    if repo_path.exists() and (repo_path / ".git").exists():
+        # Try to extract GitHub remote URL
+        github_repo = None
+        try:
+            result = subprocess.run(
+                ["git", "remote", "get-url", "origin"],
+                cwd=repo_path,
+                capture_output=True,
+                text=True,
+            )
+            if result.returncode == 0:
+                github_repo = extract_github_repo_from_url(result.stdout.strip())
+        except (OSError, ValueError):
+            pass  # best effort: no usable git/remote means no commit links
+        return github_repo, repo_path
+
+    # Check if it's a GitHub URL
+    if is_url(repo):
+        github_repo = extract_github_repo_from_url(repo)
+        if github_repo:
+            return github_repo, None
+        # Not a GitHub URL, ignore
+        return None, None
+
+    # Assume it's owner/name format
+    if "/" in repo and not repo.startswith("/"):
+        return repo, None
+
+    return None, None
+
+
 def extract_text_from_content(content):
     """Extract plain text from message content.
@@ -401,8 +501,6 @@ def _generate_project_index(project, output_dir): project_name=project["name"], sessions=sessions_data, session_count=len(sessions_data), - css=CSS, - js=JS, ) output_path = output_dir / "index.html" @@ -440,8 +538,6 @@ def _generate_master_index(projects, output_dir): projects=projects_data, total_projects=len(projects), total_sessions=total_sessions, - css=CSS, - js=JS, ) output_path = output_dir / "index.html" @@ -492,6 +588,14 @@ def _parse_jsonl_file(filepath): if obj.get("isCompactSummary"): entry["isCompactSummary"] = True + # Preserve isMeta if present (skill expansions, not real user prompts) + if obj.get("isMeta"): + entry["isMeta"] = True + + # Preserve toolUseResult if present (needed for originalFile content) + if "toolUseResult" in obj: + entry["toolUseResult"] = obj["toolUseResult"] + loglines.append(entry) except json.JSONDecodeError: continue @@ -629,10 +733,58 @@ def format_json(obj): return f"
{html.escape(str(obj))}"
+# Allowed HTML tags for markdown content - nh3 strips any other tag (inner text is kept, except script/style)
+ALLOWED_TAGS = {
+ # Block elements
+ "p",
+ "div",
+ "h1",
+ "h2",
+ "h3",
+ "h4",
+ "h5",
+ "h6",
+ "blockquote",
+ "pre",
+ "hr",
+ # Lists
+ "ul",
+ "ol",
+ "li",
+ # Inline elements
+ "a",
+ "strong",
+ "b",
+ "em",
+ "i",
+ "code",
+ "br",
+ "span",
+ # Tables
+ "table",
+ "thead",
+ "tbody",
+ "tr",
+ "th",
+ "td",
+}
+
+ALLOWED_ATTRIBUTES = {
+ "a": {"href", "title"},
+ "code": {"class"}, # For syntax highlighting
+ "pre": {"class"},
+ "span": {"class"},
+ "td": {"align"},
+ "th": {"align"},
+}
+
+
def render_markdown_text(text):
if not text:
return ""
- return markdown.markdown(text, extensions=["fenced_code", "tables"])
+ raw_html = markdown.markdown(text, extensions=["fenced_code", "tables"])
+ # Sanitize HTML to only allow safe tags - nh3 strips everything else
+ return nh3.clean(raw_html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES)
def is_json_like(text):
@@ -852,7 +1004,7 @@ def is_tool_result_message(message_data):
)
-def render_message(log_type, message_json, timestamp):
+def render_message(log_type, message_json, timestamp, prompt_num=None):
if not message_json:
return ""
try:
@@ -865,7 +1017,8 @@ def render_message(log_type, message_json, timestamp):
if is_tool_result_message(message_data):
role_class, role_label = "tool-reply", "Tool reply"
else:
- role_class, role_label = "user", "User"
+ role_class = "user"
+ role_label = f"User Prompt #{prompt_num}" if prompt_num else "User"
elif log_type == "assistant":
content_html = render_assistant_message(message_data)
role_class, role_label = "assistant", "Assistant"
@@ -1082,7 +1235,7 @@ def render_message(log_type, message_json, timestamp):
}
// Use MutationObserver to catch dynamically added content
- // gistpreview.github.io may add content after initial load
+ // gisthost/gistpreview may add content after initial load
var observer = new MutationObserver(function(mutations) {
mutations.forEach(function(mutation) {
mutation.addedNodes.forEach(function(node) {
@@ -1104,6 +1257,70 @@ def render_message(log_type, message_json, timestamp):
});
});
+ // Load JS from gist (relative script srcs don't work on gistpreview)
+ document.querySelectorAll('script[src]').forEach(function(script) {
+ var src = script.getAttribute('src');
+ if (src.startsWith('http')) return; // Already absolute
+ var jsUrl = 'https://gist.githubusercontent.com/raw/' + gistId + '/' + src;
+ fetch(jsUrl)
+ .then(function(r) { if (!r.ok) throw new Error('Failed'); return r.text(); })
+ .then(function(js) {
+ var newScript = document.createElement('script');
+ newScript.textContent = js;
+ document.body.appendChild(newScript);
+ })
+ .catch(function(e) { console.error('Failed to load JS:', src, e); });
+ });
+
+ // Rewrite relative links to work with gist preview URL format
+ function rewriteLinks(root) {
+ var scope = root || document;
+ var links = [];
+
+ // Check if the root itself is a link (for MutationObserver calls)
+ if (scope.matches && scope.matches('a[href]')) {
+ links.push(scope);
+ }
+
+ // Also get all descendant links
+ scope.querySelectorAll('a[href]').forEach(function(link) {
+ links.push(link);
+ });
+
+ links.forEach(function(link) {
+ var href = link.getAttribute('href');
+ // Skip already-rewritten links (issue #26 fix)
+ if (href.startsWith('?')) return;
+ // Skip external links and anchors
+ if (href.startsWith('http') || href.startsWith('#') || href.startsWith('//')) return;
+ // Handle anchor in relative URL (e.g., page-001.html#msg-123)
+ var parts = href.split('#');
+ var filename = parts[0];
+ var anchor = parts.length > 1 ? '#' + parts[1] : '';
+ link.setAttribute('href', '?' + gistId + '/' + filename + anchor);
+ });
+ }
+
+ // Run immediately
+ rewriteLinks();
+
+ // Also run on DOMContentLoaded in case DOM isn't ready yet
+ if (document.readyState === 'loading') {
+ document.addEventListener('DOMContentLoaded', function() { rewriteLinks(); });
+ }
+
+ // Use MutationObserver to catch dynamically added content
+ // gisthost/gistpreview may add content after initial load
+ var observer = new MutationObserver(function(mutations) {
+ mutations.forEach(function(mutation) {
+ mutation.addedNodes.forEach(function(node) {
+ if (node.nodeType === 1) { // Element node
+ rewriteLinks(node);
+ }
+ });
+ });
+ });
+
// Start observing once body exists
function startObserving() {
if (document.body) {
@@ -1114,6 +1331,18 @@ def render_message(log_type, message_json, timestamp):
}
startObserving();
+ // Execute module scripts that were injected via innerHTML
+ // (browsers don't execute scripts added via innerHTML for security)
+ document.querySelectorAll('script[type="module"]').forEach(function(script) {
+ if (script.src) return; // Already has src, skip
+ var blob = new Blob([script.textContent], { type: 'application/javascript' });
+ var url = URL.createObjectURL(blob);
+ var newScript = document.createElement('script');
+ newScript.type = 'module';
+ newScript.src = url;
+ document.body.appendChild(newScript);
+ });
+
// Handle fragment navigation after dynamic content loads
// gisthost.github.io/gistpreview.github.io loads content dynamically, so the browser's
// native fragment navigation fails because the element doesn't exist yet
@@ -1142,10 +1371,29 @@ def render_message(log_type, message_json, timestamp):
def inject_gist_preview_js(output_dir):
- """Inject gist preview JavaScript into all HTML files in the output directory."""
+ """Inject gist preview JavaScript into all HTML files in the output directory.
+
+ Also removes inline CODE_DATA from code.html since gist version fetches it separately.
+
+ Args:
+ output_dir: Path to the output directory containing HTML files.
+ """
output_dir = Path(output_dir)
for html_file in output_dir.glob("*.html"):
content = html_file.read_text(encoding="utf-8")
+
+ # For code.html, remove the inline CODE_DATA script
+ # (gist version fetches code-data.json instead)
+ if html_file.name == "code.html":
+ import re
+
+ content = re.sub(
+ r"\s*",
+ "",
+ content,
+ flags=re.DOTALL,
+ )
+
# Insert the gist preview JS before the closing