From dd25db036c908b597b60e3cb242bec20abb9048a Mon Sep 17 00:00:00 2001
From: Trevin Chow <trevin@trevinchow.com>
Date: Fri, 8 May 2026 08:39:03 -0700
Subject: [PATCH 1/2] fix(ce-sessions): move orchestration to skill, reshape
 historian to synthesis-only
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ce-session-historian agent deadlocked when dispatched as a subagent in
Claude Code because its first action was Skill(ce-session-inventory), and
subagents cannot invoke the Skill tool (anthropics/claude-code#38719). The
spinner hung at "Initializing…" indefinitely; after timeout the orchestrator
received a spurious "user doesn't want to proceed" rejection.

The fix removes every code path that has a subagent calling Skill:

- Move 4 extraction scripts into plugins/compound-engineering/skills/ce-sessions/scripts/
  (single home; ce-session-inventory and ce-session-extract skills deleted)
- Rewrite ce-sessions/SKILL.md as the full orchestrator: discovery, branch +
  keyword filtering, scan-window selection, top-5 deep-dive cap, mktemp
  scratch dir, per-session extraction with new --output PATH flag (extraction
  bytes write directly to scratch, never round-trip through main-context tool
  results), dispatch of synthesis-only historian
- Reshape ce-session-historian.agent.md to synthesis-only: receives file
  paths in dispatch prompt, reads via native file-read tool, returns prose
  findings. No Skill calls, no Bash discovery, no orchestration logic
- Update ce-compound Phase 1 to delegate to ce-sessions via the platform's
  skill-invocation primitive (semantic-prose form per plan-handoff.md line 57
  convention, not literal Skill(...) syntax). Specifies dispatch ordering so
  the parallel research subagents and ce-sessions still run concurrently —
  wall-clock parallelism preserved
- Add --output PATH to extract-skeleton.py and extract-errors.py: when set,
  scripts write to file and emit only a one-line JSON status to stdout.
  Stdout-mode behavior preserved when omitted (additive API change)
- Add regression test (tests/skills/ce-session-historian-no-skill-tool.test.ts)
  asserting the agent body never instructs Skill(ce-session-inventory),
  Skill(ce-session-extract), or any literal Skill(...) tool-call expression
- Register ce-session-inventory and ce-session-extract in legacy-cleanup
  lookups (STALE_SKILL_DIRS, LEGACY_ONLY_SKILL_DESCRIPTIONS, and
  EXTRA_LEGACY_ARTIFACTS_BY_PLUGIN) so existing flat-installs sweep on upgrade
- Fix broken See Also links in docs/skills/ce-sessions.md

The bug is structurally gone: no subagent in the post-refactor flow ever
invokes the Skill tool. Plan with full design rationale, alternatives
considered (including issue #794's Options 1 and 2), and implementation
units lives at docs/plans/2026-05-08-001-fix-ce-sessions-orchestration-refactor-plan.md.

All 1337 bun tests pass; bun run release:validate passes (37 skills,
49 agents). Closes #794.
---
 ...ce-sessions-orchestration-refactor-plan.md | 492 ++++++++++++++++++
 docs/skills/ce-sessions.md                    |  23 +-
 .../agents/ce-session-historian.agent.md      | 217 ++------
 .../skills/ce-compound/SKILL.md               |  26 +-
 .../skills/ce-session-extract/SKILL.md        |  64 ---
 .../skills/ce-session-inventory/SKILL.md      |  68 ---
 .../skills/ce-sessions/SKILL.md               | 201 ++++++-
 .../scripts/discover-sessions.sh              |   0
 .../scripts/extract-errors.py                 |  35 +-
 .../scripts/extract-metadata.py               |   0
 .../scripts/extract-skeleton.py               |  38 +-
 src/data/plugin-legacy-artifacts.ts           |   2 +
 src/utils/legacy-cleanup.ts                   |  11 +
 tests/session-history-scripts.test.ts         | 112 +++-
 ...ce-session-historian-no-skill-tool.test.ts |  56 ++
 15 files changed, 997 insertions(+), 348 deletions(-)
 create mode 100644 docs/plans/2026-05-08-001-fix-ce-sessions-orchestration-refactor-plan.md
 delete mode 100644 plugins/compound-engineering/skills/ce-session-extract/SKILL.md
 delete mode 100644 plugins/compound-engineering/skills/ce-session-inventory/SKILL.md
 rename plugins/compound-engineering/skills/{ce-session-inventory => ce-sessions}/scripts/discover-sessions.sh (100%)
 rename plugins/compound-engineering/skills/{ce-session-extract => ce-sessions}/scripts/extract-errors.py (75%)
 rename plugins/compound-engineering/skills/{ce-session-inventory => ce-sessions}/scripts/extract-metadata.py (100%)
 rename plugins/compound-engineering/skills/{ce-session-extract => ce-sessions}/scripts/extract-skeleton.py (89%)
 create mode 100644 tests/skills/ce-session-historian-no-skill-tool.test.ts

diff --git a/docs/plans/2026-05-08-001-fix-ce-sessions-orchestration-refactor-plan.md b/docs/plans/2026-05-08-001-fix-ce-sessions-orchestration-refactor-plan.md
new file mode 100644
index 000000000..e7d4a01d3
--- /dev/null
+++ b/docs/plans/2026-05-08-001-fix-ce-sessions-orchestration-refactor-plan.md
@@ -0,0 +1,492 @@
+---
+title: "fix: Refactor session-history orchestration to avoid subagent Skill-tool deadlock"
+type: fix
+status: completed
+date: 2026-05-08
+---
+
+# fix: Refactor session-history orchestration to avoid subagent Skill-tool deadlock
+
+## Summary
+
+Move all session-history orchestration logic out of the `ce-session-historian` subagent and into the `ce-sessions` skill (main context), where the Skill tool is permitted. The agent shrinks to synthesis-only — it receives paths to pre-extracted files in a `mktemp` scratch directory and returns findings prose. `ce-compound` Phase 1 delegates session-history work to the `ce-sessions` skill via the platform's skill-invocation primitive (`Skill` in Claude Code, equivalent on other targets) instead of dispatching the historian directly. Closes #794.
+
+---
+
+## Problem Frame
+
+`ce-session-historian` is dispatched as a subagent by `/ce-compound` Phase 1 and `/ce-sessions`, and its first concrete action is `Skill(ce-session-inventory)`. Claude Code does not permit subagents to invoke the `Skill` tool ([anthropics/claude-code#38719](https://github.com/anthropics/claude-code/issues/38719)) — the call hangs at `Initializing…` indefinitely, eventually surfacing to the orchestrator as a spurious "user doesn't want to proceed with this tool use" rejection. Empirically confirmed in #794: same skill, same args, same machine, only the dispatch context differs (orchestrator works; subagent hangs). The fix is structural, not a workaround — remove every code path that has a subagent calling `Skill`.
+
+---
+
+## Requirements
+
+- R1. `/ce-sessions [question]` and `/ce-compound` Phase 1 with session history opted in must complete successfully on Claude Code without hanging at `Initializing…` or surfacing a spurious user-denial error.
+- R2. No subagent in the post-refactor session-history flow may invoke the `Skill` tool. The full orchestration must run in main conversation context.
+- R3. Existing session-history capabilities must be preserved: cross-platform discovery (Claude Code, Codex, Cursor), branch and keyword filtering, scan-window widening logic, top-5 deep-dive cap, skeleton + errors extraction modes, time-budget discipline.
+- R4. The change must not regress non-Claude-Code targets (Codex, Cursor, Gemini, OpenCode, Pi, Kiro). All script invocations must use cross-platform-portable patterns (bare relative paths, no `${CLAUDE_PLUGIN_ROOT}` / `${CLAUDE_SKILL_DIR}`).
+- R5. `bun run release:validate` and `bun test` must pass after the refactor.
+- R6. Issue #794 closes on merge.
+
+---
+
+## Scope Boundaries
+
+- Verifying or fixing the same architectural pattern on Codex/Cursor — not confirmed to exhibit the same subagent-Skill-tool limit. If it surfaces, follow-up work.
+- Renaming `ce-session-historian` to reflect its synthesis-only role — cosmetic; increases blast radius (legacy-cleanup registries, conversion writers, test fixtures).
+- Adding new session-history features (larger `head:N`, new extraction modes, additional output schemas beyond current behavior) — preserve existing capabilities, no feature additions.
+- Fixing Claude Code's platform-level subagent restriction — not our code.
+
+---
+
+## Context & Research
+
+### Relevant Code and Patterns
+
+- `plugins/compound-engineering/skills/ce-sessions/SKILL.md` — currently a thin wrapper that dispatches `ce-session-historian`; will be rewritten as the orchestrator.
+- `plugins/compound-engineering/agents/ce-session-historian.agent.md` — currently instructs `Skill(ce-session-inventory)` and `Skill(ce-session-extract)` (lines 102-108); will be refactored to synthesis-only.
+- `plugins/compound-engineering/skills/ce-session-inventory/scripts/{discover-sessions.sh,extract-metadata.py}` — scripts move into `ce-sessions/scripts/`.
+- `plugins/compound-engineering/skills/ce-session-extract/scripts/{extract-skeleton.py,extract-errors.py}` — scripts move into `ce-sessions/scripts/`.
+- `plugins/compound-engineering/skills/ce-compound/SKILL.md` Phase 1 lines 175-198 — historian-dispatch block; replaced with semantic-prose invocation of `ce-sessions` via the platform's skill-invocation primitive.
+- `plugins/compound-engineering/skills/ce-clean-gone-branches/SKILL.md` line 17, `ce-resolve-pr-feedback/SKILL.md` line 45, `ce-optimize/SKILL.md` lines 272/315/324 — established `bash scripts/<name>` portable invocation pattern (slash-invoked skills, no `context: fork`, no platform variables).
+- `plugins/compound-engineering/skills/ce-plan/references/plan-handoff.md` line 57 — established semantic-prose convention for one skill invoking another: *"Invoke the `ce-X` skill via the platform's skill-invocation primitive (`Skill` in Claude Code, `Skill` in Codex, the equivalent on Gemini/Pi)"*. ce-compound's delegation to ce-sessions follows this exact form.
+- `plugins/compound-engineering/skills/ce-demo-reel/SKILL.md` lines 109-117 — clearest mirror for `mktemp -d -t <prefix>-XXXXXX` per-run-throwaway scratch pattern.
+- `plugins/compound-engineering/skills/ce-plan/references/deepening-workflow.md` lines 170-177 — pattern for capturing absolute scratch path and threading it into a subagent dispatch prompt.
+- `tests/session-history-scripts.test.ts` lines 4-19 — `INVENTORY_SCRIPTS_DIR` and `EXTRACT_SCRIPTS_DIR` constants and the `scriptsDirFor()` dispatcher; collapse into a single `SCRIPTS_DIR` pointing at `ce-sessions/scripts/`.
+- `tests/skills/ce-plan-handoff-routing.test.ts` — pattern for the regression test (read agent file at module load, regex assertions against body content).
+- `src/utils/legacy-cleanup.ts` — `STALE_SKILL_DIRS` (line 22, "Removed skills (no replacement)" cluster around line 89) and `LEGACY_ONLY_SKILL_DESCRIPTIONS` (line 253).
+- `src/data/plugin-legacy-artifacts.ts` lines 18-237 — `EXTRA_LEGACY_ARTIFACTS_BY_PLUGIN["compound-engineering"].skills[]`, sorted alphabetically.
+- `docs/skills/ce-sessions.md` lines 110, 175-176 — links to deleted skill directories; will 404 after deletion.
+
+### Institutional Learnings
+
+- `docs/solutions/skill-design/pass-paths-not-content-to-subagents-2026-03-26.md` — directly applicable. Establishes orchestrator-does-discovery / subagent-does-reading split, file-mediated handoff via paths, and the empirical finding that per-item walk vs. bulk-find-then-filter affects tool call counts. The synthesis subagent should still be invocable in some standalone form (see Open Questions).
+- `docs/solutions/skill-design/script-first-skill-architecture.md` — reinforces the move: classification rules stay in scripts as single source of truth; do not duplicate them into the synthesis agent's prose. Script produces, model presents.
+- `docs/solutions/skill-design/compound-refresh-skill-improvements.md` Solution #5 — subagents use native file-search/read tools (e.g., Read in Claude Code), not shell `cat`. The synthesis-only historian must use Read for the scratch-dir files.
+- `docs/solutions/skill-design/research-agent-pipeline-separation-2026-04-05.md` — foreground vs. background dispatch placement is deliberate. The current `/ce-compound` Phase 1 historian dispatch is foreground because session files live outside CWD. After this refactor, that rationale shifts (the orchestrator skill handles the access in main context); document the new placement explicitly.
+- `docs/solutions/skill-design/post-menu-routing-belongs-inline-2026-04-28.md` — load-bearing logic must live where it will reliably execute, not where it will silently fail to load. Reinforces moving orchestration from the agent (subagent context where Skill is unreachable) to the skill (main context).
+- `docs/solutions/best-practices/ce-pipeline-end-to-end-learnings-2026-04-17.md` — synthesis subagents must cite actual evidence, not vibe-summarize. Carries over to the new agent's output schema.
+
+### External References
+
+- [anthropics/claude-code#38719](https://github.com/anthropics/claude-code/issues/38719) — closed but the architectural limit is current. Subagents cannot invoke the Skill tool.
+
+---
+
+## Key Technical Decisions
+
+- **Move scripts into `ce-sessions/scripts/` with bare relative-path invocations (`bash scripts/<name>`)**: This is the documented portable pattern in repo AGENTS.md and is empirically used by three existing slash-invoked skills (`ce-clean-gone-branches`, `ce-resolve-pr-feedback`, `ce-optimize`). Avoids `${CLAUDE_PLUGIN_ROOT}` / `${CLAUDE_SKILL_DIR}` (Claude-Code-only) and the `${CLAUDE_SKILL_DIR:-.}` fallback (assumes other targets set CWD to skill dir, unverified). U2 Verification includes a marketplace-install smoke test to confirm runtime CWD resolution actually works on a non-`--plugin-dir` install, since the plugin AGENTS.md "Permission gate" caveat warns the runtime Bash tool may not resolve relative paths from the skill dir — the existing slash-command precedents argue against that warning, but verifying empirically before merge is cheap insurance.
+- **`ce-compound` delegates to `ce-sessions` via the platform's skill-invocation primitive — semantic prose form, not a literal `Skill(...)` call**: Per the established convention in `ce-plan/references/plan-handoff.md` line 57 and plugin AGENTS.md "Cross-Platform Reference Rules" ("prefer semantic wording such as 'load the `ce-doc-review` skill' rather than slash syntax"). The semantic prose lets each target's converter route to its native primitive (`Skill` in Claude Code, equivalent on Codex/Gemini/Pi). A literal `Skill(ce-sessions, ...)` tool-call expression in the SKILL.md body would propagate Claude-Code-specific syntax to non-Claude targets when the skill ships verbatim through the converters. The architecture's central assumption — that the platform's skill-invocation primitive works from inside an executing skill body, not just from a direct slash command — is empirically verified by the current planning workflow itself: ce-plan invokes ce-doc-review via that primitive from its own skill body and the call resolves cleanly.
+- **Synthesis subagent receives file paths in dispatch prompt; reads via the platform's native file-read tool (Read in Claude Code)**: Per `pass-paths-not-content-to-subagents` precedent. Inventory output (small) flows through main-context tool results because the orchestrator needs it for filter/rank judgment. Per-session skeleton/errors output is written *directly to scratch files* by the extraction scripts (via a new `--output PATH` arg added in U2) — extraction content never round-trips through main-context tool results. This is what makes the synthesizer subagent earn its keep: with extraction bytes isolated to its subagent context, the orchestrator's working state stays lean (just paths + small inventory + final findings prose).
+- **Drop the agent's "Conversational mode" framing**: The current agent file advertises two modes (compound enrichment, conversational), but no caller invokes the agent without going through `/ce-sessions` or `/ce-compound` today. Removing the dual-mode framing simplifies the synthesis-only spec. If conversational direct dispatch is needed later, it can be reintroduced with explicit standalone-mode wiring.
+- **Add the deleted skills to all three legacy-cleanup lookups**: `STALE_SKILL_DIRS` in `src/utils/legacy-cleanup.ts`, `EXTRA_LEGACY_ARTIFACTS_BY_PLUGIN["compound-engineering"].skills[]` in `src/data/plugin-legacy-artifacts.ts`, and `LEGACY_ONLY_SKILL_DESCRIPTIONS` (also in `legacy-cleanup.ts`). The descriptions map is required because these skills have no current ce-* replacement — `loadLegacyFingerprints` falls back to that map for ownership fingerprinting on upgrade.
+- **Preserve `/ce-compound` Phase 1 wall-clock parallelism via dispatch ordering**: The current Phase 1 dispatches three background research subagents in parallel and the historian in foreground concurrently — explicitly designed so the historian "runs while the background agents work, adding no wall-clock time." A naive replacement that issues the skill-invocation primitive call to `ce-sessions` *before* the parallel block would serialize ce-sessions in front of the research subagents, regressing wall-clock time materially. The fix: launch the three background research subagents first (Context Analyzer, Solution Extractor, Related Docs Finder), *then* issue the skill-invocation primitive call to `ce-sessions`. The synchronous skill call blocks ce-compound's main-context turn until ce-sessions returns, but the already-dispatched background subagents continue running in parallel underneath — the same wall-clock benefit as today, just with a different concurrency primitive. U4 Approach specifies this ordering explicitly so the implementer doesn't have to rederive it.
+
+---
+
+## Open Questions
+
+### Resolved During Planning
+
+- **Cross-platform script path resolution**: Use bare `bash scripts/<name>` (resolved by codebase precedent — `ce-clean-gone-branches`, `ce-resolve-pr-feedback`, `ce-optimize` all do this in slash-invoked skill bodies portably).
+- **Where scripts live**: `ce-sessions/scripts/` as the single home (resolved by scope dialogue — `ce-session-inventory` and `ce-session-extract` get deleted; their script directories collapse into the orchestrator skill that now uses them directly).
+- **Skill-from-skill-body invocation legitimacy**: Empirically verified — the current session's `/ce-plan` Phase 5.3.8 invoked `Skill(ce-doc-review, "mode:headless ...")` from inside the running ce-plan skill body, and the call resolved cleanly with three reviewer agents dispatched and findings returned. No deadlock, no `Initializing…` hang. This pins down what #794's empirical confirmation table left ambiguous: "main session" includes any non-subagent context, including a currently-executing skill body.
+- **Skill-to-skill invocation form**: Use semantic prose ("Invoke the `ce-sessions` skill via the platform's skill-invocation primitive (`Skill` in Claude Code, equivalent on other targets)") per `plan-handoff.md` line 57 and plugin AGENTS.md "Cross-Platform Reference Rules". Literal `Skill(ce-sessions, ...)` syntax in the SKILL.md body would propagate Claude-Code-specific surface to non-Claude targets when the skill ships verbatim through the converters.
+- **Inventory through main context vs. files**: Through main context. Inventory output is small (~30-50KB for a real-world session count) and the orchestrator needs to reason over it for selection. Per-session skeleton/errors output bypasses main context entirely via a new `--output PATH` arg added to the extract scripts in U2 — extraction content writes directly to scratch and never round-trips through orchestrator tool results.
+- **README skill-count update**: Not required. Counts use a `+` suffix (`38+` / `50+`) rather than exact figures (verified via research). `ce-session-inventory` and `ce-session-extract` are not listed in the skill table (agent-facing primitives, intentionally hidden from user-facing inventory).
+- **plugin.json description count update**: Not required. All three plugin.json variants (Claude, Cursor, Codex) have count-free descriptions (verified via research).
+
+### Deferred to Implementation
+
+- **Scratch file naming convention**: Probably `{session-id}.skeleton.txt` and `{session-id}.errors.txt`, but exact naming is decided when writing `ce-sessions/SKILL.md`.
+- **Tail-extract conditional logic placement**: Currently the agent decides whether to follow up a `head:200` skeleton with a `tail:50` extract on apparently-incomplete sessions. After the refactor, this judgment lives in ce-sessions (orchestrator). The specific implementation — pre-extract everything proactively, or check head output and re-run for tail — will be decided while writing the skill.
+- **Errors-mode extraction triggering**: Currently the agent decides selectively per session. Either ce-sessions decides upfront and pre-extracts, or the synthesizer signals back what additional extracts it wants. Defer to implementation; simplest path is "ce-sessions extracts skeleton always, errors only when scan window suggests dead-end value" using existing per-session signals.
+- **Standalone-mode dispatch path for the synthesis agent**: Per `pass-paths-not-content-to-subagents` precedent, sub-agents should remain dispatchable directly. After dropping conversational mode, decide whether the synthesis agent's body should still document a "no paths block in dispatch → return 'no relevant prior sessions'" fallback. Likely yes (defensive against future direct-dispatch use cases); confirm during write.
+
+---
+
+## Alternative Approaches Considered
+
+Three architectural shapes were on the table for closing #794. The chosen approach (move all orchestration into `ce-sessions`, reshape the agent to synthesis-only) is the broadest of the three; this section documents why the narrower options were rejected.
+
+- **Option A — Refactor the agent to invoke scripts directly via Bash from subagent context** (issue #794's "Suggested resolution path 1"). Smallest possible diff: change two `Skill(ce-session-inventory)` and `Skill(ce-session-extract)` calls in the agent body to their underlying `bash scripts/discover-sessions.sh ...` and `python3 scripts/extract-skeleton.py ...` invocations. The agent runs cleanly as a subagent until it hits Skill; Bash from a subagent is unrestricted. **Rejected because**: this option runs into the same script-path-resolution problem we navigated for `ce-sessions`, but without the same answer available. Slash-invoked *skills* have an established sibling-`scripts/` convention (ce-clean-gone-branches, ce-resolve-pr-feedback, ce-optimize) that runtime Bash resolves portably. *Agents* in this plugin do not have an analogous convention — agent files live flat under `agents/` with no sibling `scripts/` dir, and no other agent in the plugin invokes scripts via Bash from its body. To make Option A work, the agent would need either (a) a Claude-Code-only `${CLAUDE_PLUGIN_ROOT}` reference (R4 regression), or (b) a new agent-side sidecar-scripts convention (the codex converter's `collectReferencedSidecarDirs` mechanism could carry it, but the rest of the plugin doesn't follow this pattern, so we'd be establishing it for one agent). The chosen approach instead reuses the slash-command `<skill>/scripts/` convention that's already cross-platform-portable and exercised by three existing skills.
+
+- **Option B — Have the orchestrator pre-fetch inventory and pass it into the subagent's dispatch prompt** (issue #794's "Suggested resolution path 2"). Orchestrator runs `ce-session-inventory` once, includes the JSONL inventory in the historian's dispatch prompt; the historian still does selection + per-session extraction. **Rejected because**: the historian iteratively runs `ce-session-extract` once per selected session (up to 5 calls per run), and each of those is a Skill-tool call in the current architecture — Option B fixes the inventory call but leaves the per-session extract calls hanging on the same subagent-Skill-tool deadlock. Pre-fetching all sessions' extraction content upfront defeats the selection logic (you'd extract sessions before deciding which 5 to deep-dive). The full fix requires moving every Skill-tool call out of subagent context, which is what the chosen approach does.
+
+- **Option C (chosen) — Move all orchestration into the `ce-sessions` skill (main context); reshape the agent into a synthesis-only agent that reads pre-extracted scratch files.** Closes the deadlock structurally — no Skill-tool call ever originates from subagent context. ce-sessions is itself a slash-command skill, so it inherits the established `<skill>/scripts/` cross-platform-portable invocation pattern. The synthesis-only agent becomes a clean handoff point: receives file paths, reads via native file-read tool, returns prose findings. The breadth of the change is the trade-off — six implementation units versus two for Option A — but each unit is independently meaningful work (script home consolidation, orchestrator promotion, agent simplification, ce-compound delegation refactor, regression test, cleanup of the now-callerless wrapping skills). The forcing function was #794's specific deadlock, but the broader refactor closes other latent issues at the same time: removes two `user-invocable: false` skills that were essentially script holders, simplifies the agent's responsibility surface, and makes the orchestration testable from main context where slash-creator's eval workflow can exercise it.
+
+A fourth option — **delete the synthesis subagent entirely and have the orchestrator synthesize inline** — was raised in review. Rejected because: with the `--output PATH` arg adopted on extract scripts (U2), the synthesizer's specific value is *context isolation*. Extraction content lands in the synthesizer's subagent context (via Read), not in the orchestrator's context. Deleting the synthesizer would force the orchestrator to Read the scratch files itself, putting all extraction bytes in main-context tool results — exactly the cumulative growth the `--output PATH` change exists to avoid. The synthesizer earns its keep specifically because the file-mediated handoff is clean.
+
+---
+
+## High-Level Technical Design
+
+> *This illustrates the intended approach and is directional guidance for review, not implementation specification. The implementing agent should treat it as context, not code to reproduce.*
+
+```
+BEFORE (broken on Claude Code subagent context)
+  /ce-compound  /ce-sessions
+        \           /
+         \         /
+    Agent(ce-session-historian)  ← runs in subagent context
+              |
+              |  Skill(ce-session-inventory)   ← HANGS at "Initializing…"
+              |  Skill(ce-session-extract)     ← HANGS at "Initializing…"
+              |
+              v
+        synthesis text
+
+AFTER (Skill tool only invoked from main context)
+  /ce-compound  (skill, main context — launches parallel research subagents first,
+                 then invokes ce-sessions via the platform's skill-invocation primitive
+                 so the parallel research keeps running while ce-sessions executes)
+       |
+       v
+  /ce-sessions  (skill, main context)
+       |
+       |  bash scripts/discover-sessions.sh ... | tr '\n' '\0' \
+       |     | xargs -0 python3 scripts/extract-metadata.py --cwd-filter <repo>
+       |       → inventory JSONL (held in main context for filter/rank judgment)
+       |
+       |  filter by branch / window / keyword / top-5 cap
+       |
+       |  mktemp -d -t ce-sessions-XXXXXX → $SCRATCH
+       |
+       |  for each selected session, scripts write directly to scratch (no stdout
+       |  round-trip through main context):
+       |    python3 scripts/extract-skeleton.py --output $SCRATCH/{session-id}.skeleton.txt < <file>
+       |    (optionally) python3 scripts/extract-errors.py --output $SCRATCH/{session-id}.errors.txt < <file>
+       |
+       |  Dispatch ce-session-historian via the platform's subagent primitive
+       |  with prompt = {problem_topic, scratch_dir, [{path, platform, branch?, ts, ...}], output_schema}
+       v
+  ce-session-historian  (subagent, synthesis-only)
+       |
+       |  for each path: read via native file-read tool   ← no Skill calls
+       |  synthesize per output schema
+       v
+  findings prose returned to /ce-sessions  →  returned to /ce-compound  →  folded into doc
+```
+
+The bug is structurally gone because no subagent ever invokes the Skill tool. Every `Skill(...)` call sits in main conversation context, which is the verified-working path.
+
+---
+
+## Implementation Units
+
+### U1. Move scripts into `ce-sessions/scripts/` and repoint test paths
+
+**Goal:** Relocate the four extraction scripts to their new home under `ce-sessions/scripts/` as a pure file move, with the test suite updated to find them at the new location. After this unit, the scripts are at the new path and the script test suite passes against the new path; nothing else has changed yet.
+
+**Requirements:** R3, R5
+
+**Dependencies:** None
+
+**Files:**
+- Move: `plugins/compound-engineering/skills/ce-session-inventory/scripts/discover-sessions.sh` → `plugins/compound-engineering/skills/ce-sessions/scripts/discover-sessions.sh`
+- Move: `plugins/compound-engineering/skills/ce-session-inventory/scripts/extract-metadata.py` → `plugins/compound-engineering/skills/ce-sessions/scripts/extract-metadata.py`
+- Move: `plugins/compound-engineering/skills/ce-session-extract/scripts/extract-skeleton.py` → `plugins/compound-engineering/skills/ce-sessions/scripts/extract-skeleton.py`
+- Move: `plugins/compound-engineering/skills/ce-session-extract/scripts/extract-errors.py` → `plugins/compound-engineering/skills/ce-sessions/scripts/extract-errors.py`
+- Modify: `tests/session-history-scripts.test.ts` (collapse the `INVENTORY_SCRIPTS_DIR` and `EXTRACT_SCRIPTS_DIR` constants into a single `SCRIPTS_DIR` pointing at the new path; simplify or remove the `scriptsDirFor()` dispatcher depending on how the tests reference it)
+
+**Approach:**
+- Pure file move via `git mv` to preserve blame.
+- The scripts have no cross-references to each other (verified — `discover-sessions.sh` does not call `extract-metadata.py` directly; the pipe is composed in the skill body), so no script content changes are required.
+- Test path update is mechanical: the constants live at `tests/session-history-scripts.test.ts` lines 4-19 per research findings.
+
+**Patterns to follow:**
+- Co-located scripts under `<skill>/scripts/` directory — same pattern as `ce-clean-gone-branches/scripts/`, `ce-optimize/scripts/`, `ce-resolve-pr-feedback/scripts/`.
+
+**Test scenarios:**
+- Test expectation: `tests/session-history-scripts.test.ts` continues to pass after path constant updates. No test cases themselves need behavioral changes — fixtures in `tests/fixtures/session-history/` stay put.
+- Integration: `git log --follow` on each script preserves history through the move.
+
+**Verification:**
+- `bun test tests/session-history-scripts.test.ts` passes.
+- The four scripts exist at `plugins/compound-engineering/skills/ce-sessions/scripts/` and no longer exist at their old paths.
+
+---
+
+### U2. Rewrite `ce-sessions/SKILL.md` as the full session-history orchestrator
+
+**Goal:** Replace the current 32-line thin-wrapper SKILL.md with a full orchestrator that discovers sessions, filters/ranks, extracts content to a `mktemp` scratch dir, dispatches the synthesis-only historian, and returns findings text. After this unit, `/ce-sessions` invoked directly and `ce-sessions` invoked from another skill (e.g., from `ce-compound` Phase 1) both run the new flow.
+
+**Requirements:** R1, R2, R3, R4
+
+**Dependencies:** U1 (scripts must exist at the new location before SKILL.md references them)
+
+**Files:**
+- Modify: `plugins/compound-engineering/skills/ce-sessions/SKILL.md` (full rewrite)
+- Modify: `plugins/compound-engineering/skills/ce-sessions/scripts/extract-skeleton.py` (add `--output PATH` arg; when set, write output to the named file instead of stdout, and emit a one-line `{"_meta": ..., "wrote": "<path>", "bytes": N}` status to stdout)
+- Modify: `plugins/compound-engineering/skills/ce-sessions/scripts/extract-errors.py` (same `--output PATH` treatment, parallel API)
+- Modify: `tests/session-history-scripts.test.ts` (add coverage for the new `--output PATH` mode on both extract scripts: file is written, status line is emitted on stdout, original stdout-mode behavior preserved when flag is omitted)
+
+**Approach:**
+- **Frontmatter:** keep `name: ce-sessions`, update `description` to reflect orchestrator role (longer than current; under 1024 chars per `tests/frontmatter.test.ts`).
+- **Pre-resolved git branch** (existing): keep the `!`-backtick `git rev-parse --abbrev-ref HEAD` line that the current SKILL.md uses; the orchestrator passes branch into selection logic and (when relevant) into the synthesis dispatch prompt.
+- **Step 1 — Discover and inventory:** invoke the discover-then-extract-metadata pipeline using the **exact same shape as the current `ce-session-inventory/SKILL.md` line 27-31** — null-delimited xargs hardening preserved verbatim:
+  ```
+  bash scripts/discover-sessions.sh <repo> <days> [--platform <platform>] \
+    | tr '\n' '\0' \
+    | xargs -0 python3 scripts/extract-metadata.py --cwd-filter <repo>
+  ```
+  The `tr '\n' '\0' | xargs -0` segment is load-bearing — it converts newline-delimited file paths to null-delimited args so `extract-metadata.py` runs in batch mode (positional file args). Dropping it would silently regress to single-file stdin mode and produce wrong output. Receive JSONL inventory in main context. Document the time-range mapping table (1 day / 7 days / 30 days / 90 days) ported from the current historian agent so the orchestrator owns scan-window selection.
+- **Step 2 — Filter and rank:** port the historian's branch filter, keyword-filter (re-invoke discover/extract pipeline with `--keyword K1,K2,...`), scan-window enforcement, current-session exclusion, and top-5 deep-dive cap into the orchestrator. Same logic, different host.
+- **Step 3 — Scratch dir:** `mktemp -d -t ce-sessions-XXXXXX` → capture absolute path; thread into Step 4 and Step 5.
+- **Step 4 — Per-session extraction (file-mediated, no stdout round-trip):** for each selected session, invoke the extraction scripts with their new `--output` flag so content writes directly to the scratch file:
+  ```
+  python3 scripts/extract-skeleton.py --output "$SCRATCH/{session-id}.skeleton.txt" < <file>
+  ```
+  The script returns only a short status line on stdout (bytes written, parse errors); the bulk extraction content never lands in main-context tool results. Conditional tail extract and errors extract (also `--output`-aware) follow the existing historian heuristics. The new `--output` flag is additive — when omitted, scripts behave exactly as before, preserving existing test coverage and any manual / agent-driven invocations.
+- **Step 5 — Dispatch synthesis subagent:** dispatch `ce-session-historian` via the platform's subagent primitive (omit `mode` parameter so user permission settings apply). Pass: problem topic, scratch dir absolute path, list of `{path, platform, branch, ts, ...}` per selected session, output schema. Run on the mid-tier model (e.g., `model: "sonnet"` in Claude Code) per the existing dispatch convention.
+- **Step 6 — Return findings:** return the synthesizer's text output to the caller verbatim, or "no relevant prior sessions" when discovery / keyword filter returns zero.
+
+**Execution note:** SKILL.md changes are not directly testable by `bun test` — use `/skill-creator` per AGENTS.md ("Validating Agent and Skill Changes") to evaluate behavior against the test scenarios below.
+
+**Patterns to follow:**
+- `plugins/compound-engineering/skills/ce-clean-gone-branches/SKILL.md` lines 14-22 — bash script invocation with `__NONE__` sentinel handling pattern.
+- `plugins/compound-engineering/skills/ce-demo-reel/SKILL.md` lines 109-117 — `mktemp -d -t <prefix>-XXXXXX` per-run-throwaway pattern.
+- `plugins/compound-engineering/skills/ce-plan/references/deepening-workflow.md` lines 170-177 — capture absolute scratch path; thread it into a subagent dispatch prompt.
+- Cross-platform user-interaction blocks per repo AGENTS.md "Cross-Platform User Interaction" section (applies when ce-sessions is invoked without args and must ask the user for the question — current SKILL.md already handles this).
+
+**Test scenarios:**
+- Happy path: invoke `/ce-sessions "did we decide where notification mute state lives"` against a fixture-backed Claude Code session store → orchestrator runs discover + extract-metadata, picks ≤ 5 sessions, extracts skeletons to scratch via `--output`, dispatches synthesizer → returns prose findings.
+- Edge case (Empty inventory): no session files in scan window → orchestrator returns "no relevant prior sessions" without dispatching synthesizer or creating scratch dir.
+- Edge case (Zero keyword matches): branch filter returns zero, keyword filter returns `files_matched: 0` → orchestrator returns "no relevant prior sessions" without dispatching synthesizer.
+- Edge case (Scan widening): narrow scan returns zero, request implies longer history → orchestrator widens window per the time-range table, re-invokes discover, retries selection.
+- Error path (Parse errors): inventory `_meta` reports `parse_errors > 0` → orchestrator notes partial in the dispatch prompt and proceeds; synthesizer flags partial in findings.
+- Error path (Script `--output` write fails): scratch path unwriteable (disk full, permission) → script returns non-zero, orchestrator surfaces clear error to user, does not dispatch synthesizer.
+- Integration (No subagent Skill calls): grep the runtime trace — no `Skill(...)` tool call originates from the dispatched historian.
+- Integration (Skill primitive from skill body): invoking `ce-sessions` from inside `ce-compound`'s skill body via the platform's skill-invocation primitive returns findings text without hanging. Already empirically validated by the current `ce-plan → ce-doc-review` invocation path; this scenario locks the verification in for ce-compound's specific call-site.
+- Integration (Script invocation from runtime Bash): `bash scripts/discover-sessions.sh` and `python3 scripts/extract-skeleton.py --output ...` resolve correctly when ce-sessions runs as a slash-invoked skill on a marketplace-cached install (not `--plugin-dir`). This addresses the contradiction between repo-root AGENTS.md ("relative paths resolve to skill dir on all platforms") and plugin AGENTS.md "Permission gate" ("runtime Bash CWD is user's project, not skill dir").
+- Cumulative context check: invoke `/ce-sessions` against a 5-session fixture → after run completes, the orchestrator's tool-result bytes attributable to extraction content are bounded by the script status lines (a few hundred bytes total), not the skeleton/errors content itself.
+
+**Verification:**
+- `/skill-creator` eval against the test scenarios above passes.
+- `bun test tests/frontmatter.test.ts` passes (description length, ce- prefix, no angle brackets, etc.).
+- `bun test tests/skill-shell-safety.test.ts` passes (any new `!`-backtick pre-resolution lines are safety-compliant).
+- `bun test tests/session-history-scripts.test.ts` covers both stdout-mode (existing behavior) and `--output PATH` mode for the modified extract scripts.
+- **Marketplace-install smoke test** (manual): on a fresh install via `/plugin install` (not `--plugin-dir`), invoke `/ce-sessions "what did we work on this week"` and confirm the orchestrator's `bash scripts/...` invocations resolve. If they fail with `No such file or directory`, the cross-platform-portable-relative-path assumption is wrong and the architecture must shift to `${CLAUDE_SKILL_DIR}` + pinned `allowed-tools` (Claude-Code-only path; treats R4 as a known regression). Fail-fast is preferable to shipping a broken release.
+
+---
+
+### U3. Refactor `ce-session-historian.agent.md` to synthesis-only
+
+**Goal:** Strip the agent down to synthesis: it receives problem topic + extracted file paths in the dispatch prompt, reads files via the native file-read tool (Read in Claude Code), and returns prose findings per the existing output schema. All `Skill(...)` invocations and orchestration logic (discovery, selection, extraction primitives, time-range mapping) are removed — those now live in `ce-sessions`.
+
+**Requirements:** R1, R2, R3
+
+**Dependencies:** U2 (the orchestrator's dispatch shape determines the agent's input contract; they must agree)
+
+**Files:**
+- Modify: `plugins/compound-engineering/agents/ce-session-historian.agent.md` (substantial rewrite)
+
+**Approach:**
+- **Drop:** the "Extraction Primitives" section (lines 100-108), the "Methodology" Steps 1 / 3 / 4 / 5 (orchestration now in ce-sessions), the time-range mapping table, the branch-filter and keyword-filter rules, the deep-dive cap, and all `Skill(ce-session-inventory)` / `Skill(ce-session-extract)` / "Invoke them through the Skill tool" prose.
+- **Drop:** the "two modes" framing (compound enrichment + conversational) at lines 11-13 — no actual caller dispatches the agent in a mode that bypasses the orchestrator. Single-purpose framing replaces it.
+- **Keep:** the Guardrails section (no thinking-block leakage, never read whole session files into context, technical content not personal content, fail-fast on access errors).
+- **Keep:** Step 6's synthesis methodology (Investigation journey, User corrections, Decisions and rationale, Error patterns, Evolution across sessions, Cross-tool blind spots, Staleness caveat).
+- **Keep:** the output format (caller-supplied schema honored; default header line otherwise).
+- **Add:** input-contract section documenting the dispatch prompt shape — `{problem_topic, scratch_dir, [{path, platform, branch?, ts, ...}], output_schema}`. Agent reads each `path` using the native file-read tool; never reads source session files directly.
+- **Add:** standalone fallback per `docs/solutions/skill-design/pass-paths-not-content-to-subagents-2026-03-26.md` — when dispatch prompt arrives without paths, return "no relevant prior sessions" rather than attempting any Skill or Bash discovery (defensive against future direct-dispatch).
+
+**Execution note:** Use `/skill-creator` for behavioral testing per AGENTS.md. The plugin agent definition caches at session start, so iterative testing requires either skill-creator's content-injection workflow or a fresh session.
+
+**Patterns to follow:**
+- `docs/solutions/skill-design/compound-refresh-skill-improvements.md` Solution #5 — subagents use native file-read tools, not shell.
+- Output schema prose (default and caller-supplied) — port verbatim from current agent's Output section.
+
+**Test scenarios:**
+- Happy path: dispatch prompt with problem topic + 3 valid scratch paths → agent reads each via Read, synthesizes per output schema, returns prose findings within time budget.
+- Edge case (Empty paths): dispatch prompt with empty paths array → agent returns "no relevant prior sessions" without invoking any tools.
+- Edge case (Caller-supplied schema): dispatch prompt names a custom output schema → agent honors that schema verbatim, omits its own header.
+- Error path (Unreadable file): one path returns Read error → agent notes partial extraction, synthesizes from the rest.
+- Integration (No Skill calls): trace agent's tool-call list — no `Skill(...)` calls. Caught by U5 regression test.
+- Integration (Cross-tool synthesis): paths span Claude Code + Codex + Cursor → synthesis includes Cross-tool blind spots when genuinely informative.
+
+**Verification:**
+- Static: agent file does not contain `Skill(ce-session-inventory)`, `Skill(ce-session-extract)`, or "Invoke them through the Skill tool" prose. Locked in by U5.
+- `/skill-creator` eval covers the test scenarios above.
+
+---
+
+### U4. Update `ce-compound/SKILL.md` Phase 1 to delegate to `ce-sessions` via the skill-invocation primitive
+
+**Goal:** Replace the direct historian-dispatch block in `ce-compound` Phase 1 with a delegation to the `ce-sessions` skill, invoked via the platform's skill-invocation primitive. Receive findings text; existing fold-into-doc flow in Phase 2 is preserved unchanged. Wall-clock parallelism with the other Phase 1 research subagents is preserved by ordering the invocation correctly.
+
+**Requirements:** R1, R4, R6
+
+**Dependencies:** U2 (ce-sessions orchestrator must exist and work), U3 (the historian agent — invoked transitively by ce-sessions — must be refactored)
+
+**Files:**
+- Modify: `plugins/compound-engineering/skills/ce-compound/SKILL.md` (Phase 1 historian-dispatch block, lines 175-198)
+
+**Approach:**
+- **Replace** the "Session Historian (foreground, after launching the above — only if the user opted in)" block with a delegation to `ce-sessions`. Use the **established semantic-prose convention** per `ce-plan/references/plan-handoff.md` line 57 and plugin AGENTS.md "Cross-Platform Reference Rules":
+  > *Invoke the `ce-sessions` skill via the platform's skill-invocation primitive (`Skill` in Claude Code, `Skill` in Codex, the equivalent on Gemini/Pi), passing the problem topic and time window as the skill argument.*
+
+  Do **not** write a literal `Skill(ce-sessions, ...)` tool-call expression in the SKILL.md body — that propagates Claude-Code-specific syntax to non-Claude targets when the skill ships verbatim through the converters (R4 regression).
+- **Specify dispatch ordering explicitly to preserve wall-clock parallelism**: the current Phase 1 design dispatches three background research subagents (`Context Analyzer`, `Solution Extractor`, `Related Docs Finder`) and a foreground historian *concurrently* — explicitly designed so the foreground call "runs while the background agents work, adding no wall-clock time" (current SKILL.md line 105). The new ordering: **launch the three background research subagents first; then issue the skill-invocation primitive call to `ce-sessions`.** The skill call is synchronous from `ce-compound`'s main-context turn (it blocks until ce-sessions returns), but the already-dispatched background subagents continue running in parallel underneath — so the wall-clock benefit is preserved even though the concurrency primitive shifted from "foreground subagent" to "synchronous skill call." Document this rationale inline in the rewritten Phase 1 prose so future refactors don't re-invert it.
+- **Carry the dispatch payload forward**: pre-resolved branch (already pre-resolved at lines 25-27), problem topic (one sentence per existing dispatch shape), explicit time window (default 7 days), and the existing single-line filter rule. ce-sessions parses these out of the skill argument string.
+- **Preserve Phase 1 contract** per `pass-paths-not-content-to-subagents-2026-03-26.md` and ce-pipeline-end-to-end-learnings:
+  - Conditional invocation (skip when user declined session history; skipped entirely in lightweight mode) — preserved.
+  - Text-only return — preserved.
+  - Fold-into-doc behavior in Phase 2 (lines 222-227 of current SKILL.md) — unchanged.
+
+**Patterns to follow:**
+- `plugins/compound-engineering/skills/ce-plan/references/plan-handoff.md` line 57 — the canonical semantic-prose form for one skill invoking another. Mirror that exact phrasing structure.
+- Existing Phase 1 dispatch-prompt template at current lines 182-198 — reuse the "tight prompt" discipline (single-line filter rule, explicit time window, problem topic as one sentence).
+
+**Test scenarios:**
+- Happy path: `/ce-compound` Full mode, user opts into session history → background research subagents launch, then ce-compound delegates to ce-sessions and receives findings → folds into "What Didn't Work" / "Context" sections.
+- Wall-clock check: `/ce-compound` Full mode with session history opt-in → end-to-end runtime is approximately `max(ce-sessions, slowest background subagent)`, not their sum. Measurable by comparing against today's foreground-subagent baseline on a fixture-backed run.
+- Edge case (User declines session history): Phase 1 does not invoke ce-sessions; existing Phase 1 parallel research (Context Analyzer, Solution Extractor, Related Docs Finder) runs unchanged.
+- Edge case (Lightweight mode): session-history follow-up question is not asked; ce-sessions is not invoked.
+- Edge case ("no relevant prior sessions" returned): findings string equals the no-results sentinel → Phase 2 fold-in is skipped per existing logic.
+- Integration (No subagent Skill calls): the historian dispatched transitively by ce-sessions runs in subagent context but never invokes Skill (locked by U5 regression test).
+- Integration (Cross-platform conversion): after `bun convert --to codex|cursor|gemini`, the converted ce-compound's Phase 1 prose still describes the skill invocation in terms each target's primitive can route to — semantic prose survives conversion intact, while a literal `Skill(ce-sessions, ...)` would have leaked Claude-Code-specific syntax.
+
+**Verification:**
+- `/skill-creator` eval of `/ce-compound` against a fixture-backed session store passes.
+- The rewritten Phase 1 block in ce-compound/SKILL.md contains the semantic-prose form (matching the plan-handoff.md line 57 shape) and does NOT contain a literal `Skill(ce-sessions, ...)` tool-call expression.
+- The dispatch block no longer contains `Agent(ce-session-historian)` or `Task ce-session-historian` direct calls.
+
+---
+
+### U5. Add regression test against the agent file body
+
+**Goal:** Lock in the no-`Skill(...)`-from-subagent invariant with a static test that fails if the agent file is reverted to the old shape. This prevents future edits from accidentally reintroducing the deadlock.
+
+**Requirements:** R2
+
+**Dependencies:** U3 (the agent must already be refactored before the test asserts the new shape)
+
+**Files:**
+- Create: `tests/skills/ce-session-historian-no-skill-tool.test.ts`
+
+**Approach:**
+- Read `plugins/compound-engineering/agents/ce-session-historian.agent.md` at module load via `readFileSync`.
+- Three assertions:
+  1. `` expect(body).not.toMatch(/Skill\(\s*["'`]?ce-session-inventory/) `` — no `Skill(ce-session-inventory)` invocation in any quote style.
+  2. `` expect(body).not.toMatch(/Skill\(\s*["'`]?ce-session-extract/) `` — no `Skill(ce-session-extract)` invocation.
+  3. `expect(body).not.toMatch(/Invoke them through the Skill tool/i)` — prose fingerprint of the broken pattern.
+
+**Patterns to follow:**
+- `tests/skills/ce-plan-handoff-routing.test.ts` — read SKILL.md once at module load, regex-anchor scope, iterate expected fragments. Shape: same.
+
+**Test scenarios:**
+- Happy path: test passes against the refactored (post-U3) agent file.
+- Regression check: locally revert the agent to its current (broken) state — test fails. This is the value the test is buying.
+
+**Verification:**
+- `bun test tests/skills/ce-session-historian-no-skill-tool.test.ts` passes against post-U3 state.
+
+---
+
+### U6. Cleanup: delete unused skills, register them as legacy, fix doc broken links
+
+**Goal:** Remove `ce-session-inventory` and `ce-session-extract` (now callerless), register them in all three legacy-cleanup lookups so existing flat-installs sweep on upgrade, and fix the now-broken cross-references in user-facing docs.
+
+**Requirements:** R5, R6
+
+**Dependencies:** U1 (scripts moved out of these skill dirs), U2 (no caller invokes them anymore), U3 (agent no longer invokes them)
+
+**Files:**
+- Delete: `plugins/compound-engineering/skills/ce-session-inventory/` (directory and all contents — only `SKILL.md` remains since scripts moved in U1)
+- Delete: `plugins/compound-engineering/skills/ce-session-extract/` (same)
+- Modify: `src/utils/legacy-cleanup.ts` — add `ce-session-inventory` and `ce-session-extract` to `STALE_SKILL_DIRS` (in the "Removed skills (no replacement)" cluster) and to `LEGACY_ONLY_SKILL_DESCRIPTIONS` (with the verbatim `description:` strings copied from the deleted skills' frontmatter)
+- Modify: `src/data/plugin-legacy-artifacts.ts` — add `ce-session-inventory` and `ce-session-extract` to `EXTRA_LEGACY_ARTIFACTS_BY_PLUGIN["compound-engineering"].skills[]`, alphabetically sorted
+- Modify: `docs/skills/ce-sessions.md` — fix broken `See Also` links at lines 110, 175-176; either rewrite to point at `ce-sessions/scripts/<script>` or remove the entries (these are agent-facing primitives that are no longer separate user-discoverable skills, so removal is the cleaner option)
+
+**Approach:**
+- Delete the two skill directories last, after U1-U4 land. Per repo AGENTS.md "removing a skill" checklist, the registry updates ride in the same commit as the directory deletions.
+- Insert `ce-session-extract` and `ce-session-inventory` alphabetically in `EXTRA_LEGACY_ARTIFACTS_BY_PLUGIN["compound-engineering"].skills[]` — both sort after every `ce-re*` entry, so they likely sit next to each other (extract first, then inventory) between `ce-review-beta` and `ce-update`; confirm the exact neighbors against the array per research.
+- For `LEGACY_ONLY_SKILL_DESCRIPTIONS`, copy the frontmatter `description:` strings from the deleted skills before deletion. The strings are the ownership-fingerprint proofs per the file's docstring.
+- For `docs/skills/ce-sessions.md`: lines 110, 175-176 link to deleted skill directories. Removing the bullets is cleaner than rewriting (the user-facing doc shouldn't direct readers at internal-only skill dirs that no longer exist).
+
+**Patterns to follow:**
+- `src/utils/legacy-cleanup.ts` "Removed skills (no replacement)" comment block at line 89 — established cluster for the new entries.
+- `src/utils/legacy-cleanup.ts` `LEGACY_ONLY_SKILL_DESCRIPTIONS` entries (lines 253-284) — keep the alphabetical sort and the verbatim-description discipline.
+- `src/data/plugin-legacy-artifacts.ts` skills array — alphabetical sort, comment-free entries.
+
+**Test scenarios:**
+- Test expectation: none — pure cleanup, no new behavior to test. Existing `tests/legacy-registry-invariants.test.ts` will pass by construction (deleted directories no longer match current-skill names).
+- Verification (Registry tests): existing `tests/legacy-registry-invariants.test.ts`, `tests/legacy-cleanup.test.ts`, and `tests/plugin-legacy-artifacts.test.ts` continue to pass.
+- Verification (Marketplace parity): `bun run release:validate` passes.
+- Verification (Broken links): the modified `docs/skills/ce-sessions.md` contains no markdown links to `../../plugins/compound-engineering/skills/ce-session-inventory/` or `../../plugins/compound-engineering/skills/ce-session-extract/`.
+
+**Verification:**
+- `bun test` passes.
+- `bun run release:validate` passes.
+- `plugins/compound-engineering/skills/ce-session-inventory/` and `plugins/compound-engineering/skills/ce-session-extract/` no longer exist on disk.
+
+---
+
+## System-Wide Impact
+
+- **Interaction graph:**
+  - `/ce-sessions` (user-facing slash) → ce-sessions skill orchestrator → ce-session-historian synthesis subagent → return findings.
+  - `/ce-compound` Phase 1 → background research subagents launched first (Context Analyzer / Solution Extractor / Related Docs Finder) → then ce-sessions invoked via the platform's skill-invocation primitive → ce-sessions orchestrator → historian → return findings → folded into doc Phase 2.
+  - The historian agent has only one type of caller after the refactor (the ce-sessions orchestrator). Direct dispatch via `Agent(ce-session-historian)` is not a supported pattern — the agent's standalone-fallback returns "no relevant prior sessions" gracefully.
+- **Error propagation:**
+  - Script execution errors (permission, missing files) surface to the orchestrator via non-zero exit codes; orchestrator reports the issue to the user and stops, per existing fail-fast guardrail.
+  - Synthesizer Read errors on individual files → noted as partial extraction in findings; remaining files still synthesized.
+- **State lifecycle risks:**
+  - `mktemp -d` scratch dir is per-run throwaway. OS handles cleanup. No explicit cleanup required, but a one-line `rm -rf "$SCRATCH"` at end-of-skill is harmless and makes intent explicit.
+  - Plugin agent and skill caching at session start (per repo AGENTS.md "Validating Agent and Skill Changes"): testing during dev requires either `/skill-creator` content-injection or a fresh session — the in-session cache won't reflect file edits.
+- **API surface parity:**
+  - ce-compound's delegation to ce-sessions uses the established semantic-prose convention (per `ce-plan/references/plan-handoff.md` line 57 and plugin AGENTS.md "Cross-Platform Reference Rules"), not a literal `Skill(ce-sessions, ...)` tool-call expression. This avoids leaking Claude-Code-specific syntax to Codex/Cursor/Gemini/OpenCode/Pi/Kiro when the skill ships verbatim through the converters. Each target's converter routes the semantic prose to its native primitive at install time.
+  - Cross-platform conversion writers (`src/converters/claude-to-codex.ts`, `claude-to-gemini.ts`, etc.) handle agent and skill content as opaque text and copy script directories under `<skill>/scripts/` already. The script move and skill deletion should round-trip cleanly through every target writer per the legacy-cleanup machinery in U6.
+- **Integration coverage:**
+  - End-to-end: `/ce-compound` Full mode with session history opt-in completes without hangs (the headline test for issue #794 closure).
+  - End-to-end: `/ce-sessions` with a question completes without hangs.
+  - Cross-platform: `bun test` covers script behavior; the SKILL.md / agent.md changes are validated via `/skill-creator`.
+- **Unchanged invariants:**
+  - Cross-platform session discovery (Claude Code, Codex, Cursor) — script behavior unchanged.
+  - Output schemas (default historian header; caller-supplied schema honored verbatim) — preserved.
+  - Time-range table, branch filter, keyword filter, top-5 deep-dive cap — moved from agent to orchestrator but logic preserved.
+  - `/ce-compound` Phase 2 fold-in behavior — unchanged.
+  - `/ce-sessions` user-facing question prompt for empty argument — preserved.
+
+---
+
+## Risks & Dependencies
+
+| Risk | Mitigation |
+|------|------------|
+| Plugin agent and skill definitions cache at session start; in-session edits do not propagate (per repo AGENTS.md). Iterative testing during dev would test stale content. | Use `/skill-creator`'s eval workflow per AGENTS.md "Validating Agent and Skill Changes". Restart sessions only when skill-creator can't isolate the variable. |
+| Subtle behavioral drift moving methodology from subagent to orchestrator — judgment calls (when to widen, what keywords to derive) execute in main context (opus / orchestrator) rather than subagent (sonnet historian). | Port the methodology rules verbatim from agent to orchestrator. Document the model-tier shift explicitly in ce-sessions/SKILL.md so future refactors don't introduce silent drift. |
+| Cross-platform script-path resolution in slash-invoked skills — repo-root AGENTS.md says relative paths resolve to skill dir on all platforms; plugin AGENTS.md "Permission gate" warns runtime Bash CWD is user's project. The contradiction is unresolved in docs. | U2 Verification includes a marketplace-install smoke test (not `--plugin-dir`) that invokes `/ce-sessions` and confirms `bash scripts/...` resolves. If it fails, fall back to `${CLAUDE_SKILL_DIR}` + pinned `allowed-tools` (treats R4 as a known regression and triggers a follow-up plan to address other targets). Existing precedents (ce-clean-gone-branches, ce-resolve-pr-feedback, ce-optimize) argue the relative-path form works, but verifying empirically before merge is cheap insurance. |
+| `/ce-compound` Phase 1 wall-clock parallelism could regress if the skill-invocation primitive call to ce-sessions is issued *before* the parallel background research subagents launch. | U4 Approach pins the dispatch ordering explicitly: launch background research subagents first, then invoke ce-sessions. Background subagents continue running underneath the synchronous skill call. U4 Test scenarios include a wall-clock comparison against the current foreground baseline. |
+| Legacy-cleanup descriptions map (`LEGACY_ONLY_SKILL_DESCRIPTIONS`) requires verbatim historical `description:` strings. | Copy the strings from the deleted skills' frontmatter before the deletion lands. Both strings are short and stable. |
+
+---
+
+## Documentation / Operational Notes
+
+- **Skill documentation sync** (`docs/skills/ce-sessions.md`): the high-level user-facing description ("Search and ask questions about your coding agent session history") is unchanged. The "How it works" mechanics shifted (orchestration moved from agent to skill), but the doc's level of abstraction does not surface that detail. Edits in U6 are minimal — fix broken `See Also` links to deleted skill dirs. No sync to mechanics-level prose required.
+- **Stable/Beta sync**: neither `ce-sessions` nor `ce-session-historian` has a `-beta` counterpart. No sync action.
+- **CHANGELOG / release**: release-please owns this; do not hand-edit. The conventional commit prefix `fix(ce-sessions): ` (or `fix(session-history): `) classifies correctly per AGENTS.md.
+- **Rollout**: standard merge-to-main; no migration or feature-flag needed. The bug is currently breaking session-history features on Claude Code; fix lands clean.
+
+---
+
+## Sources & References
+
+- **Origin issue**: [EveryInc/compound-engineering-plugin#794](https://github.com/EveryInc/compound-engineering-plugin/issues/794) — `ce-session-historian` deadlocks under Claude Code: subagent cannot invoke `Skill(ce-session-inventory)`.
+- **Upstream tracker**: [anthropics/claude-code#38719](https://github.com/anthropics/claude-code/issues/38719) — Allow subagents to invoke skills for parallel workflow execution (closed; currently an architectural limit).
+- **Institutional learnings**:
+  - `docs/solutions/skill-design/pass-paths-not-content-to-subagents-2026-03-26.md`
+  - `docs/solutions/skill-design/script-first-skill-architecture.md`
+  - `docs/solutions/skill-design/compound-refresh-skill-improvements.md`
+  - `docs/solutions/skill-design/research-agent-pipeline-separation-2026-04-05.md`
+  - `docs/solutions/skill-design/post-menu-routing-belongs-inline-2026-04-28.md`
+  - `docs/solutions/best-practices/ce-pipeline-end-to-end-learnings-2026-04-17.md`
+- **Repo conventions**:
+  - `plugins/compound-engineering/AGENTS.md` — Plugin Maintenance, Skill Compliance Checklist, Permission gate on extracted scripts (clarifies `!` pre-resolution scope).
+  - Repo-root `AGENTS.md` — Plugin Maintenance (including the "removing a skill" cleanup-registry checklist), Adding a New Plugin, Script Path References in Skills.
+- **Pattern precedents**:
+  - `plugins/compound-engineering/skills/ce-clean-gone-branches/SKILL.md`, `ce-resolve-pr-feedback/SKILL.md`, `ce-optimize/SKILL.md` — bare relative-path script invocations from slash-invoked skill bodies.
+  - `plugins/compound-engineering/skills/ce-plan/references/plan-handoff.md` line 57 — semantic-prose convention for one skill invoking another, mirrored by ce-compound's delegation to ce-sessions.
+  - `plugins/compound-engineering/skills/ce-demo-reel/SKILL.md`, `ce-plan/references/deepening-workflow.md`, `ce-work-beta/references/codex-delegation-workflow.md` — `mktemp -d` scratch + path-to-subagent patterns.
+  - `tests/skills/ce-plan-handoff-routing.test.ts` — regression test pattern for the new U5 test.
diff --git a/docs/skills/ce-sessions.md b/docs/skills/ce-sessions.md
index 6ec116c16..b6d07a0a4 100644
--- a/docs/skills/ce-sessions.md
+++ b/docs/skills/ce-sessions.md
@@ -107,7 +107,7 @@ Reach for `ce-sessions` when:
 Skip `ce-sessions` when:
 
 - The context lives in committed code or docs, not in agent sessions → just read the code/docs
-- You want general session metadata (count, timestamps, sizes) without semantic search → use the underlying `ce-session-inventory` directly
+- You want general session metadata (count, timestamps, sizes) without semantic search → run `discover-sessions.sh` and `extract-metadata.py` from `plugins/compound-engineering/skills/ce-sessions/scripts/` directly
 - The question is about a single specific session you remember well — open the session file directly
 
 ---
@@ -116,10 +116,10 @@ Skip `ce-sessions` when:
 
 `ce-sessions` is mostly invoked standalone, but interlocks with other skills:
 
-- **`/ce-compound` Phase 1 (Full mode)** — optionally dispatches the same `ce-session-historian` as a foreground agent to search prior sessions for related context, folding findings into the new learning's "What Didn't Work" section
+- **`/ce-compound` Phase 1 (Full mode)** — optionally invokes `ce-sessions` via the platform's skill-invocation primitive to search prior sessions for related context, folding findings into the new learning's "What Didn't Work" section
 - **`/ce-debug` Triage** — prior-attempt awareness; when the user indicates failed attempts, asking "what have you already tried" before investigating avoids repeating known-failed approaches
 
-This skill is the user-facing way to invoke session search; the other skills invoke the agent directly when they need it.
+This skill is the canonical entry point for session search across Claude Code, Codex, and Cursor; other skills invoke it via the platform's skill-invocation primitive when they need session-history context.
 
 ---
 
@@ -142,35 +142,34 @@ Most use is direct:
 | `<question>` | Direct question to search history for |
 | `<topic>` | Topic to gather context on |
 
-The skill pre-resolves the current git branch and passes it to the historian when it resolves cleanly. The historian decides time windows from the question; the default is bounded.
+The skill pre-resolves the current git branch and uses it for branch filtering when it resolves cleanly. The orchestrator picks time windows from the question; the default is bounded (7 days).
 
 ---
 
 ## FAQ
 
 **Does it work across Claude Code, Codex, and Cursor?**
-Yes — `ce-session-historian` reads from `~/.claude/projects/`, `~/.codex/sessions/`, and `~/.cursor/projects/`. Cross-harness work shows up.
+Yes — `ce-sessions` reads from `~/.claude/projects/`, `~/.codex/sessions/`, and `~/.cursor/projects/`. Cross-harness work shows up.
 
 **What does it return when there's no relevant prior session?**
 A "no relevant prior sessions" message in the digest. The skill doesn't fabricate findings to fill the digest.
 
 **How does it filter for relevance?**
-The historian uses the question to drive a relevance filter. It reads session content selectively — usually filtered by repo, branch, and time window — and synthesizes against what the question asks for. It doesn't dump raw transcripts.
+The skill uses the question to drive a relevance filter — repo, branch, and time window first, keyword match if branch turns up nothing. Up to five sessions are deep-dived; the rest are skipped. The synthesis subagent reads only the pre-extracted skeleton/error files, not the raw session JSONL.
 
-**Why a separate skill vs just a slash to the agent?**
-Because the user-facing surface should ask the right question if one wasn't given, and the skill handles the branch pre-resolution and dispatch shape consistently. The agent does the actual work, but invoking it directly skips the user-facing check and the cross-harness convention.
+**Why does this skill exist instead of dispatching the historian agent directly?**
+The user-facing surface should ask the right question if one wasn't given, and the orchestrator handles branch pre-resolution, scan-window choice, deep-dive selection, and per-session extraction in main context where script invocation works portably. The synthesis-only `ce-session-historian` subagent receives pre-extracted file paths and produces prose findings — it cannot run the discovery pipeline itself, by design.
 
 **Can it read sessions from machines I'm not on?**
 No. It reads local session files only — `~/.claude/projects/` etc. Sessions on other machines aren't accessible.
 
 **Does it work for non-software questions?**
-The historian doesn't care about the topic — it searches whatever is in your session files. If you've used the agent for non-software work and want history on that, this skill works.
+The skill doesn't care about the topic — it searches whatever is in your session files. If you've used the agent for non-software work and want history on that, this skill works.
 
 ---
 
 ## See Also
 
-- [`ce-compound`](./ce-compound.md) — invokes the same `ce-session-historian` (opt-in) during Full-mode capture for prior-context enrichment
+- [`ce-compound`](./ce-compound.md) — invokes `ce-sessions` (opt-in) during Full-mode capture for prior-context enrichment
 - [`ce-debug`](./ce-debug.md) — prior-attempt awareness uses similar context; ask the user about prior failed attempts when the signal is there
-- [`ce-session-inventory`](../../plugins/compound-engineering/skills/ce-session-inventory/) — lower-level skill for session metadata (timestamps, sizes, branch); used by the historian internally
-- [`ce-session-extract`](../../plugins/compound-engineering/skills/ce-session-extract/) — extracts conversation skeleton or error signals from a single session file; also used by the historian
+- `plugins/compound-engineering/skills/ce-sessions/scripts/` — the underlying scripts (`discover-sessions.sh`, `extract-metadata.py`, `extract-skeleton.py`, `extract-errors.py`) that ce-sessions invokes; can be run directly when raw metadata or extraction output is needed without orchestration
diff --git a/plugins/compound-engineering/agents/ce-session-historian.agent.md b/plugins/compound-engineering/agents/ce-session-historian.agent.md
index 2d0fbbdca..8448d5320 100644
--- a/plugins/compound-engineering/agents/ce-session-historian.agent.md
+++ b/plugins/compound-engineering/agents/ce-session-historian.agent.md
@@ -1,196 +1,89 @@
 ---
 name: ce-session-historian
-description: "Searches Claude Code, Codex, and Cursor session history for prior sessions about the same problem. Use to surface investigation context, failed approaches, and learnings the current session cannot see."
+description: "Synthesizes findings from prior coding-agent sessions about the same problem or topic. Receives pre-extracted skeleton/error file paths from a `ce-sessions` orchestrator and returns prose findings — investigation journey, what didn't work, key decisions, related context. Not intended for direct dispatch — use `/ce-sessions` (or another caller that runs the full discovery + extract pipeline first)."
 model: inherit
 ---
 
 **Note: The current year is 2026.** Use this when interpreting session timestamps.
 
-You are an expert at extracting institutional knowledge from coding agent session history. Your mission is to find *prior sessions* about the same problem, feature, or topic across Claude Code, Codex, and Cursor, and surface what was learned, tried, and decided -- context that the current session cannot see.
+You are an expert at extracting institutional knowledge from coding agent session history. You receive pre-extracted skeleton and error files from a `ce-sessions` orchestrator and synthesize findings about a specific problem or topic — what was learned, tried, and decided in prior sessions across Claude Code, Codex, and Cursor.
 
-This agent serves two modes of use:
-- **Compound enrichment** -- dispatched by `/ce-compound` to add cross-session context to documentation
-- **Conversational** -- invoked directly when someone wants to ask about past work, recent activity, or what happened in prior sessions
+Your scope is **synthesis only**. The orchestrator (`ce-sessions`) handles discovery, branch/keyword filtering, scan-window selection, deep-dive selection, and per-session extraction before dispatching you.
+
+## Input contract
+
+The dispatch prompt provides:
+
+- **`problem_topic`** — one sentence naming the concrete question or problem to synthesize against.
+- **`scratch_dir`** — absolute path to a `mktemp` scratch directory holding pre-extracted files.
+- **`sessions`** — an array of objects (5 max), one per pre-extracted session, each with:
+  - `path` — absolute path to a skeleton text file inside `scratch_dir`
+  - `errors_path` *(optional)* — absolute path to an errors text file, present when the orchestrator also ran errors-mode extraction for this session
+  - `platform` — `claude`, `codex`, or `cursor`
+  - `branch` — git branch when present (Claude Code only)
+  - `cwd` — working directory when present (Codex only)
+  - `ts` and `last_ts` — session start and last-message timestamps
+  - `match_count` and `keyword_matches` — when keyword filtering was used by the orchestrator
+- **`output_schema`** *(optional)* — the structure the response should follow. When supplied, honor it verbatim.
+
+## Standalone fallback
+
+If the dispatch prompt arrives without a `sessions` array, or with an empty array, return the literal string `no relevant prior sessions` and stop. Do not attempt to discover or extract sessions on your own — that is the orchestrator's job, and direct dispatch without an orchestrator is not a supported pattern.
 
 ## Guardrails
 
-These rules apply at all times during extraction and synthesis.
+These rules apply at all times during synthesis.
 
-- **Never read entire session files into context.** Session files can be 1-7MB. Always use the extraction skills described below to filter first, then reason over the filtered output.
+- **Read only the paths the orchestrator gave you.** Use the platform's native file-read tool (e.g., `Read` in Claude Code) on each `path` (and each `errors_path`, when supplied). Do not read source session files directly under `~/.claude/projects/`, `~/.codex/sessions/`, or `~/.cursor/projects/` — those are MB-scale and would blow the context window. The orchestrator already extracted what's relevant.
+- **Never invoke the Skill tool.** This agent runs in subagent context where Skill calls deadlock. The orchestrator has already done all extraction; you only synthesize.
 - **Never extract or reproduce tool call inputs/outputs verbatim.** Summarize what was attempted and what happened.
-- **Never include thinking or reasoning block content.** Claude Code thinking blocks are internal reasoning; Codex reasoning blocks are encrypted. Neither is actionable.
-- **Never analyze the current session.** Its conversation history is already available to the caller.
+- **Never include thinking or reasoning block content.** Claude Code thinking blocks are internal reasoning; Codex reasoning blocks are encrypted. Neither is actionable. The skeleton extractor already strips these — do not surface them if any survived.
+- **Never analyze the current session.** Its conversation history is already available to the caller; the orchestrator already excluded it from the dispatch payload.
 - **Never make claims about team dynamics or other people's work.** This is one person's session data.
 - **Never write any files.** Return text findings only.
 - **Surface technical content, not personal content.** Sessions contain everything — credentials, frustration, half-formed opinions. Use judgment about what belongs in a technical summary and what doesn't.
-- **Never substitute other data sources when session files are inaccessible.** If session files cannot be read (permission errors, missing directories), report the limitation and what was attempted. Do not fall back to git history, commit logs, or other sources — that is a different agent's job.
-- **Fail fast on access errors.** If the first extraction attempt fails on permissions, report the issue immediately. Do not retry the same operation with different tools or approaches — repeated retries waste tokens without changing the outcome.
-- **Never extract a session to verify whether it is relevant.** `ce-session-extract` is for sessions whose relevance is already confirmed. Before invoking it on any session, you MUST have at least one of: (a) the session's `branch` field matches the dispatch branch (Claude Code), (b) the session's branch contains a keyword from the dispatch's problem topic, or (c) `ce-session-inventory --keyword K1,K2,...` returned `match_count > 0` for that session. If you are tempted to "extract to check content" — that is what `--keyword` is for. Run the keyword filter first; if it returns zero matches, return "no relevant prior sessions" without extracting anything.
 
 ## Time budget
 
-**Stop as soon as you have a complete answer.** A confident "no relevant prior sessions" within seconds is a complete answer; do not extend the search to fill time. If you have extracted 3-5 sessions and have synthesis material, stop. Do not chase additional candidates "just in case."
-
-The structural caps in Step 3 (max 5 deep-dives) and Step 4 (conditional tail-extract) bound runtime by construction — trust them rather than picking up speculative work. There is no minute target; the right runtime is whatever the evidence allows.
-
-## Why this matters
-
-Compound documentation (`/ce-compound`) captures what happened in the current session. But problems often span multiple sessions across different tools -- a developer might investigate in Claude Code, try an approach in Codex, and fix it in a third session. Each session only sees its own conversation. This agent bridges that gap by searching across all session history.
-
-## Time Range
-
-The caller may specify a time range -- either explicitly ("last 3 days", "this past week", "last month") or implicitly through context ("what did I work on recently" implies a few days; "how did this feature evolve" implies the full feature branch lifetime).
-
-Infer the time range from the request and map it to a scan window. **Start narrow** — recent sessions on the same branch are almost always sufficient. Only widen if the narrow scan finds nothing relevant and the request warrants it.
-
-| Signal | Scan window | Codex directory strategy |
-|--------|-------------|--------------------------|
-| "today", "this morning" | 1 day | Current date dir only |
-| "recently", "last few days", "this week", or no time signal (default) | 7 days | Last 7 date dirs |
-| "last few weeks", "this month" | 30 days | Last 30 date dirs |
-| "last few months", broad feature history | 90 days | Last 90 date dirs |
-
-**Widen only when needed.** If the initial scan finds related sessions, stop there. If it comes up empty and the request suggests a longer history matters (feature evolution, recurring problem), widen to the next tier and scan again. Do not jump straight to 30 or 90 days — step through the tiers one at a time.
-
-**When widening the time window**, re-invoke `ce-session-inventory` with the larger `<days>` argument. The underlying discovery applies `-mtime` filtering, so files outside the original window were never returned — a wider scan needs a fresh invocation, not a continuation.
-
-**For Codex**, sessions are in date directories. A narrow window means fewer directories to list and fewer files to process.
-
-## Session Sources
-
-Search Claude Code, Codex, and Cursor session history. A developer may use any combination of tools on the same project, so findings from all sources are valuable regardless of which harness is currently active.
-
-### Claude Code
-
-Sessions stored at `~/.claude/projects/<encoded-cwd>/<session-id>.jsonl`, where `<encoded-cwd>` replaces `/` with `-` in the working directory path (e.g., `/Users/alice/Code/my-project` becomes `-Users-alice-Code-my-project`). Claude Code retains session history for ~30 days by default. Wider scan tiers (90 days) may find nothing unless the user has extended retention. Codex and Cursor may retain longer.
-
-Key message types:
-- `type: "user"` -- Human messages. First user message includes `gitBranch` and `cwd` metadata.
-- `type: "assistant"` -- Claude responses. `content` array contains `thinking`, `text`, and `tool_use` blocks.
-- Tool results appear as `type: "user"` messages with `content[].type: "tool_result"`.
-
-### Codex
-
-Sessions stored at `~/.codex/sessions/YYYY/MM/DD/<session-file>.jsonl`, organized by date. Also check `~/.agents/sessions/YYYY/MM/DD/` as Codex may migrate to this location.
-
-Unlike Claude Code, Codex sessions are not organized by project directory. Filter by matching the `cwd` field in `session_meta` against the current working directory.
-
-Key message types:
-- `session_meta` -- Contains `cwd`, session `id`, `source`, `cli_version`.
-- `turn_context` -- Contains `cwd`, `model`, `current_date`.
-- `event_msg/user_message` -- User message text.
-- `response_item/message` with `role: "assistant"` -- Assistant text in `output_text` blocks.
-- `event_msg/exec_command_end` -- Command execution results with exit codes.
-- Codex does not store git branch in session metadata. Correlation relies on CWD matching and keyword search.
-
-### Cursor
-
-Agent transcripts stored at `~/.cursor/projects/<encoded-cwd>/agent-transcripts/<session-id>/<session-id>.jsonl`. Same CWD-encoding as Claude Code.
-
-Limitations compared to Claude Code and Codex:
-- No timestamps in the JSONL — file modification date is the only time signal.
-- No git branch, session ID, or CWD metadata in the data — derived from directory structure.
-- No tool results logged — tool calls are captured but not their outcomes (no success/fail signal).
-- `[REDACTED]` markers appear where Cursor stripped thinking/reasoning content.
-
-Key message types:
-- `role: "user"` -- User messages. Text wrapped in `<user_query>` tags (stripped by extraction scripts).
-- `role: "assistant"` -- Assistant responses. Same `content` array structure as Claude Code (`text`, `tool_use` blocks).
-
-## Extraction Primitives
-
-Extraction is delegated to two agent-facing skills. Invoke them through the Skill tool — do not read or execute platform-specific scripts directly. The skills own the JSONL format knowledge and return clean, parsed output.
+Stop as soon as you have a complete answer. A confident "no relevant prior sessions" within seconds is a complete answer; do not pad the synthesis to fill time. The orchestrator already capped the deep-dive set at 5 sessions — do not request more, and do not loop over the same files multiple times for diminishing returns.
 
-- **`ce-session-inventory`** — inventory of sessions for a repo. Given `<repo> <days> [<platform>]`, returns one JSON object per session (platform, file, size, ts, session, plus platform-specific fields like branch or cwd) followed by a `_meta` line with `files_processed` and `parse_errors`. Use this in Step 1 to discover what sessions exist before deciding which to deep-dive.
+## Synthesis methodology
 
-- **`ce-session-extract`** — per-session extraction. Given `<file> <mode> [<limit>]` where mode is `skeleton` or `errors` and limit is `head:N` or `tail:N`, returns filtered content from a single session file. Use this in Steps 4 and 5 for selected sessions.
+Read each `path` (and `errors_path`, when supplied) in the dispatch payload, then synthesize against the `problem_topic`. Look for:
 
-Both skills emit a `_meta` line with processing stats. When `parse_errors > 0`, note in the response that extraction was partial.
+- **Investigation journey** — What approaches were tried? What failed and why? What led to the eventual solution?
+- **User corrections** — Moments where the user redirected the approach. These reveal what NOT to do and why.
+- **Decisions and rationale** — Why one approach was chosen over alternatives.
+- **Error patterns** — Recurring errors across sessions (most visible when the orchestrator supplied an `errors_path` for a session) that indicate a systemic issue.
+- **Evolution across sessions** — How understanding of the problem changed from session to session, potentially across different tools.
+- **Cross-tool blind spots** — When sessions span Claude Code + Codex + Cursor, look for things the user might not realize from any single tool alone. Complementary work (one tool tackled the schema while the other tackled the API), duplicated effort (same approach tried in both tools days apart), or gaps (neither tool's sessions touched a component that connects the work). Only call out cross-tool observations when genuinely informative — if both sources tell the same story, there's nothing to flag.
+- **Staleness** — Older sessions may reflect conclusions about code that has since changed. When surfacing findings from sessions more than a few days old, consider whether the relevant code or context is likely to have moved on. Caveat older findings rather than presenting them with the same confidence as recent ones.
 
-## Methodology
+Cite actual evidence from the extracted files, not vibe-summaries. When a finding is anchored in a specific session's content, that session's metadata (platform, branch/cwd, ts) helps the caller locate it.
 
-### Step 1: Determine scope and discover sessions
-
-**Scope decision.** Two dimensions to resolve before scanning:
-
-- **Project scope**: Default to the current project. Widen to all projects only when the question explicitly asks.
-- **Platform scope**: Default to all platforms (Claude Code, Codex, Cursor). Narrow to a single platform when the question specifies one. If unclear on either dimension, use the default.
-
-Determine the scan window from the Time Range table above, then discover and extract metadata.
-
-**Derive the repo name** using a worktree-safe approach: `git rev-parse --path-format=absolute --git-common-dir` always returns an absolute path to the main repo's `.git`, so `basename "$(dirname "$common")"` yields the same value in regular checkouts and in linked worktrees. Guard against empty output (e.g., not inside a repo) so the failure path stays empty rather than a literal `.`. Example: `common=$(git rev-parse --path-format=absolute --git-common-dir 2>/dev/null) && [ -n "$common" ] && basename "$(dirname "$common")"`. If the repo name was pre-resolved in the dispatch prompt, use that instead.
-
-**Discover sessions and gather metadata via `ce-session-inventory`.** Invoke the skill with `<repo-name> <days>` (or add a `<platform>` arg to restrict to a single platform). The skill handles directory discovery, mtime filtering, zsh glob safety, and Codex CWD filtering internally, and returns one JSON object per session plus a `_meta` line.
-
-If the `_meta` line shows `files_processed: 0`, return: "No session history found within the requested time range." If `parse_errors > 0`, note that some sessions could not be parsed.
-
-### Step 3: Select sessions to deep-dive (or stop)
-
-A session being returned by `ce-session-inventory` only confirms it lives in the same repo (or matches the CWD filter for Codex). Same-repo is **not** the same as same-topic — repo membership is necessary, never sufficient. Follow this exact decision sequence after inventory returns:
-
-1. **Branch filter (Claude Code only).** Keep sessions where `branch == dispatch_branch` exactly, or where the branch name contains a keyword from the dispatch's problem topic (e.g., dispatch about "auth middleware" matches branches `feat/auth-fix`, `chore/auth-refactor`). For Codex (no `gitBranch`), this filter is empty — proceed to step 2.
-
-2. **If the branch filter returned zero sessions** (or you are processing Codex sessions):
-   - **a.** Derive 2-4 keywords from the dispatch's problem topic. For "a recent crash in the auth middleware where session-validation rejects valid tokens," derive `auth,middleware,session,token` (or similar).
-   - **b.** Invoke `ce-session-inventory` a second time with `<repo> <days> --keyword K1,K2,...`. The skill returns sessions with non-zero `match_count` plus per-keyword counts.
-   - **c.** **If `files_matched: 0`, return "no relevant prior sessions" immediately. Do not invoke `ce-session-extract`. STOP.**
-   - **d.** If `files_matched > 0`, treat those sessions as the candidate list. Rank by `match_count`, break ties by per-keyword counts.
-
-3. **Drop sessions outside the scan window before selecting.** A session is within the window if it was active during that period — use `last_ts` when available, fall back to `ts`. Discard sessions where both fall before the window start.
-
-4. **Exclude the current session** — its conversation history is already available to the caller.
-
-5. **Apply the deep-dive cap.** From the candidates remaining after the window and current-session filters, take at most **5 sessions total across all platforms**. If you have more, narrow by branch-match → `match_count` → file size > 30KB → recency.
-
-6. **Proceed to Step 4 only if you have at least one selected session.** If zero candidates remain after dropping out-of-window and the current session, return "no relevant prior sessions" and STOP.
-
-Do **not** roll your own per-file `grep -l` calls — step 2 (the `--keyword` mode) replaces that pattern.
-
-**Note: `gitBranch` is captured at the first user message only.** A session that began on `main` and did substantive work on a feature branch via mid-session `git checkout` records `branch: "main"`. Branch-match returning nothing is **not** conclusive evidence of "no prior history" — that is exactly why step 2 is required in the zero-branch-match case.
-
-Prefer sessions that are:
-- Strongly correlated (same branch)
-- Topically dense (high `match_count` when keyword-filtering was used)
-- Substantive (file size > 30KB suggests meaningful work)
-
-### Step 4: Extract conversation skeleton
-
-**Only run this step if Step 3 produced one or more selected sessions.** If Step 3 returned "no relevant prior sessions" and stopped, skip Step 4 entirely — do not extract any session for any reason, including "to verify."
-
-For each selected session, invoke `ce-session-extract` with mode `skeleton` and limit `head:200`. Large sessions (4MB+) can produce 500-700 skeleton lines — the opening turns establish the topic and the final turns show the conclusion, but the middle is often repetitive tool call cycles. 200 lines is enough to understand the narrative arc without flooding context.
-
-**Tail extraction is conditional, not default.** Only invoke `ce-session-extract` again with `tail:50` when the `head:200` output appears to terminate mid-investigation (e.g., last visible turn is a tool call with no resolution, or the assistant is mid-debugging without a conclusion). If `head:200` already shows the session reaching a conclusion or running out of substantive activity, do not run a second extract — the head covers it.
-
-### Step 5: Extract error signals (selective)
-
-For sessions where investigation dead-ends are likely valuable, invoke `ce-session-extract` with mode `errors`. Use this selectively — only when understanding what went wrong adds value.
-
-### Step 6: Synthesize findings
-
-Reason over the extracted conversation skeletons and error signals from both sources.
+## Output
 
-Look for:
+If the dispatch prompt supplies an `output_schema`, follow it verbatim. Do not add extra sections. Do not prepend the default header below.
 
-- **Investigation journey** -- What approaches were tried? What failed and why? What led to the eventual solution?
-- **User corrections** -- Moments where the user redirected the approach. These reveal what NOT to do and why.
-- **Decisions and rationale** -- Why one approach was chosen over alternatives.
-- **Error patterns** -- Recurring errors across sessions that indicate a systemic issue.
-- **Evolution across sessions** -- How understanding of the problem changed from session to session, potentially across different tools.
-- **Cross-tool blind spots** -- When findings come from both Claude Code and Codex, look for things the user might not realize from either tool alone. This could be complementary work (one tool tackled the schema while the other tackled the API), duplicated effort (same approach tried in both tools days apart), or gaps (neither tool's sessions touched a component that connects the work). Only mention cross-tool observations when they're genuinely informative — if both sources tell the same story, there's nothing to call out.
-- **Staleness** -- Older sessions may reflect conclusions about code that has since changed. When surfacing findings from sessions more than a few days old, consider whether the relevant code or context is likely to have moved on. Caveat older findings when appropriate rather than presenting them with the same confidence as recent ones.
+Otherwise, lead with a brief one-line provenance header:
 
-## Output
-
-**If the caller specifies an output format**, use it. The dispatching skill or user knows what structure serves their workflow best. Follow their format instructions and do not add extra sections.
+```
+**Sessions read**: [count] ([N] Claude Code, [N] Codex, [N] Cursor) | [date range]
+```
 
-**If no format is specified**, respond in whatever way best answers the question. Include a brief header noting what was searched:
+Then the synthesis prose, organized under the default schema:
 
 ```
-**Sessions searched**: [count] ([N] Claude Code, [N] Codex, [N] Cursor) | [date range]
+- What was tried before
+- What didn't work
+- Key decisions
+- Related context
 ```
 
+Omit any section with no findings. If no sessions yielded relevant content, return `no relevant prior sessions` instead of empty section headings.
 
-## Tool Guidance
+## Tool guidance
 
-- Delegate all JSONL extraction to the `ce-session-inventory` and `ce-session-extract` skills. Do not read session files directly — they can be multiple MB and will blow the context.
-- Use native content-search (e.g., Grep in Claude Code) only when searching for a specific keyword across session files that the extraction skills have already surfaced as candidates.
+- Use the platform's native file-read tool (e.g., `Read` in Claude Code) for each path the orchestrator supplied. Do not shell out to `cat` — native tools avoid permission prompts and are more reliable.
+- Native content-search (e.g., `Grep`) is appropriate when you want to locate a specific keyword across the supplied scratch files (not across source session files).
+- **Do not invoke the `Skill` tool, use the `Bash` tool to run extraction scripts, or call any discovery primitive.** All discovery and extraction are the orchestrator's responsibility; this agent's contract is "read the paths you were given and synthesize."
diff --git a/plugins/compound-engineering/skills/ce-compound/SKILL.md b/plugins/compound-engineering/skills/ce-compound/SKILL.md
index 794092cef..c883c918e 100644
--- a/plugins/compound-engineering/skills/ce-compound/SKILL.md
+++ b/plugins/compound-engineering/skills/ce-compound/SKILL.md
@@ -24,7 +24,7 @@ Captures problem solutions while context is fresh, creating structured documenta
 
 **Git branch (pre-resolved):** !`git rev-parse --abbrev-ref HEAD 2>/dev/null || true`
 
-If the line above resolved to a plain branch name (like `feat/my-branch`), pass it into the Session Historian dispatch in Phase 1 so the agent does not waste a turn deriving it. If it still contains a backtick command string or is empty, omit it and let the agent derive it at runtime.
+If the line above resolved to a plain branch name (like `feat/my-branch`), include it in the `ce-sessions` invocation payload in Phase 1 so the orchestrator does not waste a turn deriving it. If it still contains a backtick command string or is empty, omit it and let `ce-sessions` derive it at runtime.
 
 ## Support Files
 
@@ -61,7 +61,7 @@ for relevant knowledge to help the Compound process? This adds
 time and token usage.
 ```
 
-If the user says yes, dispatch the Session Historian in Phase 1. If no, skip it. Do not ask this in lightweight mode.
+If the user says yes, invoke `ce-sessions` in Phase 1 (see step 4). If no, skip it. Do not ask this in lightweight mode.
 
 ---
 
@@ -100,8 +100,7 @@ Launch research subagents. Each returns text data to the orchestrator.
 
 **Dispatch order:**
 - Launch `Context Analyzer`, `Solution Extractor`, and `Related Docs Finder` in parallel (background)
-- Then dispatch `ce-session-historian` in foreground — it reads session files outside the working directory that background agents may not have access to
-- The foreground dispatch runs while the background agents work, adding no wall-clock time
+- **Then** invoke the `ce-sessions` skill via the platform's skill-invocation primitive (see step 4 below) — only if the user opted in to session history. The skill call is synchronous from this orchestrator's main-context turn, but the already-dispatched background subagents continue running in parallel underneath, so the wall-clock benefit is preserved (`max(ce-sessions, slowest background subagent)`, not their sum). Issuing the skill call before the parallel block would serialize ce-sessions in front of the research subagents and regress wall-clock time.
 
 <parallel_tasks>
 
@@ -172,16 +171,13 @@ Launch research subagents. Each returns text data to the orchestrator.
 
 </parallel_tasks>
 
-#### 4. **Session Historian** (foreground, after launching the above — only if the user opted in)
-   - **Skip entirely** if the user declined session history in the follow-up question
-   - Dispatched as `ce-session-historian`
-   - Dispatch in **foreground** — this agent reads session files outside the working directory (`~/.claude/projects/`, `~/.codex/sessions/`, `~/.cursor/projects/`) which background agents may not have access to
-   - Omit the `mode` parameter so the user's configured permission settings apply
-   - Dispatch on the mid-tier model (e.g., `model: "sonnet"` in Claude Code) — the synthesis feeds into compound assembly and doesn't need frontier reasoning
+#### 4. **Session History via `ce-sessions`** (synchronous skill call, after launching the parallel block — only if the user opted in)
+   - **Skip entirely** if the user declined session history in the follow-up question, or if running in lightweight mode.
+   - Invoke the `ce-sessions` skill via the platform's skill-invocation primitive (`Skill` in Claude Code and Codex, the equivalent on Gemini/Pi). Pass the dispatch payload below as the skill argument string. `ce-sessions` runs in main context — it owns discovery, branch/keyword filtering, scan-window selection, the deep-dive cap, per-session extraction to a `mktemp` scratch dir, and dispatch of the synthesis-only `ce-session-historian` subagent. The compound orchestrator only needs to pass the topic and time window and read back the findings text.
 
-   **Dispatch prompt — keep tight.** A long, keyword-rich prompt licenses the agent to keep widening. Use this shape:
+   **Dispatch payload — keep tight.** A long, keyword-rich payload licenses ce-sessions to keep widening. Use this shape:
 
-   - **Pre-resolved context** (only if values resolved cleanly above; otherwise omit and let the agent derive): repo name, current git branch.
+   - **Pre-resolved context** (only if values resolved cleanly above; otherwise omit): repo name, current git branch.
    - **Time window**: explicit `7 days` unless the documented problem clearly spans a longer arc.
    - **Problem topic**: one sentence naming the concrete issue — error message, module name, what broke and how it was fixed. Not a paragraph; not a bullet list of related topics.
    - **Filter rule (one line)**: "Only surface findings directly relevant to this specific problem. Ignore unrelated work from the same sessions or branches."
@@ -195,8 +191,8 @@ Launch research subagents. Each returns text data to the orchestrator.
      - Related context
      ```
 
-   Do not append additional context blocks, exclusion lists, or topic-keyword bullets — verbose dispatch prompts give the agent license to keep widening the search and rapidly compound wall time. If the agent needs keyword search, it owns that decision via the `--keyword` mode on `ce-session-inventory`.
-   - Returns: structured digest of findings from prior sessions, or "no relevant prior sessions" if none found
+   Do not append additional context blocks, exclusion lists, or topic-keyword bullets — verbose payloads give ce-sessions license to keep widening the search and rapidly compound wall time. If keyword search is needed, ce-sessions owns that decision internally based on the topic.
+   - Returns: structured digest of findings from prior sessions, or "no relevant prior sessions" if none found.
 
 ### Phase 2: Assembly & Write
 
@@ -219,7 +215,7 @@ The orchestrating agent (main conversation) performs these steps:
 
    When updating an existing doc, preserve its file path and frontmatter structure. Update the solution, code examples, prevention tips, and any stale references. Add a `last_updated: YYYY-MM-DD` field to the frontmatter. Do not change the title unless the problem framing has materially shifted.
 
-3. **Incorporate session history findings** (if available). When the Session History Researcher returned relevant prior-session context:
+3. **Incorporate session history findings** (if available). When `ce-sessions` returned relevant prior-session context:
    - Fold investigation dead ends and failed approaches into the **What Didn't Work** section (bug track) or **Context** section (knowledge track)
    - Use cross-session patterns to enrich the **Prevention** or **Why This Matters** sections
    - Tag session-sourced content with "(session history)" so its origin is clear to future readers
diff --git a/plugins/compound-engineering/skills/ce-session-extract/SKILL.md b/plugins/compound-engineering/skills/ce-session-extract/SKILL.md
deleted file mode 100644
index f7738e210..000000000
--- a/plugins/compound-engineering/skills/ce-session-extract/SKILL.md
+++ /dev/null
@@ -1,64 +0,0 @@
----
-name: ce-session-extract
-description: "Extract conversation skeleton or error signals from a single session file at a given path. Invoked by session-research agents after they have selected which sessions to deep-dive — not intended for direct user queries."
-user-invocable: false
-context: fork
----
-
-# Session extract
-
-Agent-facing primitive. Extract filtered content from a single Claude Code, Codex, or Cursor session file — either a conversation skeleton or error signals.
-
-This skill exists so that agents do not read multi-megabyte session files into context. The scripts under `scripts/` own the JSONL shape knowledge and emit a narrative-readable digest.
-
-## Arguments
-
-Space-separated positional args:
-
-1. `<file>` — absolute path to a session JSONL file (typically a `file` value returned by `ce-session-inventory`).
-2. `<mode>` — `skeleton` or `errors`.
-3. `<limit>` *(optional)* — `head:N` or `tail:N` to cap output at N lines (e.g., `head:200`). Omit to return full extraction.
-
-## Execution
-
-**Skeleton mode** — narrative of user messages, assistant text, and collapsed tool-call summaries:
-
-```bash
-cat <file> | python3 scripts/extract-skeleton.py
-```
-
-**Errors mode** — just error signals:
-
-```bash
-cat <file> | python3 scripts/extract-errors.py
-```
-
-If `<limit>` is `head:N`, pipe through `head -n N`. If `tail:N`, pipe through `tail -n N`. Apply the limit after the Python script, never before — the `_meta` line is emitted last and a head cap may drop it; that is acceptable when the caller asks for a head cap.
-
-Return the raw stdout verbatim. Do not paraphrase, annotate, or synthesize — the caller does synthesis across multiple sessions.
-
-## What each mode returns
-
-### Skeleton
-
-Narrative output with one logical event per block, separated by `---`:
-
-- User messages (text only, no tool results, framework wrapper tags stripped)
-- Assistant text (no thinking/reasoning blocks — those are internal or encrypted)
-- Tool call summaries; 3+ consecutive same-name calls are collapsed (e.g., `[tools] 5x Read (file1, file2, +3 more) -> all ok`)
-
-Ends with a `_meta` line: `{"_meta": true, "lines": N, "parse_errors": N, "user": N, "assistant": N, "tool": N}`.
-
-### Errors
-
-One line per error, separated by `---`:
-
-- Claude Code: tool results with `is_error: true`
-- Codex: `exec_command_end` events with non-zero exit or non-empty stderr
-- Cursor: always empty — Cursor agent transcripts do not log tool results
-
-Ends with a `_meta` line: `{"_meta": true, "lines": N, "parse_errors": N, "errors_found": N}`.
-
-## Error handling
-
-If the file cannot be read, let the error surface to the caller. If `_meta` reports `parse_errors > 0`, return the output as-is — partial extraction is still useful and the caller decides whether to widen the search or deep-dive further.
diff --git a/plugins/compound-engineering/skills/ce-session-inventory/SKILL.md b/plugins/compound-engineering/skills/ce-session-inventory/SKILL.md
deleted file mode 100644
index a3c70878e..000000000
--- a/plugins/compound-engineering/skills/ce-session-inventory/SKILL.md
+++ /dev/null
@@ -1,68 +0,0 @@
----
-name: ce-session-inventory
-description: "Discover session files for a repo across Claude Code, Codex, and Cursor, and extract session metadata (timestamps, branch, cwd, size, platform). Invoked by session-research agents — not intended for direct user queries."
-user-invocable: false
-context: fork
----
-
-# Session inventory
-
-Agent-facing primitive. Discover session files and emit session metadata as JSONL across Claude Code, Codex, and Cursor.
-
-This skill exists so that agents researching session history do not need to know the layout of session stores on disk or the JSONL shapes of each platform. The scripts under `scripts/` own that knowledge.
-
-## Arguments
-
-Space-separated positional args:
-
-1. `<repo>` — repo folder name (e.g., `my-project`). Used for directory matching in Claude Code and Cursor, and as the CWD filter for Codex sessions.
-2. `<days>` — scan window in days (e.g., `7`). Session files older than this are skipped.
-3. `<platform>` *(optional)* — one of `claude`, `codex`, `cursor`. Omit to search all three.
-4. `--keyword K1[,K2,...]` *(optional)* — filter to sessions whose full file content matches at least one of the comma-separated keywords (case-insensitive substring). Each emitted session line gains `match_count` and `keyword_matches` ({K: N, ...}) fields, and the `_meta` line gains `files_matched`. Use this instead of rolling per-file `grep -l` calls when ranking many sessions by topical relevance.
-
-## Execution
-
-Run the discovery-plus-metadata pipeline from the skill's own `scripts/` directory:
-
-```bash
-bash scripts/discover-sessions.sh <repo> <days> [--platform <platform>] \
-  | tr '\n' '\0' \
-  | xargs -0 python3 scripts/extract-metadata.py --cwd-filter <repo>
-```
-
-To filter by keyword, append `--keyword K1[,K2,...]` to the `extract-metadata.py` invocation. Keyword scanning reads the full file (not just the head metadata window), so it costs more than a metadata-only run — use it when you need to rank candidates by topic across many sessions, not as a default.
-
-Return the raw stdout verbatim — one JSON object per session, then a final `_meta` line. Callers parse the JSONL directly, so do not paraphrase, reformat, or summarize.
-
-If discovery finds no files, the pipeline still emits a clean `_meta` line (`files_processed: 0`). Return that as-is.
-
-## Output format
-
-Each session line is a JSON object. Common fields across platforms:
-
-- `platform` — `claude`, `codex`, or `cursor`
-- `file` — absolute path to the session JSONL
-- `size` — file size in bytes
-- `ts` — session start timestamp (ISO 8601)
-- `session` — session identifier
-
-Platform-specific fields:
-
-- Claude Code adds `branch` (git branch) and `last_ts` (last message timestamp).
-- Codex adds `cwd` (working directory), `source`, `cli_version`, `model`, `last_ts`.
-- Cursor has no in-file timestamps or metadata — `ts` is derived from file mtime and `session` from the containing directory name.
-
-The final `_meta` line has `files_processed`, `parse_errors`, and optionally `filtered_by_cwd` (count of Codex sessions dropped by the CWD filter) and `files_matched` (count of sessions retained by the keyword filter, present only when `--keyword` was set).
-
-When `--keyword` is set, each session line additionally carries:
-
-- `match_count` — total occurrences across all keywords
-- `keyword_matches` — per-keyword counts, e.g., `{"middleware": 4, "auth": 12}`
-
-Sessions with `match_count: 0` are excluded from output.
-
-## Error handling
-
-If the discovery script errors (e.g., unreadable home directory, permission failure), let the error surface to the caller. Do not substitute git log, file listings, or other sources — this skill's contract is session metadata, nothing else.
-
-If `_meta` reports `parse_errors > 0`, return the JSONL as-is. The caller decides how to handle partial data.
diff --git a/plugins/compound-engineering/skills/ce-sessions/SKILL.md b/plugins/compound-engineering/skills/ce-sessions/SKILL.md
index d0a863f68..af7cb5d3d 100644
--- a/plugins/compound-engineering/skills/ce-sessions/SKILL.md
+++ b/plugins/compound-engineering/skills/ce-sessions/SKILL.md
@@ -1,11 +1,11 @@
 ---
 name: ce-sessions
-description: "Search and ask questions about your coding agent session history. Use when asking what you worked on, what was tried before, how a problem was investigated across sessions, what happened recently, or any question about past agent sessions. Also use when the user references prior sessions, previous attempts, or past investigations — even without saying 'sessions' explicitly."
+description: "Search and ask questions about coding agent session history across Claude Code, Codex, and Cursor. Use when asking what was worked on, what was tried before, how a problem was investigated across sessions, what happened recently, or any question about past agent sessions. Also use when the user references prior sessions, previous attempts, or past investigations — even without saying 'sessions' explicitly."
 ---
 
 # /ce-sessions
 
-Search your session history.
+Search session history across Claude Code, Codex, and Cursor and synthesize findings about what was worked on, tried, decided, or learned in prior sessions.
 
 ## Usage
 
@@ -18,14 +18,199 @@ Search your session history.
 
 **Git branch (pre-resolved):** !`git rev-parse --abbrev-ref HEAD 2>/dev/null || true`
 
-If the line above resolved to a plain branch name (like `feat/my-branch`), pass it to the agent. If it still contains a backtick command string or is empty, it did not resolve — omit it and let the agent derive it at runtime.
+If the line above resolved to a plain branch name (like `feat/my-branch`), use it for branch filtering and pass it to the synthesis subagent. If it still contains a backtick command string or is empty, derive the branch at runtime instead.
+
+**Repo name (pre-resolved):** !`basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || true`
+
+If the line above resolved to a plain repo folder name, use it for session discovery. Otherwise derive at runtime.
+
+## Note: 2026
+
+The current year is 2026. Use this when interpreting session timestamps.
+
+## Guardrails
+
+These rules apply at all times during orchestration and synthesis.
+
+- **Never read entire session files into context.** Session files can be 1-7MB. Always use the extraction scripts to filter first, then reason over the filtered output.
+- **Never extract or reproduce tool call inputs/outputs verbatim.** Summarize what was attempted and what happened.
+- **Never include thinking or reasoning block content.** Claude Code thinking blocks are internal reasoning; Codex reasoning blocks are encrypted. Neither is actionable.
+- **Never analyze the current session.** Its conversation history is already available to the caller.
+- **Surface technical content, not personal content.** Sessions contain everything — credentials, frustration, half-formed opinions. Use judgment about what belongs in a technical summary and what doesn't.
+- **Fail fast on access errors.** If session discovery fails on permissions, report the issue immediately. Do not retry the same operation with different tools or approaches — repeated retries waste tokens without changing the outcome.
 
 ## Execution
 
-If no argument is provided, ask what the user wants to know about their session history. Use the platform's blocking question tool: `AskUserQuestion` in Claude Code (call `ToolSearch` with `select:AskUserQuestion` first if its schema isn't loaded), `request_user_input` in Codex, `ask_user` in Gemini, `ask_user` in Pi (requires the `pi-ask-user` extension). Fall back to asking in plain text only when no blocking tool exists in the harness or the call errors (e.g., Codex edit modes) — not because a schema load is required. Never silently skip the question.
+If no question argument is provided, ask what the user wants to know about their session history. Use the platform's blocking question tool: `AskUserQuestion` in Claude Code (call `ToolSearch` with `select:AskUserQuestion` first if its schema isn't loaded), `request_user_input` in Codex, `ask_user` in Gemini, `ask_user` in Pi (requires the `pi-ask-user` extension). Fall back to asking in plain text only when no blocking tool exists in the harness or the call errors (e.g., Codex edit modes) — not because a schema load is required. Never silently skip the question.
+
+### Step 1 — Determine scan window
+
+Infer a time range from the user's question. Start narrow; widen only if a narrow scan finds nothing relevant.
+
+| Signal | Initial scan window |
+|--------|---------------------|
+| "today", "this morning" | 1 day |
+| "recently", "last few days", "this week", or no time signal | 7 days |
+| "last few weeks", "this month" | 30 days |
+| "last few months", broad feature history | 90 days |
+
+Claude Code retains session history for ~30 days by default. Wider windows may find nothing on Claude Code unless the user has extended retention.
+
+### Step 2 — Discover sessions and extract metadata
+
+Run the discovery + metadata pipeline (preserving the null-delimited xargs hardening that lets `extract-metadata.py` run in batch mode):
+
+```bash
+bash scripts/discover-sessions.sh <repo> <days> | tr '\n' '\0' | xargs -0 python3 scripts/extract-metadata.py --cwd-filter <repo>
+```
+
+Each output line is a JSON object describing a session (platform, file, size, ts, session, plus platform-specific fields). The final `_meta` line carries `files_processed` and `parse_errors`.
+
+If the inventory's `_meta` line shows `files_processed: 0`, return "no relevant prior sessions" and stop.
+
+If `parse_errors > 0`, note that some sessions could not be parsed and proceed with what was returned.
+
+To narrow the platform set, add `--platform claude`, `--platform codex`, or `--platform cursor` to the `discover-sessions.sh` invocation. Default to all three.
+
+### Step 3 — Filter and rank
+
+Apply these filters in order to pick the sessions worth deep-diving:
+
+1. **Branch filter (Claude Code only).** Keep sessions whose `branch` field equals the pre-resolved (or runtime-derived) git branch exactly, or whose branch name contains a keyword from the question's topic (e.g., a question about "auth middleware" matches branches `feat/auth-fix`, `chore/auth-refactor`). Codex and Cursor sessions don't carry a `branch` field — skip this filter for them.
+
+2. **If the branch filter returned zero sessions, or you're processing Codex or Cursor sessions:**
+   - Derive 2-4 keywords from the question's topic. For "a recent crash in the auth middleware where session-validation rejects valid tokens", derive `auth,middleware,session,token` (or similar).
+   - Re-invoke the discovery pipeline with `--keyword K1,K2,...` appended to the `extract-metadata.py` invocation. The script returns sessions with non-zero `match_count` plus per-keyword counts.
+   - **If `files_matched: 0`, return "no relevant prior sessions" and stop.** Do not extract anything.
+   - If `files_matched > 0`, treat those sessions as candidates. Rank by `match_count`, break ties by per-keyword counts.
+
+3. **Drop sessions outside the scan window.** Use `last_ts` when available, fall back to `ts`. Discard sessions where both fall before the window start.
+
+4. **Exclude the current session** — its conversation history is already available to the caller.
+
+5. **Apply the deep-dive cap.** Take at most **5 sessions total across all platforms**. Narrow by branch-match → `match_count` → file size > 30KB → recency.
+
+6. **Proceed only if at least one session remains after filtering.** Otherwise return "no relevant prior sessions" and stop.
+
+**Note: `gitBranch` is captured at the first user message only.** A session that began on `main` and did substantive work on a feature branch via mid-session `git checkout` records `branch: "main"`. Branch-match returning nothing is not conclusive evidence — that's why the keyword-filter fallback in step 2 is required.
+
+### Step 4 — Set up scratch space
+
+Create a per-run throwaway scratch directory:
+
+```bash
+SCRATCH=$(mktemp -d -t ce-sessions-XXXXXX)
+```
+
+Capture the absolute path; thread it into Step 5 and Step 6. The OS reclaims temp directories eventually (not at session end); an explicit `rm -rf "$SCRATCH"` at the end of Step 7 is harmless and makes intent explicit.
+
+### Step 5 — Extract per-session content (file-mediated)
+
+For each selected session, run the skeleton extractor with `--output` so content writes directly to the scratch file — extraction bytes never round-trip through the orchestrator's tool results:
+
+```bash
+python3 scripts/extract-skeleton.py --output "$SCRATCH/<session-id>.skeleton.txt" < <session-file>
+```
+
+Stdout receives only a one-line JSON status (`{"_meta": true, "wrote": "...", "bytes": N, ...}`). Capture `bytes` and `parse_errors` from each status line.
+
+**Conditional tail-extract** — if a skeleton terminates mid-investigation (last visible turn is a tool call with no resolution, or the assistant is mid-debugging without a conclusion), write a second copy of the skeleton to trim down to its tail:
+
+```bash
+python3 scripts/extract-skeleton.py --output "$SCRATCH/<session-id>.skeleton.tail.txt" < <session-file>
+```
+
+(The skeleton script does not accept a `tail:N` cap, so this command writes the full skeleton again; if a tail-only view is needed, post-process the scratch file in shell with `tail -n 50` after extraction. Use this only when the skeleton suggests the session was cut off mid-investigation.)
+
+**Conditional errors-mode** — for sessions where investigation dead-ends are likely valuable:
+
+```bash
+python3 scripts/extract-errors.py --output "$SCRATCH/<session-id>.errors.txt" < <session-file>
+```
+
+Use selectively — only when understanding what went wrong adds value. Cursor agent transcripts don't log tool results, so errors-mode produces nothing for Cursor sessions.
+
+### Step 6 — Dispatch synthesis subagent
+
+Dispatch the `ce-session-historian` subagent via the platform's subagent primitive (`Agent` in Claude Code, `spawn_agent` in Codex, `subagent` in Pi via the `pi-subagents` extension). Omit the `mode` parameter so the user's configured permission settings apply. Run on the mid-tier model (e.g., `model: "sonnet"` in Claude Code) — the synthesizer doesn't need frontier reasoning.
+
+The dispatch prompt is the agent's input contract. Pass these fields:
+
+- `problem_topic` — one sentence naming the concrete question. Lift from the user's argument or, if missing, from the answer to the no-arg prompt.
+- `scratch_dir` — absolute path to `$SCRATCH`.
+- `sessions` — an array of objects, one per extracted session, each with:
+  - `path` — absolute path to the skeleton file (and optionally `errors_path` for the errors file when extracted)
+  - `platform` — `claude`, `codex`, or `cursor`
+  - `branch` — git branch when present (Claude Code only)
+  - `cwd` — working directory when present (Codex only)
+  - `ts` and `last_ts` — session timestamps
+  - `match_count` and `keyword_matches` — when keyword filtering was used
+- `output_schema` — the structure the agent's response should follow. Default schema:
+  ```
+  Structure your response with these sections (omit any with no findings):
+  - What was tried before
+  - What didn't work
+  - Key decisions
+  - Related context
+  ```
+  When the caller (e.g., `ce-compound`) supplies a schema in the skill argument, pass it through verbatim.
+
+Example dispatch shape:
+
+```
+Synthesize findings from these prior sessions:
+
+Problem topic: <one-line topic>
+
+Sessions to read (paths in $SCRATCH):
+1. /tmp/ce-sessions-XXXX/abc123.skeleton.txt
+   platform=claude branch=feat/auth-fix ts=2026-05-01
+2. /tmp/ce-sessions-XXXX/def456.skeleton.txt  errors=/tmp/ce-sessions-XXXX/def456.errors.txt
+   platform=codex cwd=/Users/.../my-project ts=2026-05-03
+...
+
+Output schema:
+- What was tried before
+- What didn't work
+- Key decisions
+- Related context
+
+Filter rule: only surface findings directly relevant to this specific problem.
+Ignore unrelated work from the same sessions or branches.
+```
+
+The agent reads each path via the platform's native file-read tool and returns prose findings. Bulk extraction content lives only in the agent's subagent context — the orchestrator's working state stays at file paths plus small inventory metadata.
+
+### Step 7 — Return findings
+
+Return the synthesizer's output text to the caller verbatim. If discovery or keyword filtering returned zero sessions (Step 2 or Step 3), return the literal string `no relevant prior sessions` instead.
+
+Optionally clean up scratch:
+
+```bash
+rm -rf "$SCRATCH"
+```
+
+The OS handles cleanup eventually regardless; the explicit cleanup is for readers who expect it.
+
+## Output
+
+When the caller (typically a user typing `/ce-sessions`, or another skill invoking ce-sessions via the platform's skill-invocation primitive) does not specify an output format, include a brief header noting what was searched:
+
+```
+**Sessions searched**: [count] ([N] Claude Code, [N] Codex, [N] Cursor) | [date range]
+```
+
+Then the synthesizer's prose findings. When the caller supplies a schema, honor it verbatim and omit the default header.
+
+## Time budget
+
+Stop as soon as a complete answer is available. A confident "no relevant prior sessions" within seconds is a complete answer; do not extend the search to fill time. The structural caps in Step 3 (max 5 sessions deep-dived) and Step 5 (conditional tail/errors extraction) bound runtime by construction.
+
+## Error handling
+
+If the discovery pipeline fails (e.g., unreadable home directory, permission failure), surface the error to the caller. Do not substitute git log, file listings, or other sources — this skill's contract is session metadata and synthesis.
 
-Dispatch `ce-session-historian` with the user's question as the task prompt. Omit the `mode` parameter so the user's configured permission settings apply. Include in the dispatch prompt:
+If an extraction `--output` write fails (disk full, permission denied), surface a clear error and do not dispatch the synthesizer with partial paths.
 
-- The user's question
-- The current working directory
-- The repo name and git branch from pre-resolved context (only if they resolved to plain values — do not pass literal command strings)
+If `_meta` reports `parse_errors > 0` from any script, note the partial extraction in the dispatch prompt and proceed; the synthesizer flags the partial extraction in its findings.
diff --git a/plugins/compound-engineering/skills/ce-session-inventory/scripts/discover-sessions.sh b/plugins/compound-engineering/skills/ce-sessions/scripts/discover-sessions.sh
similarity index 100%
rename from plugins/compound-engineering/skills/ce-session-inventory/scripts/discover-sessions.sh
rename to plugins/compound-engineering/skills/ce-sessions/scripts/discover-sessions.sh
diff --git a/plugins/compound-engineering/skills/ce-session-extract/scripts/extract-errors.py b/plugins/compound-engineering/skills/ce-sessions/scripts/extract-errors.py
similarity index 75%
rename from plugins/compound-engineering/skills/ce-session-extract/scripts/extract-errors.py
rename to plugins/compound-engineering/skills/ce-sessions/scripts/extract-errors.py
index 1b557fd16..9d48c82f1 100644
--- a/plugins/compound-engineering/skills/ce-session-extract/scripts/extract-errors.py
+++ b/plugins/compound-engineering/skills/ce-sessions/scripts/extract-errors.py
@@ -1,16 +1,39 @@
 #!/usr/bin/env python3
 """Extract error signals from a Claude Code, Codex, or Cursor JSONL session file.
 
-Usage: cat <session.jsonl> | python3 extract-errors.py
+Usage:
+  cat <session.jsonl> | python3 extract-errors.py
+  cat <session.jsonl> | python3 extract-errors.py --output PATH
 
 Auto-detects platform from the JSONL structure.
 Note: Cursor agent transcripts do not log tool results, so no errors can be extracted.
 Finds failed tool calls / commands and outputs them with timestamps.
-Outputs a _meta line at the end with processing stats.
+
+When --output PATH is given, the extracted error log is written to PATH and
+stdout receives only a one-line JSON status (_meta with wrote/bytes/stats).
+This lets callers route bulk content to a scratch file without round-tripping
+extraction bytes through orchestrator tool results.
+
+Without --output, extracted content goes to stdout and ends with a _meta line.
 """
+import argparse
+import io
+import os
 import sys
 import json
 
+parser = argparse.ArgumentParser(add_help=True)
+parser.add_argument(
+    "--output",
+    metavar="PATH",
+    help="Write extracted errors to PATH instead of stdout. Stdout receives a one-line _meta status.",
+)
+args = parser.parse_args()
+
+_original_stdout = sys.stdout
+if args.output:
+    sys.stdout = io.StringIO()
+
 stats = {"lines": 0, "parse_errors": 0, "errors_found": 0}
 
 
@@ -102,3 +125,11 @@ def handle_noop(obj):
         stats["parse_errors"] += 1
 
 print(json.dumps({"_meta": True, **stats}))
+
+if args.output:
+    body = sys.stdout.getvalue()
+    sys.stdout = _original_stdout
+    with open(args.output, "w") as f:
+        f.write(body)
+    bytes_written = os.path.getsize(args.output)
+    print(json.dumps({"_meta": True, "wrote": args.output, "bytes": bytes_written, **stats}))
diff --git a/plugins/compound-engineering/skills/ce-session-inventory/scripts/extract-metadata.py b/plugins/compound-engineering/skills/ce-sessions/scripts/extract-metadata.py
similarity index 100%
rename from plugins/compound-engineering/skills/ce-session-inventory/scripts/extract-metadata.py
rename to plugins/compound-engineering/skills/ce-sessions/scripts/extract-metadata.py
diff --git a/plugins/compound-engineering/skills/ce-session-extract/scripts/extract-skeleton.py b/plugins/compound-engineering/skills/ce-sessions/scripts/extract-skeleton.py
similarity index 89%
rename from plugins/compound-engineering/skills/ce-session-extract/scripts/extract-skeleton.py
rename to plugins/compound-engineering/skills/ce-sessions/scripts/extract-skeleton.py
index 15de188c2..e6581c6b8 100644
--- a/plugins/compound-engineering/skills/ce-session-extract/scripts/extract-skeleton.py
+++ b/plugins/compound-engineering/skills/ce-sessions/scripts/extract-skeleton.py
@@ -1,7 +1,9 @@
 #!/usr/bin/env python3
 """Extract the conversation skeleton from a Claude Code, Codex, or Cursor JSONL session file.
 
-Usage: cat <session.jsonl> | python3 extract-skeleton.py
+Usage:
+  cat <session.jsonl> | python3 extract-skeleton.py
+  cat <session.jsonl> | python3 extract-skeleton.py --output PATH
 
 Auto-detects platform (Claude Code, Codex, or Cursor) from the JSONL structure.
 Extracts:
@@ -12,12 +14,36 @@
 Consecutive tool calls of the same type are collapsed:
   3+ Read calls -> "[tools] 3x Read (file1, file2, +1 more) -> all ok"
 Codex call/result pairs are deduplicated (only the result with status is kept).
-Outputs a _meta line at the end with processing stats.
+
+When --output PATH is given, the extracted skeleton is written to PATH and
+stdout receives only a one-line JSON status (_meta with wrote/bytes/stats).
+This lets callers route bulk content to a scratch file without round-tripping
+extraction bytes through orchestrator tool results.
+
+Without --output, extracted content goes to stdout and ends with a _meta line.
 """
+import argparse
+import io
+import os
 import sys
 import json
 import re
 
+parser = argparse.ArgumentParser(add_help=True)
+parser.add_argument(
+    "--output",
+    metavar="PATH",
+    help="Write extracted skeleton to PATH instead of stdout. Stdout receives a one-line _meta status.",
+)
+args = parser.parse_args()
+
+# Capture-and-redirect when --output is set: prints in the rest of the script
+# go to the buffer; at the end the buffer is written to PATH and a status
+# line is emitted to the real stdout.
+_original_stdout = sys.stdout
+if args.output:
+    sys.stdout = io.StringIO()
+
 stats = {"lines": 0, "parse_errors": 0, "user": 0, "assistant": 0, "tool": 0}
 
 # Claude Code wrapper tags to strip from user message content.
@@ -315,3 +341,11 @@ def handle_cursor(obj):
 flush_tools()
 
 print(json.dumps({"_meta": True, **stats}))
+
+if args.output:
+    body = sys.stdout.getvalue()
+    sys.stdout = _original_stdout
+    with open(args.output, "w") as f:
+        f.write(body)
+    bytes_written = os.path.getsize(args.output)
+    print(json.dumps({"_meta": True, "wrote": args.output, "bytes": bytes_written, **stats}))
diff --git a/src/data/plugin-legacy-artifacts.ts b/src/data/plugin-legacy-artifacts.ts
index 7dc6182bb..90c6e5dcb 100644
--- a/src/data/plugin-legacy-artifacts.ts
+++ b/src/data/plugin-legacy-artifacts.ts
@@ -56,6 +56,8 @@ const EXTRA_LEGACY_ARTIFACTS_BY_PLUGIN: Record<string, LegacyPluginArtifacts> =
       "ce-reproduce-bug",
       "ce-review",
       "ce-review-beta",
+      "ce-session-extract",
+      "ce-session-inventory",
       "ce-update",
       "changelog",
       "claude-permissions-optimizer",
diff --git a/src/utils/legacy-cleanup.ts b/src/utils/legacy-cleanup.ts
index 8bcb09d60..b163dcf76 100644
--- a/src/utils/legacy-cleanup.ts
+++ b/src/utils/legacy-cleanup.ts
@@ -94,6 +94,13 @@ export const STALE_SKILL_DIRS = [
   "ce-every-style-editor",
   "ce-onboarding",
   "ce-pr-description",
+
+  // ce-session-inventory and ce-session-extract were script-host skills called
+  // only from ce-session-historian via the Skill tool. That dispatch path
+  // deadlocked on Claude Code (subagents cannot invoke Skill — issue #794), so
+  // their scripts moved into ce-sessions/scripts/ and the skills were removed.
+  "ce-session-inventory",
+  "ce-session-extract",
 ]
 
 /** Old agent names (used as generated skill dirs or flat .md files). */
@@ -281,6 +288,10 @@ const LEGACY_ONLY_SKILL_DESCRIPTIONS: Record<string, string> = {
     "This skill should be used when reviewing or editing copy to ensure adherence to Every's style guide. It provides a systematic line-by-line review process for grammar, punctuation, mechanics, and style guide compliance.",
   "ce-pr-description":
     "Write or regenerate a value-first pull-request description (title + body) for the current branch's commits or for a specified PR. Use when the user says 'write a PR description', 'refresh the PR description', 'regenerate the PR body', 'rewrite this PR', 'freshen the PR', 'update the PR description', 'draft a PR body for this diff', 'describe this PR properly', 'generate the PR title', or pastes a GitHub PR URL / #NN / number. Also used internally by ce-commit-push-pr (single-PR flow) and ce-pr-stack (per-layer stack descriptions) so all callers share one writing voice. Input is a natural-language prompt. A PR reference (a full GitHub PR URL, `pr:561`, `#561`, or a bare number alone) picks a specific PR; anything else is treated as optional steering for the default 'describe my current branch' mode. Returns structured {title, body_file} (body written to an OS temp file) for the caller to apply via gh pr edit or gh pr create — this skill never edits the PR itself and never prompts for confirmation.",
+  "ce-session-extract":
+    "Extract conversation skeleton or error signals from a single session file at a given path. Invoked by session-research agents after they have selected which sessions to deep-dive — not intended for direct user queries.",
+  "ce-session-inventory":
+    "Discover session files for a repo across Claude Code, Codex, and Cursor, and extract session metadata (timestamps, branch, cwd, size, platform). Invoked by session-research agents — not intended for direct user queries.",
 }
 
 /**
diff --git a/tests/session-history-scripts.test.ts b/tests/session-history-scripts.test.ts
index 5c3cb5865..837a9d3c7 100644
--- a/tests/session-history-scripts.test.ts
+++ b/tests/session-history-scripts.test.ts
@@ -1,29 +1,20 @@
 import { describe, expect, test } from "bun:test"
+import fs from "fs"
+import os from "os"
 import path from "path"
 
-const INVENTORY_SCRIPTS_DIR = path.join(
+const SCRIPTS_DIR = path.join(
   __dirname,
-  "../plugins/compound-engineering/skills/ce-session-inventory/scripts"
-)
-const EXTRACT_SCRIPTS_DIR = path.join(
-  __dirname,
-  "../plugins/compound-engineering/skills/ce-session-extract/scripts"
+  "../plugins/compound-engineering/skills/ce-sessions/scripts"
 )
 const FIXTURES_DIR = path.join(__dirname, "fixtures/session-history")
 
-function scriptsDirFor(scriptName: string): string {
-  if (scriptName === "extract-metadata.py" || scriptName === "discover-sessions.sh") {
-    return INVENTORY_SCRIPTS_DIR
-  }
-  return EXTRACT_SCRIPTS_DIR
-}
-
 async function runScript(
   scriptName: string,
   args: string[] = [],
   stdin?: string
 ): Promise<{ stdout: string; stderr: string; exitCode: number }> {
-  const scriptPath = path.join(scriptsDirFor(scriptName), scriptName)
+  const scriptPath = path.join(SCRIPTS_DIR, scriptName)
   const proc = Bun.spawn(["python3", scriptPath, ...args], {
     stdin: stdin ? new TextEncoder().encode(stdin) : undefined,
     stdout: "pipe",
@@ -602,6 +593,97 @@ describe("extract-errors", () => {
   })
 })
 
+// ---------------------------------------------------------------------------
+// --output PATH mode: extract-skeleton.py and extract-errors.py
+//
+// When --output PATH is set, scripts write extracted bytes to PATH and emit
+// only a one-line _meta status to stdout (with wrote/bytes fields).
+// This lets ce-sessions route bulk extraction content to a scratch file
+// without round-tripping through orchestrator tool results. Without --output,
+// stdout-mode behavior is preserved (covered by tests above).
+// ---------------------------------------------------------------------------
+describe("--output PATH mode", () => {
+  function tmpFile(): string {
+    return path.join(
+      fs.mkdtempSync(path.join(os.tmpdir(), "ce-sessions-test-")),
+      "out.txt"
+    )
+  }
+
+  test("extract-skeleton writes file and emits status to stdout", async () => {
+    const fixture = await Bun.file(
+      path.join(FIXTURES_DIR, "claude-session.jsonl")
+    ).text()
+    const outPath = tmpFile()
+    const { stdout, exitCode } = await runScript(
+      "extract-skeleton.py",
+      ["--output", outPath],
+      fixture
+    )
+    expect(exitCode).toBe(0)
+
+    // stdout receives only a one-line _meta status with wrote/bytes
+    const stdoutLines = stdout.trim().split("\n").filter((l) => l.trim())
+    expect(stdoutLines).toHaveLength(1)
+    const status = JSON.parse(stdoutLines[0])
+    expect(status._meta).toBe(true)
+    expect(status.wrote).toBe(outPath)
+    expect(status.bytes).toBeGreaterThan(0)
+    expect(status.parse_errors).toBe(0)
+
+    // The file holds the extracted body (ending with the inner _meta line); `bytes` is its on-disk size
+    const body = fs.readFileSync(outPath, "utf-8")
+    expect(fs.statSync(outPath).size).toBe(status.bytes)
+    const bodyLines = body.trim().split("\n")
+    const innerMeta = JSON.parse(bodyLines[bodyLines.length - 1])
+    expect(innerMeta._meta).toBe(true)
+    expect(body).not.toMatch(/"wrote":/) // status field is stdout-only
+  })
+
+  test("extract-errors writes file and emits status to stdout", async () => {
+    const fixture = await Bun.file(
+      path.join(FIXTURES_DIR, "claude-session.jsonl")
+    ).text()
+    const outPath = tmpFile()
+    const { stdout, exitCode } = await runScript(
+      "extract-errors.py",
+      ["--output", outPath],
+      fixture
+    )
+    expect(exitCode).toBe(0)
+
+    const stdoutLines = stdout.trim().split("\n").filter((l) => l.trim())
+    expect(stdoutLines).toHaveLength(1)
+    const status = JSON.parse(stdoutLines[0])
+    expect(status._meta).toBe(true)
+    expect(status.wrote).toBe(outPath)
+    expect(status.bytes).toBeGreaterThan(0)
+    expect(status.errors_found).toBeGreaterThan(0)
+
+    const body = fs.readFileSync(outPath, "utf-8")
+    expect(body).toContain("[error]")
+    expect(fs.statSync(outPath).size).toBe(status.bytes)
+  })
+
+  test("extract-skeleton stdout-mode still works when --output is omitted", async () => {
+    const fixture = await Bun.file(
+      path.join(FIXTURES_DIR, "claude-session.jsonl")
+    ).text()
+    const { stdout, exitCode } = await runScript(
+      "extract-skeleton.py",
+      [],
+      fixture
+    )
+    expect(exitCode).toBe(0)
+    // No status JSON with `wrote` field — stdout has the body and ends with inner _meta
+    expect(stdout).not.toMatch(/"wrote":/)
+    const lines = stdout.trim().split("\n")
+    const meta = JSON.parse(lines[lines.length - 1])
+    expect(meta._meta).toBe(true)
+    expect(meta).not.toHaveProperty("wrote")
+  })
+})
+
 // ---------------------------------------------------------------------------
 // Cross-platform auto-detection
 // ---------------------------------------------------------------------------
@@ -639,7 +721,7 @@ describe("discover-sessions", () => {
   async function runDiscover(
     ...args: string[]
   ): Promise<{ stdout: string; stderr: string; exitCode: number }> {
-    const scriptPath = path.join(scriptsDirFor("discover-sessions.sh"), "discover-sessions.sh")
+    const scriptPath = path.join(SCRIPTS_DIR, "discover-sessions.sh")
     const proc = Bun.spawn(["bash", scriptPath, ...args], {
       stdout: "pipe",
       stderr: "pipe",
diff --git a/tests/skills/ce-session-historian-no-skill-tool.test.ts b/tests/skills/ce-session-historian-no-skill-tool.test.ts
new file mode 100644
index 000000000..27ba78bb9
--- /dev/null
+++ b/tests/skills/ce-session-historian-no-skill-tool.test.ts
@@ -0,0 +1,56 @@
+import { readFileSync } from "fs"
+import path from "path"
+import { describe, expect, test } from "bun:test"
+
+const AGENT_PATH = path.join(
+  process.cwd(),
+  "plugins/compound-engineering/agents/ce-session-historian.agent.md",
+)
+const AGENT_BODY = readFileSync(AGENT_PATH, "utf8")
+
+// Regression guard for https://github.com/EveryInc/compound-engineering-plugin/issues/794.
+//
+// `ce-session-historian` runs in subagent context (dispatched by `ce-sessions`
+// and historically by `ce-compound` Phase 1). Claude Code does not permit
+// subagents to invoke the `Skill` tool — the call hangs at "Initializing…"
+// indefinitely, eventually surfacing to the orchestrator as a spurious
+// "user doesn't want to proceed with this tool use" rejection
+// (anthropics/claude-code#38719).
+//
+// The fix moved all script orchestration into the `ce-sessions` skill
+// (main context), reshaping this agent into synthesis-only that reads
+// pre-extracted scratch files via the platform's native file-read tool.
+//
+// This test locks the no-Skill-from-subagent invariant: the agent's body
+// must not instruct any `Skill(...)` invocation. Silent regression here
+// reintroduces the deadlock.
+describe("ce-session-historian no-Skill-tool regression guard", () => {
+  test("agent body does not instruct Skill(ce-session-inventory) calls", () => {
+    expect(AGENT_BODY).not.toMatch(/Skill\(\s*["'`]?ce-session-inventory/)
+  })
+
+  test("agent body does not instruct Skill(ce-session-extract) calls", () => {
+    expect(AGENT_BODY).not.toMatch(/Skill\(\s*["'`]?ce-session-extract/)
+  })
+
+  test("agent body does not contain the broken-pattern prose fingerprint", () => {
+    expect(AGENT_BODY).not.toMatch(/Invoke them through the Skill tool/i)
+  })
+
+  test("agent body does not instruct any Skill(...) tool-call expression", () => {
+    // Belt-and-suspenders: any literal `Skill(...)` tool-call form in the
+    // agent body would deadlock under the same constraint. The agent's
+    // contract is "read paths via native file-read; never invoke Skill."
+    // Backtick-quoted prose mentions like `Skill` are fine — only literal
+    // call expressions are flagged. Match `Skill(` that is not preceded by a
+    // backtick and is followed by any character other than `)` or a backtick.
+    const skillCallPattern = /(?<!`)Skill\([^)`]/
+    const match = AGENT_BODY.match(skillCallPattern)
+    expect(
+      match,
+      `Agent body contains a literal Skill(...) tool-call expression: ${match?.[0]}. ` +
+        `Subagents cannot invoke the Skill tool in Claude Code (issue #794). ` +
+        `Use the platform's native file-read tool on pre-extracted paths instead.`,
+    ).toBeNull()
+  })
+})

From 8069c020b422152e2fb43bb1d057adb0c8458bde Mon Sep 17 00:00:00 2001
From: Trevin Chow <trevin@trevinchow.com>
Date: Fri, 8 May 2026 13:50:32 -0700
Subject: [PATCH 2/2] fix(ce-sessions): guard tool-input slicing against
 dict-shaped values

summarize_claude_tool sliced inp.get("query", "") and inp.get("prompt", "")
unconditionally. When MCP or specialized tools put a dict in those fields,
dict[:80] raises TypeError: unhashable type: 'slice' and the per-session
extraction silently fails. Same exposure existed in handle_cursor's
tool_use path.

Add a _safe_slice helper and reroute every potentially-non-string field
through it, then add regression tests for dict-shaped query, command,
prompt, pattern, fall-through to a later string field, and the cursor path.

Fixes #805
---
 .../ce-sessions/scripts/extract-skeleton.py   |  38 +++--
 tests/session-history-scripts.test.ts         | 133 ++++++++++++++++++
 2 files changed, 159 insertions(+), 12 deletions(-)

diff --git a/plugins/compound-engineering/skills/ce-sessions/scripts/extract-skeleton.py b/plugins/compound-engineering/skills/ce-sessions/scripts/extract-skeleton.py
index e6581c6b8..e5e9267c3 100644
--- a/plugins/compound-engineering/skills/ce-sessions/scripts/extract-skeleton.py
+++ b/plugins/compound-engineering/skills/ce-sessions/scripts/extract-skeleton.py
@@ -121,17 +121,29 @@ def flush_tools():
     pending_tools.clear()
 
 
+def _safe_slice(value, n):
+    """Slice value if it is a string; otherwise return ''.
+
+    Some Claude Code / MCP tool inputs put structured data (dicts, lists) in
+    fields like `query` or `prompt`. `dict[:N]` raises TypeError (KeyError on
+    Python 3.12+, where slices are hashable), so guard every slice with isinstance.
+    """
+    return value[:n] if isinstance(value, str) else ""
+
+
 def summarize_claude_tool(block):
     """Extract name and target from a Claude Code tool_use block."""
     name = block.get("name", "unknown")
     inp = block.get("input", {})
+    fp = inp.get("file_path")
+    p = inp.get("path")
     target = (
-        inp.get("file_path")
-        or inp.get("path")
-        or inp.get("command", "")[:120]
-        or inp.get("pattern", "")
-        or inp.get("query", "")[:80]
-        or inp.get("prompt", "")[:80]
+        (fp if isinstance(fp, str) else None)
+        or (p if isinstance(p, str) else None)
+        or _safe_slice(inp.get("command"), 120)
+        or _safe_slice(inp.get("pattern"), 200)
+        or _safe_slice(inp.get("query"), 80)
+        or _safe_slice(inp.get("prompt"), 80)
         or ""
     )
     if isinstance(target, str) and len(target) > 120:
@@ -290,13 +302,15 @@ def handle_cursor(obj):
             elif block.get("type") == "tool_use":
                 name = block.get("name", "unknown")
                 inp = block.get("input", {})
+                p = inp.get("path")
+                fp = inp.get("file_path")
                 target = (
-                    inp.get("path")
-                    or inp.get("file_path")
-                    or inp.get("command", "")[:120]
-                    or inp.get("pattern", "")
-                    or inp.get("glob_pattern", "")
-                    or inp.get("target_directory", "")
+                    (p if isinstance(p, str) else None)
+                    or (fp if isinstance(fp, str) else None)
+                    or _safe_slice(inp.get("command"), 120)
+                    or _safe_slice(inp.get("pattern"), 200)
+                    or _safe_slice(inp.get("glob_pattern"), 200)
+                    or _safe_slice(inp.get("target_directory"), 200)
                     or ""
                 )
                 if isinstance(target, str) and len(target) > 120:
diff --git a/tests/session-history-scripts.test.ts b/tests/session-history-scripts.test.ts
index 837a9d3c7..577ab7906 100644
--- a/tests/session-history-scripts.test.ts
+++ b/tests/session-history-scripts.test.ts
@@ -517,6 +517,139 @@ describe("extract-skeleton", () => {
     expect(stdout).toContain("[tools] 4x Read")
     expect(stdout).toContain("all ok")
   })
+
+  // Regression: issue #805 — some Claude Code / MCP tool inputs put a dict in
+  // fields the summarizer slices (`command`, `query`, `prompt`, `pattern`).
+  // `dict[:80]` raises TypeError (KeyError on Python 3.12+). The fix guards
+  // every slice with isinstance(value, str); dict-shaped fields fall through
+  // to the next candidate or empty target without crashing the extraction.
+  test("does not crash when Claude tool input has a dict-shaped query", async () => {
+    const lines = [
+      JSON.stringify({
+        type: "assistant",
+        message: {
+          role: "assistant",
+          content: [
+            {
+              type: "tool_use",
+              id: "t1",
+              name: "WebSearch",
+              input: { query: { foo: "bar" } },
+            },
+          ],
+        },
+        timestamp: "2026-05-08T10:00:00.000Z",
+      }),
+    ]
+    const { stdout, exitCode, stderr } = await runScript(
+      "extract-skeleton.py",
+      [],
+      lines.join("\n")
+    )
+    expect(exitCode).toBe(0)
+    expect(stderr).not.toContain("TypeError")
+    expect(stdout).toContain("[tool] WebSearch")
+    const metaLine = stdout.trim().split("\n").at(-1)!
+    expect(JSON.parse(metaLine).parse_errors).toBe(0)
+  })
+
+  test("dict-shaped command/prompt/pattern fields do not crash and fall back to empty target", async () => {
+    const lines = [
+      JSON.stringify({
+        type: "assistant",
+        message: {
+          role: "assistant",
+          content: [
+            {
+              type: "tool_use",
+              id: "c1",
+              name: "Bash",
+              input: { command: { cmd: "ls" } },
+            },
+            {
+              type: "tool_use",
+              id: "p1",
+              name: "Task",
+              input: { prompt: { description: "x" } },
+            },
+            {
+              type: "tool_use",
+              id: "g1",
+              name: "Grep",
+              input: { pattern: { regex: "foo" } },
+            },
+          ],
+        },
+        timestamp: "2026-05-08T10:00:01.000Z",
+      }),
+    ]
+    const { stdout, exitCode } = await runScript(
+      "extract-skeleton.py",
+      [],
+      lines.join("\n")
+    )
+    expect(exitCode).toBe(0)
+    expect(stdout).toContain("[tool] Bash")
+    expect(stdout).toContain("[tool] Task")
+    expect(stdout).toContain("[tool] Grep")
+  })
+
+  test("falls through dict-shaped query to a later string field", async () => {
+    // When `query` is a dict, the summarizer must skip it and try `prompt`.
+    const lines = [
+      JSON.stringify({
+        type: "assistant",
+        message: {
+          role: "assistant",
+          content: [
+            {
+              type: "tool_use",
+              id: "x1",
+              name: "MCPTool",
+              input: {
+                query: { structured: true },
+                prompt: "fallback prompt text",
+              },
+            },
+          ],
+        },
+        timestamp: "2026-05-08T10:00:02.000Z",
+      }),
+    ]
+    const { stdout, exitCode } = await runScript(
+      "extract-skeleton.py",
+      [],
+      lines.join("\n")
+    )
+    expect(exitCode).toBe(0)
+    expect(stdout).toContain("fallback prompt text")
+  })
+
+  test("dict-shaped Cursor tool inputs do not crash", async () => {
+    // Same exposure exists in handle_cursor's tool_use path.
+    const lines = [
+      JSON.stringify({
+        role: "assistant",
+        message: {
+          content: [
+            {
+              type: "tool_use",
+              name: "search",
+              input: { pattern: { regex: "foo" }, glob_pattern: { type: "x" } },
+            },
+          ],
+        },
+      }),
+    ]
+    const { stdout, exitCode, stderr } = await runScript(
+      "extract-skeleton.py",
+      [],
+      lines.join("\n")
+    )
+    expect(exitCode).toBe(0)
+    expect(stderr).not.toContain("TypeError")
+    expect(stdout).toContain("[tool] search")
+  })
 })
 
 // ---------------------------------------------------------------------------