diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5d943aee3b..3de066b56c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -147,3 +147,63 @@ jobs:
           name: playwright-report
           path: apps/code/playwright-report/
           retention-days: 7
+
+  e2e:
+    # Live-model e2e for the @posthog/agent adapters (claude + codex). Runs only
+    # after the unit + integration jobs pass — a red tree never reaches the
+    # gateway. Opt-in: without vars.AGENT_E2E_ENABLED the job is skipped. When
+    # enabled it needs the E2E_GATEWAY_TOKEN secret (withheld from fork PRs) and an
+    # E2E_GATEWAY_URL pointing at a runner-reachable gateway; without the token every
+    # arm skips and the fail-loud guard (guard.e2e.test.ts) reds the run. Drives cheap
+    # models (claude-haiku-4-5 / gpt-5-mini), so an enabled run is a handful of short turns.
+    needs: [unit-test, integration-test]
+    # Enabled at the org level, and skipped on fork PRs — secrets (the gateway
+    # token) are withheld from forks, so the fail-loud token guard would red them
+    # spuriously. Same-repo PRs get the secret and enforce the guard.
+    if: ${{ vars.AGENT_E2E_ENABLED == 'true' && (github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository) }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - name: Setup pnpm
+        uses: pnpm/action-setup@b906affcce14559ad1aafd4ab0e942779e9f58b1 # v4.3.0
+
+      - name: Setup Node.js
+        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
+        with:
+          node-version: 22
+          cache: "pnpm"
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Build agent dependencies
+        run: |
+          pnpm --filter @posthog/shared run build
+          pnpm --filter @posthog/git run build
+          pnpm --filter @posthog/enricher run build
+
+      - name: Download native codex binary
+        # Non-fatal at the STEP so a failure surfaces as the fail-loud binary guard
+        # (guard.e2e.test.ts) with a clear message rather than an opaque download
+        # error. A missing binary then REDS the run (the guard fails when a token is
+        # set) instead of letting the codex arm silently skip to green.
+        run: node apps/code/scripts/download-binaries.mjs || echo "codex binary download failed; the binary guard test will red the run"
+
+      - name: Run live e2e (both adapters)
+        run: pnpm --filter agent run test:e2e
+        env:
+          E2E_GATEWAY_TOKEN: ${{ secrets.E2E_GATEWAY_TOKEN }}
+          E2E_GATEWAY_URL: ${{ vars.E2E_GATEWAY_URL }}
+          E2E_CLAUDE_MODEL: ${{ vars.E2E_CLAUDE_MODEL }}
+          E2E_CODEX_MODEL: ${{ vars.E2E_CODEX_MODEL }}
+          # Optional: set vars.E2E_ENVIRONMENT=cloud to exercise the cloud code
+          # path (sandbox/permission-profile gating). Unset = local. The OS-sandbox
+          # enforcement test is macOS-gated, so it doesn't red this linux runner.
+          E2E_ENVIRONMENT: ${{ vars.E2E_ENVIRONMENT }}
diff --git a/.gitignore b/.gitignore
index fa269ba709..fe3abb2fe8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,8 @@ bin/
 
 # tsup bundled config artifacts (temporary files left behind when bundling TS configs)
 *.config.bundled_*.mjs
+# vite bundled config artifacts (left behind when a vitest run is interrupted)
+*.config.ts.timestamp-*.mjs
 
 # Environment
 .env
diff --git a/packages/agent/e2e/README.md b/packages/agent/e2e/README.md
new file mode 100644
index 0000000000..21ff545fe7
--- /dev/null
+++ b/packages/agent/e2e/README.md
@@ -0,0 +1,94 @@
+# Live agent e2e suite
+
+Drives representative sessions **end to end** through the real adapter, the real
+binary (codex `app-server` / Claude Code CLI), and the real llm-gateway on a cheap
+model — parametrized across `claude` and `codex`. The only thing mocked is the
+host/UI client (a recording `sessionUpdate`, an auto-allow `requestPermission`,
+and real file read/write against a throwaway git repo). Nothing in the
+agent/model/tool path is stubbed.
+
+## What it covers
+
+Two suites, each a per-adapter loop with `describe.skipIf` over `["claude",
+"codex"]` (titles carry a `(claude)` / `(codex)` marker so `-t "(codex)"` selects
+one arm across both files):
+
+`session-lifecycle.e2e.test.ts` — one shared golden turn plus focused scenarios:
+- **newSession config options** — model / effort selectors are offered.
+- **working turn** — `initialize → newSession → prompt` (read a file, edit a
+  line, run a command): streamed assistant text, tool calls + a completed tool
+  call, the exact usage signal, `stopReason: end_turn`, the real on-disk file
+  edit, and (codex) the `_posthog/sdk_session` + `_posthog/turn_complete`
+  ext-notifications.
+- **setSessionConfigOption** — switching a config option is accepted + acked.
+- **interrupt** — `cancel` during an in-flight (unbounded) turn yields `cancelled`.
+- **resumeSession** — reconnect returns config options.
+- **loadSession** — a fresh connection reattaches and the transcript replays
+  (asserts the tool transcript replays, not just any update).
+
+Codex-only (advertised codex capabilities; registered as skipped on the claude
+arm so the gap is visible):
+- **mode switch** → `current_mode_update`.
+- **steering** — a mid-turn prompt folds into the running turn via `turn/steer`.
+- **list + fork** — `listSessions` finds the session; `forkSession` branches it.
+
+The command/file approval `{decision}` round-trip is **not** covered here: codex
+spawns under a `danger-full-access` sandbox and auto-approves, so it never sends
+an approval request to assert on. That envelope is covered by unit tests instead.
+
+`structured-output.e2e.test.ts` — `_meta.jsonSchema` + `onStructuredOutput`
+delivers a parsed, schema-constrained object (the signals-pipeline contract).
+
+Assertions are structural lifecycle invariants + the deterministic file/JSON
+side effects — never model prose — so they hold across adapters and cheap models.
+
+## Structure
+
+- `config.ts` — gateway/token/model resolution, per-adapter env wiring, skip logic.
+- `driver.ts` — the in-process ACP host client (recording capture, auto-allow,
+  real FS), `openConnection` / `openSession` helpers, the throwaway-repo helpers,
+  and `waitFor`.
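+- `*.e2e.test.ts` — the scenarios, plus `guard.e2e.test.ts` (fail-loud preconditions) and `compaction.e2e.test.ts` (codex-only auto-compaction).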
+
+## Running
+
+These never run under `pnpm test` or the unit/integration CI jobs (the default vitest
+config only includes `src/**`). They are opt-in and cost a couple of short model turns.
+
+In CI they run as the **`e2e` job in `.github/workflows/test.yml`**, on pull
+requests only, after the unit + integration jobs pass. The job is opt-in: it is
+skipped unless the repo variable `AGENT_E2E_ENABLED` is `true`, and it never runs
+for fork PRs (their secrets are withheld, which would otherwise red the fail-loud
+token guard). Once enabled it needs an `E2E_GATEWAY_TOKEN` secret and an
+`E2E_GATEWAY_URL` variable pointing at a gateway reachable from the runner. Off by
+default, so it costs nothing until enabled; if the token or the native codex
+binary is missing, `guard.e2e.test.ts` fails the run instead of skipping to green.
+
+```bash
+# from packages/agent — reads the local dev API key from the posthog repo, runs both arms
+bash e2e/run-e2e.sh
+
+# just one adapter (matches the (codex) / (claude) marker in every title)
+bash e2e/run-e2e.sh -t "(codex)"
+```
+
+Prereqs: a local llm-gateway up (`./bin/start` in the posthog repo) and the
+native codex binary present at `apps/code/resources/codex-acp/codex` (if it is
+missing the codex arm skips and `guard.e2e.test.ts` fails a full run).
+
+## Configuration (env)
+
+| Var | Default | Notes |
+| --- | --- | --- |
+| `E2E_GATEWAY_TOKEN` | — | Required. A token the gateway accepts — the `llm_gateway` product takes a personal API key (no OAuth). Without it every arm skips and the guard test fails the run. `run-e2e.sh` reads the local dev key. |
+| `E2E_GATEWAY_URL` | `http://localhost:3308/llm_gateway` | Gateway base (codex appends `/v1`). `llm_gateway` accepts a personal API key; `posthog_code` is OAuth-only. |
+| `E2E_CLAUDE_MODEL` | `claude-haiku-4-5` | Override if the gateway serves a different cheap Claude id. |
+| `E2E_CODEX_MODEL` | `gpt-5-mini` | Cheapest codex id the local gateway serves; override if needed. |
+| `POSTHOG_REPO` | sibling `../posthog` | Where `run-e2e.sh` reads the local dev key from. |
+| `E2E_DEBUG` | — | `1` for verbose adapter logging. |
+
+If a default model isn't served by your gateway, the turn fails loudly (never a
+false green) — set the matching `E2E_*_MODEL`.
+
+Each arm self-skips with a visible reason (missing token / missing binary), and
+`guard.e2e.test.ts` turns either gap into a failing run rather than a silent green.
diff --git a/packages/agent/e2e/compaction.e2e.test.ts b/packages/agent/e2e/compaction.e2e.test.ts
new file mode 100644
index 0000000000..3a6653d1e4
--- /dev/null
+++ b/packages/agent/e2e/compaction.e2e.test.ts
@@ -0,0 +1,101 @@
+import { afterAll, beforeAll, describe, expect, it } from "vitest";
+import { type Adapter, E2E } from "./config";
+import {
+  cleanupRepo,
+  killCodexStragglers,
+  openSession,
+  setupRepo,
+} from "./driver";
+
+/**
+ * Live compaction e2e — codex only. codex auto-compacts when the context crosses
+ * `model_auto_compact_token_limit`; we spawn with a low limit and a big cheap input
+ * blob so a later turn trips it, and the adapter must surface `_posthog/compact_boundary`.
+ * Claude is excluded: its manual `/compact` hangs `prompt()` and forcing auto
+ * compaction is too costly. Tuning: if it never compacts, raise the limit and FILLER together.
+ */
+const ADAPTERS: Adapter[] = ["codex"];
+
+// A limit above codex's resident baseline, with FILLER > limit so the crossing is baseline-independent.
+const AUTO_COMPACT_TOKEN_LIMIT = 16000;
+// ~20k tokens (~45 chars ≈ 11 tokens × 1800) — larger than the limit above.
+const FILLER = "The quick brown fox jumps over the lazy dog. ".repeat(1800);
+const MAX_CODEX_TURNS = 3;
+
+for (const adapter of ADAPTERS) {
+  const skip = E2E.skipReason(adapter);
+  const title = `compaction (${adapter})${skip ? ` — SKIPPED (${skip})` : ""}`;
+
+  describe.skipIf(!!skip)(title, () => {
+    let repo: string;
+
+    beforeAll(() => {
+      if (adapter === "codex") killCodexStragglers();
+      E2E.configureEnv(adapter);
+      repo = setupRepo();
+    });
+
+    afterAll(() => {
+      cleanupRepo(repo);
+    });
+
+    it("surfaces a compaction to the host via compact_boundary", async () => {
+      const s = await openSession({
+        adapter,
+        cwd: repo,
+        codexOptions:
+          adapter === "codex"
+            ? E2E.codexOptions(repo, {
+                // The model-scoped key is the effective one; set both to be safe.
+                model_auto_compact_token_limit: AUTO_COMPACT_TOKEN_LIMIT,
+                auto_compact_token_limit: AUTO_COMPACT_TOKEN_LIMIT,
+              })
+            : undefined,
+        meta: {
+          systemPrompt: "You are a coding assistant in a tiny test repo.",
+          model: E2E.model(adapter),
+          permissionMode: "bypassPermissions",
+          taskRunId: "e2e-compaction",
+        },
+      });
+      try {
+        const compacted = () =>
+          s.capture.extMethods().includes("_posthog/compact_boundary");
+
+        if (adapter === "claude") {
+          // Unreachable while ADAPTERS is codex-only (claude's manual /compact hangs prompt()); kept for re-enabling: a short turn, then manual /compact.
+          await s.conn.prompt({
+            sessionId: s.sessionId,
+            prompt: [{ type: "text", text: "Reply with only: hello." }],
+          });
+          await s.conn.prompt({
+            sessionId: s.sessionId,
+            prompt: [{ type: "text", text: "/compact" }],
+          });
+        } else {
+          // codex: turn 1's big input blob fills the context past the limit; turn 2+
+          // trips auto-compaction. Stop once the boundary is surfaced.
+          for (let i = 0; i < MAX_CODEX_TURNS && !compacted(); i++) {
+            const text =
+              i === 0
+                ? `Reference text — do not summarize, reply with only: OK.\n\n${FILLER}`
+                : "Reply with only: DONE.";
+            await s.conn.prompt({
+              sessionId: s.sessionId,
+              prompt: [{ type: "text", text }],
+            });
+          }
+        }
+
+        expect(
+          compacted(),
+          `expected a _posthog/compact_boundary; saw methods: ${s.capture
+            .extMethods()
+            .join(", ")}`,
+        ).toBe(true);
+      } finally {
+        await s.cleanup();
+      }
+    }, 300_000);
+  });
+}
diff --git a/packages/agent/e2e/config.ts b/packages/agent/e2e/config.ts
new file mode 100644
index 0000000000..0670dd82eb
--- /dev/null
+++ b/packages/agent/e2e/config.ts
@@ -0,0 +1,106 @@
+import { existsSync } from "node:fs";
+import { join } from "node:path";
+
+export type Adapter = "claude" | "codex";
+
+/**
+ * Live e2e configuration, resolved entirely from the environment so no secret is
+ * committed. Needs a local llm-gateway and a token in `E2E_GATEWAY_TOKEN`; targets
+ * the `llm_gateway` product, which accepts a personal API key (no OAuth mint,
+ * unlike prod's `posthog_code`). Without the token every arm self-skips.
+ */
+// `||` not `??`: CI sets unset vars to "" which should fall back to the default.
+const GATEWAY_URL =
+  process.env.E2E_GATEWAY_URL || "http://localhost:3308/llm_gateway";
+const TOKEN = process.env.E2E_GATEWAY_TOKEN ?? "";
+
+// The native app-server binary, relative to packages/agent/e2e.
+const NATIVE_CODEX_BIN = join(
+  __dirname,
+  "..",
+  "..",
+  "..",
+  "apps",
+  "code",
+  "resources",
+  "codex-acp",
+  "codex",
+);
+
+/** The gateway base with exactly one trailing `/v1` (codex / OpenAI-format endpoint); trailing slashes on E2E_GATEWAY_URL are dropped first so `…/` and `…/v1/` don't produce `//v1`. */
+function openAiBase(): string {
+  return `${GATEWAY_URL.replace(/\/+$/, "").replace(/\/v1$/, "")}/v1`;
+}
+
+export const E2E = {
+  token: TOKEN,
+  hasToken: !!TOKEN,
+  gatewayUrl: GATEWAY_URL,
+  codexBin: NATIVE_CODEX_BIN,
+  /** Deployment environment. `E2E_ENVIRONMENT=cloud` exercises the cloud code path; undefined = local. */
+  environment:
+    (process.env.E2E_ENVIRONMENT as "local" | "cloud" | undefined) || undefined,
+
+  /** Cheap model per adapter, overridable via `E2E_CLAUDE_MODEL` / `E2E_CODEX_MODEL`. */
+  model(adapter: Adapter): string {
+    // `||` so an empty CI variable falls back to the default.
+    if (adapter === "claude") {
+      return process.env.E2E_CLAUDE_MODEL || "claude-haiku-4-5";
+    }
+    // gpt-5-mini is on the product block list, but that gate is only enforced in
+    // Agent.run — the e2e drives createAcpConnection directly, so it's accepted.
+    return process.env.E2E_CODEX_MODEL || "gpt-5-mini";
+  },
+
+  /** Null => runnable; a string => skip this arm with that reason (never silent). */
+  skipReason(adapter: Adapter): string | null {
+    if (!TOKEN) return "E2E_GATEWAY_TOKEN not set";
+    if (adapter === "codex" && !existsSync(NATIVE_CODEX_BIN)) {
+      return `native codex binary missing at ${NATIVE_CODEX_BIN}`;
+    }
+    return null;
+  },
+
+  /** Point the adapter at the gateway as the host's `configureEnvironment` does. */
+  configureEnv(adapter: Adapter): void {
+    if (adapter === "claude") {
+      process.env.ANTHROPIC_BASE_URL = GATEWAY_URL;
+      process.env.ANTHROPIC_AUTH_TOKEN = TOKEN;
+      return;
+    }
+    process.env.OPENAI_BASE_URL = openAiBase();
+    process.env.OPENAI_API_KEY = TOKEN;
+    process.env.POSTHOG_CODEX_USE_APP_SERVER = "1";
+  },
+
+  /** The codexOptions the codex arm passes through `createAcpConnection`. */
+  codexOptions(
+    cwd: string,
+    configOverrides?: Record<string, string | number>,
+    modelOverride?: string,
+  ): {
+    cwd: string;
+    binaryPath: string;
+    apiBaseUrl: string;
+    apiKey: string;
+    model: string;
+    configOverrides?: Record<string, string | number>;
+  } {
+    return {
+      cwd,
+      binaryPath: NATIVE_CODEX_BIN,
+      apiBaseUrl: openAiBase(),
+      apiKey: TOKEN,
+      model: modelOverride || this.model("codex"),
+      ...(configOverrides ? { configOverrides } : {}),
+    };
+  },
+
+  /** A stronger model for tests the cheapest models can't handle (e.g. structured-output decodes). NOTE(review): reads the same E2E_*_MODEL overrides as model(), so setting one pins both tiers — confirm intended. */
+  strongModel(adapter: Adapter): string {
+    if (adapter === "claude") {
+      return process.env.E2E_CLAUDE_MODEL || "claude-sonnet-4-5";
+    }
+    return process.env.E2E_CODEX_MODEL || "gpt-5.5";
+  },
+};
diff --git a/packages/agent/e2e/driver.ts b/packages/agent/e2e/driver.ts
new file mode 100644
index 0000000000..7110e6f40a
--- /dev/null
+++ b/packages/agent/e2e/driver.ts
@@ -0,0 +1,287 @@
+/**
+ * Adapter-agnostic ACP driver for the live e2e suite. Stands up the same in-process
+ * ACP transport the real host uses and drives a real adapter + binary + gateway.
+ * The only thing mocked is the host/UI client (recording sessionUpdate, auto-allow
+ * requestPermission, real fs read/write against the test repo).
+ */
+import { execFileSync } from "node:child_process";
+import {
+  promises as fsp,
+  mkdtempSync,
+  readFileSync,
+  realpathSync,
+  rmSync,
+  writeFileSync,
+} from "node:fs";
+import { tmpdir } from "node:os";
+import { join, resolve } from "node:path";
+// @ts-expect-error - runtime ESM export resolved by vitest
+import { ClientSideConnection, ndJsonStream } from "@agentclientprotocol/sdk";
+import { createAcpConnection } from "../src/adapters/acp-connection";
+import { Logger } from "../src/utils/logger";
+import { type Adapter, E2E } from "./config";
+
+export type { Adapter } from "./config";
+
+export interface CapturedEvent {
+  kind: "sessionUpdate" | "requestPermission" | "extNotification";
+  sessionUpdate?: string;
+  method?: string;
+  data?: Record<string, unknown>;
+}
+
+export interface Capture {
+  events: CapturedEvent[];
+  updates(type: string): CapturedEvent[];
+  approvals(): CapturedEvent[];
+  extMethods(): string[];
+}
+
+export interface NewSessionResponse {
+  sessionId: string;
+  configOptions?: ConfigOption[];
+  modes?: unknown;
+}
+
+export interface ConfigOption {
+  id?: string;
+  category?: string;
+  currentValue?: unknown;
+  options?: Array<{ name?: string; value?: unknown }>;
+}
+
+export interface AcpConn {
+  initialize: (p: unknown) => Promise<any>;
+  newSession: (p: unknown) => Promise<NewSessionResponse>;
+  loadSession: (p: unknown) => Promise<any>;
+  resumeSession: (p: unknown) => Promise<any>;
+  listSessions: (
+    p: unknown,
+  ) => Promise<{ sessions?: Array<{ sessionId?: string }> }>;
+  unstable_forkSession: (p: unknown) => Promise<NewSessionResponse>;
+  prompt: (p: unknown) => Promise<{ stopReason?: string; usage?: unknown }>;
+  setSessionConfigOption: (p: unknown) => Promise<any>;
+  cancel: (p: unknown) => Promise<void>;
+  // Client→agent ext-method (the host drives _posthog/refresh_session).
+  extMethod: (method: string, params: unknown) => Promise<unknown>;
+}
+
+export interface E2EConnection {
+  conn: AcpConn;
+  capture: Capture;
+  cleanup: () => Promise<void>;
+}
+
+/**
+ * The ACP `initialize` params our host client sends. Matches the cloud host, which
+ * advertises no clientCapabilities — so the adapter runs file/terminal tools
+ * in-process rather than proxying through the host's fs callbacks.
+ */
+export const INIT_PARAMS = {
+  protocolVersion: 1,
+  clientCapabilities: {},
+};
+
+export function openConnection(opts: {
+  adapter: Adapter;
+  cwd: string;
+  codexOptions?: Record<string, unknown>;
+  onStructuredOutput?: (output: Record<string, unknown>) => Promise<void>;
+}): E2EConnection {
+  const { adapter, cwd } = opts;
+  const events: CapturedEvent[] = [];
+
+  // Mirror the cloud host's client surface. Deliberately no extMethod: the real
+  // host doesn't implement it, so an adapter calling it should fail e2e as in prod.
+  const client = {
+    async sessionUpdate(p: any): Promise<void> {
+      events.push({
+        kind: "sessionUpdate",
+        sessionUpdate: p?.update?.sessionUpdate,
+        data: p?.update,
+      });
+    },
+    async requestPermission(p: any): Promise<unknown> {
+      events.push({
+        kind: "requestPermission",
+        data: {
+          title: p?.toolCall?.title,
+          kind: p?.toolCall?.kind,
+          // request_user_input surfaces as a permission with codeToolKind: "question"; codex only offers it in Plan mode.
+          codeToolKind: p?.toolCall?._meta?.codeToolKind,
+        },
+      });
+      const options = p?.options ?? [];
+      const allow =
+        options.find(
+          (o: any) => o?.kind === "allow_once" || o?.kind === "allow_always",
+        ) ?? options[0];
+      return {
+        outcome: { outcome: "selected", optionId: allow?.optionId ?? "allow" },
+      };
+    },
+    async readTextFile(p: any): Promise<unknown> {
+      return { content: await fsp.readFile(resolve(cwd, p.path), "utf8") };
+    },
+    async writeTextFile(p: any): Promise<unknown> {
+      await fsp.writeFile(resolve(cwd, p.path), p.content);
+      return {};
+    },
+    async extNotification(method: string, params: any): Promise<void> {
+      events.push({ kind: "extNotification", method, data: params });
+    },
+  };
+
+  const logger = new Logger({
+    debug: !!process.env.E2E_DEBUG,
+    prefix: "[e2e]",
+  });
+  const acp = createAcpConnection({
+    adapter,
+    codexOptions: opts.codexOptions as any,
+    onStructuredOutput: opts.onStructuredOutput,
+    logger,
+  });
+  const stream = ndJsonStream(
+    acp.clientStreams.writable,
+    acp.clientStreams.readable,
+  );
+  const conn = new ClientSideConnection(
+    () => client,
+    stream,
+  ) as unknown as AcpConn;
+
+  const capture: Capture = {
+    events,
+    updates: (type) =>
+      events.filter(
+        (e) => e.kind === "sessionUpdate" && e.sessionUpdate === type,
+      ),
+    approvals: () => events.filter((e) => e.kind === "requestPermission"),
+    extMethods: () => [
+      ...new Set(
+        events
+          .filter((e) => e.kind === "extNotification" && e.method)
+          .map((e) => e.method as string),
+      ),
+    ],
+  };
+
+  return {
+    conn,
+    capture,
+    cleanup: async () => {
+      // Bounded: a wedged adapter cleanup must never hang the suite; the timer is cleared so it can't outlive a fast cleanup.
+      let timer: ReturnType<typeof setTimeout> | undefined;
+      const bound = new Promise<void>((r) => { timer = setTimeout(r, 8000); });
+      await Promise.race([acp.cleanup().catch(() => undefined), bound]);
+      clearTimeout(timer);
+    },
+  };
+}
+
+export interface OpenSession {
+  conn: AcpConn;
+  capture: Capture;
+  sessionId: string;
+  newSession: NewSessionResponse;
+  cleanup: () => Promise<void>;
+}
+
+/** openConnection + initialize + newSession — the common scenario setup. Releases the connection if setup throws, then rethrows. */
+export async function openSession(opts: {
+  adapter: Adapter;
+  cwd: string;
+  codexOptions?: Record<string, unknown>;
+  onStructuredOutput?: (output: Record<string, unknown>) => Promise<void>;
+  meta: Record<string, unknown>;
+}): Promise<OpenSession> {
+  const c = openConnection(opts);
+  try {
+    await c.conn.initialize(INIT_PARAMS);
+    const newSession = await c.conn.newSession({
+      cwd: opts.cwd,
+      mcpServers: [],
+      // Inject E2E_ENVIRONMENT so the suite can run as a cloud session without threading it through every test's meta.
+      _meta: {
+        ...opts.meta,
+        ...(E2E.environment ? { environment: E2E.environment } : {}),
+      },
+    });
+    return { ...c, sessionId: newSession.sessionId, newSession };
+  } catch (err) {
+    // The caller never receives `cleanup` when setup fails, so release the adapter here.
+    await c.cleanup().catch(() => undefined);
+    throw err;
+  }
+}
+
+export const ORIGINAL_TARGET = "line1\nline2\nline3\n";
+
+export function setupRepo(): string {
+  // realpath so cwd is canonical: on macOS os.tmpdir() is a symlink. The Claude
+  // SDK keys its session store by the resolved path, so loadSession's replay finds
+  // nothing if a fresh connection uses a different path.
+  const repo = realpathSync(mkdtempSync(join(tmpdir(), "agent-e2e-")));
+  writeFileSync(join(repo, "target.txt"), ORIGINAL_TARGET);
+  execFileSync("git", ["init", "-q"], { cwd: repo });
+  execFileSync("git", ["add", "-A"], { cwd: repo });
+  // -c commit.gpgsign=false: ignore the user's global signing config, which fails in this non-interactive context.
+  execFileSync(
+    "git",
+    [
+      "-c",
+      "commit.gpgsign=false",
+      "-c",
+      "user.email=e2e@posthog.dev",
+      "-c",
+      "user.name=e2e",
+      "commit",
+      "-qm",
+      "init",
+    ],
+    { cwd: repo },
+  );
+  return repo;
+}
+
+export function readTarget(repo: string): string {
+  return readFileSync(join(repo, "target.txt"), "utf8");
+}
+
+export function cleanupRepo(repo: string): void {
+  try {
+    rmSync(repo, { recursive: true, force: true });
+  } catch {
+    /* best effort */
+  }
+}
+
+/** Repeatedly evaluate `fn` until it yields something other than `undefined`, giving up (with `undefined`) once `timeoutMs` has passed. */
+export async function waitFor<T>(
+  fn: () => T | undefined,
+  timeoutMs = 5000,
+  intervalMs = 100,
+): Promise<T | undefined> {
+  const deadline = Date.now() + timeoutMs;
+  for (;;) {
+    const result = fn();
+    if (result !== undefined) return result;
+    if (Date.now() >= deadline) return undefined;
+    await new Promise((wake) => setTimeout(wake, intervalMs));
+  }
+}
+
+/**
+ * codex spawns detached; a killed run can orphan it holding a flock under ~/.codex/tmp, wedging the next run.
+ * SIGKILLs every process whose command line matches `resources/codex-acp` (pkill -9 -f); any pkill failure is ignored.
+ */
+export function killCodexStragglers(): void {
+  try {
+    execFileSync("pkill", ["-9", "-f", "resources/codex-acp"], {
+      stdio: "ignore",
+    });
+  } catch {
+    /* none running (or pkill could not be run) — best effort */
+  }
+}
diff --git a/packages/agent/e2e/guard.e2e.test.ts b/packages/agent/e2e/guard.e2e.test.ts
new file mode 100644
index 0000000000..bf859a4969
--- /dev/null
+++ b/packages/agent/e2e/guard.e2e.test.ts
@@ -0,0 +1,30 @@
+import { describe, expect, it } from "vitest";
+import { E2E } from "./config";
+
+/**
+ * Fail-loud precondition for the live e2e suite. Without E2E_GATEWAY_TOKEN every
+ * arm self-skips and `vitest run` exits 0 — a green run that tested nothing. This
+ * one non-skipped test turns a missing token into a RED run.
+ */
+describe("live e2e preconditions", () => {
+  it("requires E2E_GATEWAY_TOKEN (else the suite would skip-to-green)", () => {
+    expect(
+      E2E.hasToken,
+      "E2E_GATEWAY_TOKEN is not set — every adapter arm would skip and the run " +
+        "would pass without testing anything. Run via e2e/run-e2e.sh (it falls back to the local dev key) or " +
+        "set E2E_GATEWAY_TOKEN against a reachable E2E_GATEWAY_URL.",
+    ).toBe(true);
+  });
+
+  // When a token is present, the codex arm must not skip silently — a missing
+  // binary would let the run pass with zero codex coverage.
+  it("requires the native codex binary when a token is set (else codex skips-to-green)", () => {
+    if (!E2E.hasToken) return; // no token → whole suite skips; nothing to guard
+    expect(
+      E2E.skipReason("codex"),
+      "E2E_GATEWAY_TOKEN is set but the native codex binary is missing — the " +
+        "codex arm would silently skip and the run would pass without exercising " +
+        "the codex adapter. Ensure apps/code/scripts/download-binaries.mjs ran.",
+    ).toBeNull();
+  });
+});
diff --git a/packages/agent/e2e/run-e2e.sh b/packages/agent/e2e/run-e2e.sh
new file mode 100755
index 0000000000..37e18e4054
--- /dev/null
+++ b/packages/agent/e2e/run-e2e.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# Run the live golden-path e2e for both adapters (claude + codex).
+#
+# Needs a local llm-gateway (run `./bin/start` in the posthog repo) and a token.
+# The suite targets the gateway's `llm_gateway` product, which accepts a personal
+# API key (no OAuth), so if E2E_GATEWAY_TOKEN is unset this reads the repo's
+# hardcoded local dev key from ee/settings.py (override the repo with POSTHOG_REPO).
+# That key must be registered in the local DB — run `python manage.py
+# setup_local_api_key` in the posthog repo once if auth fails.
+#
+# Usage:
+#   bash e2e/run-e2e.sh              # both adapters, both suites
+#   bash e2e/run-e2e.sh -t "(codex)" # only the codex arm (vitest -t name filter)
+# Env overrides: E2E_GATEWAY_URL, E2E_CLAUDE_MODEL, E2E_CODEX_MODEL, E2E_DEBUG=1
+set -euo pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+AGENT_DIR="$(cd "$HERE/.." && pwd)"
+POSTHOG_REPO="${POSTHOG_REPO:-$(cd "$AGENT_DIR/../../.." && pwd)/posthog}"
+
+if [[ -z "${E2E_GATEWAY_TOKEN:-}" ]]; then
+  SETTINGS="$POSTHOG_REPO/ee/settings.py"
+  if [[ ! -f "$SETTINGS" ]]; then
+    echo "E2E_GATEWAY_TOKEN unset and posthog settings not found at $SETTINGS." >&2
+    echo "Set E2E_GATEWAY_TOKEN, or POSTHOG_REPO to the posthog checkout." >&2
+    exit 1
+  fi
+  # No OAuth mint needed: `llm_gateway` accepts personal API keys. `|| true` + `sed -n …p` leave the token empty on no match, so the check below reports it (pipefail would otherwise exit silently).
+  E2E_GATEWAY_TOKEN="$(grep -E '^DEV_API_KEY[[:space:]]*=' "$SETTINGS" | head -1 | sed -nE 's/^DEV_API_KEY[[:space:]]*=[[:space:]]*"([^"]+)".*/\1/p' || true)"
+fi
+
+if [[ -z "${E2E_GATEWAY_TOKEN:-}" ]]; then
+  echo "Failed to obtain an E2E_GATEWAY_TOKEN (no DEV_API_KEY in ee/settings.py?)." >&2
+  echo "If auth then fails, run 'python manage.py setup_local_api_key' in the posthog repo." >&2
+  exit 1
+fi
+
+export E2E_GATEWAY_TOKEN
+echo "token: ${E2E_GATEWAY_TOKEN:0:8}…  gateway: ${E2E_GATEWAY_URL:-http://localhost:3308/llm_gateway}"
+cd "$AGENT_DIR"
+pnpm test:e2e "$@"
diff --git a/packages/agent/e2e/session-lifecycle.e2e.test.ts b/packages/agent/e2e/session-lifecycle.e2e.test.ts
new file mode 100644
index 0000000000..4de1acf3e5
--- /dev/null
+++ b/packages/agent/e2e/session-lifecycle.e2e.test.ts
@@ -0,0 +1,572 @@
+import { afterAll, beforeAll, describe, expect, it } from "vitest";
+import { type Adapter, E2E } from "./config";
+import {
+  type Capture,
+  type ConfigOption,
+  cleanupRepo,
+  INIT_PARAMS,
+  killCodexStragglers,
+  type NewSessionResponse,
+  ORIGINAL_TARGET,
+  openConnection,
+  openSession,
+  readTarget,
+  setupRepo,
+  waitFor,
+} from "./driver";
+
+/**
+ * Live session-lifecycle e2e per adapter: drives a real session end to end against
+ * the real gateway + binary on a cheap model. Assertions are structural lifecycle
+ * invariants + the on-disk edit, never model prose. Opt-in: each arm self-skips
+ * unless `E2E_GATEWAY_TOKEN` is set (codex also needs the native binary).
+ */
+const ADAPTERS: Adapter[] = ["claude", "codex"];
+
+const EDIT_PROMPT =
+  "Do exactly these steps and nothing else: 1) Read the file target.txt. " +
+  "2) Edit it so the second line reads FOO instead of line2. " +
+  "3) Run the shell command `cat target.txt`. " +
+  "4) In one sentence confirm what you changed, then stop.";
+
+for (const adapter of ADAPTERS) {
+  const skip = E2E.skipReason(adapter);
+  const title = `session lifecycle (${adapter})${skip ? ` — SKIPPED (${skip})` : ""}`;
+  // Codex-only; skipped on the claude arm so the gap is visible.
+  const itCodex = adapter === "codex" ? it : it.skip;
+  // Read-only profile only tightens per-turn on macOS + non-cloud (elsewhere the
+  // spawn is danger-full-access / no profile), so gate to where it actually applies.
+  const itCodexSandbox =
+    adapter === "codex" &&
+    process.platform === "darwin" &&
+    E2E.environment !== "cloud"
+      ? it
+      : it.skip;
+
+  describe.skipIf(!!skip)(title, () => {
+    let repo: string;
+    const codexOptions = () =>
+      adapter === "codex" ? E2E.codexOptions(repo) : undefined;
+    const meta = (extra: Record<string, unknown> = {}) => ({
+      systemPrompt: "You are a coding assistant in a tiny test repo.",
+      model: E2E.model(adapter),
+      permissionMode: "bypassPermissions",
+      // Drives the cloud ext-notifications (_posthog/sdk_session + turn_complete).
+      taskRunId: "e2e-run",
+      ...extra,
+    });
+
+    let sessionId: string;
+    let newSessionResponse: NewSessionResponse;
+    let turn:
+      | { stopReason?: string; capture: Capture; target: string }
+      | undefined;
+    let goldenError: unknown;
+
+    beforeAll(async () => {
+      if (adapter === "codex") killCodexStragglers();
+      E2E.configureEnv(adapter);
+      repo = setupRepo();
+      const s = await openSession({
+        adapter,
+        cwd: repo,
+        codexOptions: codexOptions(),
+        meta: meta(),
+      });
+      sessionId = s.sessionId;
+      newSessionResponse = s.newSession;
+      try {
+        const res = await s.conn.prompt({
+          sessionId,
+          prompt: [{ type: "text", text: EDIT_PROMPT }],
+        });
+        turn = {
+          stopReason: res.stopReason,
+          capture: s.capture,
+          target: readTarget(repo),
+        };
+      } catch (err) {
+        // Don't fail the whole describe on a flaky golden turn — record it so only
+        // the test that consumes `turn` fails.
+        goldenError = err;
+      } finally {
+        await s.cleanup();
+      }
+    }, 180_000);
+
+    afterAll(() => {
+      cleanupRepo(repo);
+    });
+
+    it("newSession exposes selectable config options (model / effort)", () => {
+      const opts = newSessionResponse.configOptions ?? [];
+      expect(opts.length).toBeGreaterThan(0);
+      expect(opts.some((o) => (o.options?.length ?? 0) > 1)).toBe(true);
+    });
+
+    it("streams a working turn: assistant text, tool calls, usage, file edit", () => {
+      if (goldenError) throw goldenError;
+      if (!turn) throw new Error("golden turn did not produce a result");
+      expect(turn.stopReason).toBe("end_turn");
+      expect(
+        turn.capture.updates("agent_message_chunk").length,
+      ).toBeGreaterThan(0);
+      expect(turn.capture.updates("tool_call").length).toBeGreaterThan(0);
+      const anyToolCompleted = [
+        ...turn.capture.updates("tool_call"),
+        ...turn.capture.updates("tool_call_update"),
+      ].some((e) => e.data?.status === "completed");
+      expect(anyToolCompleted).toBe(true);
+
+      const hasUsage =
+        turn.capture.updates("usage_update").length > 0 ||
+        turn.capture.extMethods().includes("_posthog/usage_update");
+      expect(hasUsage).toBe(true);
+
+      expect(turn.capture.extMethods()).toContain("_posthog/sdk_session");
+
+      expect(turn.target).not.toBe(ORIGINAL_TARGET);
+      expect(turn.target).toContain("FOO");
+
+      // codex additionally emits turn_complete; claude signals completion via the prompt response.
+      if (adapter === "codex") {
+        // Reasoning parity is unit-covered (mapping.test.ts); a live assertion
+        // would be flaky on the cheap model.
+        expect(turn.capture.extMethods()).toContain("_posthog/turn_complete");
+        const tc = turn.capture.events.find(
+          (e) =>
+            e.kind === "extNotification" &&
+            e.method === "_posthog/turn_complete",
+        );
+        const usage = (tc?.data as { usage?: Record<string, number> })?.usage;
+        expect(usage).toBeTruthy();
+        expect(usage?.totalTokens ?? 0).toBeGreaterThan(0);
+        expect(usage?.totalTokens).toBe(
+          (usage?.inputTokens ?? 0) +
+            (usage?.outputTokens ?? 0) +
+            (usage?.cachedReadTokens ?? 0) +
+            (usage?.cachedWriteTokens ?? 0),
+        );
+      }
+    });
+
+    it("switches a config option via setSessionConfigOption", async () => {
+      const s = await openSession({
+        adapter,
+        cwd: repo,
+        codexOptions: codexOptions(),
+        meta: meta(),
+      });
+      try {
+        const opt = (s.newSession.configOptions ?? []).find(
+          (o) => (o.options?.length ?? 0) > 1,
+        );
+        expect(
+          opt,
+          "expected a config option with multiple values",
+        ).toBeTruthy();
+        const alt =
+          opt?.options?.find((v) => v.value !== opt.currentValue) ??
+          opt?.options?.[0];
+        const res = await s.conn.setSessionConfigOption({
+          sessionId: s.sessionId,
+          configId: opt?.id,
+          value: alt?.value,
+        });
+        expect(res).toBeTruthy();
+        if (adapter === "codex") {
+          // codex re-emits config_option_update as the side effect of a switch.
+          expect(
+            s.capture.updates("config_option_update").length,
+          ).toBeGreaterThan(0);
+        } else {
+          // claude returns updated configOptions — assert the switch actually took,
+          // not merely that an ack array was produced (unconditionally true).
+          const updated = ((res?.configOptions ?? []) as ConfigOption[]).find(
+            (o) => o.id === opt?.id,
+          );
+          expect(updated?.currentValue).toBe(alt?.value);
+        }
+      } finally {
+        await s.cleanup();
+      }
+    }, 90_000);
+
+    // Cloud host switches mode only via setSessionConfigOption(configId:"mode"), so exercise both arms.
+    it("emits current_mode_update when the mode is switched via setSessionConfigOption", async () => {
+      if (adapter === "codex") killCodexStragglers();
+      const s = await openSession({
+        adapter,
+        cwd: repo,
+        codexOptions: codexOptions(),
+        meta: meta(),
+      });
+      try {
+        // codex synthesizes modes; claude exposes a "mode" configOption — pick an alternate value.
+        let value = "read-only";
+        if (adapter === "claude") {
+          const modeOpt = (s.newSession.configOptions ?? []).find(
+            (o) => o.id === "mode",
+          );
+          value =
+            (modeOpt?.options?.find((v) => v.value !== modeOpt.currentValue)
+              ?.value as string) ?? "plan";
+        }
+        await s.conn.setSessionConfigOption({
+          sessionId: s.sessionId,
+          configId: "mode",
+          value,
+        });
+        expect(s.capture.updates("current_mode_update").length).toBeGreaterThan(
+          0,
+        );
+      } finally {
+        await s.cleanup();
+      }
+    }, 60_000);
+
+    // Proves the mode picker isn't cosmetic: read-only maps to an OS-level
+    // :read-only profile that blocks the write even though the host auto-approves.
+    // macOS-only (see itCodexSandbox).
+    itCodexSandbox(
+      "read-only mode actually blocks a file edit (sandbox restricts, not just approval)",
+      async () => {
+        if (adapter === "codex") killCodexStragglers();
+        const s = await openSession({
+          adapter,
+          cwd: repo,
+          codexOptions: codexOptions(),
+          meta: meta(),
+        });
+        try {
+          await s.conn.setSessionConfigOption({
+            sessionId: s.sessionId,
+            configId: "mode",
+            value: "read-only",
+          });
+          const before = readTarget(repo);
+          const res = await s.conn.prompt({
+            sessionId: s.sessionId,
+            prompt: [
+              {
+                type: "text",
+                text:
+                  "Use your file-editing tool to change target.txt so its second " +
+                  "line reads SENTINEL_RO_EDIT. You MUST attempt the edit with your " +
+                  "tool even if it appears restricted. Then stop.",
+              },
+            ],
+          });
+          expect(res.stopReason).toBeTruthy();
+          // >=1 tool call, so a pure prose no-op can't masquerade as enforcement.
+          expect(s.capture.updates("tool_call").length).toBeGreaterThan(0);
+          // File unchanged: the read-only sandbox blocked the write despite host auto-approval.
+          expect(readTarget(repo)).toBe(before);
+          expect(readTarget(repo)).not.toContain("SENTINEL_RO_EDIT");
+        } finally {
+          await s.cleanup();
+        }
+      },
+      180_000,
+    );
+
+    // Proves Plan is a real mode: codex only offers request_user_input in its plan
+    // collaboration mode. Also covers the revert — the collaboration mode is sticky,
+    // so switching back to auto must push default explicitly.
+    itCodex(
+      "plan mode engages codex's plan collaboration, and reverts when switched back to auto",
+      async () => {
+        if (adapter === "codex") killCodexStragglers();
+        const s = await openSession({
+          adapter,
+          cwd: repo,
+          codexOptions: codexOptions(),
+          meta: meta(),
+        });
+        const askToUseTool =
+          "Before doing anything else, you MUST call the request_user_input tool " +
+          "to ask the user a single question: whether to proceed with approach A " +
+          "or approach B. Ask exactly that one question via the tool, then stop.";
+        const questionCount = () =>
+          s.capture
+            .approvals()
+            .filter((e) => e.data?.codeToolKind === "question").length;
+        try {
+          await s.conn.setSessionConfigOption({
+            sessionId: s.sessionId,
+            configId: "mode",
+            value: "plan",
+          });
+          await s.conn.prompt({
+            sessionId: s.sessionId,
+            prompt: [{ type: "text", text: askToUseTool }],
+          });
+          const afterPlan = questionCount();
+          expect(afterPlan).toBeGreaterThan(0);
+
+          // Switch back to auto: request_user_input is gone, so the same prompt yields no new question.
+          await s.conn.setSessionConfigOption({
+            sessionId: s.sessionId,
+            configId: "mode",
+            value: "auto",
+          });
+          await s.conn.prompt({
+            sessionId: s.sessionId,
+            prompt: [{ type: "text", text: askToUseTool }],
+          });
+          expect(questionCount()).toBe(afterPlan);
+        } finally {
+          await s.cleanup();
+        }
+      },
+      240_000,
+    );
+
+    it("handles the host's refresh_session extMethod per adapter", async () => {
+      if (adapter === "codex") killCodexStragglers();
+      const s = await openSession({
+        adapter,
+        cwd: repo,
+        codexOptions: codexOptions(),
+        meta: meta(),
+      });
+      try {
+        const call = s.conn.extMethod("_posthog/refresh_session", {
+          mcpServers: [],
+        });
+        if (adapter === "claude") {
+          // claude implements refresh_session; haiku is on the MCP-injection exclude
+          // list, so it rejects on the model gate (not method-not-found), proving the
+          // call reaches the handler.
+          await expect(call).rejects.toThrow(/MCP injection/i);
+        } else {
+          // codex doesn't implement extMethod — the call rejects cleanly (known adapter divergence).
+          await expect(call).rejects.toThrow();
+        }
+      } finally {
+        await s.cleanup();
+      }
+    }, 60_000);
+
+    // Known gap: the approval {decision} round-trip and requestPermission policy
+    // aren't exercised here (codex auto-approves under danger-full-access) —
+    // unit-covered in codex-app-server-agent.test.ts / approvals.test.ts.
+
+    it("incorporates a prompt's _meta.prContext without error", async () => {
+      if (adapter === "codex") killCodexStragglers();
+      const s = await openSession({
+        adapter,
+        cwd: repo,
+        codexOptions: codexOptions(),
+        meta: meta(),
+      });
+      try {
+        // The host attaches prContext on PR-follow-up runs; both adapters prepend it.
+        const res = await s.conn.prompt({
+          sessionId: s.sessionId,
+          prompt: [
+            {
+              type: "text",
+              text: "Acknowledge the linked pull request in one short sentence, then stop.",
+            },
+          ],
+          _meta: {
+            prContext:
+              "Context: PR #4242 'Fix the thing' is open and under review.",
+          },
+        });
+        expect(res.stopReason).toBe("end_turn");
+        expect(s.capture.updates("agent_message_chunk").length).toBeGreaterThan(
+          0,
+        );
+      } finally {
+        await s.cleanup();
+      }
+    }, 120_000);
+
+    itCodex(
+      "folds a mid-turn prompt into the running turn via steering",
+      async () => {
+        killCodexStragglers();
+        const s = await openSession({
+          adapter,
+          cwd: repo,
+          codexOptions: codexOptions(),
+          meta: meta(),
+        });
+        try {
+          const p1 = s.conn.prompt({
+            sessionId: s.sessionId,
+            prompt: [
+              {
+                type: "text",
+                text: "Count up from 1, one number per line, and keep going.",
+              },
+            ],
+          });
+          await waitFor(
+            () =>
+              s.capture.updates("agent_message_chunk").length > 0
+                ? true
+                : undefined,
+            20_000,
+          );
+          const p2 = s.conn.prompt({
+            sessionId: s.sessionId,
+            prompt: [{ type: "text", text: "Now stop and say DONE." }],
+          });
+          const [r1] = await Promise.all([p1, p2]);
+          expect(r1.stopReason).toBe("end_turn");
+          expect(
+            s.capture.updates("user_message_chunk").length,
+          ).toBeGreaterThanOrEqual(2);
+          // The steer proof: folded into a SINGLE turn (one turn_complete). Two would
+          // mean the steer didn't take and p2 ran as its own turn.
+          const turnCompletes = s.capture.events.filter(
+            (e) =>
+              e.kind === "extNotification" &&
+              e.method === "_posthog/turn_complete",
+          ).length;
+          expect(
+            turnCompletes,
+            "expected the steered prompt to fold into one running turn (1 " +
+              "turn_complete); 2 means the steer didn't take",
+          ).toBe(1);
+        } finally {
+          await s.cleanup();
+        }
+      },
+      120_000,
+    );
+
+    itCodex(
+      "lists the session and forks it",
+      async () => {
+        killCodexStragglers();
+        const b = openConnection({
+          adapter,
+          cwd: repo,
+          codexOptions: codexOptions(),
+        });
+        try {
+          await b.conn.initialize(INIT_PARAMS);
+          const listed = await b.conn.listSessions({ cwd: repo });
+          const ids = (listed.sessions ?? []).map((x) => x.sessionId);
+          expect(ids).toContain(sessionId);
+          const forked = await b.conn.unstable_forkSession({
+            sessionId,
+            cwd: repo,
+            mcpServers: [],
+            _meta: { model: E2E.model(adapter) },
+          });
+          expect(forked.sessionId).toBeTruthy();
+          expect(forked.sessionId).not.toBe(sessionId);
+        } finally {
+          await b.cleanup();
+        }
+      },
+      60_000,
+    );
+
+    // Known gap: the permission DENY path isn't exercised (neither arm reliably
+    // surfaces a deny-able approval to a cheap model) — unit-covered in
+    // approvals.test.ts / codex-app-server-agent.test.ts.
+
+    it("interrupts an in-flight turn", async () => {
+      if (adapter === "codex") killCodexStragglers();
+      const s = await openSession({
+        adapter,
+        cwd: repo,
+        codexOptions: codexOptions(),
+        meta: meta(),
+      });
+      try {
+        const p = s.conn.prompt({
+          sessionId: s.sessionId,
+          prompt: [
+            {
+              type: "text",
+              text: "Count up from 1, one number per line, and never stop until told to.",
+            },
+          ],
+        });
+        // Cancel as soon as the turn is in flight (unbounded work, so no race).
+        await waitFor(
+          () =>
+            s.capture.updates("agent_message_chunk").length > 0 ||
+            s.capture.updates("tool_call").length > 0
+              ? true
+              : undefined,
+          20_000,
+        );
+        await s.conn.cancel({ sessionId: s.sessionId });
+        const res = await p;
+        expect(res.stopReason).toBe("cancelled");
+
+        // After a cancel the session must be usable again — a bounded follow-up must complete.
+        const followUp = await s.conn.prompt({
+          sessionId: s.sessionId,
+          prompt: [{ type: "text", text: "Stop. Reply with just: OK" }],
+        });
+        expect(followUp.stopReason).toBe("end_turn");
+      } finally {
+        await s.cleanup();
+      }
+    }, 120_000);
+
+    it("resumeSession reconnects and returns config options", async () => {
+      if (adapter === "codex") killCodexStragglers();
+      const b = openConnection({
+        adapter,
+        cwd: repo,
+        codexOptions: codexOptions(),
+      });
+      try {
+        await b.conn.initialize(INIT_PARAMS);
+        const resumed = await b.conn.resumeSession({
+          sessionId,
+          cwd: repo,
+          mcpServers: [],
+          _meta: { model: E2E.model(adapter) },
+        });
+        expect(resumed).toBeTruthy();
+        expect(Array.isArray(resumed.configOptions)).toBe(true);
+      } finally {
+        await b.cleanup();
+      }
+    }, 60_000);
+
+    it("reattach (loadSession) restores the session and replays the transcript", async () => {
+      if (adapter === "codex") killCodexStragglers();
+      const b = openConnection({
+        adapter,
+        cwd: repo,
+        codexOptions: codexOptions(),
+      });
+      try {
+        await b.conn.initialize(INIT_PARAMS);
+        const loaded = await b.conn.loadSession({
+          sessionId,
+          cwd: repo,
+          mcpServers: [],
+          _meta: { model: E2E.model(adapter) },
+        });
+        expect(loaded).toBeTruthy();
+        // loadSession runs no turn, so any update here is replayed history. The
+        // shape differs by adapter: codex replays message chunks, claude tool calls.
+        const replayed = await waitFor(() => {
+          const n =
+            adapter === "codex"
+              ? b.capture.updates("user_message_chunk").length +
+                b.capture.updates("agent_message_chunk").length
+              : b.capture.updates("tool_call").length +
+                b.capture.updates("tool_call_update").length;
+          return n > 0 ? n : undefined;
+        }, 8000);
+        expect(replayed ?? 0).toBeGreaterThan(0);
+      } finally {
+        await b.cleanup();
+      }
+    }, 60_000);
+  });
+}
diff --git a/packages/agent/e2e/structured-output.e2e.test.ts b/packages/agent/e2e/structured-output.e2e.test.ts
new file mode 100644
index 0000000000..05cddc1003
--- /dev/null
+++ b/packages/agent/e2e/structured-output.e2e.test.ts
@@ -0,0 +1,85 @@
+import { afterAll, beforeAll, describe, expect, it } from "vitest";
+import { type Adapter, E2E } from "./config";
+import {
+  cleanupRepo,
+  killCodexStragglers,
+  openSession,
+  setupRepo,
+} from "./driver";
+
+/**
+ * Live structured-output e2e: both adapters constrain the final message to a JSON
+ * schema (`_meta.jsonSchema`) and deliver the parsed object via `onStructuredOutput`
+ * — the contract the signals pipeline relies on. Deterministic answer so a cheap
+ * model passes reliably. Opt-in (same gating as the lifecycle suite).
+ */
+const ADAPTERS: Adapter[] = ["claude", "codex"];
+
+const SCHEMA = {
+  type: "object",
+  properties: { capital: { type: "string" } },
+  required: ["capital"],
+  additionalProperties: false,
+};
+
+for (const adapter of ADAPTERS) {
+  const skip = E2E.skipReason(adapter);
+  const title = `structured output (${adapter})${skip ? ` — SKIPPED (${skip})` : ""}`;
+
+  describe.skipIf(!!skip)(title, () => {
+    let repo: string;
+
+    beforeAll(() => {
+      if (adapter === "codex") killCodexStragglers();
+      E2E.configureEnv(adapter);
+      repo = setupRepo();
+    });
+
+    afterAll(() => {
+      cleanupRepo(repo);
+    });
+
+    it("delivers schema-constrained structured output", async () => {
+      let captured: Record<string, unknown> | undefined;
+      // The cheapest models hang on the constrained decode; use a stronger one.
+      const model = E2E.strongModel(adapter);
+      const s = await openSession({
+        adapter,
+        cwd: repo,
+        codexOptions:
+          adapter === "codex"
+            ? E2E.codexOptions(repo, undefined, model)
+            : undefined,
+        onStructuredOutput: async (o) => {
+          captured = o;
+        },
+        meta: {
+          systemPrompt: "You answer strictly with JSON matching the schema.",
+          model,
+          permissionMode: "bypassPermissions",
+          jsonSchema: SCHEMA,
+          // Prod always sets taskRunId — exercise structured output + the session ext-notification together.
+          taskRunId: "e2e-structured",
+        },
+      });
+      try {
+        const res = await s.conn.prompt({
+          sessionId: s.sessionId,
+          prompt: [
+            {
+              type: "text",
+              text: "What is the capital of France? Answer using the required JSON schema.",
+            },
+          ],
+        });
+        expect(res.stopReason).toBe("end_turn");
+        expect(captured, "onStructuredOutput should fire").toBeTruthy();
+        expect(typeof captured?.capital).toBe("string");
+        expect((captured?.capital as string).toLowerCase()).toContain("paris");
+        expect(s.capture.extMethods()).toContain("_posthog/sdk_session");
+      } finally {
+        await s.cleanup();
+      }
+    }, 120_000);
+  });
+}
diff --git a/packages/agent/package.json b/packages/agent/package.json
index 1ccc632d6e..43d4a980f2 100644
--- a/packages/agent/package.json
+++ b/packages/agent/package.json
@@ -108,6 +108,7 @@
     "dev": "tsup --watch",
     "test": "vitest run",
     "test:watch": "vitest",
+    "test:e2e": "vitest run --config vitest.e2e.config.ts",
     "typecheck": "pnpm exec tsc --noEmit",
     "prepublishOnly": "pnpm run build",
     "clean": "node ../../scripts/rimraf.mjs dist .turbo"
@@ -132,6 +133,7 @@
     "@anthropic-ai/claude-agent-sdk": "0.3.170",
     "@anthropic-ai/sdk": "0.104.1",
     "@hono/node-server": "^1.19.9",
+    "@openai/codex": "0.140.0",
     "@opentelemetry/api-logs": "^0.208.0",
     "@opentelemetry/exporter-logs-otlp-http": "^0.208.0",
     "@opentelemetry/resources": "^2.0.0",
diff --git a/packages/agent/parity/harness.ts b/packages/agent/parity/harness.ts
new file mode 100644
index 0000000000..0132b1e7a8
--- /dev/null
+++ b/packages/agent/parity/harness.ts
@@ -0,0 +1,242 @@
+/**
+ * Differential parity harness for the two Codex adapters.
+ *
+ * Drives a scripted scenario (a stateful sequence of ACP client operations)
+ * through one codex adapter — selected by the POSTHOG_CODEX_USE_ACP env toggle —
+ * over the same in-process ACP transport the real host uses, and captures the
+ * full ACP stream (every sessionUpdate, every server→client requestPermission,
+ * and each call's response). Run the same scenario through both adapters and
+ * diff the captured streams to find parity gaps. No HTTP/JWT/Temporal.
+ */
+import { promises as fs } from "node:fs";
+import { resolve } from "node:path";
+// @ts-expect-error - resolved by tsx at runtime
+import { ClientSideConnection, ndJsonStream } from "@agentclientprotocol/sdk";
+import { createAcpConnection } from "../src/adapters/acp-connection";
+import type { Logger } from "../src/utils/logger";
+
+export type AdapterMode = "acp" | "app-server";
+
+export interface CapturedEvent {
+  t: number;
+  kind:
+    | "step"
+    | "sessionUpdate"
+    | "requestPermission"
+    | "extNotification"
+    | "extMethod";
+  op?: string;
+  sessionUpdate?: string;
+  data?: any;
+}
+
+export interface CapturedRun {
+  adapter: AdapterMode;
+  scenario: string;
+  events: CapturedEvent[];
+  stepResults: Array<{ op: string; ok: boolean; result?: any; error?: string }>;
+  fatalError?: string;
+}
+
+export interface ScenarioCtx {
+  cwd: string;
+  model?: string;
+  /** Run one ACP operation, record it as a step boundary + its (redacted) result. */
+  step<T>(op: string, fn: () => Promise<T>): Promise<T>;
+}
+
+export interface Scenario {
+  name: string;
+  run: (conn: any, ctx: ScenarioCtx) => Promise<void>;
+}
+
+export interface HarnessConfig {
+  cwd: string;
+  codexOptions: {
+    cwd: string;
+    binaryPath?: string;
+    apiBaseUrl?: string;
+    apiKey?: string;
+    model?: string;
+    reasoningEffort?: string;
+  };
+  /** Override flag plumbing once the migration adds useCodexAppServer. */
+  selectAppServer?: boolean;
+  timeoutMs?: number;
+  logger?: Logger;
+}
+
+/** Keep result shapes comparable: drop big/nondeterministic blobs, keep structure. */
+function redact(value: any): any {
+  if (!value || typeof value !== "object") return value;
+  const out: any = {};
+  for (const [k, v] of Object.entries(value)) {
+    if (k === "sessionId") out[k] = "<id>";
+    else if (k === "configOptions" && Array.isArray(v)) {
+      // Prefer ACP's `currentValue` / per-choice `value`; old names are fallbacks.
+      out[k] = v.map((o: any) => ({
+        id: o?.id,
+        category: o?.category,
+        value: o?.currentValue ?? o?.value,
+        options: (o?.options ?? []).map(
+          (x: any) => x?.value ?? x?.id ?? x?.optionId,
+        ),
+      }));
+    } else if (k === "modes") {
+      const m = v as any;
+      out[k] = {
+        currentModeId: m?.currentModeId,
+        availableModes: (m?.availableModes ?? []).map((x: any) => x?.id),
+      };
+    } else if (k === "usage" && v && typeof v === "object") {
+      // Token counts vary run to run; keep only whether each one is positive.
+      out[k] = {};
+      for (const [uk, uv] of Object.entries(v))
+        out[k][uk] = typeof uv === "number" ? (uv > 0 ? ">0" : 0) : uv;
+    } else if (typeof v === "string" && v.length > 120)
+      out[k] = `<str:${v.length}>`;
+    else out[k] = v;
+  }
+  return out;
+}
+
+/**
+ * Drive `scenario` through the codex adapter chosen by `mode` and capture the
+ * ACP stream. Initialize/scenario errors land in `fatalError`, not a throw.
+ */
+export async function runScenario(
+  mode: AdapterMode,
+  scenario: Scenario,
+  cfg: HarnessConfig,
+): Promise<CapturedRun> {
+  // Select the adapter. Until the migration adds a passed-in option, the env
+  // toggle is the only lever: set => codex-acp, unset => native app-server.
+  if (mode === "acp") process.env.POSTHOG_CODEX_USE_ACP = "1";
+  else delete process.env.POSTHOG_CODEX_USE_ACP;
+
+  const captured: CapturedRun = {
+    adapter: mode,
+    scenario: scenario.name,
+    events: [],
+    stepResults: [],
+  };
+  let ord = 0;
+  // Every captured event gets the next ordinal so streams can be ordered.
+  const record = (e: Omit<CapturedEvent, "t">) =>
+    captured.events.push({ t: ord++, ...e });
+
+  const client = {
+    async sessionUpdate(p: any): Promise<void> {
+      record({
+        kind: "sessionUpdate",
+        sessionUpdate: p?.update?.sessionUpdate,
+        data: p?.update,
+      });
+    },
+    async requestPermission(p: any): Promise<any> {
+      record({
+        kind: "requestPermission",
+        data: {
+          title: p?.toolCall?.title,
+          kind: p?.toolCall?.kind,
+          options: (p?.options ?? []).map((o: any) => ({
+            id: o?.optionId,
+            kind: o?.kind,
+          })),
+        },
+      });
+      // Auto-approve: prefer an allow option, else the first one offered.
+      const allow =
+        (p?.options ?? []).find(
+          (o: any) => o?.kind === "allow_once" || o?.kind === "allow_always",
+        ) ?? p?.options?.[0];
+      return {
+        outcome: { outcome: "selected", optionId: allow?.optionId ?? "allow" },
+      };
+    },
+    async readTextFile(p: any): Promise<any> {
+      return { content: await fs.readFile(resolve(cfg.cwd, p.path), "utf8") };
+    },
+    async writeTextFile(p: any): Promise<any> {
+      await fs.writeFile(resolve(cfg.cwd, p.path), p.content);
+      return {};
+    },
+    // PostHog ext-notifications (_posthog/usage_update, _posthog/turn_complete,
+    // _posthog/sdk_session, ...) are part of the parity surface and are sent
+    // outside sessionUpdate — capture them so the report covers them.
+    async extNotification(method: string, params: any): Promise<void> {
+      record({ kind: "extNotification", op: method, data: redact(params) });
+    },
+    async extMethod(method: string, params: any): Promise<any> {
+      record({ kind: "extMethod", op: method, data: redact(params) });
+      return {};
+    },
+  };
+
+  const acp = createAcpConnection({
+    adapter: "codex",
+    codexOptions: cfg.codexOptions as any,
+    logger: cfg.logger,
+  });
+  const { writable, readable } = acp.clientStreams;
+  const stream = ndJsonStream(writable, readable);
+  const conn = new ClientSideConnection(() => client, stream);
+
+  const ctx: ScenarioCtx = {
+    cwd: cfg.cwd,
+    model: cfg.codexOptions.model,
+    async step(op, fn) {
+      record({ kind: "step", op });
+      const started = Date.now();
+      console.error(`  [step] ${op} ...`);
+      try {
+        const result = await fn();
+        console.error(`  [step] ${op} ✓ (${Date.now() - started}ms)`);
+        captured.stepResults.push({ op, ok: true, result: redact(result) });
+        return result;
+      } catch (e: any) {
+        console.error(
+          `  [step] ${op} ✗ (${Date.now() - started}ms): ${String(e?.message ?? e)}`,
+        );
+        captured.stepResults.push({
+          op,
+          ok: false,
+          error: String(e?.message ?? e),
+        });
+        throw e;
+      }
+    },
+  };
+
+  // One deadline covers initialize + the scenario; the handle is kept so the
+  // timer is cleared afterwards and cannot hold the event loop open.
+  const timeoutMs = cfg.timeoutMs ?? 180000;
+  let timer: ReturnType<typeof setTimeout> | undefined;
+  const timeout = new Promise<never>((_, rej) => {
+    const fail = () => rej(new Error(`scenario timeout after ${timeoutMs}ms`));
+    timer = setTimeout(fail, timeoutMs);
+  });
+  try {
+    const init = () =>
+      conn.initialize({
+        protocolVersion: 1,
+        clientCapabilities: { fs: { readTextFile: true, writeTextFile: true } },
+      });
+    await Promise.race([ctx.step("initialize", init), timeout]);
+    await Promise.race([scenario.run(conn, ctx), timeout]);
+  } catch (e: any) {
+    captured.fatalError = String(e?.message ?? e);
+  } finally {
+    clearTimeout(timer);
+    // Bounded: a wedged adapter cleanup must never hang the loop.
+    let guard: ReturnType<typeof setTimeout> | undefined;
+    await Promise.race([
+      acp.cleanup().catch(() => undefined),
+      new Promise<void>((done) => {
+        guard = setTimeout(done, 5000);
+      }),
+    ]);
+    clearTimeout(guard);
+  }
+  return captured;
+}
diff --git a/packages/agent/parity/run.ts b/packages/agent/parity/run.ts
new file mode 100644
index 0000000000..01a7c4d7eb
--- /dev/null
+++ b/packages/agent/parity/run.ts
@@ -0,0 +1,408 @@
+/**
+ * Parity runner: drive scenarios through both codex adapters, extract a
+ * normalized feature report from each ACP stream, and diff app-server vs
+ * codex-acp. Writes raw captures + parity-report.json to parity/out/.
+ *
+ * Usage (from packages/agent):
+ *   PARITY_API_KEY=<token> pnpm exec tsx parity/run.ts [--only acp|app-server] [--scenario name]
+ * Env:
+ *   PARITY_GATEWAY_URL  default http://localhost:3308/posthog_code/v1
+ *   PARITY_API_KEY      PostHog token the local llm-gateway accepts (required for a live run)
+ *   PARITY_MODEL        default gpt-5.5
+ */
+import { execFileSync } from "node:child_process";
+import { existsSync, mkdirSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import { Logger } from "../src/utils/logger";
+import {
+  type AdapterMode,
+  type CapturedRun,
+  runScenario,
+  type Scenario,
+} from "./harness";
+
+const OUT_DIR = join(import.meta.dirname, "out"); // captures, feature dumps and parity-report.json land here
+const RESOURCES = join(
+  import.meta.dirname,
+  "..",
+  "..",
+  "..",
+  "apps",
+  "code",
+  "resources",
+  "codex-acp",
+); // i.e. apps/code/resources/codex-acp, three levels above this file's directory
+const CODEX_ACP_BIN = join(RESOURCES, "codex-acp"); // passed to the harness as codexOptions.binaryPath
+const NATIVE_CODEX_BIN = join(RESOURCES, "codex"); // the app-server arm only runs when this file exists
+const GATEWAY =
+  process.env.PARITY_GATEWAY_URL ?? "http://localhost:3308/posthog_code/v1";
+const API_KEY = process.env.PARITY_API_KEY ?? ""; // empty string when unset
+const MODEL = process.env.PARITY_MODEL ?? "gpt-5.5";
+const REPO = "/tmp/codex-parity-repo"; // scratch git repo used as cwd for every run; see setupRepo()
+
+// Scenarios driven through each adapter arm; every ACP call goes via ctx.step.
+const SCENARIOS: Scenario[] = [
+  {
+    name: "basic-task",
+    async run(conn, ctx) {
+      const session = await ctx.step("newSession", () =>
+        conn.newSession({
+          cwd: ctx.cwd,
+          mcpServers: [],
+          _meta: {
+            sessionId: "parity",
+            systemPrompt: "You are a coding assistant in a tiny test repo.",
+            model: ctx.model,
+            permissionMode: "bypassPermissions",
+          },
+        }),
+      );
+      const sessionId = session.sessionId;
+      await ctx.step("prompt", () =>
+        conn.prompt({
+          sessionId,
+          prompt: [
+            {
+              type: "text",
+              text: "Do exactly these steps and nothing else: 1) Read the file target.txt. 2) Edit it so the second line reads FOO instead of line2. 3) Run the shell command `cat target.txt`. 4) In one sentence confirm what you changed, then stop.",
+            },
+          ],
+        }),
+      );
+    },
+  },
+  {
+    name: "modes-and-resume",
+    async run(conn, ctx) {
+      const session = await ctx.step("newSession", () =>
+        conn.newSession({
+          cwd: ctx.cwd,
+          mcpServers: [],
+          _meta: {
+            sessionId: "parity2",
+            systemPrompt: "You are a coding assistant.",
+            model: ctx.model,
+            permissionMode: "auto",
+          },
+        }),
+      );
+      const sessionId = session.sessionId;
+      // Mode switch — codex-acp supports it; app-server gap until migration.
+      // A rejection propagates unchanged: ctx.step records the failed step and
+      // rethrows, which ends this scenario (the harness stores it as the run's
+      // fatalError).
+      await ctx.step("setSessionConfigOption(mode)", () =>
+        conn.setSessionConfigOption({
+          sessionId,
+          configId: "mode",
+          value: "read-only",
+        }),
+      );
+      await ctx.step("prompt", () =>
+        conn.prompt({
+          sessionId,
+          prompt: [
+            {
+              type: "text",
+              text: "List the files in this repo with `ls`, then stop.",
+            },
+          ],
+        }),
+      );
+      // Resume in the same connection (host calls resumeSession on reconnect).
+      await ctx.step("resumeSession", () =>
+        conn.resumeSession({
+          sessionId,
+          cwd: ctx.cwd,
+          mcpServers: [],
+          _meta: {
+            systemPrompt: "You are a coding assistant.",
+            model: ctx.model,
+          },
+        }),
+      );
+    },
+  },
+];
+
+/** Reduces a captured run to the normalized feature report that gets diffed. */
+function extractFeatures(run: CapturedRun): Record<string, any> {
+  const seenUpdates = new Set<string>();
+  const kinds = new Set<string>();
+  const statuses = new Set<string>();
+  const usageKeys = new Set<string>();
+  const notifications = new Set<string>();
+  let approvalCount = 0;
+  let sawDiff = false;
+  let sawToolContent = false;
+  let sawModeChange = false;
+
+  for (const event of run.events) {
+    if (event.kind === "requestPermission") approvalCount += 1;
+    else if (event.kind === "extNotification")
+      notifications.add(event.op ?? "?");
+    if (event.kind !== "sessionUpdate") continue;
+    const update = event.sessionUpdate ?? "?";
+    const payload = event.data ?? {};
+    seenUpdates.add(update);
+    switch (update) {
+      case "tool_call":
+        if (payload.kind) kinds.add(payload.kind);
+        if (payload.status) statuses.add(payload.status);
+        break;
+      case "tool_call_update": {
+        if (payload.status) statuses.add(payload.status);
+        const parts = payload.content ?? [];
+        if (Array.isArray(parts)) {
+          if (parts.some((p) => p?.type === "diff")) sawDiff = true;
+          if (parts.some((p) => p?.type === "content")) sawToolContent = true;
+        }
+        // A diff may also show up only in the raw tool input/output.
+        const rawOutputMentionsDiff =
+          typeof payload.rawOutput === "string" &&
+          payload.rawOutput.includes("diff");
+        if (payload.rawInput?.diff || rawOutputMentionsDiff) sawDiff = true;
+        break;
+      }
+      case "current_mode_update":
+      case "config_option_update":
+        sawModeChange = true;
+        break;
+      case "usage_update":
+        for (const key of Object.keys(payload.usage ?? payload))
+          usageKeys.add(key);
+        break;
+    }
+  }
+
+  // The newSession response advertises configOptions / modes.
+  const newSession =
+    run.stepResults.find((step) => step.op === "newSession")?.result ?? {};
+  const categories = (newSession.configOptions ?? [])
+    .map((option: any) => option.category)
+    .filter(Boolean);
+  // Prompt responses may carry usage and a stopReason.
+  const promptResults = run.stepResults
+    .filter((step) => step.op === "prompt")
+    .map((step) => step.result ?? {});
+
+  return {
+    fatalError: run.fatalError ?? null,
+    updateTypes: [...seenUpdates].sort(),
+    toolKinds: [...kinds].sort(),
+    toolStatuses: [...statuses].sort(),
+    hasDiffContent: sawDiff,
+    hasToolContent: sawToolContent,
+    hasUsage:
+      promptResults.some((r) => r.usage && Object.keys(r.usage).length > 0) ||
+      seenUpdates.has("usage_update") ||
+      notifications.has("_posthog/usage_update"),
+    usageFields: [...usageKeys].sort(),
+    configOptionCategories: [...new Set(categories)].sort(),
+    modesPresent: Boolean(newSession.modes ?? null),
+    modeChangeEmitted: sawModeChange,
+    approvalsRequested: approvalCount,
+    extNotifications: [...notifications].sort(),
+    stopReasons: promptResults.map((r) => r.stopReason).filter(Boolean),
+    steps: run.stepResults.map(({ op, ok, error }) => ({ op, ok, error })),
+  };
+}
+
+// Adapter-level features must match for parity; a mismatch is a parity gap.
+// Tool-rendering features depend on which tools the model chose (native codex
+// edits via shell `execute`; codex-acp exposes Edit/Read) — a tool-surface
+// difference, not an adapter bug — so they are reported as behavioral only.
+const ADAPTER_KEYS = [
+  "fatalError",
+  "updateTypes",
+  "hasUsage",
+  "usageFields",
+  "configOptionCategories",
+  "modesPresent",
+  "modeChangeEmitted",
+  "extNotifications",
+  "stopReasons",
+];
+const BEHAVIORAL_KEYS = [
+  "toolKinds",
+  "toolStatuses",
+  "hasDiffContent",
+  "hasToolContent",
+];
+
+// Compares the two feature reports key by key: adapter keys first, then the
+// behavioral ones. Values are compared by their JSON serialization.
+function diffFeatures(
+  acp: Record<string, any>,
+  app: Record<string, any>,
+): Array<{
+  feature: string;
+  acp: any;
+  appServer: any;
+  match: boolean;
+  behavioral: boolean;
+}> {
+  const compare = (behavioral: boolean) => (feature: string) => {
+    const left = acp[feature];
+    const right = app[feature];
+    // Serialized comparison is order-sensitive for arrays.
+    const match = JSON.stringify(left) === JSON.stringify(right);
+    return { feature, acp: left, appServer: right, match, behavioral };
+  };
+  return ADAPTER_KEYS.map(compare(false)).concat(
+    BEHAVIORAL_KEYS.map(compare(true)),
+  );
+}
+
+// Prepares the scratch git repo at REPO with a pristine target.txt.
+function setupRepo(): void {
+  const git = (...argv: string[]) => execFileSync("git", argv, { cwd: REPO });
+
+  if (!existsSync(REPO)) mkdirSync(REPO, { recursive: true });
+  git("init", "-q");
+  writeFileSync(join(REPO, "target.txt"), "line1\nline2\nline3\n");
+  git("add", "-A");
+  try {
+    // -c commit.gpgsign=false: ignore the user's global commit-signing config
+    // (e.g. 1Password SSH signer), which fails in this non-interactive context.
+    git(
+      "-c",
+      "commit.gpgsign=false",
+      "-c",
+      "user.email=p@p.dev",
+      "-c",
+      "user.name=parity",
+      "commit",
+      "-qm",
+      "init",
+    );
+  } catch {
+    // Best-effort: a failed commit is ignored. git exits non-zero when there
+    // is nothing to commit, which is the expected case on a reused repo.
+  }
+}
+
+/**
+ * CLI entry point. Runs every selected scenario through every selected adapter
+ * arm, writes raw captures, feature dumps and parity-report.json to OUT_DIR,
+ * then exits 1 if any adapter-level parity gap was found and 0 otherwise.
+ */
+async function main(): Promise<void> {
+  const args = process.argv.slice(2);
+  const flagValue = (flag: string): string | undefined =>
+    args.includes(flag) ? args[args.indexOf(flag) + 1] : undefined;
+  const only = flagValue("--only");
+  const scenarioFilter = flagValue("--scenario");
+  // Reject a typo'd arm up front instead of silently running nothing.
+  if (only && only !== "acp" && only !== "app-server")
+    throw new Error(`--only must be "acp" or "app-server", got "${only}"`);
+  const dump = (file: string, value: unknown): void =>
+    writeFileSync(join(OUT_DIR, file), JSON.stringify(value, null, 2));
+  mkdirSync(OUT_DIR, { recursive: true });
+  setupRepo();
+
+  const modes: AdapterMode[] = [];
+  const wantAppServer = !only || only === "app-server";
+  if (!only || only === "acp") modes.push("acp");
+  if (wantAppServer && existsSync(NATIVE_CODEX_BIN)) modes.push("app-server");
+  else if (wantAppServer)
+    // Warn on default runs too: without this arm no parity diff is produced.
+    console.warn(
+      `native codex binary missing at ${NATIVE_CODEX_BIN}; app-server arm skipped`,
+    );
+
+  const scenarios = SCENARIOS.filter(
+    (s) => !scenarioFilter || s.name === scenarioFilter,
+  );
+  const logger = new Logger({
+    debug: !!process.env.PARITY_DEBUG,
+    prefix: "[parity]",
+  });
+  const featuresByMode: Record<string, Record<string, any>> = {};
+
+  for (const scenario of scenarios) {
+    featuresByMode[scenario.name] = {};
+    for (const mode of modes) {
+      console.log(`\n▶ ${scenario.name} via ${mode} ...`);
+      // codex spawns detached (own process group); a timed-out run orphans it
+      // holding a flock under ~/.codex/tmp, which wedges the next run. Kill any
+      // stragglers first — process death releases the flock. (Uses the default
+      // CODEX_HOME: an isolated empty home makes codex-acp crash at startup.)
+      try {
+        execFileSync("pkill", ["-9", "-f", "resources/codex-acp"], {
+          stdio: "ignore",
+        });
+      } catch {
+        /* none running */
+      }
+      const cfg = {
+        cwd: REPO,
+        codexOptions: {
+          cwd: REPO,
+          binaryPath: CODEX_ACP_BIN,
+          apiBaseUrl: GATEWAY,
+          apiKey: API_KEY,
+          model: MODEL,
+        },
+        timeoutMs: 240000,
+        logger,
+      };
+      const run = await runScenario(mode, scenario, cfg);
+      dump(`${scenario.name}.${mode}.json`, run);
+      const feats = extractFeatures(run);
+      featuresByMode[scenario.name][mode] = feats;
+      dump(`${scenario.name}.${mode}.features.json`, feats);
+      console.log(
+        `  steps: ${feats.steps.map((s: any) => `${s.op}${s.ok ? "✓" : "✗"}`).join(" ")}`,
+      );
+      console.log(
+        `  updates: ${feats.updateTypes.join(",")} | tools: ${feats.toolKinds.join(",")} | usage:${feats.hasUsage} diff:${feats.hasDiffContent} stop:${feats.stopReasons.join(",")}`,
+      );
+      if (feats.fatalError) console.log(`  ⚠ fatalError: ${feats.fatalError}`);
+    }
+  }
+
+  // Diff report (only meaningful when both arms ran)
+  const report: any = { gateway: GATEWAY, model: MODEL, scenarios: {} };
+  let totalGaps = 0;
+  for (const scenario of scenarios) {
+    const acp = featuresByMode[scenario.name].acp;
+    const app = featuresByMode[scenario.name]["app-server"];
+    if (acp && app) {
+      const diff = diffFeatures(acp, app);
+      const gaps = diff.filter((d) => !d.match && !d.behavioral);
+      const behavioral = diff.filter((d) => !d.match && d.behavioral);
+      totalGaps += gaps.length;
+      report.scenarios[scenario.name] = {
+        gaps,
+        behavioral,
+        allMatch: gaps.length === 0,
+      };
+      console.log(`\n=== parity diff: ${scenario.name} ===`);
+      if (!gaps.length) console.log("  ✅ adapter parity");
+      for (const g of gaps)
+        console.log(
+          `  ✗ ${g.feature}: acp=${JSON.stringify(g.acp)} app-server=${JSON.stringify(g.appServer)}`,
+        );
+      for (const b of behavioral)
+        console.log(
+          `  · behavioral: ${b.feature} acp=${JSON.stringify(b.acp)} app-server=${JSON.stringify(b.appServer)}`,
+        );
+    } else {
+      report.scenarios[scenario.name] = {
+        baselineOnly: acp ? "acp" : "app-server",
+        features: acp ?? app,
+      };
+    }
+  }
+  dump("parity-report.json", report);
+  console.log(
+    `\nWrote ${join(OUT_DIR, "parity-report.json")} — ${totalGaps} parity gap(s).`,
+  );
+  process.exit(totalGaps > 0 ? 1 : 0);
+}
+
+main().catch((e) => {
+  console.error("parity runner failed:", e);
+  process.exit(2); // runner failure; exit code 1 is used by main() for parity gaps
+});
diff --git a/packages/agent/src/adapters/acp-connection.test.ts b/packages/agent/src/adapters/acp-connection.test.ts
new file mode 100644
index 0000000000..b1b1d82833
--- /dev/null
+++ b/packages/agent/src/adapters/acp-connection.test.ts
@@ -0,0 +1,47 @@
+import { afterEach, describe, expect, it } from "vitest";
+import { resolveUseCodexAppServer } from "./acp-connection";
+
+describe("resolveUseCodexAppServer", () => {
+  const APP_SERVER = "POSTHOG_CODEX_USE_APP_SERVER";
+  const ACP = "POSTHOG_CODEX_USE_ACP";
+  const initial = { app: process.env[APP_SERVER], acp: process.env[ACP] };
+  const setEnv = (name: string, value?: string): void => {
+    if (value === undefined) delete process.env[name]; // undefined => unset
+    else process.env[name] = value;
+  };
+  afterEach(() => {
+    setEnv(APP_SERVER, initial.app);
+    setEnv(ACP, initial.acp);
+  });
+
+  it("host flag wins over env and default", () => {
+    setEnv(ACP, "1");
+    setEnv(APP_SERVER, "1");
+    expect(resolveUseCodexAppServer({ useCodexAppServer: false })).toBe(false);
+    expect(resolveUseCodexAppServer({ useCodexAppServer: true })).toBe(true);
+  });
+
+  it("POSTHOG_CODEX_USE_APP_SERVER=1 forces app-server", () => {
+    setEnv(ACP, undefined);
+    setEnv(APP_SERVER, "1");
+    expect(resolveUseCodexAppServer({})).toBe(true);
+  });
+
+  it("POSTHOG_CODEX_USE_ACP=1 forces codex-acp", () => {
+    setEnv(APP_SERVER, undefined);
+    setEnv(ACP, "1");
+    expect(resolveUseCodexAppServer({})).toBe(false);
+  });
+
+  it("defaults to codex-acp when nothing is set (app-server is opt-in)", () => {
+    setEnv(APP_SERVER, undefined);
+    setEnv(ACP, undefined);
+    expect(resolveUseCodexAppServer({})).toBe(false);
+  });
+
+  it("host flag false beats POSTHOG_CODEX_USE_APP_SERVER=1", () => {
+    setEnv(APP_SERVER, "1");
+    setEnv(ACP, undefined);
+    expect(resolveUseCodexAppServer({ useCodexAppServer: false })).toBe(false);
+  });
+});
diff --git a/packages/agent/src/adapters/acp-connection.ts b/packages/agent/src/adapters/acp-connection.ts
index 97251c8b7c..f67dbb1f10 100644
--- a/packages/agent/src/adapters/acp-connection.ts
+++ b/packages/agent/src/adapters/acp-connection.ts
@@ -27,6 +27,14 @@ export type AcpConnectionConfig = {
   processCallbacks?: ProcessSpawnedCallback;
   codexOptions?: CodexProcessOptions;
   allowedModelIds?: Set<string>;
+  /**
+   * Feature-flag lever for the codex sub-adapter, passed by the host from the
+   * `codex-app-server` PostHog flag (gradual rollout / kill-switch). `true` =>
+   * native app-server, `false` => codex-acp. When undefined, falls back to env
+   * overrides then the default (codex-acp). Lets app-server roll out alongside
+   * codex-acp without a code change.
+   */
+  useCodexAppServer?: boolean;
   /** Callback invoked when the agent calls the create_output tool for structured output */
   onStructuredOutput?: (output: Record<string, unknown>) => Promise<void>;
   /** PostHog API config; when set, enables file-read enrichment unless disabled. */
@@ -70,6 +78,24 @@ function resolveEnricherApiConfig(
   return enabled ? config.posthogApiConfig : undefined;
 }
 
+/**
+ * Picks the codex sub-adapter: `true` => native app-server, `false` =>
+ * codex-acp. Resolution order:
+ *   1. `config.useCodexAppServer` when the host passed a boolean (it mirrors
+ *      the `codex-app-server` PostHog flag), so the flag acts as a kill-switch;
+ *   2. `POSTHOG_CODEX_USE_APP_SERVER=1` => app-server (this wins even when
+ *      `POSTHOG_CODEX_USE_ACP=1` is set as well);
+ *   3. otherwise codex-acp — with or without `POSTHOG_CODEX_USE_ACP=1`.
+ */
+export function resolveUseCodexAppServer(config: AcpConnectionConfig): boolean {
+  const hostFlag = config.useCodexAppServer;
+  if (typeof hostFlag === "boolean") return hostFlag;
+
+  // codex-acp is the default, so only the app-server override can change the
+  // outcome here; POSTHOG_CODEX_USE_ACP=1 resolves to the same `false`.
+  return process.env.POSTHOG_CODEX_USE_APP_SERVER === "1";
+}
+
 function createClaudeConnection(config: AcpConnectionConfig): AcpConnection {
   const logger =
     config.logger?.child("AcpConnection") ??
@@ -210,10 +236,18 @@ function createCodexConnection(config: AcpConnectionConfig): AcpConnection {
     const codexOptions = config.codexOptions ?? {};
     const nativeBinary = nativeCodexBinaryPath(codexOptions.binaryPath);
 
-    // The native app-server is the default Codex harness. Fall back to the
-    // codex-acp (Zed) adapter only when the codex binary isn't bundled or when
-    // POSTHOG_CODEX_USE_ACP is set as an escape hatch.
-    if (nativeBinary && process.env.POSTHOG_CODEX_USE_ACP !== "1") {
+    // Use the native app-server when its binary is bundled AND the host (flag)
+    // / env selects it. See resolveUseCodexAppServer for precedence.
+    const useAppServer = !!nativeBinary && resolveUseCodexAppServer(config);
+    logger.info(
+      `Codex sub-adapter selected: ${useAppServer ? "app-server (native codex)" : "codex-acp"}`,
+      {
+        useAppServer,
+        nativeBinaryFound: !!nativeBinary,
+        hostFlag: config.useCodexAppServer,
+      },
+    );
+    if (useAppServer) {
       agent = new CodexAppServerAgent(client, {
         processOptions: {
           binaryPath: nativeBinary,
@@ -221,10 +255,12 @@ function createCodexConnection(config: AcpConnectionConfig): AcpConnection {
           apiBaseUrl: codexOptions.apiBaseUrl,
           apiKey: codexOptions.apiKey,
           developerInstructions: codexOptions.developerInstructions,
+          configOverrides: codexOptions.configOverrides,
         },
         model: codexOptions.model,
         reasoningEffort: codexOptions.reasoningEffort,
         processCallbacks: config.processCallbacks,
+        onStructuredOutput: config.onStructuredOutput,
         logger: config.logger?.child("CodexAppServerAgent"),
       });
       return agent;
diff --git a/packages/agent/src/adapters/codex-app-server/app-server-client.test.ts b/packages/agent/src/adapters/codex-app-server/app-server-client.test.ts
index db734950b0..cc688e061d 100644
--- a/packages/agent/src/adapters/codex-app-server/app-server-client.test.ts
+++ b/packages/agent/src/adapters/codex-app-server/app-server-client.test.ts
@@ -7,17 +7,14 @@ import {
 import { AppServerClient } from "./app-server-client";
 
 interface RpcMessage {
-  id?: number;
+  id?: number | string;
   method?: string;
   params?: unknown;
   result?: unknown;
   error?: { code: number; message: string };
 }
 
-/**
- * Drives the "server" end of a {@link StreamPair}: reads newline-delimited
- * JSON-RPC the client sent and writes framed responses/notifications back.
- */
+/** Drives the "server" end of a {@link StreamPair}: reads client JSON-RPC and writes framed replies back. */
 function makeFakeServer(transport: StreamPair) {
   const writer = transport.writable.getWriter();
   const reader = transport.readable.getReader();
@@ -142,6 +139,28 @@ describe("AppServerClient", () => {
     await client.close();
   });
 
+  it("answers a server request with a STRING id (RequestId is string|number)", async () => {
+    const streams = createBidirectionalStreams();
+    const onRequest = vi.fn(async () => ({ decision: "approved" }));
+    const client = new AppServerClient(streams.client, {
+      logger: silentLogger,
+      onRequest,
+    });
+    const server = makeFakeServer(streams.agent);
+
+    await server.send({
+      id: "req-abc",
+      method: "item/commandExecution/requestApproval",
+      params: {},
+    });
+
+    const response = await server.readMessage();
+    expect(onRequest).toHaveBeenCalledTimes(1);
+    expect(response.id).toBe("req-abc");
+    expect(response.result).toEqual({ decision: "approved" });
+    await client.close();
+  });
+
   it("rejects in-flight requests when closed", async () => {
     const streams = createBidirectionalStreams();
     const client = new AppServerClient(streams.client, {
diff --git a/packages/agent/src/adapters/codex-app-server/app-server-client.ts b/packages/agent/src/adapters/codex-app-server/app-server-client.ts
index 1fc5564ced..c437155be3 100644
--- a/packages/agent/src/adapters/codex-app-server/app-server-client.ts
+++ b/packages/agent/src/adapters/codex-app-server/app-server-client.ts
@@ -1,14 +1,11 @@
 import { Logger } from "../../utils/logger";
 import type { StreamPair } from "../../utils/streams";
-import type { JsonRpcMessage, JsonRpcResponse } from "./protocol";
+import type { JsonRpcMessage, JsonRpcResponse, RequestId } from "./protocol";
 
 export interface AppServerClientHandlers {
   /** Server-pushed notification (no id), e.g. `item/agentMessage/delta`. */
   onNotification?: (method: string, params: unknown) => void;
-  /**
-   * Server-initiated request (has an id), e.g. an approval. The resolved value
-   * is returned to the server as the JSON-RPC result.
-   */
+  /** Server-initiated request (has an id), e.g. an approval; the resolved value is returned as the JSON-RPC result. */
   onRequest?: (method: string, params: unknown) => Promise<unknown>;
   /** Fired once when the stream ends without an explicit close() (process exit). */
   onClose?: () => void;
@@ -28,17 +25,13 @@ export interface AppServerRpc {
 }
 
 /**
- * Bidirectional newline-delimited JSON-RPC client for the native Codex
- * `app-server` subprocess. Unlike the codex-acp adapter this speaks Codex's
- * own protocol rather than ACP, so it cannot reuse the ACP SDK connection.
- *
- * Transport-agnostic: it is given a {@link StreamPair} so tests can drive it
- * over in-memory streams without spawning a process.
+ * Bidirectional newline-delimited JSON-RPC client for the native Codex `app-server` subprocess.
+ * Transport-agnostic via a {@link StreamPair} so tests can drive it over in-memory streams.
  */
 export class AppServerClient implements AppServerRpc {
   private readonly writer: WritableStreamDefaultWriter<Uint8Array>;
   private readonly encoder = new TextEncoder();
-  private readonly pending = new Map<number, PendingCall>();
+  private readonly pending = new Map<RequestId, PendingCall>();
   private readonly handlers: AppServerClientHandlers;
   private readonly logger: Logger;
   private reader?: ReadableStreamDefaultReader<Uint8Array>;
@@ -126,9 +119,7 @@ export class AppServerClient implements AppServerRpc {
         // lock already released by cancel()
       }
       if (!this.closed) {
-        // The stream ended without an explicit close() (the process exited).
-        // Fail in-flight calls and notify the owner so a pending turn does not
-        // hang forever.
+        // Stream ended without close() (process exited): fail in-flight calls so the turn doesn't hang.
         this.closed = true;
         for (const call of this.pending.values()) {
           call.reject(new Error("codex app-server stream closed"));
@@ -151,20 +142,22 @@ export class AppServerClient implements AppServerRpc {
     const id = (message as { id?: unknown }).id;
     const method = (message as { method?: unknown }).method;
     const params = (message as { params?: unknown }).params;
-
-    if (typeof method !== "string") {
-      if (typeof id === "number") {
-        this.handleResponse(message as JsonRpcResponse);
+    // Discriminate on id presence, not `typeof id === "number"` — RequestId is
+    // string|number, so a string-id server request must still be answered.
+    const hasId = id !== undefined && id !== null;
+
+    if (typeof method === "string") {
+      if (hasId) {
+        void this.handleIncomingRequest(id as RequestId, method, params);
+      } else {
+        this.handlers.onNotification?.(method, params);
       }
       return;
     }
 
-    if (typeof id === "number") {
-      void this.handleIncomingRequest(id, method, params);
-      return;
+    if (hasId) {
+      this.handleResponse(message as JsonRpcResponse);
     }
-
-    this.handlers.onNotification?.(method, params);
   }
 
   private handleResponse(message: JsonRpcResponse): void {
@@ -182,7 +175,7 @@ export class AppServerClient implements AppServerRpc {
   }
 
   private async handleIncomingRequest(
-    id: number,
+    id: RequestId,
     method: string,
     params: unknown,
   ): Promise<void> {
diff --git a/packages/agent/src/adapters/codex-app-server/approvals.test.ts b/packages/agent/src/adapters/codex-app-server/approvals.test.ts
new file mode 100644
index 0000000000..f1ea74b941
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/approvals.test.ts
@@ -0,0 +1,322 @@
+import type {
+  RequestPermissionRequest,
+  RequestPermissionResponse,
+} from "@agentclientprotocol/sdk";
+import { describe, expect, it, vi } from "vitest";
+import { QuestionMetaSchema } from "../claude/questions/utils";
+import { handleServerRequest } from "./approvals";
+import { APP_SERVER_REQUESTS } from "./protocol";
+
+// Fake ACP client: the Nth requestPermission call resolves to outcomes[N], or
+// to `cancelled` once the queue runs dry; each request is recorded in `calls`.
+function fakeClient(outcomes: RequestPermissionResponse["outcome"][]) {
+  const calls: RequestPermissionRequest[] = [];
+  let served = 0;
+  async function answer(
+    request: RequestPermissionRequest,
+  ): Promise<RequestPermissionResponse> {
+    calls.push(request);
+    const queued = outcomes[served];
+    served += 1;
+    return { outcome: queued ?? { outcome: "cancelled" as const } };
+  }
+  return { client: { requestPermission: vi.fn(answer) }, calls };
+}
+
+const opts = { sessionId: "sess-1" };
+
+describe("handleServerRequest", () => {
+  it("maps a requestUserInput question's selected option back to an answer", async () => {
+    const { client, calls } = fakeClient([
+      { outcome: "selected", optionId: "option_1" },
+    ]);
+
+    const params = {
+      threadId: "t",
+      turnId: "turn",
+      itemId: "item-9",
+      autoResolutionMs: null,
+      questions: [
+        {
+          id: "q1",
+          header: "Pick one",
+          question: "Which environment?",
+          isOther: false,
+          isSecret: false,
+          options: [
+            { label: "staging", description: "" },
+            { label: "production", description: "danger" },
+          ],
+        },
+      ],
+    };
+
+    const result = await handleServerRequest(
+      APP_SERVER_REQUESTS.TOOL_USER_INPUT,
+      params,
+      client,
+      opts,
+    );
+
+    expect(result.handled).toBe(true);
+    expect(result.response).toEqual({
+      answers: { q1: { answers: ["production"] } },
+    });
+
+    expect(calls).toHaveLength(1);
+    expect(calls[0].sessionId).toBe("sess-1");
+    expect(calls[0].options.map((o) => o.name)).toEqual([
+      "staging",
+      "production",
+    ]);
+  });
+
+  it("carries a QuestionMetaSchema-valid questions array so the host card renders", async () => {
+    const { client, calls } = fakeClient([
+      { outcome: "selected", optionId: "option_0" },
+    ]);
+
+    const params = {
+      threadId: "t",
+      turnId: "turn",
+      itemId: "item-1",
+      autoResolutionMs: null,
+      questions: [
+        {
+          id: "q1",
+          header: "Environment",
+          question: "Which environment?",
+          isOther: false,
+          isSecret: false,
+          options: [
+            { label: "staging", description: "" },
+            { label: "production", description: "danger" },
+          ],
+        },
+      ],
+    };
+
+    await handleServerRequest(
+      APP_SERVER_REQUESTS.TOOL_USER_INPUT,
+      params,
+      client,
+      opts,
+    );
+
+    // A bare `{ header }` _meta fails QuestionMetaSchema, rendering an empty card.
+    const parsed = QuestionMetaSchema.safeParse(calls[0].toolCall?._meta);
+    expect(parsed.success).toBe(true);
+    expect(parsed.data?.questions).toEqual([
+      {
+        question: "Which environment?",
+        header: "Environment",
+        // The non-empty description rides along; the empty one is dropped.
+        options: [
+          { label: "staging" },
+          { label: "production", description: "danger" },
+        ],
+      },
+    ]);
+  });
+
+  it("defaults a cancelled question to an empty answer", async () => {
+    const { client } = fakeClient([{ outcome: "cancelled" }]);
+
+    const params = {
+      threadId: "t",
+      turnId: "turn",
+      itemId: "item-1",
+      autoResolutionMs: null,
+      questions: [
+        {
+          id: "q1",
+          header: "h",
+          question: "q?",
+          isOther: false,
+          isSecret: false,
+          options: [{ label: "a", description: "" }],
+        },
+      ],
+    };
+
+    const result = await handleServerRequest(
+      APP_SERVER_REQUESTS.TOOL_USER_INPUT,
+      params,
+      client,
+      opts,
+    );
+
+    expect(result.response).toEqual({ answers: { q1: { answers: [] } } });
+  });
+
+  it.each([
+    // "allow_once" grants for the turn, not session-wide; reject grants nothing.
+    { optionId: "allow", expected: { network: { enabled: true } } },
+    { optionId: "reject", expected: {} },
+  ])(
+    "resolves a permission approval on $optionId",
+    async ({ optionId, expected }) => {
+      const { client } = fakeClient([{ outcome: "selected", optionId }]);
+
+      const params = {
+        threadId: "t",
+        turnId: "turn",
+        itemId: "perm-1",
+        environmentId: null,
+        startedAtMs: 0,
+        cwd: "/repo",
+        reason: "needs network",
+        permissions: {
+          network: { enabled: true },
+          fileSystem: null,
+        },
+      };
+
+      const result = await handleServerRequest(
+        APP_SERVER_REQUESTS.PERMISSIONS_APPROVAL,
+        params,
+        client,
+        opts,
+      );
+
+      expect(result.handled).toBe(true);
+      expect(result.response).toEqual({
+        permissions: expected,
+        scope: "turn",
+      });
+    },
+  );
+
+  it("fails closed to the safe default when a payload is malformed", async () => {
+    const { client } = fakeClient([{ outcome: "selected", optionId: "allow" }]);
+    const result = await handleServerRequest(
+      APP_SERVER_REQUESTS.PERMISSIONS_APPROVAL,
+      null,
+      client,
+      opts,
+    );
+    expect(result).toEqual({
+      handled: true,
+      response: { permissions: {}, scope: "turn" },
+    });
+  });
+
+  it.each([
+    { optionId: "accept", action: "accept", content: {} },
+    { optionId: "decline", action: "decline", content: null },
+  ])(
+    "resolves an elicitation on $optionId",
+    async ({ optionId, action, content }) => {
+      const { client } = fakeClient([{ outcome: "selected", optionId }]);
+
+      const result = await handleServerRequest(
+        APP_SERVER_REQUESTS.MCP_ELICITATION,
+        {
+          threadId: "t",
+          turnId: "turn",
+          serverName: "posthog",
+          mode: "form",
+          message: "Confirm the export",
+        },
+        client,
+        opts,
+      );
+
+      expect(result.handled).toBe(true);
+      expect(result.response).toEqual({ action, content, _meta: null });
+    },
+  );
+
+  it("enriches an elicitation with the in-flight MCP tool call so the host renders the real tool", async () => {
+    const { client, calls } = fakeClient([
+      { outcome: "selected", optionId: "accept" },
+    ]);
+
+    await handleServerRequest(
+      APP_SERVER_REQUESTS.MCP_ELICITATION,
+      {
+        threadId: "t",
+        turnId: "turn",
+        serverName: "posthog",
+        mode: "form",
+        message: 'Allow the posthog MCP server to run tool "exec"?',
+      },
+      client,
+      {
+        ...opts,
+        resolveMcpToolCall: (serverName) =>
+          serverName === "posthog"
+            ? {
+                server: "posthog",
+                tool: "exec",
+                args: { command: "search project|insight" },
+              }
+            : undefined,
+      },
+    );
+
+    expect(calls[0].toolCall).toMatchObject({
+      toolCallId: "posthog:elicitation",
+      rawInput: { command: "search project|insight" },
+      _meta: {
+        posthog: {
+          toolName: "mcp__posthog__exec",
+          mcp: { server: "posthog", tool: "exec" },
+        },
+      },
+    });
+  });
+
+  it("falls back to codex's generic elicitation text when no MCP call correlates", async () => {
+    const { client, calls } = fakeClient([
+      { outcome: "selected", optionId: "decline" },
+    ]);
+
+    await handleServerRequest(
+      APP_SERVER_REQUESTS.MCP_ELICITATION,
+      {
+        threadId: "t",
+        turnId: "t",
+        serverName: "posthog",
+        mode: "form",
+        message: "Confirm",
+      },
+      client,
+      // resolveMcpToolCall absent (e.g. server mismatch) → no enrichment.
+      opts,
+    );
+
+    expect(calls[0].toolCall).not.toHaveProperty("_meta");
+    expect(calls[0].toolCall).toMatchObject({
+      toolCallId: "posthog:elicitation",
+      title: "Confirm",
+    });
+  });
+
+  it("returns handled:false for the simple command approval (caller owns it)", async () => {
+    const { client, calls } = fakeClient([]);
+
+    const result = await handleServerRequest(
+      APP_SERVER_REQUESTS.COMMAND_APPROVAL,
+      { itemId: "x", command: "ls" },
+      client,
+      opts,
+    );
+
+    expect(result).toEqual({ handled: false, response: undefined });
+    expect(calls).toHaveLength(0);
+  });
+
+  it("returns handled:false for an unknown method", async () => {
+    const { client } = fakeClient([]);
+
+    const result = await handleServerRequest(
+      "some/unknown/method",
+      {},
+      client,
+      opts,
+    );
+
+    expect(result).toEqual({ handled: false, response: undefined });
+  });
+});
diff --git a/packages/agent/src/adapters/codex-app-server/approvals.ts b/packages/agent/src/adapters/codex-app-server/approvals.ts
new file mode 100644
index 0000000000..950e978307
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/approvals.ts
@@ -0,0 +1,392 @@
+/**
+ * Handlers for the richer Codex app-server server-requests that carry a typed
+ * response object rather than a yes/no decision string (requestUserInput,
+ * permissions/requestApproval, mcpServer/elicitation). Each is surfaced through
+ * ACP `requestPermission`; on cancel/error we default to the safe outcome so a
+ * dropped prompt never silently grants access.
+ */
+
+import type {
+  AgentSideConnection,
+  PermissionOption,
+  RequestPermissionResponse,
+} from "@agentclientprotocol/sdk";
+import { mcpToolKey, posthogToolMeta } from "@posthog/shared";
+import type { Logger } from "../../utils/logger";
+import { OPTION_PREFIX } from "../claude/questions/utils";
+import { APP_SERVER_REQUESTS } from "./protocol";
+
+// Native app-server shapes, re-declared locally so this module doesn't depend on
+// the generated schema at build time.
+
+interface ToolRequestUserInputOption {
+  label: string;
+  description: string;
+}
+
+interface ToolRequestUserInputQuestion {
+  id: string;
+  header: string;
+  question: string;
+  isOther: boolean;
+  isSecret: boolean;
+  options: ToolRequestUserInputOption[] | null;
+}
+
+interface ToolRequestUserInputParams {
+  threadId: string;
+  turnId: string;
+  itemId: string;
+  questions: ToolRequestUserInputQuestion[];
+  autoResolutionMs: number | null;
+}
+
+interface ToolRequestUserInputResponse {
+  answers: { [questionId: string]: { answers: string[] } };
+}
+
+interface AdditionalNetworkPermissions {
+  enabled: boolean | null;
+}
+
+interface AdditionalFileSystemPermissions {
+  read: string[] | null;
+  write: string[] | null;
+  globScanMaxDepth?: number;
+  entries?: unknown[];
+}
+
+interface RequestPermissionProfile {
+  network: AdditionalNetworkPermissions | null;
+  fileSystem: AdditionalFileSystemPermissions | null;
+}
+
+interface PermissionsRequestApprovalParams {
+  threadId: string;
+  turnId: string;
+  itemId: string;
+  environmentId: string | null;
+  startedAtMs: number;
+  cwd: string;
+  reason: string | null;
+  permissions: RequestPermissionProfile;
+}
+
+interface GrantedPermissionProfile {
+  network?: AdditionalNetworkPermissions;
+  fileSystem?: AdditionalFileSystemPermissions;
+}
+
+type PermissionGrantScope = "turn" | "session";
+
+interface PermissionsRequestApprovalResponse {
+  permissions: GrantedPermissionProfile;
+  scope: PermissionGrantScope;
+}
+
+type McpServerElicitationAction = "accept" | "decline" | "cancel";
+
+interface McpServerElicitationRequestParams {
+  threadId: string;
+  turnId: string | null;
+  serverName: string;
+  mode: "form" | "url";
+  message: string;
+  // Only `message` is needed to render the prompt; the rest stays untyped.
+  [key: string]: unknown;
+}
+
+interface McpServerElicitationRequestResponse {
+  action: McpServerElicitationAction;
+  content: unknown | null;
+  _meta?: unknown | null;
+}
+
+export interface HandleServerRequestResult {
+  // false → not a richer request; the caller handles it (simple approvals).
+  handled: boolean;
+  response: unknown;
+}
+
+export interface HandleServerRequestOptions {
+  sessionId: string;
+  logger?: Logger;
+  /**
+   * Resolve the in-flight MCP tool call for an elicitation's `serverName`. codex's
+   * elicitation carries no tool/args, so supplying the originating `mcpToolCall`
+   * lets the prompt render the real operation. Undefined → codex's generic text.
+   */
+  resolveMcpToolCall?: (
+    serverName: string,
+  ) => { server: string; tool: string; args: unknown } | undefined;
+}
+
+/**
+ * Dispatches a server-initiated request to its richer-response handler.
+ *
+ * - A method this module doesn't own resolves to
+ *   `{ handled: false, response: undefined }` so the caller can deal with it.
+ * - An owned method resolves to `handled: true` plus its handler's response.
+ * - If a handler throws, a warning is logged and the result is `handled: true`
+ *   with that method's safe default from `safeDefaultFor`.
+ *
+ * `params` is cast to the matched method's shape without validation.
+ */
+export async function handleServerRequest(
+  method: string,
+  params: unknown,
+  client: Pick<AgentSideConnection, "requestPermission">,
+  opts: HandleServerRequestOptions,
+): Promise<HandleServerRequestResult> {
+  const owned = (response: unknown): HandleServerRequestResult => ({
+    handled: true,
+    response,
+  });
+
+  // Each handler is awaited inside the try, so a rejected handler promise is
+  // turned into the safe default below rather than escaping to the caller.
+  try {
+    if (method === APP_SERVER_REQUESTS.TOOL_USER_INPUT) {
+      const input = params as ToolRequestUserInputParams;
+      return owned(await handleToolUserInput(input, client, opts));
+    }
+
+    if (method === APP_SERVER_REQUESTS.PERMISSIONS_APPROVAL) {
+      const approval = params as PermissionsRequestApprovalParams;
+      return owned(await handlePermissionsApproval(approval, client, opts));
+    }
+
+    if (method === APP_SERVER_REQUESTS.MCP_ELICITATION) {
+      const elicitation = params as McpServerElicitationRequestParams;
+      return owned(await handleMcpElicitation(elicitation, client, opts));
+    }
+
+    // Simple approvals and unknown methods belong to the caller.
+    return { handled: false, response: undefined };
+  } catch (err) {
+    // Malformed payload fails closed to the safe default — never throw, never grant.
+    opts.logger?.warn("server-request handler threw; failing closed", {
+      method,
+      error: String(err),
+    });
+    return owned(safeDefaultFor(method));
+  }
+}
+
+// Fail-closed reply for `method`: empty grant, declined elicitation, or no answers.
+function safeDefaultFor(method: string): unknown {
+  const isPermissions = method === APP_SERVER_REQUESTS.PERMISSIONS_APPROVAL;
+  const isElicitation = method === APP_SERVER_REQUESTS.MCP_ELICITATION;
+  // Checked in this order: permissions first, then elicitation.
+  if (isPermissions) return { permissions: {}, scope: "turn" };
+  if (isElicitation) return { action: "decline", content: null, _meta: null };
+  return { answers: {} };
+}
+
+// Maps each choice to an `allow_once` option with id `${OPTION_PREFIX}<index>`.
+function buildQuestionOptions(
+  question: ToolRequestUserInputQuestion,
+): PermissionOption[] {
+  return (question.options ?? []).map(({ label, description }, position) => {
+    const optionId = `${OPTION_PREFIX}${position}`;
+    const _meta = description ? { description } : undefined;
+    return { kind: "allow_once" as const, name: label, optionId, _meta };
+  });
+}
+
+// Maps a selected optionId (`option_<idx>`) back to the chosen option's label.
+// Suffix must be all digits: Number("") is 0, so a bare prefix chose option 0.
+function answerFromSelection(
+  question: ToolRequestUserInputQuestion,
+  optionId: string | undefined,
+): string[] {
+  if (!optionId?.startsWith(OPTION_PREFIX)) return [];
+  const suffix = optionId.slice(OPTION_PREFIX.length);
+  const idx = /^\d+$/.test(suffix) ? Number(suffix) : -1;
+  const opt = question.options?.[idx];
+  return opt ? [opt.label] : [];
+}
+
+/**
+ * Asks each requestUserInput question through ACP `requestPermission` and
+ * returns the chosen labels keyed by question id (empty when unanswered).
+ */
+async function handleToolUserInput(
+  params: ToolRequestUserInputParams,
+  client: Pick<AgentSideConnection, "requestPermission">,
+  opts: HandleServerRequestOptions,
+): Promise<ToolRequestUserInputResponse> {
+  const collected: ToolRequestUserInputResponse["answers"] = {};
+
+  // Questions are prompted one at a time, in the order they were received.
+  for (const q of params.questions ?? []) {
+    // Seed an empty answer first; skip, cancel, and failure all keep it.
+    collected[q.id] = { answers: [] };
+
+    // requestPermission only offers fixed choices, so a question without
+    // options (free text) stays unanswered.
+    const choices = buildQuestionOptions(q);
+    if (choices.length === 0) continue;
+
+    let reply: RequestPermissionResponse;
+    try {
+      // The host's QuestionPermission renders from `_meta.questions`; a bare
+      // `header` renders empty. codex prompts one question per request.
+      const rendered = (q.options ?? []).map((opt) =>
+        opt.description?.trim()
+          ? { label: opt.label, description: opt.description }
+          : { label: opt.label },
+      );
+      const toolCall = {
+        toolCallId: `${params.itemId}:${q.id}`,
+        title: q.question,
+        kind: "other" as const,
+        _meta: {
+          codeToolKind: "question",
+          questions: [
+            { question: q.question, header: q.header, options: rendered },
+          ],
+        },
+      };
+      reply = await client.requestPermission({
+        sessionId: opts.sessionId,
+        options: choices,
+        toolCall,
+      });
+    } catch (err) {
+      opts.logger?.warn("requestUserInput prompt failed; leaving empty", {
+        questionId: q.id,
+        error: String(err),
+      });
+      continue;
+    }
+
+    // Anything other than an explicit selection keeps the seeded empty answer.
+    if (reply.outcome.outcome === "selected") {
+      const picked = answerFromSelection(q, reply.outcome.optionId);
+      collected[q.id] = { answers: picked };
+    }
+  }
+
+  return { answers: collected };
+}
+
+/**
+ * Prompts Allow/Reject for a permissions/requestApproval. "allow" grants the
+ * requested profile; every other outcome (or a failed prompt) grants nothing.
+ * Both answers are scoped to the turn.
+ */
+async function handlePermissionsApproval(
+  params: PermissionsRequestApprovalParams,
+  client: Pick<AgentSideConnection, "requestPermission">,
+  opts: HandleServerRequestOptions,
+): Promise<PermissionsRequestApprovalResponse> {
+  const deny = (): PermissionsRequestApprovalResponse => ({
+    permissions: {},
+    scope: "turn",
+  });
+
+  let reply: RequestPermissionResponse;
+  try {
+    reply = await client.requestPermission({
+      sessionId: opts.sessionId,
+      options: [
+        { kind: "allow_once", name: "Allow", optionId: "allow" },
+        { kind: "reject_once", name: "Reject", optionId: "reject" },
+      ],
+      toolCall: {
+        toolCallId: params.itemId,
+        title: params.reason ?? "Grant additional permissions",
+        kind: "other",
+      },
+    });
+  } catch (err) {
+    opts.logger?.warn("permissions approval prompt failed; denying", {
+      itemId: params.itemId,
+      error: String(err),
+    });
+    return deny();
+  }
+
+  const { outcome } = reply;
+  const allowed =
+    outcome.outcome === "selected" && outcome.optionId === "allow";
+  // Grant only what was requested, scoped to this turn (option is "allow_once").
+  const permissions = allowed ? grantedFromRequested(params.permissions) : {};
+  return { permissions, scope: "turn" };
+}
+
+// Builds the grant from the request: a section is copied across only when the
+// request supplied it; a null (or otherwise falsy) `network` / `fileSystem` is
+// left out of the grant entirely.
+function grantedFromRequested(
+  requested: RequestPermissionProfile,
+): GrantedPermissionProfile {
+  const { network, fileSystem } = requested;
+  return {
+    ...(network ? { network } : {}),
+    ...(fileSystem ? { fileSystem } : {}),
+  };
+}
+
+/**
+ * Surfaces an mcpServer/elicitation as an Accept/Decline permission prompt and
+ * maps the outcome onto an elicitation action. A failed prompt declines.
+ */
+async function handleMcpElicitation(
+  params: McpServerElicitationRequestParams,
+  client: Pick<AgentSideConnection, "requestPermission">,
+  opts: HandleServerRequestOptions,
+): Promise<McpServerElicitationRequestResponse> {
+  const respond = (
+    action: McpServerElicitationAction,
+    content: unknown | null = null,
+  ): McpServerElicitationRequestResponse => ({ action, content, _meta: null });
+
+  // If the elicitation gates a known in-flight MCP call, carry its real tool +
+  // args + `_meta.posthog` so the host renders the proper MCP permission.
+  const mcp = opts.resolveMcpToolCall?.(params.serverName);
+  const genericPrompt = {
+    toolCallId: `${params.serverName}:elicitation`,
+    title: params.message || `${params.serverName} requests input`,
+    kind: "other" as const,
+  };
+
+  // No correlated call → the generic prompt, with no `rawInput`/`_meta` keys.
+  const toolCall = mcp
+    ? {
+        ...genericPrompt,
+        rawInput: mcp.args,
+        _meta: posthogToolMeta({
+          toolName: mcpToolKey({ server: mcp.server, tool: mcp.tool }),
+          mcp: { server: mcp.server, tool: mcp.tool },
+        }),
+      }
+    : genericPrompt;
+
+  let reply: RequestPermissionResponse;
+  try {
+    reply = await client.requestPermission({
+      sessionId: opts.sessionId,
+      options: [
+        { kind: "allow_once", name: "Accept", optionId: "accept" },
+        { kind: "reject_once", name: "Decline", optionId: "decline" },
+      ],
+      toolCall,
+    });
+  } catch (err) {
+    opts.logger?.warn("elicitation prompt failed; declining", {
+      serverName: params.serverName,
+      error: String(err),
+    });
+    return respond("decline");
+  }
+
+  const { outcome } = reply;
+  // Cancel is reported as its own action rather than folded into decline.
+  if (outcome.outcome === "cancelled") return respond("cancel");
+  const accepted =
+    outcome.outcome === "selected" && outcome.optionId === "accept";
+  // No structured form UI over requestPermission; accept with empty content.
+  return accepted ? respond("accept", {}) : respond("decline");
+}
diff --git a/packages/agent/src/adapters/codex-app-server/binary-path.test.ts b/packages/agent/src/adapters/codex-app-server/binary-path.test.ts
index f8e46a544d..27d472ff02 100644
--- a/packages/agent/src/adapters/codex-app-server/binary-path.test.ts
+++ b/packages/agent/src/adapters/codex-app-server/binary-path.test.ts
@@ -1,4 +1,4 @@
-import { describe, expect, it, vi } from "vitest";
+import { beforeEach, describe, expect, it, vi } from "vitest";
 
 const existsSyncMock = vi.hoisted(() => vi.fn());
 vi.mock("node:fs", async (importOriginal) => ({
@@ -6,24 +6,44 @@ vi.mock("node:fs", async (importOriginal) => ({
   existsSync: existsSyncMock,
 }));
 
+const resolveMock = vi.hoisted(() => vi.fn());
+vi.mock("node:module", async (importOriginal) => ({
+  ...(await importOriginal<typeof import("node:module")>()),
+  createRequire: () => ({ resolve: resolveMock }),
+}));
+
 const { nativeCodexBinaryPath } = await import("./binary-path");
 
 describe("nativeCodexBinaryPath", () => {
-  it("returns undefined without a codex-acp path", () => {
-    expect(nativeCodexBinaryPath(undefined)).toBeUndefined();
-  });
-
-  it("returns undefined when the sibling codex binary is absent", () => {
-    existsSyncMock.mockReturnValue(false);
-    expect(
-      nativeCodexBinaryPath("/bundle/codex-acp/codex-acp"),
-    ).toBeUndefined();
+  beforeEach(() => {
+    existsSyncMock.mockReset();
+    resolveMock.mockReset();
   });
 
-  it("returns the sibling codex binary when present", () => {
+  it("returns the sibling codex binary bundled next to codex-acp when present", () => {
     existsSyncMock.mockReturnValue(true);
     expect(nativeCodexBinaryPath("/bundle/codex-acp/codex-acp")).toBe(
       "/bundle/codex-acp/codex",
     );
   });
+
+  it("falls back to the @openai/codex vendored binary when no sibling is bundled", () => {
+    resolveMock.mockReturnValue("/nm/@openai/codex-plat/package.json");
+    existsSyncMock.mockImplementation((p: string) => p.includes("/vendor/"));
+    const got = nativeCodexBinaryPath(undefined);
+    expect(got).toContain("@openai/codex-plat");
+    expect(got).toContain("/vendor/");
+    expect(got?.endsWith("/bin/codex")).toBe(true);
+  });
+
+  it("returns undefined when neither the sibling nor the @openai/codex dep is present", () => {
+    existsSyncMock.mockReturnValue(false);
+    resolveMock.mockImplementation(() => {
+      throw new Error("Cannot find module '@openai/codex-plat/package.json'");
+    });
+    expect(
+      nativeCodexBinaryPath("/bundle/codex-acp/codex-acp"),
+    ).toBeUndefined();
+    expect(nativeCodexBinaryPath(undefined)).toBeUndefined();
+  });
 });
diff --git a/packages/agent/src/adapters/codex-app-server/binary-path.ts b/packages/agent/src/adapters/codex-app-server/binary-path.ts
index c025522cd2..6af43597cb 100644
--- a/packages/agent/src/adapters/codex-app-server/binary-path.ts
+++ b/packages/agent/src/adapters/codex-app-server/binary-path.ts
@@ -1,17 +1,82 @@
 import { existsSync } from "node:fs";
+import { createRequire } from "node:module";
 import { dirname, join } from "node:path";
 
 /**
- * The native codex CLI is bundled next to codex-acp, so derive its path from
- * the codex-acp binary path (same directory, `codex` instead of `codex-acp`).
- * Returns undefined when the binary isn't present (e.g. the npx fallback), in
- * which case the caller keeps using the codex-acp adapter.
+ * Node `platform-arch` → codex target triple + `@openai/codex` platform sub-package
+ * that vendors the native binary. Mirrors `@openai/codex`'s own `bin/codex.js` shim.
+ */
+const CODEX_NATIVE_TARGETS: Record<
+  string,
+  { triple: string; pkg: string } | undefined
+> = {
+  "linux-x64": {
+    triple: "x86_64-unknown-linux-musl",
+    pkg: "@openai/codex-linux-x64",
+  },
+  "linux-arm64": {
+    triple: "aarch64-unknown-linux-musl",
+    pkg: "@openai/codex-linux-arm64",
+  },
+  "darwin-x64": {
+    triple: "x86_64-apple-darwin",
+    pkg: "@openai/codex-darwin-x64",
+  },
+  "darwin-arm64": {
+    triple: "aarch64-apple-darwin",
+    pkg: "@openai/codex-darwin-arm64",
+  },
+  "win32-x64": {
+    triple: "x86_64-pc-windows-msvc",
+    pkg: "@openai/codex-win32-x64",
+  },
+  "win32-arm64": {
+    triple: "aarch64-pc-windows-msvc",
+    pkg: "@openai/codex-win32-arm64",
+  },
+};
+
+/**
+ * Finds the native codex binary vendored by the `@openai/codex` platform
+ * sub-package matching `process.platform`/`process.arch`. Yields undefined for
+ * an unmapped platform, a missing binary file, or when the lookup throws.
+ */
+function vendoredCodexBinary(): string | undefined {
+  const { platform, arch } = process;
+  const target = CODEX_NATIVE_TARGETS[`${platform}-${arch}`];
+  if (!target) return undefined;
+  const executable = platform === "win32" ? "codex.exe" : "codex";
+  try {
+    // Only the directory of createRequire's filename anchors resolution, so
+    // `_resolve.js` is a placeholder that need not exist.
+    const moduleDir = import.meta.dirname ?? __dirname;
+    const anchored = createRequire(join(moduleDir, "_resolve.js"));
+    const manifest = anchored.resolve(`${target.pkg}/package.json`);
+    const candidate = join(
+      dirname(manifest),
+      "vendor",
+      target.triple,
+      "bin",
+      executable,
+    );
+    return existsSync(candidate) ? candidate : undefined;
+  } catch {
+    return undefined;
+  }
+}
+
+/**
+ * Path to the native codex CLI (the one that exposes `app-server`), or undefined
+ * when unavailable. Two sources in order: bundled next to codex-acp, then vendored
+ * by the `@openai/codex` npm dependency.
  */
 export function nativeCodexBinaryPath(
   codexAcpPath?: string,
 ): string | undefined {
-  if (!codexAcpPath) return undefined;
   const binaryName = process.platform === "win32" ? "codex.exe" : "codex";
-  const candidate = join(dirname(codexAcpPath), binaryName);
-  return existsSync(candidate) ? candidate : undefined;
+  if (codexAcpPath) {
+    const candidate = join(dirname(codexAcpPath), binaryName);
+    if (existsSync(candidate)) return candidate;
+  }
+  return vendoredCodexBinary();
 }
diff --git a/packages/agent/src/adapters/codex-app-server/codex-app-server-agent.test.ts b/packages/agent/src/adapters/codex-app-server/codex-app-server-agent.test.ts
index 140c4abed1..bb2ad6be0a 100644
--- a/packages/agent/src/adapters/codex-app-server/codex-app-server-agent.test.ts
+++ b/packages/agent/src/adapters/codex-app-server/codex-app-server-agent.test.ts
@@ -11,6 +11,22 @@ import type {
 } from "./app-server-client";
 import { CodexAppServerAgent } from "./codex-app-server-agent";
 
+// Required-field invariants the native codex app-server enforces on each request.
+const REQUIRED_FIELDS: Record<string, string[]> = {
+  "turn/interrupt": ["threadId", "turnId"],
+  "turn/steer": ["threadId", "input", "expectedTurnId"],
+};
+
+// First required field of `method` that is undefined, null, or "" in `params`.
+function requiredFieldMissing(
+  method: string,
+  params: unknown,
+): string | undefined {
+  const given = (params ?? {}) as Record<string, unknown>;
+  const isBlank = (v: unknown) => v === undefined || v === null || v === "";
+  return REQUIRED_FIELDS[method]?.find((field) => isBlank(given[field]));
+}
+
 function makeStubRpc(responses: Record<string, unknown>) {
   let handlers: AppServerClientHandlers | undefined;
   const requests: Array<{ method: string; params?: unknown }> = [];
@@ -18,6 +34,14 @@ function makeStubRpc(responses: Record<string, unknown>) {
   const rpc: AppServerRpc = {
     async request<T = unknown>(method: string, params?: unknown): Promise<T> {
       requests.push({ method, params });
+      // Enforce the schema contract so a dropped required field fails loudly, not as a CI false-green.
+      const missing = requiredFieldMissing(method, params);
+      if (missing) {
+        throw {
+          code: -32600,
+          message: `Invalid request: missing field \`${missing}\``,
+        };
+      }
       return (responses[method] ?? {}) as T;
     },
     notify() {},
@@ -47,13 +71,17 @@ function makeFakeClient(
   outcome: unknown = { outcome: "selected", optionId: "allow" },
 ) {
   const sessionUpdates: unknown[] = [];
+  const extNotifications: Array<{ method: string; params: unknown }> = [];
   const client = {
     sessionUpdate: async (notification: unknown) => {
       sessionUpdates.push(notification);
     },
     requestPermission: async () => ({ outcome }),
+    extNotification: async (method: string, params: unknown) => {
+      extNotifications.push({ method, params });
+    },
   } as unknown as AgentSideConnection;
-  return { client, sessionUpdates };
+  return { client, sessionUpdates, extNotifications };
 }
 
 const init = { protocolVersion: 1 } as unknown as InitializeRequest;
@@ -83,7 +111,7 @@ describe("CodexAppServerAgent", () => {
       prompt: [{ type: "text", text: "hello" }],
     } as unknown as PromptRequest);
 
-    stub.emit("item/agentMessage/delta", { itemId: "i1", text: "Hi there" });
+    stub.emit("item/agentMessage/delta", { itemId: "i1", delta: "Hi there" });
     stub.emit("turn/completed", {
       turn: { id: "turn_1", status: "completed" },
     });
@@ -105,160 +133,1779 @@ describe("CodexAppServerAgent", () => {
     });
   });
 
-  it("maps a failed turn to a refusal stop reason", async () => {
-    const stub = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
-    const { client } = makeFakeClient();
+  it("enriches an MCP tool-call approval with the structured posthog channel", async () => {
+    const stub = makeStubRpc({
+      initialize: {},
+      "thread/start": { thread: { id: "thr_1" } },
+    });
+    const permissionToolCalls: unknown[] = [];
+    const client = {
+      sessionUpdate: async () => {},
+      requestPermission: async (params: { toolCall: unknown }) => {
+        permissionToolCalls.push(params.toolCall);
+        return { outcome: { outcome: "selected", optionId: "allow" } };
+      },
+      extNotification: async () => {},
+    } as unknown as AgentSideConnection;
+
     const agent = new CodexAppServerAgent(client, {
-      processOptions: { binaryPath: "/x/codex" },
+      processOptions: { binaryPath: "/bundle/codex" },
+      model: "gpt-5.5",
       rpcFactory: stub.factory,
     });
+    await agent.initialize(init);
+    await agent.newSession({ cwd: "/repo" } as unknown as NewSessionRequest);
 
-    await agent.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
-    const done = agent.prompt({
-      sessionId: "t",
-      prompt: [],
-    } as unknown as PromptRequest);
-    stub.emit("turn/completed", { turn: { status: "failed" } });
+    // The MCP tool call item arrives first, then codex approves it via a command-execution request.
+    stub.emit("item/started", {
+      item: {
+        type: "mcpToolCall",
+        id: "m1",
+        server: "posthog",
+        tool: "exec",
+        arguments: { command: "call execute-sql {}" },
+      },
+    });
+    const decision = await stub.invokeRequest(
+      "item/commandExecution/requestApproval",
+      {
+        itemId: "m1",
+        command: 'Allow the posthog MCP server to run tool "exec"?',
+      },
+    );
 
-    expect((await done).stopReason).toBe("refusal");
+    expect(decision).toEqual({ decision: "accept" });
+    expect(permissionToolCalls).toHaveLength(1);
+    expect(permissionToolCalls[0]).toMatchObject({
+      toolCallId: "m1",
+      kind: "other",
+      rawInput: { command: "call execute-sql {}" },
+      _meta: {
+        posthog: {
+          toolName: "mcp__posthog__exec",
+          mcp: { server: "posthog", tool: "exec" },
+        },
+      },
+    });
   });
 
-  it("routes command approvals to the host and maps allow to accept", async () => {
-    const stub = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
-    const { client } = makeFakeClient();
+  it("enriches the MCP elicitation approval (posthog exec) from the in-flight tool call", async () => {
+    // codex gates PostHog `exec` behind a generic elicitation (serverName only, no tool/args);
+    // the adapter correlates it to the in-flight mcpToolCall so the real tool + command render.
+    const stub = makeStubRpc({
+      initialize: {},
+      "thread/start": { thread: { id: "thr_1" } },
+    });
+    const permissionToolCalls: Array<Record<string, unknown>> = [];
+    const client = {
+      sessionUpdate: async () => {},
+      requestPermission: async (params: {
+        toolCall: Record<string, unknown>;
+      }) => {
+        permissionToolCalls.push(params.toolCall);
+        return { outcome: { outcome: "selected", optionId: "accept" } };
+      },
+      extNotification: async () => {},
+    } as unknown as AgentSideConnection;
+    const agent = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/bundle/codex" },
+      model: "gpt-5.5",
+      rpcFactory: stub.factory,
+    });
+    await agent.initialize(init);
+    await agent.newSession({ cwd: "/repo" } as unknown as NewSessionRequest);
+
+    stub.emit("item/started", {
+      item: {
+        type: "mcpToolCall",
+        id: "m1",
+        server: "posthog",
+        tool: "exec",
+        arguments: { command: "call execute-sql {}" },
+      },
+    });
+    const decision = await stub.invokeRequest("mcpServer/elicitation/request", {
+      threadId: "thr_1",
+      turnId: "turn_1",
+      serverName: "posthog",
+      mode: "form",
+      message: 'Allow the posthog MCP server to run tool "exec"?',
+    });
+
+    expect(decision).toMatchObject({ action: "accept" });
+    expect(permissionToolCalls[0]).toMatchObject({
+      toolCallId: "posthog:elicitation",
+      rawInput: { command: "call execute-sql {}" },
+      _meta: {
+        posthog: {
+          toolName: "mcp__posthog__exec",
+          mcp: { server: "posthog", tool: "exec" },
+        },
+      },
+    });
+  });
+
+  function makeApprovalAgent(chooseOptionId = "allow") {
+    const stub = makeStubRpc({
+      initialize: {},
+      "thread/start": { thread: { id: "thr_1" } },
+    });
+    const permissionToolCalls: Array<Record<string, unknown>> = [];
+    const permissionOptions: Array<
+      Array<{ optionId?: string; kind?: string }>
+    > = [];
+    const client = {
+      sessionUpdate: async () => {},
+      requestPermission: async (params: {
+        toolCall: Record<string, unknown>;
+        options: Array<{ optionId?: string; kind?: string }>;
+      }) => {
+        permissionToolCalls.push(params.toolCall);
+        permissionOptions.push(params.options);
+        return { outcome: { outcome: "selected", optionId: chooseOptionId } };
+      },
+      extNotification: async () => {},
+    } as unknown as AgentSideConnection;
+    const agent = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/bundle/codex" },
+      model: "gpt-5.5",
+      rpcFactory: stub.factory,
+    });
+    return { agent, stub, permissionToolCalls, permissionOptions };
+  }
+
+  it("routes a non-MCP command approval to an execute permission (kind + command body)", async () => {
+    // kind:"execute" + command text content makes the host render ExecutePermission (not the fallback).
+    const { agent, stub, permissionToolCalls } = makeApprovalAgent();
+    await agent.initialize(init);
+    await agent.newSession({ cwd: "/repo" } as unknown as NewSessionRequest);
+
+    await stub.invokeRequest("item/commandExecution/requestApproval", {
+      itemId: "c1",
+      command: "rm -rf build",
+    });
+
+    expect(permissionToolCalls).toHaveLength(1);
+    expect(permissionToolCalls[0]).toEqual({
+      toolCallId: "c1",
+      title: "rm -rf build",
+      kind: "execute",
+      content: [
+        { type: "content", content: { type: "text", text: "rm -rf build" } },
+      ],
+    });
+  });
+
+  it("surfaces Allow-always and echoes codex's remember decision when offered", async () => {
+    const { agent, stub, permissionOptions } =
+      makeApprovalAgent("allow_always");
+    await agent.initialize(init);
+    await agent.newSession({ cwd: "/repo" } as unknown as NewSessionRequest);
+
+    // codex offers the command-prefix allowlist decision for this approval.
+    const decision = await stub.invokeRequest(
+      "item/commandExecution/requestApproval",
+      {
+        itemId: "c1",
+        command: "pnpm test",
+        available_decisions: ["approved_execpolicy_amendment", "denied"],
+      },
+    );
+
+    expect(permissionOptions[0].map((o) => o.kind)).toContain("allow_always");
+    // Picking it echoes codex's own decision so it applies the amendment.
+    expect(decision).toEqual({ decision: "approved_execpolicy_amendment" });
+  });
+
+  it("omits Allow-always when codex offers no remember decision", async () => {
+    const { agent, stub, permissionOptions } = makeApprovalAgent("allow");
+    await agent.initialize(init);
+    await agent.newSession({ cwd: "/repo" } as unknown as NewSessionRequest);
+
+    const decision = await stub.invokeRequest(
+      "item/commandExecution/requestApproval",
+      { itemId: "c1", command: "ls" },
+    );
+
+    expect(permissionOptions[0].map((o) => o.kind)).not.toContain(
+      "allow_always",
+    );
+    expect(permissionOptions[0].map((o) => o.optionId)).toEqual([
+      "allow",
+      "reject",
+      "reject_with_feedback",
+    ]);
+    expect(decision).toEqual({ decision: "accept" });
+  });
+
+  it("reject-with-feedback declines and steers the user's guidance into the running turn", async () => {
+    const stub = makeStubRpc({
+      initialize: {},
+      "thread/start": { thread: { id: "thr_1" } },
+      "turn/start": { turn: { id: "turn_1" } },
+      // codex rotates the turn id on steer.
+      "turn/steer": { turnId: "turn_2" },
+    });
+    const offeredOptions: Array<Array<{ optionId?: string; kind?: string }>> =
+      [];
+    const client = {
+      sessionUpdate: async () => {},
+      requestPermission: async (params: {
+        options: Array<{ optionId?: string; kind?: string }>;
+      }) => {
+        offeredOptions.push(params.options);
+        return {
+          outcome: { outcome: "selected", optionId: "reject_with_feedback" },
+          _meta: { customInput: "use the SDK instead of shelling out" },
+        };
+      },
+      extNotification: async () => {},
+    } as unknown as AgentSideConnection;
     const agent = new CodexAppServerAgent(client, {
       processOptions: { binaryPath: "/x/codex" },
+      model: "gpt-5.5",
       rpcFactory: stub.factory,
     });
+    await agent.initialize(init);
+    await agent.newSession({ cwd: "/repo" } as unknown as NewSessionRequest);
+    // Start a turn so there's a live turnId for the steer to target.
+    const done = agent.prompt({
+      sessionId: "thr_1",
+      prompt: [{ type: "text", text: "go" }],
+    } as unknown as PromptRequest);
+    stub.emit("turn/started", { turn: { id: "turn_1" } });
 
-    await agent.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    // codex asks to run a command mid-turn; user rejects with guidance.
     const decision = await stub.invokeRequest(
       "item/commandExecution/requestApproval",
-      { itemId: "i", command: "ls -la" },
+      { itemId: "c1", command: "rm -rf build" },
+    );
+
+    expect(decision).toEqual({ decision: "decline" });
+    const feedbackOpt = offeredOptions[0].find(
+      (o) => o.optionId === "reject_with_feedback",
     );
+    expect(feedbackOpt).toBeTruthy();
+    // The guidance was steered into the running turn as a follow-up message.
+    const steer = stub.requests.find((r) => r.method === "turn/steer");
+    expect((steer?.params as { expectedTurnId?: string })?.expectedTurnId).toBe(
+      "turn_1",
+    );
+
+    // The rotated turn id from the steer response was adopted: a second
+    // rejection targets turn_2, not the dead turn_1.
+    await new Promise((r) => setImmediate(r));
+    await stub.invokeRequest("item/commandExecution/requestApproval", {
+      itemId: "c2",
+      command: "rm -rf dist",
+    });
+    const steers = stub.requests.filter((r) => r.method === "turn/steer");
+    expect(
+      (steers[1]?.params as { expectedTurnId?: string })?.expectedTurnId,
+    ).toBe("turn_2");
 
-    expect(decision).toBe("accept");
+    stub.emit("turn/completed", { turn: { status: "completed" } });
+    await done;
   });
 
-  it("rejects the pending turn when the app-server stream closes", async () => {
+  it("routes a non-MCP file-change approval to an edit permission (kind + diff + locations)", async () => {
+    const { agent, stub, permissionToolCalls: asked } = makeApprovalAgent();
+    await agent.initialize(init);
+    await agent.newSession({ cwd: "/repo" } as unknown as NewSessionRequest);
+
+    await stub.invokeRequest("item/fileChange/requestApproval", {
+      itemId: "f1",
+      changes: [{ path: "src/a.ts", diff: "@@ -1 +1 @@\n-old\n+new\n" }],
+    });
+
+    expect(asked).toHaveLength(1);
+    const [call] = asked;
+    expect(call.kind).toBe("edit");
+    expect(call.locations).toEqual([{ path: "src/a.ts" }]);
+    // The first content entry must be a diff block, which is what the host's EditPermission renders.
+    expect(Array.isArray(call.content)).toBe(true);
+    expect((call.content as Array<{ type?: string }>)[0]?.type).toBe("diff");
+  });
+
+  it("passes outputSchema to turn/start and fires onStructuredOutput", async () => {
     const stub = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
     const { client } = makeFakeClient();
+    const outputs: Array<Record<string, unknown>> = [];
+    const schema = {
+      type: "object",
+      properties: { repo: { type: "string" } },
+      required: ["repo"],
+    };
     const agent = new CodexAppServerAgent(client, {
       processOptions: { binaryPath: "/x/codex" },
       rpcFactory: stub.factory,
+      onStructuredOutput: async (o) => {
+        outputs.push(o);
+      },
     });
 
-    await agent.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    await agent.newSession({
+      cwd: "/r",
+      _meta: { jsonSchema: schema },
+    } as unknown as NewSessionRequest);
     const done = agent.prompt({
       sessionId: "t",
-      prompt: [{ type: "text", text: "hi" }],
+      prompt: [{ type: "text", text: "pick a repo" }],
     } as unknown as PromptRequest);
 
-    stub.triggerClose();
+    // The schema-constrained final message is pure JSON.
+    stub.emit("item/completed", {
+      item: {
+        type: "agentMessage",
+        id: "a1",
+        text: '{"repo":"posthog/posthog"}',
+      },
+    });
+    stub.emit("turn/completed", { turn: { status: "completed" } });
+    await done;
 
-    await expect(done).rejects.toThrow(/exited before the turn completed/);
+    const turnStart = stub.requests.find((r) => r.method === "turn/start");
+    expect(turnStart?.params).toMatchObject({ outputSchema: schema });
+    expect(outputs).toEqual([{ repo: "posthog/posthog" }]);
   });
 
-  it("interrupts by sending turn/interrupt before reporting cancelled", async () => {
+  it("injects task instructions and mcp_servers into thread/start", async () => {
     const stub = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
     const { client } = makeFakeClient();
     const agent = new CodexAppServerAgent(client, {
-      processOptions: { binaryPath: "/x/codex" },
+      processOptions: {
+        binaryPath: "/x/codex",
+        developerInstructions: "Codex guidance.",
+      },
       rpcFactory: stub.factory,
     });
 
-    await agent.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
-    const done = agent.prompt({
-      sessionId: "t",
-      prompt: [],
-    } as unknown as PromptRequest);
+    await agent.newSession({
+      cwd: "/r",
+      _meta: { systemPrompt: "You are a repo selector." },
+      mcpServers: [
+        {
+          name: "posthog",
+          command: "node",
+          args: ["server.js"],
+          env: [{ name: "TOKEN", value: "abc" }],
+        },
+      ],
+    } as unknown as NewSessionRequest);
 
-    await agent.cancel({ sessionId: "t" });
+    const threadStart = stub.requests.find((r) => r.method === "thread/start");
+    expect(threadStart?.params).toMatchObject({
+      developerInstructions: "Codex guidance.\n\nYou are a repo selector.",
+      config: {
+        mcp_servers: {
+          posthog: {
+            command: "node",
+            args: ["server.js"],
+            env: { TOKEN: "abc" },
+          },
+        },
+      },
+    });
+  });
 
-    expect((await done).stopReason).toBe("cancelled");
-    expect(stub.requests.some((r) => r.method === "turn/interrupt")).toBe(true);
+  it("flattens the host's {append} systemPrompt and dedupes it against developerInstructions", async () => {
+    const stub = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const agent = new CodexAppServerAgent(client, {
+      processOptions: {
+        binaryPath: "/x/codex",
+        // The host pre-flattens into developerInstructions AND sends the raw {append} form.
+        developerInstructions: "Be a careful engineer.",
+      },
+      rpcFactory: stub.factory,
+    });
+
+    await agent.newSession({
+      cwd: "/r",
+      _meta: { systemPrompt: { append: "Be a careful engineer." } },
+    } as unknown as NewSessionRequest);
+
+    const threadStart = stub.requests.find((r) => r.method === "thread/start");
+    // {append} is flattened (not "[object Object]") and deduped to one copy; `?.` turns a missing request into an assertion failure, not a TypeError.
+    expect(
+      (threadStart?.params as { developerInstructions?: string })
+        ?.developerInstructions,
+    ).toBe("Be a careful engineer.");
   });
 
-  it("rejects a concurrent prompt while a turn is in progress", async () => {
+  it("appends a distinct {append} systemPrompt to developerInstructions", async () => {
     const stub = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
     const { client } = makeFakeClient();
     const agent = new CodexAppServerAgent(client, {
-      processOptions: { binaryPath: "/x/codex" },
+      processOptions: {
+        binaryPath: "/x/codex",
+        developerInstructions: "Codex base guidance.",
+      },
       rpcFactory: stub.factory,
     });
 
-    await agent.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
-    const first = agent.prompt({
+    await agent.newSession({
+      cwd: "/r",
+      _meta: { systemPrompt: { append: "Task: fix the bug." } },
+    } as unknown as NewSessionRequest);
+
+    const threadStart = stub.requests.find((r) => r.method === "thread/start");
+    expect(
+      (threadStart?.params as { developerInstructions?: string })
+        ?.developerInstructions,
+    ).toBe("Codex base guidance.\n\nTask: fix the bug.");
+  });
+
+  it("honors the host's initial _meta.permissionMode (read-only) in turn/start", async () => {
+    const stub = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "turn/start": { turn: { id: "turn_1" } },
+    });
+    const { client } = makeFakeClient();
+    const agent = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: stub.factory,
+    });
+    await agent.newSession({
+      cwd: "/r",
+      _meta: { permissionMode: "read-only" },
+    } as unknown as NewSessionRequest);
+    const done = agent.prompt({
       sessionId: "t",
-      prompt: [],
+      prompt: [{ type: "text", text: "go" }],
     } as unknown as PromptRequest);
+    stub.emit("turn/completed", { turn: { status: "completed" } });
+    await done;
 
-    await expect(
-      agent.prompt({ sessionId: "t", prompt: [] } as unknown as PromptRequest),
-    ).rejects.toThrow(/already in progress/);
+    const turnStart = stub.requests.find((r) => r.method === "turn/start");
+    // read-only maps to approvalPolicy "untrusted" (mirrors codex-acp); `?.` keeps a missing request an assertion failure.
+    expect(
+      (turnStart?.params as { approvalPolicy?: string })?.approvalPolicy,
+    ).toBe("untrusted");
+  });
 
+  it("falls back to auto for a non-codex initial permissionMode", async () => {
+    const stub = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "turn/start": { turn: { id: "turn_1" } },
+    });
+    const { client } = makeFakeClient();
+    const agent = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: stub.factory,
+    });
+    // "bypassPermissions" is a Claude mode, not a codex mode → default "auto".
+    await agent.newSession({
+      cwd: "/r",
+      _meta: { permissionMode: "bypassPermissions" },
+    } as unknown as NewSessionRequest);
+    const done = agent.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "go" }],
+    } as unknown as PromptRequest);
     stub.emit("turn/completed", { turn: { status: "completed" } });
-    await first;
+    await done;
+
+    const turnStart = stub.requests.find((r) => r.method === "turn/start");
+    expect(
+      (turnStart?.params as { approvalPolicy?: string })?.approvalPolicy,
+    ).toBe("on-request");
   });
 
-  it("runs sequential turns on the same session", async () => {
-    const stub = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+  it("applies a read-only sandboxPolicy + approvalPolicy when the picker is Plan", async () => {
+    const stub = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "turn/start": { turn: { id: "turn_1" } },
+    });
     const { client } = makeFakeClient();
     const agent = new CodexAppServerAgent(client, {
       processOptions: { binaryPath: "/x/codex" },
+      model: "gpt-5.5",
       rpcFactory: stub.factory,
     });
-
     await agent.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
-
-    const first = agent.prompt({
+    await agent.setSessionConfigOption({
+      configId: "mode",
+      value: "plan",
       sessionId: "t",
-      prompt: [{ type: "text", text: "one" }],
+    } as never);
+    const done = agent.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "go" }],
     } as unknown as PromptRequest);
     stub.emit("turn/completed", { turn: { status: "completed" } });
-    expect((await first).stopReason).toBe("end_turn");
+    await done;
 
-    const second = agent.prompt({
+    const turnStart = stub.requests.find((r) => r.method === "turn/start");
+    const params = turnStart?.params as {
+      sandboxPolicy?: unknown;
+      approvalPolicy?: string;
+      collaborationMode?: unknown;
+    };
+    // Plan engages codex's plan collaboration AND blocks edits via a read-only sandbox.
+    expect(params.collaborationMode).toEqual({
+      mode: "plan",
+      settings: { model: "gpt-5.5" },
+    });
+    expect(params.sandboxPolicy).toEqual({
+      type: "readOnly",
+      networkAccess: true,
+    });
+    expect(params.approvalPolicy).toBe("on-request");
+  });
+
+  it("omits sandboxPolicy for an editing preset (auto) so the spawned full-access stays", async () => {
+    const stub = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "turn/start": { turn: { id: "turn_1" } },
+    });
+    const { client } = makeFakeClient();
+    const agent = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      model: "gpt-5.5",
+      rpcFactory: stub.factory,
+    });
+    // Default mode is "auto" → editing allowed, no sandbox override.
+    await agent.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const done = agent.prompt({
       sessionId: "t",
-      prompt: [{ type: "text", text: "two" }],
+      prompt: [{ type: "text", text: "go" }],
     } as unknown as PromptRequest);
     stub.emit("turn/completed", { turn: { status: "completed" } });
-    expect((await second).stopReason).toBe("end_turn");
+    await done;
+
+    const turnStart = stub.requests.find((r) => r.method === "turn/start");
+    const params = turnStart?.params as {
+      sandboxPolicy?: unknown;
+      collaborationMode?: unknown;
+    };
+    expect(params.sandboxPolicy).toBeUndefined();
+    // Default collaboration is pushed every turn so switching back from Plan reverts.
+    expect(params.collaborationMode).toEqual({
+      mode: "default",
+      settings: { model: "gpt-5.5" },
+    });
   });
 
-  it("maps a rejected approval to decline", async () => {
-    const stub = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
-    const { client } = makeFakeClient({
-      outcome: "selected",
-      optionId: "reject",
+  it("returns mode + model + thought_level configOptions and emits config_option_update", async () => {
+    const stub = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "model/list": {
+        data: [
+          {
+            id: "gpt-5.5",
+            model: "gpt-5.5",
+            displayName: "GPT-5.5",
+            hidden: false,
+            supportedReasoningEfforts: [
+              { reasoningEffort: "low" },
+              { reasoningEffort: "high" },
+            ],
+          },
+        ],
+      },
     });
+    const { client, sessionUpdates } = makeFakeClient();
     const agent = new CodexAppServerAgent(client, {
       processOptions: { binaryPath: "/x/codex" },
+      model: "gpt-5.5",
       rpcFactory: stub.factory,
     });
-
-    await agent.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const session = await agent.newSession({
+      cwd: "/r",
+    } as unknown as NewSessionRequest);
+    const opts = (session.configOptions ?? []) as any[];
+    expect(opts.map((o) => o.category)).toEqual([
+      "mode",
+      "model",
+      "thought_level",
+    ]);
     expect(
-      await stub.invokeRequest("item/fileChange/requestApproval", {
-        itemId: "i",
-      }),
-    ).toBe("decline");
+      opts.find((o) => o.category === "mode").options.map((x: any) => x.value),
+    ).toEqual(["plan", "read-only", "auto", "full-access"]);
+    expect(
+      opts
+        .find((o) => o.category === "thought_level")
+        .options.map((x: any) => x.value),
+    ).toEqual(["low", "high"]);
+    expect(
+      sessionUpdates.some(
+        (u: any) => u.update?.sessionUpdate === "config_option_update",
+      ),
+    ).toBe(true);
   });
 
-  it("maps a cancelled approval to cancel", async () => {
-    const stub = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
-    const { client } = makeFakeClient({ outcome: "cancelled" });
+  it("drops Claude models from the picker and falls back to the codex effort map when model/list reports none", async () => {
+    const stub = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "model/list": {
+        data: [
+          {
+            id: "gpt-5.5",
+            model: "gpt-5.5",
+            displayName: "GPT-5.5",
+            hidden: false,
+            // The PostHog gateway populates no efforts (defaultReasoningEffort:"none").
+            supportedReasoningEfforts: [],
+          },
+          {
+            // The gateway also serves Claude models — they must not leak into the picker.
+            id: "claude-opus-4-8",
+            model: "claude-opus-4-8",
+            hidden: false,
+            supportedReasoningEfforts: [],
+          },
+        ],
+      },
+    });
+    const { client } = makeFakeClient();
     const agent = new CodexAppServerAgent(client, {
       processOptions: { binaryPath: "/x/codex" },
+      model: "gpt-5.5",
       rpcFactory: stub.factory,
     });
+    const session = await agent.newSession({
+      cwd: "/r",
+    } as unknown as NewSessionRequest);
+    const opts = (session.configOptions ?? []) as any[];
 
-    await agent.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
     expect(
-      await stub.invokeRequest("item/commandExecution/requestApproval", {
-        itemId: "i",
-        command: "ls",
-      }),
-    ).toBe("cancel");
+      opts.find((o) => o.category === "model").options.map((x: any) => x.value),
+    ).toEqual(["gpt-5.5"]);
+    // No live efforts → shared codex map, which exposes xhigh for the gpt-5.5 family.
+    expect(
+      opts
+        .find((o) => o.category === "thought_level")
+        .options.map((x: any) => x.value),
+    ).toContain("xhigh");
+  });
+
+  it("setSessionConfigOption switches the model and re-emits config", async () => {
+    const stub = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const agent = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      model: "gpt-5.5",
+      rpcFactory: stub.factory,
+    });
+    await agent.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const res = await agent.setSessionConfigOption({
+      configId: "model",
+      value: "gpt-6",
+      sessionId: "t",
+    } as any);
+    const modelOpt = (res.configOptions as any[]).find(
+      (o) => o.category === "model",
+    );
+    expect(modelOpt?.currentValue).toBe("gpt-6");
+  });
+
+  it("sends activePermissionProfile :read-only on turn/start in read-only mode", async () => {
+    const fake = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+    });
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    await codex.setSessionConfigOption({
+      configId: "mode",
+      value: "read-only",
+      sessionId: "t",
+    } as any);
+
+    const turn = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "look around" }],
+    } as unknown as PromptRequest);
+    fake.emit("turn/completed", { turn: { status: "completed" } });
+    await turn;
+
+    // codex 0.140.0 enforces the sandbox through the named profile, so read-only has to send it together with sandboxPolicy.
+    const startReq = fake.requests.find((q) => q.method === "turn/start");
+    expect(startReq?.params).toMatchObject({
+      activePermissionProfile: { extends: ":read-only" },
+      sandboxPolicy: { type: "readOnly" },
+    });
+  });
+
+  it("resumeSession resumes the existing thread and returns configOptions", async () => {
+    const fake = makeStubRpc({
+      "thread/start": { thread: { id: "t1" } },
+      "thread/resume": { thread: { id: "t1" } },
+    });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      model: "gpt-5.5",
+      rpcFactory: fake.factory,
+    });
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const out = await codex.resumeSession({
+      sessionId: "t1",
+      cwd: "/r",
+      mcpServers: [],
+    } as any);
+    const resumeRpc = fake.requests.find((q) => q.method === "thread/resume");
+    expect(resumeRpc?.params).toMatchObject({ threadId: "t1" });
+    expect((out.configOptions as any[]).length).toBeGreaterThan(0);
+  });
+
+  it("listSessions maps thread/list to ACP sessions", async () => {
+    const fake = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "thread/list": {
+        data: [
+          { id: "t1", cwd: "/r", name: "Task 1" },
+          { id: "t2", cwd: "/r2" },
+        ],
+      },
+    });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      model: "gpt-5.5",
+      rpcFactory: fake.factory,
+    });
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const { sessions } = await codex.listSessions({ cwd: "/r" } as any);
+    expect(sessions).toEqual([
+      { sessionId: "t1", cwd: "/r", title: "Task 1" },
+      { sessionId: "t2", cwd: "/r2" },
+    ]);
+  });
+
+  it("forkSession forks and returns a session id", async () => {
+    const fake = makeStubRpc({
+      "thread/start": { thread: { id: "t1" } },
+      "thread/fork": { thread: { id: "t2" } },
+    });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      model: "gpt-5.5",
+      rpcFactory: fake.factory,
+    });
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const { sessionId } = await codex.unstable_forkSession({
+      sessionId: "t1",
+      cwd: "/r",
+      mcpServers: [],
+    } as any);
+    expect(sessionId).toBe("t2");
+  });
+
+  it("maps a failed turn to a refusal stop reason", async () => {
+    const fake = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+    });
+
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const turn = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "go" }],
+    } as unknown as PromptRequest);
+    fake.emit("turn/completed", { turn: { status: "failed" } });
+
+    await expect(turn).resolves.toMatchObject({ stopReason: "refusal" });
+  });
+
+  it("maps an interrupted turn to cancelled", async () => {
+    const fake = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+    });
+
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const turn = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "go" }],
+    } as unknown as PromptRequest);
+    fake.emit("turn/completed", { turn: { status: "interrupted" } });
+
+    await expect(turn).resolves.toMatchObject({ stopReason: "cancelled" });
+  });
+
+  it("finalizes the turn on a non-retried error notification", async () => {
+    const fake = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+    });
+
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const turn = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "go" }],
+    } as unknown as PromptRequest);
+    // A willRetry:false error has to settle the turn; otherwise it would hang until the stream closes.
+    fake.emit("error", { willRetry: false, error: { message: "boom" } });
+
+    await expect(turn).resolves.toMatchObject({ stopReason: "refusal" });
+  });
+
+  it("ends the turn without turn/start when no prompt block is usable", async () => {
+    const fake = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+    });
+
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const { stopReason } = await codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "audio", data: "AAAA", mimeType: "audio/wav" }],
+    } as unknown as PromptRequest);
+
+    expect(stopReason).toBe("end_turn");
+    expect(fake.requests.some((q) => q.method === "turn/start")).toBe(false);
+  });
+
+  it("finalizes a turn once when error and turn/completed both arrive", async () => {
+    const fake = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const emitted: Array<Record<string, unknown>> = [];
+    const { client, extNotifications } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+      onStructuredOutput: async (v) => {
+        emitted.push(v);
+      },
+    });
+    const schema = {
+      type: "object",
+      properties: { ok: { type: "boolean" } },
+      required: ["ok"],
+    };
+
+    await codex.newSession({
+      cwd: "/r",
+      _meta: { jsonSchema: schema, taskRunId: "run_x" },
+    } as unknown as NewSessionRequest);
+    const turn = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "go" }],
+    } as unknown as PromptRequest);
+
+    fake.emit("item/completed", {
+      item: { type: "agentMessage", id: "a1", text: '{"ok":true}' },
+    });
+    // Both an error and a turn/completed arrive for the same turn; turn_complete must still fire only once.
+    fake.emit("error", { willRetry: false, error: { message: "boom" } });
+    fake.emit("turn/completed", { turn: { status: "failed" } });
+    await turn;
+
+    // Structured output is only recorded on a clean end_turn, so a refused turn leaves the list empty.
+    expect(emitted).toEqual([]);
+    expect(
+      extNotifications.filter((e) => e.method === "_posthog/turn_complete")
+        .length,
+    ).toBe(1);
+  });
+
+  it("routes command approvals to the host and maps allow to a decision envelope", async () => {
+    const fake = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+    });
+
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const response = await fake.invokeRequest(
+      "item/commandExecution/requestApproval",
+      { itemId: "i", command: "ls -la" },
+    );
+
+    expect(response).toEqual({ decision: "accept" });
+  });
+
+  it("rejects the pending turn when the app-server stream closes", async () => {
+    const fake = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+    });
+
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const turn = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "hi" }],
+    } as unknown as PromptRequest);
+
+    fake.triggerClose();
+
+    await expect(turn).rejects.toThrow(/exited before the turn completed/);
+  });
+
+  it("interrupts by sending turn/interrupt with the live threadId + turnId", async () => {
+    const fake = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+    });
+
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const turn = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "go" }],
+    } as unknown as PromptRequest);
+    // turn/started supplies the live turnId, which the server REQUIRES on turn/interrupt (else -32600).
+    fake.emit("turn/started", { turn: { id: "turn_1" } });
+
+    await codex.cancel({ sessionId: "t" });
+
+    expect((await turn).stopReason).toBe("cancelled");
+    const msg = fake.requests.find((q) => q.method === "turn/interrupt");
+    expect(msg?.params).toEqual({ threadId: "t", turnId: "turn_1" });
+  });
+
+  it("a cancelled turn's late completion does not cancel the follow-up turn", async () => {
+    const fake = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "turn/start": { turn: { id: "turn_1" } },
+    });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+    });
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+
+    // Start turn 1 and cancel it, so turn_1 is recorded as interrupted.
+    const first = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "go" }],
+    } as unknown as PromptRequest);
+    fake.emit("turn/started", { turn: { id: "turn_1" } });
+    await codex.cancel({ sessionId: "t" });
+    expect((await first).stopReason).toBe("cancelled");
+
+    // Then start the follow-up, turn 2.
+    const second = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "again" }],
+    } as unknown as PromptRequest);
+    fake.emit("turn/started", { turn: { id: "turn_2" } });
+    // turn_1's completion shows up late, while turn 2 is running, and has to be ignored.
+    fake.emit("turn/completed", {
+      turn: { id: "turn_1", status: "interrupted" },
+    });
+    fake.emit("turn/completed", {
+      turn: { id: "turn_2", status: "completed" },
+    });
+    expect((await second).stopReason).toBe("end_turn");
+  });
+
+  it("emits _posthog/turn_complete with cancelled on interrupt (matches codex-acp)", async () => {
+    const fake = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client, extNotifications } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+    });
+
+    await codex.newSession({
+      cwd: "/r",
+      _meta: { taskRunId: "run_c" },
+    } as unknown as NewSessionRequest);
+    const turn = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "go" }],
+    } as unknown as PromptRequest);
+    // Without turn/started the interrupt never reaches the binary and a local finalize would give a false green.
+    fake.emit("turn/started", { turn: { id: "turn_1" } });
+    await codex.cancel({ sessionId: "t" });
+
+    expect((await turn).stopReason).toBe("cancelled");
+    // The interrupt RPC really went out (the turn was not merely finalized locally)...
+    expect(
+      fake.requests.find((q) => q.method === "turn/interrupt")?.params,
+    ).toEqual({ threadId: "t", turnId: "turn_1" });
+    // ...and the cancelled turn still produces the cloud idle signal, exactly one time.
+    const sig = extNotifications.filter(
+      (e) => e.method === "_posthog/turn_complete",
+    );
+    expect(sig).toHaveLength(1);
+    expect((sig[0].params as { stopReason?: string }).stopReason).toBe(
+      "cancelled",
+    );
+  });
+
+  it("skips turn/interrupt (but still finalizes cancelled) when no turn/started arrived", async () => {
+    const fake = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+    });
+
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const turn = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "go" }],
+    } as unknown as PromptRequest);
+    // With no turn/started there is no turnId, so interrupt() has to skip the RPC (else -32600) yet still finalize.
+    await codex.cancel({ sessionId: "t" });
+
+    expect((await turn).stopReason).toBe("cancelled");
+    expect(fake.requests.some((q) => q.method === "turn/interrupt")).toBe(
+      false,
+    );
+  });
+
+  it("rejects a concurrent prompt while a turn is in progress", async () => {
+    const fake = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+    });
+
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const first = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "go" }],
+    } as unknown as PromptRequest);
+
+    await expect(
+      codex.prompt({
+        sessionId: "t",
+        prompt: [{ type: "text", text: "again" }],
+      } as unknown as PromptRequest),
+    ).rejects.toThrow(/already in progress/);
+
+    fake.emit("turn/completed", { turn: { status: "completed" } });
+    await first;
+  });
+
+  it("runs sequential turns on the same session", async () => {
+    const fake = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const codex = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: fake.factory,
+    });
+
+    await codex.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+
+    const first = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "one" }],
+    } as unknown as PromptRequest);
+    fake.emit("turn/completed", { turn: { status: "completed" } });
+    await expect(first).resolves.toMatchObject({ stopReason: "end_turn" });
+
+    const second = codex.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "two" }],
+    } as unknown as PromptRequest);
+    fake.emit("turn/completed", { turn: { status: "completed" } });
+    await expect(second).resolves.toMatchObject({ stopReason: "end_turn" });
+  });
+
+  it("maps a rejected approval to decline", async () => {
+    const rpc = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient({
+      outcome: "selected",
+      optionId: "reject",
+    });
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+
+    await adapter.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const reply = await rpc.invokeRequest("item/fileChange/requestApproval", {
+      itemId: "i",
+    });
+    // Selecting the "reject" option must come back to codex as a decline.
+    expect(reply).toEqual({ decision: "decline" });
+  });
+
+  it("maps a cancelled approval to cancel", async () => {
+    const rpc = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient({ outcome: "cancelled" });
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+
+    await adapter.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const reply = await rpc.invokeRequest(
+      "item/commandExecution/requestApproval",
+      { itemId: "i", command: "ls" },
+    );
+    // A cancelled approval outcome must come back as a cancel decision.
+    expect(reply).toEqual({ decision: "cancel" });
+  });
+
+  it("folds a mid-turn prompt into the running turn via turn/steer", async () => {
+    const rpc = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "turn/start": { turn: { id: "turn_1" } },
+    });
+    const { client } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+
+    await adapter.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const original = adapter.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "one" }],
+    } as unknown as PromptRequest);
+
+    // Steering needs the active turn id, which is delivered by turn/started.
+    rpc.emit("turn/started", { threadId: "t", turn: { id: "turn_1" } });
+
+    const folded = adapter.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "more context" }],
+    } as unknown as PromptRequest);
+
+    // One turn/completed settles the original prompt and the folded one alike.
+    rpc.emit("turn/completed", { turn: { status: "completed" } });
+    expect((await original).stopReason).toBe("end_turn");
+    expect((await folded).stopReason).toBe("end_turn");
+
+    const steerReq = rpc.requests.find((r) => r.method === "turn/steer");
+    expect(steerReq?.params).toMatchObject({
+      threadId: "t",
+      expectedTurnId: "turn_1",
+      input: [{ type: "text", text: "more context" }],
+    });
+    // Exactly one turn/start was sent: the second prompt steered the live turn
+    // instead of opening a new one.
+    const starts = rpc.requests.filter((r) => r.method === "turn/start");
+    expect(starts).toHaveLength(1);
+  });
+
+  it("refreshes the live turnId from each turn/steer response", async () => {
+    const stub = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "turn/start": { turn: { id: "turn_1" } },
+      "turn/steer": { turnId: "turn_2" }, // stubbed steer reply carries a new id: the server rotates the active turn id
+    });
+    const { client } = makeFakeClient();
+    const agent = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: stub.factory,
+    });
+
+    await agent.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const first = agent.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "one" }],
+    } as unknown as PromptRequest);
+    stub.emit("turn/started", { turn: { id: "turn_1" } });
+
+    const second = agent.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "two" }],
+    } as unknown as PromptRequest);
+    // Yield one macrotask (setTimeout 0) so the first steer's reply, with its rotated turnId, is applied before the next steer reads it.
+    await new Promise((r) => setTimeout(r, 0));
+    const third = agent.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "three" }],
+    } as unknown as PromptRequest);
+
+    stub.emit("turn/completed", { turn: { status: "completed" } }); // a single completion is expected to settle all three prompts
+    await Promise.all([first, second, third]);
+
+    const steers = stub.requests.filter((r) => r.method === "turn/steer"); // prompts two and three each produced one steer
+    expect(steers).toHaveLength(2);
+    expect(
+      (steers[0].params as { expectedTurnId?: string }).expectedTurnId,
+    ).toBe("turn_1");
+    // The first steer's reply rotated the id, so the second steer has to name turn_2 as its expected turn.
+    expect(
+      (steers[1].params as { expectedTurnId?: string }).expectedTurnId,
+    ).toBe("turn_2");
+  });
+
+  it("omits disabled skills from available_commands_update", async () => {
+    const stub = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "skills/list": {
+        data: [
+          {
+            skills: [
+              { name: "deploy", description: "Deploy", enabled: true },
+              { name: "danger", description: "Disabled", enabled: false },
+            ],
+          },
+        ],
+      },
+    });
+    const { client, sessionUpdates } = makeFakeClient();
+    const agent = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: stub.factory,
+    });
+    await agent.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    // Default to [] so a missing update fails the assertion below with a readable diff, not a TypeError on `.map`.
+    const cmds = (
+      sessionUpdates.find(
+        (u: any) => u.update?.sessionUpdate === "available_commands_update",
+      ) as any
+    )?.update?.availableCommands ?? [];
+    expect(cmds.map((c: { name: string }) => c.name)).toEqual(["deploy"]);
+  });
+
+  it("emits _posthog/sdk_session when a taskRunId is present", async () => {
+    const rpc = makeStubRpc({ "thread/start": { thread: { id: "thr_x" } } });
+    const { client, extNotifications } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+    // The run id travels in _meta; the thread id from thread/start is expected as the session id.
+    await adapter.newSession({
+      cwd: "/r",
+      _meta: { taskRunId: "run_42" },
+    } as unknown as NewSessionRequest);
+
+    expect(extNotifications).toContainEqual({
+      method: "_posthog/sdk_session",
+      params: { taskRunId: "run_42", sessionId: "thr_x", adapter: "codex" },
+    });
+  });
+
+  it("does not emit _posthog/sdk_session without a taskRunId", async () => {
+    const rpc = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client, extNotifications } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+    // The request carries no _meta.taskRunId, so nothing should be announced.
+    await adapter.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    expect(
+      extNotifications.every((n) => n.method !== "_posthog/sdk_session"),
+    ).toBe(true);
+  });
+
+  it("emits _posthog/turn_complete and usage breakdown on turn completion", async () => {
+    const rpc = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "turn/start": { turn: { id: "turn_1" } },
+    });
+    const { client, extNotifications } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+
+    await adapter.newSession({
+      cwd: "/r",
+      _meta: { taskRunId: "run_1", systemPrompt: "be terse" },
+    } as unknown as NewSessionRequest);
+    const pending = adapter.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "hi" }],
+    } as unknown as PromptRequest);
+
+    rpc.emit("thread/tokenUsage/updated", {
+      threadId: "t",
+      turnId: "turn_1",
+      tokenUsage: {
+        total: {
+          totalTokens: 100,
+          inputTokens: 60,
+          cachedInputTokens: 10,
+          outputTokens: 30,
+          reasoningOutputTokens: 5,
+        },
+        modelContextWindow: 200000,
+      },
+    });
+    rpc.emit("turn/completed", { turn: { status: "completed" } });
+    await pending;
+
+    const completion = extNotifications.find(
+      (n) => n.method === "_posthog/turn_complete",
+    );
+    expect(completion?.params).toMatchObject({
+      sessionId: "t",
+      stopReason: "end_turn",
+      usage: {
+        inputTokens: 60,
+        outputTokens: 30,
+        cachedReadTokens: 10,
+        cachedWriteTokens: 0,
+        totalTokens: 100,
+      },
+    });
+    // The breakdown flavour of usage_update is recognised by its `breakdown` field (it has no `used`).
+    const breakdownUpdate = extNotifications.find(
+      (n) =>
+        n.method === "_posthog/usage_update" &&
+        (n.params as { breakdown?: unknown }).breakdown,
+    );
+    expect(breakdownUpdate).toBeDefined();
+  });
+
+  it("context-usage indicator reports the latest turn, not the cumulative thread total", async () => {
+    // Window occupancy has to follow `last`; the cumulative `total` would make the
+    // window look fuller than it is purely because usage accumulates.
+    const rpc = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client, extNotifications } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+    await adapter.newSession({
+      cwd: "/r",
+      _meta: { taskRunId: "run_ctx" },
+    } as unknown as NewSessionRequest);
+    const pending = adapter.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "hi" }],
+    } as unknown as PromptRequest);
+
+    rpc.emit("thread/tokenUsage/updated", {
+      tokenUsage: {
+        total: {
+          totalTokens: 433289,
+          inputTokens: 432636,
+          cachedInputTokens: 76928,
+          outputTokens: 595,
+        },
+        last: {
+          totalTokens: 189075,
+          inputTokens: 111552,
+          cachedInputTokens: 76928,
+          outputTokens: 595,
+        },
+        modelContextWindow: 997500,
+      },
+    });
+    rpc.emit("turn/completed", { turn: { status: "completed" } });
+    await pending;
+
+    const occupancy = extNotifications.find(
+      (n) =>
+        n.method === "_posthog/usage_update" &&
+        typeof (n.params as { used?: unknown }).used === "number",
+    );
+    // `used` must equal last.totalTokens (189075) rather than total.totalTokens (433289).
+    expect(occupancy?.params).toMatchObject({
+      used: 189075,
+      size: 997500,
+      usage: { inputTokens: 111552, totalTokens: 189075 },
+    });
+  });
+
+  it("reports codex's per-turn `last` (not the cumulative total) in turn_complete", async () => {
+    const rpc = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client, extNotifications } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+    await adapter.newSession({
+      cwd: "/r",
+      _meta: { taskRunId: "run_u" },
+    } as unknown as NewSessionRequest);
+
+    // The per-turn figure comes straight from `last`; the cumulative `total` is not diffed.
+    const firstTurn = adapter.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "a" }],
+    } as unknown as PromptRequest);
+    rpc.emit("thread/tokenUsage/updated", {
+      tokenUsage: {
+        total: { inputTokens: 100, outputTokens: 50 },
+        last: { inputTokens: 100, outputTokens: 50 },
+      },
+    });
+    rpc.emit("turn/completed", { turn: { status: "completed" } });
+    await firstTurn;
+
+    const secondTurn = adapter.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "b" }],
+    } as unknown as PromptRequest);
+    rpc.emit("thread/tokenUsage/updated", {
+      tokenUsage: {
+        total: { inputTokens: 250, outputTokens: 120 },
+        last: { inputTokens: 150, outputTokens: 70 },
+      },
+    });
+    rpc.emit("turn/completed", { turn: { status: "completed" } });
+    await secondTurn;
+
+    const completions = extNotifications.filter(
+      (n) => n.method === "_posthog/turn_complete",
+    );
+    expect(completions).toHaveLength(2);
+    expect(
+      (completions[0].params as { usage: Record<string, number> }).usage,
+    ).toMatchObject({
+      inputTokens: 100,
+      outputTokens: 50,
+    });
+    // The second turn reports codex's `last` (150/70), not the running total (250/120).
+    expect(
+      (completions[1].params as { usage: Record<string, number> }).usage,
+    ).toMatchObject({
+      inputTokens: 150,
+      outputTokens: 70,
+    });
+  });
+
+  it("signals compaction start (_posthog/status) when a contextCompaction item begins", async () => {
+    const rpc = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client, extNotifications } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+    await adapter.newSession({
+      cwd: "/r",
+      _meta: {},
+    } as unknown as NewSessionRequest);
+
+    rpc.emit("item/started", {
+      item: { type: "contextCompaction", id: "c1" },
+    });
+
+    // Same signal as the Claude adapter: the host flips isCompacting, which gates steer/queue.
+    const statusNote = extNotifications.find(
+      (n) => n.method === "_posthog/status",
+    );
+    expect(statusNote?.params).toMatchObject({
+      sessionId: "t", status: "compacting",
+    });
+  });
+
+  it("emits compact_boundary + a transcript marker when the compaction item completes", async () => {
+    const rpc = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client, extNotifications, sessionUpdates } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+    await adapter.newSession({
+      cwd: "/r",
+      _meta: {},
+    } as unknown as NewSessionRequest);
+
+    // The item brackets the compaction: started means in progress, completed means boundary.
+    rpc.emit("item/started", {
+      item: { type: "contextCompaction", id: "c1" },
+    });
+    rpc.emit("item/completed", {
+      item: { type: "contextCompaction", id: "c1", summary: "…" },
+    });
+
+    const boundaries = extNotifications.filter(
+      (n) => n.method === "_posthog/compact_boundary",
+    );
+    // compact_boundary clears isCompacting + drains the host queue.
+    expect(boundaries[0]?.params).toMatchObject({ sessionId: "t" });
+    // A user-visible marker must also land in the transcript.
+    expect(sessionUpdates).toContainEqual({
+      sessionId: "t",
+      update: {
+        sessionUpdate: "agent_message_chunk",
+        content: { type: "text", text: "\n\nContext compacted." },
+      },
+    });
+    // Exactly one boundary — the dedupe flag prevents a double-emit. The list
+    // was captured above; nothing asynchronous has run since, so it is still
+    // the complete set of boundary notifications.
+    expect(boundaries).toHaveLength(1);
+  });
+
+  it("still emits compact_boundary when the turn dies mid-compaction (no stuck isCompacting)", async () => {
+    const stub = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client, extNotifications } = makeFakeClient();
+    const agent = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: stub.factory,
+    });
+    await agent.newSession({
+      cwd: "/r",
+      _meta: {},
+    } as unknown as NewSessionRequest);
+
+    const done = agent.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "go" }],
+    } as unknown as PromptRequest);
+    // Order matters: compaction starts, then a fatal error ends the turn with no item/completed; the finalize-time recovery must still fire the boundary.
+    stub.emit("item/started", {
+      item: { type: "contextCompaction", id: "c1" },
+    });
+    stub.emit("error", { willRetry: false, error: { message: "boom" } }); // willRetry: false — the non-retryable error this test uses to end the turn
+    await done; // the prompt is expected to settle rather than hang or reject
+
+    expect(
+      extNotifications.find((n) => n.method === "_posthog/compact_boundary")
+        ?.params,
+    ).toMatchObject({ sessionId: "t" });
+  });
+
+  it("loadSession resumes the thread and returns configOptions", async () => {
+    const rpc = makeStubRpc({
+      "thread/start": { thread: { id: "t1" } },
+      "thread/resume": { thread: { id: "t1" } },
+    });
+    const { client, extNotifications } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      model: "gpt-5.5",
+      rpcFactory: rpc.factory,
+    });
+    await adapter.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+
+    const loaded = await adapter.loadSession({
+      sessionId: "t1",
+      cwd: "/r",
+      mcpServers: [],
+      _meta: { taskRunId: "run_load" },
+    } as unknown as Parameters<typeof adapter.loadSession>[0]);
+
+    const resume = rpc.requests.find((r) => r.method === "thread/resume");
+    expect(resume?.params).toMatchObject({ threadId: "t1" });
+    expect((loaded.configOptions as any[]).length).toBeGreaterThan(0);
+    // sdk_session is replayed on load so task tracking keeps working after a reload.
+    expect(extNotifications).toContainEqual({
+      method: "_posthog/sdk_session",
+      params: { taskRunId: "run_load", sessionId: "t1", adapter: "codex" },
+    });
+  });
+
+  it("loadSession replays the resumed thread's persisted transcript", async () => {
+    const rpc = makeStubRpc({
+      "thread/start": { thread: { id: "t1" } },
+      "thread/resume": {
+        thread: {
+          id: "t1",
+          turns: [
+            {
+              items: [
+                {
+                  type: "userMessage",
+                  id: "u1",
+                  content: [{ type: "text", text: "fix the bug" }],
+                },
+                {
+                  type: "commandExecution",
+                  id: "c1",
+                  command: "ls",
+                  status: "completed",
+                },
+                { type: "agentMessage", id: "a1", text: "fixed it" },
+              ],
+            },
+          ],
+        },
+      },
+    });
+    const { client, sessionUpdates } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      model: "gpt-5.5",
+      rpcFactory: rpc.factory,
+    });
+    await adapter.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+
+    await adapter.loadSession({
+      sessionId: "t1",
+      cwd: "/r",
+      mcpServers: [],
+    } as unknown as Parameters<typeof adapter.loadSession>[0]);
+    // Each persisted item kind (user message, command, agent message) should show up as an update.
+    const seen = (sessionUpdates as any[]).map((u) => u.update?.sessionUpdate);
+    expect(seen).toEqual(
+      expect.arrayContaining([
+        "user_message_chunk",
+        "tool_call",
+        "agent_message_chunk",
+      ]),
+    );
+    expect(sessionUpdates).toContainEqual({
+      sessionId: "t1",
+      update: {
+        sessionUpdate: "user_message_chunk",
+        content: { type: "text", text: "fix the bug" },
+      },
+    });
+  });
+
+  it("forwards additionalDirectories to thread/start as writable_roots", async () => {
+    const rpc = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+
+    await adapter.newSession({
+      cwd: "/repo",
+      additionalDirectories: ["/repo/pkg-a", "/repo/pkg-b"],
+    } as unknown as NewSessionRequest);
+    // The extra directories must reach codex under config.sandbox_workspace_write.
+    const startReq = rpc.requests.find((r) => r.method === "thread/start");
+    expect(startReq?.params).toMatchObject({
+      config: {
+        sandbox_workspace_write: {
+          writable_roots: ["/repo/pkg-a", "/repo/pkg-b"],
+        },
+      },
+    });
+  });
+
+  it("carries an image block through to turn/start input", async () => {
+    const rpc = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "turn/start": { turn: { id: "turn_1" } },
+    });
+    const { client } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+
+    await adapter.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const pending = adapter.prompt({
+      sessionId: "t",
+      prompt: [
+        { type: "text", text: "look at this" },
+        { type: "image", data: "aGVsbG8=", mimeType: "image/png" },
+      ],
+    } as unknown as PromptRequest);
+    rpc.emit("turn/completed", { turn: { status: "completed" } });
+    await pending;
+    // The base64 image is expected to be forwarded as a data: URL next to the text item.
+    const started = rpc.requests.find((r) => r.method === "turn/start");
+    expect(started?.params).toMatchObject({
+      input: [
+        { type: "text", text: "look at this", text_elements: [] },
+        { type: "image", url: "data:image/png;base64,aGVsbG8=" },
+      ],
+    });
+  });
+
+  it("prepends _meta.prContext to the forwarded turn input but not the echo", async () => {
+    const rpc = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "turn/start": { turn: { id: "turn_1" } },
+    });
+    const { client, sessionUpdates } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+
+    await adapter.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const pending = adapter.prompt({
+      sessionId: "t",
+      prompt: [{ type: "text", text: "fix the bug" }],
+      _meta: { prContext: "PR #123 is open; review before editing." },
+    } as unknown as PromptRequest);
+    rpc.emit("turn/completed", { turn: { status: "completed" } });
+    await pending;
+
+    // The FORWARDED prompt gets prContext in front (parity with claude + codex-acp).
+    const started = rpc.requests.find((r) => r.method === "turn/start");
+    expect(
+      (started?.params as { input: Array<{ text?: string }> }).input,
+    ).toEqual([
+      {
+        type: "text",
+        text: "PR #123 is open; review before editing.",
+        text_elements: [],
+      },
+      { type: "text", text: "fix the bug", text_elements: [] },
+    ]);
+    // The user turn echoed to the client carries the real message only, without the prContext prefix.
+    const userEchoes = (sessionUpdates as any[]).filter(
+      (u) => u.update?.sessionUpdate === "user_message_chunk",
+    );
+    expect(userEchoes).toEqual([
+      {
+        sessionId: "t",
+        update: {
+          sessionUpdate: "user_message_chunk",
+          content: { type: "text", text: "fix the bug" },
+        },
+      },
+    ]);
+  });
+
+  it("echoes an image-only user turn as a user_message_chunk", async () => {
+    const rpc = makeStubRpc({
+      "thread/start": { thread: { id: "t" } },
+      "turn/start": { turn: { id: "turn_1" } },
+    });
+    const { client, sessionUpdates } = makeFakeClient();
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+
+    await adapter.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const png = { type: "image", data: "aGVsbG8=", mimeType: "image/png" };
+    const pending = adapter.prompt({
+      sessionId: "t",
+      prompt: [png],
+    } as unknown as PromptRequest);
+    rpc.emit("turn/completed", { turn: { status: "completed" } });
+    await pending;
+    // With no text block present, the image block itself is expected back as the echo.
+    expect(sessionUpdates).toContainEqual({
+      sessionId: "t",
+      update: { sessionUpdate: "user_message_chunk", content: png },
+    });
+  });
+
+  it("routes item/tool/requestUserInput through the richer-approval handler", async () => {
+    const rpc = makeStubRpc({ "thread/start": { thread: { id: "t" } } });
+    const { client } = makeFakeClient({
+      outcome: "selected",
+      optionId: "option_0",
+    });
+    const adapter = new CodexAppServerAgent(client, {
+      processOptions: { binaryPath: "/x/codex" },
+      rpcFactory: rpc.factory,
+    });
+
+    await adapter.newSession({ cwd: "/r" } as unknown as NewSessionRequest);
+    const answer = await rpc.invokeRequest("item/tool/requestUserInput", {
+      threadId: "t",
+      turnId: "turn_1",
+      itemId: "i1",
+      questions: [
+        {
+          id: "q1",
+          header: "Pick",
+          question: "Which one?",
+          isOther: false,
+          isSecret: false,
+          options: [
+            { label: "A", description: "" },
+            { label: "B", description: "" },
+          ],
+        },
+      ],
+      autoResolutionMs: null,
+    });
+
+    // This handler answers with a typed { answers } object instead of a decision string.
+    expect(answer).toEqual({ answers: { q1: { answers: ["A"] } } });
   });
 });
diff --git a/packages/agent/src/adapters/codex-app-server/codex-app-server-agent.ts b/packages/agent/src/adapters/codex-app-server/codex-app-server-agent.ts
index 88797060eb..1008d89b22 100644
--- a/packages/agent/src/adapters/codex-app-server/codex-app-server-agent.ts
+++ b/packages/agent/src/adapters/codex-app-server/codex-app-server-agent.ts
@@ -1,14 +1,25 @@
 import type {
   AgentSideConnection,
-  ContentBlock,
+  ForkSessionRequest,
+  ForkSessionResponse,
   InitializeRequest,
   InitializeResponse,
+  ListSessionsRequest,
+  ListSessionsResponse,
+  LoadSessionRequest,
+  LoadSessionResponse,
   NewSessionRequest,
   NewSessionResponse,
   PromptRequest,
   PromptResponse,
+  ResumeSessionRequest,
+  ResumeSessionResponse,
+  SetSessionConfigOptionRequest,
+  SetSessionConfigOptionResponse,
   StopReason,
 } from "@agentclientprotocol/sdk";
+import { mcpToolKey, posthogToolMeta } from "@posthog/shared";
+import { POSTHOG_NOTIFICATIONS } from "../../acp-extensions";
 import { DEFAULT_CODEX_MODEL } from "../../gateway-models";
 import type { ProcessSpawnedCallback } from "../../types";
 import { Logger } from "../../utils/logger";
@@ -17,25 +28,68 @@ import {
   nodeWritableToWebWritable,
 } from "../../utils/streams";
 import { BaseAcpAgent, type BaseSettingsManager } from "../base-acp-agent";
+import {
+  type ContextBreakdownBaseline,
+  emptyBaseline,
+  estimateTokens,
+} from "../claude/context-breakdown";
 import {
   AppServerClient,
   type AppServerClientHandlers,
   type AppServerRpc,
 } from "./app-server-client";
-import { mapAppServerNotification } from "./mapping";
+import { handleServerRequest } from "./approvals";
+import {
+  type AccumulatedUsage,
+  buildSdkSessionParams,
+  buildTurnCompleteParams,
+  buildUsageBreakdownParams,
+} from "./ext-notifications";
+import { toCodexInput } from "./input";
+import { buildLocalToolsServer, type LocalToolsMeta } from "./local-tools-mcp";
+import {
+  type AppServerItem,
+  changePaths,
+  diffContent,
+  mapAppServerNotification,
+  mapHistoryItem,
+} from "./mapping";
+import { toCodexMcpServers } from "./mcp-config";
+import { McpManager } from "./mcp-manager";
 import {
   APP_SERVER_METHODS,
   APP_SERVER_NOTIFICATIONS,
   APP_SERVER_REQUESTS,
 } from "./protocol";
+import { SessionConfigState } from "./session-config";
 import {
   type CodexAppServerProcess,
   type CodexAppServerProcessOptions,
   spawnCodexAppServerProcess,
 } from "./spawn";
+import { TurnController } from "./turn-controller";
+import { UsageTracker } from "./usage-tracker";
+
+type AppServerSessionMeta = {
+  // The host sends either a plain string or the Claude-style `{ append }` form.
+  systemPrompt?: string | { append?: string };
+  jsonSchema?: Record<string, unknown> | null; // schema for the final message (structured output); may be null
+  permissionMode?: string;
+  taskRunId?: string; // host's task-run id
+  taskId?: string;
+  persistence?: { taskId?: string }; // NOTE(review): looks like an alternate carrier for taskId — confirm
+  environment?: "local" | "cloud"; // deployment environment
+  channelMode?: boolean;
+  baseBranch?: string;
+};
+
+/** The subset of codex's `Thread` the adapter reads: its `id` plus the persisted `turns` used for history replay. */
+type AppServerThread = {
+  id?: string;
+  turns?: Array<{ items?: Parameters<typeof mapHistoryItem>[1][] }>; // each item has the type of mapHistoryItem's second parameter
+};
 
-// The native app-server owns its own configuration, so there is nothing for the
-// host to manage. BaseAcpAgent only calls dispose() on this.
+// The native app-server owns its config; BaseAcpAgent only calls dispose() on this.
 class NoopSettingsManager implements BaseSettingsManager {
   constructor(private cwd: string) {}
   dispose(): void {}
@@ -50,36 +104,44 @@ class NoopSettingsManager implements BaseSettingsManager {
 
 export interface CodexAppServerAgentOptions {
   processOptions: CodexAppServerProcessOptions;
-  /** Model id passed to thread/start. */
   model?: string;
-  /** Reasoning effort passed to turn/start. */
   reasoningEffort?: string;
   processCallbacks?: ProcessSpawnedCallback;
   logger?: Logger;
+  onStructuredOutput?: (output: Record<string, unknown>) => Promise<void>; // NOTE(review): presumably receives the parsed structured output — confirm
   /** Test seam: build the JSON-RPC client (defaults to spawning the process). */
   rpcFactory?: (handlers: AppServerClientHandlers) => AppServerRpc;
 }
 
 /**
- * ACP Agent backed by the native Codex `app-server` protocol. Presents the same
- * ACP surface to PostHog Code as the codex-acp adapter, but talks to Codex's own
- * JSON-RPC protocol underneath instead of going through the Zed translation layer.
- *
- * Spike scope: covers the core lifecycle (initialize, thread/start, turn/start
- * with streamed agent messages, interrupt, approvals). Resume/fork, tool-call
- * rendering, structured output and usage accounting are follow-ups.
+ * ACP Agent backed by the native Codex `app-server` JSON-RPC protocol. Presents the
+ * same ACP surface to PostHog Code as the codex-acp adapter, without the Zed
+ * translation layer, and stays at parity with it on the adapter surface.
  */
 export class CodexAppServerAgent extends BaseAcpAgent {
   readonly adapterName = "codex";
   private readonly rpc: AppServerRpc;
   private readonly proc?: CodexAppServerProcess;
-  private readonly model: string;
-  private readonly reasoningEffort?: string;
+  private readonly config: SessionConfigState;
+  private readonly onStructuredOutput?: (
+    output: Record<string, unknown>,
+  ) => Promise<void>;
+  /** Codex-specific guidance injected at spawn time; replayed per-thread. */
+  private readonly developerInstructions?: string;
   private threadId?: string;
-  private pendingTurn?: {
-    resolve: (reason: StopReason) => void;
-    reject: (err: Error) => void;
-  };
+  /** JSON schema constraining the final message; set per session via `_meta`. */
+  private jsonSchema?: Record<string, unknown>;
+  /** Final assistant message text for the in-flight turn (structured output). */
+  private lastAgentMessage = "";
+  /** True between a contextCompaction item's start and its boundary (dedupes the boundary). */
+  private compactionActive = false;
+  /** Maps the host's taskRunId to this session, replayed for cloud notifications. */
+  private taskRunId?: string;
+  /** Deployment environment; on "cloud" a non-danger sandbox would panic, so we skip the override. */
+  private environment?: "local" | "cloud";
+  private readonly mcp = new McpManager();
+  private readonly turns = new TurnController();
+  private readonly usage = new UsageTracker();
 
   constructor(
     client: AgentSideConnection,
@@ -89,8 +151,12 @@ export class CodexAppServerAgent extends BaseAcpAgent {
     this.logger =
       options.logger ??
       new Logger({ debug: true, prefix: "[CodexAppServerAgent]" });
-    this.model = options.model ?? DEFAULT_CODEX_MODEL;
-    this.reasoningEffort = options.reasoningEffort;
+    this.config = new SessionConfigState(
+      options.model ?? DEFAULT_CODEX_MODEL,
+      options.reasoningEffort,
+    );
+    this.onStructuredOutput = options.onStructuredOutput;
+    this.developerInstructions = options.processOptions.developerInstructions;
 
     const handlers: AppServerClientHandlers = {
       logger: this.logger,
@@ -134,83 +200,467 @@ export class CodexAppServerAgent extends BaseAcpAgent {
         title: "PostHog Code",
         version: "0.1.0",
       },
-      capabilities: { experimentalApi: false },
+      // Opt into codex's experimental API so experimental turn/start fields are honored.
+      capabilities: { experimentalApi: true, requestAttestation: false },
     });
     this.rpc.notify(APP_SERVER_NOTIFICATIONS.INITIALIZED, {});
     return {
       protocolVersion: request.protocolVersion,
+      agentCapabilities: {
+        promptCapabilities: {
+          image: true,
+          embeddedContext: true,
+        },
+        // Only http: SSE is deliberately not advertised, since we would have to mistranslate it into the http shape.
+        mcpCapabilities: {
+          http: true,
+        },
+        loadSession: true,
+        sessionCapabilities: {
+          list: {},
+          fork: {},
+          resume: {},
+          additionalDirectories: {},
+        },
+        _meta: {
+          posthog: {
+            resumeSession: true,
+            steering: "native",
+          },
+        },
+      },
       agentInfo: {
         name: "codex",
         title: "Codex (app-server)",
         version: "0.1.0",
       },
+      authMethods: [],
     };
   }
 
   async newSession(params: NewSessionRequest): Promise<NewSessionResponse> {
-    const result = await this.rpc.request<{ thread?: { id?: string } }>(
+    const { threadId } = await this.setupThread(
       APP_SERVER_METHODS.THREAD_START,
-      { model: this.model, cwd: params.cwd },
+      {
+        cwd: params.cwd,
+        mcpServers: params.mcpServers,
+        meta: params._meta as AppServerSessionMeta | undefined,
+        additionalDirectories: params.additionalDirectories ?? undefined,
+      },
+    );
+    return { sessionId: threadId, configOptions: this.config.options };
+  }
+
+  /** Resume an existing thread via thread/resume and return the config options. */
+  async resumeSession(
+    params: ResumeSessionRequest,
+  ): Promise<ResumeSessionResponse> {
+    const { cwd, mcpServers, sessionId: threadId } = params;
+    const meta = params._meta as AppServerSessionMeta | undefined;
+    // A null additionalDirectories is normalized to undefined.
+    const additionalDirectories = params.additionalDirectories ?? undefined;
+    const setup = { cwd, mcpServers, meta, threadId, additionalDirectories };
+    await this.setupThread(APP_SERVER_METHODS.THREAD_RESUME, setup);
+    return { configOptions: this.config.options };
+  }
+
+  /** Re-attach to an existing thread (no turn is started): resume it, then replay its transcript. */
+  async loadSession(params: LoadSessionRequest): Promise<LoadSessionResponse> {
+    const { cwd, mcpServers, sessionId: threadId } = params;
+    const meta = params._meta as AppServerSessionMeta | undefined;
+    // A null additionalDirectories is normalized to undefined.
+    const additionalDirectories = params.additionalDirectories ?? undefined;
+    const setup = { cwd, mcpServers, meta, threadId, additionalDirectories };
+    const resumed = await this.setupThread(
+      APP_SERVER_METHODS.THREAD_RESUME,
+      setup,
+    );
+    // History is replayed from the thread object in the thread/resume response.
+    this.replayHistory(resumed.thread);
+    return { configOptions: this.config.options };
+  }
+
+  /** Fork an existing thread via thread/fork and report the resulting thread id as the session id. */
+  async unstable_forkSession(
+    params: ForkSessionRequest,
+  ): Promise<ForkSessionResponse> {
+    const { cwd, mcpServers, sessionId: sourceThreadId } = params;
+    // setupThread yields the response's thread id, falling back to the id passed in.
+    const forked = await this.setupThread(APP_SERVER_METHODS.THREAD_FORK, {
+      cwd,
+      mcpServers,
+      meta: params._meta as AppServerSessionMeta | undefined,
+      threadId: sourceThreadId,
+      additionalDirectories: params.additionalDirectories ?? undefined,
+    });
+    return { sessionId: forked.threadId, configOptions: this.config.options };
+  }
+
+  /** Emit the persisted turns carried by a thread/resume response as best-effort session updates. */
+  private replayHistory(thread: AppServerThread | undefined): void {
+    const sessionId = this.sessionId;
+    const turns = thread?.turns;
+    if (!sessionId || !turns?.length) return;
+    for (const item of turns.flatMap((turn) => turn.items ?? [])) {
+      for (const update of mapHistoryItem(sessionId, item)) {
+        void this.client.sessionUpdate(update).catch(() => undefined);
+      }
+    }
+  }
+
+  /** List codex threads for `params.cwd` as ACP sessions; a failed thread/list is logged and yields an empty list. */
+  async listSessions(
+    params: ListSessionsRequest,
+  ): Promise<ListSessionsResponse> {
+    try {
+      const res = await this.rpc.request<{
+        data?: Array<{
+          id?: string;
+          cwd?: string;
+          name?: string | null;
+          preview?: string;
+        }>;
+      }>(APP_SERVER_METHODS.THREAD_LIST, { cwd: params.cwd });
+      const sessions = (res?.data ?? [])
+        .filter((t) => t?.id)
+        .map((t) => ({
+          sessionId: t.id as string,
+          cwd: t.cwd ?? params.cwd ?? "",
+          // `||`, not `??`: an empty-string name must fall back to the preview.
+          ...(t.name || t.preview ? { title: t.name || t.preview } : {}),
+        }));
+      return { sessions };
+    } catch (err) {
+      this.logger.warn("thread/list failed", { error: String(err) });
+      return { sessions: [] };
+    }
+  }
+
+  /** Shared thread setup for start/resume/fork. `threadId` present => resume/fork; absent => new thread. */
+  private async setupThread(
+    method: string,
+    params: {
+      cwd?: string;
+      mcpServers?: NewSessionRequest["mcpServers"];
+      meta?: AppServerSessionMeta;
+      threadId?: string;
+      additionalDirectories?: string[];
+    },
+  ): Promise<{ threadId: string; thread: AppServerThread | undefined }> {
+    this.jsonSchema = params.meta?.jsonSchema ?? undefined;
+    this.taskRunId = params.meta?.taskRunId;
+    this.environment = params.meta?.environment;
+    this.config.setInitialMode(params.meta?.permissionMode);
+    // Codex doesn't attribute input tokens by source; the baseline seeds the resident floor + system prompt.
+    this.usage.setBaseline(buildBaseline(params.meta));
+    // Flatten the {append} form (else "[object Object]") and dedupe identical parts
+    // (the host pre-flattens into developerInstructions, so the prod prompt would duplicate).
+    const developerInstructions = [
+      ...new Set(
+        [
+          this.developerInstructions,
+          flattenSystemPrompt(params.meta?.systemPrompt),
+        ].filter((s): s is string => !!s),
+      ),
+    ].join("\n\n");
+    // Degrade gracefully: an unresolvable bundled local-tools script skips it with a
+    // warning rather than killing thread setup.
+    let localTools: ReturnType<typeof buildLocalToolsServer> = null;
+    try {
+      localTools = buildLocalToolsServer(
+        { cwd: params.cwd },
+        this.localToolsMeta(params.meta),
+      );
+    } catch (err) {
+      this.logger.warn(
+        "local-tools server unavailable; continuing without it",
+        { error: String(err) },
+      );
+    }
+    const mcpServers = toCodexMcpServers([
+      ...(params.mcpServers ?? []),
+      ...(localTools ? [localTools] : []),
+    ]);
+    const config = buildThreadConfig(mcpServers, params.additionalDirectories);
+
+    const result = await this.rpc.request<{ thread?: AppServerThread }>(
+      method,
+      {
+        model: this.config.model,
+        cwd: params.cwd,
+        ...(params.threadId ? { threadId: params.threadId } : {}),
+        ...(developerInstructions ? { developerInstructions } : {}),
+        ...(config ? { config } : {}),
+      },
     );
-    const threadId = result?.thread?.id;
+    const thread = result?.thread;
+    const threadId = thread?.id ?? params.threadId;
     if (!threadId) {
-      throw new Error("codex app-server thread/start returned no thread id");
+      throw new Error(`codex app-server ${method} returned no thread id`);
     }
     this.threadId = threadId;
     this.sessionId = threadId;
-    this.logger.info("Codex app-server session created", { threadId });
-    return { sessionId: threadId };
+    await this.loadModelConfig();
+    this.emitConfigOptions();
+    await this.emitAvailableCommands();
+    await this.emitSdkSession();
+    this.logger.info("Codex app-server thread ready", {
+      method,
+      threadId,
+      mcpServers: mcpServers ? Object.keys(mcpServers) : [],
+      hasOutputSchema: !!this.jsonSchema,
+      hasLocalTools: !!localTools,
+    });
+    return { threadId, thread };
+  }
+
+  /**
+   * Project the session `_meta` down to the fields handed to
+   * buildLocalToolsServer (environment, channel mode, task id, persistence,
+   * base branch). Returns undefined when no meta was supplied.
+   */
+  private localToolsMeta(
+    meta: AppServerSessionMeta | undefined,
+  ): LocalToolsMeta | undefined {
+    if (!meta) return undefined;
+    const { environment, channelMode, taskId, persistence, baseBranch } = meta;
+    return { environment, channelMode, taskId, persistence, baseBranch };
+  }
+
+  /** Send the SDK_SESSION ext notification pairing this session with the host's taskRunId; no-op unless both ids are set. */
+  private async emitSdkSession(): Promise<void> {
+    const { taskRunId, sessionId } = this;
+    if (!taskRunId || !sessionId) return;
+    const payload = buildSdkSessionParams(sessionId, taskRunId);
+    const sent = this.client.extNotification(
+      POSTHOG_NOTIFICATIONS.SDK_SESSION,
+      payload as unknown as Record<string, unknown>,
+    );
+    // Best-effort: a rejected notification is logged, never rethrown.
+    await sent.catch((err) =>
+      this.logger.warn("sdk_session extNotification failed", err),
+    );
+  }
+
+  /** Apply one config-option change, then re-emit the option list (plus the current mode, if it changed). */
+  async setSessionConfigOption(
+    params: SetSessionConfigOptionRequest,
+  ): Promise<SetSessionConfigOptionResponse> {
+    const request = params as { configId?: string; value?: unknown };
+    const outcome = this.config.setOption(request.configId, request.value);
+    // No RPC here: collaborationMode is sent with the next turn/start, so only current_mode_update is needed.
+    if (outcome.modeChanged) this.emitCurrentMode(this.config.mode);
+    this.emitConfigOptions();
+    return { configOptions: this.config.options };
+  }
+
+  /** Mirror codex-acp: push a best-effort current_mode_update so the host's mode cache follows a mode change. */
+  private emitCurrentMode(modeId: string): void {
+    const { sessionId } = this;
+    if (!sessionId) return;
+    type UpdateArg = Parameters<AgentSideConnection["sessionUpdate"]>[0];
+    const currentModeId = modeId;
+    const update = { sessionUpdate: "current_mode_update", currentModeId };
+    const message = { sessionId, update } as unknown as UpdateArg;
+    void this.client.sessionUpdate(message).catch(() => undefined);
+  }
+
+  /** Load the model list from model/list into the session config; on any failure, log it and clear the models. */
+  private async loadModelConfig(): Promise<void> {
+    const method = APP_SERVER_METHODS.MODEL_LIST;
+    try {
+      const listing = await this.rpc.request<{ data?: any[] }>(method, {});
+      const models = listing?.data ?? [];
+      this.config.loadModels(models);
+    } catch (failure) {
+      this.logger.warn("model/list failed; using current model only", {
+        error: String(failure),
+      });
+      this.config.clearModels();
+    }
+  }
+
+  /** Push the current config options to the client as config_option_update; a failure is only logged. */
+  private emitConfigOptions(): void {
+    const { sessionId } = this;
+    if (!sessionId) return;
+    type UpdateArg = Parameters<AgentSideConnection["sessionUpdate"]>[0];
+    const configOptions = this.config.options;
+    const update = { sessionUpdate: "config_option_update", configOptions };
+    const message = { sessionId, update } as unknown as UpdateArg;
+    void this.client
+      .sessionUpdate(message)
+      .catch((err) => this.logger.warn("config_option_update failed", err));
+  }
+
+  /** skills/list → available_commands_update so the slash-command menu fills; a failed skills/list is logged and an empty list is still emitted. */
+  private async emitAvailableCommands(): Promise<void> {
+    if (!this.sessionId) return;
+    let commands: Array<{ name: string; description: string }> = [];
+    try {
+      const res = await this.rpc.request<{ data?: Array<{ skills?: any[] }> }>(
+        APP_SERVER_METHODS.SKILLS_LIST,
+        {},
+      );
+      commands = (res?.data ?? [])
+        .flatMap((entry) => entry?.skills ?? [])
+        // Drop explicitly-disabled skills; lenient `!== false` so a malformed payload still shows.
+        .filter((s) => s?.name && s?.enabled !== false)
+        .map((s: any) => ({ name: s.name, description: s.description ?? "" }));
+    } catch (err) {
+      this.logger.warn("skills/list failed", { error: String(err) });
+    }
+    void this.client
+      .sessionUpdate({
+        sessionId: this.sessionId,
+        update: {
+          sessionUpdate: "available_commands_update",
+          availableCommands: commands,
+        },
+      } as unknown as Parameters<AgentSideConnection["sessionUpdate"]>[0])
+      .catch(() => undefined);
   }
 
   async prompt(params: PromptRequest): Promise<PromptResponse> {
     if (!this.threadId) {
       throw new Error("prompt() called before newSession()");
     }
-    if (this.pendingTurn) {
-      // The host serializes turns; a concurrent prompt would clobber the
-      // single pendingTurn slot, so fail fast rather than corrupt it.
-      throw new Error("prompt() called while a turn is already in progress");
-    }
+    // Reopen the notification gate (a prior interrupt may have left session.cancelled set).
     this.session.cancelled = false;
-    const input = toTurnInput(params.prompt);
-    const dropped = params.prompt.length - input.length;
+    // Prepend _meta.prContext (host PR-follow-up / Slack runs) to the FORWARDED prompt,
+    // else codex cloud follow-ups lose the PR-review context. The echo omits it.
+    const prContext = (params._meta as { prContext?: unknown } | undefined)
+      ?.prContext;
+    const promptBlocks =
+      typeof prContext === "string" && prContext.length > 0
+        ? [{ type: "text" as const, text: prContext }, ...params.prompt]
+        : params.prompt;
+    const input = toCodexInput(promptBlocks);
+    if (input.length === 0) {
+      // turn/start rejects empty input, so end the turn cleanly.
+      this.logger.warn("prompt() had no usable input blocks; ending turn");
+      return { stopReason: "end_turn" };
+    }
+    // Count by type (not input.length): a resource block can fan out to multiple blocks.
+    const dropped = params.prompt.filter(
+      (b) =>
+        b.type !== "text" &&
+        b.type !== "image" &&
+        b.type !== "resource" &&
+        b.type !== "resource_link",
+    ).length;
     if (dropped > 0) {
-      this.logger.warn("Dropped non-text prompt blocks", { dropped });
+      this.logger.warn("Dropped non-text/non-image prompt blocks", { dropped });
     }
-    const completion = new Promise<StopReason>((resolve, reject) => {
-      this.pendingTurn = { resolve, reject };
-    });
+    // Echo the user prompt (codex emits none), for fresh turns and steering alike.
+    this.broadcastUserInput(params.prompt);
+
+    if (this.turns.isRunning) {
+      // A turn is already running: fold the message in via turn/steer (precondition: the
+      // active turnId). Refresh from the response's rotated turnId so a later steer/interrupt
+      // still targets the live turn (no turn/started is re-emitted for a steer).
+      const steerRes = await this.rpc
+        .request<{ turnId?: string }>(APP_SERVER_METHODS.TURN_STEER, {
+          threadId: this.threadId,
+          input,
+          expectedTurnId: this.turns.activeTurnId,
+        })
+        .catch((err) => {
+          this.logger.warn("turn/steer failed", err);
+          return undefined;
+        });
+      this.turns.onSteered(steerRes?.turnId);
+      return { stopReason: await this.turns.awaitCompletion() };
+    }
+    if (this.turns.isPending) {
+      // A turn is pending but has no turnId yet, so we can't steer; fail fast.
+      throw new Error("prompt() called while a turn is already in progress");
+    }
+
+    this.lastAgentMessage = "";
+    this.resetUsage();
+    const completion = this.turns.begin();
     try {
+      const approvalPolicy = this.config.approvalPolicy();
+      const sandboxPolicy = this.config.sandboxPolicy();
+      const activePermissionProfile = this.config.permissionProfile();
       await this.rpc.request(APP_SERVER_METHODS.TURN_START, {
         threadId: this.threadId,
         input,
-        ...(this.reasoningEffort ? { effort: this.reasoningEffort } : {}),
+        model: this.config.model,
+        ...(this.config.effort ? { effort: this.config.effort } : {}),
+        // Always request a reasoning summary; the default "auto" can skip it on trivial turns.
+        summary: "detailed",
+        // Picker preset applied per-turn. approvalPolicy is sent everywhere; the sandbox fields
+        // below are skipped on cloud, where a non-danger sandbox re-engages the unavailable linux-sandbox and panics.
+        ...(approvalPolicy ? { approvalPolicy } : {}),
+        // Pushed every turn — codex remembers the last mode, so switching back from plan must be explicit.
+        collaborationMode: this.config.collaborationModeForTurn(),
+        ...(this.environment !== "cloud" && sandboxPolicy
+          ? { sandboxPolicy }
+          : {}),
+        // codex 0.140.0 enforces the sandbox via named profiles; sandboxPolicy alone is no
+        // longer honored, so plan/read-only also send this. Same cloud gating.
+        ...(this.environment !== "cloud" && activePermissionProfile
+          ? { activePermissionProfile }
+          : {}),
+        // Constrain the final message to the task schema for parseable structured output.
+        ...(this.jsonSchema ? { outputSchema: this.jsonSchema } : {}),
       });
       return { stopReason: await completion };
     } finally {
-      this.pendingTurn = undefined;
+      this.turns.finishPrompt();
+    }
+  }
+
+  /** Echo the user's text and image prompt blocks as user_message_chunk updates (so an image-only turn still renders) for the host log/UI; other block types are not echoed. */
+  private broadcastUserInput(prompt: PromptRequest["prompt"]): void {
+    if (!this.sessionId) return;
+    for (const block of prompt) {
+      if (block.type !== "text" && block.type !== "image") continue;
+      void this.client
+        .sessionUpdate({
+          sessionId: this.sessionId,
+          update: {
+            sessionUpdate: "user_message_chunk",
+            content: block,
+          },
+        })
+        .catch(() => undefined);
     }
   }
 
+  private resetUsage(): void {
+    this.usage.resetForTurn(); // delegates to the usage tracker; prompt() calls this right before beginning a turn
+  }
+
   protected async interrupt(): Promise<void> {
-    // Tell the server to stop first, then report the turn cancelled, so the
-    // caller never sees "cancelled" while Codex is still running.
-    if (this.threadId) {
+    // Stop the server, then finalize through the shared path so a cancelled turn still emits
+    // the cloud idle signal (finalizeTurn claims idempotently). turn/interrupt requires BOTH
+    // threadId and turnId (else -32600); skip the RPC when no turn started.
+    const turnId = this.turns.markInterrupted();
+    if (this.threadId && turnId) {
       await this.rpc
-        .request(APP_SERVER_METHODS.TURN_INTERRUPT, { threadId: this.threadId })
+        .request(APP_SERVER_METHODS.TURN_INTERRUPT, {
+          threadId: this.threadId,
+          turnId,
+        })
         .catch((err) => this.logger.warn("turn/interrupt failed", err));
     }
-    this.pendingTurn?.resolve("cancelled");
-    this.pendingTurn = undefined;
+    await this.finalizeTurn("cancelled");
   }
 
   async closeSession(): Promise<void> {
     this.session.abortController.abort();
-    this.pendingTurn?.resolve("cancelled");
-    this.pendingTurn = undefined;
+    this.turns.close("cancelled");
     this.session.settingsManager.dispose();
+    // Close the transport BEFORE kill() destroys the stdio streams (else close() blocks on
+    // an ack that never arrives). Bounded so cleanup can't hang the caller.
+    await Promise.race([
+      this.rpc.close().catch(() => undefined),
+      new Promise<void>((resolve) => setTimeout(resolve, 2000)),
+    ]);
     this.proc?.kill();
-    await this.rpc.close();
   }
 
   private handleNotification(method: string, params: unknown): void {
@@ -228,71 +678,421 @@ export class CodexAppServerAgent extends BaseAcpAgent {
       }
     }
 
+    if (method === APP_SERVER_NOTIFICATIONS.TURN_STARTED) {
+      // Capture the active turn id (steer precondition / interrupt target).
+      this.turns.onStarted((params as { turn?: { id?: string } })?.turn?.id);
+    }
+
+    if (
+      method === APP_SERVER_NOTIFICATIONS.ITEM_STARTED ||
+      method === APP_SERVER_NOTIFICATIONS.ITEM_COMPLETED
+    ) {
+      this.mcp.capture(params);
+    }
+
+    // codex auto-compaction surfaces as a contextCompaction item: item/started → in progress,
+    // item/completed → boundary (codex emits no separate thread/compacted; that's a guarded
+    // fallback). compactionActive dedupes to one boundary per compaction.
+    const isCompactionItem =
+      (params as { item?: { type?: string } })?.item?.type ===
+      "contextCompaction";
+    if (
+      method === APP_SERVER_NOTIFICATIONS.ITEM_STARTED &&
+      isCompactionItem &&
+      !this.compactionActive
+    ) {
+      this.compactionActive = true;
+      this.emitCompactionStarted();
+    }
+    if (
+      this.compactionActive &&
+      ((method === APP_SERVER_NOTIFICATIONS.ITEM_COMPLETED &&
+        isCompactionItem) ||
+        method === APP_SERVER_NOTIFICATIONS.CONTEXT_COMPACTED)
+    ) {
+      this.compactionActive = false;
+      this.emitCompactionBoundary();
+    }
+
+    if (method === APP_SERVER_NOTIFICATIONS.ITEM_COMPLETED) {
+      this.captureAgentMessage(params);
+    }
+
+    if (method === APP_SERVER_NOTIFICATIONS.TOKEN_USAGE_UPDATED) {
+      this.emitUsageExtNotification(params);
+    }
+
     if (method === APP_SERVER_NOTIFICATIONS.TURN_COMPLETED) {
-      const status = (params as { turn?: { status?: string } })?.turn?.status;
-      this.pendingTurn?.resolve(status === "failed" ? "refusal" : "end_turn");
-      this.pendingTurn = undefined;
+      const turn = (params as { turn?: { id?: string; status?: string } })
+        ?.turn;
+      // Drop the late completion of an already-interrupted turn (else it cancels the follow-up).
+      if (this.turns.shouldDropCompletion(turn?.id)) return;
+      void this.finalizeTurn(mapTurnStopReason(turn?.status));
+    }
+
+    if (method === APP_SERVER_NOTIFICATIONS.ERROR) {
+      // A non-retried fatal error: resolve the turn so prompt() returns rather than hangs.
+      const willRetry = (params as { willRetry?: boolean })?.willRetry;
+      if (willRetry === false) {
+        this.logger.warn("codex app-server fatal error notification", {
+          params,
+        });
+        void this.finalizeTurn("refusal");
+      }
+    }
+  }
+
+  /** Remember the text of the most recent agentMessage item; finalizeTurn parses the last one as structured output. */
+  private captureAgentMessage(params: unknown): void {
+    const payload = params as { item?: { type?: string; text?: string } };
+    const item = payload?.item;
+    if (item?.type !== "agentMessage") return;
+    if (typeof item.text === "string") this.lastAgentMessage = item.text;
+  }
+
+  /** Signal compaction start via `_posthog/status` ("compacting") so the host sets `isCompacting` (gates steer/queue). */
+  private emitCompactionStarted(): void {
+    const { sessionId } = this;
+    if (!sessionId) return;
+    const status = { sessionId, status: "compacting" };
+    // Best-effort: a rejected notification is swallowed.
+    void this.client
+      .extNotification(POSTHOG_NOTIFICATIONS.STATUS, status)
+      .catch(() => undefined);
+  }
+
+  /** Mark compaction end: `_posthog/compact_boundary` (host clears isCompacting) plus a "Context compacted." transcript chunk. */
+  private emitCompactionBoundary(): void {
+    const { sessionId } = this;
+    if (!sessionId) return;
+    // Both sends are best-effort: rejections are swallowed.
+    const ignore = () => undefined;
+    void this.client
+      .extNotification(POSTHOG_NOTIFICATIONS.COMPACT_BOUNDARY, { sessionId })
+      .catch(ignore);
+    // Visible marker in the transcript.
+    const marker = { type: "text" as const, text: "\n\nContext compacted." };
+    void this.client
+      .sessionUpdate({
+        sessionId,
+        update: { sessionUpdate: "agent_message_chunk", content: marker },
+      })
+      .catch(ignore);
+  }
+
+  /** Feed a token-usage notification to the usage tracker and forward the result as `_posthog/usage_update` (mirroring codex-acp). */
+  private emitUsageExtNotification(params: unknown): void {
+    const { sessionId } = this;
+    // Without a session the notification is not even ingested.
+    if (!sessionId) return;
+    const usageUpdate = this.usage.ingest(params);
+    if (!usageUpdate) return;
+    const payload = { sessionId, ...usageUpdate };
+    void this.client
+      .extNotification(POSTHOG_NOTIFICATIONS.USAGE_UPDATE, payload)
+      .catch((err) => this.logger.warn("usage extNotification failed", err));
+  }
+
+  /** Finalize the in-flight turn exactly once: recover a stuck compaction flag, deliver structured output (clean end_turn only), emit the turn-complete notifications, then resolve the pending prompt with `reason`. */
+  private async finalizeTurn(reason: StopReason): Promise<void> {
+    // Idempotent: claim synchronously (before any await) so a second finalize (e.g. an
+    // error racing turn/completed) is a no-op and callbacks don't double-fire.
+    const pending = this.turns.claim();
+    if (!pending) return;
+    // If the turn dies mid-compaction the boundary never fires, leaving isCompacting stuck
+    // true (silently queuing later messages). Recover here.
+    if (this.compactionActive) {
+      this.compactionActive = false;
+      this.emitCompactionBoundary();
+    }
+    const message = this.lastAgentMessage;
+    // Per-turn usage is codex's own `tokenUsage.last` (not a reconstructed delta).
+    const usage = this.usage.perTurnUsage();
+    const contextUsed = this.usage.contextTokens();
+
+    // Deliver structured output only on a clean end_turn — a cancelled/refused turn records nothing.
+    if (
+      reason === "end_turn" &&
+      this.jsonSchema &&
+      this.onStructuredOutput &&
+      message
+    ) {
+      const parsed = parseStructuredOutput(message);
+      if (parsed) {
+        try {
+          await this.onStructuredOutput(parsed);
+        } catch (err) {
+          this.logger.warn("onStructuredOutput callback threw", { error: err });
+        }
+      } else {
+        this.logger.warn(
+          "Could not parse structured output from final message",
+          {
+            preview: message.slice(0, 200),
+          },
+        );
+      }
+    }
+    await this.emitTurnComplete(reason, usage, contextUsed);
+    pending.resolve(reason);
+  }
+
+  /** Emit cloud per-turn notifications: `_posthog/turn_complete` (only with a taskRunId) + the usage breakdown (only when `contextUsed` is defined). Failures are logged, not thrown. */
+  private async emitTurnComplete(
+    reason: StopReason,
+    usage: AccumulatedUsage,
+    contextUsed: number | undefined,
+  ): Promise<void> {
+    if (!this.sessionId) return;
+    if (this.taskRunId) {
+      await this.client
+        .extNotification(
+          POSTHOG_NOTIFICATIONS.TURN_COMPLETE,
+          buildTurnCompleteParams(
+            this.sessionId,
+            reason,
+            usage,
+          ) as unknown as Record<string, unknown>,
+        )
+        .catch((err) =>
+          this.logger.warn("turn_complete extNotification failed", err),
+        );
+    }
+    if (contextUsed !== undefined) {
+      await this.client
+        .extNotification(
+          POSTHOG_NOTIFICATIONS.USAGE_UPDATE,
+          buildUsageBreakdownParams(
+            this.sessionId,
+            this.usage.baselineBreakdown,
+            contextUsed,
+          ) as unknown as Record<string, unknown>,
+        )
+        .catch((err) =>
+          this.logger.warn("usage breakdown extNotification failed", err),
+        );
     }
   }
 
   private handleServerClosed(): void {
-    this.pendingTurn?.reject(
+    this.turns.fail(
       new Error("codex app-server exited before the turn completed"),
     );
-    this.pendingTurn = undefined;
   }
 
+  /**
+   * Server-initiated requests. Simple approvals resolve to a `{ decision }` envelope (a bare
+   * string is rejected); richer ones (AskUserQuestion / permission profile / elicitation) go
+   * to `handleServerRequest`. Whatever we return is sent back as the JSON-RPC result.
+   */
   private async handleApproval(
     method: string,
     params: unknown,
-  ): Promise<string> {
+  ): Promise<unknown> {
+    const richer = await handleServerRequest(method, params, this.client, {
+      sessionId: this.sessionId,
+      logger: this.logger,
+      resolveMcpToolCall: (serverName) => this.mcp.byServer(serverName),
+    });
+    if (richer.handled) {
+      return richer.response;
+    }
     if (
       method !== APP_SERVER_REQUESTS.COMMAND_APPROVAL &&
       method !== APP_SERVER_REQUESTS.FILE_CHANGE_APPROVAL
     ) {
       this.logger.warn("Unrecognized server request; declining", { method });
-      return "decline";
+      return { decision: "decline" };
     }
-    const detail = params as { itemId?: string; command?: string };
+    const isFileChange = method === APP_SERVER_REQUESTS.FILE_CHANGE_APPROVAL;
+    const detail = params as {
+      itemId?: string;
+      command?: string;
+      changes?: AppServerItem["changes"];
+      available_decisions?: unknown;
+    };
+    // codex tells us which decisions are valid here. When it offers an "approve and
+    // remember" decision (exec-policy allowlist / session approval), surface Allow-always.
+    const availableDecisions = Array.isArray(detail.available_decisions)
+      ? detail.available_decisions.filter(
+          (d): d is string => typeof d === "string",
+        )
+      : [];
+    const rememberDecision =
+      availableDecisions.find((d) => d === "approved_execpolicy_amendment") ??
+      availableDecisions.find((d) => d === "approved_for_session");
     const title =
-      detail.command ??
-      (method === APP_SERVER_REQUESTS.FILE_CHANGE_APPROVAL
-        ? "Apply file changes"
-        : "Run command");
+      detail.command ?? (isFileChange ? "Apply file changes" : "Run command");
+    const toolCallId = detail.itemId ?? "codex-approval";
+    // Codex has no MCP-specific approval; a known MCP call surfaces the real server/tool/args
+    // so the host renders the proper MCP permission (incl. PostHog `exec` unwrapping).
+    const mcp = this.mcp.byItemId(detail.itemId);
+    // kind + content route plain command/file approvals to Execute/EditPermission (not the fallback).
+    const toolCall = mcp
+      ? {
+          toolCallId,
+          title,
+          kind: "other" as const,
+          rawInput: mcp.args,
+          _meta: posthogToolMeta({
+            toolName: mcpToolKey({ server: mcp.server, tool: mcp.tool }),
+            mcp: { server: mcp.server, tool: mcp.tool },
+          }),
+        }
+      : isFileChange
+        ? {
+            toolCallId,
+            title,
+            kind: "edit" as const,
+            content: diffContent(detail.changes),
+            locations: changePaths(detail.changes).map((path) => ({ path })),
+          }
+        : {
+            toolCallId,
+            title,
+            kind: "execute" as const,
+            content: detail.command
+              ? [
+                  {
+                    type: "content" as const,
+                    content: { type: "text" as const, text: detail.command },
+                  },
+                ]
+              : undefined,
+          };
     try {
       const response = await this.client.requestPermission({
         sessionId: this.sessionId,
-        toolCall: { toolCallId: detail.itemId ?? "codex-approval", title },
+        toolCall,
         options: [
           { optionId: "allow", name: "Allow", kind: "allow_once" },
+          ...(rememberDecision
+            ? [
+                {
+                  optionId: "allow_always",
+                  name: isFileChange
+                    ? "Allow for the rest of this session"
+                    : "Allow and don't ask again",
+                  kind: "allow_always" as const,
+                },
+              ]
+            : []),
           { optionId: "reject", name: "Reject", kind: "reject_once" },
+          {
+            optionId: "reject_with_feedback",
+            name: "No, and tell Codex what to do differently",
+            kind: "reject_once",
+            _meta: { customInput: true },
+          },
         ],
       });
-      if (
-        response.outcome.outcome === "selected" &&
-        response.outcome.optionId === "allow"
-      ) {
-        return "accept";
+      if (response.outcome.outcome === "selected") {
+        if (response.outcome.optionId === "allow_always" && rememberDecision) {
+          // Echo codex's "approve and remember" decision so it applies the proposed amendment.
+          return { decision: rememberDecision };
+        }
+        if (response.outcome.optionId === "allow") {
+          return { decision: "accept" };
+        }
+        if (response.outcome.optionId === "reject_with_feedback") {
+          // codex's response has no feedback field, so decline and inject the guidance
+          // into the running turn (as its TUI does: Denied + a follow-up message).
+          const feedback = (response as { _meta?: { customInput?: unknown } })
+            ._meta?.customInput;
+          const activeTurnId = this.turns.activeTurnId;
+          if (typeof feedback === "string" && feedback.trim() && activeTurnId) {
+            void this.rpc
+              .request<{ turnId?: string }>(APP_SERVER_METHODS.TURN_STEER, {
+                threadId: this.threadId,
+                input: toCodexInput([{ type: "text", text: feedback.trim() }]),
+                expectedTurnId: activeTurnId,
+              })
+              // codex rotates the turn id on steer; adopt it or later
+              // interrupts/steers target a dead turn.
+              .then((res) => this.turns.onSteered(res?.turnId))
+              .catch((err) =>
+                this.logger.warn("turn/steer (reject feedback) failed", err),
+              );
+          }
+          return { decision: "decline" };
+        }
       }
       if (response.outcome.outcome === "cancelled") {
-        return "cancel";
+        return { decision: "cancel" };
       }
-      return "decline";
+      return { decision: "decline" };
     } catch (err) {
       this.logger.warn("requestPermission failed; declining", err);
-      return "decline";
+      return { decision: "decline" };
     }
   }
 }
 
-function toTurnInput(
-  prompt: ContentBlock[],
-): Array<{ type: "text"; text: string }> {
-  const input: Array<{ type: "text"; text: string }> = [];
-  for (const block of prompt) {
-    if (block.type === "text") {
-      input.push({ type: "text", text: block.text });
+// BASELINE_TOKENS from codex-rs protocol.rs — the resident floor we can't attribute per-source.
+const CODEX_BASELINE_TOKENS = 12000;
+
+/** Translate a codex `TurnStatus` into an ACP `StopReason`; any status other than interrupted/failed (or none) ends the turn normally. */
+function mapTurnStopReason(status: string | undefined): StopReason {
+  const wasInterrupted = status === "interrupted";
+  if (wasInterrupted) return "cancelled";
+  return status === "failed" ? "refusal" : "end_turn";
+}
+
+/** Assembles the codex thread config overrides: MCP servers plus extra writable workspace roots. Undefined when neither is set. */
+function buildThreadConfig(
+  mcpServers: ReturnType<typeof toCodexMcpServers>,
+  additionalDirectories: string[] | undefined,
+): Record<string, unknown> | undefined {
+  const overrides: Record<string, unknown> = {
+    ...(mcpServers ? { mcp_servers: mcpServers } : {}),
+    ...(additionalDirectories?.length
+      ? { sandbox_workspace_write: { writable_roots: additionalDirectories } }
+      : {}),
+  };
+  const hasOverrides = Object.keys(overrides).length > 0;
+  return hasOverrides ? overrides : undefined;
+}
+
+/** Context-breakdown baseline whose systemPrompt is the codex resident floor plus the estimated host system prompt. */
+function buildBaseline(
+  meta: AppServerSessionMeta | undefined,
+): ContextBreakdownBaseline {
+  const seeded = emptyBaseline();
+  const hostPrompt = flattenSystemPrompt(meta?.systemPrompt);
+  const hostPromptTokens = estimateTokens(hostPrompt);
+  seeded.systemPrompt = CODEX_BASELINE_TOKENS + hostPromptTokens;
+  return seeded;
+}
+
+/** Reduce the host's systemPrompt (`string | { append }`) to plain text; undefined when absent or empty. */
+function flattenSystemPrompt(
+  systemPrompt: string | { append?: string } | undefined,
+): string | undefined {
+  const text =
+    typeof systemPrompt === "string" ? systemPrompt : systemPrompt?.append;
+  // An empty string and a non-string `append` both mean "no prompt".
+  if (typeof text !== "string" || text === "") return undefined;
+  return text;
+}
+
+/** Parse a JSON object from the final message, trying the whole text, then the first fenced block, then the first-`{`-to-last-`}` span; null if none yields a non-array object. */
+function parseStructuredOutput(text: string): Record<string, unknown> | null {
+  const trimmed = text.trim();
+  const candidates = [trimmed];
+  const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
+  if (fenced) candidates.push(fenced[1].trim());
+  const brace = trimmed.match(/\{[\s\S]*\}/);
+  if (brace) candidates.push(brace[0]);
+
+  for (const candidate of candidates) {
+    try {
+      const parsed: unknown = JSON.parse(candidate);
+      if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
+        return parsed as Record<string, unknown>;
+      }
+    } catch {
+      // Not valid JSON — try the next candidate.
     }
   }
-  return input;
+  return null;
 }
diff --git a/packages/agent/src/adapters/codex-app-server/ext-notifications.test.ts b/packages/agent/src/adapters/codex-app-server/ext-notifications.test.ts
new file mode 100644
index 0000000000..93538ee006
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/ext-notifications.test.ts
@@ -0,0 +1,74 @@
+import { describe, expect, it } from "vitest";
+import { emptyBaseline } from "../claude/context-breakdown";
+import {
+  buildSdkSessionParams,
+  buildTurnCompleteParams,
+  buildUsageBreakdownParams,
+} from "./ext-notifications";
+
+describe("ext-notifications builders", () => {
+  it("buildSdkSessionParams tags the codex adapter so resume keys on the family", () => {
+    expect(buildSdkSessionParams("sess-1", "run-42")).toEqual({
+      taskRunId: "run-42",
+      sessionId: "sess-1",
+      adapter: "codex",
+    });
+  });
+
+  it("buildTurnCompleteParams derives totalTokens from all four counts", () => {
+    const params = buildTurnCompleteParams("sess-1", "end_turn", {
+      inputTokens: 100,
+      outputTokens: 20,
+      cachedReadTokens: 5,
+      cachedWriteTokens: 3,
+    });
+
+    expect(params).toEqual({
+      sessionId: "sess-1",
+      stopReason: "end_turn",
+      usage: {
+        inputTokens: 100,
+        outputTokens: 20,
+        cachedReadTokens: 5,
+        cachedWriteTokens: 3,
+        totalTokens: 128,
+      },
+    });
+  });
+
+  it("buildTurnCompleteParams forwards non-default stop reasons", () => {
+    expect(
+      buildTurnCompleteParams("sess-1", "refusal", {
+        inputTokens: 0,
+        outputTokens: 0,
+        cachedReadTokens: 0,
+        cachedWriteTokens: 0,
+      }).stopReason,
+    ).toBe("refusal");
+  });
+
+  it("buildUsageBreakdownParams attributes overflow above the baseline to conversation", () => {
+    const baseline = { ...emptyBaseline(), systemPrompt: 1000, tools: 500 };
+
+    expect(buildUsageBreakdownParams("sess-1", baseline, 2000)).toEqual({
+      sessionId: "sess-1",
+      breakdown: {
+        systemPrompt: 1000,
+        tools: 500,
+        rules: 0,
+        skills: 0,
+        mcp: 0,
+        subagents: 0,
+        conversation: 500,
+      },
+    });
+  });
+
+  it("buildUsageBreakdownParams floors conversation at 0 when usage is below baseline", () => {
+    const baseline = { ...emptyBaseline(), systemPrompt: 1000 };
+
+    expect(
+      buildUsageBreakdownParams("sess-1", baseline, 200).breakdown.conversation,
+    ).toBe(0);
+  });
+});
diff --git a/packages/agent/src/adapters/codex-app-server/ext-notifications.ts b/packages/agent/src/adapters/codex-app-server/ext-notifications.ts
new file mode 100644
index 0000000000..f6898c8fa2
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/ext-notifications.ts
@@ -0,0 +1,106 @@
+/**
+ * Pure builders for the PostHog `_posthog/*` ext-notification params the app-server
+ * adapter emits, mirroring the codex-acp adapter so log consumers and the renderer
+ * see the same shapes. Param-only (no I/O) so each is unit-testable in isolation.
+ */
+
+import type { StopReason } from "@agentclientprotocol/sdk";
+import {
+  buildBreakdown,
+  type ContextBreakdown,
+  type ContextBreakdownBaseline,
+} from "../claude/context-breakdown";
+
+/**
+ * Value of the `adapter` field on `_posthog/sdk_session`. Deliberately `"codex"`
+ * rather than `"codex-app-server"`: both Codex transports key as one agent family.
+ */
+const CODEX_ADAPTER = "codex" as const;
+
+export interface SdkSessionParams {
+  taskRunId: string;
+  sessionId: string;
+  adapter: typeof CODEX_ADAPTER;
+}
+
+/**
+ * `_posthog/sdk_session` — pairs a taskRunId with its sessionId so the host can resume later.
+ */
+export function buildSdkSessionParams(
+  sessionId: string,
+  taskRunId: string,
+): SdkSessionParams {
+  const adapter = CODEX_ADAPTER;
+  const params: SdkSessionParams = { taskRunId, sessionId, adapter };
+  return params;
+}
+
+/** Per-turn token usage as carried on `_posthog/turn_complete`; `totalTokens` is precomputed so consumers don't re-sum. */
+export interface TurnCompleteUsage {
+  inputTokens: number;
+  outputTokens: number;
+  cachedReadTokens: number;
+  cachedWriteTokens: number;
+  totalTokens: number;
+}
+
+export interface TurnCompleteParams {
+  sessionId: string;
+  stopReason: StopReason;
+  usage: TurnCompleteUsage;
+}
+
+/** The four component counts the caller accumulates; `buildTurnCompleteParams` adds the total. */
+export interface AccumulatedUsage {
+  inputTokens: number;
+  outputTokens: number;
+  cachedReadTokens: number;
+  cachedWriteTokens: number;
+}
+
+/**
+ * `_posthog/turn_complete` — emitted once a prompt turn has finished. Adds a
+ * `totalTokens` field equal to the sum of the four component counts.
+ */
+export function buildTurnCompleteParams(
+  sessionId: string,
+  stopReason: StopReason,
+  usage: AccumulatedUsage,
+): TurnCompleteParams {
+  const { inputTokens, outputTokens, cachedReadTokens, cachedWriteTokens } =
+    usage;
+  const totalTokens =
+    inputTokens + outputTokens + cachedReadTokens + cachedWriteTokens;
+  return {
+    sessionId,
+    stopReason,
+    usage: {
+      inputTokens,
+      outputTokens,
+      cachedReadTokens,
+      cachedWriteTokens,
+      totalTokens,
+    },
+  };
+}
+
+export interface UsageBreakdownParams {
+  sessionId: string;
+  breakdown: ContextBreakdown;
+}
+
+/**
+ * `_posthog/usage_update` (breakdown variant) — per-source context attribution.
+ *
+ * Codex reports no per-source token counts, so the breakdown is produced by
+ * handing the baseline estimate and the live `contextUsed` figure to
+ * `buildBreakdown`.
+ */
+export function buildUsageBreakdownParams(
+  sessionId: string,
+  baseline: ContextBreakdownBaseline,
+  contextUsed: number,
+): UsageBreakdownParams {
+  const breakdown = buildBreakdown(baseline, contextUsed);
+  return { sessionId, breakdown };
+}
diff --git a/packages/agent/src/adapters/codex-app-server/input.test.ts b/packages/agent/src/adapters/codex-app-server/input.test.ts
new file mode 100644
index 0000000000..f63e6172ef
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/input.test.ts
@@ -0,0 +1,129 @@
+import type { ContentBlock } from "@agentclientprotocol/sdk";
+import { describe, expect, it } from "vitest";
+import { toCodexInput } from "./input";
+
+describe("toCodexInput", () => {
+  it("passes text blocks through with empty text_elements", () => {
+    const prompt: ContentBlock[] = [
+      { type: "text", text: "hello" },
+      { type: "text", text: "world" },
+    ];
+
+    expect(toCodexInput(prompt)).toEqual([
+      { type: "text", text: "hello", text_elements: [] },
+      { type: "text", text: "world", text_elements: [] },
+    ]);
+  });
+
+  it("maps a base64 image block to the codex image variant as a data URL", () => {
+    const prompt: ContentBlock[] = [
+      { type: "image", data: "AAAA", mimeType: "image/png" },
+    ];
+
+    expect(toCodexInput(prompt)).toEqual([
+      { type: "image", url: "data:image/png;base64,AAAA" },
+    ]);
+  });
+
+  it("maps an http(s) image URI to a remote image and file:// to localImage", () => {
+    const prompt: ContentBlock[] = [
+      {
+        type: "image",
+        data: "",
+        mimeType: "image/png",
+        uri: "https://x/y.png",
+      },
+      {
+        type: "image",
+        data: "",
+        mimeType: "image/png",
+        uri: "file:///tmp/pic.png",
+      },
+    ];
+
+    expect(toCodexInput(prompt)).toEqual([
+      { type: "image", url: "https://x/y.png" },
+      { type: "localImage", path: "/tmp/pic.png" },
+    ]);
+  });
+
+  it("drops only audio and unusable images, keeping text", () => {
+    const prompt: ContentBlock[] = [
+      { type: "text", text: "keep" },
+      { type: "audio", data: "AAAA", mimeType: "audio/wav" },
+      { type: "image", data: "", mimeType: "image/png", uri: "ftp://nope" },
+    ];
+
+    expect(toCodexInput(prompt)).toEqual([
+      { type: "text", text: "keep", text_elements: [] },
+    ]);
+  });
+
+  it("surfaces a file:// resource_link as its on-disk path", () => {
+    const prompt: ContentBlock[] = [
+      { type: "resource_link", uri: "file:///repo/doc.md", name: "doc" },
+    ];
+
+    expect(toCodexInput(prompt)).toEqual([
+      {
+        type: "text",
+        text: "Attached workspace file (read it from disk): /repo/doc.md",
+        text_elements: [],
+      },
+    ]);
+  });
+
+  it("inlines a non-file resource's text as a trailing <context> block", () => {
+    const prompt: ContentBlock[] = [
+      { type: "text", text: "use the snippet" },
+      {
+        type: "resource",
+        resource: { uri: "https://x/snippet", text: "const a = 1;" },
+      },
+    ];
+
+    expect(toCodexInput(prompt)).toEqual([
+      { type: "text", text: "use the snippet", text_elements: [] },
+      { type: "text", text: "https://x/snippet", text_elements: [] },
+      {
+        type: "text",
+        text: '<context ref="https://x/snippet">\nconst a = 1;\n</context>',
+        text_elements: [],
+      },
+    ]);
+  });
+
+  it("omits the bare-uri text block for a resource with no uri", () => {
+    const prompt: ContentBlock[] = [
+      {
+        type: "resource",
+        resource: { text: "inline snippet" },
+      } as unknown as ContentBlock,
+    ];
+
+    expect(toCodexInput(prompt)).toEqual([
+      {
+        type: "text",
+        text: '<context ref="">\ninline snippet\n</context>',
+        text_elements: [],
+      },
+    ]);
+  });
+
+  it("surfaces a file:// resource as its path, not inline text", () => {
+    const prompt: ContentBlock[] = [
+      {
+        type: "resource",
+        resource: { uri: "file:///repo/a.ts", text: "stale on-disk copy" },
+      },
+    ];
+
+    expect(toCodexInput(prompt)).toEqual([
+      {
+        type: "text",
+        text: "Attached workspace file (read it from disk): /repo/a.ts",
+        text_elements: [],
+      },
+    ]);
+  });
+});
diff --git a/packages/agent/src/adapters/codex-app-server/input.ts b/packages/agent/src/adapters/codex-app-server/input.ts
new file mode 100644
index 0000000000..3992cbbbad
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/input.ts
@@ -0,0 +1,103 @@
+import { fileURLToPath } from "node:url";
+import type { ContentBlock } from "@agentclientprotocol/sdk";
+
+/**
+ * Codex app-server `UserInput`, narrowed to the three variants an ACP prompt
+ * can produce: `text`, `image` (an http(s) or data URL) and `localImage` (a path).
+ */
+export type CodexUserInput =
+  | { type: "text"; text: string; text_elements: [] }
+  | { type: "image"; url: string }
+  | { type: "localImage"; path: string };
+
+function textInput(text: string): CodexUserInput {
+  return { type: "text", text, text_elements: [] };
+}
+
+/** Note text for an attached resource; a `file://` URI becomes its on-disk path so codex reads the file itself. */
+function resourceLinkText(uri: string): string {
+  if (!uri.startsWith("file://")) return `Attached resource: ${uri}`;
+  try {
+    const diskPath = fileURLToPath(uri);
+    return `Attached workspace file (read it from disk): ${diskPath}`;
+  } catch {
+    // fileURLToPath threw — fall back to the raw URI.
+    return `Attached file: ${uri}`;
+  }
+}
+
+/**
+ * Converts ACP prompt content blocks into codex app-server `UserInput[]`. Text is
+ * forwarded as-is; images become `image`/`localImage`; `file://` resources become
+ * path notes; other text resources add their uri and share one trailing `<context ref>` item.
+ */
+export function toCodexInput(prompt: ContentBlock[]): CodexUserInput[] {
+  const items: CodexUserInput[] = [];
+  const contextBlocks: string[] = [];
+  for (const block of prompt) {
+    switch (block.type) {
+      case "text":
+        items.push(textInput(block.text));
+        break;
+      case "image": {
+        const image = imageToCodexInput(block);
+        if (image) items.push(image);
+        break;
+      }
+      case "resource_link":
+        items.push(textInput(resourceLinkText(block.uri)));
+        break;
+      case "resource": {
+        if (!("text" in block.resource)) break;
+        const ref = block.resource.uri ?? "";
+        if (ref.startsWith("file://")) {
+          items.push(textInput(resourceLinkText(ref)));
+          break;
+        }
+        if (ref) items.push(textInput(ref));
+        const body = block.resource.text;
+        contextBlocks.push(`<context ref="${ref}">\n${body}\n</context>`);
+        break;
+      }
+      default:
+        // Audio and any other block type produce no input.
+        break;
+    }
+  }
+  if (contextBlocks.length > 0) {
+    items.push(textInput(contextBlocks.join("\n")));
+  }
+  return items;
+}
+
+/**
+ * Inline base64 wins and is sent as a data URL. Otherwise the `uri` decides:
+ * `http(s)` → remote `image`, `file://` → `localImage`, anything else → undefined.
+ */
+function imageToCodexInput(block: {
+  data: string;
+  mimeType: string;
+  uri?: string | null;
+}): CodexUserInput | undefined {
+  const { data, mimeType, uri } = block;
+  if (data) {
+    const url = `data:${mimeType};base64,${data}`;
+    return { type: "image", url };
+  }
+  if (!uri) {
+    return undefined;
+  }
+  const isRemote = ["http://", "https://"].some((p) => uri.startsWith(p));
+  if (isRemote) {
+    return { type: "image", url: uri };
+  }
+  // Only `file://` remains usable; any other scheme yields no input.
+  if (!uri.startsWith("file://")) {
+    return undefined;
+  }
+  try {
+    return { type: "localImage", path: fileURLToPath(uri) };
+  } catch {
+    return undefined;
+  }
+}
diff --git a/packages/agent/src/adapters/codex-app-server/local-tools-mcp.test.ts b/packages/agent/src/adapters/codex-app-server/local-tools-mcp.test.ts
new file mode 100644
index 0000000000..1a65e20dbd
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/local-tools-mcp.test.ts
@@ -0,0 +1,100 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { LOCAL_TOOLS_MCP_NAME } from "../local-tools";
+import { buildLocalToolsServer } from "./local-tools-mcp";
+
+// The dist asset isn't on the walk-up path in unit tests, so make existsSync
+// succeed; nothing spawns the script — we only inspect the path.
+vi.mock("node:fs", async (importActual) => {
+  const actual = await importActual<typeof import("node:fs")>();
+  return { ...actual, existsSync: vi.fn().mockReturnValue(true) };
+});
+
+describe("buildLocalToolsServer", () => {
+  const saved = {
+    sandbox: process.env.IS_SANDBOX,
+    ghToken: process.env.GH_TOKEN,
+    githubToken: process.env.GITHUB_TOKEN,
+  };
+
+  beforeEach(() => {
+    // The signed-git gate reads IS_SANDBOX and the token vars; clear them so each
+    // case controls the cloud signal (meta.environment) and token explicitly.
+    delete process.env.IS_SANDBOX;
+    delete process.env.GH_TOKEN;
+    delete process.env.GITHUB_TOKEN;
+  });
+
+  afterEach(() => {
+    restore("IS_SANDBOX", saved.sandbox);
+    restore("GH_TOKEN", saved.ghToken);
+    restore("GITHUB_TOKEN", saved.githubToken);
+  });
+
+  function restore(key: string, value: string | undefined): void {
+    if (value === undefined) {
+      delete process.env[key];
+    } else {
+      process.env[key] = value;
+    }
+  }
+
+  it("returns a stdio server config with command/args/env on a cloud run with a token", () => {
+    process.env.GH_TOKEN = "ghs_test";
+
+    const server = buildLocalToolsServer(
+      { cwd: "/repo" },
+      { environment: "cloud" },
+    );
+
+    expect(server).not.toBeNull();
+    expect(server?.name).toBe(LOCAL_TOOLS_MCP_NAME);
+    expect(server?.command).toBe(process.execPath);
+    expect(server?.args).toHaveLength(1);
+    expect(server?.args[0]).toMatch(/local-tools-mcp-server\.js$/);
+
+    const envNames = server?.env.map((e) => e.name) ?? [];
+    expect(envNames).toContain("POSTHOG_LOCAL_TOOLS_CTX");
+    expect(envNames).toContain("POSTHOG_LOCAL_TOOLS_ENABLED");
+    // Token is forwarded to the child so its own git remote ops authenticate.
+    expect(envNames).toContain("GH_TOKEN");
+    expect(envNames).toContain("GITHUB_TOKEN");
+
+    const ctxEntry = server?.env.find(
+      (e) => e.name === "POSTHOG_LOCAL_TOOLS_CTX",
+    );
+    const ctx = JSON.parse(
+      Buffer.from(ctxEntry?.value ?? "", "base64").toString("utf-8"),
+    );
+    expect(ctx.cwd).toBe("/repo");
+    expect(ctx.token).toBe("ghs_test");
+  });
+
+  it("returns a server but omits token env vars when no token is present", () => {
+    const server = buildLocalToolsServer(
+      { cwd: "/repo" },
+      { environment: "cloud" },
+    );
+
+    expect(server).not.toBeNull();
+    const envNames = server?.env.map((e) => e.name) ?? [];
+    expect(envNames).toContain("POSTHOG_LOCAL_TOOLS_CTX");
+    expect(envNames).not.toContain("GH_TOKEN");
+    expect(envNames).not.toContain("GITHUB_TOKEN");
+  });
+
+  it("returns null when no cwd is present", () => {
+    process.env.GH_TOKEN = "ghs_test";
+
+    expect(
+      buildLocalToolsServer({ cwd: undefined }, { environment: "cloud" }),
+    ).toBeNull();
+  });
+
+  it("returns null when no tool's gate passes (desktop run)", () => {
+    process.env.GH_TOKEN = "ghs_test";
+
+    expect(
+      buildLocalToolsServer({ cwd: "/repo" }, { environment: "local" }),
+    ).toBeNull();
+  });
+});
diff --git a/packages/agent/src/adapters/codex-app-server/local-tools-mcp.ts b/packages/agent/src/adapters/codex-app-server/local-tools-mcp.ts
new file mode 100644
index 0000000000..e7f0976f59
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/local-tools-mcp.ts
@@ -0,0 +1,104 @@
+/**
+ * Builds the stdio local-tools MCP server config to inject into a Codex
+ * app-server thread's `config.mcp_servers`, ported from the codex-acp adapter.
+ * Returns the ACP `McpServerStdio` shape so the existing translation layer stays
+ * the single owner of the ACP→Codex map.
+ */
+
+import { existsSync } from "node:fs";
+import { resolve as resolvePath } from "node:path";
+import type { McpServerStdio } from "@agentclientprotocol/sdk";
+import { ghTokenEnv } from "@posthog/git/signed-commit";
+import { resolveGithubToken } from "../../utils/github-token";
+import {
+  enabledLocalTools,
+  LOCAL_TOOLS_MCP_NAME,
+  type LocalToolCtx,
+  type LocalToolGateMeta,
+} from "../local-tools";
+import { resolveTaskId } from "../session-meta";
+
+/**
+ * Gate inputs the local-tools server needs beyond `LocalToolGateMeta`: the task id
+ * and the base branch the signed-git tools default to. Self-contained so this
+ * module doesn't depend on the hub agent's session-meta type.
+ */
+export interface LocalToolsMeta extends LocalToolGateMeta {
+  taskId?: string;
+  persistence?: { taskId?: string };
+  baseBranch?: string;
+}
+
+/**
+ * Locate a shared dist asset by probing `rel` against the compiled adapter's
+ * directory and then its ancestors, since that depth differs per bundle entry point.
+ */
+function resolveBundledMcpScript(rel: string): string {
+  const startDir = import.meta.dirname ?? __dirname;
+  // Probe the start directory itself, then up to four ancestors.
+  let dir = startDir;
+  for (let depth = 0; depth < 5; depth++) {
+    const candidate = resolvePath(dir, rel);
+    if (existsSync(candidate)) return candidate;
+    dir = resolvePath(dir, "..");
+  }
+  throw new Error(`Could not locate ${rel} relative to ${startDir}.`);
+}
+
+/** Wraps the tool context into a stdio server entry that runs the bundled local-tools MCP script via `process.execPath`. */
+function toMcpServerStdio(
+  ctx: LocalToolCtx,
+  enabledNames: string[],
+): McpServerStdio {
+  const scriptPath = resolveBundledMcpScript(
+    "adapters/codex/local-tools-mcp-server.js",
+  );
+  const encodedCtx = Buffer.from(JSON.stringify(ctx)).toString("base64");
+  const baseVars = [
+    { name: "POSTHOG_LOCAL_TOOLS_CTX", value: encodedCtx },
+    { name: "POSTHOG_LOCAL_TOOLS_ENABLED", value: enabledNames.join(",") },
+  ];
+  // When a token is available it is also set on the child env so the server's
+  // own git remote ops authenticate.
+  const tokenVars = ctx.token
+    ? Object.entries(ghTokenEnv(ctx.token)).map(([name, value]) => ({
+        name,
+        value,
+      }))
+    : [];
+  return {
+    name: LOCAL_TOOLS_MCP_NAME,
+    command: process.execPath,
+    args: [scriptPath],
+    env: [...baseVars, ...tokenVars],
+  };
+}
+
+/**
+ * Builds the local-tools stdio server config to inject into a thread.
+ *
+ * Returns null when `ctx.cwd` is missing, and also when `enabledLocalTools`
+ * reports no tool for this context — the server is injected only if at least
+ * one tool is enabled.
+ */
+export function buildLocalToolsServer(
+  ctx: { cwd?: string },
+  meta: LocalToolsMeta | undefined,
+): McpServerStdio | null {
+  const { cwd } = ctx;
+  if (!cwd) {
+    return null;
+  }
+  const toolCtx: LocalToolCtx = {
+    cwd,
+    token: resolveGithubToken(),
+    taskId: resolveTaskId(meta),
+    baseBranch: meta?.baseBranch,
+  };
+  // Tools self-gate via the registry; keep only the names of those that pass.
+  const enabledNames = enabledLocalTools(toolCtx, meta).map((t) => t.name);
+  if (enabledNames.length === 0) {
+    return null;
+  }
+  return toMcpServerStdio(toolCtx, enabledNames);
+}
diff --git a/packages/agent/src/adapters/codex-app-server/mapping.test.ts b/packages/agent/src/adapters/codex-app-server/mapping.test.ts
index fd4f1882d0..18454844b3 100644
--- a/packages/agent/src/adapters/codex-app-server/mapping.test.ts
+++ b/packages/agent/src/adapters/codex-app-server/mapping.test.ts
@@ -1,5 +1,9 @@
 import { describe, expect, it } from "vitest";
-import { mapAppServerNotification } from "./mapping";
+import {
+  mapAppServerNotification,
+  mapHistoryItem,
+  parseUnifiedDiff,
+} from "./mapping";
 import { APP_SERVER_NOTIFICATIONS } from "./protocol";
 
 describe("mapAppServerNotification", () => {
@@ -7,7 +11,7 @@ describe("mapAppServerNotification", () => {
     const result = mapAppServerNotification(
       "s-1",
       APP_SERVER_NOTIFICATIONS.AGENT_MESSAGE_DELTA,
-      { itemId: "item_1", text: "Hello" },
+      { itemId: "item_1", delta: "Hello" },
     );
 
     expect(result).toEqual({
@@ -19,7 +23,26 @@ describe("mapAppServerNotification", () => {
     });
   });
 
-  it("returns null when the text is missing or empty", () => {
+  it.each([
+    ["raw textDelta", APP_SERVER_NOTIFICATIONS.REASONING_TEXT_DELTA],
+    ["summaryTextDelta", APP_SERVER_NOTIFICATIONS.REASONING_SUMMARY_TEXT_DELTA],
+  ])("maps a reasoning %s to an ACP agent_thought_chunk", (_label, method) => {
+    const result = mapAppServerNotification("s-1", method, {
+      itemId: "item_1",
+      delta: "thinking",
+      contentIndex: 0,
+    });
+
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "agent_thought_chunk",
+        content: { type: "text", text: "thinking" },
+      },
+    });
+  });
+
+  it("returns null when the delta is missing or empty", () => {
     expect(
       mapAppServerNotification(
         "s-1",
@@ -31,16 +54,586 @@ describe("mapAppServerNotification", () => {
       mapAppServerNotification(
         "s-1",
         APP_SERVER_NOTIFICATIONS.AGENT_MESSAGE_DELTA,
-        { itemId: "item_1", text: "" },
+        { itemId: "item_1", delta: "" },
+      ),
+    ).toBeNull();
+  });
+
+  it("maps a started command execution item to a tool_call", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.ITEM_STARTED,
+      { item: { type: "commandExecution", id: "i1", command: "ls -la" } },
+    );
+
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "tool_call",
+        toolCallId: "i1",
+        title: "ls -la",
+        kind: "execute",
+        status: "in_progress",
+      },
+    });
+  });
+
+  it("maps a completed command execution item to a tool_call_update with output", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.ITEM_COMPLETED,
+      {
+        item: {
+          type: "commandExecution",
+          id: "i1",
+          command: "ls",
+          status: "completed",
+          aggregatedOutput: "file.txt",
+        },
+      },
+    );
+
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "tool_call_update",
+        toolCallId: "i1",
+        status: "completed",
+        content: [
+          { type: "content", content: { type: "text", text: "file.txt" } },
+        ],
+      },
+    });
+  });
+
+  it("maps a started mcp tool call item, surfacing arguments as rawInput", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.ITEM_STARTED,
+      {
+        item: {
+          type: "mcpToolCall",
+          id: "m1",
+          server: "posthog",
+          tool: "execute-sql",
+          arguments: { query: "SELECT 1" },
+        },
+      },
+    );
+
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "tool_call",
+        toolCallId: "m1",
+        title: "posthog/execute-sql",
+        kind: "other",
+        status: "in_progress",
+        rawInput: { query: "SELECT 1" },
+        _meta: {
+          posthog: {
+            toolName: "mcp__posthog__execute-sql",
+            mcp: { server: "posthog", tool: "execute-sql" },
+          },
+        },
+      },
+    });
+  });
+
+  it("tags an mcp exec tool call with the structured posthog channel the renderer routes on", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.ITEM_STARTED,
+      {
+        item: {
+          type: "mcpToolCall",
+          id: "m2",
+          server: "posthog",
+          tool: "exec",
+          arguments: { command: "call execute-sql {}" },
+        },
+      },
+    );
+
+    const meta = (result?.update as { _meta?: unknown })._meta as {
+      posthog?: { toolName?: string; mcp?: { server: string; tool: string } };
+    };
+    expect(meta.posthog).toEqual({
+      toolName: "mcp__posthog__exec",
+      mcp: { server: "posthog", tool: "exec" },
+    });
+  });
+
+  it("drops agent message items (their deltas already streamed)", () => {
+    expect(
+      mapAppServerNotification("s-1", APP_SERVER_NOTIFICATIONS.ITEM_COMPLETED, {
+        item: { type: "agentMessage", id: "a1", text: "done" },
+      }),
+    ).toBeNull();
+  });
+
+  it("maps thread/tokenUsage/updated to a usage_update from the per-turn `last` (not cumulative `total`)", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.TOKEN_USAGE_UPDATED,
+      {
+        threadId: "t",
+        turnId: "u",
+        tokenUsage: {
+          total: { totalTokens: 1500, inputTokens: 1000, outputTokens: 500 },
+          last: {
+            totalTokens: 600,
+            inputTokens: 500,
+            outputTokens: 100,
+            cachedInputTokens: 0,
+            reasoningOutputTokens: 0,
+          },
+          modelContextWindow: 200000,
+        },
+      },
+    );
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: { sessionUpdate: "usage_update", used: 600, size: 200000 },
+    });
+  });
+
+  it("falls back to cumulative `total` when `last` is absent (pre-`last` build / turn 1)", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.TOKEN_USAGE_UPDATED,
+      {
+        threadId: "t",
+        turnId: "u",
+        tokenUsage: {
+          total: { totalTokens: 1500, inputTokens: 1000, outputTokens: 500 },
+          modelContextWindow: 200000,
+        },
+      },
+    );
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: { sessionUpdate: "usage_update", used: 1500, size: 200000 },
+    });
+  });
+
+  it("maps turn/plan/updated to a plan update", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.TURN_PLAN_UPDATED,
+      {
+        threadId: "t",
+        turnId: "u",
+        plan: [
+          { step: "Read files", status: "completed" },
+          { step: "Edit", status: "inProgress" },
+        ],
+      },
+    );
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "plan",
+        entries: [
+          { content: "Read files", priority: "medium", status: "completed" },
+          { content: "Edit", priority: "medium", status: "in_progress" },
+        ],
+      },
+    });
+  });
+
+  it("maps a completed fileChange to a tool_call_update with diff content", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.ITEM_COMPLETED,
+      {
+        item: {
+          type: "fileChange",
+          id: "f1",
+          status: "completed",
+          changes: [{ path: "a.txt", diff: "@@ -1 +1 @@\n-old\n+new" }],
+        },
+      },
+    );
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "tool_call_update",
+        toolCallId: "f1",
+        status: "completed",
+        content: [
+          { type: "diff", path: "a.txt", oldText: "old", newText: "new" },
+        ],
+      },
+    });
+  });
+
+  it("includes cwd as a follow-along location on a started command execution", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.ITEM_STARTED,
+      {
+        item: {
+          type: "commandExecution",
+          id: "c1",
+          command: "pytest",
+          cwd: "/repo",
+        },
+      },
+    );
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "tool_call",
+        toolCallId: "c1",
+        title: "pytest",
+        kind: "execute",
+        status: "in_progress",
+        locations: [{ path: "/repo" }],
+      },
+    });
+  });
+
+  it("prefers command-action paths over cwd for read commands", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.ITEM_STARTED,
+      {
+        item: {
+          type: "commandExecution",
+          id: "c2",
+          command: "cat foo.txt",
+          cwd: "/repo",
+          commandActions: [
+            { type: "read", path: "/repo/foo.txt" },
+            { type: "read", path: "/repo/foo.txt" },
+          ],
+        },
+      },
+    );
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "tool_call",
+        toolCallId: "c2",
+        title: "cat foo.txt",
+        kind: "read",
+        status: "in_progress",
+        locations: [{ path: "/repo/foo.txt" }],
+      },
+    });
+  });
+
+  it("titles a started fileChange with its path and exposes locations", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.ITEM_STARTED,
+      {
+        item: {
+          type: "fileChange",
+          id: "f2",
+          changes: [{ path: "src/a.ts" }, { path: "src/b.ts" }],
+        },
+      },
+    );
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "tool_call",
+        toolCallId: "f2",
+        title: "src/a.ts (+1 more)",
+        kind: "edit",
+        status: "in_progress",
+        locations: [{ path: "src/a.ts" }, { path: "src/b.ts" }],
+      },
+    });
+  });
+
+  it("streams command output deltas as in-progress tool_call_update text", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.COMMAND_OUTPUT_DELTA,
+      { threadId: "t", turnId: "u", itemId: "c1", delta: "line 1\n" },
+    );
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "tool_call_update",
+        toolCallId: "c1",
+        status: "in_progress",
+        content: [
+          { type: "content", content: { type: "text", text: "line 1\n" } },
+        ],
+      },
+    });
+  });
+
+  it("echoes terminal interaction stdin into the tool call output", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.TERMINAL_INTERACTION,
+      {
+        threadId: "t",
+        turnId: "u",
+        itemId: "c1",
+        processId: "p1",
+        stdin: "y\n",
+      },
+    );
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "tool_call_update",
+        toolCallId: "c1",
+        status: "in_progress",
+        content: [{ type: "content", content: { type: "text", text: "y\n" } }],
+      },
+    });
+  });
+
+  it("returns null for an output delta missing itemId or delta", () => {
+    expect(
+      mapAppServerNotification(
+        "s-1",
+        APP_SERVER_NOTIFICATIONS.COMMAND_OUTPUT_DELTA,
+        { itemId: "c1", delta: "" },
       ),
     ).toBeNull();
+    expect(
+      mapAppServerNotification(
+        "s-1",
+        APP_SERVER_NOTIFICATIONS.COMMAND_OUTPUT_DELTA,
+        { delta: "x" },
+      ),
+    ).toBeNull();
+  });
+
+  it("streams fileChange patch updates as in-progress diff content", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.FILE_CHANGE_PATCH_UPDATED,
+      {
+        threadId: "t",
+        turnId: "u",
+        itemId: "f1",
+        changes: [
+          {
+            path: "a.txt",
+            kind: { type: "update" },
+            diff: "@@ -1 +1 @@\n-x\n+y",
+          },
+        ],
+      },
+    );
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "tool_call_update",
+        toolCallId: "f1",
+        status: "in_progress",
+        content: [{ type: "diff", path: "a.txt", oldText: "x", newText: "y" }],
+      },
+    });
   });
 
-  it("returns null for notifications not yet mapped in the spike", () => {
+  it("returns null for the turn completion notification", () => {
     expect(
       mapAppServerNotification("s-1", APP_SERVER_NOTIFICATIONS.TURN_COMPLETED, {
-        usage: { input_tokens: 10 },
+        turn: { status: "completed" },
       }),
     ).toBeNull();
   });
 });
+
+describe("mapHistoryItem", () => { // history replay: one persisted item in, an array of ACP updates out
+  it("replays a userMessage's text inputs as user_message_chunks", () => {
+    expect(
+      mapHistoryItem("s-1", {
+        type: "userMessage",
+        id: "u1",
+        content: [
+          { type: "text", text: "hello", text_elements: [] },
+          { type: "image", url: "data:image/png;base64,AAAA" }, // non-text input: expected to be skipped
+          { type: "text", text: "world", text_elements: [] },
+        ],
+      }),
+    ).toEqual([
+      {
+        sessionId: "s-1",
+        update: {
+          sessionUpdate: "user_message_chunk",
+          content: { type: "text", text: "hello" },
+        },
+      },
+      {
+        sessionId: "s-1",
+        update: {
+          sessionUpdate: "user_message_chunk",
+          content: { type: "text", text: "world" },
+        },
+      },
+    ]);
+  });
+
+  it("replays an agentMessage as an agent_message_chunk", () => {
+    expect(
+      mapHistoryItem("s-1", { type: "agentMessage", id: "a1", text: "done" }),
+    ).toEqual([
+      {
+        sessionId: "s-1",
+        update: {
+          sessionUpdate: "agent_message_chunk",
+          content: { type: "text", text: "done" },
+        },
+      },
+    ]);
+  });
+
+  it("replays a completed command as one tool_call carrying status + output", () => {
+    expect(
+      mapHistoryItem("s-1", {
+        type: "commandExecution",
+        id: "c1",
+        command: "ls -la",
+        status: "completed",
+        commandActions: [{ type: "read", path: "/repo/a.ts" }], // all-read actions => kind "read"
+        aggregatedOutput: "a.ts\n",
+      }),
+    ).toEqual([
+      {
+        sessionId: "s-1",
+        update: {
+          sessionUpdate: "tool_call",
+          toolCallId: "c1",
+          title: "ls -la",
+          kind: "read",
+          status: "completed",
+          locations: [{ path: "/repo/a.ts" }],
+          content: [
+            { type: "content", content: { type: "text", text: "a.ts\n" } },
+          ],
+        },
+      },
+    ]);
+  });
+
+  it("replays a fileChange as a tool_call with diff content", () => {
+    const [update] = mapHistoryItem("s-1", {
+      type: "fileChange",
+      id: "f1",
+      status: "completed",
+      changes: [{ path: "a.txt", diff: "-x\n+y", kind: "modify" }], // header-less diff: no @@ line
+    });
+    expect(update.update).toMatchObject({ // partial match: title and locations are not pinned here
+      sessionUpdate: "tool_call",
+      toolCallId: "f1",
+      kind: "edit",
+      status: "completed",
+      content: [{ type: "diff", path: "a.txt", oldText: "x", newText: "y" }],
+    });
+  });
+
+  it("does not replay ephemeral reasoning/plan items", () => {
+    expect(mapHistoryItem("s-1", { type: "reasoning", id: "r1" })).toEqual([]);
+    expect(
+      mapHistoryItem("s-1", { type: "plan", id: "p1", text: "the plan" }),
+    ).toEqual([]);
+  });
+});
+
+describe("parseUnifiedDiff", () => { // pins which diff lines are content and which are metadata
+  it("keeps added/removed content lines whose payload starts with ++ or --", () => {
+    expect(parseUnifiedDiff("@@ -1 +1 @@\n---count;\n+++count;")).toEqual({ // "---c" has no space after the dashes
+      oldText: "--count;",
+      newText: "++count;",
+    });
+  });
+
+  it("skips file headers and the no-newline marker", () => {
+    expect(
+      parseUnifiedDiff(
+        "--- a/x.ts\n+++ b/x.ts\n@@ -1 +1 @@\n-old\n+new\n\\ No newline at end of file",
+      ),
+    ).toEqual({ oldText: "old", newText: "new" }); // headers, hunk line and marker contribute no text
+  });
+});
+
+describe("mcpToolCall result rendering", () => { // completion output for mcpToolCall and dynamicToolCall items
+  it("renders a completed mcpToolCall's result content as text", () => {
+    expect(
+      mapAppServerNotification("s-1", APP_SERVER_NOTIFICATIONS.ITEM_COMPLETED, {
+        item: {
+          type: "mcpToolCall",
+          id: "m1",
+          server: "posthog",
+          tool: "query",
+          status: "completed",
+          arguments: { sql: "SELECT 1" },
+          result: { content: [{ type: "text", text: "42 rows" }] },
+        },
+      }),
+    ).toEqual({ // exact match: the completion update carries no title, kind or rawInput
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "tool_call_update",
+        toolCallId: "m1",
+        status: "completed",
+        content: [
+          { type: "content", content: { type: "text", text: "42 rows" } },
+        ],
+      },
+    });
+  });
+
+  it("renders a failed mcpToolCall's error message", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.ITEM_COMPLETED,
+      {
+        item: {
+          type: "mcpToolCall",
+          id: "m2",
+          server: "x",
+          tool: "y",
+          status: "failed",
+          error: { message: "boom" }, // no `result`: the error message is the only output
+        },
+      },
+    );
+    expect(result?.update).toMatchObject({
+      sessionUpdate: "tool_call_update",
+      toolCallId: "m2",
+      status: "failed",
+      content: [{ type: "content", content: { type: "text", text: "boom" } }],
+    });
+  });
+
+  it("renders a dynamicToolCall (not dropped) with its inputText output", () => {
+    const result = mapAppServerNotification(
+      "s-1",
+      APP_SERVER_NOTIFICATIONS.ITEM_COMPLETED,
+      {
+        item: {
+          type: "dynamicToolCall",
+          id: "d1",
+          namespace: "ns",
+          tool: "doit",
+          status: "completed",
+          arguments: { x: 1 },
+          contentItems: [{ type: "inputText", text: "result" }], // "inputText" entries form the output
+        },
+      },
+    );
+    expect(result).toEqual({
+      sessionId: "s-1",
+      update: {
+        sessionUpdate: "tool_call_update",
+        toolCallId: "d1",
+        status: "completed",
+        content: [
+          { type: "content", content: { type: "text", text: "result" } },
+        ],
+      },
+    });
+  });
+});
diff --git a/packages/agent/src/adapters/codex-app-server/mapping.ts b/packages/agent/src/adapters/codex-app-server/mapping.ts
index d282981e14..46c6148b86 100644
--- a/packages/agent/src/adapters/codex-app-server/mapping.ts
+++ b/packages/agent/src/adapters/codex-app-server/mapping.ts
@@ -1,14 +1,15 @@
-import type { SessionNotification } from "@agentclientprotocol/sdk";
+import type {
+  SessionNotification,
+  ToolCallContent,
+  ToolCallLocation,
+} from "@agentclientprotocol/sdk";
+import { mcpToolKey, posthogToolMeta } from "@posthog/shared";
 import { APP_SERVER_NOTIFICATIONS } from "./protocol";
 
 /**
- * Translates a native app-server notification into an ACP SessionNotification
- * so the rest of PostHog Code, which speaks ACP, stays unchanged.
- *
- * Spike scope: only the streaming agent-message path is mapped, which is what
- * Phase A proves end to end. item/tool events, token usage and approvals are
- * mapped in Phase B once the generated schema pins their exact shapes.
- * Notifications without a mapping return null and are dropped.
+ * Translates a native app-server notification into an ACP SessionNotification.
+ * Streamed text maps to chunks; tool-like items map to `tool_call`/`tool_call_update`.
+ * Agent-message and reasoning items are dropped — their deltas already streamed.
  */
 export function mapAppServerNotification(
   sessionId: string,
@@ -17,22 +18,501 @@ export function mapAppServerNotification(
 ): SessionNotification | null {
   switch (method) {
     case APP_SERVER_NOTIFICATIONS.AGENT_MESSAGE_DELTA: {
-      // `item/agentMessage/delta` carries { itemId, text }.
-      const text = readStringField(params, "text");
-      if (!text) return null;
+      const delta = readStringField(params, "delta");
+      if (!delta) return null;
       return {
         sessionId,
         update: {
           sessionUpdate: "agent_message_chunk",
-          content: { type: "text", text },
+          content: { type: "text", text: delta },
         },
       };
     }
+    case APP_SERVER_NOTIFICATIONS.REASONING_TEXT_DELTA:
+    case APP_SERVER_NOTIFICATIONS.REASONING_SUMMARY_TEXT_DELTA: {
+      const delta = readStringField(params, "delta");
+      if (!delta) return null;
+      return {
+        sessionId,
+        update: {
+          sessionUpdate: "agent_thought_chunk",
+          content: { type: "text", text: delta },
+        },
+      };
+    }
+    case APP_SERVER_NOTIFICATIONS.TOKEN_USAGE_UPDATED: {
+      // Context indicator: renderer reads `used`/`size`; detailed breakdown comes via `_posthog/usage_update`.
+      const tu = (params as { tokenUsage?: any })?.tokenUsage;
+      // Use this turn's `last`, not cumulative `total` (which over-reports and pegs the
+      // gauge); `total` is the fallback for pre-`last` builds.
+      const context = tu?.last ?? tu?.total;
+      const used = context?.totalTokens ?? context?.inputTokens;
+      if (used == null) return null;
+      const size = tu?.modelContextWindow;
+      // `usage_update` is a PostHog-convention update, not in the ACP union.
+      return {
+        sessionId,
+        update: {
+          sessionUpdate: "usage_update",
+          used,
+          ...(size != null ? { size } : {}),
+        },
+      } as unknown as SessionNotification;
+    }
+    case APP_SERVER_NOTIFICATIONS.TURN_PLAN_UPDATED: {
+      const plan = (
+        params as { plan?: Array<{ step?: string; status?: string }> }
+      )?.plan;
+      if (!Array.isArray(plan)) return null;
+      return {
+        sessionId,
+        update: {
+          sessionUpdate: "plan",
+          entries: plan.map((s) => ({
+            content: s.step ?? "",
+            priority: "medium",
+            status: mapPlanStatus(s.status),
+          })),
+        },
+      } as unknown as SessionNotification;
+    }
+    case APP_SERVER_NOTIFICATIONS.ITEM_STARTED:
+    case APP_SERVER_NOTIFICATIONS.ITEM_COMPLETED: {
+      const item = readItem(params);
+      if (!item) return null;
+      return mapItem(
+        sessionId,
+        item,
+        method === APP_SERVER_NOTIFICATIONS.ITEM_COMPLETED,
+      );
+    }
+    case APP_SERVER_NOTIFICATIONS.COMMAND_OUTPUT_DELTA: {
+      const itemId = readStringField(params, "itemId");
+      const delta = readStringField(params, "delta");
+      if (!itemId || !delta) return null;
+      return toolOutputChunk(sessionId, itemId, delta);
+    }
+    case APP_SERVER_NOTIFICATIONS.TERMINAL_INTERACTION: {
+      const itemId = readStringField(params, "itemId");
+      const stdin = readStringField(params, "stdin");
+      if (!itemId || !stdin) return null;
+      return toolOutputChunk(sessionId, itemId, stdin);
+    }
+    case APP_SERVER_NOTIFICATIONS.FILE_CHANGE_PATCH_UPDATED: {
+      const itemId = readStringField(params, "itemId");
+      if (!itemId) return null;
+      const changes = (params as { changes?: AppServerItem["changes"] })
+        ?.changes;
+      const content = diffContent(changes);
+      if (!content) return null;
+      return {
+        sessionId,
+        update: {
+          sessionUpdate: "tool_call_update",
+          toolCallId: itemId,
+          status: "in_progress",
+          content,
+        },
+      };
+    }
+    default:
+      return null;
+  }
+}
+
+/** Wraps `text` as the sole content entry of an in-progress `tool_call_update` for `toolCallId`. */
+function toolOutputChunk(
+  sessionId: string,
+  toolCallId: string,
+  text: string,
+): SessionNotification {
+  // One text block per notification; callers emit one update per streamed chunk.
+  const chunk: ToolCallContent = {
+    type: "content",
+    content: { type: "text", text },
+  };
+  const status = "in_progress" as const;
+  const sessionUpdate = "tool_call_update" as const;
+  const update = { sessionUpdate, toolCallId, status, content: [chunk] };
+  return { sessionId, update };
+}
+
+/** Converts a codex plan-step status to the ACP plan-entry status; unknown values become "pending". */
+function mapPlanStatus(
+  status: string | undefined,
+): "pending" | "in_progress" | "completed" {
+  if (status === "completed") return "completed";
+  return status === "inProgress" ? "in_progress" : "pending";
+}
+
+/**
+ * Extracts {oldText,newText} from a unified diff so a codex `fileChange` renders as an ACP diff.
+ * `--- `/`+++ ` are file headers only once the hunk's `@@` line budget for that side is spent.
+ */
+export function parseUnifiedDiff(diff: string): {
+  oldText: string;
+  newText: string;
+} {
+  const oldLines: string[] = [];
+  const newLines: string[] = [];
+  let oldLeft = 0; // old-side lines the current hunk still expects (stays 0 without a hunk header)
+  let newLeft = 0; // new-side lines the current hunk still expects
+  for (const line of diff.split("\n")) {
+    // `@@ -a[,b] +c[,d] @@` opens a hunk; an omitted count means exactly one line.
+    const hunk = /^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@/.exec(line);
+    if (hunk) [oldLeft, newLeft] = [Number(hunk[1] ?? 1), Number(hunk[2] ?? 1)];
+    if (/^(?:@@|diff |index |\\ )/.test(line)) continue;
+    // While a hunk still expects lines, these prefixes are content (e.g. a removed "-- note").
+    if (line.startsWith("--- ") && oldLeft === 0) continue;
+    if (line.startsWith("+++ ") && newLeft === 0) continue;
+    const text = /^[-+ ]/.test(line) ? line.slice(1) : line;
+    if (!line.startsWith("+")) {
+      oldLines.push(text);
+      oldLeft = Math.max(0, oldLeft - 1);
+    }
+    if (!line.startsWith("-")) {
+      newLines.push(text);
+      newLeft = Math.max(0, newLeft - 1);
+    }
+  }
+  return { oldText: oldLines.join("\n"), newText: newLines.join("\n") };
+}
+
+export type AppServerItem = { // loosely typed view of an app-server item; every field is optional
+  type?: string; // discriminator switched on by describeTool and mapHistoryItem
+  id?: string; // used as the ACP toolCallId; tool items without it are dropped
+  command?: string; // commandExecution: becomes the tool-call title
+  cwd?: string; // commandExecution: fallback location when no action has a path
+  commandActions?: Array<{ type?: string; path?: string } | string>; // commandExecution: determine kind and locations
+  server?: string; // mcpToolCall: server name (title falls back to "mcp")
+  tool?: string; // mcpToolCall / dynamicToolCall: tool name (title falls back to "tool")
+  namespace?: string | null; // dynamicToolCall: optional title prefix
+  contentItems?: unknown; // dynamicToolCall: "inputText" entries are joined into the output
+  query?: string; // webSearch: becomes the title
+  status?: string; // translated to an ACP status by mapStatus
+  arguments?: unknown; // mcpToolCall / dynamicToolCall: surfaced as rawInput
+  aggregatedOutput?: string | null; // commandExecution: rendered as the output text
+  changes?: Array<{ path?: string; diff?: string; kind?: unknown }>; // fileChange: per-path unified diffs
+  result?: { content?: unknown } | null; // mcpToolCall: "text" content blocks are joined into the output
+  error?: { message?: string } | null; // mcpToolCall: a message here takes precedence over `result`
+  // Read when replaying thread history: `text` for agentMessage, `content` for userMessage.
+  text?: string;
+  content?: unknown;
+};
+
+/** Text shown for an mcpToolCall: the error message if any, else its non-empty "text" blocks joined by newlines. */
+function mcpResultText(
+  result: AppServerItem["result"],
+  error: AppServerItem["error"],
+): string | null {
+  if (error?.message) return error.message;
+  const blocks: unknown = result?.content;
+  if (!Array.isArray(blocks)) return null;
+  const pieces: string[] = [];
+  for (const block of blocks) {
+    if (!block || typeof block !== "object") continue;
+    const { type, text } = block as { type?: string; text?: string };
+    // Only non-empty text blocks contribute; other content types are ignored.
+    if (type === "text" && text) pieces.push(text);
+  }
+  return pieces.join("\n") || null;
+}
+
+/** Output text of a dynamicToolCall: its non-empty "inputText" content items joined by newlines, or null. */
+function dynamicToolText(items: unknown): string | null {
+  if (!Array.isArray(items)) return null;
+  const pieces: string[] = [];
+  for (const entry of items) {
+    // Skip anything that is not an object tagged as "inputText".
+    if (!entry || typeof entry !== "object") continue;
+    const { type, text } = entry as { type?: string; text?: string };
+    if (type !== "inputText") continue;
+    // Blank or missing text contributes nothing.
+    if (text) pieces.push(text);
+  }
+  return pieces.join("\n") || null;
+}
+
+/**
+ * Re-renders a persisted `ThreadItem` as the ACP updates a live stream would have produced,
+ * so a reattaching host shows the full transcript. Tool items collapse to a single
+ * `tool_call`; ephemeral items (reasoning, plan) are not replayed.
+ * @param sessionId ACP session id stamped on every emitted notification.
+ * @param item Persisted thread item to replay.
+ * @returns Zero or more notifications; [] for unmapped types, empty messages or id-less tools.
+ */
+export function mapHistoryItem(
+  sessionId: string,
+  item: AppServerItem,
+): SessionNotification[] {
+  switch (item.type) {
+    case "userMessage":
+      return userMessageChunks(sessionId, item.content);
+    case "agentMessage": {
+      if (!item.text) return [];
+      const content = { type: "text" as const, text: item.text };
+      const update = { sessionUpdate: "agent_message_chunk" as const, content };
+      return [{ sessionId, update }];
+    }
+    case "reasoning":
+    case "plan":
+      return [];
+    default: {
+      const tool = describeTool(item);
+      if (!tool || !item.id) return [];
+      const content = completedContent(item, tool);
+      // An item persisted without any `status` cannot report one later; replaying it as
+      // "in_progress" would leave it pending forever, so treat it as completed.
+      const status = item.status == null ? "completed" : mapStatus(item.status);
+      return [
+        {
+          sessionId,
+          update: {
+            sessionUpdate: "tool_call",
+            toolCallId: item.id,
+            title: tool.title,
+            kind: tool.kind,
+            status,
+            ...(tool.rawInput !== undefined ? { rawInput: tool.rawInput } : {}),
+            ...(tool.locations?.length ? { locations: tool.locations } : {}),
+            ...(tool.mcp
+              ? {
+                  _meta: posthogToolMeta({
+                    toolName: mcpToolKey(tool.mcp),
+                    mcp: tool.mcp,
+                  }),
+                }
+              : {}),
+            ...(content ? { content } : {}),
+          },
+        },
+      ];
+    }
+  }
+}
+
+/**
+ * Emits one `user_message_chunk` per non-empty text block of a stored `userMessage`.
+ * Non-text blocks (such as image inputs) and non-array content produce nothing.
+ */
+function userMessageChunks(
+  sessionId: string,
+  content: unknown,
+): SessionNotification[] {
+  const blocks: unknown[] = Array.isArray(content) ? content : [];
+  return blocks.flatMap((entry): SessionNotification[] => {
+    // Ignore non-objects and anything not tagged as text.
+    if (!entry || typeof entry !== "object") return [];
+    const { type, text } = entry as { type?: string; text?: unknown };
+    if (type !== "text") return [];
+    // Only non-empty strings are replayed.
+    if (typeof text !== "string" || !text) return [];
+    return [
+      {
+        sessionId,
+        update: {
+          sessionUpdate: "user_message_chunk",
+          content: { type: "text", text },
+        },
+      },
+    ];
+  });
+}
+
+type ToolDescriptor = { // rendering summary of a tool-like item, as produced by describeTool
+  title: string; // emitted as the tool call's `title`
+  kind: "execute" | "edit" | "fetch" | "other" | "read" | "search"; // emitted as the tool call's `kind`
+  rawInput?: unknown; // emitted as `rawInput` only when not undefined
+  output?: string | null; // completion text; an empty or null value yields no content
+  locations?: ToolCallLocation[]; // emitted only when non-empty
+  /** Originating MCP server + tool, surfaced on `_meta.posthog` so the renderer routes MCP rendering. */
+  mcp?: { server: string; tool: string };
+};
+
+/** Derives a command's ACP kind from its actions: all "read" → read, all search/listFiles → search, else execute. */
+function commandKind(
+  actions: AppServerItem["commandActions"],
+): "read" | "search" | "execute" {
+  if (!actions?.length) return "execute";
+  const kinds = actions.map((a) => (typeof a === "string" ? a : a?.type));
+  const onlyOf = (...allowed: string[]) => kinds.every((k) => k !== undefined && allowed.includes(k));
+  if (onlyOf("read")) return "read";
+  return onlyOf("search", "listFiles") ? "search" : "execute";
+}
+
+function describeTool(item: AppServerItem): ToolDescriptor | null { // summarises a tool-like item; null for any other type
+  switch (item.type) {
+    case "commandExecution": // shell command: kind and locations come from its actions
+      return {
+        title: item.command ?? "Run command",
+        kind: commandKind(item.commandActions),
+        output: item.aggregatedOutput ?? null,
+        locations: commandLocations(item),
+      };
+    case "fileChange": { // file edit: no `output` here; diffs are attached by completedContent
+      const paths = changePaths(item.changes);
+      return {
+        title: fileChangeTitle(paths),
+        kind: "edit",
+        locations: paths.map((path) => ({ path })),
+      };
+    }
+    case "mcpToolCall": // MCP call: the only case that sets `mcp`
+      return {
+        title: `${item.server ?? "mcp"}/${item.tool ?? "tool"}`,
+        kind: "other",
+        rawInput: item.arguments,
+        output: mcpResultText(item.result, item.error),
+        mcp: { server: item.server ?? "mcp", tool: item.tool ?? "tool" },
+      };
+    case "dynamicToolCall": // title is "<namespace>/<tool>" when a namespace is set, else just the tool
+      return {
+        title: item.namespace
+          ? `${item.namespace}/${item.tool ?? "tool"}`
+          : (item.tool ?? "tool"),
+        kind: "other",
+        rawInput: item.arguments,
+        output: dynamicToolText(item.contentItems),
+      };
+    case "webSearch": // title and kind only: no input, output or locations
+      return { title: item.query ?? "Web search", kind: "fetch" };
     default:
       return null;
   }
 }
 
+/**
+ * Lists each non-empty `path` found in `changes` exactly once, in first-seen
+ * order. Returns [] when `changes` is missing or empty.
+ */
+export function changePaths(changes: AppServerItem["changes"]): string[] {
+  // A Set iterates in insertion order, so de-duplicating through it keeps the
+  // first occurrence of every path in its original position.
+  const unique = new Set<string>();
+  for (const change of changes ?? []) {
+    // Entries without a usable path are skipped.
+    if (change?.path) unique.add(change.path);
+  }
+  return [...unique];
+}
+
+/** Title for a fileChange: the only path, "<first> (+N more)" for several, or "Edit files" for none. */
+function fileChangeTitle(paths: string[]): string {
+  if (paths.length > 1) return `${paths[0]} (+${paths.length - 1} more)`;
+  return paths[0] ?? "Edit files";
+}
+
+/**
+ * Locations for a commandExecution: the distinct paths named by its actions,
+ * or the cwd when no action names one. Returns undefined when neither exists.
+ */
+function commandLocations(item: AppServerItem): ToolCallLocation[] | undefined {
+  const unique = new Set<string>();
+  for (const action of item.commandActions ?? []) {
+    // String-form actions carry no path.
+    if (typeof action !== "string" && action?.path) unique.add(action.path);
+  }
+  // Fall back to the working directory only when no action supplied a path.
+  if (unique.size === 0 && item.cwd) unique.add(item.cwd);
+  if (unique.size === 0) return undefined;
+  return Array.from(unique, (path) => ({ path }));
+}
+
+/** Maps a live item to `tool_call` (started) or `tool_call_update` (completed); null for non-tool or id-less items. */
+function mapItem(
+  sessionId: string,
+  item: AppServerItem,
+  completed: boolean,
+): SessionNotification | null {
+  const tool = describeTool(item);
+  if (!tool || !item.id) return null;
+  if (!completed) {
+    return {
+      sessionId,
+      update: {
+        sessionUpdate: "tool_call",
+        toolCallId: item.id,
+        title: tool.title,
+        kind: tool.kind,
+        status: "in_progress",
+        ...(tool.rawInput !== undefined ? { rawInput: tool.rawInput } : {}),
+        ...(tool.locations?.length ? { locations: tool.locations } : {}),
+        ...(tool.mcp
+          ? {
+              _meta: posthogToolMeta({
+                toolName: mcpToolKey(tool.mcp),
+                mcp: tool.mcp,
+              }),
+            }
+          : {}),
+      },
+    };
+  }
+
+  const content = completedContent(item, tool);
+  // A completion event for an item that carries no `status` must still finish the tool call.
+  const status = item.status == null ? "completed" : mapStatus(item.status);
+  return {
+    sessionId,
+    update: {
+      sessionUpdate: "tool_call_update",
+      toolCallId: item.id,
+      status,
+      ...(content ? { content } : {}),
+    },
+  };
+}
+
+/** Content for a finished tool item: diff blocks for a fileChange that has diffs, else the tool output as one text block. */
+function completedContent(
+  item: AppServerItem,
+  tool: ToolDescriptor,
+): ToolCallContent[] | undefined {
+  // Diffs win for file changes; a fileChange without any diff falls through to `output`.
+  const diffs = item.type === "fileChange" ? diffContent(item.changes) : undefined;
+  if (diffs) return diffs;
+  // No output (null, undefined or empty) means no content at all.
+  if (!tool.output) return undefined;
+  const text = tool.output;
+  return [{ type: "content", content: { type: "text", text } }];
+}
+
+/**
+ * Builds one ACP `diff` block per change that carries a non-empty `diff`,
+ * parsing it into old/new text. Returns undefined when no block results.
+ */
+export function diffContent(
+  changes: AppServerItem["changes"],
+): ToolCallContent[] | undefined {
+  const blocks: ToolCallContent[] = [];
+  for (const change of changes ?? []) {
+    if (!change?.diff) continue;
+    const { oldText, newText } = parseUnifiedDiff(change.diff);
+    // `path` is optional on the input type, hence the cast to the ACP type.
+    const block = { type: "diff", path: change.path, oldText, newText };
+    blocks.push(block as unknown as ToolCallContent);
+  }
+  return blocks.length ? blocks : undefined;
+}
+
+/** Maps an item status to ACP: "completed" stays, "failed"/"declined" → "failed", anything else → "in_progress". */
+function mapStatus(
+  status: string | undefined,
+): "completed" | "failed" | "in_progress" {
+  if (status === "failed" || status === "declined") return "failed";
+  return status === "completed" ? "completed" : "in_progress";
+}
+
+/** Returns `params.item` when it is a non-null object, else null. */
+function readItem(params: unknown): AppServerItem | null {
+  if (!params || typeof params !== "object" || !("item" in params)) {
+    return null;
+  }
+  const { item } = params as Record<string, unknown>;
+  // Only object-shaped items are usable; primitives are rejected.
+  return item && typeof item === "object" ? (item as AppServerItem) : null;
+}
+
 function readStringField(params: unknown, key: string): string | null {
   if (params && typeof params === "object" && key in params) {
     const value = (params as Record<string, unknown>)[key];
diff --git a/packages/agent/src/adapters/codex-app-server/mcp-config.test.ts b/packages/agent/src/adapters/codex-app-server/mcp-config.test.ts
new file mode 100644
index 0000000000..912d253b38
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/mcp-config.test.ts
@@ -0,0 +1,60 @@
+import type { McpServer } from "@agentclientprotocol/sdk";
+import { describe, expect, it } from "vitest";
+import { toCodexMcpServers } from "./mcp-config";
+
+describe("toCodexMcpServers", () => {
+  it("returns undefined for empty input", () => {
+    expect(toCodexMcpServers([])).toBeUndefined();
+    expect(toCodexMcpServers(undefined)).toBeUndefined();
+  });
+
+  it("translates a stdio server, folding env pairs into a map", () => {
+    const stdio = {
+      name: "posthog",
+      command: "node",
+      args: ["server.js"],
+      env: [
+        { name: "TOKEN", value: "abc" },
+        { name: "BASE", value: "http://x" },
+      ],
+    };
+    const translated = toCodexMcpServers([stdio] as unknown as McpServer[]);
+
+    // The server name becomes the map key; env pairs collapse to a plain record.
+    expect(translated).toEqual({
+      posthog: {
+        command: "node",
+        args: ["server.js"],
+        env: { TOKEN: "abc", BASE: "http://x" },
+      },
+    });
+  });
+
+  it("omits env when there are no pairs", () => {
+    const bare = { name: "bare", command: "run", args: [], env: [] };
+
+    // An empty pair list must not surface as `env: {}`.
+    const translated = toCodexMcpServers([bare] as unknown as McpServer[]);
+    expect(translated).toEqual({
+      bare: { command: "run", args: [] },
+    });
+  });
+
+  it("translates an http server, folding headers into http_headers", () => {
+    const remote = {
+      type: "http",
+      name: "remote",
+      url: "https://mcp.example/mcp",
+      headers: [{ name: "Authorization", value: "Bearer t" }],
+    };
+    const translated = toCodexMcpServers([remote] as unknown as McpServer[]);
+
+    // Only url + http_headers survive; `type` and `name` are not copied over.
+    expect(translated).toEqual({
+      remote: {
+        url: "https://mcp.example/mcp",
+        http_headers: { Authorization: "Bearer t" },
+      },
+    });
+  });
+});
diff --git a/packages/agent/src/adapters/codex-app-server/mcp-config.ts b/packages/agent/src/adapters/codex-app-server/mcp-config.ts
new file mode 100644
index 0000000000..4e4873e516
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/mcp-config.ts
@@ -0,0 +1,55 @@
+import type { McpServer } from "@agentclientprotocol/sdk";
+
+/**
+ * One entry of Codex's per-thread `mcp_servers` config (stdio: command/args/env;
+ * http: url + headers), accepted under `thread/start`'s `config.mcp_servers`.
+ */
+export type CodexMcpServerConfig =
+  | { command: string; args: string[]; env?: Record<string, string> } // stdio
+  | { url: string; http_headers?: Record<string, string> }; // http
+
+/**
+ * Translates the ACP `McpServer[]` into the shape Codex's app-server expects under
+ * `config.mcp_servers` — ACP encodes env/headers as `{ name, value }[]`, Codex
+ * wants plain string maps. Servers with neither a command nor a url are skipped;
+ * on duplicate names the last one wins. Returns undefined when nothing is left.
+ */
+export function toCodexMcpServers(
+  servers: McpServer[] | undefined,
+): Record<string, CodexMcpServerConfig> | undefined {
+  const out = new Map<string, CodexMcpServerConfig>();
+  for (const server of servers ?? []) {
+    if ("command" in server && server.command) {
+      const env = pairsToRecord(server.env);
+      out.set(server.name, {
+        command: server.command,
+        args: server.args ?? [],
+        ...(env ? { env } : {}),
+      });
+    } else if ("url" in server && server.url) {
+      const headers = pairsToRecord(server.headers);
+      out.set(server.name, {
+        url: server.url,
+        ...(headers ? { http_headers: headers } : {}),
+      });
+    }
+  }
+
+  // Collect in a Map and materialize with Object.fromEntries: it defines own
+  // properties, so a server literally named "__proto__" becomes a normal key.
+  // `obj[name] = …` would have replaced the result's prototype and dropped it.
+  return out.size > 0 ? Object.fromEntries(out) : undefined;
+}
+
+/**
+ * Folds ACP `{ name, value }` pairs into a plain string map; undefined when
+ * there are no pairs. On duplicate names the last pair wins.
+ */
+function pairsToRecord(
+  pairs: Array<{ name: string; value: string }> | undefined,
+): Record<string, string> | undefined {
+  if (!pairs?.length) return undefined;
+  // fromEntries defines own properties, so a pair named "__proto__" is kept as
+  // a normal key rather than hitting the prototype setter and being dropped.
+  return Object.fromEntries(pairs.map(({ name, value }) => [name, value]));
+}
diff --git a/packages/agent/src/adapters/codex-app-server/mcp-manager.ts b/packages/agent/src/adapters/codex-app-server/mcp-manager.ts
new file mode 100644
index 0000000000..6faae9f47d
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/mcp-manager.ts
@@ -0,0 +1,50 @@
+/** An MCP tool call codex is running: its server, tool, and arguments. */
+export interface McpCall {
+  server: string; // MCP server name, copied from the item's `server`
+  tool: string; // tool name, copied from the item's `tool`
+  args: unknown; // the item's `arguments`, passed through unvalidated
+}
+
+/**
+ * Links codex approval prompts to the MCP tool call behind them. Command approvals
+ * are looked up by item id; elicitations carry no id, so they resolve by server
+ * name against the most recently captured call (MCP calls are sequential).
+ */
+export class McpManager {
+  private readonly byId = new Map<string, McpCall>();
+  private latest?: McpCall;
+
+  /**
+   * Remember an `mcpToolCall` item seen on item/started or item/completed.
+   * Other item types, and items missing id/server/tool, are ignored.
+   */
+  capture(params: unknown): void {
+    type Item = {
+      type?: string;
+      id?: string;
+      server?: string;
+      tool?: string;
+      arguments?: unknown;
+    };
+    const item = (params as { item?: Item } | null | undefined)?.item;
+    if (item?.type !== "mcpToolCall") return;
+    const { id, server, tool } = item;
+    if (!id || !server || !tool) return;
+    const call: McpCall = { server, tool, args: item.arguments };
+    this.byId.set(id, call);
+    // Elicitations resolve against the newest call, so track it separately.
+    this.latest = call;
+  }
+
+  /** Look up the call by a command-execution approval's item id, if captured. */
+  byItemId(itemId: string | undefined): McpCall | undefined {
+    if (!itemId) return undefined;
+    return this.byId.get(itemId);
+  }
+
+  /** The latest captured call, but only if it belongs to `serverName`. */
+  byServer(serverName: string): McpCall | undefined {
+    const call = this.latest;
+    return call?.server === serverName ? call : undefined;
+  }
+}
diff --git a/packages/agent/src/adapters/codex-app-server/protocol.ts b/packages/agent/src/adapters/codex-app-server/protocol.ts
index 0448513366..bd948e9a34 100644
--- a/packages/agent/src/adapters/codex-app-server/protocol.ts
+++ b/packages/agent/src/adapters/codex-app-server/protocol.ts
@@ -1,14 +1,7 @@
 /**
- * Minimal typings for the native Codex `app-server` JSON-RPC protocol.
- *
- * Method names and message shapes follow the documented protocol
- * (https://developers.openai.com/codex/app-server). The wire framing is
- * newline-delimited JSON that follows JSON-RPC 2.0 structure but omits the
- * `"jsonrpc": "2.0"` header on the wire.
- *
- * Spike scope: param/result shapes are still partial. Generate the exact,
- * version-pinned schema with `codex app-server generate-ts` once the codex
- * binary is bundled, then tighten these.
+ * Minimal typings for the native Codex `app-server` JSON-RPC protocol
+ * (https://developers.openai.com/codex/app-server). Wire framing is
+ * newline-delimited JSON that omits the `"jsonrpc": "2.0"` header.
  */
 
 export const APP_SERVER_METHODS = {
@@ -17,27 +10,56 @@ export const APP_SERVER_METHODS = {
   THREAD_RESUME: "thread/resume",
   THREAD_FORK: "thread/fork",
   TURN_START: "turn/start",
+  // Inject input into the active turn (mirrors Claude's mid-turn steering); fails unless `expectedTurnId` matches.
+  TURN_STEER: "turn/steer",
   TURN_INTERRUPT: "turn/interrupt",
+  MODEL_LIST: "model/list",
+  SKILLS_LIST: "skills/list",
+  THREAD_LIST: "thread/list",
 } as const;
 
 export const APP_SERVER_NOTIFICATIONS = {
   INITIALIZED: "initialized",
   THREAD_STARTED: "thread/started",
+  // Carries the active turn id — precondition for turn/steer + turn/interrupt.
+  TURN_STARTED: "turn/started",
   ITEM_STARTED: "item/started",
   ITEM_COMPLETED: "item/completed",
   AGENT_MESSAGE_DELTA: "item/agentMessage/delta",
+  REASONING_TEXT_DELTA: "item/reasoning/textDelta",
+  // Default reasoning stream for gpt-5 models; raw textDelta is off by default, so without this the host sees no reasoning.
+  REASONING_SUMMARY_TEXT_DELTA: "item/reasoning/summaryTextDelta",
+  TURN_PLAN_UPDATED: "turn/plan/updated",
   TURN_COMPLETED: "turn/completed",
+  // Fatal turn error; `willRetry:false` means it won't recover on its own.
+  ERROR: "error",
   TOKEN_USAGE_UPDATED: "thread/tokenUsage/updated",
+  // codex auto-compacted the thread; mirrors Claude's compact_boundary so the host's context indicator + queue drain fire.
+  CONTEXT_COMPACTED: "thread/compacted",
+  COMMAND_OUTPUT_DELTA: "item/commandExecution/outputDelta",
+  // PTY-level stdin echoed back for an interactive terminal command.
+  TERMINAL_INTERACTION: "item/commandExecution/terminalInteraction",
+  FILE_CHANGE_PATCH_UPDATED: "item/fileChange/patchUpdated",
 } as const;
 
-/** Server-initiated requests the client must answer (approvals). */
+/**
+ * Server-initiated requests the client must answer. The two approvals are yes/no
+ * decisions; the richer requests carry distinct response shapes (multi-question
+ * prompt, permission-profile grant, MCP elicitation).
+ */
 export const APP_SERVER_REQUESTS = {
   COMMAND_APPROVAL: "item/commandExecution/requestApproval",
   FILE_CHANGE_APPROVAL: "item/fileChange/requestApproval",
+  TOOL_USER_INPUT: "item/tool/requestUserInput",
+  PERMISSIONS_APPROVAL: "item/permissions/requestApproval",
+  MCP_ELICITATION: "mcpServer/elicitation/request",
 } as const;
 
+/** JSON-RPC ids are `string | number` per the codex schema (`RequestId.ts`). */
+export type RequestId = string | number;
+
 export interface JsonRpcRequest {
-  id: number;
+  id: RequestId;
   method: string;
   params?: unknown;
 }
@@ -54,7 +76,7 @@ export interface JsonRpcError {
 }
 
 export interface JsonRpcResponse {
-  id: number;
+  id: RequestId;
   result?: unknown;
   error?: JsonRpcError;
 }
diff --git a/packages/agent/src/adapters/codex-app-server/session-config.test.ts b/packages/agent/src/adapters/codex-app-server/session-config.test.ts
new file mode 100644
index 0000000000..08fbb428ab
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/session-config.test.ts
@@ -0,0 +1,166 @@
+import { describe, expect, it } from "vitest";
+import {
+  buildConfigOptions,
+  CODEX_MODES,
+  collaborationModeFor,
+  DEFAULT_EFFORTS,
+  modeApprovalPolicy,
+  sandboxPolicyFor,
+} from "./session-config";
+
+describe("modeApprovalPolicy", () => {
+  it.each([
+    ["read-only", "untrusted"],
+    ["auto", "on-request"],
+    ["full-access", "never"],
+  ])("maps mode %s to approval policy %s", (modeId, expected) => {
+    expect(modeApprovalPolicy(modeId)).toBe(expected);
+  });
+
+  it("returns undefined for an unknown mode", () => {
+    expect(modeApprovalPolicy(undefined)).toBeUndefined();
+    expect(modeApprovalPolicy("nonsense")).toBeUndefined();
+  });
+
+  it("every CODEX_MODES entry has a resolvable policy", () => {
+    CODEX_MODES.forEach(({ id, approvalPolicy }) => {
+      expect(modeApprovalPolicy(id)).toBe(approvalPolicy);
+    });
+  });
+});
+
+describe("sandboxPolicyFor", () => {
+  it("restricts plan + read-only to a read-only sandbox", () => {
+    // Both presets are expected to share one policy: read-only, network allowed.
+    const readOnlySandbox = { type: "readOnly", networkAccess: true };
+
+    for (const modeId of ["plan", "read-only"]) {
+      const policy = sandboxPolicyFor(modeId);
+
+      expect(policy).toEqual(readOnlySandbox);
+    }
+  });
+
+  it("leaves auto + full-access at the spawned full-access sandbox (no override)", () => {
+    expect(sandboxPolicyFor("full-access")).toBeUndefined();
+    expect(sandboxPolicyFor("auto")).toBeUndefined();
+  });
+
+  it("returns undefined for unknown ids", () => {
+    expect(sandboxPolicyFor(undefined)).toBeUndefined();
+    expect(sandboxPolicyFor("bypassPermissions")).toBeUndefined();
+  });
+});
+
+describe("collaborationModeFor", () => {
+  it("maps only Plan to codex's plan collaboration; everything else is default", () => {
+    expect(collaborationModeFor("plan")).toBe("plan");
+    // Every other preset, and a missing mode, resolves to "default".
+    for (const modeId of ["read-only", "auto", "full-access", undefined]) {
+      expect(collaborationModeFor(modeId)).toBe("default");
+    }
+  });
+});
+
+describe("buildConfigOptions", () => {
+  // Finds one option by category and narrows it to the fields asserted on below.
+  type Picked = {
+    currentValue: string;
+    options: Array<{ value: string; name: string }>;
+  };
+  const categoryOf = (o: unknown) => (o as { category: string }).category;
+  const pick = (all: ReturnType<typeof buildConfigOptions>, category: string) =>
+    all.find((o) => categoryOf(o) === category) as Picked;
+
+  it("emits mode + model + thought_level selectors from the live lists", () => {
+    const models = [
+      { id: "gpt-5.5", name: "GPT-5.5" },
+      { id: "gpt-5-mini", name: "GPT-5 mini" },
+    ];
+    const all = buildConfigOptions({
+      mode: "auto",
+      model: "gpt-5.5",
+      effort: "high",
+      models,
+      efforts: ["low", "high"],
+    });
+
+    // The three selectors come back in this fixed order.
+    expect(all.map((o) => categoryOf(o))).toEqual(["mode", "model", "thought_level"]);
+
+    const modelOption = pick(all, "model");
+    expect(modelOption.currentValue).toBe("gpt-5.5");
+    expect(modelOption.options.map((o) => o.value)).toEqual([
+      "gpt-5.5",
+      "gpt-5-mini",
+    ]);
+  });
+
+  it("surfaces the flattened codex presets (incl. Plan) with the current mode selected", () => {
+    const all = buildConfigOptions({
+      mode: "plan",
+      model: "gpt-5.5",
+      models: [],
+      efforts: [],
+    });
+    const modeOption = pick(all, "mode");
+    const presetIds = modeOption.options.map((o) => o.value);
+
+    expect(modeOption.currentValue).toBe("plan");
+    expect(presetIds).toEqual([
+      "plan",
+      "read-only",
+      "auto",
+      "full-access",
+    ]);
+  });
+
+  it("keeps the active model/effort selectable even if the lists omit them", () => {
+    const all = buildConfigOptions({
+      mode: "auto",
+      model: "gpt-5.5",
+      effort: "max",
+      models: [{ id: "gpt-5-mini", name: "GPT-5 mini" }],
+      efforts: ["low", "high"],
+    });
+    const modelOption = pick(all, "model");
+    expect(modelOption.currentValue).toBe("gpt-5.5");
+    expect(modelOption.options.map((o) => o.value)).toContain("gpt-5.5");
+    const effortOption = pick(all, "thought_level");
+    expect(effortOption.currentValue).toBe("max");
+    expect(effortOption.options.map((o) => o.value)).toContain("max");
+  });
+
+  it("humanizes reasoning-effort labels (Title case) while keeping raw values", () => {
+    const all = buildConfigOptions({
+      mode: "auto",
+      model: "gpt-5.5",
+      effort: "high",
+      models: [],
+      efforts: ["low", "medium", "high"],
+    });
+    const effortOption = pick(all, "thought_level");
+
+    // `name` is the display label; `value` stays the raw effort id.
+    expect(effortOption.options).toEqual([
+      { name: "Low", value: "low" },
+      { name: "Medium", value: "medium" },
+      { name: "High", value: "high" },
+    ]);
+  });
+
+  it("falls back to the single current model and DEFAULT_EFFORTS when lists are empty", () => {
+    const all = buildConfigOptions({
+      mode: "auto",
+      model: "gpt-5.5",
+      models: [],
+      efforts: [],
+    });
+    const modelOptions = pick(all, "model").options;
+    const effortIds = pick(all, "thought_level").options.map((o) => o.value);
+
+    expect(modelOptions).toEqual([{ name: "gpt-5.5", value: "gpt-5.5" }]);
+    // No effort was passed in, so only the option list is checked here.
+    expect(effortIds).toEqual(DEFAULT_EFFORTS);
+  });
+});
diff --git a/packages/agent/src/adapters/codex-app-server/session-config.ts b/packages/agent/src/adapters/codex-app-server/session-config.ts
new file mode 100644
index 0000000000..dea07c69d6
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/session-config.ts
@@ -0,0 +1,325 @@
+import type { SessionConfigOption } from "@agentclientprotocol/sdk";
+import { type GatewayModel, isOpenAIModel } from "../../gateway-models";
+import { getReasoningEffortOptions } from "../codex/models";
+
+/**
+ * Session config + mode synthesis for the codex app-server adapter. The native
+ * app-server has no "mode" RPC (a thread is configured by `approvalPolicy` +
+ * `sandbox`), so modes are synthesized here and applied per-turn.
+ */
+
+/**
+ * Per-turn sandbox the mode maps to (subset of codex's SandboxPolicy). This is
+ * what makes read-only/plan actually block edits — `approvalPolicy` alone is
+ * neutralized because the process spawns editable.
+ */
+export type CodexSandboxPolicy =
+  | { type: "readOnly"; networkAccess: boolean } // used by the plan + read-only presets
+  | { type: "dangerFullAccess" }; // not assigned by any CODEX_MODES preset
+
+export interface CodexMode {
+  id: string; // emitted as the mode selector option's `value`
+  name: string; // emitted as the option's `name`
+  description: string; // emitted as the option's `description`
+  /** codex AskForApproval the mode maps to, applied per-turn on turn/start. */
+  approvalPolicy: string;
+  /**
+   * Per-turn sandbox override; undefined keeps the spawned editable sandbox.
+   * Only applied outside the cloud sandbox: there, a non-danger policy would
+   * re-engage the unavailable linux-sandbox and panic.
+   */
+  sandboxPolicy?: CodexSandboxPolicy;
+  /**
+   * codex's native collaboration mode (per-turn on `turn/start`). "plan" unlocks
+   * plan proposals + `request_user_input`; everything else runs "default".
+   */
+  collaborationMode?: "plan" | "default";
+  /**
+   * codex's named permission profile (per-turn `activePermissionProfile.extends`).
+   * codex 0.140.0 enforces the sandbox through these built-in profiles; the raw
+   * `sandboxPolicy` is no longer honored alone. Undefined keeps the spawned default.
+   */
+  permissionProfile?: string;
+}
+
+// Flattened Claude-style presets. Restriction is driven by approvalPolicy + the
+// named permissionProfile (codex 0.140.0's enforced sandbox lever); plan/read-only
+// block edits, auto/full-access keep the spawned editable sandbox.
+export const CODEX_MODES: CodexMode[] = [
+  {
+    id: "plan",
+    name: "Plan",
+    description: "Plan first — inspect and propose; makes no changes",
+    approvalPolicy: "on-request",
+    sandboxPolicy: { type: "readOnly", networkAccess: true },
+    permissionProfile: ":read-only",
+    collaborationMode: "plan", // the only preset that sets a collaboration mode
+  },
+  {
+    id: "read-only",
+    name: "Read only",
+    description: "Read-only — can inspect but not modify files",
+    approvalPolicy: "untrusted",
+    sandboxPolicy: { type: "readOnly", networkAccess: true },
+    permissionProfile: ":read-only",
+  },
+  {
+    id: "auto",
+    name: "Auto",
+    description: "Edits the workspace; asks before risky operations",
+    approvalPolicy: "on-request", // no sandboxPolicy / permissionProfile override
+  },
+  {
+    id: "full-access",
+    name: "Full access",
+    description: "Auto-approves all operations",
+    approvalPolicy: "never", // no sandboxPolicy / permissionProfile override
+  },
+];
+
+export const DEFAULT_MODE = "auto"; // fallback when the host's mode is not a preset id
+
+// Preset lookup shared by the accessors below; undefined for an unknown id.
+const findMode = (id?: string) => CODEX_MODES.find((m) => m.id === id);
+
+/** codex approval policy for a mode id; undefined for an unknown id. */
+export function modeApprovalPolicy(
+  modeId: string | undefined,
+): string | undefined {
+  return findMode(modeId)?.approvalPolicy;
+}
+
+/** Per-turn sandbox for a mode id (undefined keeps the spawned sandbox). */
+export function sandboxPolicyFor(
+  modeId: string | undefined,
+): CodexSandboxPolicy | undefined {
+  return findMode(modeId)?.sandboxPolicy;
+}
+
+/** Named permission profile for a mode (undefined keeps the spawned default). */
+export function permissionProfileFor(
+  modeId: string | undefined,
+): string | undefined {
+  return findMode(modeId)?.permissionProfile;
+}
+
+/** codex collaboration mode for a preset — "plan" only for Plan, else "default". */
+export function collaborationModeFor(
+  modeId: string | undefined,
+): "plan" | "default" {
+  return findMode(modeId)?.collaborationMode ?? "default";
+}
+
+/**
+ * Resolve the host's initial `_meta.permissionMode` to a codex mode. A recognized
+ * mode is honored; anything else (e.g. "bypassPermissions") falls back to default.
+ */
+export function resolveInitialMode(permissionMode: string | undefined): string {
+  return findMode(permissionMode)?.id ?? DEFAULT_MODE;
+}
+
+/** Codex's standard reasoning efforts; used when model/list doesn't expose them. */
+export const DEFAULT_EFFORTS = ["low", "medium", "high"];
+
+// Display labels for reasoning efforts; the host renders `name` verbatim.
+const EFFORT_LABELS = new Map<string, string>([
+  ["low", "Low"],
+  ["medium", "Medium"],
+  ["high", "High"],
+  ["xhigh", "Extra High"],
+  ["max", "Max"],
+]);
+
+function humanizeEffort(effort: string): string {
+  return EFFORT_LABELS.get(effort) ?? effort; // Map, so "toString" stays "toString"
+}
+
+/** The current selector values `buildConfigOptions` projects into ACP options. */
+export interface ConfigSelectors {
+  /** Current permission/collaboration preset id (one of CODEX_MODES). */
+  mode: string;
+  model: string; // active model id; appended to the options if `models` omits it
+  effort?: string; // active effort; when unset the first listed effort is current
+  /** From model/list; falls back to the single current model when empty. */
+  models: Array<{ id: string; name: string }>;
+  efforts: string[]; // when empty, DEFAULT_EFFORTS is used instead
+}
+
+/**
+ * Builds the ACP configOptions the host renders, always ordered mode, model,
+ * thought_level. Empty lists fall back (current model / DEFAULT_EFFORTS) and the
+ * active model + effort are appended when the lists omit them.
+ */
+export function buildConfigOptions(s: ConfigSelectors): SessionConfigOption[] {
+  // All three entries are "select" options; only these five fields differ.
+  const select = (
+    id: string,
+    name: string,
+    category: string,
+    currentValue: string,
+    options: Array<{ name: string; value: string; description?: string }>,
+  ) => {
+    const option = { type: "select", id, name, category, currentValue, options };
+    return option as unknown as SessionConfigOption;
+  };
+
+  const current = { id: s.model, name: s.model };
+  const listedModels = s.models.length > 0 ? s.models : [current];
+  // Keep the active model selectable, else currentValue points at nothing.
+  const models = listedModels.some((m) => m.id === s.model)
+    ? listedModels
+    : [...listedModels, current];
+
+  // Same for efforts; with no explicit effort the first listed one is current.
+  const listedEfforts = s.efforts.length > 0 ? s.efforts : DEFAULT_EFFORTS;
+  const effort = s.effort ?? listedEfforts[0];
+  const efforts = listedEfforts.includes(effort)
+    ? listedEfforts
+    : [...listedEfforts, effort];
+
+  const modeOpts = CODEX_MODES.map(({ id, name, description }) => ({
+    name,
+    value: id,
+    description,
+  }));
+  const modelOpts = models.map((m) => ({ name: m.name, value: m.id }));
+  const effortOpts = efforts.map((e) => ({ name: humanizeEffort(e), value: e }));
+  return [
+    select("mode", "Mode", "mode", s.mode, modeOpts),
+    select("model", "Model", "model", s.model, modelOpts),
+    select("effort", "Reasoning effort", "thought_level", effort, effortOpts),
+  ];
+}
+
+/** A model entry from the app-server's `model/list` (loosely typed). */
+interface RawModel {
+  id?: string; // preferred identifier
+  model?: string; // used as the identifier when `id` is absent
+  displayName?: string; // picker label; falls back to id, then model
+  hidden?: boolean; // truthy entries are left out of the picker
+  supportedReasoningEfforts?: Array<{ reasoningEffort?: string } | string>;
+}
+
+/**
+ * Stateful holder for a codex session's model / effort / mode selectors and the
+ * ACP `configOptions` derived from them — synthesizing the Claude-style picker
+ * the app-server has no native concept of, rebuilt on every change.
+ */
+export class SessionConfigState {
+  private _model: string;
+  private _effort?: string;
+  private _mode = DEFAULT_MODE;
+  private models: Array<{ id: string; name: string }> = [];
+  private efforts: string[] = [];
+  private _options: SessionConfigOption[] = [];
+
+  constructor(model: string, effort?: string) {
+    this._model = model;
+    this._effort = effort;
+    this.rebuild();
+  }
+
+  get model(): string {
+    return this._model;
+  }
+  get effort(): string | undefined {
+    return this._effort;
+  }
+  get mode(): string {
+    return this._mode;
+  }
+  get options(): SessionConfigOption[] {
+    return this._options;
+  }
+
+  /** Apply the host's initial approval mode (from `_meta.permissionMode`). */
+  setInitialMode(permissionMode: string | undefined): void {
+    this._mode = resolveInitialMode(permissionMode);
+    this.rebuild();
+  }
+
+  /**
+   * Apply a `setSessionConfigOption` change. Non-string values and unknown ids
+   * are ignored; `modeChanged` is true only when a CODEX_MODES mode was applied.
+   */
+  setOption(
+    configId: string | undefined,
+    value: unknown,
+  ): { modeChanged: boolean } {
+    let modeChanged = false;
+    if (typeof value === "string") {
+      if (configId === "model") this._model = value;
+      else if (configId === "effort") this._effort = value;
+      // An unknown mode has no approval policy or sandbox, so it is never stored.
+      else if (configId === "mode" && CODEX_MODES.some((m) => m.id === value)) {
+        this._mode = value;
+        modeChanged = true;
+      }
+    }
+    this.rebuild();
+    return { modeChanged };
+  }
+
+  /**
+   * Populate the model + effort selectors from a `model/list` `data` array. The
+   * gateway also serves Claude models, so drop non-OpenAI ones (and entries with
+   * no id); it doesn't populate efforts, so fall back to the shared codex map.
+   */
+  loadModels(rawModels: RawModel[]): void {
+    this.models = rawModels
+      .filter((m) => !m?.hidden && typeof (m?.id ?? m?.model) === "string")
+      .filter((m) => isOpenAIModel(m as unknown as GatewayModel))
+      .map((m) => {
+        const id = (m.id ?? m.model) as string;
+        return { id, name: m.displayName ?? id };
+      });
+    const current = rawModels.find(
+      (m) => m?.id === this._model || m?.model === this._model,
+    );
+    const liveEfforts = (current?.supportedReasoningEfforts ?? [])
+      .map((e) => (typeof e === "string" ? e : e?.reasoningEffort))
+      .filter((e): e is string => typeof e === "string");
+    this.efforts = liveEfforts.length
+      ? liveEfforts
+      : getReasoningEffortOptions(this._model).map((o) => o.value);
+    this.rebuild();
+  }
+
+  /** Reset the model/effort lists (model/list failed); keeps the current model. */
+  clearModels(): void {
+    this.models = [];
+    this.efforts = [];
+    this.rebuild();
+  }
+
+  /**
+   * codex's per-turn `collaborationMode`: `{ mode, settings: { model } }`. The
+   * model must be a string (not the null in collaborationMode/list output).
+   */
+  collaborationModeForTurn(): unknown {
+    return {
+      mode: collaborationModeFor(this._mode),
+      settings: { model: this._model },
+    };
+  }
+
+  /** Per-turn approval policy / sandbox override for the current mode, if any. */
+  approvalPolicy(): string | undefined {
+    return modeApprovalPolicy(this._mode);
+  }
+  sandboxPolicy(): CodexSandboxPolicy | undefined {
+    return sandboxPolicyFor(this._mode);
+  }
+
+  /** Per-turn `activePermissionProfile` (codex 0.140.0's enforced sandbox), or undefined. */
+  permissionProfile(): { extends: string } | undefined {
+    const profile = permissionProfileFor(this._mode);
+    return profile ? { extends: profile } : undefined;
+  }
+
+  /** Rebuild the derived ACP options from the current selectors. */
+  private rebuild(): void {
+    const { _mode: mode, _model: model, _effort: effort, models, efforts } = this;
+    this._options = buildConfigOptions({ mode, model, effort, models, efforts });
+  }
+}
diff --git a/packages/agent/src/adapters/codex-app-server/spawn.test.ts b/packages/agent/src/adapters/codex-app-server/spawn.test.ts
index 0be0058b4b..a0db5c3b62 100644
--- a/packages/agent/src/adapters/codex-app-server/spawn.test.ts
+++ b/packages/agent/src/adapters/codex-app-server/spawn.test.ts
@@ -19,13 +19,42 @@ describe("buildAppServerArgs", () => {
     );
   });
 
-  it("passes guidance via developer_instructions, never the replacing key", () => {
+  it.each([
+    ["darwin", 'sandbox_mode="workspace-write"'],
+    ["linux", 'sandbox_mode="danger-full-access"'],
+    ["win32", 'sandbox_mode="danger-full-access"'],
+  ])(
+    "on %s spawns with %s (macOS keeps the sandbox engaged so read-only can restrict; cloud/linux avoids the linux-sandbox panic)",
+    (platform, expected) => {
+      // Override process.platform via defineProperty for the duration of the case.
+      const setPlatform = (value: string) =>
+        Object.defineProperty(process, "platform", {
+          value,
+          configurable: true,
+        });
+      const realPlatform = process.platform;
+      setPlatform(platform);
+      try {
+        const args = buildAppServerArgs({ binaryPath: "/bundle/codex" });
+        const sandboxArgs = args.filter((a) => a.startsWith("sandbox_mode="));
+        expect(args).toContain(expected);
+        expect(sandboxArgs).toHaveLength(1);
+      } finally {
+        // Restore even when an assertion throws, so later tests see the real value.
+        setPlatform(realPlatform);
+      }
+    },
+  );
+
+  it("does not set instructions at spawn (developer_instructions are per-thread)", () => {
     const args = buildAppServerArgs({
       binaryPath: "/bundle/codex",
       developerInstructions: "Follow PostHog rules.",
     });
 
-    expect(args).toContain('developer_instructions="Follow PostHog rules."');
+    expect(args.some((arg) => arg.startsWith("developer_instructions="))).toBe(
+      false,
+    );
     expect(args.some((arg) => arg.startsWith("instructions="))).toBe(false);
   });
 });
diff --git a/packages/agent/src/adapters/codex-app-server/spawn.ts b/packages/agent/src/adapters/codex-app-server/spawn.ts
index 2db7a633b0..48a97d67ca 100644
--- a/packages/agent/src/adapters/codex-app-server/spawn.ts
+++ b/packages/agent/src/adapters/codex-app-server/spawn.ts
@@ -4,6 +4,7 @@ import { delimiter, dirname } from "node:path";
 import type { Readable, Writable } from "node:stream";
 import type { ProcessSpawnedCallback } from "../../types";
 import { Logger } from "../../utils/logger";
+import { CodexSettingsManager } from "../codex/settings";
 
 export interface CodexAppServerProcessOptions {
   /** Path to the native `codex` CLI binary (the one that exposes `app-server`). */
@@ -13,6 +14,8 @@ export interface CodexAppServerProcessOptions {
   apiKey?: string;
   /** Guidance appended to Codex's base prompt via `developer_instructions`. */
   developerInstructions?: string;
+  /** Extra codex `-c key=value` config overrides (e.g. auto_compact_token_limit). */
+  configOverrides?: Record<string, string | number>;
   logger?: Logger;
   processCallbacks?: ProcessSpawnedCallback;
 }
@@ -31,6 +34,27 @@ export function buildAppServerArgs(
 
   args.push("-c", "features.remote_models=false");
 
+  // OS sandbox gated on platform (= availability): macOS Seatbelt → workspace-write
+  // (keeps the sandbox engaged so a per-turn readOnly can tighten it and block
+  // edits); linux/windows have no sandbox launcher and would panic, so
+  // danger-full-access (the enclosing docker/Modal sandbox isolates instead).
+  args.push(
+    "-c",
+    process.platform === "darwin"
+      ? `sandbox_mode="workspace-write"`
+      : `sandbox_mode="danger-full-access"`,
+  );
+
+  // Disable the user's ambient ~/.codex MCP servers so the adapter only exposes
+  // MCP servers PostHog injects per-thread; otherwise codex fails connecting to them.
+  for (const name of new CodexSettingsManager(
+    options.cwd ?? process.cwd(),
+  ).getSettings().mcpServerNames) {
+    // codex's `-c` parser rejects quoted/special key segments; skip such names.
+    if (!/^[A-Za-z0-9_-]+$/.test(name)) continue;
+    args.push("-c", `mcp_servers.${name}.enabled=false`);
+  }
+
   if (options.apiBaseUrl) {
     args.push("-c", `model_provider="posthog"`);
     args.push("-c", `model_providers.posthog.name="PostHog Gateway"`);
@@ -42,13 +66,15 @@ export function buildAppServerArgs(
     );
   }
 
-  if (options.developerInstructions) {
-    const escaped = options.developerInstructions
-      .replace(/\\/g, "\\\\")
-      .replace(/\n/g, "\\n")
-      .replace(/\r/g, "\\r")
-      .replace(/"/g, '\\"');
-    args.push("-c", `developer_instructions="${escaped}"`);
+  // developer_instructions are set per-thread in thread/start (with the host's
+  // task system prompt), not as a spawn-level global default.
+
+  // Numbers/bools go bare; strings become TOML basic strings. JSON.stringify
+  // escapes quotes, backslashes and control characters — a raw `"${value}"` would
+  // let them through and corrupt (or inject into) the override.
+  for (const [key, value] of Object.entries(options.configOverrides ?? {})) {
+    const rendered = typeof value === "string" ? JSON.stringify(value) : value;
+    args.push("-c", `${key}=${rendered}`);
   }
 
   return args;
diff --git a/packages/agent/src/adapters/codex-app-server/turn-controller.ts b/packages/agent/src/adapters/codex-app-server/turn-controller.ts
new file mode 100644
index 0000000000..5192222f77
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/turn-controller.ts
@@ -0,0 +1,96 @@
+import type { StopReason } from "@agentclientprotocol/sdk";
+
+interface PendingTurn {
+  resolve: (reason: StopReason) => void; // settles the turn's promise
+  reject: (err: Error) => void; // rejects it (see TurnController.fail)
+}
+
+/**
+ * The turn state machine for one codex thread. A turn is async: `prompt()` starts it and
+ * awaits a completion promise that `turn/completed` (or an interrupt/error) settles. Owns
+ * the in-flight `turnId`, the pending completion, and the ids of interrupted turns to drop.
+ */
+export class TurnController {
+  private turnId?: string;
+  private pending?: PendingTurn;
+  private completion?: Promise<StopReason>;
+  private readonly cancelled = new Set<string>();
+
+  /** Open a new turn; any id left over from an earlier turn is dropped first. */
+  begin(): Promise<StopReason> {
+    this.turnId = undefined;
+    this.completion = new Promise<StopReason>((resolve, reject) => {
+      this.pending = { resolve, reject };
+    });
+    return this.completion;
+  }
+
+  /** The live turn id (steer precondition / interrupt target), if a turn started. */
+  get activeTurnId(): string | undefined {
+    return this.turnId;
+  }
+
+  /** A turn has begun and its completion has not yet been claimed or cleared. */
+  get isPending(): boolean {
+    return this.pending !== undefined;
+  }
+
+  /** A turn is running AND has a turnId — i.e. it can be steered. */
+  get isRunning(): boolean {
+    return this.pending !== undefined && this.turnId !== undefined;
+  }
+
+  /** Capture the turn id from turn/started (only while a turn is pending). */
+  onStarted(id: string | undefined): void {
+    if (this.pending && typeof id === "string") this.turnId = id;
+  }
+
+  /** Adopt the id reported for a steer; unlike onStarted, not gated on pending. */
+  onSteered(id: string | undefined): void {
+    if (typeof id === "string") this.turnId = id;
+  }
+
+  /** Await the in-flight turn's completion (the steer path reuses the original). */
+  awaitCompletion(): Promise<StopReason> {
+    return this.completion ?? Promise.resolve("end_turn");
+  }
+
+  /** Atomically claim the pending turn (clears the slot + turnId synchronously), or undefined if already claimed. */
+  claim(): PendingTurn | undefined {
+    const claimed = this.pending;
+    this.pending = undefined;
+    if (claimed) this.turnId = undefined;
+    return claimed;
+  }
+
+  /** Mark the live turn interrupted (so its late completion is dropped) and return its id, or undefined. */
+  markInterrupted(): string | undefined {
+    if (this.turnId) this.cancelled.add(this.turnId);
+    return this.turnId || undefined;
+  }
+
+  /** True (once) if this completion is for an interrupted turn we should drop. */
+  shouldDropCompletion(id: string | undefined): boolean {
+    return id ? this.cancelled.delete(id) : false;
+  }
+
+  /** Clear the pending slot after prompt() returns (covers a turn/start throw). */
+  finishPrompt(): void {
+    this.pending = undefined;
+    this.completion = undefined;
+  }
+
+  /** Reject the in-flight turn (e.g. the server exited) and forget its turn id. */
+  fail(err: Error): void {
+    this.claim()?.reject(err);
+    this.completion = undefined;
+  }
+
+  /** Resolve and clear everything on session close. */
+  close(reason: StopReason): void {
+    this.claim()?.resolve(reason);
+    this.turnId = undefined;
+    this.completion = undefined;
+    this.cancelled.clear();
+  }
+}
diff --git a/packages/agent/src/adapters/codex-app-server/usage-tracker.ts b/packages/agent/src/adapters/codex-app-server/usage-tracker.ts
new file mode 100644
index 0000000000..ecd87ffa47
--- /dev/null
+++ b/packages/agent/src/adapters/codex-app-server/usage-tracker.ts
@@ -0,0 +1,88 @@
+import {
+  type ContextBreakdownBaseline,
+  emptyBaseline,
+} from "../claude/context-breakdown";
+import type { AccumulatedUsage } from "./ext-notifications";
+
+/** The live `_posthog/usage_update` fields (context-window occupancy). */
+export interface UsageUpdate {
+  used: number; // totalTokens of the usage snapshot being reported
+  size: number | null; // modelContextWindow; null when not reported
+  usage: {
+    inputTokens?: number;
+    outputTokens?: number;
+    cachedReadTokens?: number; // codex's cachedInputTokens
+    reasoningTokens?: number; // codex's reasoningOutputTokens
+    totalTokens?: number;
+  };
+}
+
+/**
+ * Tracks token usage for one codex thread. codex's `thread/tokenUsage/updated` carries
+ * `{ total, last, modelContextWindow }`; `last` drives both context occupancy and per-turn
+ * usage instead of a diff of `total` (`total` is used as-is only when `last` is absent).
+ */
+export class UsageTracker {
+  private baseline: ContextBreakdownBaseline = emptyBaseline();
+  private lastTurn?: AccumulatedUsage;
+  private contextUsed?: number;
+
+  /** Replace the stored context-breakdown baseline. */
+  setBaseline(baseline: ContextBreakdownBaseline): void {
+    this.baseline = baseline;
+  }
+
+  /** The stored baseline (`emptyBaseline()` until `setBaseline` is called). */
+  get baselineBreakdown(): ContextBreakdownBaseline {
+    return this.baseline;
+  }
+
+  /** Forget the previous turn's numbers so a turn without usage reports 0. */
+  resetForTurn(): void {
+    this.contextUsed = undefined;
+    this.lastTurn = undefined;
+  }
+
+  /** Record a `thread/tokenUsage/updated` payload (`last`, else `total`); null when `total` is missing. */
+  ingest(params: unknown): UsageUpdate | null {
+    const tokenUsage = (params as { tokenUsage?: any })?.tokenUsage;
+    if (!tokenUsage?.total) return null;
+    const snapshot = tokenUsage.last ?? tokenUsage.total;
+    const { inputTokens, outputTokens, totalTokens } = snapshot;
+    const cachedReadTokens = snapshot.cachedInputTokens;
+    // Drives the per-source breakdown's "conversation" bucket on turn complete.
+    this.contextUsed = inputTokens ?? totalTokens;
+    this.lastTurn = {
+      inputTokens: inputTokens ?? 0,
+      outputTokens: outputTokens ?? 0,
+      cachedReadTokens: cachedReadTokens ?? 0,
+      // codex's TokenUsageBreakdown has no cache-write field; 0 is authoritative.
+      cachedWriteTokens: 0,
+    };
+    const usage = {
+      inputTokens,
+      outputTokens,
+      cachedReadTokens,
+      reasoningTokens: snapshot.reasoningOutputTokens,
+      totalTokens,
+    };
+    const size = tokenUsage.modelContextWindow ?? null;
+    return { used: totalTokens, size, usage };
+  }
+
+  /** Usage for `_posthog/turn_complete`: the last ingested snapshot, or all zeros. */
+  perTurnUsage(): AccumulatedUsage {
+    if (this.lastTurn) return this.lastTurn;
+    return {
+      inputTokens: 0,
+      outputTokens: 0,
+      cachedReadTokens: 0,
+      cachedWriteTokens: 0,
+    };
+  }
+
+  /** Context occupancy from the latest snapshot (input tokens, else total); undefined before any. */
+  contextTokens(): number | undefined {
+    return this.contextUsed;
+  }
+}
diff --git a/packages/agent/src/adapters/codex/spawn.ts b/packages/agent/src/adapters/codex/spawn.ts
index 9e14e1a8cd..c023b31126 100644
--- a/packages/agent/src/adapters/codex/spawn.ts
+++ b/packages/agent/src/adapters/codex/spawn.ts
@@ -25,6 +25,12 @@ export interface CodexProcessOptions {
   settings?: CodexSettings;
   /** Additional writable roots passed to Codex's workspace-write sandbox. */
   additionalDirectories?: string[];
+  /**
+   * Extra codex `-c key=value` config overrides (app-server sub-adapter only).
+   * An escape hatch for config the adapter doesn't model — e.g. the e2e sets
+   * `auto_compact_token_limit` low to force a compaction.
+   */
+  configOverrides?: Record<string, string | number>;
 }
 
 export interface CodexProcess {
@@ -39,6 +45,14 @@ function buildConfigArgs(options: CodexProcessOptions): string[] {
 
   args.push("-c", `features.remote_models=false`);
 
+  // The agent already runs inside PostHog's isolated sandbox (docker/Modal with
+  // agentsh egress + filesystem controls), so Codex's own OS-level sandbox is
+  // redundant — and its `linux-sandbox` launcher is unavailable inside that
+  // sandbox, so the default workspace-write mode panics ("sandbox launcher
+  // unavailable" → require_escalated) and wedges the session. Run Codex with no
+  // nested sandbox; the enclosing sandbox provides the isolation.
+  args.push("-c", `sandbox_mode="danger-full-access"`);
+
   // Disable the user's local MCPs one-by-one so Codex only uses the MCPs we
   // provide via ACP. We can't use `-c mcp_servers={}` because that makes Codex
   // ignore MCPs entirely, including the ones we inject later.
diff --git a/packages/agent/src/agent.ts b/packages/agent/src/agent.ts
index 28d26d627c..85c78eab47 100644
--- a/packages/agent/src/agent.ts
+++ b/packages/agent/src/agent.ts
@@ -129,6 +129,7 @@ export class Agent {
       logger: this.logger,
       processCallbacks: options.processCallbacks,
       onStructuredOutput: options.onStructuredOutput,
+      useCodexAppServer: options.useCodexAppServer,
       allowedModelIds,
       posthogApiConfig: this.posthogApiConfig,
       enricherEnabled: this.enricherEnabled,
diff --git a/packages/agent/src/execution-mode.test.ts b/packages/agent/src/execution-mode.test.ts
index be59649062..669715968e 100644
--- a/packages/agent/src/execution-mode.test.ts
+++ b/packages/agent/src/execution-mode.test.ts
@@ -12,8 +12,9 @@ describe("execution modes", () => {
     ]);
   });
 
-  it("includes full access for codex sessions", () => {
+  it("exposes the same presets as a live codex session (incl. plan)", () => {
     expect(getAvailableCodexModes().map((mode) => mode.id)).toEqual([
+      "plan",
       "read-only",
       "auto",
       "full-access",
diff --git a/packages/agent/src/execution-mode.ts b/packages/agent/src/execution-mode.ts
index 99f6799183..c90925e631 100644
--- a/packages/agent/src/execution-mode.ts
+++ b/packages/agent/src/execution-mode.ts
@@ -73,7 +73,16 @@ export function isCodexNativeMode(mode: string): mode is CodexNativeMode {
   return (CODEX_NATIVE_MODES as readonly string[]).includes(mode);
 }
 
+// Mirrors the codex app-server adapter's CODEX_MODES (session-config.ts) so the
+// task-creation picker offers the same presets as a live session. "plan" is a
+// valid CodeExecutionMode that codex-acp maps to read-only, and the app-server
+// gives it a read-only sandbox — so it is safe on both sub-adapters.
 const codexModes: ModeInfo[] = [
+  {
+    id: "plan",
+    name: "Plan",
+    description: "Plan first — inspect and propose; makes no changes",
+  },
   {
     id: "read-only",
     name: "Read Only",
diff --git a/packages/agent/src/server/agent-server.ts b/packages/agent/src/server/agent-server.ts
index 5d64177928..9f854db6c0 100644
--- a/packages/agent/src/server/agent-server.ts
+++ b/packages/agent/src/server/agent-server.ts
@@ -395,7 +395,14 @@ export class AgentServer {
   }
 
   private shouldRelayPermissionToClient(mode: PermissionMode): boolean {
-    return mode === "default" || mode === "auto" || mode === "read-only";
+    // "plan" is look-don't-touch like "read-only": relay so a human can veto.
+    const relayedModes: readonly PermissionMode[] = [
+      "default",
+      "auto",
+      "read-only",
+      "plan",
+    ];
+    return relayedModes.includes(mode);
   }
 
   private createApp(): Hono {
@@ -1156,6 +1163,11 @@ export class AgentServer {
               cwd: this.config.repositoryPath ?? "/tmp/workspace",
               apiBaseUrl: gatewayEnv.openaiBaseUrl,
               apiKey: this.config.apiKey,
+              // Path to the bundled codex-acp binary; the native app-server
+              // adapter derives `codex` from the same directory. Set in the
+              // sandbox image (POSTHOG_CODEX_BINARY_PATH); when unset the
+              // adapter falls back to npx codex-acp.
+              binaryPath: process.env.POSTHOG_CODEX_BINARY_PATH,
               model: this.config.model ?? DEFAULT_CODEX_MODEL,
               reasoningEffort: this.config.reasoningEffort,
               developerInstructions: codexInstructions,
@@ -2915,9 +2927,13 @@ ${signedCommitInstructions}
             isQuestion ||
             this.shouldRelayPermissionToClient(sessionPermissionMode);
 
+          // A background run has no human to answer a relayed approval
+          // (hasDesktopConnected is true from the event-relay reader), so
+          // auto-approve rather than hang on it.
           if (
-            isPlanApproval ||
-            (needsDesktopApproval && this.session?.hasDesktopConnected)
+            mode !== "background" &&
+            (isPlanApproval ||
+              (needsDesktopApproval && this.session?.hasDesktopConnected))
           ) {
             this.logger.debug("Relaying permission request", {
               kind: params.toolCall?.kind,
diff --git a/packages/agent/src/types.ts b/packages/agent/src/types.ts
index 0056590678..d5c10e6169 100644
--- a/packages/agent/src/types.ts
+++ b/packages/agent/src/types.ts
@@ -65,6 +65,12 @@ export interface TaskExecutionOptions {
   onStructuredOutput?: (output: Record<string, unknown>) => Promise<void>;
   /** Additional directories the agent process can access beyond cwd. */
   additionalDirectories?: string[];
+  /**
+   * Codex-only feature-flag lever: `true` selects the native app-server adapter,
+   * `false` codex-acp. The host evaluates a PostHog flag and passes the result;
+   * undefined falls back to env overrides then the bundled-binary default.
+   */
+  useCodexAppServer?: boolean;
 }
 
 export type LogLevel = "debug" | "info" | "warn" | "error";
diff --git a/packages/agent/vitest.e2e.config.ts b/packages/agent/vitest.e2e.config.ts
new file mode 100644
index 0000000000..01e95543af
--- /dev/null
+++ b/packages/agent/vitest.e2e.config.ts
@@ -0,0 +1,24 @@
+import { resolve } from "node:path";
+import { defineConfig } from "vitest/config";
+
+// Live, opt-in e2e suite. Separate from the default `vitest.config.ts` (which
+// only includes `src/**`), so these never run under `pnpm test` or the unit /
+// integration CI jobs — only via `pnpm test:e2e` (e.g. the opt-in `e2e` job).
+// Sequential, long timeouts: each test drives two real model turns end to end.
+export default defineConfig({
+  resolve: {
+    alias: {
+      "@": resolve(__dirname, "src"), // `@` maps to ./src next to this file
+    },
+  },
+  test: {
+    globals: true,
+    environment: "node",
+    include: ["e2e/**/*.e2e.test.ts"],
+    exclude: ["**/node_modules/**", "**/dist/**"],
+    isolate: true,
+    fileParallelism: false, // run test files one at a time
+    testTimeout: 300_000, // 5 minutes per test
+    hookTimeout: 120_000, // 2 minutes per hook
+  },
+});
diff --git a/packages/core/src/sessions/cloudSessionConfig.test.ts b/packages/core/src/sessions/cloudSessionConfig.test.ts
index d0712992ec..8bcf537ba7 100644
--- a/packages/core/src/sessions/cloudSessionConfig.test.ts
+++ b/packages/core/src/sessions/cloudSessionConfig.test.ts
@@ -61,7 +61,8 @@ describe("buildCloudDefaultConfigOptions", () => {
   it.each([
     { initialMode: "auto", expected: "auto" },
     { initialMode: "full-access", expected: "full-access" },
-    { initialMode: "plan", expected: "auto" },
+    // plan is now a valid codex preset (mirrors the app-server), so it's kept.
+    { initialMode: "plan", expected: "plan" },
     { initialMode: "default", expected: "auto" },
   ])(
     "validates codex initial mode $initialMode",
diff --git a/packages/core/src/sessions/contextUsage.test.ts b/packages/core/src/sessions/contextUsage.test.ts
index 4280d146c5..fb295c5818 100644
--- a/packages/core/src/sessions/contextUsage.test.ts
+++ b/packages/core/src/sessions/contextUsage.test.ts
@@ -56,6 +56,27 @@ describe("extractContextUsage", () => {
     expect(result?.breakdown).toBeNull();
   });
 
+  it("surfaces token count even when the context window size is unknown", () => {
+    // codex omits `size` when the protocol has no modelContextWindow — the
+    // aggregate must still render (size 0, no percentage) rather than vanish.
+    const event: AcpMessage = {
+      type: "acp_message",
+      ts: 1,
+      message: {
+        jsonrpc: "2.0",
+        method: "session/update",
+        params: {
+          sessionId: "s1",
+          update: { sessionUpdate: "usage_update", used: 50_000 },
+        },
+      },
+    };
+    const result = extractContextUsage([event]);
+    expect(result?.used).toBe(50_000);
+    expect(result?.size).toBe(0);
+    expect(result?.percentage).toBe(0);
+  });
+
   it("merges breakdown from a _posthog/usage_update notification", () => {
     const result = extractContextUsage([
       usageUpdateEvent(50_000, 200_000),
diff --git a/packages/core/src/sessions/contextUsage.ts b/packages/core/src/sessions/contextUsage.ts
index fb59a55060..22f33280e9 100644
--- a/packages/core/src/sessions/contextUsage.ts
+++ b/packages/core/src/sessions/contextUsage.ts
@@ -82,16 +82,18 @@ function extractAggregate(
     const update = params?.update;
     if (
       update?.sessionUpdate === "usage_update" &&
-      typeof update.used === "number" &&
-      typeof update.size === "number"
+      typeof update.used === "number"
     ) {
+      // The model context window (`size`) may be unknown — e.g. codex omits it
+      // when the protocol doesn't report `modelContextWindow`. Still surface the
+      // raw token count (size 0 → the indicator shows used tokens, no
+      // percentage) rather than dropping the whole aggregate.
+      const size = typeof update.size === "number" ? update.size : 0;
       const percentage =
-        update.size > 0
-          ? Math.min(100, Math.round((update.used / update.size) * 100))
-          : 0;
+        size > 0 ? Math.min(100, Math.round((update.used / size) * 100)) : 0;
       return {
         used: update.used,
-        size: update.size,
+        size,
         percentage,
         cost: update.cost ?? null,
       };
diff --git a/packages/core/src/sessions/executionModes.ts b/packages/core/src/sessions/executionModes.ts
index 8d471d44f1..dc36a4a6a8 100644
--- a/packages/core/src/sessions/executionModes.ts
+++ b/packages/core/src/sessions/executionModes.ts
@@ -32,7 +32,15 @@ const availableModes: ModeInfo[] = [
   },
 ];
 
+// Mirrors the codex app-server adapter's CODEX_MODES so the picker offers the
+// same presets as a live session. "plan" is a CodeExecutionMode codex-acp maps
+// to read-only and the app-server gives a read-only sandbox — safe on both.
 const codexModes: ModeInfo[] = [
+  {
+    id: "plan",
+    name: "Plan",
+    description: "Plan first — inspect and propose; makes no changes",
+  },
   {
     id: "read-only",
     name: "Read Only",
diff --git a/packages/core/src/sessions/sessionService.ts b/packages/core/src/sessions/sessionService.ts
index 36b18a1bd6..1c44ae604e 100644
--- a/packages/core/src/sessions/sessionService.ts
+++ b/packages/core/src/sessions/sessionService.ts
@@ -26,7 +26,9 @@ import {
   type OptimisticItem,
   type PermissionRequest,
   type QueuedMessage,
+  resolveBypassRevertMode,
   type StoredLogEntry,
+  sessionSupportsNativeSteer,
   type TaskRunStatus,
 } from "@posthog/shared";
 import { ANALYTICS_EVENTS } from "@posthog/shared/analytics-events";
@@ -252,6 +254,13 @@ export interface SessionServiceHelpers {
   ) => Promise<string[]>;
 }
 
+/**
+ * PostHog flag gating the native codex app-server sub-adapter. When enabled for
+ * the user, a codex session uses the app-server adapter instead of codex-acp.
+ * Resolved at session start and passed to the agent as `useCodexAppServer`.
+ */
+export const CODEX_APP_SERVER_FLAG = "codex-app-server";
+
 export interface SessionServiceDeps {
   trpc: SessionTrpc;
   store: ISessionStore;
@@ -267,6 +276,12 @@ export interface SessionServiceDeps {
     info: (msg: any, opts?: any) => unknown;
   };
   track: (event: string, props?: Record<string, unknown>) => void;
+  /**
+   * Evaluates a PostHog feature flag for the current user. Used to resolve
+   * {@link CODEX_APP_SERVER_FLAG} at session start. Optional so non-desktop
+   * hosts (stubbed web, tests) can omit it — absent is treated as "flag off".
+   */
+  featureFlags?: { isEnabled(flagKey: string): boolean };
   buildPermissionToolMetadata: (...args: any[]) => any;
   notifyPermissionRequest: (...args: any[]) => any;
   notifyPromptComplete: (...args: any[]) => any;
@@ -954,6 +969,7 @@ export class SessionService {
         logUrl,
         sessionId,
         adapter: resolvedAdapter,
+        useCodexAppServer: this.resolveUseCodexAppServer(resolvedAdapter),
         permissionMode: persistedMode,
         model: persistedModel,
         customInstructions: customInstructions || undefined,
@@ -978,6 +994,7 @@ export class SessionService {
         this.d.store.updateSession(taskRunId, {
           status: "connected",
           configOptions,
+          steering: (result as { steering?: string }).steering,
         });
 
         // Persist the merged config options
@@ -1245,6 +1262,26 @@ export class SessionService {
     );
   }
 
+  /**
+   * Host-side value for the agent's `useCodexAppServer` option, derived from
+   * the {@link CODEX_APP_SERVER_FLAG} feature flag. Non-codex adapters always
+   * get `undefined` (Claude ignores the option).
+   *
+   * The flag is a one-way opt-in: ON yields `true`; OFF, not yet loaded, or no
+   * `featureFlags` dep yields `undefined` — never `false`. On the agent side
+   * the host value has the highest precedence, ahead of the
+   * `POSTHOG_CODEX_USE_APP_SERVER` env override and the codex-acp default, so
+   * hard-passing `false` here would shadow that env override.
+   */
+  private resolveUseCodexAppServer(
+    adapter: "claude" | "codex" | undefined,
+  ): boolean | undefined {
+    const flagOn =
+      adapter === "codex" &&
+      this.d.featureFlags?.isEnabled(CODEX_APP_SERVER_FLAG);
+    return flagOn ? true : undefined;
+  }
+
   private async createNewLocalSession(
     taskId: string,
     taskTitle: string,
@@ -1277,6 +1314,7 @@ export class SessionService {
       projectId: auth.projectId,
       permissionMode: executionMode,
       adapter,
+      useCodexAppServer: this.resolveUseCodexAppServer(adapter),
       customInstructions: startCustomInstructions || undefined,
       effort: effortLevelSchema.safeParse(reasoningLevel).success
         ? (reasoningLevel as EffortLevel)
@@ -1312,6 +1350,7 @@ export class SessionService {
       | SessionConfigOption[]
       | undefined;
     session.configOptions = configOptions;
+    session.steering = (result as { steering?: string }).steering;
 
     // Persist the config options
     if (configOptions) {
@@ -2156,22 +2195,18 @@ export class SessionService {
     }
 
     // Steer: the user sent a message mid-turn and asked to fold it into the
-    // running turn rather than queue it. Native (Claude, local) injects at the
-    // next tool boundary; local Codex interrupts the turn and resends below as
-    // a fresh prompt.
-    //
-    // Cloud has no real mid-turn steer: the backend only delivers user messages
-    // between turns, so a cloud "steer" would cancel the running turn for no
-    // gain (the message lands next turn either way) while surfacing a jarring
-    // interruption. Until the backend supports true steering, cloud steer falls
-    // through to the queue like a normal message. Compaction also falls through.
+    // running turn rather than queue it. Adapters that negotiated
+    // `steering: "native"` (Claude, codex app-server) inject at the next tool
+    // boundary; codex-acp ("interrupt-resend") and unknown adapters cancel and
+    // resend. Cloud has no real mid-turn steer (the backend only delivers
+    // messages between turns), so it falls through to the queue; compaction too.
     if (
       options?.steer &&
       !session.isCloud &&
       session.isPromptPending &&
       !session.isCompacting
     ) {
-      if (session.adapter === "claude") {
+      if (sessionSupportsNativeSteer(session)) {
         return this.sendSteerPrompt(session, prompt);
       }
       await this.cancelPrompt(taskId);
@@ -4547,6 +4582,7 @@ export class SessionService {
       isCloud: boolean;
       allowBypassPermissions: boolean;
       currentModeId: string | boolean | undefined;
+      modeOption: SessionConfigOption | undefined;
     },
   ): void {
     if (options.allowBypassPermissions) return;
@@ -4555,7 +4591,9 @@ export class SessionService {
       options.currentModeId === "bypassPermissions" ||
       options.currentModeId === "full-access";
     if (!isBypass || !taskId) return;
-    this.setSessionConfigOptionByCategory(taskId, "mode", "default");
+    const target = resolveBypassRevertMode(options.modeOption);
+    if (!target) return;
+    this.setSessionConfigOptionByCategory(taskId, "mode", target);
   }
 
   /**
diff --git a/packages/shared/src/index.ts b/packages/shared/src/index.ts
index c8516c74ec..967c6eb9ca 100644
--- a/packages/shared/src/index.ts
+++ b/packages/shared/src/index.ts
@@ -184,7 +184,9 @@ export {
   type OptimisticItem,
   type PermissionRequest,
   type QueuedMessage,
+  resolveBypassRevertMode,
   type SessionStatus,
+  sessionSupportsNativeSteer,
 } from "./sessions";
 export type {
   SignalReportOrderingField,
@@ -217,6 +219,15 @@ export {
   formatRelativeTimeShort,
   getRelativeDateGroup,
 } from "./time";
+export {
+  mcpToolKey,
+  type PosthogToolMeta,
+  parseMcpToolName,
+  posthogToolMeta,
+  readAgentToolName,
+  readMcpToolDescriptor,
+  readMcpToolName,
+} from "./tool-meta";
 export { TypedEventEmitter } from "./typed-event-emitter";
 export { isSafeExternalUrl } from "./url";
 export { getCloudUrlFromRegion } from "./urls";
diff --git a/packages/shared/src/sessions.test.ts b/packages/shared/src/sessions.test.ts
new file mode 100644
index 0000000000..85d736fbc3
--- /dev/null
+++ b/packages/shared/src/sessions.test.ts
@@ -0,0 +1,112 @@
+import type { SessionConfigOption } from "@agentclientprotocol/sdk";
+import { describe, expect, it } from "vitest";
+import {
+  type AgentSession,
+  resolveBypassRevertMode,
+  sessionSupportsNativeSteer,
+} from "./sessions";
+
+function modeOption(
+  values: string[],
+  currentValue: string,
+): SessionConfigOption {
+  return {
+    type: "select",
+    id: "mode",
+    name: "Mode",
+    category: "mode",
+    currentValue,
+    options: values.map((v) => ({ name: v, value: v })),
+  } as unknown as SessionConfigOption;
+}
+
+describe("resolveBypassRevertMode", () => {
+  it("reverts a claude session to 'default'", () => {
+    const opt = modeOption(
+      ["default", "acceptEdits", "plan", "bypassPermissions"],
+      "bypassPermissions",
+    );
+    expect(resolveBypassRevertMode(opt)).toBe("default");
+  });
+
+  it("reverts a codex session to 'auto', never the claude-only 'default'", () => {
+    const opt = modeOption(
+      ["plan", "read-only", "auto", "full-access"],
+      "full-access",
+    );
+    const target = resolveBypassRevertMode(opt);
+    expect(target).toBe("auto");
+    expect(target).not.toBe("default");
+  });
+
+  it("falls back to the first non-bypass option when neither default nor auto exist", () => {
+    expect(
+      resolveBypassRevertMode(
+        modeOption(["read-only", "full-access"], "full-access"),
+      ),
+    ).toBe("read-only");
+  });
+
+  it("returns undefined for a missing or non-select option", () => {
+    expect(resolveBypassRevertMode(undefined)).toBeUndefined();
+    expect(
+      resolveBypassRevertMode({
+        type: "boolean",
+      } as unknown as SessionConfigOption),
+    ).toBeUndefined();
+  });
+});
+
+describe("sessionSupportsNativeSteer", () => {
+  type Case = Pick<AgentSession, "isCloud" | "steering" | "adapter">;
+
+  it.each<[string, Case, boolean]>([
+    // Capability-driven: "native" folds the message into the running turn.
+    [
+      "claude advertises native",
+      { isCloud: false, steering: "native", adapter: "claude" },
+      true,
+    ],
+    [
+      "codex app-server advertises native",
+      { isCloud: false, steering: "native", adapter: "codex" },
+      true,
+    ],
+    // codex-acp advertises "interrupt-resend" — must NOT steer natively.
+    [
+      "codex-acp interrupt-resend",
+      { isCloud: false, steering: "interrupt-resend", adapter: "codex" },
+      false,
+    ],
+    // Fallback: pre-capability start paths leave steering unset; never regress claude.
+    [
+      "claude with no capability (fallback)",
+      { isCloud: false, steering: undefined, adapter: "claude" },
+      true,
+    ],
+    [
+      "codex with no capability (no fallback)",
+      { isCloud: false, steering: undefined, adapter: "codex" },
+      false,
+    ],
+    // An explicit non-native capability overrides the claude fallback.
+    [
+      "claude explicitly non-native",
+      { isCloud: false, steering: "interrupt-resend", adapter: "claude" },
+      false,
+    ],
+    // Cloud runs queue/resend; they never steer locally regardless of capability.
+    [
+      "cloud claude native",
+      { isCloud: true, steering: "native", adapter: "claude" },
+      false,
+    ],
+    [
+      "cloud codex native",
+      { isCloud: true, steering: "native", adapter: "codex" },
+      false,
+    ],
+  ])("%s", (_label, session, expected) => {
+    expect(sessionSupportsNativeSteer(session)).toBe(expected);
+  });
+});
diff --git a/packages/shared/src/sessions.ts b/packages/shared/src/sessions.ts
index 0724dddfac..a278771721 100644
--- a/packages/shared/src/sessions.ts
+++ b/packages/shared/src/sessions.ts
@@ -65,6 +65,13 @@ export interface AgentSession {
   framework?: "claude";
   adapter?: Adapter;
   configOptions?: SessionConfigOption[];
+  /**
+   * Adapter's negotiated steering capability (`_meta.posthog.steering` from
+   * initialize). "native" means a mid-turn message folds into the running turn
+   * (claude, codex app-server); "interrupt-resend" (codex-acp) or undefined
+   * means the host must cancel + resend. Drives the steer-vs-resend decision.
+   */
+  steering?: string;
   pendingPermissions: Map<string, PermissionRequest>;
   pausedDurationMs: number;
   messageQueue: QueuedMessage[];
@@ -160,3 +167,41 @@ export function getCurrentModeFromConfigOptions(
   const modeOption = getConfigOptionByCategory(configOptions, "mode");
   return modeOption?.currentValue as ExecutionMode | undefined;
 }
+
+/**
+ * Picks the mode a session drops back to when "Bypass permissions" is switched
+ * off. The choice is made from that session's OWN select options, so it is a
+ * value the adapter actually offers: "default" if present (Claude), otherwise
+ * "auto" (codex, whose presets are plan/read-only/auto/full-access and include
+ * no "default"), otherwise the first option that is not itself a bypass mode.
+ * Returns undefined when the option is missing, not a select, or bypass-only.
+ */
+export function resolveBypassRevertMode(
+  modeOption: SessionConfigOption | undefined,
+): string | undefined {
+  if (modeOption?.type !== "select") return undefined;
+  const values = flattenSelectOptions(modeOption.options).map((o) => o.value);
+  const preferred = ["default", "auto"].find((v) => values.includes(v));
+  if (preferred) return preferred;
+  return values.find(
+    (value) => value !== "bypassPermissions" && value !== "full-access",
+  );
+}
+
+/**
+ * Reports whether a message sent mid-turn may be steered into the running turn
+ * instead of forcing a cancel-and-resend.
+ *
+ * - Cloud sessions: never (they do not steer locally).
+ * - `steering === "native"` (claude, codex app-server): yes.
+ * - Any other explicit value, e.g. "interrupt-resend" (codex-acp): no.
+ * - `steering` unset (a start path predating the capability): yes only for the
+ *   claude adapter, which has always steered, so the rollout cannot regress it.
+ */
+export function sessionSupportsNativeSteer(
+  session: Pick<AgentSession, "isCloud" | "steering" | "adapter">,
+): boolean {
+  const { isCloud, steering, adapter } = session;
+  if (isCloud) return false;
+  return steering == null ? adapter === "claude" : steering === "native";
+}
diff --git a/packages/shared/src/tool-meta.test.ts b/packages/shared/src/tool-meta.test.ts
new file mode 100644
index 0000000000..8e718d8616
--- /dev/null
+++ b/packages/shared/src/tool-meta.test.ts
@@ -0,0 +1,82 @@
+import { describe, expect, it } from "vitest";
+import {
+  parseMcpToolName,
+  readAgentToolName,
+  readMcpToolDescriptor,
+  readMcpToolName,
+} from "./tool-meta";
+
+describe("parseMcpToolName", () => {
+  it("splits the first __ after the prefix as the server boundary", () => {
+    expect(parseMcpToolName("mcp__posthog__exec")).toEqual({
+      server: "posthog",
+      tool: "exec",
+    });
+  });
+
+  it("keeps single underscores inside server and tool names", () => {
+    expect(
+      parseMcpToolName("mcp__plugin_posthog_posthog__execute-sql"),
+    ).toEqual({ server: "plugin_posthog_posthog", tool: "execute-sql" });
+  });
+
+  it("returns undefined for non-MCP or malformed names", () => {
+    expect(parseMcpToolName("Bash")).toBeUndefined();
+    expect(parseMcpToolName("mcp__posthog__")).toBeUndefined();
+    expect(parseMcpToolName("mcp____exec")).toBeUndefined();
+  });
+});
+
+describe("readAgentToolName", () => {
+  it("prefers the posthog channel over the legacy claudeCode fallback", () => {
+    expect(
+      readAgentToolName({
+        posthog: { toolName: "mcp__posthog__exec" },
+        claudeCode: { toolName: "stale" },
+      }),
+    ).toBe("mcp__posthog__exec");
+  });
+
+  it("falls back to claudeCode when posthog is absent", () => {
+    expect(readAgentToolName({ claudeCode: { toolName: "Bash" } })).toBe(
+      "Bash",
+    );
+  });
+
+  it("returns undefined for non-tool meta", () => {
+    expect(readAgentToolName(undefined)).toBeUndefined();
+    expect(readAgentToolName({})).toBeUndefined();
+  });
+});
+
+describe("readMcpToolDescriptor / readMcpToolName", () => {
+  it("uses the structured mcp descriptor when present (no name parsing)", () => {
+    const meta = {
+      posthog: {
+        toolName: "ignored",
+        mcp: { server: "posthog", tool: "exec" },
+      },
+    };
+    expect(readMcpToolDescriptor(meta)).toEqual({
+      server: "posthog",
+      tool: "exec",
+    });
+    expect(readMcpToolName(meta)).toBe("mcp__posthog__exec");
+  });
+
+  it("parses the legacy claudeCode mcp__ name when there is no structured channel", () => {
+    const meta = { claudeCode: { toolName: "mcp__posthog__execute-sql" } };
+    expect(readMcpToolDescriptor(meta)).toEqual({
+      server: "posthog",
+      tool: "execute-sql",
+    });
+    expect(readMcpToolName(meta)).toBe("mcp__posthog__execute-sql");
+  });
+
+  it("returns undefined for non-MCP tool calls", () => {
+    expect(
+      readMcpToolDescriptor({ claudeCode: { toolName: "Bash" } }),
+    ).toBeUndefined();
+    expect(readMcpToolName({ posthog: { toolName: "Bash" } })).toBeUndefined();
+  });
+});
diff --git a/packages/shared/src/tool-meta.ts b/packages/shared/src/tool-meta.ts
new file mode 100644
index 0000000000..8ef62cc324
--- /dev/null
+++ b/packages/shared/src/tool-meta.ts
@@ -0,0 +1,82 @@
+/**
+ * Canonical, harness-neutral tool metadata carried on an ACP tool call's
+ * `_meta.posthog`. Each adapter (the native-protocol → ACP boundary) populates
+ * it, so the renderer never has to know which harness produced a tool call.
+ *
+ * The renderer reads through {@link readAgentToolName} / {@link readMcpToolName},
+ * which prefer this channel and fall back to the legacy `_meta.claudeCode.toolName`
+ * the Claude adapter still writes. New adapters should only populate `posthog`.
+ */
+export interface PosthogToolMeta {
+  /** Agent-facing tool name, e.g. "Bash" or "mcp__posthog__exec". */
+  toolName: string;
+  /** MCP calls only: origin server + tool; readers use it over parsing toolName. */
+  mcp?: { server: string; tool: string };
+}
+
+/** Wraps `meta` under `posthog`: the `_meta` fragment to spread onto a tool_call update. */
+export function posthogToolMeta(meta: PosthogToolMeta): {
+  posthog: PosthogToolMeta;
+} {
+  return { posthog: meta };
+}
+
+/** Build the canonical `mcp__<server>__<tool>` key; parts are joined verbatim. */
+export function mcpToolKey(mcp: { server: string; tool: string }): string {
+  return `mcp__${mcp.server}__${mcp.tool}`;
+}
+
+/**
+ * Split a `mcp__<server>__<tool>` name into `{ server, tool }`, or return
+ * undefined when the name isn't MCP-shaped (no prefix, empty server or tool).
+ * The first `__` after the prefix ends the server; the rest is the tool.
+ */
+export function parseMcpToolName(
+  toolName: string,
+): { server: string; tool: string } | undefined {
+  const PREFIX = "mcp__";
+  const from = PREFIX.length;
+  const at = toolName.startsWith(PREFIX) ? toolName.indexOf("__", from) : -1;
+  // at <= from: not MCP-shaped, no separator, or empty server; else empty tool.
+  if (at <= from || at + 2 >= toolName.length) return undefined;
+  return { server: toolName.slice(from, at), tool: toolName.slice(at + 2) };
+}
+
+interface ToolCallMeta {
+  posthog?: PosthogToolMeta;
+  /** Legacy Claude-adapter channel, read only as a fallback. */
+  claudeCode?: { toolName?: string };
+}
+
+function asToolCallMeta(meta: unknown): ToolCallMeta | undefined {
+  return meta && typeof meta === "object" ? (meta as ToolCallMeta) : undefined;
+}
+
+/** Agent-facing tool name: `posthog.toolName` if set, else legacy `claudeCode.toolName`. */
+export function readAgentToolName(meta: unknown): string | undefined {
+  const { posthog, claudeCode } = asToolCallMeta(meta) ?? {};
+  return posthog?.toolName ?? claudeCode?.toolName;
+}
+
+/**
+ * The MCP `{ server, tool }` descriptor for a tool call, or undefined for a
+ * non-MCP call. Prefers the structured `posthog.mcp` channel, else parses the
+ * agent tool name; a non-string name (meta is unvalidated) is treated as absent.
+ */
+export function readMcpToolDescriptor(
+  meta: unknown,
+): { server: string; tool: string } | undefined {
+  const mcp = asToolCallMeta(meta)?.posthog?.mcp;
+  if (mcp) return mcp;
+  const name: unknown = readAgentToolName(meta);
+  return typeof name === "string" ? parseMcpToolName(name) : undefined;
+}
+
+/**
+ * String form of a tool call's MCP descriptor (`mcp__server__tool`), or
+ * undefined for a non-MCP call. For components still keyed on the string.
+ */
+export function readMcpToolName(meta: unknown): string | undefined {
+  const descriptor = readMcpToolDescriptor(meta);
+  return descriptor === undefined ? undefined : mcpToolKey(descriptor);
+}
diff --git a/packages/ui/src/features/message-editor/components/PromptInput.test.tsx b/packages/ui/src/features/message-editor/components/PromptInput.test.tsx
new file mode 100644
index 0000000000..bd0a282946
--- /dev/null
+++ b/packages/ui/src/features/message-editor/components/PromptInput.test.tsx
@@ -0,0 +1,139 @@
+import { Theme } from "@radix-ui/themes";
+import { render, screen } from "@testing-library/react";
+import userEvent from "@testing-library/user-event";
+import type React from "react";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+const editorState = vi.hoisted(() => ({ isEmpty: false }));
+const settingsState = vi.hoisted(() => ({ slotMachineMode: false }));
+
+vi.mock("../tiptap/useTiptapEditor", () => ({
+  useTiptapEditor: () => ({
+    editor: null,
+    isReady: true,
+    isEmpty: editorState.isEmpty,
+    isBashMode: false,
+    submit: vi.fn(),
+    focus: vi.fn(),
+    blur: vi.fn(),
+    clear: vi.fn(),
+    getText: vi.fn(),
+    getContent: vi.fn(),
+    setContent: vi.fn(),
+    insertChip: vi.fn(),
+    removeChipById: vi.fn(),
+    replaceChipAttrs: vi.fn(),
+    attachments: [],
+    addAttachment: vi.fn(),
+    removeAttachment: vi.fn(),
+  }),
+}));
+
+vi.mock("@posthog/ui/features/settings/settingsStore", () => ({
+  useSettingsStore: (selector: (s: typeof settingsState) => unknown) =>
+    selector(settingsState),
+}));
+
+vi.mock("../../skills/useSkills", () => ({
+  useSkills: () => ({ data: [] }),
+}));
+
+vi.mock("../draftStore", () => ({
+  useDraftStore: Object.assign(
+    (selector: (s: unknown) => unknown) =>
+      selector({ focusRequested: {}, actions: { clearFocusRequest: vi.fn() } }),
+    {
+      getState: () => ({
+        actions: { setCommands: vi.fn(), clearCommands: vi.fn() },
+      }),
+    },
+  ),
+}));
+
+vi.mock("./AttachmentMenu", () => ({ AttachmentMenu: () => null }));
+vi.mock("./AttachmentsBar", () => ({ AttachmentsBar: () => null }));
+vi.mock("./SlotMachineSubmit", () => ({
+  SlotMachineSubmit: ({
+    disabled,
+    onSubmit,
+  }: {
+    disabled?: boolean;
+    onSubmit?: () => void;
+  }) => (
+    <button
+      type="button"
+      aria-label="Slot machine submit"
+      disabled={disabled}
+      onClick={onSubmit}
+    />
+  ),
+}));
+
+vi.mock("@posthog/quill", () => ({
+  InputGroup: ({ children }: { children: React.ReactNode }) => (
+    <div>{children}</div>
+  ),
+  InputGroupAddon: ({ children }: { children: React.ReactNode }) => (
+    <div>{children}</div>
+  ),
+  InputGroupButton: ({
+    children,
+    ...props
+  }: React.ButtonHTMLAttributes<HTMLButtonElement>) => (
+    <button type="button" {...props}>
+      {children}
+    </button>
+  ),
+}));
+
+import { PromptInput } from "./PromptInput";
+
+function renderInput(props: Partial<React.ComponentProps<typeof PromptInput>>) {
+  return render(
+    <Theme>
+      <PromptInput sessionId="s1" {...props} />
+    </Theme>,
+  );
+}
+
+describe("PromptInput submit/stop affordance", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+    editorState.isEmpty = false;
+    settingsState.slotMachineMode = false;
+  });
+
+  it("shows Stop (not Send) while loading and calls onCancel when clicked", async () => {
+    const user = userEvent.setup();
+    const onCancel = vi.fn();
+
+    renderInput({ isLoading: true, onCancel });
+
+    const stop = screen.getByRole("button", { name: "Stop" });
+    expect(
+      screen.queryByRole("button", { name: "Send message" }),
+    ).not.toBeInTheDocument();
+
+    await user.click(stop);
+    expect(onCancel).toHaveBeenCalledOnce();
+  });
+
+  it("keeps Send enabled mid-turn when no cancel handler (queue/steer path)", () => {
+    // isLoading true but no onCancel => inStopMode is false, so the composer
+    // must still expose an enabled Send so messages queue/steer mid-turn.
+    // Regression guard: adding `|| isLoading` to submitBlocked disables this.
+    renderInput({ isLoading: true });
+
+    const send = screen.getByRole("button", { name: "Send message" });
+    expect(send).toBeEnabled();
+  });
+
+  it("disables Send when the editor is empty", () => {
+    editorState.isEmpty = true;
+
+    renderInput({});
+
+    const send = screen.getByRole("button", { name: "Send message" });
+    expect(send).toBeDisabled();
+  });
+});
diff --git a/packages/ui/src/features/permissions/McpPermission.tsx b/packages/ui/src/features/permissions/McpPermission.tsx
index 9b89780d3d..7fbac089aa 100644
--- a/packages/ui/src/features/permissions/McpPermission.tsx
+++ b/packages/ui/src/features/permissions/McpPermission.tsx
@@ -1,3 +1,4 @@
+import { readMcpToolName } from "@posthog/shared";
 import { parseMcpToolKey } from "@posthog/ui/features/mcp-apps/utils/mcp-app-host-utils";
 import {
   formatPosthogExecBody,
@@ -16,9 +17,7 @@ export function McpPermission({
   onSelect,
   onCancel,
 }: BasePermissionProps) {
-  const mcpToolName = (
-    toolCall._meta as { claudeCode?: { toolName?: string } } | undefined
-  )?.claudeCode?.toolName;
+  const mcpToolName = readMcpToolName(toolCall._meta);
 
   if (!mcpToolName) {
     return (
diff --git a/packages/ui/src/features/permissions/PermissionSelector.tsx b/packages/ui/src/features/permissions/PermissionSelector.tsx
index b89ad00d02..3030a9d56c 100644
--- a/packages/ui/src/features/permissions/PermissionSelector.tsx
+++ b/packages/ui/src/features/permissions/PermissionSelector.tsx
@@ -1,4 +1,5 @@
 import type { PermissionOption } from "@agentclientprotocol/sdk";
+import { readMcpToolName } from "@posthog/shared";
 import { DefaultPermission } from "./DefaultPermission";
 import { DeletePermission } from "./DeletePermission";
 import { EditPermission } from "./EditPermission";
@@ -31,11 +32,8 @@ export function PermissionSelector({
   onCancel,
 }: PermissionSelectorProps) {
   const props = { toolCall, options, onSelect, onCancel };
-  const meta = toolCall._meta as
-    | { codeToolKind?: string; claudeCode?: { toolName?: string } }
-    | undefined;
-  const agentToolName = meta?.claudeCode?.toolName;
-  if (agentToolName?.startsWith("mcp__")) {
+  const meta = toolCall._meta as { codeToolKind?: string } | undefined;
+  if (readMcpToolName(toolCall._meta)) {
     return <McpPermission {...props} />;
   }
   const kind = meta?.codeToolKind ?? (toolCall.kind as string);
diff --git a/packages/ui/src/features/posthog-mcp/utils/posthog-exec-display.ts b/packages/ui/src/features/posthog-mcp/utils/posthog-exec-display.ts
index 7642bb7ed9..50cda82a30 100644
--- a/packages/ui/src/features/posthog-mcp/utils/posthog-exec-display.ts
+++ b/packages/ui/src/features/posthog-mcp/utils/posthog-exec-display.ts
@@ -18,7 +18,12 @@
  *   call [--json] <tool> <json_input>      — invoke a tool
  */
 
-const POSTHOG_EXEC_TOOL_RE = /^mcp__(?:plugin_)?posthog(?:_[^_]+)*__exec$/;
+import { parseMcpToolName } from "@posthog/shared";
+
+// A PostHog MCP server name: optional `plugin_` prefix, `posthog`, then any
+// number of `_<segment>` parts (e.g. `posthog`, `posthog_cloud`,
+// `plugin_posthog_posthog`). The `exec` dispatcher lives on these servers.
+const POSTHOG_SERVER_RE = /^(?:plugin_)?posthog(?:_[^_]+)*$/;
 
 const POSTHOG_VERB_RE =
   /^\s*(tools|search|info|schema|call)(?:\s+([\s\S]*))?\s*$/;
@@ -33,7 +38,8 @@ export interface PostHogExecDisplay {
 }
 
 export function isPostHogExecTool(toolName: string): boolean {
-  return POSTHOG_EXEC_TOOL_RE.test(toolName);
+  const mcp = parseMcpToolName(toolName);
+  return !!mcp && mcp.tool === "exec" && POSTHOG_SERVER_RE.test(mcp.server);
 }
 
 export function getPostHogExecDisplay(
diff --git a/packages/ui/src/features/sessions/components/ContextUsageIndicator.test.tsx b/packages/ui/src/features/sessions/components/ContextUsageIndicator.test.tsx
new file mode 100644
index 0000000000..49945a0f7d
--- /dev/null
+++ b/packages/ui/src/features/sessions/components/ContextUsageIndicator.test.tsx
@@ -0,0 +1,69 @@
+import type { ContextUsage } from "@posthog/ui/features/sessions/hooks/useContextUsage";
+import { Theme } from "@radix-ui/themes";
+import { render, screen } from "@testing-library/react";
+import { describe, expect, it } from "vitest";
+import { ContextUsageIndicator } from "./ContextUsageIndicator";
+
+function usage(overrides?: Partial<ContextUsage>): ContextUsage {
+  return {
+    used: 50_000,
+    size: 200_000,
+    percentage: 25,
+    cost: null,
+    breakdown: null,
+    ...overrides,
+  };
+}
+
+describe("ContextUsageIndicator", () => {
+  it("renders nothing when usage is null", () => {
+    const { container } = render(
+      <Theme>
+        <ContextUsageIndicator usage={null} />
+      </Theme>,
+    );
+    expect(container.querySelector("button")).toBeNull();
+  });
+
+  it("renders the compact used/size label, percentage, and aria-label", () => {
+    render(
+      <Theme>
+        <ContextUsageIndicator usage={usage()} />
+      </Theme>,
+    );
+    expect(screen.getByText(/50K\/200K · 25%/)).toBeInTheDocument();
+    expect(
+      screen.getByRole("button", { name: "Context usage: 25%" }),
+    ).toBeInTheDocument();
+  });
+
+  it("shows only the token count when the context window is unknown (size 0)", () => {
+    render(
+      <Theme>
+        <ContextUsageIndicator
+          usage={usage({ used: 50_000, size: 0, percentage: 0 })}
+        />
+      </Theme>,
+    );
+    // No misleading "/0 · 0%" — just the used tokens.
+    expect(screen.getByText("50K")).toBeInTheDocument();
+    expect(screen.queryByText(/\/0/)).not.toBeInTheDocument();
+    expect(
+      screen.getByRole("button", { name: "Context usage: 50K tokens" }),
+    ).toBeInTheDocument();
+  });
+
+  it("renders a finite stroke offset at 0% (no NaN/Infinity)", () => {
+    const { container } = render(
+      <Theme>
+        <ContextUsageIndicator
+          usage={usage({ used: 0, size: 200_000, percentage: 0 })}
+        />
+      </Theme>,
+    );
+    const progress = container.querySelectorAll("circle")[1];
+    const offset = Number(progress?.getAttribute("stroke-dashoffset"));
+    expect(Number.isFinite(offset)).toBe(true);
+    expect(screen.getByText(/0\/200K · 0%/)).toBeInTheDocument();
+  });
+});
diff --git a/packages/ui/src/features/sessions/components/ContextUsageIndicator.tsx b/packages/ui/src/features/sessions/components/ContextUsageIndicator.tsx
index 94c0f599a2..1ae3ac46a1 100644
--- a/packages/ui/src/features/sessions/components/ContextUsageIndicator.tsx
+++ b/packages/ui/src/features/sessions/components/ContextUsageIndicator.tsx
@@ -19,6 +19,9 @@ export function ContextUsageIndicator({ usage }: ContextUsageIndicatorProps) {
   if (!usage) return null;
 
   const { used, size, percentage } = usage;
+  // The context window can be unknown (size 0) — show just the token count
+  // rather than a misleading "X/0 · 0%".
+  const hasSize = size > 0;
   const strokeDashoffset = CIRCUMFERENCE - (percentage / 100) * CIRCUMFERENCE;
   const color = getOverallUsageColor(percentage);
 
@@ -28,7 +31,11 @@ export function ContextUsageIndicator({ usage }: ContextUsageIndicatorProps) {
         <button
           type="button"
           className="flex cursor-pointer select-none items-center gap-1 bg-transparent"
-          aria-label={`Context usage: ${percentage}%`}
+          aria-label={
+            hasSize
+              ? `Context usage: ${percentage}%`
+              : `Context usage: ${formatTokensCompact(used)} tokens`
+          }
         >
           <Flex align="center" gap="1">
             <svg
@@ -59,8 +66,9 @@ export function ContextUsageIndicator({ usage }: ContextUsageIndicatorProps) {
               />
             </svg>
             <Text className="text-[13px] text-muted-foreground tabular-nums">
-              {formatTokensCompact(used)}/{formatTokensCompact(size)} ·{" "}
-              {percentage}%
+              {hasSize
+                ? `${formatTokensCompact(used)}/${formatTokensCompact(size)} · ${percentage}%`
+                : formatTokensCompact(used)}
             </Text>
           </Flex>
         </button>
diff --git a/packages/ui/src/features/sessions/components/ReasoningLevelSelector.test.tsx b/packages/ui/src/features/sessions/components/ReasoningLevelSelector.test.tsx
new file mode 100644
index 0000000000..9631603355
--- /dev/null
+++ b/packages/ui/src/features/sessions/components/ReasoningLevelSelector.test.tsx
@@ -0,0 +1,89 @@
+import type { SessionConfigOption } from "@agentclientprotocol/sdk";
+import { Theme } from "@radix-ui/themes";
+import { render, screen, waitFor } from "@testing-library/react";
+import userEvent from "@testing-library/user-event";
+import { describe, expect, it, vi } from "vitest";
+import { ReasoningLevelSelector } from "./ReasoningLevelSelector";
+
+function codexThoughtOption(
+  overrides?: Partial<SessionConfigOption>,
+): SessionConfigOption {
+  return {
+    type: "select",
+    id: "effort",
+    name: "Reasoning effort",
+    category: "thought_level",
+    currentValue: "high",
+    options: [
+      { name: "low", value: "low" },
+      { name: "high", value: "high" },
+      { name: "max", value: "max" },
+    ],
+    ...overrides,
+  } as unknown as SessionConfigOption;
+}
+
+describe("ReasoningLevelSelector", () => {
+  it("renders the active level as the trigger label for a codex thought_level option", () => {
+    render(
+      <Theme>
+        <ReasoningLevelSelector
+          thoughtOption={codexThoughtOption()}
+          adapter="codex"
+        />
+      </Theme>,
+    );
+    expect(
+      screen.getByRole("button", { name: "Reasoning: high" }),
+    ).toBeInTheDocument();
+  });
+
+  it("emits the raw value via onChange once the menu closes", async () => {
+    const onChange = vi.fn();
+    const user = userEvent.setup();
+    render(
+      <Theme>
+        <ReasoningLevelSelector
+          thoughtOption={codexThoughtOption()}
+          adapter="codex"
+          onChange={onChange}
+        />
+      </Theme>,
+    );
+
+    await user.click(screen.getByRole("button", { name: "Reasoning: high" }));
+    const lowItem = await screen.findByRole("menuitemradio", { name: "low" });
+    await user.click(lowItem);
+
+    await waitFor(() => expect(onChange).toHaveBeenCalledWith("low"));
+    expect(onChange).toHaveBeenCalledTimes(1);
+  });
+
+  it("uses the 'Effort' label for the claude adapter", () => {
+    render(
+      <Theme>
+        <ReasoningLevelSelector
+          thoughtOption={codexThoughtOption({ currentValue: "medium" })}
+          adapter="claude"
+        />
+      </Theme>,
+    );
+    expect(
+      screen.getByRole("button", { name: "Effort: medium" }),
+    ).toBeInTheDocument();
+  });
+
+  it.each([
+    ["undefined option", undefined],
+    ["non-select type", codexThoughtOption({ type: "boolean" })],
+    ["empty options", codexThoughtOption({ options: [] })],
+  ])("renders no trigger for %s", (_label, option) => {
+    render(
+      <ReasoningLevelSelector
+        thoughtOption={option as SessionConfigOption | undefined}
+        adapter="codex"
+      />,
+    );
+    expect(screen.queryByRole("button")).not.toBeInTheDocument();
+  });
+});
diff --git a/packages/ui/src/features/sessions/components/SessionView.tsx b/packages/ui/src/features/sessions/components/SessionView.tsx
index ab832577da..84e7780658 100644
--- a/packages/ui/src/features/sessions/components/SessionView.tsx
+++ b/packages/ui/src/features/sessions/components/SessionView.tsx
@@ -192,8 +192,16 @@ export function SessionView({
       isCloud,
       allowBypassPermissions,
       currentModeId,
+      modeOption,
     });
-  }, [allowBypassPermissions, currentModeId, taskId, isCloud, sessionService]);
+  }, [
+    allowBypassPermissions,
+    currentModeId,
+    taskId,
+    isCloud,
+    sessionService,
+    modeOption,
+  ]);
 
   const handleModeChange = useCallback(
     (nextMode: string) => {
diff --git a/packages/ui/src/features/sessions/components/SteerQueueToggle.test.tsx b/packages/ui/src/features/sessions/components/SteerQueueToggle.test.tsx
new file mode 100644
index 0000000000..f6f0627b31
--- /dev/null
+++ b/packages/ui/src/features/sessions/components/SteerQueueToggle.test.tsx
@@ -0,0 +1,75 @@
+import { renderHook } from "@testing-library/react";
+import { beforeEach, describe, expect, it } from "vitest";
+import { useSupportsNativeSteer } from "../hooks/useMessagingMode";
+import {
+  type AgentSession,
+  sessionStoreSetters,
+  useSessionStore,
+} from "../sessionStore";
+import { steerQueueTooltip } from "./SteerQueueToggle";
+
+function seedSession(overrides: Partial<AgentSession>): void {
+  sessionStoreSetters.setSession({
+    taskRunId: "run-1",
+    taskId: "task-1",
+    taskTitle: "Test",
+    channel: "agent-event:run-1",
+    events: [],
+    startedAt: 0,
+    status: "connected",
+    isPromptPending: false,
+    isCompacting: false,
+    promptStartedAt: null,
+    pendingPermissions: new Map(),
+    pausedDurationMs: 0,
+    messageQueue: [],
+    optimisticItems: [],
+    ...overrides,
+  });
+}
+
+describe("steer tooltip copy follows the session's native-steer capability", () => {
+  beforeEach(() => {
+    useSessionStore.setState((state) => {
+      state.sessions = {};
+      state.taskIdIndex = {};
+    });
+  });
+
+  it.each([
+    {
+      name: "codex (local): interrupts and resends",
+      session: { adapter: "codex" as const, isCloud: false },
+      expectNative: false,
+    },
+    {
+      name: "claude cloud: interrupts and resends",
+      session: { adapter: "claude" as const, isCloud: true },
+      expectNative: false,
+    },
+    {
+      name: "claude (local): folds natively at the next tool boundary",
+      session: { adapter: "claude" as const, isCloud: false },
+      expectNative: true,
+    },
+  ])(
+    "$name — supportsNativeSteer and rendered tooltip agree",
+    ({ session, expectNative }) => {
+      seedSession(session);
+
+      const { result } = renderHook(() => useSupportsNativeSteer("task-1"));
+      expect(result.current).toBe(expectNative);
+
+      const tooltip = steerQueueTooltip(true, result.current, "Cmd+S");
+      if (expectNative) {
+        expect(tooltip).toContain(
+          "injects your message mid-turn at the next tool boundary",
+        );
+      } else {
+        expect(tooltip).toContain(
+          "interrupts the current turn and resends with your message",
+        );
+      }
+    },
+  );
+});
diff --git a/packages/ui/src/features/sessions/components/SteerQueueToggle.tsx b/packages/ui/src/features/sessions/components/SteerQueueToggle.tsx
index 113a56ad79..5b51da75bd 100644
--- a/packages/ui/src/features/sessions/components/SteerQueueToggle.tsx
+++ b/packages/ui/src/features/sessions/components/SteerQueueToggle.tsx
@@ -16,6 +16,19 @@ interface SteerQueueToggleProps {
   taskId: string;
 }
 
+export function steerQueueTooltip(
+  isSteer: boolean,
+  supportsNativeSteer: boolean,
+  shortcut: string,
+): string {
+  // Steer copy depends on whether the session folds messages natively.
+  const steer = supportsNativeSteer
+    ? `Steer: injects your message mid-turn at the next tool boundary. ${shortcut} to switch to Queue.`
+    : `Steer: interrupts the current turn and resends with your message. ${shortcut} to switch to Queue.`;
+  const queue = `Queue: holds messages until the current turn ends. ${shortcut} to switch to Steer.`;
+  return isSteer ? steer : queue;
+}
+
 export function SteerQueueToggle({ taskId }: SteerQueueToggleProps) {
   const mode = useMessagingMode(taskId);
   const supportsNativeSteer = useSupportsNativeSteer(taskId);
@@ -30,11 +43,7 @@ export function SteerQueueToggle({ taskId }: SteerQueueToggleProps) {
       ? `Queue (${queuedCount})`
       : "Queue";
 
-  const tooltip = isSteer
-    ? supportsNativeSteer
-      ? `Steer: injects your message mid-turn at the next tool boundary. ${shortcut} to switch to Queue.`
-      : `Steer: interrupts the current turn and resends with your message. ${shortcut} to switch to Queue.`
-    : `Queue: holds messages until the current turn ends. ${shortcut} to switch to Steer.`;
+  const tooltip = steerQueueTooltip(isSteer, supportsNativeSteer, shortcut);
 
   const colorClass = isSteer ? "text-purple-11" : "text-gray-11";
 
diff --git a/packages/ui/src/features/sessions/components/UnifiedModelSelector.test.tsx b/packages/ui/src/features/sessions/components/UnifiedModelSelector.test.tsx
new file mode 100644
index 0000000000..2ef6396fb2
--- /dev/null
+++ b/packages/ui/src/features/sessions/components/UnifiedModelSelector.test.tsx
@@ -0,0 +1,133 @@
+import type {
+  SessionConfigOption,
+  SessionConfigSelectGroup,
+} from "@agentclientprotocol/sdk";
+import { Theme } from "@radix-ui/themes";
+import { render, screen } from "@testing-library/react";
+import userEvent from "@testing-library/user-event";
+import { describe, expect, it, vi } from "vitest";
+import { UnifiedModelSelector } from "./UnifiedModelSelector";
+
+const groupedCodexModel: SessionConfigOption = {
+  type: "select",
+  id: "model",
+  name: "Model",
+  category: "model",
+  currentValue: "gpt-5.5",
+  options: [
+    {
+      group: "openai",
+      name: "OpenAI",
+      options: [
+        { value: "gpt-5.5", name: "GPT-5.5" },
+        { value: "gpt-5.5-codex", name: "GPT-5.5 Codex" },
+      ],
+    },
+    {
+      group: "fable",
+      name: "Fable",
+      options: [{ value: "fable", name: "Fable" }],
+    },
+  ] satisfies SessionConfigSelectGroup[],
+};
+
+const flatCodexModel: SessionConfigOption = {
+  type: "select",
+  id: "model",
+  name: "Model",
+  category: "model",
+  currentValue: "gpt-5.5",
+  options: [
+    { value: "gpt-5.5", name: "GPT-5.5" },
+    { value: "fable", name: "Fable" },
+  ],
+};
+
+function renderSelector(
+  props: Partial<React.ComponentProps<typeof UnifiedModelSelector>> = {},
+) {
+  return render(
+    <Theme>
+      <UnifiedModelSelector
+        modelOption={groupedCodexModel}
+        adapter="codex"
+        onAdapterChange={vi.fn()}
+        onModelChange={vi.fn()}
+        {...props}
+      />
+    </Theme>,
+  );
+}
+
+describe("UnifiedModelSelector", () => {
+  it("renders the codex adapter label, group labels, and grouped model items", async () => {
+    const user = userEvent.setup();
+    renderSelector();
+
+    await user.click(screen.getByRole("button", { name: "Model" }));
+
+    // Every model in every group renders as a radio item.
+    expect(
+      await screen.findByRole("menuitemradio", { name: "GPT-5.5" }),
+    ).toBeInTheDocument();
+    expect(
+      screen.getByRole("menuitemradio", { name: "GPT-5.5 Codex" }),
+    ).toBeInTheDocument();
+    expect(
+      screen.getByRole("menuitemradio", { name: "Fable" }),
+    ).toBeInTheDocument();
+    // Adapter MenuLabel + group MenuLabels render.
+    expect(screen.getByText("Codex")).toBeInTheDocument();
+    expect(screen.getByText("OpenAI")).toBeInTheDocument();
+  });
+
+  it("renders flat (ungrouped) model items", async () => {
+    const user = userEvent.setup();
+    renderSelector({ modelOption: flatCodexModel });
+
+    await user.click(screen.getByRole("button", { name: "Model" }));
+
+    expect(
+      await screen.findByRole("menuitemradio", { name: "GPT-5.5" }),
+    ).toBeInTheDocument();
+    expect(
+      screen.getByRole("menuitemradio", { name: "Fable" }),
+    ).toBeInTheDocument();
+  });
+
+  it("fires onModelChange exactly once with the picked value after the menu closes", async () => {
+    const user = userEvent.setup();
+    const onModelChange = vi.fn();
+    renderSelector({ onModelChange });
+
+    await user.click(screen.getByRole("button", { name: "Model" }));
+    await user.click(
+      await screen.findByRole("menuitemradio", { name: "GPT-5.5 Codex" }),
+    );
+
+    expect(onModelChange).toHaveBeenCalledExactlyOnceWith("gpt-5.5-codex");
+  });
+
+  it("switches adapter via the 'Switch to Claude' item", async () => {
+    const user = userEvent.setup();
+    const onAdapterChange = vi.fn();
+    renderSelector({ onAdapterChange });
+
+    await user.click(screen.getByRole("button", { name: "Model" }));
+    await user.click(
+      await screen.findByRole("menuitem", { name: /switch to claude/i }),
+    );
+
+    expect(onAdapterChange).toHaveBeenCalledExactlyOnceWith("claude");
+  });
+
+  it("renders a disabled loading button with no menu while connecting", () => {
+    renderSelector({ isConnecting: true });
+
+    const button = screen.getByRole("button", { name: /loading/i });
+    expect(button).toHaveAttribute("aria-disabled", "true");
+    expect(
+      screen.queryByRole("button", { name: "Model" }),
+    ).not.toBeInTheDocument();
+  });
+});
diff --git a/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.ts b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.ts
index e4ea31e509..9253179fc1 100644
--- a/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.ts
+++ b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.ts
@@ -1,4 +1,5 @@
 import type { Icon } from "@phosphor-icons/react";
+import { readAgentToolName } from "@posthog/shared";
 import type { ConversationItem } from "@posthog/ui/features/sessions/components/buildConversationItems";
 import {
   buildDoneLabel,
@@ -66,10 +67,7 @@ export interface ThreadGrouping {
 }
 
 function getToolName(update: { _meta?: unknown }): string | undefined {
-  const meta = update._meta as
-    | { claudeCode?: { toolName?: string } }
-    | undefined;
-  return meta?.claudeCode?.toolName;
+  return readAgentToolName(update._meta);
 }
 
 function isMcpToolItem(item: ConversationItem): boolean {
diff --git a/packages/ui/src/features/sessions/components/session-update/ToolCallBlock.test.tsx b/packages/ui/src/features/sessions/components/session-update/ToolCallBlock.test.tsx
new file mode 100644
index 0000000000..8ac51cfdc8
--- /dev/null
+++ b/packages/ui/src/features/sessions/components/session-update/ToolCallBlock.test.tsx
@@ -0,0 +1,118 @@
+import { ServiceProvider } from "@posthog/di/react";
+import { posthogToolMeta } from "@posthog/shared";
+import type { ToolCall } from "@posthog/ui/features/sessions/types";
+import { Theme } from "@radix-ui/themes";
+import { render, screen } from "@testing-library/react";
+import { Container } from "inversify";
+import type { ReactNode } from "react";
+import { describe, expect, it, vi } from "vitest";
+import { MCP_TOOL_BLOCK_COMPONENT } from "./identifiers";
+import { ToolCallBlock } from "./ToolCallBlock";
+import type { ToolViewProps } from "./toolCallUtils";
+
+// EditToolView's leaf renderers reach outside the unit under test: FileMentionChip
+// pulls workspace/tRPC context, and CodePreview mounts a web component that needs
+// a real CSSStyleSheet. The edit-routing test only cares that ToolCallBlock dispatched
+// to EditToolView, so FileMentionChip just echoes its filePath and CodePreview is a placeholder.
+vi.mock("./FileMentionChip", () => ({
+  FileMentionChip: ({ filePath }: { filePath: string }) => (
+    <span>{filePath}</span>
+  ),
+}));
+vi.mock("./CodePreview", () => ({
+  CodePreview: () => <span>code-preview</span>,
+}));
+/** Renders ToolCallBlock (turnComplete) in a fresh DI container; binds MCP_TOOL_BLOCK_COMPONENT only when mcpToolBlock is passed. */
+function renderBlock(
+  toolCall: ToolCall,
+  mcpToolBlock?: (props: ToolViewProps & { mcpToolName: string }) => ReactNode,
+) {
+  const container = new Container();
+  if (mcpToolBlock) {
+    container.bind(MCP_TOOL_BLOCK_COMPONENT).toConstantValue(mcpToolBlock);
+  }
+  return render(
+    <ServiceProvider container={container}>
+      <Theme>
+        <ToolCallBlock toolCall={toolCall} turnComplete />
+      </Theme>
+    </ServiceProvider>,
+  );
+}
+
+describe("ToolCallBlock codex routing", () => {
+  // Shared codex MCP fixture: the posthog `_meta` descriptor carries the
+  // canonical tool name plus its server/tool pair. Only the call id varies.
+  const mcpToolCall = (toolCallId: string): ToolCall => ({
+    toolCallId,
+    title: "exec",
+    kind: "other",
+    status: "completed",
+    rawInput: { query: "select 1" },
+    _meta: posthogToolMeta({
+      toolName: "mcp__posthog__exec",
+      mcp: { server: "posthog", tool: "exec" },
+    }),
+  });
+
+  it("routes a codex MCP descriptor to the bound McpToolBlock with the canonical name", () => {
+    const captured: { mcpToolName?: string } = {};
+    const McpToolBlock = vi.fn(
+      (blockProps: ToolViewProps & { mcpToolName: string }) => {
+        captured.mcpToolName = blockProps.mcpToolName;
+        return <div>mcp-block-rendered</div>;
+      },
+    );
+
+    renderBlock(mcpToolCall("tc-mcp"), McpToolBlock);
+
+    // The bound stub rendered and was handed the canonical MCP tool name.
+    expect(screen.getByText("mcp-block-rendered")).toBeInTheDocument();
+    expect(captured.mcpToolName).toBe("mcp__posthog__exec");
+  });
+
+  it("falls back to the generic tool view for an MCP call when no McpToolBlock is bound", () => {
+    renderBlock(mcpToolCall("tc-mcp-fallback"));
+
+    // With nothing bound, ToolCallBlock falls back to its generic view; the
+    // title showing up proves the unbound MCP call rendered instead of throwing.
+    expect(screen.getByText("exec")).toBeInTheDocument();
+  });
+
+  it("routes a codex edit tool call (no _meta) to the edit view with diff stats", () => {
+    const editCall: ToolCall = {
+      toolCallId: "tc-edit",
+      title: "Edit a.ts",
+      kind: "edit",
+      status: "completed",
+      content: [{ type: "diff", path: "a.ts", oldText: "x", newText: "y" }],
+      locations: [{ path: "a.ts" }],
+    };
+
+    renderBlock(editCall);
+
+    // `a.ts` presumably surfaces through the stubbed FileMentionChip; `+1`/`-1`
+    // are the diff stats for the single-line x -> y change.
+    expect(screen.getByText("a.ts")).toBeInTheDocument();
+    expect(screen.getByText("+1")).toBeInTheDocument();
+    expect(screen.getByText("-1")).toBeInTheDocument();
+  });
+
+  it("routes a codex execute tool call (no _meta) to the execute view header", () => {
+    const executeCall: ToolCall = {
+      toolCallId: "tc-exec",
+      title: "run tests",
+      kind: "execute",
+      status: "completed",
+      rawInput: { command: "pnpm test", description: "Run tests" },
+      content: [{ type: "content", content: { type: "text", text: "ok" } }],
+    };
+
+    renderBlock(executeCall);
+
+    // Both `rawInput` fields should be on screen: the description and the
+    // command itself.
+    expect(screen.getByText("Run tests")).toBeInTheDocument();
+    expect(screen.getByText("pnpm test")).toBeInTheDocument();
+  });
+});
diff --git a/packages/ui/src/features/sessions/components/session-update/ToolCallBlock.tsx b/packages/ui/src/features/sessions/components/session-update/ToolCallBlock.tsx
index fe3a9b8a73..dcfe1732db 100644
--- a/packages/ui/src/features/sessions/components/session-update/ToolCallBlock.tsx
+++ b/packages/ui/src/features/sessions/components/session-update/ToolCallBlock.tsx
@@ -1,4 +1,5 @@
 import { useServiceOptional } from "@posthog/di/react";
+import { readAgentToolName, readMcpToolName } from "@posthog/shared";
 import { DeleteToolView } from "@posthog/ui/features/sessions/components/session-update/DeleteToolView";
 import { EditToolView } from "@posthog/ui/features/sessions/components/session-update/EditToolView";
 import { ExecuteToolView } from "@posthog/ui/features/sessions/components/session-update/ExecuteToolView";
@@ -36,10 +37,8 @@ export function ToolCallBlock({
   const McpToolBlock = useServiceOptional<McpToolBlockComponent>(
     MCP_TOOL_BLOCK_COMPONENT,
   );
-  const meta = toolCall._meta as
-    | { claudeCode?: { toolName?: string } }
-    | undefined;
-  const toolName = meta?.claudeCode?.toolName;
+  const toolName = readAgentToolName(toolCall._meta);
+  const mcpToolName = readMcpToolName(toolCall._meta);
   const chatChrome = useChatThreadChrome();
 
   if (toolName === "EnterPlanMode") {
@@ -70,13 +69,13 @@ export function ToolCallBlock({
     );
   }
 
-  if (toolName?.startsWith("mcp__")) {
+  if (mcpToolName) {
     return (
       <Box className={chatChrome ? "" : "pl-3"}>
         {McpToolBlock ? (
-          <McpToolBlock {...props} mcpToolName={toolName} />
+          <McpToolBlock {...props} mcpToolName={mcpToolName} />
         ) : (
-          <ToolCallView {...props} agentToolName={toolName} />
+          <ToolCallView {...props} agentToolName={mcpToolName} />
         )}
       </Box>
     );
diff --git a/packages/ui/src/features/sessions/hooks/useMessagingMode.ts b/packages/ui/src/features/sessions/hooks/useMessagingMode.ts
index e63d40e790..1d00ceecdc 100644
--- a/packages/ui/src/features/sessions/hooks/useMessagingMode.ts
+++ b/packages/ui/src/features/sessions/hooks/useMessagingMode.ts
@@ -1,3 +1,4 @@
+import { sessionSupportsNativeSteer } from "@posthog/shared";
 import {
   type MessagingMode,
   useMessagingModeStore,
@@ -15,9 +16,11 @@ export function useMessagingMode(taskId: string | undefined): MessagingMode {
 }
 
 /**
- * Whether the task's session steers natively (Claude, local) versus falling
- * back to interrupt-and-resend (Codex, cloud). Drives the steer label/tooltip,
- * not whether steer is allowed: every adapter supports steer in some form.
+ * Whether the task's session steers natively (folds a mid-turn message into the
+ * running turn) versus falling back to interrupt-and-resend. Decided by the
+ * adapter's negotiated `steering` capability (the same decision as the host's
+ * sendPrompt gate): Claude and codex app-server steer natively, while codex-acp
+ * and cloud resend. Drives the steer label/tooltip, not whether steer is allowed.
  */
 export function useSupportsNativeSteer(taskId: string | undefined): boolean {
   return useSessionStore((s) => {
@@ -25,6 +28,6 @@ export function useSupportsNativeSteer(taskId: string | undefined): boolean {
     const taskRunId = s.taskIdIndex[taskId];
     if (!taskRunId) return false;
     const session = s.sessions[taskRunId];
-    return !!session && !session.isCloud && session.adapter === "claude";
+    return !!session && sessionSupportsNativeSteer(session);
   });
 }
diff --git a/packages/ui/src/features/sessions/sessionServiceHost.recovery.integration.test.ts b/packages/ui/src/features/sessions/sessionServiceHost.recovery.integration.test.ts
index bb44ec8658..abb7924fa0 100644
--- a/packages/ui/src/features/sessions/sessionServiceHost.recovery.integration.test.ts
+++ b/packages/ui/src/features/sessions/sessionServiceHost.recovery.integration.test.ts
@@ -213,6 +213,10 @@ vi.mock("@posthog/ui/features/sidebar/taskMetaApi", () => ({
 vi.mock("@posthog/ui/shell/posthogAnalyticsImpl", () => ({
   track: vi.fn(),
   buildPermissionToolMetadata: vi.fn(() => ({})),
+  posthogFeatureFlags: {
+    isEnabled: vi.fn(() => undefined),
+    onFlagsLoaded: vi.fn(),
+  },
 }));
 vi.mock("../../shell/logger", () => ({
   logger: {
diff --git a/packages/ui/src/features/sessions/sessionServiceHost.test.ts b/packages/ui/src/features/sessions/sessionServiceHost.test.ts
index 49e3c443d9..84144af24a 100644
--- a/packages/ui/src/features/sessions/sessionServiceHost.test.ts
+++ b/packages/ui/src/features/sessions/sessionServiceHost.test.ts
@@ -244,6 +244,10 @@ vi.mock("@posthog/ui/features/sidebar/taskMetaApi", () => ({
 vi.mock("@posthog/ui/shell/posthogAnalyticsImpl", () => ({
   track: vi.fn(),
   buildPermissionToolMetadata: vi.fn(() => ({})),
+  posthogFeatureFlags: {
+    isEnabled: vi.fn(() => undefined),
+    onFlagsLoaded: vi.fn(),
+  },
 }));
 vi.mock("../../shell/logger", () => ({
   logger: {
@@ -899,6 +903,7 @@ describe("SessionService", () => {
               id: "mode",
               currentValue: "full-access",
               options: [
+                expect.objectContaining({ value: "plan" }),
                 expect.objectContaining({ value: "read-only" }),
                 expect.objectContaining({ value: "auto" }),
                 expect.objectContaining({ value: "full-access" }),
diff --git a/packages/ui/src/features/sessions/sessionServiceHost.ts b/packages/ui/src/features/sessions/sessionServiceHost.ts
index 9a57f913e7..c0d0e30b80 100644
--- a/packages/ui/src/features/sessions/sessionServiceHost.ts
+++ b/packages/ui/src/features/sessions/sessionServiceHost.ts
@@ -37,6 +37,7 @@ import { WORKSPACE_QUERY_KEY } from "@posthog/ui/features/workspace/identifiers"
 import { toast } from "@posthog/ui/primitives/toast";
 import {
   buildPermissionToolMetadata,
+  posthogFeatureFlags,
   track,
 } from "@posthog/ui/shell/posthogAnalyticsImpl";
 import { logger } from "../../shell/logger";
@@ -80,6 +81,7 @@ function buildSessionServiceDeps(): SessionServiceDeps {
       );
     },
     buildPermissionToolMetadata,
+    featureFlags: posthogFeatureFlags,
     notifyPermissionRequest: (taskTitle, taskId) =>
       resolveService(NotificationBus).notifyPermissionRequest(
         taskTitle,
diff --git a/packages/workspace-server/src/services/agent/agent.ts b/packages/workspace-server/src/services/agent/agent.ts
index 83341d0c1e..b07de98a48 100644
--- a/packages/workspace-server/src/services/agent/agent.ts
+++ b/packages/workspace-server/src/services/agent/agent.ts
@@ -262,6 +262,12 @@ interface SessionConfig {
   /** The agent's session ID (for resume - SDK session ID for Claude, Codex's session ID for Codex) */
   sessionId?: string;
   adapter?: "claude" | "codex";
+  /**
+   * Resolved `codex-app-server` flag for the current user. When true and the
+   * adapter is codex, the agent uses the native app-server sub-adapter; when
+   * false/undefined it uses codex-acp. Ignored by the Claude adapter.
+   */
+  useCodexAppServer?: boolean;
   /** Permission mode to use for the session */
   permissionMode?: string;
   /** Custom instructions injected into the system prompt */
@@ -284,6 +290,16 @@ interface SessionConfig {
   importedSessionId?: string;
 }
 
+/** Read the adapter's `agentCapabilities._meta.posthog.steering` off an initialize result. */
+function extractSteeringCapability(init: unknown): string | undefined {
+  type InitShape = {
+    agentCapabilities?: { _meta?: { posthog?: { steering?: unknown } } };
+  };
+  const raw = (init as InitShape)?.agentCapabilities?._meta?.posthog?.steering;
+  // Only a string counts as an advertised capability; anything else is undefined.
+  return typeof raw === "string" ? raw : undefined;
+}
+
 interface ManagedSession {
   taskRunId: string;
   taskId: string;
@@ -298,6 +314,8 @@ interface ManagedSession {
   promptPending: boolean;
   pendingContext?: string;
   configOptions?: SessionConfigOption[];
+  /** Adapter's negotiated steering capability from initialize (`_meta.posthog.steering`). */
+  steering?: string;
   /** Tracks in-flight MCP tool calls (toolCallId → toolKey) for cancellation */
   inFlightMcpToolCalls: Map<string, string>;
   /** MCP tool approval states fetched at session start */
@@ -675,6 +693,7 @@ If a repository IS genuinely required, attach one in this priority order:
       credentials,
       logUrl,
       adapter,
+      useCodexAppServer,
       permissionMode,
       customInstructions,
       systemPromptOverride,
@@ -787,6 +806,7 @@ If a repository IS genuinely required, attach one in this priority order:
 
       const acpConnection = await agent.run(taskId, taskRunId, {
         adapter,
+        useCodexAppServer,
         gatewayUrl: proxyUrl,
         codexBinaryPath:
           adapter === "codex" ? this.getCodexBinaryPath() : undefined,
@@ -839,7 +859,7 @@ If a repository IS genuinely required, attach one in this priority order:
         clientStreams,
       );
 
-      await connection.initialize({
+      const initResult = await connection.initialize({
         protocolVersion: PROTOCOL_VERSION,
         clientCapabilities: {
           fs: {
@@ -849,6 +869,11 @@ If a repository IS genuinely required, attach one in this priority order:
           terminal: true,
         },
       });
+      // The adapter advertises whether mid-turn steering folds natively into the
+      // running turn (`steering: "native"`) vs needs cancel+resend. Surface it so
+      // the host gates steer-vs-resend on the negotiated capability, not on a
+      // hardcoded adapter name (codex-acp advertises "interrupt-resend").
+      const steering = extractSteeringCapability(initResult);
 
       const {
         servers: mcpServers,
@@ -1054,6 +1079,7 @@ If a repository IS genuinely required, attach one in this priority order:
         config,
         promptPending: false,
         configOptions,
+        steering,
         inFlightMcpToolCalls: new Map(),
         mcpToolApprovals: toolApprovals,
         toolInstallations,
@@ -1901,6 +1927,8 @@ For git operations while detached:
       logUrl: "logUrl" in params ? params.logUrl : undefined,
       sessionId: "sessionId" in params ? params.sessionId : undefined,
       adapter: "adapter" in params ? params.adapter : undefined,
+      useCodexAppServer:
+        "useCodexAppServer" in params ? params.useCodexAppServer : undefined,
       permissionMode:
         "permissionMode" in params ? params.permissionMode : undefined,
       customInstructions:
@@ -1924,6 +1952,7 @@ For git operations while detached:
       sessionId: session.taskRunId,
       channel: session.channel,
       configOptions: session.configOptions,
+      steering: session.steering,
     };
   }
 
diff --git a/packages/workspace-server/src/services/agent/schemas.ts b/packages/workspace-server/src/services/agent/schemas.ts
index 493e79943e..477630edfb 100644
--- a/packages/workspace-server/src/services/agent/schemas.ts
+++ b/packages/workspace-server/src/services/agent/schemas.ts
@@ -52,6 +52,12 @@ export const startSessionInput = z.object({
   autoProgress: z.boolean().optional(),
   runMode: z.enum(["local", "cloud"]).optional(),
   adapter: z.enum(["claude", "codex"]).optional(),
+  /**
+   * Resolved value of the `codex-app-server` PostHog flag (evaluated host-side
+   * for the current user). When true and adapter is "codex", the agent uses the
+   * native app-server sub-adapter instead of codex-acp. Ignored for Claude.
+   */
+  useCodexAppServer: z.boolean().optional(),
   additionalDirectories: z.array(z.string()).optional(),
   customInstructions: z.string().max(2000).optional(),
   /**
@@ -136,6 +142,11 @@ export const sessionResponseSchema = z.object({
   sessionId: z.string(),
   channel: z.string(),
   configOptions: z.array(sessionConfigOptionSchema).optional(),
+  // The adapter's negotiated steering capability from initialize
+  // (`_meta.posthog.steering`): "native" folds a mid-turn message into the
+  // running turn; "interrupt-resend" (codex-acp) or absent means the host must
+  // cancel + resend instead. Drives the host's steer-vs-resend decision.
+  steering: z.string().optional(),
 });
 
 export type SessionResponse = z.infer<typeof sessionResponseSchema>;
@@ -194,6 +205,8 @@ export const reconnectSessionInput = z.object({
   logUrl: z.string().optional(),
   sessionId: z.string().optional(),
   adapter: z.enum(["claude", "codex"]).optional(),
+  /** See startSessionInput.useCodexAppServer — re-resolved on reconnect. */
+  useCodexAppServer: z.boolean().optional(),
   /** Additional directories Claude can access beyond cwd (for worktree support) */
   additionalDirectories: z.array(z.string()).optional(),
   permissionMode: z.string().optional(),
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 3cf4edaa57..d606082cb9 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -700,6 +700,9 @@ importers:
       '@modelcontextprotocol/sdk':
         specifier: 1.29.0
         version: 1.29.0(zod@4.4.3)
+      '@openai/codex':
+        specifier: 0.140.0
+        version: 0.140.0
       '@opentelemetry/api-logs':
         specifier: ^0.208.0
         version: 0.208.0
@@ -3878,6 +3881,47 @@ packages:
   '@open-draft/until@2.1.0':
     resolution: {integrity: sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg==}
 
+  '@openai/codex@0.140.0':
+    resolution: {integrity: sha512-FMnN12kJzVPljMTYRydLCNgd0cXXmVasNSfq2PtS42RMEIxoQ3dHtMvmno35hu2tfwrKNAAPCm4s+2PaFTEBGg==}
+    engines: {node: '>=16'}
+    hasBin: true
+
+  '@openai/codex@0.140.0-darwin-arm64':
+    resolution: {integrity: sha512-KDyQHsxdc8FHZKziSBXs82ABgben/8lLPdhi2Nu+wj6qs2RAp4k/IvE8foafVnp3OeGqhtEFbhlZp0H4Dg/Slg==}
+    engines: {node: '>=16'}
+    cpu: [arm64]
+    os: [darwin]
+
+  '@openai/codex@0.140.0-darwin-x64':
+    resolution: {integrity: sha512-xA77AcKbP8BKxKqaJz8bqXtU1dUtanEKpWCMJ68LuYU054EC31BD7NftFe5/vpLUQR95fhRr7V9a91SLtCuLAg==}
+    engines: {node: '>=16'}
+    cpu: [x64]
+    os: [darwin]
+
+  '@openai/codex@0.140.0-linux-arm64':
+    resolution: {integrity: sha512-rGOgWEONilm+pQoQgcGpPRzvnou1CawyBOe8gvtuS32PQ00Pn+9nZF4O7iKBVlNh6Jeun8kpdJSjFdULm2wr4A==}
+    engines: {node: '>=16'}
+    cpu: [arm64]
+    os: [linux]
+
+  '@openai/codex@0.140.0-linux-x64':
+    resolution: {integrity: sha512-7+N/cHB74nsDkOoL+VQVFVFRlfGj6GFSIAQHgs9DQIsvG+UdzWgUeeDE3l926taJqmzcP9NH8bysptKlZ2Ff6g==}
+    engines: {node: '>=16'}
+    cpu: [x64]
+    os: [linux]
+
+  '@openai/codex@0.140.0-win32-arm64':
+    resolution: {integrity: sha512-vs5Ed5OF+4671SZoO0MN5WoHl/K9aOSNzLgzbyyDyM7Jwm/PZYvF6OmIPRWf5AGatYqEOWt8Ovp5+df5PFPM7A==}
+    engines: {node: '>=16'}
+    cpu: [arm64]
+    os: [win32]
+
+  '@openai/codex@0.140.0-win32-x64':
+    resolution: {integrity: sha512-dP+nzd8UQ3Gdby+F5x0Sxd0hu6V9s6/cZYFsGtmmA6eCpU+IIu5tCOnUfgSu5HDw4BvXg046yd8Ihy5bOhwO4A==}
+    engines: {node: '>=16'}
+    cpu: [x64]
+    os: [win32]
+
   '@opentelemetry/api-logs@0.208.0':
     resolution: {integrity: sha512-CjruKY9V6NMssL/T1kAFgzosF1v9o6oeN+aX5JB/C/xPNtmgIJqcXHG7fA82Ou1zCpWGl4lROQUKwUNE1pMCyg==}
     engines: {node: '>=8.0.0'}
@@ -16288,6 +16332,33 @@ snapshots:
 
   '@open-draft/until@2.1.0': {}
 
+  '@openai/codex@0.140.0':
+    optionalDependencies:
+      '@openai/codex-darwin-arm64': '@openai/codex@0.140.0-darwin-arm64'
+      '@openai/codex-darwin-x64': '@openai/codex@0.140.0-darwin-x64'
+      '@openai/codex-linux-arm64': '@openai/codex@0.140.0-linux-arm64'
+      '@openai/codex-linux-x64': '@openai/codex@0.140.0-linux-x64'
+      '@openai/codex-win32-arm64': '@openai/codex@0.140.0-win32-arm64'
+      '@openai/codex-win32-x64': '@openai/codex@0.140.0-win32-x64'
+
+  '@openai/codex@0.140.0-darwin-arm64':
+    optional: true
+
+  '@openai/codex@0.140.0-darwin-x64':
+    optional: true
+
+  '@openai/codex@0.140.0-linux-arm64':
+    optional: true
+
+  '@openai/codex@0.140.0-linux-x64':
+    optional: true
+
+  '@openai/codex@0.140.0-win32-arm64':
+    optional: true
+
+  '@openai/codex@0.140.0-win32-x64':
+    optional: true
+
   '@opentelemetry/api-logs@0.208.0':
     dependencies:
       '@opentelemetry/api': 1.9.0