From 67bb500522ea72603495898c26ac0657452029b0 Mon Sep 17 00:00:00 2001 From: Adam Bowker Date: Sat, 27 Jun 2026 11:18:16 -0400 Subject: [PATCH 1/4] =?UTF-8?q?fix(sessions):=20show=20"Thinking=E2=80=A6"?= =?UTF-8?q?=20on=20the=20group=20chip=20while=20the=20agent=20thinks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A turn that's mid extended-thinking folds into a collapsed ToolCallGroupChip, but `summarize()` derived the chip's label/spinner only from `tool_call` updates. Thought chunks carry no tool status or title, so a thinking-only turn summarized as `liveLabel = null` / `active = false` / `doneLabel = "Worked"` — the chip read as finished ("Worked", no spinner) while the agent was actively thinking, with the reasoning buried inside the collapsed chip. Make `summarize()` thinking-aware: a trailing, still-streaming thought (`thoughtComplete !== true`, the same flag ThoughtView uses for its spinner) now marks the group active with a "Thinking…" live label. The existing chip logic then shows "Thinking…" with a spinner (or "Read a file · Thinking…" when work preceded it). Works live since the active turn's summary is never cached. Adds buildThreadGroups.test.ts covering the thinking-only, tool-after-thought, thought-after-work, and completed-thought cases. Generated-By: PostHog Code Task-Id: 6fa59fea-01a6-4d72-aadb-2e745e2c5495 --- .../new-thread/buildThreadGroups.test.ts | 113 ++++++++++++++++++ .../new-thread/buildThreadGroups.ts | 21 +++- 2 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts diff --git a/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts new file mode 100644 index 000000000..04058c3fc --- /dev/null +++ b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts @@ -0,0 +1,113 @@ +import type { + ConversationItem, + TurnContext, +} from "@posthog/ui/features/sessions/components/buildConversationItems"; +import { buildThreadGroups } from "@posthog/ui/features/sessions/components/new-thread/buildThreadGroups"; +import { describe, expect, it } from "vitest"; + +const activeContext: TurnContext = { + toolCalls: new Map(), + childItems: new Map(), + turnCancelled: false, + turnComplete: false, +}; + +const completeContext: TurnContext = { + toolCalls: new Map(), + childItems: new Map(), + turnCancelled: false, + turnComplete: true, +}; + +function thought( + id: string, + { thoughtComplete }: { thoughtComplete?: boolean }, + turnContext: TurnContext = activeContext, +): ConversationItem { + return { + type: "session_update", + id, + turnContext, + thoughtComplete, + update: { + sessionUpdate: "agent_thought_chunk", + content: { type: "text", text: "pondering" }, + }, + }; +} + +function toolItem( + id: string, + turnContext: TurnContext = activeContext, +): ConversationItem { + return { + type: "session_update", + id, + turnContext, + update: { + sessionUpdate: "tool_call", + toolCallId: id, + kind: "read", + title: "Read file.ts", + status: turnContext.turnComplete ? "completed" : "in_progress", + }, + }; +} + +/** The single tool_group row's summary, or a failed assertion. */ +function summaryOf(items: ConversationItem[]) { + const { rows } = buildThreadGroups(items, "all", {}); + const group = rows.find((r) => r.kind === "tool_group"); + if (group?.kind !== "tool_group") throw new Error("expected a tool_group row"); + return group.summary; +} + +describe("buildThreadGroups summary — thinking awareness", () => { + it("reads a turn mid extended-thinking as live, not 'Worked'", () => { + // A still-streaming thought (thoughtComplete falsy) is the only activity so + // far: the chip must say it's thinking, not fall back to the done label. + const summary = summaryOf([thought("th1", { thoughtComplete: false })]); + + expect(summary.active).toBe(true); + expect(summary.liveLabel).toBe("Thinking…"); + expect(summary.hasCountableWork).toBe(false); + }); + + it("keeps the tool's live label when a tool runs after thinking", () => { + // Thought, then an in-flight tool call: the tool is the latest activity, so + // its title wins over the thinking label. + const summary = summaryOf([ + thought("th1", { thoughtComplete: true }), + toolItem("t1"), + ]); + + expect(summary.active).toBe(true); + expect(summary.liveLabel).toBe("Read file.ts"); + }); + + it("shows thinking again when a thought trails completed tool work", () => { + // Tool finished, agent is thinking once more: countable work plus a live + // thinking label, so the chip can read "Read a file · Thinking…". + const summary = summaryOf([ + toolItem("t1"), + thought("th1", { thoughtComplete: false }), + ]); + + expect(summary.active).toBe(true); + expect(summary.liveLabel).toBe("Thinking…"); + expect(summary.hasCountableWork).toBe(true); + expect(summary.doneLabel).toBe("Read a file"); + }); + + it("does not treat a completed thought as live work", () => { + // A finished turn whose only activity was thinking: no live label, falls + // back to the "Worked" done label (there is no countable tool work). + const summary = summaryOf([ + thought("th1", { thoughtComplete: true }, completeContext), + ]); + + expect(summary.active).toBe(false); + expect(summary.liveLabel).toBeNull(); + expect(summary.doneLabel).toBe("Worked"); + }); +}); diff --git a/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.ts b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.ts index e4ea31e50..95893e927 100644 --- a/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.ts +++ b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.ts @@ -11,6 +11,9 @@ import { SUBAGENT_ICON, } from "@posthog/ui/features/sessions/components/new-thread/conversationThreadConfig"; +/** Live label for a turn that is mid extended-thinking (no tool call yet). */ +const THINKING_LIVE_LABEL = "Thinking…"; + export interface GroupIconEntry { Icon: Icon; key: GroupIconKey; @@ -202,8 +205,24 @@ function summarize(items: ConversationItem[]): GroupSummary { } } + // The agent's extended thinking streams as thought chunks, which carry no + // tool status or title. Without accounting for them, a turn that is mid- + // thought (before its first tool call) summarizes as "Worked" with no + // spinner — reading as finished while the agent is actively thinking. Treat a + // trailing, still-streaming thought as live work so the collapsed chip shows + // "Thinking…" and spins. `thoughtComplete` is the same flag ThoughtView uses + // to drive its own loading state. + const last = items[items.length - 1]; + const streamingThought = + last?.type === "session_update" && + last.update.sessionUpdate === "agent_thought_chunk" && + last.thoughtComplete !== true; + if (streamingThought) liveLabel = THINKING_LIVE_LABEL; + const active = - lastToolStatus === "pending" || lastToolStatus === "in_progress"; + streamingThought || + lastToolStatus === "pending" || + lastToolStatus === "in_progress"; const hasCountableWork = counts.execute + counts.read + From 1232e9d96c36105c79310f8ba59f02d6d37a8a22 Mon Sep 17 00:00:00 2001 From: Adam Bowker Date: Sat, 27 Jun 2026 11:36:52 -0400 Subject: [PATCH 2/4] style: apply Biome formatting to buildThreadGroups test The `biome ci` quality check enforces formatting; wrap the guard's throw onto its own line so the file matches the formatter's output. Generated-By: PostHog Code Task-Id: 6fa59fea-01a6-4d72-aadb-2e745e2c5495 --- .../sessions/components/new-thread/buildThreadGroups.test.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts index 04058c3fc..b70903ffa 100644 --- a/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts +++ b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts @@ -58,7 +58,8 @@ function toolItem( function summaryOf(items: ConversationItem[]) { const { rows } = buildThreadGroups(items, "all", {}); const group = rows.find((r) => r.kind === "tool_group"); - if (group?.kind !== "tool_group") throw new Error("expected a tool_group row"); + if (group?.kind !== "tool_group") + throw new Error("expected a tool_group row"); return group.summary; } From 86ed3c244414f1fb31f20a1bb74b7c0ef920250a Mon Sep 17 00:00:00 2001 From: Adam Bowker Date: Sat, 27 Jun 2026 11:38:21 -0400 Subject: [PATCH 3/4] test: parameterise buildThreadGroups thinking-awareness cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The four cases share one shape — items in, GroupSummary fields out — so fold them into a single it.each table (active / liveLabel / hasCountableWork / doneLabel as expectation columns), matching the repo's parameterised-test convention. Per Greptile review feedback. Generated-By: PostHog Code Task-Id: 6fa59fea-01a6-4d72-aadb-2e745e2c5495 --- .../new-thread/buildThreadGroups.test.ts | 92 ++++++++++--------- 1 file changed, 47 insertions(+), 45 deletions(-) diff --git a/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts index b70903ffa..07aa009c3 100644 --- a/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts +++ b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts @@ -64,51 +64,53 @@ function summaryOf(items: ConversationItem[]) { } describe("buildThreadGroups summary — thinking awareness", () => { - it("reads a turn mid extended-thinking as live, not 'Worked'", () => { - // A still-streaming thought (thoughtComplete falsy) is the only activity so - // far: the chip must say it's thinking, not fall back to the done label. - const summary = summaryOf([thought("th1", { thoughtComplete: false })]); - - expect(summary.active).toBe(true); - expect(summary.liveLabel).toBe("Thinking…"); - expect(summary.hasCountableWork).toBe(false); - }); - - it("keeps the tool's live label when a tool runs after thinking", () => { - // Thought, then an in-flight tool call: the tool is the latest activity, so - // its title wins over the thinking label. - const summary = summaryOf([ - thought("th1", { thoughtComplete: true }), - toolItem("t1"), - ]); - - expect(summary.active).toBe(true); - expect(summary.liveLabel).toBe("Read file.ts"); - }); - - it("shows thinking again when a thought trails completed tool work", () => { - // Tool finished, agent is thinking once more: countable work plus a live - // thinking label, so the chip can read "Read a file · Thinking…". - const summary = summaryOf([ - toolItem("t1"), - thought("th1", { thoughtComplete: false }), - ]); - - expect(summary.active).toBe(true); - expect(summary.liveLabel).toBe("Thinking…"); - expect(summary.hasCountableWork).toBe(true); - expect(summary.doneLabel).toBe("Read a file"); - }); - - it("does not treat a completed thought as live work", () => { - // A finished turn whose only activity was thinking: no live label, falls - // back to the "Worked" done label (there is no countable tool work). - const summary = summaryOf([ - thought("th1", { thoughtComplete: true }, completeContext), - ]); + it.each([ + { + // A still-streaming thought is the only activity so far: the chip must say + // it's thinking, not fall back to the done label. + name: "reads a turn mid extended-thinking as live, not 'Worked'", + items: [thought("th1", { thoughtComplete: false })], + active: true, + liveLabel: "Thinking…", + hasCountableWork: false, + doneLabel: "Worked", + }, + { + // Thought, then an in-flight tool call: the tool is the latest activity, + // so its title wins over the thinking label. + name: "keeps the tool's live label when a tool runs after thinking", + items: [thought("th1", { thoughtComplete: true }), toolItem("t1")], + active: true, + liveLabel: "Read file.ts", + hasCountableWork: true, + doneLabel: "Read a file", + }, + { + // Tool finished, agent is thinking once more: countable work plus a live + // thinking label, so the chip can read "Read a file · Thinking…". + name: "shows thinking again when a thought trails completed tool work", + items: [toolItem("t1"), thought("th1", { thoughtComplete: false })], + active: true, + liveLabel: "Thinking…", + hasCountableWork: true, + doneLabel: "Read a file", + }, + { + // A finished turn whose only activity was thinking: no live label, falls + // back to the "Worked" done label (there is no countable tool work). + name: "does not treat a completed thought as live work", + items: [thought("th1", { thoughtComplete: true }, completeContext)], + active: false, + liveLabel: null, + hasCountableWork: false, + doneLabel: "Worked", + }, + ])("$name", ({ items, active, liveLabel, hasCountableWork, doneLabel }) => { + const summary = summaryOf(items); - expect(summary.active).toBe(false); - expect(summary.liveLabel).toBeNull(); - expect(summary.doneLabel).toBe("Worked"); + expect(summary.active).toBe(active); + expect(summary.liveLabel).toBe(liveLabel); + expect(summary.hasCountableWork).toBe(hasCountableWork); + expect(summary.doneLabel).toBe(doneLabel); }); }); From bcaeb2e28ee0828e459b40473f5cdedc9fa69702 Mon Sep 17 00:00:00 2001 From: Adam Bowker Date: Sat, 27 Jun 2026 13:31:49 -0400 Subject: [PATCH 4/4] fix(sessions): no empty box on expand; show thinking text inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expanding a "Worked" chip whose only activity was thinking showed an empty bordered box, and even with content it showed a redundant collapsed "Thinking" row needing a second click. Two causes: - Blank extended-thinking streams as a text-less agent_thought_chunk, which ThoughtView renders as null. But the chip's bordered box draws whenever it has children (a hidden child still defeats CSS :empty), so the box stayed empty. - ThoughtView collapses its content by default, so a non-blank thought inside an already-expanded chip showed only a "Thinking" header. Fixes: - groupItemRendersContent() mirrors the null-returning branches of SessionUpdateView/ThoughtView; ConversationView only feeds renderable items to the chip and passes no children when none render, so ToolRow skips the box. - ToolCallGroupChip gains an `expandable` prop: a group with no renderable body is a plain summary line with no caret, instead of a caret opening onto nothing. - ThoughtView renders its content open by default — thinking has no useful one-line summary, so once revealed the reasoning itself is what's worth seeing. Adds groupItemRendersContent test cases (blank/streaming/text/tool). Generated-By: PostHog Code Task-Id: 6fa59fea-01a6-4d72-aadb-2e745e2c5495 --- .../sessions/components/ConversationView.tsx | 22 +++++++--- .../new-thread/ToolCallGroupChip.tsx | 29 ++++++++----- .../new-thread/buildThreadGroups.test.ts | 42 +++++++++++++++++-- .../new-thread/buildThreadGroups.ts | 21 ++++++++++ .../components/session-update/ThoughtView.tsx | 5 +++ 5 files changed, 101 insertions(+), 18 deletions(-) diff --git a/packages/ui/src/features/sessions/components/ConversationView.tsx b/packages/ui/src/features/sessions/components/ConversationView.tsx index d15543515..1766d2e47 100644 --- a/packages/ui/src/features/sessions/components/ConversationView.tsx +++ b/packages/ui/src/features/sessions/components/ConversationView.tsx @@ -18,9 +18,10 @@ import { ConversationSearchBar } from "@posthog/ui/features/sessions/components/ import { GitActionMessage } from "@posthog/ui/features/sessions/components/GitActionMessage"; import { GitActionResult } from "@posthog/ui/features/sessions/components/GitActionResult"; import { mergeConversationItems } from "@posthog/ui/features/sessions/components/mergeConversationItems"; -import type { - ThreadGrouping, - ThreadRow, +import { + groupItemRendersContent, + type ThreadGrouping, + type ThreadRow, } from "@posthog/ui/features/sessions/components/new-thread/buildThreadGroups"; import type { CollapseMode } from "@posthog/ui/features/sessions/components/new-thread/conversationThreadConfig"; import { createIncrementalThreadGrouper } from "@posthog/ui/features/sessions/components/new-thread/incrementalThreadGrouping"; @@ -294,17 +295,28 @@ export function ConversationView({ const renderRow = useCallback( (row: ThreadRow) => { if (row.kind === "item") return renderItem(row.item); + // Only items that actually render content reach the chip body — otherwise + // a turn whose sole activity was a blank thinking block would expand to an + // empty bordered box (the box draws whenever it has children, even hidden + // ones). When nothing is renderable the chip is a plain summary line with + // no expand affordance, rather than a caret that opens onto nothing. + const hasVisibleContent = row.items.some(groupItemRendersContent); + const visibleItems = + row.expanded && hasVisibleContent + ? row.items.filter(groupItemRendersContent) + : []; return ( sessionViewActions.setGroupOverride(row.id, !row.expanded) } > - {row.expanded - ? row.items.map((it) => { + {visibleItems.length > 0 + ? visibleItems.map((it) => { // Plain assistant text inside the group has no leading icon, so // pad it to line up with the tool titles (the text-next-to-icon // column = ToolCallBlock's pl-3 + the icon/gap width). Tool and diff --git a/packages/ui/src/features/sessions/components/new-thread/ToolCallGroupChip.tsx b/packages/ui/src/features/sessions/components/new-thread/ToolCallGroupChip.tsx index 94e8a1cd4..fdfa18b1a 100644 --- a/packages/ui/src/features/sessions/components/new-thread/ToolCallGroupChip.tsx +++ b/packages/ui/src/features/sessions/components/new-thread/ToolCallGroupChip.tsx @@ -15,6 +15,12 @@ interface ToolCallGroupChipProps { expanded: boolean; turnComplete: boolean; onToggle: () => void; + /** + * Whether the chip can be expanded. False when the group has no renderable + * body (e.g. a turn whose only activity was a blank thinking block) — the + * chip then reads as a plain summary line with no caret. Defaults to true. + */ + expandable?: boolean; /** Rendered group items, shown inside the ToolRow's box when expanded. */ children?: ReactNode; } @@ -30,6 +36,7 @@ export function ToolCallGroupChip({ expanded, turnComplete, onToggle, + expandable = true, children, }: ToolCallGroupChipProps) { const reduceMotion = useReducedMotion(); @@ -56,17 +63,19 @@ export function ToolCallGroupChip({ className="pl-3" > - + + {expandable ? ( + + ) : null} } trailing={ diff --git a/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts index 07aa009c3..c0b33031b 100644 --- a/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts +++ b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.test.ts @@ -2,7 +2,10 @@ import type { ConversationItem, TurnContext, } from "@posthog/ui/features/sessions/components/buildConversationItems"; -import { buildThreadGroups } from "@posthog/ui/features/sessions/components/new-thread/buildThreadGroups"; +import { + buildThreadGroups, + groupItemRendersContent, +} from "@posthog/ui/features/sessions/components/new-thread/buildThreadGroups"; import { describe, expect, it } from "vitest"; const activeContext: TurnContext = { @@ -21,7 +24,10 @@ const completeContext: TurnContext = { function thought( id: string, - { thoughtComplete }: { thoughtComplete?: boolean }, + { + thoughtComplete, + text = "pondering", + }: { thoughtComplete?: boolean; text?: string }, turnContext: TurnContext = activeContext, ): ConversationItem { return { @@ -31,7 +37,7 @@ function thought( thoughtComplete, update: { sessionUpdate: "agent_thought_chunk", - content: { type: "text", text: "pondering" }, + content: { type: "text", text }, }, }; } @@ -114,3 +120,33 @@ describe("buildThreadGroups summary — thinking awareness", () => { expect(summary.doneLabel).toBe(doneLabel); }); }); + +describe("groupItemRendersContent", () => { + it.each([ + { + name: "a completed thought with text renders", + item: thought("th", { thoughtComplete: true, text: "reasoned" }), + expected: true, + }, + { + // The bug source: blank extended-thinking streams as a text-less thought + // chunk, which renders nothing once complete — so it must not keep the + // chip's bordered box alive. + name: "a completed blank thought renders nothing", + item: thought("th", { thoughtComplete: true, text: " " }), + expected: false, + }, + { + name: "a blank thought still streaming renders (its spinner)", + item: thought("th", { thoughtComplete: false, text: "" }), + expected: true, + }, + { + name: "a tool call renders", + item: toolItem("t1", completeContext), + expected: true, + }, + ])("$name", ({ item, expected }) => { + expect(groupItemRendersContent(item)).toBe(expected); + }); +}); diff --git a/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.ts b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.ts index 95893e927..71cd01798 100644 --- a/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.ts +++ b/packages/ui/src/features/sessions/components/new-thread/buildThreadGroups.ts @@ -129,6 +129,27 @@ export function isGroupableItem(item: ConversationItem): boolean { return true; } +/** + * Whether a grouped item renders anything visible inside an expanded chip. + * Mirrors the `null`-returning branches of SessionUpdateView / ThoughtView so + * the chip can drop its bordered box when expanding would reveal nothing — + * otherwise a turn whose only activity was an empty thinking block (blank + * extended-thinking streams as a text-less thought chunk) shows an empty box. + */ +export function groupItemRendersContent(item: ConversationItem): boolean { + if (item.type !== "session_update") return true; + const update = item.update; + if (update.sessionUpdate === "user_message_chunk") return false; + if (update.sessionUpdate === "agent_thought_chunk") { + const hasText = + update.content.type === "text" && update.content.text.trim().length > 0; + // A blank thought still renders a spinner while streaming; only a blank + // *completed* thought collapses to nothing (see ThoughtView). + return hasText || item.thoughtComplete !== true; + } + return true; +} + function summarize(items: ConversationItem[]): GroupSummary { const counts: GroupCounts = { execute: 0, diff --git a/packages/ui/src/features/sessions/components/session-update/ThoughtView.tsx b/packages/ui/src/features/sessions/components/session-update/ThoughtView.tsx index b1d9b0cc2..bc9a07945 100644 --- a/packages/ui/src/features/sessions/components/session-update/ThoughtView.tsx +++ b/packages/ui/src/features/sessions/components/session-update/ThoughtView.tsx @@ -23,6 +23,11 @@ export const ThoughtView = memo(function ThoughtView({ {content} : undefined} > Thinking