// ── Model Pricing ────────────────────────────────────────────────────
//
// List-price rates per million tokens for supported models.
// Entries are matched by model-name prefix. When several prefixes match,
// the longest (most specific) one is used, so table order is not
// significant. Rates are hard-coded list prices and must be updated by
// hand when vendors change them.

/** Per-million-token rates for a model tier. */
export interface ModelPricing {
  /** Human-readable label for the pricing tier. */
  label: string;
  /** Input (non-cached) tokens — $/MTok. */
  inputPerMTok: number;
  /** Output tokens — $/MTok. */
  outputPerMTok: number;
  /** Cache-read tokens — $/MTok (0 if caching not supported). */
  cacheReadPerMTok: number;
  /** Cache-write tokens — $/MTok (0 when the tier has no separate cache-write rate). */
  cacheWritePerMTok: number;
}

/**
 * Pricing table keyed by lowercase model-name prefix.
 *
 * A prefix only matches when it is followed by end-of-string or '-',
 * and the longest matching prefix wins (see getModelPricing).
 */
const MODEL_PRICING: ReadonlyArray<{ prefix: string; pricing: ModelPricing }> = [
  // ── Anthropic Claude ────────────────────────────────────────
  {
    prefix: "claude-opus",
    pricing: {
      label: "Claude Opus",
      inputPerMTok: 15,
      outputPerMTok: 75,
      cacheReadPerMTok: 1.875,
      cacheWritePerMTok: 18.75,
    },
  },
  {
    prefix: "claude-sonnet",
    pricing: {
      label: "Claude Sonnet",
      inputPerMTok: 3,
      outputPerMTok: 15,
      cacheReadPerMTok: 0.3,
      cacheWritePerMTok: 3.75,
    },
  },
  {
    prefix: "claude-haiku",
    pricing: {
      label: "Claude Haiku",
      inputPerMTok: 0.8,
      outputPerMTok: 4,
      cacheReadPerMTok: 0.08,
      cacheWritePerMTok: 1,
    },
  },
  // ── OpenAI ──────────────────────────────────────────────────
  {
    prefix: "o1",
    pricing: {
      label: "OpenAI o1",
      inputPerMTok: 15,
      outputPerMTok: 60,
      cacheReadPerMTok: 7.5,
      cacheWritePerMTok: 0,
    },
  },
  {
    prefix: "o3",
    pricing: {
      label: "OpenAI o3",
      inputPerMTok: 10,
      outputPerMTok: 40,
      cacheReadPerMTok: 2.5,
      cacheWritePerMTok: 0,
    },
  },
  {
    prefix: "gpt-4.1",
    pricing: {
      label: "GPT-4.1",
      inputPerMTok: 2,
      outputPerMTok: 8,
      cacheReadPerMTok: 0.5,
      cacheWritePerMTok: 0,
    },
  },
  {
    prefix: "gpt-4o",
    pricing: {
      label: "GPT-4o",
      inputPerMTok: 2.5,
      outputPerMTok: 10,
      cacheReadPerMTok: 1.25,
      cacheWritePerMTok: 0,
    },
  },
  // ── Google Gemini ───────────────────────────────────────────
  {
    prefix: "gemini-2.5-pro",
    pricing: {
      label: "Gemini 2.5 Pro",
      inputPerMTok: 1.25,
      outputPerMTok: 10,
      cacheReadPerMTok: 0.315,
      cacheWritePerMTok: 0,
    },
  },
  {
    prefix: "gemini-2.5-flash",
    pricing: {
      label: "Gemini 2.5 Flash",
      inputPerMTok: 0.15,
      outputPerMTok: 0.6,
      cacheReadPerMTok: 0.0375,
      cacheWritePerMTok: 0,
    },
  },
];

/**
 * Name segments that, when they directly follow a matched prefix, mark a
 * separately priced variant (e.g. "gpt-4o-mini", "o3-pro",
 * "gemini-2.5-flash-lite"). Such a model must not inherit the rates of
 * the shorter prefix; it gets a price only if the table has a dedicated
 * entry whose prefix includes the segment.
 */
const DISTINCT_TIER_MARKERS: ReadonlySet<string> = new Set([
  "mini",
  "nano",
  "lite",
  "pro",
]);

/**
 * Look up pricing for a model by name prefix (case-insensitive).
 *
 * A table prefix matches when the model name starts with it and the next
 * character is end-of-string or '-'. Among all matches the longest
 * prefix wins. If the segment right after the winning prefix is a
 * distinct-tier marker (mini / nano / lite / pro), no pricing is
 * returned rather than reporting the wrong tier's rates.
 *
 * @param modelName Model identifier; may be undefined or empty.
 * @returns The matching pricing tier, or undefined when the name is
 *   missing, matches no prefix, or names an unlisted variant tier.
 */
export function getModelPricing(
  modelName: string | undefined,
): ModelPricing | undefined {
  if (!modelName) return undefined;
  const lower = modelName.toLowerCase();

  let best: { prefix: string; pricing: ModelPricing } | undefined;
  for (const entry of MODEL_PRICING) {
    if (!lower.startsWith(entry.prefix)) continue;
    // Require word boundary after prefix: end-of-string or '-'
    const afterPrefix = lower[entry.prefix.length];
    if (afterPrefix !== undefined && afterPrefix !== "-") continue;
    if (best === undefined || entry.prefix.length > best.prefix.length) {
      best = entry;
    }
  }
  if (best === undefined) return undefined;

  // Remainder is "" or "-<segment>[-…]"; inspect the first segment only.
  const remainder = lower.slice(best.prefix.length);
  const firstSegment = remainder.slice(1).split("-", 1)[0] ?? "";
  if (DISTINCT_TIER_MARKERS.has(firstSegment)) return undefined;

  return best.pricing;
}

/**
 * Calculate the estimated cost in USD for a set of token counts.
 *
 * The inputTokens parameter should be non-cached input only (total
 * input minus cache reads) to avoid double-counting. Counts that are
 * negative or not finite contribute nothing, so the result is never NaN
 * for finite rates.
 *
 * @param pricing Per-million-token rates to apply.
 * @param inputTokens Non-cached input tokens.
 * @param outputTokens Output tokens.
 * @param cacheReadTokens Tokens served from cache.
 * @param cacheWriteTokens Tokens written to cache.
 * @returns Sum of the four components in USD.
 */
export function estimateCost(
  pricing: ModelPricing,
  inputTokens: number,
  outputTokens: number,
  cacheReadTokens: number,
  cacheWriteTokens: number,
): number {
  const MILLION = 1_000_000;
  const component = (tokens: number, ratePerMTok: number): number =>
    Number.isFinite(tokens) && tokens > 0
      ? (tokens / MILLION) * ratePerMTok
      : 0;
  return (
    component(inputTokens, pricing.inputPerMTok) +
    component(outputTokens, pricing.outputPerMTok) +
    component(cacheReadTokens, pricing.cacheReadPerMTok) +
    component(cacheWriteTokens, pricing.cacheWritePerMTok)
  );
}
reqCost.toFixed(4) : reqCost.toFixed(2)}`, + ); + } + } return parts.length > 0 ? parts.join(" · ") : null; } @@ -68,8 +252,10 @@ export function formatTokenSummary(state: { totalInputTokens: number; totalOutputTokens: number; totalCacheReadTokens: number; + totalCacheWriteTokens: number; totalRequests: number; totalTurns: number; + currentModel?: string; }): string[] { const total = state.totalInputTokens + state.totalOutputTokens; const lines: string[] = []; @@ -81,9 +267,53 @@ export function formatTokenSummary(state: { `Cache read: ${state.totalCacheReadTokens.toLocaleString()} tokens`, ); } + if (state.totalCacheWriteTokens > 0) { + lines.push( + `Cache write: ${state.totalCacheWriteTokens.toLocaleString()} tokens`, + ); + } lines.push(`Total: ${total.toLocaleString()} tokens`); lines.push(`Requests: ${state.totalRequests}`); lines.push(`Turns: ${state.totalTurns}`); + + // Estimated session cost based on model list pricing + const pricing = getModelPricing(state.currentModel); + if (pricing) { + // Compute non-cached input: total input minus cache reads + const nonCachedInput = Math.max( + 0, + state.totalInputTokens - state.totalCacheReadTokens, + ); + const sessionCost = estimateCost( + pricing, + nonCachedInput, + state.totalOutputTokens, + state.totalCacheReadTokens, + state.totalCacheWriteTokens, + ); + lines.push(""); + lines.push( + `${C.label("Est. 
Cost")} ~$${sessionCost.toFixed(2)} ${C.dim(`(${pricing.label} list pricing)`)}`, + ); + + // Show what it would have cost without caching + if (state.totalCacheReadTokens > 0) { + const noCacheCost = estimateCost( + pricing, + state.totalInputTokens, + state.totalOutputTokens, + 0, + 0, + ); + const saved = noCacheCost - sessionCost; + if (saved > 0.01) { + lines.push( + `${C.dim(`Cache saved: ~$${saved.toFixed(2)} (${((saved / noCacheCost) * 100).toFixed(0)}% reduction)`)}`, + ); + } + } + } + return lines; } diff --git a/src/agent/state.ts b/src/agent/state.ts index fa6f67e..fe9f7e2 100644 --- a/src/agent/state.ts +++ b/src/agent/state.ts @@ -252,6 +252,9 @@ export interface AgentState { /** Cumulative cache-read tokens across all LLM requests this session. */ totalCacheReadTokens: number; + /** Cumulative cache-write tokens across all LLM requests this session. */ + totalCacheWriteTokens: number; + /** Total number of LLM API requests (one per assistant.usage event). */ totalRequests: number; @@ -339,6 +342,7 @@ export function createAgentState( totalInputTokens: 0, totalOutputTokens: 0, totalCacheReadTokens: 0, + totalCacheWriteTokens: 0, totalRequests: 0, totalTurns: 0, };