Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/agent/event-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ export function registerEventHandler(
state.totalInputTokens += usageData.inputTokens ?? 0;
state.totalOutputTokens += usageData.outputTokens ?? 0;
state.totalCacheReadTokens += usageData.cacheReadTokens ?? 0;
state.totalCacheWriteTokens += usageData.cacheWriteTokens ?? 0;
state.totalRequests += 1;

// Ensure stats appear on a new line — streamed
Expand Down
230 changes: 230 additions & 0 deletions src/agent/llm-output.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,168 @@ import { ANSI, C } from "./ansi.js";

// ── Usage Stats ──────────────────────────────────────────────────────

// ── Model Pricing ────────────────────────────────────────────────────
//
// List-price rates per million tokens for supported models.
// Rates are matched by model-name prefix — the first matching entry wins,
// and the prefix must be followed by end-of-string or '-' to count as a match.
// Add new models by inserting a new entry; order matters (put the longest
// prefix first for specificity).

/**
 * Per-million-token rates for a model tier.
 *
 * All four rate fields are expressed in US dollars per one million tokens
 * ($/MTok). Instances are returned by `getModelPricing` and consumed by
 * `estimateCost`, which multiplies each rate by the matching token count.
 */
export interface ModelPricing {
/** Human-readable label for the pricing tier (shown next to cost estimates). */
label: string;
/** Input (non-cached) tokens — $/MTok. */
inputPerMTok: number;
/** Output tokens — $/MTok. */
outputPerMTok: number;
/** Cache-read tokens — $/MTok (0 if caching not supported). */
cacheReadPerMTok: number;
/** Cache-write tokens — $/MTok (0 if caching not supported). */
cacheWritePerMTok: number;
}

/**
 * Ordered lookup table that maps a model-name prefix to its pricing tier.
 *
 * Lookups scan the table from top to bottom and stop at the first hit, so
 * within a vendor group more specific prefixes must come before less
 * specific ones. Each row below is expanded into a `{ prefix, pricing }`
 * entry; all rates are $/MTok.
 */
const MODEL_PRICING: Array<{ prefix: string; pricing: ModelPricing }> = (() => {
  type Row = readonly [
    prefix: string,
    label: string,
    inputPerMTok: number,
    outputPerMTok: number,
    cacheReadPerMTok: number,
    cacheWritePerMTok: number,
  ];

  // Columns: prefix, label, input, output, cache read, cache write.
  const rows: readonly Row[] = [
    // ── Anthropic Claude ────────────────────────────────────────
    ["claude-opus", "Claude Opus", 15, 75, 1.875, 18.75],
    ["claude-sonnet", "Claude Sonnet", 3, 15, 0.3, 3.75],
    ["claude-haiku", "Claude Haiku", 0.8, 4, 0.08, 1],
    // ── OpenAI ──────────────────────────────────────────────────
    ["o1", "OpenAI o1", 15, 60, 7.5, 0],
    ["o3", "OpenAI o3", 10, 40, 2.5, 0],
    ["gpt-4.1", "GPT-4.1", 2, 8, 0.5, 0],
    ["gpt-4o", "GPT-4o", 2.5, 10, 1.25, 0],
    // ── Google Gemini ───────────────────────────────────────────
    ["gemini-2.5-pro", "Gemini 2.5 Pro", 1.25, 10, 0.315, 0],
    ["gemini-2.5-flash", "Gemini 2.5 Flash", 0.15, 0.6, 0.0375, 0],
  ];

  return rows.map(
    ([
      prefix,
      label,
      inputPerMTok,
      outputPerMTok,
      cacheReadPerMTok,
      cacheWritePerMTok,
    ]) => ({
      prefix,
      pricing: {
        label,
        inputPerMTok,
        outputPerMTok,
        cacheReadPerMTok,
        cacheWritePerMTok,
      },
    }),
  );
})();

/**
 * Resolve the pricing tier for a model name.
 *
 * The name is lower-cased and compared against each entry of
 * `MODEL_PRICING` in table order. An entry matches when the name is exactly
 * the prefix, or continues with a '-' right after it; any other following
 * character is rejected so that an unrelated model sharing the same leading
 * characters is not misclassified.
 *
 * @param modelName - Model identifier; may be undefined or empty.
 * @returns The first matching tier's pricing, or undefined when the name is
 *   missing/empty or no entry matches.
 */
export function getModelPricing(
  modelName: string | undefined,
): ModelPricing | undefined {
  if (!modelName) {
    return undefined;
  }

  const normalized = modelName.toLowerCase();
  for (const { prefix, pricing } of MODEL_PRICING) {
    // Exact hit, or prefix immediately followed by the '-' separator.
    if (normalized === prefix || normalized.startsWith(`${prefix}-`)) {
      return pricing;
    }
  }
  return undefined;
}
Comment thread
simongdavies marked this conversation as resolved.

/**
 * Calculate the estimated cost in USD for a set of token counts.
 *
 * Each count is converted to millions of tokens and multiplied by the
 * matching $/MTok rate from `pricing`; the four products are summed.
 * No validation or rounding is applied — the raw floating-point sum is
 * returned, so formatting is left to the caller.
 *
 * @param pricing - Per-million-token rates to apply.
 * @param inputTokens - Non-cached input tokens only (total input minus
 *   cache reads), so cached tokens are not billed twice.
 * @param outputTokens - Output tokens.
 * @param cacheReadTokens - Tokens served from cache.
 * @param cacheWriteTokens - Tokens written to cache.
 * @returns Estimated cost in US dollars.
 */
export function estimateCost(
  pricing: ModelPricing,
  inputTokens: number,
  outputTokens: number,
  cacheReadTokens: number,
  cacheWriteTokens: number,
): number {
  const MILLION = 1_000_000;
  return (
    (inputTokens / MILLION) * pricing.inputPerMTok +
    (outputTokens / MILLION) * pricing.outputPerMTok +
    (cacheReadTokens / MILLION) * pricing.cacheReadPerMTok +
    (cacheWriteTokens / MILLION) * pricing.cacheWritePerMTok
  );
}

/** Shape of assistant.usage event data. */
export interface UsageData {
model?: string;
Expand Down Expand Up @@ -47,6 +209,28 @@ export function formatUsageStats(d: UsageData): string | null {
if (d.duration !== undefined) {
parts.push(`${(d.duration / 1000).toFixed(1)}s`);
}
// Estimated cost for this request based on model pricing
const pricing = getModelPricing(d.model);
if (pricing) {
// Subtract cache reads from input to avoid double-counting —
// inputTokens typically includes the cached portion.
const nonCachedInput = Math.max(
0,
(d.inputTokens ?? 0) - (d.cacheReadTokens ?? 0),
);
const reqCost = estimateCost(
pricing,
nonCachedInput,
d.outputTokens ?? 0,
d.cacheReadTokens ?? 0,
Comment thread
simongdavies marked this conversation as resolved.
d.cacheWriteTokens ?? 0,
);
if (reqCost > 0) {
parts.push(
`~$${reqCost < 0.01 ? reqCost.toFixed(4) : reqCost.toFixed(2)}`,
);
}
}
return parts.length > 0 ? parts.join(" · ") : null;
}

Expand All @@ -68,8 +252,10 @@ export function formatTokenSummary(state: {
totalInputTokens: number;
totalOutputTokens: number;
totalCacheReadTokens: number;
totalCacheWriteTokens: number;
totalRequests: number;
totalTurns: number;
currentModel?: string;
}): string[] {
Comment thread
simongdavies marked this conversation as resolved.
const total = state.totalInputTokens + state.totalOutputTokens;
const lines: string[] = [];
Expand All @@ -81,9 +267,53 @@ export function formatTokenSummary(state: {
`Cache read: ${state.totalCacheReadTokens.toLocaleString()} tokens`,
);
}
if (state.totalCacheWriteTokens > 0) {
lines.push(
`Cache write: ${state.totalCacheWriteTokens.toLocaleString()} tokens`,
);
}
lines.push(`Total: ${total.toLocaleString()} tokens`);
lines.push(`Requests: ${state.totalRequests}`);
lines.push(`Turns: ${state.totalTurns}`);

// Estimated session cost based on model list pricing
const pricing = getModelPricing(state.currentModel);
if (pricing) {
// Compute non-cached input: total input minus cache reads
const nonCachedInput = Math.max(
0,
state.totalInputTokens - state.totalCacheReadTokens,
);
const sessionCost = estimateCost(
pricing,
nonCachedInput,
state.totalOutputTokens,
state.totalCacheReadTokens,
state.totalCacheWriteTokens,
);
lines.push("");
lines.push(
`${C.label("Est. Cost")} ~$${sessionCost.toFixed(2)} ${C.dim(`(${pricing.label} list pricing)`)}`,
);

// Show what it would have cost without caching
if (state.totalCacheReadTokens > 0) {
const noCacheCost = estimateCost(
pricing,
state.totalInputTokens,
state.totalOutputTokens,
0,
0,
);
const saved = noCacheCost - sessionCost;
if (saved > 0.01) {
lines.push(
`${C.dim(`Cache saved: ~$${saved.toFixed(2)} (${((saved / noCacheCost) * 100).toFixed(0)}% reduction)`)}`,
);
}
}
}

return lines;
}

Expand Down
4 changes: 4 additions & 0 deletions src/agent/state.ts
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,9 @@ export interface AgentState {
/** Cumulative cache-read tokens across all LLM requests this session. */
totalCacheReadTokens: number;

/** Cumulative cache-write tokens across all LLM requests this session. */
totalCacheWriteTokens: number;

/** Total number of LLM API requests (one per assistant.usage event). */
totalRequests: number;

Expand Down Expand Up @@ -339,6 +342,7 @@ export function createAgentState(
totalInputTokens: 0,
totalOutputTokens: 0,
totalCacheReadTokens: 0,
totalCacheWriteTokens: 0,
totalRequests: 0,
totalTurns: 0,
};
Expand Down
Loading