feat: add estimated cost to token usage display (#114)

simongdavies · web-flow · commit 09d6567a891a · 2026-05-07T18:44:44.000+01:00
* feat: add estimated cost to token usage display Adds model-aware cost estimation to both per-request inline stats and the session summary (/tokens command + exit display). Pricing table: - Claude Opus/Sonnet/Haiku (with cache read/write rates) - OpenAI o1, o3, GPT-4.1, GPT-4o (with cache read rates) - Gemini 2.5 Pro, 2.5 Flash (with cache read rates) Per-request display now shows: '~$X.XX' at the end of the stats line Session summary now shows: - Est. Cost with model tier label - Cache savings (how much caching saved vs no-cache pricing) Also tracks cacheWriteTokens (was available from SDK but not accumulated) for accurate cost calculation. Implementation: - getModelPricing(modelName) — prefix-based pricing lookup - estimateCost(pricing, in, out, cacheRead, cacheWrite) — USD calc - Both exported for testing/reuse Signed-off-by: Simon Davies <simongdavies@users.noreply.github.com> * fix: address PR #114 review feedback on cost estimation - Fix JSDoc on estimateCost: removed incorrect 'returns undefined' claim — function always returns a number - Fix per-request cost double-counting: subtract cacheReadTokens from inputTokens before estimating (inputTokens includes cached portion) - Make currentModel optional in formatTokenSummary — gracefully skips pricing when model is unknown - Tighten prefix matching in getModelPricing: require word boundary (end-of-string or '-') after prefix to prevent misclassification (e.g. 'o3' won't match 'o3something', only 'o3' or 'o3-mini') Signed-off-by: Simon Davies <simongdavies@users.noreply.github.com> --------- Signed-off-by: Simon Davies <simongdavies@users.noreply.github.com>
diff --git a/src/agent/event-handler.ts b/src/agent/event-handler.ts
@@ -435,6 +435,7 @@ export function registerEventHandler(
         state.totalInputTokens += usageData.inputTokens ?? 0;
         state.totalOutputTokens += usageData.outputTokens ?? 0;
         state.totalCacheReadTokens += usageData.cacheReadTokens ?? 0;
+        state.totalCacheWriteTokens += usageData.cacheWriteTokens ?? 0;
         state.totalRequests += 1;
 
         // Ensure stats appear on a new line — streamed
diff --git a/src/agent/llm-output.ts b/src/agent/llm-output.ts
@@ -16,6 +16,168 @@ import { ANSI, C } from "./ansi.js";
 
 // ── Usage Stats ──────────────────────────────────────────────────────
 
+// ── Model Pricing ────────────────────────────────────────────────────
+//
+// List-price rates (USD per million tokens) for supported models.
+// A model name is matched against each entry's prefix, which must be
+// followed by end-of-string or '-'; the first matching entry wins.
+// Add new models by inserting a new entry; order matters (longest
+// prefix first for specificity).
+
+/**
+ * List-price rates for one model tier. Every rate is expressed in USD
+ * per million tokens ($/MTok).
+ */
+export interface ModelPricing {
+  /** Human-readable label for the pricing tier (used in cost output). */
+  label: string;
+  /** Rate for input tokens that were NOT served from cache — $/MTok. */
+  inputPerMTok: number;
+  /** Rate for output tokens — $/MTok. */
+  outputPerMTok: number;
+  /** Rate for cache-read tokens — $/MTok (0 if caching not supported). */
+  cacheReadPerMTok: number;
+  /** Rate for cache-write tokens — $/MTok (0 if caching not supported). */
+  cacheWritePerMTok: number;
+}
+
+/**
+ * Pricing table keyed by model-name prefix. Checked in order — first
+ * match wins. Keep entries ordered from most-specific to least-specific
+ * within each vendor group.
+ *
+ * A prefix matches when it is followed by end-of-string or '-', so a
+ * variant that shares a prefix with a flagship tier (e.g. "o3-mini" vs
+ * "o3") MUST have its own entry placed BEFORE the flagship entry;
+ * otherwise the variant is silently billed at the flagship rate.
+ *
+ * NOTE(review): these are list prices captured when the table was
+ * written and vendors revise them — re-verify every rate against the
+ * vendor pricing pages whenever this table is touched.
+ */
+const MODEL_PRICING: Array<{ prefix: string; pricing: ModelPricing }> = [
+  // ── Anthropic Claude ────────────────────────────────────────
+  {
+    prefix: "claude-opus",
+    pricing: {
+      label: "Claude Opus",
+      inputPerMTok: 15,
+      outputPerMTok: 75,
+      cacheReadPerMTok: 1.875,
+      cacheWritePerMTok: 18.75,
+    },
+  },
+  {
+    prefix: "claude-sonnet",
+    pricing: {
+      label: "Claude Sonnet",
+      inputPerMTok: 3,
+      outputPerMTok: 15,
+      cacheReadPerMTok: 0.3,
+      cacheWritePerMTok: 3.75,
+    },
+  },
+  {
+    prefix: "claude-haiku",
+    pricing: {
+      label: "Claude Haiku",
+      inputPerMTok: 0.8,
+      outputPerMTok: 4,
+      cacheReadPerMTok: 0.08,
+      cacheWritePerMTok: 1,
+    },
+  },
+  // ── OpenAI ──────────────────────────────────────────────────
+  // Smaller variants come first so they are not priced as the flagship.
+  {
+    prefix: "o1-mini",
+    pricing: {
+      label: "OpenAI o1-mini",
+      inputPerMTok: 1.1,
+      outputPerMTok: 4.4,
+      cacheReadPerMTok: 0.55,
+      cacheWritePerMTok: 0,
+    },
+  },
+  {
+    prefix: "o1",
+    pricing: {
+      label: "OpenAI o1",
+      inputPerMTok: 15,
+      outputPerMTok: 60,
+      cacheReadPerMTok: 7.5,
+      cacheWritePerMTok: 0,
+    },
+  },
+  {
+    prefix: "o3-mini",
+    pricing: {
+      label: "OpenAI o3-mini",
+      inputPerMTok: 1.1,
+      outputPerMTok: 4.4,
+      cacheReadPerMTok: 0.55,
+      cacheWritePerMTok: 0,
+    },
+  },
+  {
+    prefix: "o3",
+    pricing: {
+      label: "OpenAI o3",
+      inputPerMTok: 10,
+      outputPerMTok: 40,
+      cacheReadPerMTok: 2.5,
+      cacheWritePerMTok: 0,
+    },
+  },
+  {
+    prefix: "gpt-4.1-mini",
+    pricing: {
+      label: "GPT-4.1 mini",
+      inputPerMTok: 0.4,
+      outputPerMTok: 1.6,
+      cacheReadPerMTok: 0.1,
+      cacheWritePerMTok: 0,
+    },
+  },
+  {
+    prefix: "gpt-4.1-nano",
+    pricing: {
+      label: "GPT-4.1 nano",
+      inputPerMTok: 0.1,
+      outputPerMTok: 0.4,
+      cacheReadPerMTok: 0.025,
+      cacheWritePerMTok: 0,
+    },
+  },
+  {
+    prefix: "gpt-4.1",
+    pricing: {
+      label: "GPT-4.1",
+      inputPerMTok: 2,
+      outputPerMTok: 8,
+      cacheReadPerMTok: 0.5,
+      cacheWritePerMTok: 0,
+    },
+  },
+  {
+    prefix: "gpt-4o-mini",
+    pricing: {
+      label: "GPT-4o mini",
+      inputPerMTok: 0.15,
+      outputPerMTok: 0.6,
+      cacheReadPerMTok: 0.075,
+      cacheWritePerMTok: 0,
+    },
+  },
+  {
+    prefix: "gpt-4o",
+    pricing: {
+      label: "GPT-4o",
+      inputPerMTok: 2.5,
+      outputPerMTok: 10,
+      cacheReadPerMTok: 1.25,
+      cacheWritePerMTok: 0,
+    },
+  },
+  // ── Google Gemini ───────────────────────────────────────────
+  {
+    prefix: "gemini-2.5-pro",
+    pricing: {
+      label: "Gemini 2.5 Pro",
+      inputPerMTok: 1.25,
+      outputPerMTok: 10,
+      cacheReadPerMTok: 0.315,
+      cacheWritePerMTok: 0,
+    },
+  },
+  // Flash-Lite must precede Flash: "gemini-2.5-flash" is its prefix.
+  {
+    prefix: "gemini-2.5-flash-lite",
+    pricing: {
+      label: "Gemini 2.5 Flash-Lite",
+      inputPerMTok: 0.1,
+      outputPerMTok: 0.4,
+      cacheReadPerMTok: 0.025,
+      cacheWritePerMTok: 0,
+    },
+  },
+  {
+    prefix: "gemini-2.5-flash",
+    pricing: {
+      label: "Gemini 2.5 Flash",
+      inputPerMTok: 0.15,
+      outputPerMTok: 0.6,
+      cacheReadPerMTok: 0.0375,
+      cacheWritePerMTok: 0,
+    },
+  },
+];
+
+/**
+ * Resolve the pricing tier for a model name.
+ *
+ * The name is lower-cased and compared against each table prefix in
+ * order. A prefix only counts as a match when the character right after
+ * it is absent (the name ends there) or is '-', which keeps e.g. "o3"
+ * from matching "o3something". The first entry that matches wins.
+ *
+ * @param modelName - Model identifier; may be undefined or empty.
+ * @returns The matching tier's pricing, or undefined when the name is
+ *   missing or no entry matches.
+ */
+export function getModelPricing(
+  modelName: string | undefined,
+): ModelPricing | undefined {
+  if (!modelName) return undefined;
+
+  const normalized = modelName.toLowerCase();
+  for (const { prefix, pricing } of MODEL_PRICING) {
+    if (!normalized.startsWith(prefix)) continue;
+    // charAt yields "" past the end of the string, i.e. the name ends
+    // exactly at the prefix.
+    const boundary = normalized.charAt(prefix.length);
+    if (boundary === "" || boundary === "-") return pricing;
+  }
+  return undefined;
+}
+
+/**
+ * Estimate the USD cost for a set of token counts.
+ *
+ * Each count is converted to millions of tokens and multiplied by the
+ * matching $/MTok rate from `pricing`; the four products are summed.
+ * Pass only the non-cached portion of the input as `inputTokens`
+ * (total input minus cache reads) — otherwise cached tokens are counted
+ * twice, once at the input rate and once at the cache-read rate.
+ *
+ * @param pricing - Rates of the tier being billed.
+ * @param inputTokens - Non-cached input tokens.
+ * @param outputTokens - Output tokens.
+ * @param cacheReadTokens - Tokens read from cache.
+ * @param cacheWriteTokens - Tokens written to cache.
+ * @returns The estimated cost in USD.
+ */
+export function estimateCost(
+  pricing: ModelPricing,
+  inputTokens: number,
+  outputTokens: number,
+  cacheReadTokens: number,
+  cacheWriteTokens: number,
+): number {
+  const TOKENS_PER_MTOK = 1_000_000;
+  const usd = (tokens: number, ratePerMTok: number): number =>
+    (tokens / TOKENS_PER_MTOK) * ratePerMTok;
+
+  const inputCost = usd(inputTokens, pricing.inputPerMTok);
+  const outputCost = usd(outputTokens, pricing.outputPerMTok);
+  const cacheReadCost = usd(cacheReadTokens, pricing.cacheReadPerMTok);
+  const cacheWriteCost = usd(cacheWriteTokens, pricing.cacheWritePerMTok);
+  return inputCost + outputCost + cacheReadCost + cacheWriteCost;
+}
+
 /** Shape of assistant.usage event data. */
 export interface UsageData {
   model?: string;
@@ -47,6 +209,28 @@ export function formatUsageStats(d: UsageData): string | null {
   if (d.duration !== undefined) {
     parts.push(`${(d.duration / 1000).toFixed(1)}s`);
   }
+  // Estimated cost for this request based on model pricing
+  const pricing = getModelPricing(d.model);
+  if (pricing) {
+    // Subtract cache reads from input to avoid double-counting —
+    // inputTokens typically includes the cached portion.
+    const nonCachedInput = Math.max(
+      0,
+      (d.inputTokens ?? 0) - (d.cacheReadTokens ?? 0),
+    );
+    const reqCost = estimateCost(
+      pricing,
+      nonCachedInput,
+      d.outputTokens ?? 0,
+      d.cacheReadTokens ?? 0,
+      d.cacheWriteTokens ?? 0,
+    );
+    if (reqCost > 0) {
+      parts.push(
+        `~$${reqCost < 0.01 ? reqCost.toFixed(4) : reqCost.toFixed(2)}`,
+      );
+    }
+  }
   return parts.length > 0 ? parts.join(" · ") : null;
 }
 
@@ -68,8 +252,10 @@ export function formatTokenSummary(state: {
   totalInputTokens: number;
   totalOutputTokens: number;
   totalCacheReadTokens: number;
+  totalCacheWriteTokens: number;
   totalRequests: number;
   totalTurns: number;
+  currentModel?: string;
 }): string[] {
   const total = state.totalInputTokens + state.totalOutputTokens;
   const lines: string[] = [];
@@ -81,9 +267,53 @@ export function formatTokenSummary(state: {
       `Cache read:  ${state.totalCacheReadTokens.toLocaleString()} tokens`,
     );
   }
+  if (state.totalCacheWriteTokens > 0) {
+    lines.push(
+      `Cache write: ${state.totalCacheWriteTokens.toLocaleString()} tokens`,
+    );
+  }
   lines.push(`Total:       ${total.toLocaleString()} tokens`);
   lines.push(`Requests:    ${state.totalRequests}`);
   lines.push(`Turns:       ${state.totalTurns}`);
+
+  // Estimated session cost based on model list pricing
+  const pricing = getModelPricing(state.currentModel);
+  if (pricing) {
+    // Compute non-cached input: total input minus cache reads
+    const nonCachedInput = Math.max(
+      0,
+      state.totalInputTokens - state.totalCacheReadTokens,
+    );
+    const sessionCost = estimateCost(
+      pricing,
+      nonCachedInput,
+      state.totalOutputTokens,
+      state.totalCacheReadTokens,
+      state.totalCacheWriteTokens,
+    );
+    lines.push("");
+    lines.push(
+      `${C.label("Est. Cost")}    ~$${sessionCost.toFixed(2)} ${C.dim(`(${pricing.label} list pricing)`)}`,
+    );
+
+    // Show what it would have cost without caching
+    if (state.totalCacheReadTokens > 0) {
+      const noCacheCost = estimateCost(
+        pricing,
+        state.totalInputTokens,
+        state.totalOutputTokens,
+        0,
+        0,
+      );
+      const saved = noCacheCost - sessionCost;
+      if (saved > 0.01) {
+        lines.push(
+          `${C.dim(`Cache saved:  ~$${saved.toFixed(2)} (${((saved / noCacheCost) * 100).toFixed(0)}% reduction)`)}`,
+        );
+      }
+    }
+  }
+
   return lines;
 }
 
diff --git a/src/agent/state.ts b/src/agent/state.ts
@@ -252,6 +252,9 @@ export interface AgentState {
   /** Cumulative cache-read tokens across all LLM requests this session. */
   totalCacheReadTokens: number;
 
+  /** Cumulative cache-write tokens across all LLM requests this session. */
+  totalCacheWriteTokens: number;
+
   /** Total number of LLM API requests (one per assistant.usage event). */
   totalRequests: number;
 
@@ -339,6 +342,7 @@ export function createAgentState(
     totalInputTokens: 0,
     totalOutputTokens: 0,
     totalCacheReadTokens: 0,
+    totalCacheWriteTokens: 0,
     totalRequests: 0,
     totalTurns: 0,
   };