Skip to content

Commit 09d6567

Browse files
authored
feat: add estimated cost to token usage display (#114)
* feat: add estimated cost to token usage display Adds model-aware cost estimation to both per-request inline stats and the session summary (/tokens command + exit display). Pricing table: - Claude Opus/Sonnet/Haiku (with cache read/write rates) - OpenAI o1, o3, GPT-4.1, GPT-4o (with cache read rates) - Gemini 2.5 Pro, 2.5 Flash (with cache read rates) Per-request display now shows: '~$X.XX' at the end of the stats line Session summary now shows: - Est. Cost with model tier label - Cache savings (how much caching saved vs no-cache pricing) Also tracks cacheWriteTokens (was available from SDK but not accumulated) for accurate cost calculation. Implementation: - getModelPricing(modelName) — prefix-based pricing lookup - estimateCost(pricing, in, out, cacheRead, cacheWrite) — USD calc - Both exported for testing/reuse Signed-off-by: Simon Davies <simongdavies@users.noreply.github.com> * fix: address PR #114 review feedback on cost estimation - Fix JSDoc on estimateCost: removed incorrect 'returns undefined' claim — function always returns a number - Fix per-request cost double-counting: subtract cacheReadTokens from inputTokens before estimating (inputTokens includes cached portion) - Make currentModel optional in formatTokenSummary — gracefully skips pricing when model is unknown - Tighten prefix matching in getModelPricing: require word boundary (end-of-string or '-') after prefix to prevent misclassification (e.g. 'o3' won't match 'o3something', only 'o3' or 'o3-mini') Signed-off-by: Simon Davies <simongdavies@users.noreply.github.com> --------- Signed-off-by: Simon Davies <simongdavies@users.noreply.github.com>
1 parent 7545c76 commit 09d6567

3 files changed

Lines changed: 235 additions & 0 deletions

File tree

src/agent/event-handler.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,7 @@ export function registerEventHandler(
435435
state.totalInputTokens += usageData.inputTokens ?? 0;
436436
state.totalOutputTokens += usageData.outputTokens ?? 0;
437437
state.totalCacheReadTokens += usageData.cacheReadTokens ?? 0;
438+
state.totalCacheWriteTokens += usageData.cacheWriteTokens ?? 0;
438439
state.totalRequests += 1;
439440

440441
// Ensure stats appear on a new line — streamed

src/agent/llm-output.ts

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,168 @@ import { ANSI, C } from "./ansi.js";
1616

1717
// ── Usage Stats ──────────────────────────────────────────────────────
1818

19+
// ── Model Pricing ────────────────────────────────────────────────────
20+
//
21+
// List-price rates per million tokens for supported models.
22+
// Rates are matched by prefix — the first matching entry wins.
23+
// Add new models by inserting a new entry; order matters (longest
24+
// prefix first for specificity).
25+
26+
/**
 * Cost rates for a single pricing tier. Every rate is expressed in
 * US dollars per million tokens ($/MTok).
 */
export interface ModelPricing {
  /** Display name of the tier, shown next to cost estimates. */
  label: string;
  /** $/MTok for input tokens that were not served from cache. */
  inputPerMTok: number;
  /** $/MTok for generated (output) tokens. */
  outputPerMTok: number;
  /** $/MTok for cache-read tokens; 0 when caching is not supported. */
  cacheReadPerMTok: number;
  /** $/MTok for cache-write tokens; 0 when caching is not supported. */
  cacheWritePerMTok: number;
}
39+
40+
/**
41+
* Pricing table keyed by model-name prefix. Checked in order — first
42+
* match wins. Keep entries ordered from most-specific to least-specific
43+
* within each vendor group.
44+
*/
45+
const MODEL_PRICING: Array<{ prefix: string; pricing: ModelPricing }> = [
46+
// ── Anthropic Claude ────────────────────────────────────────
47+
{
48+
prefix: "claude-opus",
49+
pricing: {
50+
label: "Claude Opus",
51+
inputPerMTok: 15,
52+
outputPerMTok: 75,
53+
cacheReadPerMTok: 1.875,
54+
cacheWritePerMTok: 18.75,
55+
},
56+
},
57+
{
58+
prefix: "claude-sonnet",
59+
pricing: {
60+
label: "Claude Sonnet",
61+
inputPerMTok: 3,
62+
outputPerMTok: 15,
63+
cacheReadPerMTok: 0.3,
64+
cacheWritePerMTok: 3.75,
65+
},
66+
},
67+
{
68+
prefix: "claude-haiku",
69+
pricing: {
70+
label: "Claude Haiku",
71+
inputPerMTok: 0.8,
72+
outputPerMTok: 4,
73+
cacheReadPerMTok: 0.08,
74+
cacheWritePerMTok: 1,
75+
},
76+
},
77+
// ── OpenAI ──────────────────────────────────────────────────
78+
{
79+
prefix: "o1",
80+
pricing: {
81+
label: "OpenAI o1",
82+
inputPerMTok: 15,
83+
outputPerMTok: 60,
84+
cacheReadPerMTok: 7.5,
85+
cacheWritePerMTok: 0,
86+
},
87+
},
88+
{
89+
prefix: "o3",
90+
pricing: {
91+
label: "OpenAI o3",
92+
inputPerMTok: 10,
93+
outputPerMTok: 40,
94+
cacheReadPerMTok: 2.5,
95+
cacheWritePerMTok: 0,
96+
},
97+
},
98+
{
99+
prefix: "gpt-4.1",
100+
pricing: {
101+
label: "GPT-4.1",
102+
inputPerMTok: 2,
103+
outputPerMTok: 8,
104+
cacheReadPerMTok: 0.5,
105+
cacheWritePerMTok: 0,
106+
},
107+
},
108+
{
109+
prefix: "gpt-4o",
110+
pricing: {
111+
label: "GPT-4o",
112+
inputPerMTok: 2.5,
113+
outputPerMTok: 10,
114+
cacheReadPerMTok: 1.25,
115+
cacheWritePerMTok: 0,
116+
},
117+
},
118+
// ── Google Gemini ───────────────────────────────────────────
119+
{
120+
prefix: "gemini-2.5-pro",
121+
pricing: {
122+
label: "Gemini 2.5 Pro",
123+
inputPerMTok: 1.25,
124+
outputPerMTok: 10,
125+
cacheReadPerMTok: 0.315,
126+
cacheWritePerMTok: 0,
127+
},
128+
},
129+
{
130+
prefix: "gemini-2.5-flash",
131+
pricing: {
132+
label: "Gemini 2.5 Flash",
133+
inputPerMTok: 0.15,
134+
outputPerMTok: 0.6,
135+
cacheReadPerMTok: 0.0375,
136+
cacheWritePerMTok: 0,
137+
},
138+
},
139+
];
140+
141+
/**
 * Resolve the pricing tier for a model name.
 *
 * The name is lower-cased and compared against each prefix in
 * MODEL_PRICING from top to bottom. A prefix only counts as a match
 * when it is immediately followed by end-of-string or '-', so "o3" and
 * "o3-mini" resolve to the "o3" entry while "o3something" does not.
 *
 * @param modelName - Model identifier; may be undefined or empty.
 * @returns The pricing of the first matching entry, or undefined when
 *   the name is missing/empty or no entry matches.
 */
export function getModelPricing(
  modelName: string | undefined,
): ModelPricing | undefined {
  if (!modelName) {
    return undefined;
  }

  const normalized = modelName.toLowerCase();
  for (const { prefix, pricing } of MODEL_PRICING) {
    if (!normalized.startsWith(prefix)) {
      continue;
    }
    // charAt yields "" past the end of the string, which is the
    // end-of-string boundary case; '-' is the only other boundary.
    const next = normalized.charAt(prefix.length);
    if (next === "" || next === "-") {
      return pricing;
    }
  }
  return undefined;
}
159+
160+
/**
 * Calculate the estimated cost in USD for a set of token counts.
 *
 * Each count is divided by one million and multiplied by the matching
 * $/MTok rate from `pricing`; the four products are summed.
 *
 * Negative or non-finite counts (NaN, ±Infinity) are treated as 0, so
 * the result is never negative or NaN as long as the rates themselves
 * are finite and non-negative.
 *
 * @param pricing - Per-million-token rates to apply.
 * @param inputTokens - Non-cached input tokens only (total input minus
 *   cache reads); passing the total would double-count cached tokens.
 * @param outputTokens - Output tokens.
 * @param cacheReadTokens - Tokens read from cache.
 * @param cacheWriteTokens - Tokens written to cache.
 * @returns Estimated cost in US dollars.
 */
export function estimateCost(
  pricing: ModelPricing,
  inputTokens: number,
  outputTokens: number,
  cacheReadTokens: number,
  cacheWriteTokens: number,
): number {
  const MILLION = 1_000_000;
  // A token count can only meaningfully be a finite, non-negative
  // number. Anything else (e.g. an unclamped "total - cached"
  // subtraction, or a NaN from upstream) contributes nothing instead of
  // producing a negative or NaN dollar amount.
  const sanitize = (tokens: number): number =>
    Number.isFinite(tokens) && tokens > 0 ? tokens : 0;

  return (
    (sanitize(inputTokens) / MILLION) * pricing.inputPerMTok +
    (sanitize(outputTokens) / MILLION) * pricing.outputPerMTok +
    (sanitize(cacheReadTokens) / MILLION) * pricing.cacheReadPerMTok +
    (sanitize(cacheWriteTokens) / MILLION) * pricing.cacheWritePerMTok
  );
}
180+
19181
/** Shape of assistant.usage event data. */
20182
export interface UsageData {
21183
model?: string;
@@ -47,6 +209,28 @@ export function formatUsageStats(d: UsageData): string | null {
47209
if (d.duration !== undefined) {
48210
parts.push(`${(d.duration / 1000).toFixed(1)}s`);
49211
}
212+
// Estimated cost for this request based on model pricing
213+
const pricing = getModelPricing(d.model);
214+
if (pricing) {
215+
// Subtract cache reads from input to avoid double-counting —
216+
// inputTokens typically includes the cached portion.
217+
const nonCachedInput = Math.max(
218+
0,
219+
(d.inputTokens ?? 0) - (d.cacheReadTokens ?? 0),
220+
);
221+
const reqCost = estimateCost(
222+
pricing,
223+
nonCachedInput,
224+
d.outputTokens ?? 0,
225+
d.cacheReadTokens ?? 0,
226+
d.cacheWriteTokens ?? 0,
227+
);
228+
if (reqCost > 0) {
229+
parts.push(
230+
`~$${reqCost < 0.01 ? reqCost.toFixed(4) : reqCost.toFixed(2)}`,
231+
);
232+
}
233+
}
50234
return parts.length > 0 ? parts.join(" · ") : null;
51235
}
52236

@@ -68,8 +252,10 @@ export function formatTokenSummary(state: {
68252
totalInputTokens: number;
69253
totalOutputTokens: number;
70254
totalCacheReadTokens: number;
255+
totalCacheWriteTokens: number;
71256
totalRequests: number;
72257
totalTurns: number;
258+
currentModel?: string;
73259
}): string[] {
74260
const total = state.totalInputTokens + state.totalOutputTokens;
75261
const lines: string[] = [];
@@ -81,9 +267,53 @@ export function formatTokenSummary(state: {
81267
`Cache read: ${state.totalCacheReadTokens.toLocaleString()} tokens`,
82268
);
83269
}
270+
if (state.totalCacheWriteTokens > 0) {
271+
lines.push(
272+
`Cache write: ${state.totalCacheWriteTokens.toLocaleString()} tokens`,
273+
);
274+
}
84275
lines.push(`Total: ${total.toLocaleString()} tokens`);
85276
lines.push(`Requests: ${state.totalRequests}`);
86277
lines.push(`Turns: ${state.totalTurns}`);
278+
279+
// Estimated session cost based on model list pricing
280+
const pricing = getModelPricing(state.currentModel);
281+
if (pricing) {
282+
// Compute non-cached input: total input minus cache reads
283+
const nonCachedInput = Math.max(
284+
0,
285+
state.totalInputTokens - state.totalCacheReadTokens,
286+
);
287+
const sessionCost = estimateCost(
288+
pricing,
289+
nonCachedInput,
290+
state.totalOutputTokens,
291+
state.totalCacheReadTokens,
292+
state.totalCacheWriteTokens,
293+
);
294+
lines.push("");
295+
lines.push(
296+
`${C.label("Est. Cost")} ~$${sessionCost.toFixed(2)} ${C.dim(`(${pricing.label} list pricing)`)}`,
297+
);
298+
299+
// Show what it would have cost without caching
300+
if (state.totalCacheReadTokens > 0) {
301+
const noCacheCost = estimateCost(
302+
pricing,
303+
state.totalInputTokens,
304+
state.totalOutputTokens,
305+
0,
306+
0,
307+
);
308+
const saved = noCacheCost - sessionCost;
309+
if (saved > 0.01) {
310+
lines.push(
311+
`${C.dim(`Cache saved: ~$${saved.toFixed(2)} (${((saved / noCacheCost) * 100).toFixed(0)}% reduction)`)}`,
312+
);
313+
}
314+
}
315+
}
316+
87317
return lines;
88318
}
89319

src/agent/state.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,9 @@ export interface AgentState {
252252
/** Cumulative cache-read tokens across all LLM requests this session. */
253253
totalCacheReadTokens: number;
254254

255+
/** Cumulative cache-write tokens across all LLM requests this session. */
256+
totalCacheWriteTokens: number;
257+
255258
/** Total number of LLM API requests (one per assistant.usage event). */
256259
totalRequests: number;
257260

@@ -339,6 +342,7 @@ export function createAgentState(
339342
totalInputTokens: 0,
340343
totalOutputTokens: 0,
341344
totalCacheReadTokens: 0,
345+
totalCacheWriteTokens: 0,
342346
totalRequests: 0,
343347
totalTurns: 0,
344348
};

0 commit comments

Comments
 (0)