diff --git a/.changeset/openrouter-cost-tracking.md b/.changeset/openrouter-cost-tracking.md
new file mode 100644
index 000000000..47eee66e5
--- /dev/null
+++ b/.changeset/openrouter-cost-tracking.md
@@ -0,0 +1,11 @@
+---
+'@tanstack/ai-openrouter': minor
+'@tanstack/ai': minor
+---
+
+- Add OpenRouter cost tracking. The `OpenRouterTextAdapter` now attaches OpenRouter's authoritative per-request cost amount to the `RUN_FINISHED` event under `usage.cost`, along with numeric/null fields from `usage.cost_details` under `usage.costDetails`. Cost is sourced from OpenRouter's chat completion response itself (the field arrives in the trailing SSE chunk), so there is no extra HTTP request and no added end-of-stream latency.
+- Cost is always sourced from OpenRouter — never computed locally from token counts and a price table — because OpenRouter routes the same model id to different upstream providers (primary, fallback, BYOK) with different pricing, and may expose provider-specific cost breakdowns the SDK cannot reconstruct.
+- Cost is captured via the SDK's public `HTTPClient` response hook. The hook calls `Response.clone()` and parses the cloned body out-of-band to recover `usage.cost` and `usage.cost_details`, which the @openrouter/sdk chat-completion parser would otherwise strip. The SDK's stream consumer is unaffected — both clones are read independently.
+- Custom `httpClient` values passed into the adapter are preserved: the adapter clones the caller's client (inheriting their fetcher, retries, tracing, and any pre-registered hooks) and appends the cost-capture hook to the clone, so the caller's original instance is never mutated and cost tracking still works when callers supply their own transport.
+- Defer the OpenRouter `RUN_FINISHED` emission until after the upstream stream fully drains, so token usage that arrives in a trailing usage-only chunk (the common case for OpenAI-compatible providers, where the final chunk has empty `choices`) is included in `usage` instead of being dropped.
+- Extend `RunFinishedEvent.usage` in `@tanstack/ai` with optional `cost` and `costDetails` fields. The middleware `UsageInfo` (consumed by `onUsage`) and `FinishInfo.usage` (consumed by `onFinish`) carry the same fields, so middleware authors can read cost without casts. The change is additive and backwards-compatible for adapters that don't populate cost.
diff --git a/README.md b/README.md
index cd8d92350..7bb11c5d0 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,7 @@ A powerful, type-safe AI SDK for building AI-powered applications.
- Isomorphic type-safe tools with server/client execution
- **Enhanced integration with TanStack Start** - Share implementations between AI tools and server functions
- **Observability events** - Structured, typed events for text, tools, image, speech, transcription, and video ([docs](./docs/guides/observability.md))
+- **Cost tracking** - Per-request cost on `RUN_FINISHED` for providers that report it (currently [OpenRouter](./docs/adapters/openrouter.md#cost-tracking))
### Read the docs →
diff --git a/docs/adapters/openrouter.md b/docs/adapters/openrouter.md
index c61fcff96..9a0a7bac0 100644
--- a/docs/adapters/openrouter.md
+++ b/docs/adapters/openrouter.md
@@ -115,6 +115,31 @@ Set your API key in environment variables:
OPENROUTER_API_KEY=sk-or-...
```
+## Cost Tracking
+
+The OpenRouter adapter attaches the authoritative per-request cost to the `RUN_FINISHED` event under `usage.cost`. OpenRouter [reports cost inline in every chat response](https://openrouter.ai/docs/use-cases/usage-accounting), in credits, so cost arrives in the same SSE stream as the model output — there is **no extra HTTP request** and **no added latency**.
+
+Why we don't compute cost locally from tokens × price: OpenRouter routes the same model id to different upstream providers (primary, fallback, BYOK), each with different pricing, and may include cached-token pricing or BYOK upstream costs. A static price table would silently drift and produce wrong numbers.
+
+```typescript
+import { chat } from "@tanstack/ai";
+import { openRouterText } from "@tanstack/ai-openrouter";
+
+const stream = chat({
+ adapter: openRouterText("openai/gpt-5"),
+ messages: [{ role: "user", content: "Hello!" }],
+});
+
+for await (const chunk of stream) {
+ if (chunk.type === "RUN_FINISHED") {
+ console.log("OpenRouter cost:", chunk.usage?.cost);
+ console.log("Upstream inference cost:", chunk.usage?.costDetails?.upstreamInferenceCost);
+ }
+}
+```
+
+If a particular response doesn't include cost (rare — for example if a future OpenRouter response shape change moves the field), `RUN_FINISHED` still emits with token usage; only the `cost` field is omitted. Cost tracking will never break a chat stream.
+
## Model Routing
OpenRouter can automatically route requests to the best available provider:
@@ -177,4 +202,3 @@ const stream = chat({
```
**Supported models:** all OpenRouter chat models. See [Provider Tools](../tools/provider-tools.md#which-models-support-which-tools).
-
diff --git a/packages/typescript/ai-openrouter/src/adapters/cost-capture.ts b/packages/typescript/ai-openrouter/src/adapters/cost-capture.ts
new file mode 100644
index 000000000..069b0e2bb
--- /dev/null
+++ b/packages/typescript/ai-openrouter/src/adapters/cost-capture.ts
@@ -0,0 +1,349 @@
+import { HTTPClient } from '@openrouter/sdk'
+
+export interface CostInfo {
+ cost: number
+ costDetails?: Record<string, number | null>
+}
+
+interface CostEntry {
+ info: CostInfo
+ timer: ReturnType<typeof setTimeout>
+}
+
+interface ParseEntry {
+ parse: Promise<void>
+ timer?: ReturnType<typeof setTimeout>
+}
+
+/**
+ * Per-response cost cache, keyed by upstream response id.
+ *
+ * The chat-completion response carries `usage.cost` in its trailing chunk,
+ * but the @openrouter/sdk Zod parser strips it (the schema doesn't declare
+ * the field, and Zod defaults to strip mode). We clone the response inside
+ * the SDK's own response hook, parse our copy out-of-band, and stash cost
+ * here so the adapter can read it after the SDK-parsed stream ends.
+ *
+ * The hook runs the parse as a fire-and-forget Promise, so by the time the
+ * adapter's `for await` loop exits, `store.set(id, cost)` may not yet have
+ * run — there's no direct happens-before relationship between the SDK's
+ * stream consumer and the tee'd parse reader. `take(id)` awaits **only the
+ * parse that has announced this id** (via `announceId`), so overlapping
+ * `chat.send` calls on the same adapter cannot block each other's cost
+ * delivery. If no parse has announced the id yet, `take(id)` waits for the
+ * next announcement or for all currently-pending parses to settle,
+ * whichever comes first.
+ *
+ * Entries also auto-expire so a missing read (errored stream, missing id,
+ * etc.) cannot leak memory across long-lived adapters.
+ */
+export class CostStore {
+ private entries = new Map<string, CostEntry>()
+ private unannouncedParses = new Set<Promise<void>>()
+ private idToParse = new Map<string, ParseEntry>()
+ private announcementWaiters = new Set<() => void>()
+ private readonly ttlMs: number
+
+ constructor(ttlMs = 60_000) {
+ this.ttlMs = ttlMs
+ }
+
+ set(id: string, info: CostInfo): void {
+ const existing = this.entries.get(id)
+ if (existing) clearTimeout(existing.timer)
+ const timer = setTimeout(() => this.entries.delete(id), this.ttlMs)
+ // In browser/Deno runtimes `setTimeout` returns a number — `'unref' in
+ // timer` would throw. Gate on object-ness before probing for unref().
+ if (typeof timer === 'object' && 'unref' in timer) timer.unref()
+ this.entries.set(id, { info, timer })
+ }
+
+ recordParse(parse: Promise<void>): void {
+ this.unannouncedParses.add(parse)
+ parse.finally(() => this.unannouncedParses.delete(parse))
+ }
+
+ announceId(id: string, parse: Promise<void>): void {
+ // Once a parse has bound to an id, it's no longer a fallback candidate
+ // for other ids — removing it here prevents head-of-line blocking where
+ // an id with no matching parse would otherwise await this (unrelated)
+ // stream as part of the "current wave drained" fallback.
+ this.unannouncedParses.delete(parse)
+ const existing = this.idToParse.get(id)
+ if (existing?.timer) clearTimeout(existing.timer)
+ const entry: ParseEntry = { parse }
+ this.idToParse.set(id, entry)
+ // After the parse settles, keep the entry around briefly so a late
+ // `take(id)` can resolve without falling through to the pending-parses
+ // wait (which would reintroduce head-of-line blocking on unrelated
+ // concurrent streams — especially for responses that announce an id
+ // but never produce `usage.cost`). TTL eventually drops it.
+ parse.finally(() => {
+ const timer = setTimeout(() => {
+ if (this.idToParse.get(id) === entry) this.idToParse.delete(id)
+ }, this.ttlMs)
+ if (typeof timer === 'object' && 'unref' in timer) timer.unref()
+ entry.timer = timer
+ })
+ // Wake every `take()` that was parked waiting for *some* announcement —
+ // the ones whose id doesn't match will fall back into the wait loop.
+ const waiters = [...this.announcementWaiters]
+ this.announcementWaiters.clear()
+ for (const w of waiters) w()
+ }
+
+ async take(id: string): Promise<CostInfo | undefined> {
+ // Fast path: the tee'd parser typically finishes before the SDK's
+ // stream consumer exits (less work per chunk than the Zod pipeline),
+ // so skip the wait entirely when the entry is already populated.
+ const preset = this.entries.get(id)
+ if (preset) {
+ clearTimeout(preset.timer)
+ this.entries.delete(id)
+ return preset.info
+ }
+ // Slow path: prefer per-id matching so an unrelated long-running stream
+ // on the same adapter cannot delay our RUN_FINISHED. If the matching
+ // parse hasn't announced yet, park on the next announcement or on the
+ // current wave of parses draining (whichever happens first), then loop.
+ for (;;) {
+ const match = this.idToParse.get(id)
+ if (match) {
+ await match.parse.catch(() => {})
+ if (match.timer) clearTimeout(match.timer)
+ if (this.idToParse.get(id) === match) this.idToParse.delete(id)
+ break
+ }
+ if (this.unannouncedParses.size === 0) break
+ let resolveSignal: () => void = () => {}
+ const nextAnnouncement = new Promise<void>((resolve) => {
+ resolveSignal = resolve
+ this.announcementWaiters.add(resolve)
+ })
+ const currentWaveDrained = Promise.allSettled([
+ ...this.unannouncedParses,
+ ]).then(() => {})
+ await Promise.race([nextAnnouncement, currentWaveDrained])
+ this.announcementWaiters.delete(resolveSignal)
+ }
+ const entry = this.entries.get(id)
+ if (!entry) return undefined
+ clearTimeout(entry.timer)
+ this.entries.delete(id)
+ return entry.info
+ }
+
+ clear(): void {
+ for (const { timer } of this.entries.values()) clearTimeout(timer)
+ for (const entry of this.idToParse.values()) {
+ if (entry.timer) clearTimeout(entry.timer)
+ }
+ this.entries.clear()
+ this.unannouncedParses.clear()
+ this.idToParse.clear()
+ this.announcementWaiters.clear()
+ }
+}
+
+/**
+ * Returns a response hook for the SDK's public `HTTPClient` that captures
+ * `usage.cost` and `usage.cost_details` from chat-completion responses
+ * without consuming the SDK's body.
+ *
+ * `Response.clone()` tees the body internally — our consumer reads one
+ * branch while the SDK reads the other exactly as if no interception
+ * happened. Any parse error is silently swallowed because cost is a
+ * secondary signal and must not break the chat stream.
+ */
+export function createCostCaptureHook(
+ store: CostStore,
+): (res: Response, req: Request) => void {
+ return (res, req) => {
+ // Cheap header check first: `structuredOutput()` calls `/chat/completions`
+ // with `stream: false` and returns JSON — matching URL before content-type
+ // would pay a `new URL()` parse on every one of those.
+ // Content-Type is case-insensitive per RFC 9110; normalize before matching.
+ const contentType = (res.headers.get('content-type') ?? '').toLowerCase()
+ if (!contentType.includes('text/event-stream')) return
+ if (!isChatCompletionsRequest(req.url)) return
+ if (!res.body) return
+ let copy: Response
+ try {
+ copy = res.clone()
+ } catch {
+ // A preceding response hook consumed the body (e.g. read `res.text()`
+ // for logging). Cloning now throws `TypeError: Body already read`;
+ // give up on cost rather than bubbling a failure that the SDK's hook
+ // loop would surface as a whole-request error.
+ return
+ }
+ if (!copy.body) return
+ // Announce the id the moment the parser sees it so concurrent `take(id)`
+ // calls can await only *this* parse instead of every in-flight one.
+ // Forward-ref: the parser needs a callback that references `parse`, but
+ // `parse` isn't bound yet at construction time. The stable `onId`
+ // trampoline delegates to `announce`, which we swap in right after
+ // `parse` is assigned — the parser only fires `onId` after its first
+ // async read, by which point the swap has happened.
+ let announce: (id: string) => void = () => {}
+ const onId = (id: string) => announce(id)
+ const parse = parseSseAndStore(copy.body, store, onId).catch(() => {})
+ announce = (id) => store.announceId(id, parse)
+ store.recordParse(parse)
+ }
+}
+
+/**
+ * Returns an `HTTPClient` with cost capture attached. When `client` is
+ * provided, clones it — the clone inherits the caller's fetcher and any
+ * hooks they registered, and our hook is appended. We never mutate the
+ * caller's instance, so they can reuse it for unrelated SDK calls without
+ * picking up our cost-capture behavior as a side effect.
+ */
+export function attachCostCapture(
+ store: CostStore,
+ client?: HTTPClient,
+): HTTPClient {
+ const wrapped = client ? client.clone() : new HTTPClient()
+ wrapped.addHook('response', createCostCaptureHook(store))
+ return wrapped
+}
+
+function isChatCompletionsRequest(url: string): boolean {
+ // Match against pathname only so a query string like `?next=/chat/completions`
+ // on an unrelated endpoint doesn't trip the hook. The SDK sends absolute
+ // URLs today, but a custom `Fetcher` could pass something else — fail closed
+ // rather than letting a parse error bubble into the SDK's hook chain.
+ try {
+ return /\/chat\/completions(?:\/|$)/.test(new URL(url).pathname)
+ } catch {
+ return false
+ }
+}
+
+// Event separators per SSE spec: a blank line between events may use any of
+// `\n\n`, `\r\n\r\n`, or `\r\r`. Matching all three directly (instead of
+// pre-normalizing line endings) avoids a chunk-boundary bug where a lone
+// `\r` at the end of one read would be rewritten to `\n` before the paired
+// `\n` of a CRLF landed in the next read, producing a false `\n\n` frame
+// split in the middle of a line.
+const SSE_EVENT_SEPARATOR = /\r\n\r\n|\r\r|\n\n/
+// Split on any single-line terminator per SSE spec (`\r\n`, `\r`, or `\n`)
+// so we don't leave a trailing `\r` in the payload when servers use CRLF
+// inside a frame. Hoisted out of `extractDataPayload` so the regex isn't
+// recompiled on every event parse.
+const SSE_LINE_TERMINATOR = /\r\n|\r|\n/
+
+async function parseSseAndStore(
+ body: ReadableStream<Uint8Array>,
+ store: CostStore,
+ onId?: (id: string) => void,
+): Promise<void> {
+ const reader = body.getReader()
+ const decoder = new TextDecoder()
+ let buffer = ''
+ let responseId: string | undefined
+ let cost: CostInfo | undefined
+ const applyEvent = (event: string): void => {
+ const payload = extractDataPayload(event)
+ if (!payload || payload === '[DONE]') return
+ const parsed = safeParseJson(payload)
+ if (!parsed) return
+ if (!responseId && typeof parsed.id === 'string') {
+ responseId = parsed.id
+ onId?.(responseId)
+ }
+ const usage = parsed.usage as Record<string, unknown> | undefined
+ if (usage) {
+ const extracted = extractCostFromUsage(usage)
+ if (extracted) cost = extracted
+ }
+ }
+ try {
+ for (;;) {
+ const { value, done } = await reader.read()
+ buffer += decoder.decode(value, { stream: true })
+ // Network reads may split SSE events or batch several together — drain
+ // separator-delimited frames and keep any trailing partial in `buffer`
+ // for the next read so we never parse a half-received JSON payload.
+ for (;;) {
+ const match = SSE_EVENT_SEPARATOR.exec(buffer)
+ if (!match) break
+ const event = buffer.slice(0, match.index)
+ buffer = buffer.slice(match.index + match[0].length)
+ applyEvent(event)
+ }
+ if (done) {
+ // EOF-terminated SSE can legitimately omit the final separator
+ // (especially through proxies). Flush whatever is left as a final
+ // event so the trailing usage chunk isn't silently dropped.
+ if (buffer.length > 0) applyEvent(buffer)
+ break
+ }
+ // Cost arrives in the trailing usage chunk, after which there's no
+ // more data we care about. Cancel our clone early so the upstream
+ // body can be GC'd; the SDK's clone half is unaffected.
+ if (responseId && cost) {
+ await reader.cancel().catch(() => {})
+ break
+ }
+ }
+ } finally {
+ reader.releaseLock()
+ }
+ if (responseId && cost) store.set(responseId, cost)
+}
+
+function extractDataPayload(event: string): string | undefined {
+ const lines: Array<string> = []
+ for (const line of event.split(SSE_LINE_TERMINATOR)) {
+ if (!line.startsWith('data:')) continue
+ lines.push(line.slice(5).replace(/^ /, ''))
+ }
+ if (!lines.length) return undefined
+ return lines.join('\n')
+}
+
+function safeParseJson(text: string): Record<string, unknown> | undefined {
+ try {
+ const v = JSON.parse(text)
+ return v && typeof v === 'object'
+ ? (v as Record<string, unknown>)
+ : undefined
+ } catch {
+ return undefined
+ }
+}
+
+function toCamelCase(key: string): string {
+ return key.replace(/_([a-z])/g, (_, char: string) => char.toUpperCase())
+}
+
+function extractCostDetails(
+ details: Record<string, unknown> | undefined,
+): CostInfo['costDetails'] | undefined {
+ if (!details) return undefined
+ const result: NonNullable<CostInfo['costDetails']> = {}
+ for (const [key, value] of Object.entries(details)) {
+ if (typeof value === 'number' || value === null) {
+ result[toCamelCase(key)] = value
+ }
+ }
+ return Object.keys(result).length > 0 ? result : undefined
+}
+
+function extractCostFromUsage(
+ usage: Record<string, unknown>,
+): CostInfo | undefined {
+ // `cost` is the authoritative per-request total reported by OpenRouter.
+ // Emitting `costDetails` without it would surface a breakdown that can't be
+ // reconciled against a total — callers could misread it as the bill.
+ const cost = typeof usage.cost === 'number' ? usage.cost : undefined
+ if (cost === undefined) return undefined
+ const details = usage.cost_details as Record<string, unknown> | undefined
+ const costDetails = extractCostDetails(details)
+ return {
+ cost,
+ ...(costDetails && { costDetails }),
+ }
+}
diff --git a/packages/typescript/ai-openrouter/src/adapters/text.ts b/packages/typescript/ai-openrouter/src/adapters/text.ts
index 29427171c..69cdc74df 100644
--- a/packages/typescript/ai-openrouter/src/adapters/text.ts
+++ b/packages/typescript/ai-openrouter/src/adapters/text.ts
@@ -7,6 +7,8 @@ import {
getOpenRouterApiKeyFromEnv,
generateId as utilGenerateId,
} from '../utils'
+import { CostStore, attachCostCapture } from './cost-capture'
+import type { CostInfo } from './cost-capture'
import type { SDKOptions } from '@openrouter/sdk'
import type {
OPENROUTER_CHAT_MODELS,
@@ -21,6 +23,7 @@ import type {
import type {
ContentPart,
ModelMessage,
+ RunFinishedEvent,
StreamChunk,
TextOptions,
} from '@tanstack/ai'
@@ -86,7 +89,7 @@ interface AGUIState {
deferredUsage:
| { promptTokens: number; completionTokens: number; totalTokens: number }
| undefined
- computedFinishReason: string | undefined
+ computedFinishReason: RunFinishedEvent['finishReason'] | undefined
}
export class OpenRouterTextAdapter<
@@ -104,10 +107,18 @@ export class OpenRouterTextAdapter<
readonly name = 'openrouter' as const
private client: OpenRouter
+ private costStore: CostStore
constructor(config: OpenRouterConfig, model: TModel) {
super({}, model)
- this.client = new OpenRouter(config)
+ this.costStore = new CostStore()
+ // Wrap the caller's HTTPClient (if any) by cloning and appending a
+ // response hook — their fetcher, retries, tracing, and pre-registered
+ // hooks are preserved, but their instance stays untouched.
+ this.client = new OpenRouter({
+ ...config,
+ httpClient: attachCostCapture(this.costStore, config.httpClient),
+ })
}
async *chatStream(
@@ -200,18 +211,41 @@ export class OpenRouterTextAdapter<
aguiState,
)
}
+
+ // Capture usage from a trailing `choices: []` chunk that the
+ // choice loop above would have skipped. OpenRouter (and other
+ // OpenAI-compatible streams) often report final token counts in
+ // a terminal chunk with no choices, after `finishReason` was
+ // delivered on an earlier chunk.
+ if (chunk.usage && !aguiState.deferredUsage) {
+ aguiState.deferredUsage = {
+ promptTokens: chunk.usage.promptTokens || 0,
+ completionTokens: chunk.usage.completionTokens || 0,
+ totalTokens: chunk.usage.totalTokens || 0,
+ }
+ }
}
// Emit RUN_FINISHED after the stream ends so we capture usage from
// any chunk (some SDKs send usage on a separate trailing chunk).
if (aguiState.hasEmittedRunFinished && aguiState.computedFinishReason) {
+ // Deferral: RUN_FINISHED waits for the stream to fully drain
+ // because OpenRouter delivers `usage.cost` in a trailing
+ // `choices: []` chunk that arrives after `finishReason`. Cost is
+ // captured by the HTTPClient response hook while the SDK was
+ // draining the stream; `take()` awaits only the matching parse so
+ // overlapping sends don't block each other. If absent, RUN_FINISHED
+ // simply omits `usage.cost`.
+ const costInfo = responseId
+ ? await this.costStore.take(responseId)
+ : undefined
yield asChunk({
type: 'RUN_FINISHED',
runId: aguiState.runId,
threadId: aguiState.threadId,
model: currentModel || options.model,
timestamp,
- usage: aguiState.deferredUsage,
+ usage: this.buildRunFinishedUsage(aguiState.deferredUsage, costInfo),
finishReason: aguiState.computedFinishReason,
})
}
@@ -332,6 +366,25 @@ export class OpenRouterTextAdapter<
return utilGenerateId(this.name)
}
+ private buildRunFinishedUsage(
+ usage:
+ | { promptTokens: number; completionTokens: number; totalTokens: number }
+ | undefined,
+ costInfo: CostInfo | undefined,
+ ): RunFinishedEvent['usage'] {
+ // If no token counts arrived (e.g. the stream aborted before the
+ // trailing usage chunk), emit no usage at all — even if `costInfo` was
+ // captured. Synthesizing zero-token counts alongside a non-zero cost
+ // would feed billing/telemetry a "successful run with zero tokens but
+ // nonzero cost" signal, which is worse than an absent usage payload.
+ if (!usage) return undefined
+ return {
+ ...usage,
+ ...(costInfo && { cost: costInfo.cost }),
+ ...(costInfo?.costDetails && { costDetails: costInfo.costDetails }),
+ }
+ }
+
private *processChoice(
choice: ChatStreamChoice,
toolCallBuffers: Map,
diff --git a/packages/typescript/ai-openrouter/tests/cost-capture.test.ts b/packages/typescript/ai-openrouter/tests/cost-capture.test.ts
new file mode 100644
index 000000000..d0494098f
--- /dev/null
+++ b/packages/typescript/ai-openrouter/tests/cost-capture.test.ts
@@ -0,0 +1,495 @@
+import { describe, expect, it, vi } from 'vitest'
+import { HTTPClient } from '@openrouter/sdk'
+import {
+ CostStore,
+ attachCostCapture,
+ createCostCaptureHook,
+} from '../src/adapters/cost-capture'
+import type { CostInfo } from '../src/adapters/cost-capture'
+import type { Fetcher } from '@openrouter/sdk'
+
+function makeSseResponse(body: string): Response {
+ return new Response(body, {
+ status: 200,
+ headers: { 'content-type': 'text/event-stream' },
+ })
+}
+
+function makeJsonResponse(payload: unknown): Response {
+ return new Response(JSON.stringify(payload), {
+ status: 200,
+ headers: { 'content-type': 'application/json' },
+ })
+}
+
+const fakeFetcher =
+ (response: Response): Fetcher =>
+ () =>
+ Promise.resolve(response)
+
+async function readAll(
+ stream: ReadableStream | null,
+): Promise {
+ if (!stream) return ''
+ const reader = stream.getReader()
+ const decoder = new TextDecoder()
+ let out = ''
+ for (;;) {
+ const { value, done } = await reader.read()
+ if (value) out += decoder.decode(value, { stream: true })
+ if (done) break
+ }
+ return out
+}
+
+const chatUrl = 'https://openrouter.ai/api/v1/chat/completions'
+
+function makeChatRequest(url = chatUrl): Request {
+ return new Request(url, { method: 'POST' })
+}
+
+function buildClient(
+ response: Response,
+ store: CostStore = new CostStore(),
+): { client: HTTPClient; store: CostStore } {
+ const client = new HTTPClient({ fetcher: fakeFetcher(response) })
+ client.addHook('response', createCostCaptureHook(store))
+ return { client, store }
+}
+
+describe('createCostCaptureHook — SSE chat-completion responses', () => {
+ it('extracts cost and cost_details from the trailing usage chunk', async () => {
+ const body =
+ `data: ${JSON.stringify({ id: 'gen-1', choices: [{ delta: { content: 'Hi' }, index: 0 }] })}\n\n` +
+ `data: ${JSON.stringify({
+ id: 'gen-1',
+ choices: [],
+ usage: {
+ prompt_tokens: 5,
+ completion_tokens: 2,
+ total_tokens: 7,
+ cost: 0.001234,
+ cost_details: {
+ upstream_inference_cost: 0.001,
+ upstream_inference_input_cost: 0.0004,
+ upstream_inference_output_cost: 0.0006,
+ cache_discount: -0.0001,
+ },
+ },
+ })}\n\n` +
+ `data: [DONE]\n\n`
+
+ const { client, store } = buildClient(makeSseResponse(body))
+ const res = await client.request(makeChatRequest())
+
+ expect(await readAll(res.body)).toBe(body)
+ expect(await store.take('gen-1')).toEqual({
+ cost: 0.001234,
+ costDetails: {
+ upstreamInferenceCost: 0.001,
+ upstreamInferenceInputCost: 0.0004,
+ upstreamInferenceOutputCost: 0.0006,
+ cacheDiscount: -0.0001,
+ },
+ })
+ })
+
+ it('handles cost without cost_details', async () => {
+ const body = `data: ${JSON.stringify({
+ id: 'gen-2',
+ choices: [],
+ usage: {
+ prompt_tokens: 1,
+ completion_tokens: 1,
+ total_tokens: 2,
+ cost: 0.5,
+ },
+ })}\n\ndata: [DONE]\n\n`
+
+ const { client, store } = buildClient(makeSseResponse(body))
+ const res = await client.request(makeChatRequest())
+ await readAll(res.body)
+
+ expect(await store.take('gen-2')).toEqual({ cost: 0.5 })
+ })
+
+ // Skipping an orphan `cost_details` is deliberate: a breakdown without the
+ // authoritative `cost` total can't be reconciled, and surfacing it invites
+ // callers to misread a partial bill.
+ it.each([
+ [
+ 'no cost_details',
+ { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+ ],
+ [
+ 'orphan cost_details',
+ {
+ prompt_tokens: 1,
+ completion_tokens: 1,
+ total_tokens: 2,
+ cost_details: { upstream_inference_cost: 0.0005 },
+ },
+ ],
+ ])(
+ 'stores nothing when the response has no cost field (%s)',
+ async (_label, usage) => {
+ const id = 'gen-nocost'
+ const body = `data: ${JSON.stringify({ id, choices: [], usage })}\n\ndata: [DONE]\n\n`
+
+ const { client, store } = buildClient(makeSseResponse(body))
+ const res = await client.request(makeChatRequest())
+ await readAll(res.body)
+
+ expect(await store.take(id)).toBeUndefined()
+ },
+ )
+})
+
+describe('createCostCaptureHook — non-streaming responses are skipped', () => {
+ // Non-SSE responses on /chat/completions come from `structuredOutput()`
+ // (stream: false). These never consume the cost store, so cloning and
+ // re-parsing them is wasted work — the hook should bail early.
+ it('does not clone or parse a JSON response on the chat completions endpoint', async () => {
+ const payload = {
+ id: 'gen-json-1',
+ choices: [{ message: { content: '{}' } }],
+ usage: {
+ prompt_tokens: 4,
+ completion_tokens: 2,
+ total_tokens: 6,
+ cost: 0.0008,
+ cost_details: { upstream_inference_cost: 0.0005 },
+ },
+ }
+ const json = JSON.stringify(payload)
+
+ const { client, store } = buildClient(makeJsonResponse(payload))
+ const res = await client.request(makeChatRequest())
+
+ // Body is delivered to the caller unchanged and nothing is cached.
+ expect(await res.text()).toBe(json)
+ expect(await store.take('gen-json-1')).toBeUndefined()
+ })
+})
+
+describe('createCostCaptureHook — passes through unrelated requests', () => {
+ it('does not parse non-chat responses', async () => {
+ const body = JSON.stringify({ data: { id: 'x', total_cost: 5 } })
+ const passed = new Response(body, {
+ status: 200,
+ headers: { 'content-type': 'application/json' },
+ })
+
+ const { client, store } = buildClient(passed)
+ const res = await client.request(
+ new Request('https://openrouter.ai/api/v1/generation?id=x'),
+ )
+
+ expect(await res.text()).toBe(body)
+ expect(await store.take('x')).toBeUndefined()
+ })
+
+ it('ignores SSE responses whose query string mentions /chat/completions', async () => {
+ // Pathname-only matching: a query param that happens to contain the
+ // chat-completions path must not activate the hook on an unrelated
+ // endpoint.
+ const body = `data: ${JSON.stringify({
+ id: 'spoof-1',
+ choices: [],
+ usage: {
+ prompt_tokens: 1,
+ completion_tokens: 1,
+ total_tokens: 2,
+ cost: 0.42,
+ },
+ })}\n\ndata: [DONE]\n\n`
+
+ const { client, store } = buildClient(makeSseResponse(body))
+ const res = await client.request(
+ makeChatRequest(
+ 'https://openrouter.ai/api/v1/generation?next=/chat/completions',
+ ),
+ )
+
+ expect(await readAll(res.body)).toBe(body)
+ expect(await store.take('spoof-1')).toBeUndefined()
+ })
+})
+
+describe('createCostCaptureHook — robustness', () => {
+ it('survives malformed SSE payloads without breaking the SDK stream', async () => {
+ const body =
+ `data: not-json\n\n` +
+ `data: ${JSON.stringify({
+ id: 'gen-mix',
+ choices: [],
+ usage: {
+ prompt_tokens: 1,
+ completion_tokens: 1,
+ total_tokens: 2,
+ cost: 0.1,
+ },
+ })}\n\ndata: [DONE]\n\n`
+
+ const { client, store } = buildClient(makeSseResponse(body))
+ const res = await client.request(makeChatRequest())
+
+ expect(await readAll(res.body)).toBe(body)
+ expect(await store.take('gen-mix')).toEqual({ cost: 0.1 })
+ })
+
+ it('returns the original response unchanged when there is no body', async () => {
+ const { client } = buildClient(new Response(null, { status: 204 }))
+ const res = await client.request(makeChatRequest())
+ expect(res.status).toBe(204)
+ })
+
+ // Regression: proxies and some runtimes emit spec-compliant CRLF-framed
+ // SSE (`\r\n\r\n`). Splitting only on `\n\n` used to silently drop cost.
+ it('parses SSE with CRLF-delimited frames', async () => {
+ const body = `data: ${JSON.stringify({
+ id: 'gen-crlf',
+ choices: [],
+ usage: {
+ prompt_tokens: 1,
+ completion_tokens: 1,
+ total_tokens: 2,
+ cost: 0.3,
+ },
+ })}\r\n\r\ndata: [DONE]\r\n\r\n`
+
+ const { client, store } = buildClient(makeSseResponse(body))
+ const res = await client.request(makeChatRequest())
+ await readAll(res.body)
+
+ expect(await store.take('gen-crlf')).toEqual({ cost: 0.3 })
+ })
+
+ // Regression: EOF-terminated SSE (proxies omitting the trailing blank
+ // line) used to drop the final usage chunk because the read loop broke
+ // on `done` before flushing `buffer`.
+ it('flushes the trailing SSE frame when the stream ends without a blank line', async () => {
+ const body = `data: ${JSON.stringify({
+ id: 'gen-eof',
+ choices: [],
+ usage: {
+ prompt_tokens: 1,
+ completion_tokens: 1,
+ total_tokens: 2,
+ cost: 0.2,
+ },
+ })}\n`
+
+ const { client, store } = buildClient(makeSseResponse(body))
+ const res = await client.request(makeChatRequest())
+ await readAll(res.body)
+
+ expect(await store.take('gen-eof')).toEqual({ cost: 0.2 })
+ })
+})
+
+describe('attachCostCapture', () => {
+ const costBody = `data: ${JSON.stringify({
+ id: 'gen-attach',
+ choices: [],
+ usage: {
+ prompt_tokens: 1,
+ completion_tokens: 1,
+ total_tokens: 2,
+ cost: 0.25,
+ },
+ })}\n\ndata: [DONE]\n\n`
+
+ it('returns a fresh HTTPClient when no caller client is supplied', () => {
+ const store = new CostStore()
+ const wrapped = attachCostCapture(store)
+ expect(wrapped).toBeInstanceOf(HTTPClient)
+ })
+
+ it('clones the caller-supplied HTTPClient rather than mutating it', async () => {
+ const callerClient = new HTTPClient({
+ fetcher: fakeFetcher(makeSseResponse(costBody)),
+ })
+
+ const store = new CostStore()
+ const wrapped = attachCostCapture(store, callerClient)
+
+ expect(wrapped).not.toBe(callerClient)
+
+ // Calling the caller's original client must not populate our store —
+ // proves we did not mutate it.
+ const direct = await callerClient.request(makeChatRequest())
+ await readAll(direct.body)
+ expect(await store.take('gen-attach')).toBeUndefined()
+ })
+
+ it('preserves hooks registered on the caller before wrapping', async () => {
+ const callerClient = new HTTPClient({
+ fetcher: fakeFetcher(makeSseResponse(costBody)),
+ })
+ const callerHook = vi.fn()
+ callerClient.addHook('response', callerHook)
+
+ const store = new CostStore()
+ const wrapped = attachCostCapture(store, callerClient)
+
+ const res = await wrapped.request(makeChatRequest())
+ await readAll(res.body)
+
+ expect(callerHook).toHaveBeenCalledTimes(1)
+ expect(await store.take('gen-attach')).toEqual({ cost: 0.25 })
+ })
+
+ it('inherits the caller fetcher (proxies, tracing, retries, etc.)', async () => {
+ const callerFetcher = vi.fn(fakeFetcher(makeSseResponse(costBody)))
+ const callerClient = new HTTPClient({ fetcher: callerFetcher })
+
+ const store = new CostStore()
+ const wrapped = attachCostCapture(store, callerClient)
+
+ const res = await wrapped.request(makeChatRequest())
+ await readAll(res.body)
+
+ expect(callerFetcher).toHaveBeenCalledTimes(1)
+ expect(await store.take('gen-attach')).toEqual({ cost: 0.25 })
+ })
+})
+
+describe('CostStore', () => {
+ it('take() removes the entry after reading', async () => {
+ const store = new CostStore()
+ store.set('a', { cost: 1 })
+ expect(await store.take('a')).toEqual({ cost: 1 })
+ expect(await store.take('a')).toBeUndefined()
+ })
+
+ it('overwrites entries with the same id', async () => {
+ const store = new CostStore()
+ store.set('a', { cost: 1 })
+ store.set('a', { cost: 2 })
+ expect(await store.take('a')).toEqual({ cost: 2 })
+ })
+
+ // Regression: the tee'd parse is fire-and-forget, so the adapter can
+ // reach `take(id)` before `store.set(id, ...)` has run. `take` must
+ // await outstanding parses to keep cost capture deterministic.
+ it('take() awaits an in-flight parse before reading', async () => {
+ const store = new CostStore()
+ let resolveParse!: () => void
+ const parse = new Promise<void>((resolve) => {
+ resolveParse = resolve
+ }).then(() => {
+ store.set('racey', { cost: 7 })
+ })
+ store.recordParse(parse)
+
+ let taken: CostInfo | undefined
+ const takePromise = store.take('racey').then((info) => {
+ taken = info
+ })
+
+ expect(taken).toBeUndefined()
+ resolveParse()
+ await takePromise
+
+ expect(taken).toEqual({ cost: 7 })
+ })
+
+ // Regression: a shared adapter can have overlapping `chat.send` calls.
+ // Previously `take(id)` awaited *every* in-flight parse, so a long-
+ // running concurrent stream could block an already-completed request's
+ // RUN_FINISHED. Per-id announcements mean `take(id)` awaits only the
+ // matching parse.
+ it('take(id) does not block on an unrelated in-flight parse', async () => {
+ const store = new CostStore()
+
+ let resolveFast!: () => void
+ const fastParse = new Promise<void>((resolve) => {
+ resolveFast = resolve
+ }).then(() => {
+ store.set('fast', { cost: 1 })
+ })
+ store.recordParse(fastParse)
+ store.announceId('fast', fastParse)
+
+ let resolveSlow!: () => void
+ const slowParse = new Promise<void>((resolve) => {
+ resolveSlow = resolve
+ })
+ store.recordParse(slowParse)
+ store.announceId('slow', slowParse)
+
+ let taken: CostInfo | undefined
+ const takePromise = store.take('fast').then((info) => {
+ taken = info
+ })
+
+ resolveFast()
+ await takePromise
+
+ expect(taken).toEqual({ cost: 1 })
+
+ resolveSlow()
+ await slowParse
+ })
+
+ // Regression: a parse that announces its id but never produces cost
+ // (response simply had no `usage.cost`) used to lose its `idToParse`
+ // entry in `parse.finally`, forcing a subsequent `take(id)` to fall
+ // through to the pending-parses wait — which could then block on an
+ // unrelated concurrent stream. The settled parse must stay resolvable
+ // until `take(id)` consumes it (or TTL expires).
+ it('take(id) returns undefined promptly when the matching parse finished without cost', async () => {
+ const store = new CostStore()
+
+ // Parse A announces id but never calls `set`, then settles.
+ const parseA = Promise.resolve()
+ store.recordParse(parseA)
+ store.announceId('A', parseA)
+ await parseA
+
+ // Parse B stays in flight and never announces 'A'.
+ let resolveB!: () => void
+ const parseB = new Promise<void>((resolve) => {
+ resolveB = resolve
+ })
+ store.recordParse(parseB)
+ store.announceId('B', parseB)
+
+ const info = await store.take('A')
+ expect(info).toBeUndefined()
+
+ resolveB()
+ await parseB
+ })
+})
+
+describe('createCostCaptureHook — resilience to preceding hooks', () => {
+ it('does not fail the request when a preceding hook consumed the body', async () => {
+ const body = `data: ${JSON.stringify({
+ id: 'gen-disturbed',
+ choices: [],
+ usage: {
+ prompt_tokens: 1,
+ completion_tokens: 1,
+ total_tokens: 2,
+ cost: 0.9,
+ },
+ })}\n\ndata: [DONE]\n\n`
+
+ const client = new HTTPClient({
+ fetcher: fakeFetcher(makeSseResponse(body)),
+ })
+ // A preceding caller hook reads the body directly; this disturbs `res`
+ // so that subsequent `res.clone()` throws. We must not surface that as
+ // a request failure.
+ client.addHook('response', async (res) => {
+ await res.text()
+ })
+ const store = new CostStore()
+ client.addHook('response', createCostCaptureHook(store))
+
+ await expect(client.request(makeChatRequest())).resolves.toBeDefined()
+ expect(await store.take('gen-disturbed')).toBeUndefined()
+ })
+})
diff --git a/packages/typescript/ai-openrouter/tests/openrouter-adapter.test.ts b/packages/typescript/ai-openrouter/tests/openrouter-adapter.test.ts
index 206d16525..f8320132c 100644
--- a/packages/typescript/ai-openrouter/tests/openrouter-adapter.test.ts
+++ b/packages/typescript/ai-openrouter/tests/openrouter-adapter.test.ts
@@ -1,20 +1,30 @@
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { chat } from '@tanstack/ai'
import { resolveDebugOption } from '@tanstack/ai/adapter-internals'
+import { HTTPClient } from '@openrouter/sdk'
import { ChatRequest$outboundSchema } from '@openrouter/sdk/models'
import { createOpenRouterText } from '../src/adapters/text'
-import type { OpenRouterTextModelOptions } from '../src/adapters/text'
-import type { StreamChunk, Tool } from '@tanstack/ai'
+import type {
+ OpenRouterTextAdapter,
+ OpenRouterTextModelOptions,
+} from '../src/adapters/text'
+import type { CostStore } from '../src/adapters/cost-capture'
+import type { RunFinishedEvent, StreamChunk, Tool } from '@tanstack/ai'
+import type * as OpenRouterSDK from '@openrouter/sdk'
// Test helper: a silent logger for test chatStream calls.
const testLogger = resolveDebugOption(false)
-// Declare mockSend at module level
+// Declare mocks at module level
let mockSend: any
-// Mock the SDK with a class defined inline
-// eslint-disable-next-line @typescript-eslint/require-await
+// Mock `OpenRouter` so unit tests can drive chat chunks without real HTTP.
+// Keep the real `HTTPClient` (and other exports) since the adapter clones
+// the caller's HTTPClient via `attachCostCapture` — a stubbed HTTPClient
+// would break that path and hide regressions.
vi.mock('@openrouter/sdk', async () => {
+ const actual = await vi.importActual('@openrouter/sdk')
return {
+ ...actual,
OpenRouter: class {
chat = {
send: (...args: Array<any>) => mockSend(...args),
@@ -26,6 +36,13 @@ vi.mock('@openrouter/sdk', async () => {
const createAdapter = () =>
createOpenRouterText('openai/gpt-4o-mini', 'test-key')
+// Tests that exercise cost-aware behavior seed the adapter's cost store
+// directly, since the real hook only fires on actual HTTP responses (which
+// are mocked away here). Real-HTTP coverage lives in tests/cost-capture.test.ts.
+function getCostStore(adapter: OpenRouterTextAdapter): CostStore {
+ return (adapter as unknown as { costStore: CostStore }).costStore
+}
+
const toolArguments = JSON.stringify({ location: 'Berlin' })
const weatherTool: Tool = {
@@ -393,7 +410,7 @@ describe('OpenRouter adapter option mapping', () => {
const errorChunk = chunks.find((c) => c.type === 'RUN_ERROR')
expect(errorChunk).toBeDefined()
- if (errorChunk && errorChunk.type === 'RUN_ERROR') {
+ if (errorChunk) {
expect(errorChunk.error?.message).toBe('Invalid API key')
}
})
@@ -1664,3 +1681,149 @@ describe('OpenRouter STEP event consistency', () => {
expect(stepFinished).toHaveLength(1)
})
})
+describe('OpenRouter cost tracking', () => {
+ beforeEach(() => {
+ vi.clearAllMocks()
+ })
+
+ const baseStreamChunks = (id = 'gen-cost-1') => [
+ {
+ id,
+ model: 'openai/gpt-4o-mini',
+ choices: [{ delta: { content: 'Hi' }, finishReason: null }],
+ },
+ {
+ id,
+ model: 'openai/gpt-4o-mini',
+ choices: [{ delta: {}, finishReason: 'stop' }],
+ usage: { promptTokens: 9, completionTokens: 3, totalTokens: 12 },
+ },
+ ]
+
+ async function runAndGetFinished(
+ adapter: OpenRouterTextAdapter,
+ ): Promise<RunFinishedEvent | undefined> {
+ let finished: RunFinishedEvent | undefined
+ for await (const chunk of chat({
+ adapter,
+ messages: [{ role: 'user', content: 'hi' }],
+ })) {
+ if (chunk.type === 'RUN_FINISHED') finished = chunk
+ }
+ return finished
+ }
+
+ it('attaches OpenRouter cost and details to RUN_FINISHED.usage when the hook populated the store', async () => {
+ setupMockSdkClient(baseStreamChunks('gen-cost-1'))
+
+ const adapter = createOpenRouterText('openai/gpt-4o-mini', 'test-key')
+ getCostStore(adapter).set('gen-cost-1', {
+ cost: 0.0012,
+ costDetails: {
+ upstreamInferenceCost: 0.001,
+ upstreamInferenceInputCost: 0.0004,
+ upstreamInferenceOutputCost: 0.0006,
+ cacheDiscount: -0.0001,
+ },
+ })
+
+ const finished = await runAndGetFinished(adapter)
+
+ expect(finished?.usage).toMatchObject({
+ promptTokens: 9,
+ completionTokens: 3,
+ totalTokens: 12,
+ cost: 0.0012,
+ costDetails: {
+ upstreamInferenceCost: 0.001,
+ upstreamInferenceInputCost: 0.0004,
+ upstreamInferenceOutputCost: 0.0006,
+ cacheDiscount: -0.0001,
+ },
+ })
+ })
+
+ it('omits cost when the response had no cost in its trailing chunk', async () => {
+ setupMockSdkClient(baseStreamChunks('gen-cost-2'))
+
+ // No store seeding — simulates a response without cost (or one the
+ // hook never populated).
+ const adapter = createOpenRouterText('openai/gpt-4o-mini', 'test-key')
+ const finished = await runAndGetFinished(adapter)
+
+ expect(finished?.usage).toEqual({
+ promptTokens: 9,
+ completionTokens: 3,
+ totalTokens: 12,
+ })
+ expect(finished?.usage).not.toHaveProperty('cost')
+ })
+
+ it('uses usage from a trailing usage-only chunk and still attaches cost', async () => {
+ const id = 'gen-trailing-usage'
+ const streamChunks = [
+ {
+ id,
+ model: 'openai/gpt-4o-mini',
+ choices: [{ delta: { content: 'Hi' }, finishReason: null }],
+ },
+ {
+ id,
+ model: 'openai/gpt-4o-mini',
+ choices: [{ delta: {}, finishReason: 'stop' }],
+ },
+ {
+ id,
+ model: 'openai/gpt-4o-mini',
+ choices: [],
+ usage: { promptTokens: 7, completionTokens: 2, totalTokens: 9 },
+ },
+ ]
+
+ setupMockSdkClient(streamChunks)
+
+ const adapter = createOpenRouterText('openai/gpt-4o-mini', 'test-key')
+ getCostStore(adapter).set(id, { cost: 0.42 })
+
+ const finished = await runAndGetFinished(adapter)
+
+ expect(finished?.usage).toMatchObject({
+ promptTokens: 7,
+ completionTokens: 2,
+ totalTokens: 9,
+ cost: 0.42,
+ })
+ })
+
+ it('cost-store entries are consumed (each id is read at most once)', async () => {
+ setupMockSdkClient(baseStreamChunks('gen-consume'))
+ const adapter = createOpenRouterText('openai/gpt-4o-mini', 'test-key')
+ const store = getCostStore(adapter)
+ store.set('gen-consume', { cost: 0.99 })
+
+ await runAndGetFinished(adapter)
+
+ expect(await store.take('gen-consume')).toBeUndefined()
+ })
+
+ // Regression: passing a custom httpClient used to bypass cost tracking
+ // entirely. The adapter now clones the caller's client and attaches the
+ // cost-capture hook to the clone, so the store is still populated and
+ // RUN_FINISHED still carries cost.
+ it('tracks cost when the caller supplies a custom httpClient', async () => {
+ setupMockSdkClient(baseStreamChunks('gen-custom-http'))
+
+ const customClient = new HTTPClient()
+ const adapter = createOpenRouterText('openai/gpt-4o-mini', 'test-key', {
+ httpClient: customClient,
+ })
+
+ const store = getCostStore(adapter)
+ expect(store).toBeDefined()
+ store.set('gen-custom-http', { cost: 0.75 })
+
+ const finished = await runAndGetFinished(adapter)
+
+ expect(finished?.usage).toMatchObject({ cost: 0.75 })
+ })
+})
diff --git a/packages/typescript/ai/src/activities/chat/middleware/types.ts b/packages/typescript/ai/src/activities/chat/middleware/types.ts
index 19ce04586..39b308ce4 100644
--- a/packages/typescript/ai/src/activities/chat/middleware/types.ts
+++ b/packages/typescript/ai/src/activities/chat/middleware/types.ts
@@ -1,4 +1,10 @@
-import type { ModelMessage, StreamChunk, Tool, ToolCall } from '../../../types'
+import type {
+ ModelMessage,
+ StreamChunk,
+ Tool,
+ ToolCall,
+ UsageTotals,
+} from '../../../types'
// ===========================
// Middleware Context
@@ -221,14 +227,11 @@ export interface ToolPhaseCompleteInfo {
// ===========================
/**
- * Token usage statistics passed to the onUsage hook.
- * Extracted from the RUN_FINISHED chunk when usage data is present.
+ * Token usage statistics passed to the onUsage hook. Aliased to the
+ * canonical `UsageTotals` so this surface cannot drift from
+ * `RunFinishedEvent.usage`, which is where the runtime sources it from.
*/
-export interface UsageInfo {
- promptTokens: number
- completionTokens: number
- totalTokens: number
-}
+export type UsageInfo = UsageTotals
// ===========================
// Terminal Hook Info
@@ -245,11 +248,7 @@ export interface FinishInfo {
/** Final accumulated text content */
content: string
/** Final usage totals, if available */
- usage?: {
- promptTokens: number
- completionTokens: number
- totalTokens: number
- }
+ usage?: UsageInfo
}
/**
diff --git a/packages/typescript/ai/src/types.ts b/packages/typescript/ai/src/types.ts
index e11e7176f..9b1e257b4 100644
--- a/packages/typescript/ai/src/types.ts
+++ b/packages/typescript/ai/src/types.ts
@@ -812,6 +812,30 @@ export interface RunStartedEvent extends AGUIRunStartedEvent {
model?: string
}
+/**
+ * Per-run token and cost totals, shared between `RunFinishedEvent.usage`
+ * and the middleware `UsageInfo` so the two can never drift.
+ */
+export interface UsageTotals {
+ promptTokens: number
+ completionTokens: number
+ totalTokens: number
+ /**
+ * Provider-reported cost amount. Optional because most providers don't
+ * report cost per request. Adapters MUST forward only what the provider
+ * returned; they MUST NOT multiply tokens × price tables, since stale or
+ * wrong pricing data would silently corrupt accounting.
+ */
+ cost?: number
+ /**
+ * Provider-reported cost breakdown. Loosely typed because providers
+ * disagree on what to expose (BYOK upstream costs, cache discounts,
+ * per-tier rates, ...) and adapters should preserve only the numeric
+ * provider fields they received, using the provider's native units.
+ */
+ costDetails?: Record<string, number | null>
+}
+
/**
* Emitted when a run completes successfully.
*
@@ -823,12 +847,8 @@ export interface RunFinishedEvent extends AGUIRunFinishedEvent {
model?: string
/** Why the generation stopped */
finishReason?: 'stop' | 'length' | 'content_filter' | 'tool_calls' | null
- /** Token usage statistics */
- usage?: {
- promptTokens: number
- completionTokens: number
- totalTokens: number
- }
+ /** Token and cost totals */
+ usage?: UsageTotals
}
/**