Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
283 changes: 283 additions & 0 deletions infra/dashboards/token-usage-queries.kql
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
// ============================================================
// KQL Queries for LLM Token Usage Monitoring
// Content Processing Solution Accelerator
// Run these in Application Insights > Logs
// ============================================================

// 1. Overall token usage summary (last 7 days)
// Rolls every per-document summary event up into fleet-wide totals.
customEvents
| where timestamp > ago(7d)
| where name == 'LLM_Token_Usage_Summary'
| extend
    InTok = toint(customDimensions['total_input_tokens']),
    OutTok = toint(customDimensions['total_output_tokens']),
    AllTok = toint(customDimensions['total_tokens'])
| summarize
    TotalDocuments = count(),
    TotalInputTokens = sum(InTok),
    TotalOutputTokens = sum(OutTok),
    TotalTokens = sum(AllTok),
    AvgTokensPerDocument = round(avg(AllTok), 0)

// 2. Token usage by pipeline step (agent)
// One row per agent, heaviest consumer first.
customEvents
| where timestamp > ago(7d)
| where name == 'LLM_Agent_Token_Usage'
| extend
    StepName = tostring(customDimensions['agent_name']),
    InTok = toint(customDimensions['input_tokens']),
    OutTok = toint(customDimensions['output_tokens']),
    AllTok = toint(customDimensions['total_tokens'])
| summarize
    InputTokens = sum(InTok),
    OutputTokens = sum(OutTok),
    TotalTokens = sum(AllTok),
    Invocations = count()
    by Step = StepName
| sort by TotalTokens desc

// 3. Token usage over time (hourly)
// Stacked area chart of input vs output tokens per hour.
customEvents
| where timestamp > ago(7d) and name == 'LLM_Token_Usage_Summary'
| summarize
    InputTokens = sum(toint(customDimensions['total_input_tokens'])),
    OutputTokens = sum(toint(customDimensions['total_output_tokens']))
    by bin(timestamp, 1h)
| sort by timestamp asc
| render areachart

// 4. Estimated cost (GPT-4o pricing: $2.50/1M input, $10.00/1M output)
let input_price_per_million = 2.50;
let output_price_per_million = 10.00;
customEvents
| where name == 'LLM_Token_Usage_Summary'
| where timestamp > ago(30d)
| extend input_tokens = toint(customDimensions['total_input_tokens'])
| extend output_tokens = toint(customDimensions['total_output_tokens'])
| summarize TotalInput = sum(input_tokens), TotalOutput = sum(output_tokens) by bin(timestamp, 1d)
| extend InputCost = round(TotalInput * input_price_per_million / 1000000.0, 4)
| extend OutputCost = round(TotalOutput * output_price_per_million / 1000000.0, 4)
| extend TotalCost = InputCost + OutputCost
| project Day = timestamp, TotalInput, TotalOutput, InputCost, OutputCost, TotalCost
| order by Day desc

// 5. Top token consumers by document
// The 20 documents that used the most tokens in the last week.
customEvents
| where timestamp > ago(7d)
| where name == 'LLM_Token_Usage_Summary'
| extend
    AllTok = toint(customDimensions['total_tokens']),
    process_id = tostring(customDimensions['process_id']),
    file_name = tostring(customDimensions['file_name'])
| summarize TotalTokens = sum(AllTok) by process_id, file_name
| top 20 by TotalTokens desc

// 6. Pipeline step token distribution (pie chart)
// Share of total tokens attributable to each agent.
customEvents
| where timestamp > ago(7d) and name == 'LLM_Agent_Token_Usage'
| extend agent = tostring(customDimensions['agent_name'])
| summarize TotalTokens = sum(toint(customDimensions['total_tokens'])) by agent
| render piechart

// 7. Token usage percentiles per document
// Distribution of per-document token counts (p50/p90/p95/p99/max).
customEvents
| where timestamp > ago(7d)
| where name == 'LLM_Token_Usage_Summary'
| extend DocTokens = toint(customDimensions['total_tokens'])
| summarize
    p50 = percentile(DocTokens, 50),
    p90 = percentile(DocTokens, 90),
    p95 = percentile(DocTokens, 95),
    p99 = percentile(DocTokens, 99),
    Max = max(DocTokens)

// 8. Token usage by step grouping (Extraction vs Analysis vs Safety)
// Maps each agent onto a coarse category; unmapped agents fall into "Unknown".
let AgentCategory = datatable(agent:string, StepGroup:string) [
    "MapHandler", "Extraction",
    "RAI", "Safety",
    "Summarize", "Analysis",
    "GapAnalysis", "Analysis"
];
customEvents
| where timestamp > ago(7d)
| where name == 'LLM_Agent_Token_Usage'
| extend
    agent = tostring(customDimensions['agent_name']),
    InTok = toint(customDimensions['input_tokens']),
    OutTok = toint(customDimensions['output_tokens']),
    AllTok = toint(customDimensions['total_tokens'])
| lookup kind=leftouter AgentCategory on agent
| extend StepGroup = iff(isempty(StepGroup), "Unknown", StepGroup)
| summarize
    TotalRequests = count(),
    TotalInputTokens = sum(InTok),
    TotalOutputTokens = sum(OutTok),
    TotalTokens = sum(AllTok),
    AvgTokensPerRequest = round(avg(AllTok), 0)
    by StepGroup
| sort by TotalTokens desc

// 9. Token usage by model deployment
// One row per Azure OpenAI deployment, heaviest consumer first.
customEvents
| where timestamp > ago(7d)
| where name == 'LLM_Model_Token_Usage'
| extend
    Deployment = tostring(customDimensions['model_deployment_name']),
    InTok = toint(customDimensions['input_tokens']),
    OutTok = toint(customDimensions['output_tokens']),
    AllTok = toint(customDimensions['total_tokens'])
| summarize
    InputTokens = sum(InTok),
    OutputTokens = sum(OutTok),
    TotalTokens = sum(AllTok),
    Invocations = count()
    by Model = Deployment
| sort by TotalTokens desc

// 10. Token usage by model over time (hourly)
// One area series per model deployment.
customEvents
| where timestamp > ago(7d) and name == 'LLM_Model_Token_Usage'
| extend model = tostring(customDimensions['model_deployment_name'])
| summarize TotalTokens = sum(toint(customDimensions['total_tokens']))
    by bin(timestamp, 1h), model
| sort by timestamp asc
| render areachart

// 11. Model token distribution (pie chart)
// Share of total tokens attributable to each model deployment.
customEvents
| where timestamp > ago(7d) and name == 'LLM_Model_Token_Usage'
| extend model = tostring(customDimensions['model_deployment_name'])
| summarize TotalTokens = sum(toint(customDimensions['total_tokens'])) by model
| render piechart

// 12. Estimated cost by model (adjust pricing per model)
let gpt4o_input = 2.50;
let gpt4o_output = 10.00;
let gpt4o_mini_input = 0.15;
let gpt4o_mini_output = 0.60;
customEvents
| where name == 'LLM_Model_Token_Usage'
| where timestamp > ago(30d)
| extend model = tostring(customDimensions['model_deployment_name'])
| extend input_tokens = toint(customDimensions['input_tokens'])
| extend output_tokens = toint(customDimensions['output_tokens'])
| summarize TotalInput = sum(input_tokens), TotalOutput = sum(output_tokens) by model
| extend InputPrice = case(
model has "mini", gpt4o_mini_input,
gpt4o_input)
| extend OutputPrice = case(
model has "mini", gpt4o_mini_output,
gpt4o_output)
| extend InputCost = round(TotalInput * InputPrice / 1000000.0, 4)
| extend OutputCost = round(TotalOutput * OutputPrice / 1000000.0, 4)
| extend TotalCost = InputCost + OutputCost
| project Model = model, TotalInput, TotalOutput, InputCost, OutputCost, TotalCost
| order by TotalCost desc

// 13. Step-to-model mapping with token usage
// Cross-tab of pipeline step x model deployment, heaviest pair first.
customEvents
| where timestamp > ago(7d)
| where name == 'LLM_Agent_Token_Usage'
| extend
    StepName = tostring(customDimensions['agent_name']),
    Deployment = tostring(customDimensions['model_deployment_name']),
    InTok = toint(customDimensions['input_tokens']),
    OutTok = toint(customDimensions['output_tokens']),
    AllTok = toint(customDimensions['total_tokens'])
| summarize
    InputTokens = sum(InTok),
    OutputTokens = sum(OutTok),
    TotalTokens = sum(AllTok),
    Invocations = count()
    by Step = StepName, Model = Deployment
| sort by TotalTokens desc

// 14. RAI agent specific token usage
// Isolates the Responsible AI (content safety) agent, broken down by model.
customEvents
| where timestamp > ago(7d)
| where name == 'LLM_Agent_Token_Usage'
| where tostring(customDimensions['agent_name']) == "RAI"
| extend
    Deployment = tostring(customDimensions['model_deployment_name']),
    InTok = toint(customDimensions['input_tokens']),
    OutTok = toint(customDimensions['output_tokens']),
    AllTok = toint(customDimensions['total_tokens'])
| summarize
    InputTokens = sum(InTok),
    OutputTokens = sum(OutTok),
    TotalTokens = sum(AllTok),
    Invocations = count()
    by Model = Deployment

// 15. OpenTelemetry auto-instrumented OpenAI calls (if available)
// Reads gen_ai.* semantic-convention attributes from dependency telemetry;
// rows without token counts (non-LLM dependencies) are filtered out.
dependencies
| where timestamp > ago(7d)
| where name has "openai" or target has "openai"
| extend
    input_tokens = tolong(customDimensions["gen_ai.usage.input_tokens"]),
    output_tokens = tolong(customDimensions["gen_ai.usage.output_tokens"]),
    model = tostring(customDimensions["gen_ai.request.model"])
| where isnotnull(input_tokens)
| summarize
    Calls = count(),
    TotalInput = sum(input_tokens),
    TotalOutput = sum(output_tokens)
    by model
| sort by TotalInput desc

// ============================================================
// Content Processing Specific Queries
// ============================================================

// 16. Token usage by file type (PDF, DOCX, image, etc.)
// Classifies documents by MIME type and aggregates token usage per class.
//
// NOTE: uses `contains` (case-insensitive substring match) rather than `has`.
// `has` matches whole terms only, so it never matches inside the OOXML MIME
// types ("...wordprocessingml.document", "...spreadsheetml.sheet"), which
// would silently bucket all modern Word/Excel files under "Other".
customEvents
| where name == 'LLM_Token_Usage_Summary'
| where timestamp > ago(7d)
| extend total_tokens = toint(customDimensions['total_tokens'])
| extend input_tokens = toint(customDimensions['total_input_tokens'])
| extend output_tokens = toint(customDimensions['total_output_tokens'])
| extend mime_type = tostring(customDimensions['file_mime_type'])
| extend file_type = case(
    mime_type contains "pdf", "PDF",
    mime_type contains "image", "Image",
    // "word" covers both legacy "msword" and OOXML "wordprocessingml"
    mime_type contains "word" or mime_type contains "docx", "Word",
    // "spreadsheet" covers OOXML "spreadsheetml"; "excel" covers "ms-excel"
    mime_type contains "excel" or mime_type contains "spreadsheet" or mime_type contains "xlsx", "Excel",
    mime_type contains "text", "Text",
    "Other")
| summarize
    Documents = count(),
    TotalInputTokens = sum(input_tokens),
    TotalOutputTokens = sum(output_tokens),
    TotalTokens = sum(total_tokens),
    AvgTokensPerDoc = round(avg(total_tokens), 0)
    by FileType = file_type
| order by TotalTokens desc

// 17. Per-document token breakdown by step
// For each processed document, how many tokens each pipeline step consumed.
customEvents
| where timestamp > ago(7d)
| where name == 'LLM_Agent_Token_Usage'
| extend
    StepName = tostring(customDimensions['agent_name']),
    process_id = tostring(customDimensions['process_id']),
    InTok = toint(customDimensions['input_tokens']),
    OutTok = toint(customDimensions['output_tokens']),
    AllTok = toint(customDimensions['total_tokens'])
| summarize
    InputTokens = sum(InTok),
    OutputTokens = sum(OutTok),
    TotalTokens = sum(AllTok)
    by process_id, Step = StepName
| sort by process_id, TotalTokens desc

// 18. Daily processing volume with token costs
// Documents processed and token statistics per day, newest day first.
customEvents
| where timestamp > ago(30d) and name == 'LLM_Token_Usage_Summary'
| extend
    DocTokens = toint(customDimensions['total_tokens']),
    file_name = tostring(customDimensions['file_name'])
| summarize
    DocumentsProcessed = count(),
    TotalTokens = sum(DocTokens),
    AvgTokensPerDoc = round(avg(DocTokens), 0),
    MaxTokensPerDoc = max(DocTokens)
    by Day = bin(timestamp, 1d)
| sort by Day desc
Loading
Loading