Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions infra/dashboards/deploy-workbooks.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# =============================================================
# LLM Token Usage Workbook Deployment Script
# =============================================================
# Usage:
# .\deploy-workbooks.ps1 -ResourceGroup <rg-name> -AppInsightsResourceId <full-resource-id> [-Location <location>]
#
# Example:
# .\deploy-workbooks.ps1 `
# -ResourceGroup "rg-my-permanent-rg" `
# -AppInsightsResourceId "/subscriptions/<sub>/resourcegroups/<rg>/providers/microsoft.insights/components/<name>" `
# -Location "australiaeast"
# =============================================================

param(
    # Resource group that will own the workbook resources.
    [Parameter(Mandatory=$true)]
    [string]$ResourceGroup,

    # Full ARM resource ID of the Application Insights component the workbooks query.
    [Parameter(Mandatory=$true)]
    [string]$AppInsightsResourceId,

    # Azure region in which the workbook resources are created.
    [string]$Location = "australiaeast"
)

# Directory containing this script; the workbook JSON templates are read from here.
$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path

# Resolve the active subscription once instead of once per workbook, and fail
# early with a clear message if the Azure CLI cannot provide it.
$subscriptionId = az account show --query id -o tsv
if ($LASTEXITCODE -ne 0 -or [string]::IsNullOrWhiteSpace($subscriptionId)) {
    throw "Unable to determine the active Azure subscription (az account show failed)."
}

# Creates one shared workbook from a serialized template file via 'az rest'.
#   -Label        Short name used in console messages (e.g. "GKE").
#   -DisplayName  Title given to the workbook (also stored in the hidden-title tag).
#   -ContentFile  Template file name, resolved relative to this script's directory.
# Reads $ResourceGroup, $AppInsightsResourceId, $Location, $scriptDir and
# $subscriptionId from the script scope. Throws if the template cannot be read
# or if the PUT request fails.
# NOTE: a fresh GUID is generated on every call, so each run creates a new
# workbook resource rather than updating one created by a previous run.
function Deploy-Workbook {
    param(
        [string]$Label,
        [string]$DisplayName,
        [string]$ContentFile
    )

    # Join-Path keeps the path valid on non-Windows hosts; -ErrorAction Stop
    # prevents deploying a workbook with empty content when the file is missing.
    $templatePath = Join-Path $scriptDir $ContentFile
    $serialized = Get-Content -Path $templatePath -Raw -ErrorAction Stop
    $workbookId = [guid]::NewGuid().ToString()

    $body = @{
        location = $Location
        kind = "shared"
        properties = @{
            displayName = $DisplayName
            serializedData = $serialized
            version = "Notebook/1.0"
            sourceId = $AppInsightsResourceId
            category = "workbook"
        }
        tags = @{
            "hidden-title" = $DisplayName
        }
    } | ConvertTo-Json -Depth 5

    # The request body goes through a temp file and is passed as @file;
    # the finally block removes it on every exit path.
    $bodyFile = [System.IO.Path]::GetTempFileName()
    try {
        $body | Set-Content $bodyFile -Encoding UTF8

        $url = "https://management.azure.com/subscriptions/$subscriptionId/resourceGroups/$ResourceGroup/providers/microsoft.insights/workbooks/$($workbookId)?api-version=2022-04-01"

        # Capture the CLI output instead of discarding it so a failure can be reported.
        $cliOutput = az rest --method PUT `
            --url $url `
            --body "@$bodyFile" `
            --headers "Content-Type=application/json" 2>&1

        if ($LASTEXITCODE -ne 0) {
            throw "Failed to deploy $Label workbook ($workbookId): $($cliOutput | Out-String)"
        }
    }
    finally {
        Remove-Item $bodyFile -ErrorAction SilentlyContinue
    }

    Write-Host "Deployed $Label workbook: $workbookId"
}

# Deploy GKE workbook
Deploy-Workbook -Label "GKE" `
    -DisplayName "LLM Token Usage Dashboard - GKE" `
    -ContentFile "workbook-gke-content.json"

# Deploy EKS workbook
Deploy-Workbook -Label "EKS" `
    -DisplayName "LLM Token Usage Dashboard - EKS" `
    -ContentFile "workbook-eks-content.json"

Write-Host "`nDone! Both workbooks deployed to $ResourceGroup"
113 changes: 113 additions & 0 deletions infra/dashboards/token-usage-queries.kql
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// =============================================================================
// LLM Token Usage Dashboard Queries for Application Insights
// =============================================================================
// These KQL queries can be used in Azure Application Insights / Log Analytics
// to visualize token usage across agents, models, steps, and users.
// =============================================================================

// ---- 1. Overall Token Usage Summary (last 24h) ----
// One row per LLM_Token_Usage_Summary event from the past 24 hours, newest first.
customEvents
| where timestamp > ago(24h) and name == "LLM_Token_Usage_Summary"
| project
    timestamp,
    process_id = tostring(customDimensions.process_id),
    total_input = toint(customDimensions.total_input_tokens),
    total_output = toint(customDimensions.total_output_tokens),
    total = toint(customDimensions.total_tokens),
    call_count = toint(customDimensions.total_calls)
| sort by timestamp desc

// ---- 2. Per-Agent Token Usage ----
// Sums input/output/total tokens and call counts per agent_name over the last
// 24 hours, largest total first. Only the dimensions that feed the aggregation
// are extracted.
customEvents
| where name == "LLM_Agent_Token_Usage"
| where timestamp > ago(24h)
| extend agent_name = tostring(customDimensions.agent_name),
    input_tokens = toint(customDimensions.input_tokens),
    output_tokens = toint(customDimensions.output_tokens),
    total_tokens = toint(customDimensions.total_tokens),
    calls = toint(customDimensions.call_count)
| summarize total_input = sum(input_tokens),
    total_output = sum(output_tokens),
    total = sum(total_tokens),
    total_calls = sum(calls)
    by agent_name
| order by total desc

// ---- 3. Per-Model Token Usage ----
// Sums input/output/total tokens and call counts per model deployment name over
// the last 24 hours, largest total first. Only the dimensions that feed the
// aggregation are extracted.
customEvents
| where name == "LLM_Model_Token_Usage"
| where timestamp > ago(24h)
| extend model_name = tostring(customDimensions.model_deployment_name),
    input_tokens = toint(customDimensions.input_tokens),
    output_tokens = toint(customDimensions.output_tokens),
    total_tokens = toint(customDimensions.total_tokens),
    calls = toint(customDimensions.call_count)
| summarize total_input = sum(input_tokens),
    total_output = sum(output_tokens),
    total = sum(total_tokens),
    total_calls = sum(calls)
    by model_name
| order by total desc

// ---- 4. Per-Step (Team) Token Usage ----
// Sums input/output/total tokens and call counts per step_name over the last
// 24 hours, largest total first. Only the dimensions that feed the aggregation
// are extracted.
customEvents
| where name == "LLM_Step_Token_Usage"
| where timestamp > ago(24h)
| extend step_name = tostring(customDimensions.step_name),
    input_tokens = toint(customDimensions.input_tokens),
    output_tokens = toint(customDimensions.output_tokens),
    total_tokens = toint(customDimensions.total_tokens),
    calls = toint(customDimensions.call_count)
| summarize total_input = sum(input_tokens),
    total_output = sum(output_tokens),
    total = sum(total_tokens),
    total_calls = sum(calls)
    by step_name
| order by total desc

// ---- 5. Per-User Token Usage (requires user_id in process telemetry) ----
// Total tokens and number of summary events per user_id over the last 24 hours,
// largest total first. Events that carry no user_id dimension are grouped
// together under an empty user_id.
customEvents
| where name == "LLM_Token_Usage_Summary"
| where timestamp > ago(24h)
| extend total_tokens = toint(customDimensions.total_tokens),
    user_id = tostring(customDimensions.user_id)
| summarize total = sum(total_tokens), runs = count() by user_id
| order by total desc

// ---- 6. Individual LLM Call Log ----
// One row per LLM_Token_Usage event from the past 24 hours, newest first.
customEvents
| where timestamp > ago(24h) and name == "LLM_Token_Usage"
| project
    timestamp,
    process_id = tostring(customDimensions.process_id),
    agent_name = tostring(customDimensions.agent_name),
    step_name = tostring(customDimensions.step_name),
    model = tostring(customDimensions.model_deployment_name),
    input_tokens = toint(customDimensions.input_tokens),
    output_tokens = toint(customDimensions.output_tokens),
    total_tokens = toint(customDimensions.total_tokens)
| sort by timestamp desc

// ---- 7. Hourly Token Usage Trend ----
// Token total and call count per one-hour bucket over the last 7 days,
// rendered as a time chart.
customEvents
| where timestamp > ago(7d) and name == "LLM_Token_Usage"
| summarize
    hourly_tokens = sum(toint(customDimensions.total_tokens)),
    calls = count()
    by bin(timestamp, 1h)
| sort by timestamp asc
| render timechart

// ---- 8. Estimated Cost (GPT-4o pricing: $2.50/1M input, $10/1M output) ----
// Per-process cost estimate for summary events in the last 24 hours, most
// expensive first. Prices are USD per one million tokens; edit the two
// constants below to match the pricing of the model actually deployed.
// The estimate is rounded to 4 decimal places, matching the cost query in
// workbook-eks-content.json.
let input_price_per_million_usd = 2.50;
let output_price_per_million_usd = 10.0;
customEvents
| where name == "LLM_Token_Usage_Summary"
| where timestamp > ago(24h)
| extend process_id = tostring(customDimensions.process_id),
    input_tokens = toint(customDimensions.total_input_tokens),
    output_tokens = toint(customDimensions.total_output_tokens)
| extend estimated_cost_usd = round(
    (input_tokens / 1000000.0 * input_price_per_million_usd)
    + (output_tokens / 1000000.0 * output_price_per_million_usd), 4)
| project timestamp, process_id, input_tokens, output_tokens, estimated_cost_usd
| order by estimated_cost_usd desc
1 change: 1 addition & 0 deletions infra/dashboards/workbook-eks-content.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"version":"Notebook/1.0","items":[{"type":1,"content":{"json":"# LLM Token Usage Dashboard\n\nThis workbook provides comprehensive visibility into LLM token consumption across agents, models, workflow steps, and users.\n\n---"},"name":"header"},{"type":9,"content":{"version":"KqlParameterItem/1.0","parameters":[{"id":"time-range-param","version":"KqlParameterItem/1.0","name":"TimeRange","type":4,"isRequired":true,"value":{"durationMs":1800000,"endTime":"2026-05-21T06:50:00.000Z"},"typeSettings":{"selectableValues":[{"durationMs":3600000},{"durationMs":14400000},{"durationMs":86400000},{"durationMs":259200000},{"durationMs":604800000},{"durationMs":2592000000}],"allowCustom":true},"label":"Time Range"}],"style":"pills","queryType":0,"resourceType":"microsoft.insights/components"},"name":"parameters"},{"type":1,"content":{"json":"## Overall Token Usage Summary"},"name":"summary-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| summarize \n total_input = sum(toint(customDimensions.total_input_tokens)),\n total_output = sum(toint(customDimensions.total_output_tokens)),\n total = sum(toint(customDimensions.total_tokens)),\n total_calls = sum(toint(customDimensions.total_calls)),\n processes = dcount(tostring(customDimensions.process_id))","size":4,"title":"Token Usage Totals","queryType":0,"resourceType":"microsoft.insights/components","visualization":"tiles","tileSettings":{"titleContent":{"columnMatch":"Column1","formatter":1},"leftContent":{"columnMatch":"total","formatter":12,"formatOptions":{"palette":"auto"},"numberFormat":{"unit":0,"options":{"style":"decimal","maximumFractionDigits":0}}},"showBorder":true}},"name":"summary-tiles"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_input = 
toint(customDimensions.total_input_tokens),\n total_output = toint(customDimensions.total_output_tokens),\n total = toint(customDimensions.total_tokens),\n call_count = toint(customDimensions.total_calls)\n| project timestamp, process_id, total_input, total_output, total, call_count\n| order by timestamp desc","size":0,"title":"Token Usage by Process","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"blue"}}]}},"name":"summary-table"},{"type":1,"content":{"json":"## Per-Agent Token Usage"},"name":"agent-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Agent_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by agent_name\n| order by total desc","size":0,"title":"Token Consumption by Agent","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"blue"}}]}},"customWidth":"50","name":"agent-table"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Agent_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total = sum(total_tokens) by agent_name\n| order by total desc","size":0,"title":"Token Distribution by 
Agent","queryType":0,"resourceType":"microsoft.insights/components","visualization":"piechart"},"customWidth":"50","name":"agent-chart"},{"type":1,"content":{"json":"## Per-Model Token Usage"},"name":"model-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Model_Token_Usage\"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by model_name\n| order by total desc","size":0,"title":"Token Consumption by Model","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"green"}}]}},"customWidth":"50","name":"model-table"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Model_Token_Usage\"\n| where timestamp {TimeRange}\n| extend model_name = tostring(customDimensions.model_deployment_name),\n total_tokens = toint(customDimensions.total_tokens)\n| summarize total = sum(total_tokens) by model_name\n| order by total desc","size":0,"title":"Token Distribution by Model","queryType":0,"resourceType":"microsoft.insights/components","visualization":"piechart"},"customWidth":"50","name":"model-chart"},{"type":1,"content":{"json":"## Per-Step (Team) Token Usage"},"name":"step-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Step_Token_Usage\"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = 
toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc","size":0,"title":"Token Consumption by Workflow Step","queryType":0,"resourceType":"microsoft.insights/components","visualization":"barchart","chartSettings":{"xAxis":"step_name","yAxis":"total","group":"step_name"}},"customWidth":"50","name":"step-chart"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Step_Token_Usage\"\n| where timestamp {TimeRange}\n| extend step_name = tostring(customDimensions.step_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n calls = toint(customDimensions.call_count)\n| summarize total_input = sum(input_tokens),\n total_output = sum(output_tokens),\n total = sum(total_tokens),\n total_calls = sum(calls)\n by step_name\n| order by total desc","size":0,"title":"Step Usage Details","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"orange"}}]}},"customWidth":"50","name":"step-table"},{"type":1,"content":{"json":"## Per-User Token Usage"},"name":"user-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n total_tokens = toint(customDimensions.total_tokens),\n user_id = tostring(customDimensions.user_id)\n| summarize total = sum(total_tokens), runs = count() by user_id\n| order by total desc","size":0,"title":"Token Usage by 
User","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total","formatter":3,"formatOptions":{"palette":"purple"}}]}},"name":"user-table"},{"type":1,"content":{"json":"## Token Usage Trends"},"name":"trend-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage\"\n| where timestamp {TimeRange}\n| extend total_tokens = toint(customDimensions.total_tokens)\n| summarize hourly_tokens = sum(total_tokens), calls = count() by bin(timestamp, 1h)\n| order by timestamp asc","size":0,"title":"Hourly Token Consumption","queryType":0,"resourceType":"microsoft.insights/components","visualization":"linechart","chartSettings":{"xAxis":"timestamp","yAxis":"hourly_tokens","showLegend":true}},"name":"trend-chart"},{"type":1,"content":{"json":"## Estimated Cost\n\n> Cost estimates use GPT-4o pricing: **$2.50 / 1M input tokens**, **$10.00 / 1M output tokens**. Adjust for your actual model pricing."},"name":"cost-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage_Summary\"\n| where timestamp {TimeRange}\n| extend process_id = tostring(customDimensions.process_id),\n input_tokens = toint(customDimensions.total_input_tokens),\n output_tokens = toint(customDimensions.total_output_tokens)\n| extend estimated_cost_usd = round((input_tokens / 1000000.0 * 2.50) + (output_tokens / 1000000.0 * 10.0), 4)\n| project timestamp, process_id, input_tokens, output_tokens, estimated_cost_usd\n| order by estimated_cost_usd desc","size":0,"title":"Estimated Cost per Process (USD)","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"estimated_cost_usd","formatter":3,"formatOptions":{"palette":"redBright"}}]}},"name":"cost-table"},{"type":1,"content":{"json":"## Individual LLM Call 
Log"},"name":"calls-header"},{"type":3,"content":{"version":"KqlItem/1.0","query":"customEvents\n| where name == \"LLM_Token_Usage\"\n| where timestamp {TimeRange}\n| extend agent_name = tostring(customDimensions.agent_name),\n step_name = tostring(customDimensions.step_name),\n model = tostring(customDimensions.model_deployment_name),\n input_tokens = toint(customDimensions.input_tokens),\n output_tokens = toint(customDimensions.output_tokens),\n total_tokens = toint(customDimensions.total_tokens),\n process_id = tostring(customDimensions.process_id)\n| project timestamp, process_id, agent_name, step_name, model, input_tokens, output_tokens, total_tokens\n| order by timestamp desc\n| take 200","size":0,"title":"Recent LLM Calls (last 200)","queryType":0,"resourceType":"microsoft.insights/components","visualization":"table","gridSettings":{"formatters":[{"columnMatch":"total_tokens","formatter":3,"formatOptions":{"palette":"blue"}}]}},"name":"calls-table"}],"isLocked":false,"fallbackResourceIds":["/subscriptions/1d5876cd-7603-407a-96d2-ae5ca9a9c5f3/resourcegroups/rg-pricmglogp33/providers/microsoft.insights/components/appi-pricmglogp33usmqm"]}
Loading
Loading