25 changes: 11 additions & 14 deletions adk-code/internal/display/events/event.go
@@ -58,20 +58,17 @@ func PrintEventEnhanced(renderer *Renderer, streamDisplay *StreamingDisplay,
// Record token metrics if available and update spinner with metrics
if event.UsageMetadata != nil {
sessionTokens.RecordMetrics(event.UsageMetadata, requestID)
// Create token metrics for spinner display
metric := &tracking.TokenMetrics{
PromptTokens: event.UsageMetadata.PromptTokenCount,
CachedTokens: event.UsageMetadata.CachedContentTokenCount,
ResponseTokens: event.UsageMetadata.CandidatesTokenCount,
ThoughtTokens: event.UsageMetadata.ThoughtsTokenCount,
ToolUseTokens: event.UsageMetadata.ToolUsePromptTokenCount,
TotalTokens: event.UsageMetadata.TotalTokenCount,
}
// Update spinner with metrics if it's actively running
if *toolRunning {
spinner.UpdateWithMetrics("Processing", metric)
} else {
spinner.UpdateWithMetrics("Agent is thinking", metric)

// Get the correctly calculated per-request metric (with deltas already computed)
metric := sessionTokens.GetLastMetric()

// Update spinner with the per-request metrics if it's actively running
if metric != nil {
if *toolRunning {
spinner.UpdateWithMetrics("Processing", metric)
} else {
spinner.UpdateWithMetrics("Agent is thinking", metric)
}
}
}

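For context, a minimal standalone sketch (not part of the PR; the type names, helper names, and numbers below are invented) of the pattern event.go now follows: record the usage, read back the tracker's most recent per-request metric, and only update the spinner label when one actually exists.

package main

import "fmt"

// metrics stands in for tracking.TokenMetrics; only the fields needed here.
type metrics struct{ prompt, response int32 }

// sessionLog stands in for the session tracker used in the diff:
// record appends a per-request metric, last returns the most recent one (or nil).
type sessionLog struct{ all []metrics }

func (s *sessionLog) record(m metrics) { s.all = append(s.all, m) }

func (s *sessionLog) last() *metrics {
	if len(s.all) == 0 {
		return nil
	}
	return &s.all[len(s.all)-1]
}

func main() {
	var tracker sessionLog
	var toolRunning bool

	tracker.record(metrics{prompt: 1200, response: 300})

	// Mirror of the nil-guarded spinner update: refresh the label only when a
	// per-request metric is available.
	if m := tracker.last(); m != nil {
		label := "Agent is thinking"
		if toolRunning {
			label = "Processing"
		}
		fmt.Printf("%s (prompt=%d, response=%d)\n", label, m.prompt, m.response)
	}
}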
93 changes: 85 additions & 8 deletions adk-code/internal/display/formatters/metrics_formatter.go
@@ -28,7 +28,8 @@ func NewMetricsFormatter(outputFormat string, s *styles.Styles, f *styles.Format
}

// RenderTokenMetrics renders compact token usage metrics for display
func (mf *MetricsFormatter) RenderTokenMetrics(promptTokens, cachedTokens, responseTokens, totalTokens int64) string {
// contextWindow is in tokens, or -1 if unknown/not applicable
func (mf *MetricsFormatter) RenderTokenMetrics(promptTokens, cachedTokens, responseTokens, thoughtTokens, totalTokens, contextWindow int64) string {
isTTY := styles.IsTTY != nil && styles.IsTTY()
if mf.outputFormat == styles.OutputFormatPlain || !isTTY || totalTokens == 0 {
return ""
@@ -39,24 +40,100 @@ func (mf *MetricsFormatter) RenderTokenMetrics(promptTokens, cachedTokens, respo
Foreground(lipgloss.AdaptiveColor{Light: "250", Dark: "240"}).
Italic(true)

// Build metrics string: "Tokens: 2,341 prompt | 892 cached | 1,205 response | Total: 5,054"
var parts []string
// Calculate meaningful metrics
// Note: promptTokens from tracker includes cached portion (from Gemini API PromptTokenCount)
// So we need to subtract cached to get truly new tokens
newPromptTokens := promptTokens - cachedTokens // New prompt tokens (excluding cached)
actualTokensUsed := newPromptTokens + responseTokens // New tokens actually processed (what you pay for)
cacheHitTokens := cachedTokens // Tokens served from cache

// Calculate cache efficiency: percentage of INPUT that was cached
// (response tokens don't apply to caching, only input does)
var cacheEfficiency float64
if promptTokens > 0 {
parts = append(parts, fmt.Sprintf("%d prompt", promptTokens))
cacheEfficiency = (float64(cacheHitTokens) / float64(promptTokens)) * 100
}

// Determine cache efficiency indicator
cacheIndicator := ""
switch {
case cacheEfficiency >= 80:
cacheIndicator = "🚀 excellent"
case cacheEfficiency >= 50:
cacheIndicator = "✅ good"
case cacheEfficiency >= 20:
cacheIndicator = "⚠️ modest"
default:
cacheIndicator = "❌ minimal"
}

// Build metrics string with meaningful insights
// Format: "Session: new:29K tok | cached:26K tok (92% excellent) | context:28K/1M tok (3% ✅ healthy)"
var parts []string

// Show new tokens used (cost to the user) - make it clear these are tokens
if actualTokensUsed > 0 {
parts = append(parts, fmt.Sprintf("new:%s tok", formatCompactNumber(actualTokensUsed)))
}
if cachedTokens > 0 {
parts = append(parts, fmt.Sprintf("%d cached", cachedTokens))

// Show cache reuse efficiency - make it clear these are tokens
if cacheHitTokens > 0 {
parts = append(parts, fmt.Sprintf("cached:%s tok (%.0f%% %s)", formatCompactNumber(cacheHitTokens), cacheEfficiency, cacheIndicator))
}

// Show response size only if significant - make it clear these are tokens
if responseTokens > 0 {
parts = append(parts, fmt.Sprintf("%d response", responseTokens))
parts = append(parts, fmt.Sprintf("response:%s tok", formatCompactNumber(responseTokens)))
}

// Add session total with context window utilization
// totalTokens includes ALL tokens: new + cached + thoughts + tool use
if contextWindow > 0 {
contextUsagePercent := (float64(totalTokens) / float64(contextWindow)) * 100
contextIndicator := getContextWindowIndicator(contextUsagePercent)

// Show thought tokens if they're a significant portion (>10% of total)
thoughtNote := ""
if thoughtTokens > 0 && float64(thoughtTokens)/float64(totalTokens) > 0.1 {
thoughtNote = fmt.Sprintf(" incl. %s thoughts", formatCompactNumber(thoughtTokens))
}

parts = append(parts, fmt.Sprintf("session:%s/%s tok (%.1f%% %s%s)", formatCompactNumber(totalTokens), formatCompactNumber(contextWindow), contextUsagePercent, contextIndicator, thoughtNote))
}

metricsStr := fmt.Sprintf("Tokens: %s | Total: %d", strings.Join(parts, " | "), totalTokens)
metricsStr := fmt.Sprintf("Session: %s", strings.Join(parts, " | "))

return metricStyle.Render(metricsStr)
}

// formatCompactNumber converts large numbers to compact form (e.g., 28029 -> 28K)
func formatCompactNumber(n int64) string {
switch {
case n >= 1000000:
return fmt.Sprintf("%.1fM", float64(n)/1000000)
case n >= 1000:
return fmt.Sprintf("%.0fK", float64(n)/1000)
default:
return fmt.Sprintf("%d", n)
}
}

// getContextWindowIndicator returns a visual indicator for context window usage
func getContextWindowIndicator(usagePercent float64) string {
switch {
case usagePercent < 10:
return "✅ healthy"
case usagePercent < 25:
return "🟢 good"
case usagePercent < 50:
return "🟡 moderate"
case usagePercent < 75:
return "🟠 high"
default:
return "🔴 critical"
}
}

// APIUsageInfo holds token usage and cost information
type APIUsageInfo struct {
TokensIn int
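To make the arithmetic behind the new compact "Session:" line concrete, here is a standalone sketch (sample values are invented; formatCompact simply mirrors formatCompactNumber from the diff above):

package main

import "fmt"

// formatCompact mirrors formatCompactNumber from the diff above.
func formatCompact(n int64) string {
	switch {
	case n >= 1000000:
		return fmt.Sprintf("%.1fM", float64(n)/1000000)
	case n >= 1000:
		return fmt.Sprintf("%.0fK", float64(n)/1000)
	default:
		return fmt.Sprintf("%d", n)
	}
}

func main() {
	var (
		promptTokens   int64 = 28000 // includes the cached portion, per the tracker's convention
		cachedTokens   int64 = 26000
		responseTokens int64 = 1200
		totalTokens    int64 = 29500
		contextWindow  int64 = 1000000
	)

	newPrompt := promptTokens - cachedTokens   // prompt tokens not served from cache
	actualUsed := newPrompt + responseTokens   // "new" tokens shown to the user
	cacheEff := float64(cachedTokens) / float64(promptTokens) * 100
	ctxPct := float64(totalTokens) / float64(contextWindow) * 100

	fmt.Printf("Session: new:%s tok | cached:%s tok (%.0f%%) | response:%s tok | session:%s/%s tok (%.1f%%)\n",
		formatCompact(actualUsed), formatCompact(cachedTokens), cacheEff,
		formatCompact(responseTokens), formatCompact(totalTokens), formatCompact(contextWindow), ctxPct)
	// Prints roughly: Session: new:3K tok | cached:26K tok (93%) | response:1K tok | session:30K/1.0M tok (3.0%)
}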
4 changes: 2 additions & 2 deletions adk-code/internal/display/renderer/renderer.go
@@ -235,8 +235,8 @@ func (r *Renderer) RenderTaskFailed() string {
return r.metricsFormatter.RenderTaskFailed()
}

func (r *Renderer) RenderTokenMetrics(promptTokens, cachedTokens, responseTokens, totalTokens int64) string {
return r.metricsFormatter.RenderTokenMetrics(promptTokens, cachedTokens, responseTokens, totalTokens)
func (r *Renderer) RenderTokenMetrics(promptTokens, cachedTokens, responseTokens, thoughtTokens, totalTokens, contextWindow int64) string {
return r.metricsFormatter.RenderTokenMetrics(promptTokens, cachedTokens, responseTokens, thoughtTokens, totalTokens, contextWindow)
}

func (r *Renderer) RenderAPIUsage(status string, usage *formatters.APIUsageInfo) string {
3 changes: 3 additions & 0 deletions adk-code/internal/repl/repl.go
@@ -231,11 +231,14 @@ agentLoop:
// Display token metrics for this request
summary := r.config.SessionTokens.GetSummary()
if summary.TotalTokens > 0 {
contextWindow := int64(r.config.SelectedModel.ContextWindow)
metrics := r.config.Renderer.RenderTokenMetrics(
summary.TotalPromptTokens,
summary.TotalCachedTokens,
summary.TotalResponseTokens,
summary.TotalThoughtTokens,
summary.TotalTokens,
contextWindow,
)
if metrics != "" {
fmt.Printf("%s\n", metrics)
56 changes: 40 additions & 16 deletions adk-code/internal/tracking/formatter.go
@@ -46,32 +46,56 @@ func FormatSessionSummary(summary *Summary) string {
"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
}

// Calculate used tokens and cache efficiency
usedTokens := summary.TotalTokens - summary.TotalCachedTokens
// Calculate key metrics
usedTokens := summary.TotalPromptTokens + summary.TotalResponseTokens // Actual new tokens
cachedTokens := summary.TotalCachedTokens // Tokens served from cache
totalProcessed := usedTokens + cachedTokens // Everything processed

var cacheEfficiency float64
if summary.TotalTokens > 0 {
cacheEfficiency = float64(summary.TotalCachedTokens) / float64(summary.TotalTokens) * 100
if totalProcessed > 0 {
cacheEfficiency = float64(cachedTokens) / float64(totalProcessed) * 100
}

lines = append(lines, fmt.Sprintf("Total Tokens: %d", summary.TotalTokens))
lines = append(lines, fmt.Sprintf(" ├─ Actually Used: %d", usedTokens))
lines = append(lines, fmt.Sprintf(" ├─ Prompt: %d", summary.TotalPromptTokens))
lines = append(lines, fmt.Sprintf(" ├─ Response: %d", summary.TotalResponseTokens))
// Rough estimate of tokens saved via caching (placeholder: credit cachedTokens / 10)
estimatedCostSavings := cachedTokens / 10 // Rough estimation

// Main metrics - what actually matters
lines = append(lines, "")
lines = append(lines, "💰 Cost Metrics (What You Pay)")
lines = append(lines, fmt.Sprintf(" ├─ New Tokens: %d (prompt + response you paid for)", usedTokens))
lines = append(lines, fmt.Sprintf(" ├─ Cache Reuse: %d tokens (%.1f%% efficiency)", cachedTokens, cacheEfficiency))
lines = append(lines, fmt.Sprintf(" ├─ Cost Savings: ~%d tokens via caching", estimatedCostSavings))
lines = append(lines, fmt.Sprintf(" └─ API Billing: %d total tokens", totalProcessed))

// Breakdown by component
lines = append(lines, "")
lines = append(lines, "🔧 Token Breakdown")
lines = append(lines, fmt.Sprintf(" ├─ Prompt (input): %d", summary.TotalPromptTokens))
lines = append(lines, fmt.Sprintf(" ├─ Response (output):%d", summary.TotalResponseTokens))

if summary.TotalCachedTokens > 0 {
lines = append(lines, fmt.Sprintf(" ├─ Cached: %d (%.1f%% saved)", summary.TotalCachedTokens, cacheEfficiency))
}
if summary.TotalThoughtTokens > 0 {
lines = append(lines, fmt.Sprintf(" ├─ Thoughts: %d", summary.TotalThoughtTokens))
lines = append(lines, fmt.Sprintf(" ├─ Thinking: %d", summary.TotalThoughtTokens))
}
if summary.TotalToolUseTokens > 0 {
lines = append(lines, fmt.Sprintf(" └─ Tool Use: %d", summary.TotalToolUseTokens))
lines = append(lines, fmt.Sprintf(" ├─ Tool Use: %d", summary.TotalToolUseTokens))
}
if summary.TotalCachedTokens > 0 {
lines = append(lines, fmt.Sprintf(" └─ Cached Reuse: %d", summary.TotalCachedTokens))
}

// Efficiency metrics
lines = append(lines, "")
lines = append(lines, fmt.Sprintf("Requests: %d", summary.RequestCount))
lines = append(lines, fmt.Sprintf("Avg Tokens/Request: %.1f", summary.AvgTokensPerRequest))
lines = append(lines, fmt.Sprintf("Session Duration: %s", formatDuration(summary.SessionDuration)))
lines = append(lines, "📈 Session Efficiency")
lines = append(lines, fmt.Sprintf(" ├─ Requests: %d", summary.RequestCount))
lines = append(lines, fmt.Sprintf(" ├─ Avg/Request: %.0f tokens", summary.AvgTokensPerRequest))

// Cache hit rate if available
if cacheEfficiency > 0 {
lines = append(lines, fmt.Sprintf(" ├─ Cache Hit Rate: %.1f%% (excellent!)", cacheEfficiency))
}

lines = append(lines, fmt.Sprintf(" └─ Duration: %s", formatDuration(summary.SessionDuration)))

lines = append(lines, "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")

return strings.Join(lines, "\n")
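As a quick sanity check on the summary math, a standalone sketch with invented session totals; the savings figure uses the same rough cachedTokens/10 placeholder as the diff:

package main

import "fmt"

func main() {
	// Invented sample session to illustrate FormatSessionSummary's arithmetic.
	var (
		totalPrompt   int64 = 4000 // new prompt tokens accumulated over the session
		totalResponse int64 = 2500
		totalCached   int64 = 26000
	)

	usedTokens := totalPrompt + totalResponse  // tokens paid for at full price
	totalProcessed := usedTokens + totalCached // everything the API handled
	cacheEfficiency := float64(totalCached) / float64(totalProcessed) * 100
	estimatedSavings := totalCached / 10 // same rough placeholder as the diff

	fmt.Printf("new=%d cached=%d efficiency=%.1f%% savings~%d\n",
		usedTokens, totalCached, cacheEfficiency, estimatedSavings)
	// Prints: new=6500 cached=26000 efficiency=80.0% savings~2600
}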
87 changes: 75 additions & 12 deletions adk-code/internal/tracking/tracker.go
@@ -32,6 +32,13 @@ type SessionTokens struct {
RequestCount int
Metrics []TokenMetrics
SessionStartTime time.Time
// Track previous API response totals to calculate per-request deltas
// API returns cumulative values, so we need to subtract previous to get current request's cost
PreviousPromptTotal int32
PreviousCachedTotal int32
PreviousResponseTotal int32
PreviousThoughtTotal int32
PreviousToolUseTotal int32
}

// NewSessionTokens creates a new session token tracker.
@@ -43,6 +50,9 @@ func NewSessionTokens() *SessionTokens {
}

// RecordMetrics records token usage from a GenerateContentResponseUsageMetadata.
// For multi-turn conversations, the API returns cumulative token counts.
// We calculate the per-request delta for each component (prompt, response, cached, etc.)
// to show accurate current request usage.
func (st *SessionTokens) RecordMetrics(metadata *genai.GenerateContentResponseUsageMetadata, requestID string) {
if metadata == nil {
return
@@ -51,27 +61,80 @@ func (st *SessionTokens) RecordMetrics(metadata *genai.GenerateContentResponseUs
st.mu.Lock()
defer st.mu.Unlock()

// Calculate per-request deltas for each component
// The API returns cumulative values, so we subtract the previous total to get this request's cost
promptDelta := metadata.PromptTokenCount - st.PreviousPromptTotal
responseDelta := metadata.CandidatesTokenCount - st.PreviousResponseTotal
cachedDelta := metadata.CachedContentTokenCount - st.PreviousCachedTotal
thoughtDelta := metadata.ThoughtsTokenCount - st.PreviousThoughtTotal
toolUseDelta := metadata.ToolUsePromptTokenCount - st.PreviousToolUseTotal

// Ensure we don't get negative values (safeguard against API quirks)
if promptDelta < 0 {
promptDelta = metadata.PromptTokenCount
}
if responseDelta < 0 {
responseDelta = metadata.CandidatesTokenCount
}
if cachedDelta < 0 {
cachedDelta = metadata.CachedContentTokenCount
}
if thoughtDelta < 0 {
thoughtDelta = metadata.ThoughtsTokenCount
}
if toolUseDelta < 0 {
toolUseDelta = metadata.ToolUsePromptTokenCount
}

// Total for this request = prompt + response + cached + thoughts + tool use deltas
// This is the actual cost of this single request
perRequestTotal := promptDelta + responseDelta + cachedDelta + thoughtDelta + toolUseDelta

metric := TokenMetrics{
PromptTokens: metadata.PromptTokenCount,
CachedTokens: metadata.CachedContentTokenCount,
ResponseTokens: metadata.CandidatesTokenCount,
ThoughtTokens: metadata.ThoughtsTokenCount,
ToolUseTokens: metadata.ToolUsePromptTokenCount,
TotalTokens: metadata.TotalTokenCount,
PromptTokens: promptDelta,
CachedTokens: cachedDelta,
ResponseTokens: responseDelta,
ThoughtTokens: thoughtDelta,
ToolUseTokens: toolUseDelta,
TotalTokens: perRequestTotal, // Only this request's cost, not cumulative
Timestamp: time.Now(),
RequestID: requestID,
}

st.Metrics = append(st.Metrics, metric)
st.TotalPromptTokens += int64(metadata.PromptTokenCount)
st.TotalCachedTokens += int64(metadata.CachedContentTokenCount)
st.TotalResponseTokens += int64(metadata.CandidatesTokenCount)
st.TotalThoughtTokens += int64(metadata.ThoughtsTokenCount)
st.TotalToolUseTokens += int64(metadata.ToolUsePromptTokenCount)
st.TotalTokens += int64(metadata.TotalTokenCount)

// Accumulate the per-request deltas for session totals
st.TotalPromptTokens += int64(promptDelta)
st.TotalCachedTokens += int64(cachedDelta)
st.TotalResponseTokens += int64(responseDelta)
st.TotalThoughtTokens += int64(thoughtDelta)
st.TotalToolUseTokens += int64(toolUseDelta)
st.TotalTokens += int64(perRequestTotal)

// Update previous totals for next request's delta calculation
st.PreviousPromptTotal = metadata.PromptTokenCount
st.PreviousResponseTotal = metadata.CandidatesTokenCount
st.PreviousCachedTotal = metadata.CachedContentTokenCount
st.PreviousThoughtTotal = metadata.ThoughtsTokenCount
st.PreviousToolUseTotal = metadata.ToolUsePromptTokenCount

st.RequestCount++
}

// GetLastMetric returns the most recently recorded metric (for current request).
// This provides the per-request token breakdown that should be displayed.
func (st *SessionTokens) GetLastMetric() *TokenMetrics {
st.mu.RLock()
defer st.mu.RUnlock()

if len(st.Metrics) == 0 {
return nil
}

metric := st.Metrics[len(st.Metrics)-1]
return &metric
}

// GetSummary returns a formatted summary of token usage.
func (st *SessionTokens) GetSummary() *Summary {
st.mu.RLock()
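The core of this change is the cumulative-to-delta conversion. Below is a standalone sketch (not the PR's code; sample values are invented, and the PR's premise that the usage metadata accumulates across turns is taken as given) showing how two turns reduce to per-request deltas:

package main

import "fmt"

// usage mimics the cumulative counters carried across turns (sample values invented).
type usage struct {
	prompt, cached, response int32
}

// tracker keeps the previous cumulative totals so each call reports only the
// tokens attributable to the current request, as RecordMetrics does above.
type tracker struct {
	prevPrompt, prevCached, prevResponse int32
}

func (t *tracker) record(u usage) (promptDelta, cachedDelta, responseDelta int32) {
	promptDelta = u.prompt - t.prevPrompt
	cachedDelta = u.cached - t.prevCached
	responseDelta = u.response - t.prevResponse
	// Safeguard: if the counters ever reset, fall back to the raw values.
	if promptDelta < 0 {
		promptDelta = u.prompt
	}
	if cachedDelta < 0 {
		cachedDelta = u.cached
	}
	if responseDelta < 0 {
		responseDelta = u.response
	}
	t.prevPrompt, t.prevCached, t.prevResponse = u.prompt, u.cached, u.response
	return
}

func main() {
	var t tracker
	turns := []usage{
		{prompt: 1200, cached: 0, response: 300},    // totals after the first request
		{prompt: 2900, cached: 1100, response: 750}, // cumulative totals after the second request
	}
	for i, u := range turns {
		p, c, r := t.record(u)
		fmt.Printf("request %d: prompt=%d cached=%d response=%d total=%d\n", i+1, p, c, r, p+c+r)
	}
	// request 1: prompt=1200 cached=0 response=300 total=1500
	// request 2: prompt=1700 cached=1100 response=450 total=3250
}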