25 changes: 11 additions & 14 deletions adk-code/internal/display/events/event.go
@@ -58,20 +58,17 @@ func PrintEventEnhanced(renderer *Renderer, streamDisplay *StreamingDisplay,
// Record token metrics if available and update spinner with metrics
if event.UsageMetadata != nil {
sessionTokens.RecordMetrics(event.UsageMetadata, requestID)
// Create token metrics for spinner display
metric := &tracking.TokenMetrics{
PromptTokens: event.UsageMetadata.PromptTokenCount,
CachedTokens: event.UsageMetadata.CachedContentTokenCount,
ResponseTokens: event.UsageMetadata.CandidatesTokenCount,
ThoughtTokens: event.UsageMetadata.ThoughtsTokenCount,
ToolUseTokens: event.UsageMetadata.ToolUsePromptTokenCount,
TotalTokens: event.UsageMetadata.TotalTokenCount,
}
// Update spinner with metrics if it's actively running
if *toolRunning {
spinner.UpdateWithMetrics("Processing", metric)
} else {
spinner.UpdateWithMetrics("Agent is thinking", metric)

// Get the correctly calculated per-request metric (with deltas already computed)
metric := sessionTokens.GetLastMetric()

// Update spinner with the per-request metrics if it's actively running
if metric != nil {
if *toolRunning {
spinner.UpdateWithMetrics("Processing", metric)
} else {
spinner.UpdateWithMetrics("Agent is thinking", metric)
}
}
}

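For context, a minimal standalone sketch (not part of the PR; the type names, helper names, and numbers below are invented) of the pattern event.go now follows: record the usage, read back the tracker's most recent per-request metric, and only update the spinner label when one actually exists.

package main

import "fmt"

// metrics stands in for tracking.TokenMetrics; only the fields needed here.
type metrics struct{ prompt, response int32 }

// sessionLog stands in for the session tracker used in the diff:
// record appends a per-request metric, last returns the most recent one (or nil).
type sessionLog struct{ all []metrics }

func (s *sessionLog) record(m metrics) { s.all = append(s.all, m) }

func (s *sessionLog) last() *metrics {
	if len(s.all) == 0 {
		return nil
	}
	return &s.all[len(s.all)-1]
}

func main() {
	var tracker sessionLog
	var toolRunning bool

	tracker.record(metrics{prompt: 1200, response: 300})

	// Mirror of the nil-guarded spinner update: refresh the label only when a
	// per-request metric is available.
	if m := tracker.last(); m != nil {
		label := "Agent is thinking"
		if toolRunning {
			label = "Processing"
		}
		fmt.Printf("%s (prompt=%d, response=%d)\n", label, m.prompt, m.response)
	}
}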
93 changes: 85 additions & 8 deletions adk-code/internal/display/formatters/metrics_formatter.go
@@ -28,7 +28,8 @@ func NewMetricsFormatter(outputFormat string, s *styles.Styles, f *styles.Format
}

// RenderTokenMetrics renders compact token usage metrics for display
func (mf *MetricsFormatter) RenderTokenMetrics(promptTokens, cachedTokens, responseTokens, totalTokens int64) string {
// contextWindow is in tokens, or -1 if unknown/not applicable
func (mf *MetricsFormatter) RenderTokenMetrics(promptTokens, cachedTokens, responseTokens, thoughtTokens, totalTokens, contextWindow int64) string {
isTTY := styles.IsTTY != nil && styles.IsTTY()
if mf.outputFormat == styles.OutputFormatPlain || !isTTY || totalTokens == 0 {
return ""
@@ -39,24 +40,100 @@ func (mf *MetricsFormatter) RenderTokenMetrics(promptTokens, cachedTokens, respo
Foreground(lipgloss.AdaptiveColor{Light: "250", Dark: "240"}).
Italic(true)

// Build metrics string: "Tokens: 2,341 prompt | 892 cached | 1,205 response | Total: 5,054"
var parts []string
// Calculate meaningful metrics
// Note: promptTokens from tracker includes cached portion (from Gemini API PromptTokenCount)
// So we need to subtract cached to get truly new tokens
newPromptTokens := promptTokens - cachedTokens // New prompt tokens (excluding cached)
actualTokensUsed := newPromptTokens + responseTokens // New tokens actually processed (what you pay for)
cacheHitTokens := cachedTokens // Tokens served from cache

// Calculate cache efficiency: percentage of INPUT that was cached
// (response tokens don't apply to caching, only input does)
var cacheEfficiency float64
if promptTokens > 0 {
parts = append(parts, fmt.Sprintf("%d prompt", promptTokens))
cacheEfficiency = (float64(cacheHitTokens) / float64(promptTokens)) * 100
}

// Determine cache efficiency indicator
cacheIndicator := ""
switch {
case cacheEfficiency >= 80:
cacheIndicator = "🚀 excellent"
case cacheEfficiency >= 50:
cacheIndicator = "✅ good"
case cacheEfficiency >= 20:
cacheIndicator = "⚠️ modest"
default:
cacheIndicator = "❌ minimal"
}

// Build metrics string with meaningful insights
// Format: "Session: new:29K tok | cached:26K tok (92% excellent) | context:28K/1M tok (3% ✅ healthy)"
var parts []string

// Show new tokens used (cost to the user) - make it clear these are tokens
if actualTokensUsed > 0 {
parts = append(parts, fmt.Sprintf("new:%s tok", formatCompactNumber(actualTokensUsed)))
}
if cachedTokens > 0 {
parts = append(parts, fmt.Sprintf("%d cached", cachedTokens))

// Show cache reuse efficiency - make it clear these are tokens
if cacheHitTokens > 0 {
parts = append(parts, fmt.Sprintf("cached:%s tok (%.0f%% %s)", formatCompactNumber(cacheHitTokens), cacheEfficiency, cacheIndicator))
}

// Show response size only if significant - make it clear these are tokens
if responseTokens > 0 {
parts = append(parts, fmt.Sprintf("%d response", responseTokens))
parts = append(parts, fmt.Sprintf("response:%s tok", formatCompactNumber(responseTokens)))
}

// Add session total with context window utilization
// totalTokens includes ALL tokens: new + cached + thoughts + tool use
if contextWindow > 0 {
contextUsagePercent := (float64(totalTokens) / float64(contextWindow)) * 100
contextIndicator := getContextWindowIndicator(contextUsagePercent)

// Show thought tokens if they're a significant portion (>10% of total)
thoughtNote := ""
if thoughtTokens > 0 && float64(thoughtTokens)/float64(totalTokens) > 0.1 {
thoughtNote = fmt.Sprintf(" incl. %s thoughts", formatCompactNumber(thoughtTokens))
}

parts = append(parts, fmt.Sprintf("session:%s/%s tok (%.1f%% %s%s)", formatCompactNumber(totalTokens), formatCompactNumber(contextWindow), contextUsagePercent, contextIndicator, thoughtNote))
}

metricsStr := fmt.Sprintf("Tokens: %s | Total: %d", strings.Join(parts, " | "), totalTokens)
metricsStr := fmt.Sprintf("Session: %s", strings.Join(parts, " | "))

return metricStyle.Render(metricsStr)
}

// formatCompactNumber converts large numbers to compact form (e.g., 28029 -> 28K)
func formatCompactNumber(n int64) string {
switch {
case n >= 1000000:
return fmt.Sprintf("%.1fM", float64(n)/1000000)
case n >= 1000:
return fmt.Sprintf("%.0fK", float64(n)/1000)
default:
return fmt.Sprintf("%d", n)
}
}

// getContextWindowIndicator returns a visual indicator for context window usage
func getContextWindowIndicator(usagePercent float64) string {
switch {
case usagePercent < 10:
return "✅ healthy"
case usagePercent < 25:
return "🟢 good"
case usagePercent < 50:
return "🟡 moderate"
case usagePercent < 75:
return "🟠 high"
default:
return "🔴 critical"
}
}

// APIUsageInfo holds token usage and cost information
type APIUsageInfo struct {
TokensIn int
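To make the arithmetic behind the new compact "Session:" line concrete, here is a standalone sketch (sample values are invented; formatCompact simply mirrors formatCompactNumber from the diff above):

package main

import "fmt"

// formatCompact mirrors formatCompactNumber from the diff above.
func formatCompact(n int64) string {
	switch {
	case n >= 1000000:
		return fmt.Sprintf("%.1fM", float64(n)/1000000)
	case n >= 1000:
		return fmt.Sprintf("%.0fK", float64(n)/1000)
	default:
		return fmt.Sprintf("%d", n)
	}
}

func main() {
	var (
		promptTokens   int64 = 28000 // includes the cached portion, per the tracker's convention
		cachedTokens   int64 = 26000
		responseTokens int64 = 1200
		totalTokens    int64 = 29500
		contextWindow  int64 = 1000000
	)

	newPrompt := promptTokens - cachedTokens   // prompt tokens not served from cache
	actualUsed := newPrompt + responseTokens   // "new" tokens shown to the user
	cacheEff := float64(cachedTokens) / float64(promptTokens) * 100
	ctxPct := float64(totalTokens) / float64(contextWindow) * 100

	fmt.Printf("Session: new:%s tok | cached:%s tok (%.0f%%) | response:%s tok | session:%s/%s tok (%.1f%%)\n",
		formatCompact(actualUsed), formatCompact(cachedTokens), cacheEff,
		formatCompact(responseTokens), formatCompact(totalTokens), formatCompact(contextWindow), ctxPct)
	// Prints roughly: Session: new:3K tok | cached:26K tok (93%) | response:1K tok | session:30K/1.0M tok (3.0%)
}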
4 changes: 2 additions & 2 deletions adk-code/internal/display/renderer/renderer.go
@@ -235,8 +235,8 @@ func (r *Renderer) RenderTaskFailed() string {
return r.metricsFormatter.RenderTaskFailed()
}

func (r *Renderer) RenderTokenMetrics(promptTokens, cachedTokens, responseTokens, totalTokens int64) string {
return r.metricsFormatter.RenderTokenMetrics(promptTokens, cachedTokens, responseTokens, totalTokens)
func (r *Renderer) RenderTokenMetrics(promptTokens, cachedTokens, responseTokens, thoughtTokens, totalTokens, contextWindow int64) string {
return r.metricsFormatter.RenderTokenMetrics(promptTokens, cachedTokens, responseTokens, thoughtTokens, totalTokens, contextWindow)
}

func (r *Renderer) RenderAPIUsage(status string, usage *formatters.APIUsageInfo) string {
3 changes: 3 additions & 0 deletions adk-code/internal/repl/repl.go
@@ -231,11 +231,14 @@ agentLoop:
// Display token metrics for this request
summary := r.config.SessionTokens.GetSummary()
if summary.TotalTokens > 0 {
contextWindow := int64(r.config.SelectedModel.ContextWindow)
metrics := r.config.Renderer.RenderTokenMetrics(
summary.TotalPromptTokens,
summary.TotalCachedTokens,
summary.TotalResponseTokens,
summary.TotalThoughtTokens,
summary.TotalTokens,
contextWindow,
)
if metrics != "" {
fmt.Printf("%s\n", metrics)
56 changes: 40 additions & 16 deletions adk-code/internal/tracking/formatter.go
@@ -46,32 +46,56 @@ func FormatSessionSummary(summary *Summary) string {
"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
}

// Calculate used tokens and cache efficiency
usedTokens := summary.TotalTokens - summary.TotalCachedTokens
// Calculate key metrics
usedTokens := summary.TotalPromptTokens + summary.TotalResponseTokens // Actual new tokens
cachedTokens := summary.TotalCachedTokens // Tokens served from cache
totalProcessed := usedTokens + cachedTokens // Everything processed

var cacheEfficiency float64
if summary.TotalTokens > 0 {
cacheEfficiency = float64(summary.TotalCachedTokens) / float64(summary.TotalTokens) * 100
if totalProcessed > 0 {
cacheEfficiency = float64(cachedTokens) / float64(totalProcessed) * 100
}

lines = append(lines, fmt.Sprintf("Total Tokens: %d", summary.TotalTokens))
lines = append(lines, fmt.Sprintf(" ├─ Actually Used: %d", usedTokens))
lines = append(lines, fmt.Sprintf(" ├─ Prompt: %d", summary.TotalPromptTokens))
lines = append(lines, fmt.Sprintf(" ├─ Response: %d", summary.TotalResponseTokens))
// Rough estimate of tokens saved via caching (placeholder: credit cachedTokens / 10)
estimatedCostSavings := cachedTokens / 10 // Rough estimation

// Main metrics - what actually matters
lines = append(lines, "")
lines = append(lines, "💰 Cost Metrics (What You Pay)")
lines = append(lines, fmt.Sprintf(" ├─ New Tokens: %d (prompt + response you paid for)", usedTokens))
lines = append(lines, fmt.Sprintf(" ├─ Cache Reuse: %d tokens (%.1f%% efficiency)", cachedTokens, cacheEfficiency))
lines = append(lines, fmt.Sprintf(" ├─ Cost Savings: ~%d tokens via caching", estimatedCostSavings))
lines = append(lines, fmt.Sprintf(" └─ API Billing: %d total tokens", totalProcessed))

// Breakdown by component
lines = append(lines, "")
lines = append(lines, "🔧 Token Breakdown")
lines = append(lines, fmt.Sprintf(" ├─ Prompt (input): %d", summary.TotalPromptTokens))
lines = append(lines, fmt.Sprintf(" ├─ Response (output):%d", summary.TotalResponseTokens))

if summary.TotalCachedTokens > 0 {
lines = append(lines, fmt.Sprintf(" ├─ Cached: %d (%.1f%% saved)", summary.TotalCachedTokens, cacheEfficiency))
}
if summary.TotalThoughtTokens > 0 {
lines = append(lines, fmt.Sprintf(" ├─ Thoughts: %d", summary.TotalThoughtTokens))
lines = append(lines, fmt.Sprintf(" ├─ Thinking: %d", summary.TotalThoughtTokens))
}
if summary.TotalToolUseTokens > 0 {
lines = append(lines, fmt.Sprintf(" └─ Tool Use: %d", summary.TotalToolUseTokens))
lines = append(lines, fmt.Sprintf(" ├─ Tool Use: %d", summary.TotalToolUseTokens))
}
if summary.TotalCachedTokens > 0 {
lines = append(lines, fmt.Sprintf(" └─ Cached Reuse: %d", summary.TotalCachedTokens))
}

// Efficiency metrics
lines = append(lines, "")
lines = append(lines, fmt.Sprintf("Requests: %d", summary.RequestCount))
lines = append(lines, fmt.Sprintf("Avg Tokens/Request: %.1f", summary.AvgTokensPerRequest))
lines = append(lines, fmt.Sprintf("Session Duration: %s", formatDuration(summary.SessionDuration)))
lines = append(lines, "📈 Session Efficiency")
lines = append(lines, fmt.Sprintf(" ├─ Requests: %d", summary.RequestCount))
lines = append(lines, fmt.Sprintf(" ├─ Avg/Request: %.0f tokens", summary.AvgTokensPerRequest))

// Cache hit rate if available
if cacheEfficiency > 0 {
lines = append(lines, fmt.Sprintf(" ├─ Cache Hit Rate: %.1f%% (excellent!)", cacheEfficiency))
}

lines = append(lines, fmt.Sprintf(" └─ Duration: %s", formatDuration(summary.SessionDuration)))

lines = append(lines, "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n")

return strings.Join(lines, "\n")
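As a quick sanity check on the summary math, a standalone sketch with invented session totals; the savings figure uses the same rough cachedTokens/10 placeholder as the diff:

package main

import "fmt"

func main() {
	// Invented sample session to illustrate FormatSessionSummary's arithmetic.
	var (
		totalPrompt   int64 = 4000 // new prompt tokens accumulated over the session
		totalResponse int64 = 2500
		totalCached   int64 = 26000
	)

	usedTokens := totalPrompt + totalResponse  // tokens paid for at full price
	totalProcessed := usedTokens + totalCached // everything the API handled
	cacheEfficiency := float64(totalCached) / float64(totalProcessed) * 100
	estimatedSavings := totalCached / 10 // same rough placeholder as the diff

	fmt.Printf("new=%d cached=%d efficiency=%.1f%% savings~%d\n",
		usedTokens, totalCached, cacheEfficiency, estimatedSavings)
	// Prints: new=6500 cached=26000 efficiency=80.0% savings~2600
}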
87 changes: 75 additions & 12 deletions adk-code/internal/tracking/tracker.go
@@ -32,6 +32,13 @@ type SessionTokens struct {
RequestCount int
Metrics []TokenMetrics
SessionStartTime time.Time
// Track previous API response totals to calculate per-request deltas
// API returns cumulative values, so we need to subtract previous to get current request's cost
PreviousPromptTotal int32
PreviousCachedTotal int32
PreviousResponseTotal int32
PreviousThoughtTotal int32
PreviousToolUseTotal int32
}

// NewSessionTokens creates a new session token tracker.
@@ -43,6 +50,9 @@ func NewSessionTokens() *SessionTokens {
}

// RecordMetrics records token usage from a GenerateContentResponseUsageMetadata.
// For multi-turn conversations, the API returns cumulative token counts.
// We calculate the per-request delta for each component (prompt, response, cached, etc.)
// to show accurate current request usage.
func (st *SessionTokens) RecordMetrics(metadata *genai.GenerateContentResponseUsageMetadata, requestID string) {
if metadata == nil {
return
@@ -51,27 +61,80 @@ func (st *SessionTokens) RecordMetrics(metadata *genai.GenerateContentResponseUs
st.mu.Lock()
defer st.mu.Unlock()

// Calculate per-request deltas for each component
// The API returns cumulative values, so we subtract the previous total to get this request's cost
promptDelta := metadata.PromptTokenCount - st.PreviousPromptTotal
responseDelta := metadata.CandidatesTokenCount - st.PreviousResponseTotal
cachedDelta := metadata.CachedContentTokenCount - st.PreviousCachedTotal
thoughtDelta := metadata.ThoughtsTokenCount - st.PreviousThoughtTotal
toolUseDelta := metadata.ToolUsePromptTokenCount - st.PreviousToolUseTotal

// Ensure we don't get negative values (safeguard against API quirks)
if promptDelta < 0 {
promptDelta = metadata.PromptTokenCount
}
if responseDelta < 0 {
responseDelta = metadata.CandidatesTokenCount
}
if cachedDelta < 0 {
cachedDelta = metadata.CachedContentTokenCount
}
if thoughtDelta < 0 {
thoughtDelta = metadata.ThoughtsTokenCount
}
if toolUseDelta < 0 {
toolUseDelta = metadata.ToolUsePromptTokenCount
}

// Total for this request = prompt + response + cached + thoughts + tool use deltas
// This is the actual cost of this single request
perRequestTotal := promptDelta + responseDelta + cachedDelta + thoughtDelta + toolUseDelta

metric := TokenMetrics{
PromptTokens: metadata.PromptTokenCount,
CachedTokens: metadata.CachedContentTokenCount,
ResponseTokens: metadata.CandidatesTokenCount,
ThoughtTokens: metadata.ThoughtsTokenCount,
ToolUseTokens: metadata.ToolUsePromptTokenCount,
TotalTokens: metadata.TotalTokenCount,
PromptTokens: promptDelta,
CachedTokens: cachedDelta,
ResponseTokens: responseDelta,
ThoughtTokens: thoughtDelta,
ToolUseTokens: toolUseDelta,
TotalTokens: perRequestTotal, // Only this request's cost, not cumulative
Timestamp: time.Now(),
RequestID: requestID,
}

st.Metrics = append(st.Metrics, metric)
st.TotalPromptTokens += int64(metadata.PromptTokenCount)
st.TotalCachedTokens += int64(metadata.CachedContentTokenCount)
st.TotalResponseTokens += int64(metadata.CandidatesTokenCount)
st.TotalThoughtTokens += int64(metadata.ThoughtsTokenCount)
st.TotalToolUseTokens += int64(metadata.ToolUsePromptTokenCount)
st.TotalTokens += int64(metadata.TotalTokenCount)

// Accumulate the per-request deltas for session totals
st.TotalPromptTokens += int64(promptDelta)
st.TotalCachedTokens += int64(cachedDelta)
st.TotalResponseTokens += int64(responseDelta)
st.TotalThoughtTokens += int64(thoughtDelta)
st.TotalToolUseTokens += int64(toolUseDelta)
st.TotalTokens += int64(perRequestTotal)

// Update previous totals for next request's delta calculation
st.PreviousPromptTotal = metadata.PromptTokenCount
st.PreviousResponseTotal = metadata.CandidatesTokenCount
st.PreviousCachedTotal = metadata.CachedContentTokenCount
st.PreviousThoughtTotal = metadata.ThoughtsTokenCount
st.PreviousToolUseTotal = metadata.ToolUsePromptTokenCount

st.RequestCount++
}

// GetLastMetric returns the most recently recorded metric (for current request).
// This provides the per-request token breakdown that should be displayed.
func (st *SessionTokens) GetLastMetric() *TokenMetrics {
st.mu.RLock()
defer st.mu.RUnlock()

if len(st.Metrics) == 0 {
return nil
}

metric := st.Metrics[len(st.Metrics)-1]
return &metric
}

// GetSummary returns a formatted summary of token usage.
func (st *SessionTokens) GetSummary() *Summary {
st.mu.RLock()
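The core of this change is the cumulative-to-delta conversion. Below is a standalone sketch (not the PR's code; sample values are invented, and the PR's premise that the usage metadata accumulates across turns is taken as given) showing how two turns reduce to per-request deltas:

package main

import "fmt"

// usage mimics the cumulative counters carried across turns (sample values invented).
type usage struct {
	prompt, cached, response int32
}

// tracker keeps the previous cumulative totals so each call reports only the
// tokens attributable to the current request, as RecordMetrics does above.
type tracker struct {
	prevPrompt, prevCached, prevResponse int32
}

func (t *tracker) record(u usage) (promptDelta, cachedDelta, responseDelta int32) {
	promptDelta = u.prompt - t.prevPrompt
	cachedDelta = u.cached - t.prevCached
	responseDelta = u.response - t.prevResponse
	// Safeguard: if the counters ever reset, fall back to the raw values.
	if promptDelta < 0 {
		promptDelta = u.prompt
	}
	if cachedDelta < 0 {
		cachedDelta = u.cached
	}
	if responseDelta < 0 {
		responseDelta = u.response
	}
	t.prevPrompt, t.prevCached, t.prevResponse = u.prompt, u.cached, u.response
	return
}

func main() {
	var t tracker
	turns := []usage{
		{prompt: 1200, cached: 0, response: 300},    // totals after the first request
		{prompt: 2900, cached: 1100, response: 750}, // cumulative totals after the second request
	}
	for i, u := range turns {
		p, c, r := t.record(u)
		fmt.Printf("request %d: prompt=%d cached=%d response=%d total=%d\n", i+1, p, c, r, p+c+r)
	}
	// request 1: prompt=1200 cached=0 response=300 total=1500
	// request 2: prompt=1700 cached=1100 response=450 total=3250
}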