-
Notifications
You must be signed in to change notification settings - Fork 34
feat: in-session model switching and cost awareness (#32) #35
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
75675a0
af6988c
3aeb7cb
95bd91b
6522b01
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| use serde::{Deserialize, Serialize}; | ||
|
|
||
| /// Model pricing: (prompt_price_per_1k_tokens, completion_price_per_1k_tokens) | ||
/// Per-model pricing as (prompt USD per 1k tokens, completion USD per 1k tokens).
///
/// Matching is case-insensitive and substring-based so that provider-prefixed
/// identifiers such as `openrouter/anthropic/claude-3.7-sonnet` still resolve.
/// Returns `None` for models without a known price.
fn model_pricing(model: &str) -> Option<(f64, f64)> {
    // Ordered table: more specific needles (e.g. "gpt-4o-mini") must come
    // before their prefixes (e.g. "gpt-4o") so the first match wins correctly.
    const PRICING: &[(&str, f64, f64)] = &[
        ("claude-3.7-sonnet", 0.003, 0.015),
        ("claude-3-7-sonnet", 0.003, 0.015),
        ("claude-3.5-haiku", 0.0008, 0.004),
        ("claude-3-5-haiku", 0.0008, 0.004),
        ("gpt-4o-mini", 0.00015, 0.0006),
        ("gpt-4o", 0.0025, 0.01),
        ("gpt-4.1", 0.002, 0.008),
        ("gemini-2.0-flash", 0.0001, 0.0004),
        ("kimi-k2", 0.0006, 0.002),
    ];

    let normalized = model.trim().to_ascii_lowercase();
    PRICING
        .iter()
        .find(|(needle, _, _)| normalized.contains(needle))
        .map(|&(_, prompt_rate, completion_rate)| (prompt_rate, completion_rate))
}
|
|
||
| /// Estimate cost in USD for a given model and token counts. | ||
| pub fn estimate_cost(model: &str, prompt_tokens: u64, completion_tokens: u64) -> Option<f64> { | ||
| let (prompt_price, completion_price) = model_pricing(model)?; | ||
| let cost = (prompt_tokens as f64 / 1000.0) * prompt_price | ||
| + (completion_tokens as f64 / 1000.0) * completion_price; | ||
| Some(cost) | ||
| } | ||
|
|
||
/// Snapshot of a single query's token usage plus its estimated cost.
/// Serialized with camelCase field names for consumption by the frontend.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct CostEstimate {
    // Model identifier exactly as supplied by the caller (may carry a provider prefix).
    pub model: String,
    // Number of prompt (input) tokens.
    pub prompt_tokens: u64,
    // Number of completion (output) tokens.
    pub completion_tokens: u64,
    // Estimated cost in USD; `None` when the model has no known pricing.
    pub estimated_cost_usd: Option<f64>,
}
|
|
||
| #[tauri::command] | ||
| pub fn estimate_query_cost( | ||
| model: String, | ||
| prompt_tokens: u64, | ||
| completion_tokens: u64, | ||
| ) -> Result<CostEstimate, String> { | ||
| let cost = estimate_cost(&model, prompt_tokens, completion_tokens); | ||
| Ok(CostEstimate { | ||
| model, | ||
| prompt_tokens, | ||
| completion_tokens, | ||
| estimated_cost_usd: cost, | ||
| }) | ||
| } | ||
|
|
||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn estimate_cost_for_known_model() {
        // gpt-4o: 1000 prompt tokens * $0.0025/1k + 1000 completion * $0.01/1k = $0.0125.
        let cost = estimate_cost("gpt-4o", 1000, 1000).unwrap();
        assert!((cost - 0.0125).abs() < 1e-9);
    }

    #[test]
    fn estimate_cost_for_unknown_model() {
        assert!(estimate_cost("unknown-model", 1000, 1000).is_none());
    }

    #[test]
    fn estimate_cost_with_provider_prefix() {
        // claude-3.7-sonnet: 1000 * $0.003/1k + 500 * $0.015/1k = $0.0105.
        // Assert the exact value, not merely `> 0.0`, so a pricing-table
        // regression cannot slip through.
        let cost = estimate_cost("openrouter/anthropic/claude-3.7-sonnet", 1000, 500).unwrap();
        assert!((cost - 0.0105).abs() < 1e-9);
    }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
pub mod adapter;
// Model pricing tables and per-query cost estimation (Tauri command included).
pub mod cost;
pub mod install_adapter;
pub mod process;
pub mod sanitize;
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -266,6 +266,43 @@ pub fn get_zeroclaw_usage_stats() -> ZeroclawUsageStats { | |
| usage_store().lock().map(|stats| *stats).unwrap_or_default() | ||
| } | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // Per-session usage tracking | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
| fn session_usage_store() -> &'static Mutex<std::collections::HashMap<String, ZeroclawUsageStats>> { | ||
| static STORE: OnceLock<Mutex<std::collections::HashMap<String, ZeroclawUsageStats>>> = | ||
| OnceLock::new(); | ||
| STORE.get_or_init(|| Mutex::new(std::collections::HashMap::new())) | ||
| } | ||
|
|
||
| pub fn record_session_usage(session_id: &str, prompt_tokens: u64, completion_tokens: u64) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This new Useful? React with 👍 / 👎. |
||
| if session_id.is_empty() { | ||
| return; | ||
| } | ||
| if let Ok(mut map) = session_usage_store().lock() { | ||
| let stats = map | ||
| .entry(session_id.to_string()) | ||
| .or_insert_with(ZeroclawUsageStats::default); | ||
| stats.total_calls = stats.total_calls.saturating_add(1); | ||
| stats.usage_calls = stats.usage_calls.saturating_add(1); | ||
| stats.prompt_tokens = stats.prompt_tokens.saturating_add(prompt_tokens); | ||
| stats.completion_tokens = stats.completion_tokens.saturating_add(completion_tokens); | ||
| stats.total_tokens = stats | ||
| .total_tokens | ||
| .saturating_add(prompt_tokens.saturating_add(completion_tokens)); | ||
| stats.last_updated_ms = now_ms(); | ||
| } | ||
| } | ||
|
|
||
| pub fn get_session_usage(session_id: &str) -> ZeroclawUsageStats { | ||
| session_usage_store() | ||
| .lock() | ||
| .ok() | ||
| .and_then(|map| map.get(session_id).copied()) | ||
| .unwrap_or_default() | ||
| } | ||
|
|
||
| fn sanitize_instance_namespace(raw: &str) -> String { | ||
| let trimmed = raw.trim(); | ||
| if trimmed.is_empty() { | ||
|
|
@@ -804,7 +841,9 @@ pub fn run_zeroclaw_message( | |
| "-m".to_string(), | ||
| message, | ||
| ]; | ||
| let preferred_model = crate::commands::load_zeroclaw_model_preference(); | ||
| // Per-session model override takes priority over global preference. | ||
| let preferred_model = crate::commands::preferences::lookup_session_model_override(instance_id) | ||
| .or_else(|| crate::commands::load_zeroclaw_model_preference()); | ||
| let provider_order = provider_order_for_runtime(&env_pairs, preferred_model.as_deref()); | ||
| if provider_order.is_empty() { | ||
| return Err( | ||
|
|
@@ -821,7 +860,13 @@ pub fn run_zeroclaw_message( | |
| let stdout = sanitize_output(&String::from_utf8_lossy(&output.stdout)); | ||
| let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string(); | ||
| record_zeroclaw_usage(&stdout, &stderr); | ||
| if parse_usage_from_text(&stdout).is_none() && parse_usage_from_text(&stderr).is_none() { | ||
| // Also record per-session usage. | ||
| let session_usage = | ||
| parse_usage_from_text(&stdout).or_else(|| parse_usage_from_text(&stderr)); | ||
| if let Some((prompt, completion, _total)) = session_usage { | ||
| record_session_usage(instance_id, prompt, completion); | ||
| } | ||
| if session_usage.is_none() { | ||
| if let Ok(mut stats) = usage_store().lock() { | ||
| if let Some((prompt, completion, total)) = | ||
| read_usage_from_builtin_traces(&cmd, &cfg, &env_pairs) | ||
|
|
@@ -831,6 +876,8 @@ pub fn run_zeroclaw_message( | |
| stats.completion_tokens = stats.completion_tokens.saturating_add(completion); | ||
| stats.total_tokens = stats.total_tokens.saturating_add(total); | ||
| stats.last_updated_ms = now_ms(); | ||
| // Record per-session usage from traces as well. | ||
| record_session_usage(instance_id, prompt, completion); | ||
| } | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
`set_session_model_override` stores overrides in-memory, but the runtime path does not read this map when selecting the model/provider (it still uses global preference resolution in `run_zeroclaw_message`). That means the new model-switch API updates state that never affects actual requests, so in-session model switching is functionally broken. Useful? React with 👍 / 👎.