Merge branch 'main' into main

GabrielBianconi · web-flow · commit aa2a73652942 · 2025-08-03T01:46:49.000-04:00
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -15,7 +15,7 @@ members = [
 resolver = "2"
 
 [workspace.package]
-version = "2025.7.5"
+version = "2025.7.6"
 rust-version = "1.86.0"
 license = "Apache-2.0"
 
diff --git a/tensorzero-core/src/cache.rs b/tensorzero-core/src/cache.rs
@@ -3,7 +3,7 @@ use std::sync::Arc;
 
 use crate::clickhouse::{ClickHouseConnectionInfo, TableName};
 use crate::embeddings::{EmbeddingRequest, EmbeddingResponse};
-use crate::error::{Error, ErrorDetails};
+use crate::error::{warn_discarded_cache_write, Error, ErrorDetails};
 use crate::inference::types::file::serialize_with_file_data;
 use crate::inference::types::{
     ContentBlockChunk, ContentBlockOutput, FinishReason, ModelInferenceRequest,
@@ -13,7 +13,7 @@ use crate::model::StreamResponse;
 use crate::serde_util::deserialize_json_string;
 use blake3::Hash;
 use clap::ValueEnum;
-use serde::de::DeserializeOwned;
+use serde::de::{DeserializeOwned, IgnoredAny};
 use serde::{Deserialize, Serialize};
 use std::fmt::Debug;
 
@@ -210,11 +210,39 @@ pub struct CacheData<T: CacheOutput> {
 /// to/from ClickHouse
 /// We use a marker trait rather than an enum so that the expected type can be enforced by the caller
 /// (e.g. `infer_stream` will never try to deserialize a `NonStreamingCacheData`)
-pub trait CacheOutput {}
+pub trait CacheOutput {
+    /// If this returns `false`, then we'll log a warning and skip writing this entry to the cache
+    fn should_write_to_cache(&self) -> bool;
+}
 
-impl CacheOutput for StreamingCacheData {}
-impl CacheOutput for NonStreamingCacheData {}
-impl CacheOutput for EmbeddingCacheData {}
+impl CacheOutput for StreamingCacheData {
+    fn should_write_to_cache(&self) -> bool {
+        true
+    }
+}
+impl CacheOutput for NonStreamingCacheData {
+    fn should_write_to_cache(&self) -> bool {
+        for block in &self.blocks {
+            if let ContentBlockOutput::ToolCall(tool_call) = block {
+                // We skip writing to the cache if the tool call arguments are not valid JSON
+                // We're assuming that it's almost never useful to have an invalid tool call cached
+                // (in particular, we assume that tensorzero is not being used with a provider/model
+                // that only ever emits invalid JSON for its tool call arguments).
+                // The invalid tool call will still be returned to the user, but we won't create a
+                // cache entry, even if the user turned on caching.
+                if serde_json::from_str::<IgnoredAny>(&tool_call.arguments).is_err() {
+                    return false;
+                }
+            }
+        }
+        true
+    }
+}
+impl CacheOutput for EmbeddingCacheData {
+    fn should_write_to_cache(&self) -> bool {
+        true
+    }
+}
 
 #[derive(Debug, Deserialize, Serialize)]
 #[serde(transparent)]
@@ -237,6 +265,24 @@ pub struct StreamingCacheData {
     pub chunks: Vec<CachedProviderInferenceResponseChunk>,
 }
 
+fn spawn_maybe_cache_write<T: Serialize + CacheOutput + Send + Sync + 'static>(
+    row: FullCacheRow<T>,
+    clickhouse_client: ClickHouseConnectionInfo,
+) {
+    tokio::spawn(async move {
+        if row.data.output.should_write_to_cache() {
+            if let Err(e) = clickhouse_client
+                .write(&[row], TableName::ModelInferenceCache)
+                .await
+            {
+                tracing::warn!("Failed to write to cache: {e}");
+            }
+        } else {
+            warn_discarded_cache_write(&row.data.raw_response);
+        }
+    });
+}
+
 // This doesn't block
 pub fn start_cache_write<T: Serialize + CacheOutput + Send + Sync + 'static>(
     clickhouse_client: &ClickHouseConnectionInfo,
@@ -255,28 +301,21 @@ pub fn start_cache_write<T: Serialize + CacheOutput + Send + Sync + 'static>(
     let output_tokens = usage.output_tokens;
     let clickhouse_client = clickhouse_client.clone();
     let finish_reason = finish_reason.cloned();
-    tokio::spawn(async move {
-        if let Err(e) = clickhouse_client
-            .write(
-                &[FullCacheRow {
-                    short_cache_key,
-                    long_cache_key,
-                    data: CacheData {
-                        output,
-                        raw_request,
-                        raw_response,
-                        input_tokens,
-                        output_tokens,
-                        finish_reason,
-                    },
-                }],
-                TableName::ModelInferenceCache,
-            )
-            .await
-        {
-            tracing::warn!("Failed to write to cache: {e}");
-        }
-    });
+    spawn_maybe_cache_write(
+        FullCacheRow {
+            short_cache_key,
+            long_cache_key,
+            data: CacheData {
+                output,
+                raw_request,
+                raw_response,
+                input_tokens,
+                output_tokens,
+                finish_reason,
+            },
+        },
+        clickhouse_client,
+    );
     Ok(())
 }
 
@@ -322,25 +361,21 @@ pub fn start_cache_write_streaming(
     };
     let raw_request = raw_request.to_string();
     let clickhouse_client = clickhouse_client.clone();
-    tokio::spawn(async move {
-        clickhouse_client
-            .write(
-                &[FullCacheRow {
-                    short_cache_key,
-                    long_cache_key,
-                    data: CacheData {
-                        output,
-                        raw_request,
-                        raw_response: String::new(),
-                        input_tokens,
-                        output_tokens,
-                        finish_reason,
-                    },
-                }],
-                TableName::ModelInferenceCache,
-            )
-            .await
-    });
+    spawn_maybe_cache_write(
+        FullCacheRow {
+            short_cache_key,
+            long_cache_key,
+            data: CacheData {
+                output,
+                raw_request,
+                raw_response: String::new(),
+                input_tokens,
+                output_tokens,
+                finish_reason,
+            },
+        },
+        clickhouse_client,
+    );
     Ok(())
 }
 
diff --git a/tensorzero-core/src/endpoints/feedback.rs b/tensorzero-core/src/endpoints/feedback.rs
@@ -476,7 +476,11 @@ async fn get_function_name(
         MetricConfigLevel::Episode => "episode_id_uint",
     };
     let query = format!(
-        "SELECT function_name FROM {table_name} FINAL WHERE {identifier_key} = toUInt128(toUUID('{target_id}'))"
+        "SELECT function_name
+         FROM {table_name}
+         WHERE {identifier_key} = toUInt128(toUUID('{target_id}'))
+         LIMIT 1
+         SETTINGS max_threads=1"
     );
     let function_name = connection_info
         .run_query_synchronous_no_params(query)
diff --git a/tensorzero-core/src/error.rs b/tensorzero-core/src/error.rs
@@ -52,6 +52,14 @@ pub fn set_unstable_error_json(unstable_error_json: bool) -> Result<(), Error> {
     })
 }
 
+pub fn warn_discarded_cache_write(raw_response: &str) {
+    if *DEBUG.get().unwrap_or(&false) {
+        tracing::warn!("Skipping cache write due to invalid output:\nRaw response: {raw_response}");
+    } else {
+        tracing::warn!("Skipping cache write due to invalid output");
+    }
+}
+
 pub fn warn_discarded_thought_block(provider_type: &str, thought: &Thought) {
     if *DEBUG.get().unwrap_or(&false) {
         tracing::warn!("Provider type `{provider_type}` does not support input thought blocks, discarding: {thought:?}");
diff --git a/tensorzero-core/src/providers/dummy.rs b/tensorzero-core/src/providers/dummy.rs
@@ -333,6 +333,11 @@ impl InferenceProvider for DummyProvider {
                 arguments: serde_json::to_string(&*DUMMY_TOOL_RESPONSE).unwrap(),
                 id: "0".to_string(),
             })],
+            "invalid_tool_arguments" => vec![ContentBlockOutput::ToolCall(ToolCall {
+                name: "get_temperature".to_string(),
+                arguments: "Not valid 'JSON'".to_string(),
+                id: "0".to_string(),
+            })],
             "reasoner" => vec![
                 ContentBlockOutput::Thought(Thought {
                     text: Some("hmmm".to_string()),
diff --git a/tensorzero-core/tests/e2e/cache.rs b/tensorzero-core/tests/e2e/cache.rs
@@ -9,16 +9,25 @@ use reqwest_eventsource::RequestBuilderExt;
 use serde_json::json;
 use serde_json::Value;
 use std::time::Duration;
+use tensorzero::CacheParamsOptions;
+use tensorzero::ClientInferenceParams;
+use tensorzero::ClientInput;
+use tensorzero::ClientInputMessage;
+use tensorzero::ClientInputMessageContent;
 use tensorzero::ContentBlockChunk;
+use tensorzero::InferenceOutput;
 use tensorzero_core::cache::cache_lookup_streaming;
 use tensorzero_core::cache::start_cache_write_streaming;
+use tensorzero_core::cache::CacheEnabledMode;
 use tensorzero_core::cache::NonStreamingCacheData;
 use tensorzero_core::inference::types::ContentBlock;
 use tensorzero_core::inference::types::ContentBlockOutput;
 use tensorzero_core::inference::types::FinishReason;
 use tensorzero_core::inference::types::ProviderInferenceResponseChunk;
 use tensorzero_core::inference::types::Text;
 use tensorzero_core::inference::types::TextChunk;
+use tensorzero_core::inference::types::TextKind;
+use tracing_test::traced_test;
 use uuid::Uuid;
 
 use tensorzero_core::cache::cache_lookup;
@@ -33,6 +42,7 @@ use tensorzero_core::inference::types::{
 };
 
 use crate::common::get_gateway_endpoint;
+use crate::providers::common::make_embedded_gateway;
 use tensorzero_core::clickhouse::test_helpers::{
     get_clickhouse, select_chat_inference_clickhouse, select_model_inference_clickhouse,
 };
@@ -312,6 +322,45 @@ async fn test_cache_stream_write_and_read() {
     assert!(result.is_none());
 }
 
+#[traced_test]
+#[tokio::test]
+pub async fn test_dont_cache_invalid_tool_call() {
+    let client = make_embedded_gateway().await;
+    let randomness = Uuid::now_v7();
+    let params = ClientInferenceParams {
+        model_name: Some("dummy::invalid_tool_arguments".to_string()),
+        input: ClientInput {
+            system: None,
+            messages: vec![ClientInputMessage {
+                role: Role::User,
+                content: vec![ClientInputMessageContent::Text(TextKind::Text {
+                    text: format!("Test inference: {randomness}"),
+                })],
+            }],
+        },
+        cache_options: CacheParamsOptions {
+            enabled: CacheEnabledMode::On,
+            max_age_s: None,
+        },
+        ..Default::default()
+    };
+    client.inference(params.clone()).await.unwrap();
+
+    tokio::time::sleep(std::time::Duration::from_secs(5)).await;
+    let clickhouse = get_clickhouse().await;
+    assert!(logs_contain("Skipping cache write"));
+
+    // Run again, and check that we get a cache miss
+    let res = client.inference(params).await.unwrap();
+    let InferenceOutput::NonStreaming(res) = res else {
+        panic!("Expected non-streaming inference response");
+    };
+    let model_inference = select_model_inference_clickhouse(&clickhouse, res.inference_id())
+        .await
+        .unwrap();
+    assert_eq!(model_inference.get("cached").unwrap(), false);
+}
+
 #[tokio::test]
 pub async fn test_streaming_cache_with_err() {
     let episode_id = Uuid::now_v7();
diff --git a/ui/package.json b/ui/package.json
@@ -2,7 +2,7 @@
   "name": "tensorzero-ui",
   "private": true,
   "type": "module",
-  "version": "2025.7.5",
+  "version": "2025.7.6",
   "scripts": {
     "build": "NODE_ENV=production react-router build",
     "dev": "react-router dev",