quickwit-oss · fulmicoton · May 7, 2026 · May 7, 2026 · May 7, 2026 · May 7, 2026
@@ -81,6 +81,17 @@ pub fn is_true(value: &bool) -> bool {
     *value
 }
 
+/// `true` when the current process is running inside an AWS Lambda runtime.
+///
+/// Detected via the presence of the `AWS_LAMBDA_FUNCTION_NAME` environment
+/// variable, which the Lambda runtime sets automatically. The result is cached
+/// on first access since environment variables are read once at process start.
+pub fn is_running_in_lambda() -> bool {
+    static IS_LAMBDA: std::sync::LazyLock<bool> =
+        std::sync::LazyLock::new(|| std::env::var_os("AWS_LAMBDA_FUNCTION_NAME").is_some());
+    *IS_LAMBDA
+}
+
 pub fn chunk_range(range: Range<usize>, chunk_size: usize) -> impl Iterator<Item = Range<usize>> {
     range.clone().step_by(chunk_size).map(move |block_start| {
         let block_end = (block_start + chunk_size).min(range.end);

@@ -2723,6 +2723,7 @@ mod tests {
                     scroll_id: None,
                     failed_splits: Vec::new(),
                     num_successful_splits: 1,
+                    resource_stats: None,
                 })
             });
 

@@ -170,14 +170,18 @@ impl LambdaLeafSearchInvoker for AwsLambdaInvoker {
         let start = std::time::Instant::now();
         let result = self.invoke_leaf_search_with_retry(request).await;
         let elapsed = start.elapsed().as_secs_f64();
-        let status = if result.is_ok() { "success" } else { "error" };
+        let outcome = match &result {
+            Ok(_) => "success",
+            Err(SearchError::Timeout(_)) => "timeout",
+            Err(_) => "other",
+        };
         LAMBDA_METRICS
             .leaf_search_requests_total
-            .with_label_values([status])
+            .with_label_values([outcome])
             .inc();
         LAMBDA_METRICS
             .leaf_search_duration_seconds
-            .with_label_values([status])
+            .with_label_values([outcome])
             .observe(elapsed);
         result
     }

@@ -46,14 +46,14 @@ impl Default for LambdaMetrics {
                 "Total number of Lambda leaf search invocations.",
                 "lambda",
                 &[],
-                ["status"],
+                ["outcome"],
             ),
             leaf_search_duration_seconds: new_histogram_vec(
                 "leaf_search_duration_seconds",
                 "Duration of Lambda leaf search invocations in seconds.",
                 "lambda",
                 &[],
-                ["status"],
+                ["outcome"],
                 duration_buckets(),
             ),
             leaf_search_request_payload_size_bytes: new_histogram(

@@ -184,6 +184,16 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         .unwrap();
 
     // Search service.
+    //
+    // Unlike the other services above, search goes through `tonic_prost_build` directly
+    // (not through `quickwit_codegen::Codegen`, which emits `cargo:rerun-if-changed` for
+    // every proto it compiles). `prost_build` 0.14 has a TODO acknowledging it does not
+    // emit those directives itself, and `tonic_prost_build` 0.14.5 exposes an
+    // `emit_rerun_if_changed` setter but never forwards it to the underlying `Config`.
+    // Without the explicit hint below, edits to `search.proto` do not retrigger build.rs
+    // (because the other `Codegen` calls have already narrowed cargo's watch list).
+    println!("cargo:rerun-if-changed=protos/quickwit/search.proto");
+
     let mut prost_config = prost_build::Config::default();
     prost_config
         .file_descriptor_set_path("src/codegen/quickwit/search_descriptor.bin")
@@ -199,6 +209,12 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         .type_attribute("SortByValue", "#[derive(Ord, PartialOrd)]")
         .type_attribute("SearchRequest", "#[derive(Hash, Eq)]")
         .type_attribute("PartialHit", "#[derive(Hash, Eq)]")
+        // The `Response` variant carries a `LeafSearchResponse` which now
+        // embeds `LeafResourceStats`.
+        .type_attribute(
+            "LambdaSingleSplitResult.outcome",
+            "#[allow(clippy::large_enum_variant)]",
+        )
         .out_dir("src/codegen/quickwit")
         .compile_with_config(
             prost_config,

@@ -319,6 +319,9 @@ message SearchResponse {
 
   // Total number of successful splits searched.
   uint64 num_successful_splits = 8;
+
+  // Resource statistics for the root search.
+  RootResourceStats resource_stats = 10;
 }
 
 message SearchPlanResponse {
@@ -355,12 +358,98 @@ message LeafSearchRequest {
   repeated string index_uris = 9;
 }
 
-message ResourceStats {
-    uint64 short_lived_cache_num_bytes = 1;
-    uint64 split_num_docs = 2;
-    uint64 warmup_microsecs = 3;
-    uint64 cpu_thread_pool_wait_microsecs = 4;
-    uint64 cpu_microsecs = 5;
+// Per-split resource statistics.
+//
+// All fields are extensive (sum across splits is meaningful) except where noted.
+message SplitResourceStats {
+    // Number of documents in the split.
+    uint64 split_num_docs = 1;
+    // Bytes resident in the warmup short-lived cache after warmup
+    // (measure of input data the search needed to process).
+    uint64 input_memory_bytes = 2;
+    // Bytes downloaded from storage during the request, as measured
+    // by a request-scoped storage wrapper.
+    uint64 download_num_bytes = 3;
+    // Number of storage GET requests issued during the request.
+    uint64 download_num_requests = 4;
+    // Number of documents matched by the query in this split (we always count).
+    uint64 matched_num_docs = 5;
+
+    // Time the split spent waiting to acquire its search permit.
+    uint64 wait_for_search_permit_microsecs = 6;
+    // Time spent in the warmup phase (downloading and indexing data into caches).
+    uint64 warmup_microsecs = 7;
+    // Time the split spent waiting for a slot on the CPU pool after warmup.
+    uint64 wait_for_cpu_pool_microsecs = 8;
+    // CPU time spent in the search itself (predicate matching + collection +
+    // per-segment harvest), as measured around the tantivy search call.
+    // Excludes any cross-split finalize work performed outside the single-split
+    // search.
+    uint64 cpu_search_microsecs = 9;
+}
+
+// Resource statistics for a single leaf-search call (over one or more splits).
+// If the configuration allows it, leaf nodes can offload part of their computation to
+// lambdas.
+//
+// `split_resources_worst` / `split_resources_sum` aggregations on
+// `LeafResourceStats` are only computed for locally-executed splits.
+//
+// All numeric fields are extensive (summed when merging across leaves).
+// `split_resources_worst` is the exception — it is selected by ranking, not
+// summed. `lambda_bottleneck` is 0 or 1 at the source and becomes a count of
+// leaves where lambda was the bottleneck once aggregated.
+message LeafResourceStats {
+    // Number of splits whose results came from the partial result cache.
+    uint64 partial_result_cache_num_splits = 1;
+    // Sum of `split.num_docs` across cache-hit splits.
+    uint64 partial_result_cache_num_docs = 2;
+
+    // Number of splits executed locally (excluding cache hits and lambda).
+    uint64 localexec_num_splits = 8;
+    // Sum of `split.num_docs` across locally-executed splits.
+    uint64 localexec_num_docs = 9;
+    // The worst single-split contribution, ranked by
+    // `warmup + wait_for_cpu_pool + cpu_predicate + cpu_collection + cpu_harvest`
+    // (intentionally excludes `wait_for_search_permit`).
+    SplitResourceStats split_resources_worst = 10;
+    // Field-wise sum of `SplitResourceStats` across all locally-executed splits.
+    // If you want to compute averages, divide by localexec_num_splits.
+    SplitResourceStats split_resources_sum = 11;
+    // Wall-clock duration of the leaf search (set in `multi_index_leaf_search`).
+    uint64 wall_time_microsecs = 12;
+
+    // Number of splits dispatched to lambda (offloaded execution).
+    uint64 lambda_num_splits = 3;
+    // Sum of `split.num_docs` across lambda-dispatched splits.
+    uint64 lambda_num_docs = 4;
+    // Number of lambda-dispatched splits that succeeded.
+    uint64 lambda_success_num_splits = 5;
+    // Sum of `split.num_docs` across successful lambda-dispatched splits.
+    uint64 lambda_success_num_docs = 6;
+    // At the source (a single leaf call) this is 1 if the offloaded path
+    // finished after the local path (lambda was the bottleneck), 0 otherwise.
+    // Forced to 0 when `lambda_num_splits == 0`. At aggregate levels (merged
+    // across leaves) it is the count of leaves where lambda was the
+    // bottleneck. Voluntarily a uint64 (not a bool) for summability.
+    uint64 lambda_bottleneck = 7;
+}
+
+// Resource statistics for a root search.
+message RootResourceStats {
+    // The leaf with the largest `wall_time_microsecs`.
+    LeafResourceStats leaf_resources_worst = 1;
+    // Field-wise sum of all leaf stats. `wall_time_microsecs` is summed even though
+    // it is not strictly extensive — the sum still gives a useful view of total leaf time.
+    // If you want to compute averages, divide by leaf_num_calls: failed leaf attempts
+    // usually do not come with proper counters.
+    LeafResourceStats leaf_resources_sum = 2;
+    // Number of leaf calls excluding retries.
+    uint64 leaf_num_calls = 3;
+    // Number of leaf calls, including retries.
+    uint64 leaf_num_calls_including_retries = 4;
+    // Number of failed splits across all leaves.
+    uint64 num_failed_splits = 5;
 }
 
 // LeafRequestRef references data in LeafSearchRequest to deduplicate data.
@@ -496,7 +585,10 @@ message LeafSearchResponse {
   // postcard serialized intermediate aggregation_result.
   optional bytes intermediate_aggregation_result = 6;
 
-  ResourceStats resource_stats = 8;
+  // [deprecated] ResourceStats resource_stats = 8;
+  reserved 8;
+
+  LeafResourceStats resource_stats = 9;
 }
 
 // The result of searching a single split in a Lambda invocation.