2 changes: 2 additions & 0 deletions .envrc
@@ -0,0 +1,2 @@
use flake
layout python
49 changes: 49 additions & 0 deletions benchmarks2/README.md
@@ -0,0 +1,49 @@
# SDK Benchmarks

Simple Go-style benchmarks for measuring SDK overhead.

## Quick Start (Automated)

```bash
./run_benchmark.sh [duration_seconds]
```

This automatically:
1. Starts the delay server
2. Runs the baseline benchmark (SDK disabled)
3. Runs the benchmark again with the SDK enabled
4. Prints a comparison with overhead percentages

## Manual Mode

```bash
# Terminal 1: Start delay server
python delay_server.py

# Terminal 2: Run baseline (SDK disabled)
TUSK_DRIFT_MODE=DISABLED python app.py

# Terminal 3: Run benchmark
python benchmark.py --url=http://localhost:8080

# Terminal 2: Restart with SDK enabled
# Ctrl+C, then:
TUSK_DRIFT_MODE=RECORD python app.py

# Terminal 3: Run benchmark again
python benchmark.py --url=http://localhost:8080
```

## Output

Each line reports the benchmark name, iterations completed, nanoseconds per operation, and operations per second:

```
Benchmark_Sort                1000         1234567 ns/op       810.37 ops/s
Benchmark_Downstream           500        20123456 ns/op        49.69 ops/s
```

## For Node SDK

Use the same `benchmark.py` against the Node test server:
```bash
python benchmark.py --url=http://localhost:3000
```
41 changes: 41 additions & 0 deletions benchmarks2/app.py
@@ -0,0 +1,41 @@
#!/usr/bin/env python3
"""Test app for SDK benchmarks. Respects TUSK_DRIFT_MODE env var."""
import logging
import os
import requests as req
from flask import Flask, request, jsonify

# Disable Flask/Werkzeug request logging
logging.getLogger('werkzeug').setLevel(logging.ERROR)

# SDK initialization based on env var
MODE = os.environ.get('TUSK_DRIFT_MODE', 'DISABLED')
if MODE != 'DISABLED':
from drift import TuskDrift
TuskDrift.initialize(
api_key="benchmark-key",
env="benchmark",
log_level="error",
)

app = Flask(__name__)
DELAY_SERVER = os.environ.get('DELAY_SERVER', 'http://127.0.0.1:9999')

@app.route('/health')
def health():
return 'ok'

@app.route('/api/sort', methods=['POST'])
def api_sort():
data = request.json['data']
return jsonify({'sorted': sorted(data)})

@app.route('/api/downstream', methods=['POST'])
def api_downstream():
delay_ms = request.json.get('delay_ms', 10)
req.get(f'{DELAY_SERVER}/delay?ms={delay_ms}')
return jsonify({'status': 'ok'})

if __name__ == '__main__':
port = int(os.environ.get('PORT', 8080))
app.run(host='127.0.0.1', port=port, threaded=True)
113 changes: 113 additions & 0 deletions benchmarks2/benchmark.py
@@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""
Benchmark runner for SDK performance testing.
Works with any HTTP server implementing /api/sort and /api/downstream.

Usage:
python benchmark.py --url=http://localhost:8080 [--duration=5] [--baseline=baseline.txt]
"""
import argparse
import random
import time
import requests

def benchmark(name, func, duration_sec):
"""Run func repeatedly for duration_sec, return stats."""
start = time.perf_counter_ns()
deadline = start + (duration_sec * 1_000_000_000)
iterations = 0

while time.perf_counter_ns() < deadline:
func()
iterations += 1

elapsed_ns = time.perf_counter_ns() - start
ns_per_op = elapsed_ns // iterations if iterations > 0 else 0
ops_per_sec = iterations / (elapsed_ns / 1_000_000_000) if elapsed_ns > 0 else 0

return iterations, ns_per_op, ops_per_sec

def print_result(name, iterations, ns_per_op, ops_per_sec):
print(f"{name:<25} {iterations:>8} {ns_per_op:>15} ns/op {ops_per_sec:>12.2f} ops/s")

def parse_results(filepath):
"""Parse benchmark output file into dict of {name: ops_per_sec}."""
results = {}
with open(filepath) as f:
for line in f:
if line.startswith('Benchmark_'):
parts = line.split()
name = parts[0]
ops_s = float(parts[5])
@cubic-dev-ai (bot) commented on Feb 2, 2026 (benchmarks2/benchmark.py, line 41):

P2: Baseline parsing reads the "ops/s" label instead of the numeric ops/s value, so `parse_results` will fail on valid benchmark output. Use `parts[4]` for the numeric ops/s token.
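A minimal sketch of the suggested fix (hypothetical; not part of the committed diff), based on the output format produced by `print_result`:

```python
# For "Benchmark_Sort  1000  1234567 ns/op  810.37 ops/s", split() yields:
# [0]=name, [1]=iterations, [2]=ns/op value, [3]="ns/op", [4]=ops/s value, [5]="ops/s"
ops_s = float(parts[4])  # numeric ops/s value, not the trailing "ops/s" label
```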

results[name] = ops_s
return results

def print_comparison(baseline, current):
"""Print comparison table with percentage diff."""
print("\n" + "=" * 70)
print("COMPARISON (negative = slower with SDK)")
print("=" * 70)
print(f"{'Benchmark':<25} {'Baseline':>12} {'Current':>12} {'Diff':>12}")
print("-" * 70)
for name in baseline:
if name in current:
base_ops = baseline[name]
curr_ops = current[name]
if base_ops > 0:
diff_pct = ((curr_ops - base_ops) / base_ops) * 100
print(f"{name:<25} {base_ops:>10.2f}/s {curr_ops:>10.2f}/s {diff_pct:>+10.1f}%")

def main():
parser = argparse.ArgumentParser(description='Benchmark SDK overhead')
parser.add_argument('--url', required=True, help='Base URL of test server')
parser.add_argument('--duration', type=int, default=5, help='Seconds per benchmark')
parser.add_argument('--baseline', help='Baseline results file to compare against')
args = parser.parse_args()

base_url = args.url.rstrip('/')
session = requests.Session()

# Wait for server ready
print(f"Connecting to {base_url}...")
for _ in range(50):
try:
session.get(f'{base_url}/health', timeout=0.5)
break
except Exception:
time.sleep(0.1)
else:
print("ERROR: Server not responding")
return 1

print(f"Running benchmarks (duration={args.duration}s per test)...\n")

results = {}

# Benchmark: Sort
test_data = list(range(1000))
random.shuffle(test_data)

def sort_request():
resp = session.post(f'{base_url}/api/sort', json={'data': test_data})
resp.raise_for_status()

iters, ns_op, ops_s = benchmark('Benchmark_Sort', sort_request, args.duration)
print_result('Benchmark_Sort', iters, ns_op, ops_s)
results['Benchmark_Sort'] = ops_s

# Benchmark: Downstream
def downstream_request():
resp = session.post(f'{base_url}/api/downstream', json={'delay_ms': 10})
resp.raise_for_status()

iters, ns_op, ops_s = benchmark('Benchmark_Downstream', downstream_request, args.duration)
print_result('Benchmark_Downstream', iters, ns_op, ops_s)
results['Benchmark_Downstream'] = ops_s

# Compare against baseline if provided
if args.baseline:
baseline = parse_results(args.baseline)
print_comparison(baseline, results)

if __name__ == '__main__':
main()
@cubic-dev-ai (bot) commented on Feb 2, 2026 (benchmarks2/benchmark.py, line 113):

P3: The error return code from `main()` is ignored, so the script exits successfully even when the server is unreachable. Propagate the exit status from `main()`.
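A minimal sketch of the suggested change (hypothetical; not part of the committed diff):

```python
import sys

if __name__ == '__main__':
    # Propagate main()'s return value as the process exit status;
    # sys.exit(None) still exits with 0 on the success path.
    sys.exit(main())
```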

22 changes: 22 additions & 0 deletions benchmarks2/delay_server.py
@@ -0,0 +1,22 @@
#!/usr/bin/env python3
"""Simple delay server for downstream benchmarks."""
import logging
import time
from flask import Flask, request

logging.getLogger('werkzeug').setLevel(logging.ERROR)

app = Flask(__name__)

@app.route('/delay')
def delay():
ms = int(request.args.get('ms', 10))
time.sleep(ms / 1000)
return 'ok'

@app.route('/health')
def health():
return 'ok'

if __name__ == '__main__':
app.run(host='127.0.0.1', port=9999, threaded=True)
51 changes: 51 additions & 0 deletions benchmarks2/run_benchmark.sh
@@ -0,0 +1,51 @@
#!/bin/bash
# Automated benchmark runner - compares SDK disabled vs enabled performance.
#
# Usage: ./run_benchmark.sh [duration_seconds]

set -e

DURATION=${1:-5}
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

cleanup() {
echo "Cleaning up..."
kill $DELAY_PID 2>/dev/null || true
kill $APP_PID 2>/dev/null || true
}
trap cleanup EXIT

# Start delay server
echo "Starting delay server..."
python delay_server.py &
DELAY_PID=$!
sleep 1

# Run baseline (SDK disabled)
echo ""
echo "============================================================"
echo "BASELINE (SDK DISABLED)"
echo "============================================================"

TUSK_DRIFT_MODE=DISABLED python app.py &
APP_PID=$!
sleep 1

python benchmark.py --url=http://localhost:8080 --duration="$DURATION" | tee /tmp/baseline.txt

kill $APP_PID 2>/dev/null || true
wait $APP_PID 2>/dev/null || true
sleep 0.5

# Run with SDK enabled (with baseline comparison)
echo ""
echo "============================================================"
echo "WITH SDK (TUSK_DRIFT_MODE=RECORD)"
echo "============================================================"

TUSK_DRIFT_MODE=RECORD python app.py &
APP_PID=$!
sleep 1

python benchmark.py --url=http://localhost:8080 --duration="$DURATION" --baseline=/tmp/baseline.txt
27 changes: 27 additions & 0 deletions flake.lock

Some generated files are not rendered by default.

47 changes: 47 additions & 0 deletions flake.nix
@@ -0,0 +1,47 @@
{
description = "A very basic flake";

inputs = {
nixpkgs.url = "github:nixos/nixpkgs?ref=nixos-unstable";
};

outputs =
{
self,
nixpkgs,
}:
let
x86-linux = "x86_64-linux";
arm-linux = "aarch64-linux";
arm-macos = "aarch64-darwin";

defaultSystems = [
x86-linux
arm-linux
arm-macos
];
eachSystem =
fun: systems:
nixpkgs.lib.genAttrs systems (
system:
let
pkgs = import nixpkgs {
inherit system;
};
in
fun pkgs
);
in
{
devShell = eachSystem (
pkgs:
with pkgs;
mkShell {
buildInputs = [
(python3.withPackages (p: with p; [ ]))
];
}
) defaultSystems;
};

}