stacklok · therealnb · Jan 29, 2026
@@ -44,3 +44,12 @@ coverage*
 
 crd-helm-wrapper
 cmd/vmcp/__debug_bin*
+
+# Demo files
+examples/operator/virtual-mcps/vmcp_optimizer.yaml
+scripts/k8s_vmcp_optimizer_demo.sh
+examples/ingress/mcp-servers-ingress.yaml
+examples/vmcp-config-optimizer.yaml
+/vmcp
+thv-operator
+thv
@@ -642,14 +642,6 @@ spec:
                             - fail
                             - best_effort
                             type: string
-                          statusReportingInterval:
-                            default: 30s
-                            description: |-
-                              StatusReportingInterval is the interval for reporting status updates to Kubernetes.
-                              This controls how often the vMCP runtime reports backend health and phase changes.
-                              Lower values provide faster status updates but increase API server load.
-                            pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
-                            type: string
                           unhealthyThreshold:
                             default: 3
                             description: UnhealthyThreshold is the number of consecutive
@@ -685,17 +677,76 @@ spec:
                   optimizer:
                     description: |-
                       Optimizer configures the MCP optimizer for context optimization on large toolsets.
-                      When enabled, vMCP exposes only find_tool and call_tool operations to clients
+                      When enabled, vMCP exposes optim_find_tool and optim_call_tool operations to clients
                       instead of all backend tools directly. This reduces token usage by allowing
                       LLMs to discover relevant tools on demand rather than receiving all tool definitions.
                     properties:
-                      embeddingService:
+                      embeddingBackend:
                         description: |-
-                          EmbeddingService is the name of a Kubernetes Service that provides the embedding service
-                          for semantic tool discovery. The service must implement the optimizer embedding API.
+                          EmbeddingBackend specifies the embedding provider: "ollama", "vllm", "unified", or "openai".
+                          - "ollama": Uses local Ollama HTTP API for embeddings
+                          - "vllm": Uses vLLM OpenAI-compatible API (recommended for production Kubernetes deployments)
+                          - "unified": Uses generic OpenAI-compatible API (works with both vLLM and OpenAI)
+                          - "openai": Uses OpenAI-compatible API
+                        enum:
+                        - ollama
+                        - vllm
+                        - unified
+                        - openai
+                        type: string
+                      embeddingDimension:
+                        description: |-
+                          EmbeddingDimension is the dimension of the embedding vectors.
+                          Common values:
+                          - 384: all-MiniLM-L6-v2, BAAI/bge-small-en-v1.5
+                          - 768: nomic-embed-text
+                          - 1536: OpenAI text-embedding-3-small
+                        minimum: 1
+                        type: integer
+                      embeddingModel:
+                        description: |-
+                          EmbeddingModel is the model name to use for embeddings.
+                          Required when EmbeddingBackend is "ollama", "vllm", "unified", or "openai".
+                          Examples:
+                          - Ollama: "nomic-embed-text", "all-minilm"
+                          - vLLM: "BAAI/bge-small-en-v1.5"
+                          - OpenAI: "text-embedding-3-small"
+                        type: string
+                      embeddingURL:
+                        description: |-
+                          EmbeddingURL is the base URL for the embedding service (Ollama or OpenAI-compatible API).
+                          Required when EmbeddingBackend is "ollama", "vllm", "unified", or "openai".
+                          Examples:
+                          - Ollama: "http://localhost:11434"
+                          - vLLM: "http://vllm-service:8000/v1"
+                          - OpenAI: "https://api.openai.com/v1"
+                        type: string
+                      enabled:
+                        description: |-
+                          Enabled determines whether the optimizer is active.
+                          When true, vMCP exposes optim_find_tool and optim_call_tool instead of all backend tools.
+                        type: boolean
+                      ftsDBPath:
+                        description: |-
+                          FTSDBPath is the path to the SQLite FTS5 database for BM25 text search.
+                          If empty, defaults to ":memory:" for in-memory FTS5, or "{PersistPath}/fts.db" if PersistPath is set.
+                          Hybrid search (semantic + BM25) is always enabled.
+                        type: string
+                      hybridSearchRatio:
+                        description: |-
+                          HybridSearchRatio controls the mix of semantic vs BM25 results in hybrid search.
+                          Value range: 0 (all BM25) to 100 (all semantic), representing percentage.
+                          Default: 70 (70% semantic, 30% BM25)
+                          Applies even when FTSDBPath is empty, because the FTS5 database then falls back to its default location.
+                        maximum: 100
+                        minimum: 0
+                        type: integer
+                      persistPath:
+                        description: |-
+                          PersistPath is the optional filesystem path for persisting the chromem-go database.
+                          If empty, the database will be in-memory only (ephemeral).
+                          When set, tool metadata and embeddings are persisted to disk for faster restarts.
                         type: string
-                    required:
-                    - embeddingService
                     type: object
                   outgoingAuth:
                     description: |-

@@ -645,14 +645,6 @@ spec:
                             - fail
                             - best_effort
                             type: string
-                          statusReportingInterval:
-                            default: 30s
-                            description: |-
-                              StatusReportingInterval is the interval for reporting status updates to Kubernetes.
-                              This controls how often the vMCP runtime reports backend health and phase changes.
-                              Lower values provide faster status updates but increase API server load.
-                            pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
-                            type: string
                           unhealthyThreshold:
                             default: 3
                             description: UnhealthyThreshold is the number of consecutive
@@ -688,17 +680,74 @@ spec:
                   optimizer:
                     description: |-
                       Optimizer configures the MCP optimizer for context optimization on large toolsets.
-                      When enabled, vMCP exposes only find_tool and call_tool operations to clients
+                      When enabled, vMCP exposes optim_find_tool and optim_call_tool operations to clients
                       instead of all backend tools directly. This reduces token usage by allowing
                       LLMs to discover relevant tools on demand rather than receiving all tool definitions.
                     properties:
-                      embeddingService:
+                      embeddingBackend:
                         description: |-
-                          EmbeddingService is the name of a Kubernetes Service that provides the embedding service
-                          for semantic tool discovery. The service must implement the optimizer embedding API.
+                          EmbeddingBackend specifies the embedding provider: "ollama", "openai-compatible", or "placeholder".
+                          - "ollama": Uses local Ollama HTTP API for embeddings
+                          - "openai-compatible": Uses OpenAI-compatible API (vLLM, OpenAI, etc.)
+                          - "placeholder": Uses deterministic hash-based embeddings (for testing/development)
+                        enum:
+                        - ollama
+                        - openai-compatible
+                        - placeholder
+                        type: string
+                      embeddingDimension:
+                        description: |-
+                          EmbeddingDimension is the dimension of the embedding vectors.
+                          Common values:
+                          - 384: all-MiniLM-L6-v2, BAAI/bge-small-en-v1.5
+                          - 768: nomic-embed-text
+                          - 1536: OpenAI text-embedding-3-small
+                        minimum: 1
+                        type: integer
+                      embeddingModel:
+                        description: |-
+                          EmbeddingModel is the model name to use for embeddings.
+                          Required when EmbeddingBackend is "ollama" or "openai-compatible".
+                          Examples:
+                          - Ollama: "nomic-embed-text", "all-minilm"
+                          - vLLM: "BAAI/bge-small-en-v1.5"
+                          - OpenAI: "text-embedding-3-small"
+                        type: string
+                      embeddingURL:
+                        description: |-
+                          EmbeddingURL is the base URL for the embedding service (Ollama or OpenAI-compatible API).
+                          Required when EmbeddingBackend is "ollama" or "openai-compatible".
+                          Examples:
+                          - Ollama: "http://localhost:11434"
+                          - vLLM: "http://vllm-service:8000/v1"
+                          - OpenAI: "https://api.openai.com/v1"
+                        type: string
+                      enabled:
+                        description: |-
+                          Enabled determines whether the optimizer is active.
+                          When true, vMCP exposes optim_find_tool and optim_call_tool instead of all backend tools.
+                        type: boolean
+                      ftsDBPath:
+                        description: |-
+                          FTSDBPath is the path to the SQLite FTS5 database for BM25 text search.
+                          If empty, defaults to ":memory:" for in-memory FTS5, or "{PersistPath}/fts.db" if PersistPath is set.
+                          Hybrid search (semantic + BM25) is always enabled.
+                        type: string
+                      hybridSearchRatio:
+                        description: |-
+                          HybridSearchRatio controls the mix of semantic vs BM25 results in hybrid search.
+                          Value range: 0 (all BM25) to 100 (all semantic), representing percentage.
+                          Default: 70 (70% semantic, 30% BM25)
+                          Applies even when FTSDBPath is empty, because the FTS5 database then falls back to its default location.
+                        maximum: 100
+                        minimum: 0
+                        type: integer
+                      persistPath:
+                        description: |-
+                          PersistPath is the optional filesystem path for persisting the chromem-go database.
+                          If empty, the database will be in-memory only (ephemeral).
+                          When set, tool metadata and embeddings are persisted to disk for faster restarts.
                         type: string
-                    required:
-                    - embeddingService
                     type: object
                   outgoingAuth:
                     description: |-

@@ -1,6 +1,6 @@
 module github.com/stacklok/toolhive
 
-go 1.25.6
+go 1.25.5
 
 require (
 	dario.cat/mergo v1.0.2
@@ -29,6 +29,7 @@ require (
 	github.com/onsi/ginkgo/v2 v2.27.5
 	github.com/onsi/gomega v1.39.0
 	github.com/ory/fosite v0.49.0
+	github.com/philippgille/chromem-go v0.7.0
 	github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c
 	github.com/prometheus/client_golang v1.23.2
 	github.com/sigstore/protobuf-specs v0.5.0
@@ -59,6 +60,7 @@ require (
 	k8s.io/api v0.35.0
 	k8s.io/apimachinery v0.35.0
 	k8s.io/utils v0.0.0-20260108192941-914a6e750570
+	modernc.org/sqlite v1.44.0
 	sigs.k8s.io/controller-runtime v0.22.4
 	sigs.k8s.io/yaml v1.6.0
 )
@@ -174,6 +176,7 @@ require (
 	github.com/muesli/termenv v0.16.0 // indirect
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
 	github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
+	github.com/ncruces/go-strftime v1.0.0 // indirect
 	github.com/oklog/ulid v1.3.1 // indirect
 	github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect
 	github.com/olekukonko/errors v1.1.0 // indirect
@@ -188,6 +191,7 @@ require (
 	github.com/prometheus/common v0.67.4 // indirect
 	github.com/prometheus/otlptranslator v1.0.0 // indirect
 	github.com/prometheus/procfs v0.19.2 // indirect
+	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
 	github.com/rivo/uniseg v0.4.7 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/sagikazarmark/locafero v0.11.0 // indirect
@@ -251,6 +255,9 @@ require (
 	k8s.io/apiextensions-apiserver v0.34.1 // indirect
 	k8s.io/klog/v2 v2.130.1 // indirect
 	k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect
+	modernc.org/libc v1.67.4 // indirect
+	modernc.org/mathutil v1.7.1 // indirect
+	modernc.org/memory v1.11.0 // indirect
 	sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
 	sigs.k8s.io/randfill v1.0.0 // indirect
 	sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect
@@ -268,7 +275,7 @@ require (
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/goccy/go-json v0.10.5 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
-	github.com/golang-jwt/jwt/v5 v5.3.1
+	github.com/golang-jwt/jwt/v5 v5.3.0
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
 	github.com/lestrrat-go/blackmagic v1.0.4 // indirect
 	github.com/lestrrat-go/httpcc v1.0.1 // indirect
@@ -286,7 +293,7 @@ require (
 	go.opentelemetry.io/otel/metric v1.39.0
 	go.opentelemetry.io/otel/trace v1.39.0
 	golang.org/x/crypto v0.47.0
-	golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
+	golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect
 	golang.org/x/sys v0.40.0
 	k8s.io/client-go v0.35.0
 )