Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,5 @@ cmd/thv-operator/.task/checksum/crdref-gen
# Test coverage
coverage*

crd-helm-wrapper
crd-helm-wrapper
cmd/vmcp/__debug_bin*
2 changes: 2 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ linters:
- third_party$
- builtin$
- examples$
- scripts$
formatters:
enable:
- gci
Expand All @@ -155,3 +156,4 @@ formatters:
- third_party$
- builtin$
- examples$
- scripts$
11 changes: 8 additions & 3 deletions Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ tasks:
- task: test-e2e-windows
platforms: [windows]

test-optimizer:
desc: Run optimizer integration tests with sqlite-vec
cmds:
- ./scripts/test-optimizer-with-sqlite-vec.sh

test-all:
desc: Run all tests (unit and e2e)
deps: [test, test-e2e]
Expand Down Expand Up @@ -219,12 +224,12 @@ tasks:
cmds:
- cmd: mkdir -p bin
platforms: [linux, darwin]
- cmd: go build -ldflags "-s -w -X github.com/stacklok/toolhive/pkg/versions.Version={{.VERSION}} -X github.com/stacklok/toolhive/pkg/versions.Commit={{.COMMIT}} -X github.com/stacklok/toolhive/pkg/versions.BuildDate={{.BUILD_DATE}}" -o bin/vmcp ./cmd/vmcp
- cmd: go build -tags="fts5" -ldflags "-s -w -X github.com/stacklok/toolhive/pkg/versions.Version={{.VERSION}} -X github.com/stacklok/toolhive/pkg/versions.Commit={{.COMMIT}} -X github.com/stacklok/toolhive/pkg/versions.BuildDate={{.BUILD_DATE}}" -o bin/vmcp ./cmd/vmcp
platforms: [linux, darwin]
- cmd: cmd.exe /c mkdir bin
platforms: [windows]
ignore_error: true
- cmd: go build -ldflags "-s -w -X github.com/stacklok/toolhive/pkg/versions.Version={{.VERSION}} -X github.com/stacklok/toolhive/pkg/versions.Commit={{.COMMIT}} -X github.com/stacklok/toolhive/pkg/versions.BuildDate={{.BUILD_DATE}}" -o bin/vmcp.exe ./cmd/vmcp
- cmd: go build -tags="fts5" -ldflags "-s -w -X github.com/stacklok/toolhive/pkg/versions.Version={{.VERSION}} -X github.com/stacklok/toolhive/pkg/versions.Commit={{.COMMIT}} -X github.com/stacklok/toolhive/pkg/versions.BuildDate={{.BUILD_DATE}}" -o bin/vmcp.exe ./cmd/vmcp
platforms: [windows]

install-vmcp:
Expand All @@ -236,7 +241,7 @@ tasks:
sh: git rev-parse --short HEAD || echo "unknown"
BUILD_DATE: '{{dateInZone "2006-01-02T15:04:05Z" (now) "UTC"}}'
cmds:
- go install -ldflags "-s -w -X github.com/stacklok/toolhive/pkg/versions.Version={{.VERSION}} -X github.com/stacklok/toolhive/pkg/versions.Commit={{.COMMIT}} -X github.com/stacklok/toolhive/pkg/versions.BuildDate={{.BUILD_DATE}}" -v ./cmd/vmcp
- go install -tags="fts5" -ldflags "-s -w -X github.com/stacklok/toolhive/pkg/versions.Version={{.VERSION}} -X github.com/stacklok/toolhive/pkg/versions.Commit={{.COMMIT}} -X github.com/stacklok/toolhive/pkg/versions.BuildDate={{.BUILD_DATE}}" -v ./cmd/vmcp

all:
desc: Run linting, tests, and build
Expand Down
2 changes: 1 addition & 1 deletion cmd/thv-operator/Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ tasks:
ignore_error: true # Windows has no mkdir -p, so just ignore error if it exists
- go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.17.3
- $(go env GOPATH)/bin/controller-gen rbac:roleName=toolhive-operator-manager-role paths="{{.CONTROLLER_GEN_PATHS}}" output:rbac:artifacts:config={{.PROJECT_ROOT}}/deploy/charts/operator/templates/clusterrole
- $(go env GOPATH)/bin/controller-gen crd webhook paths="{{.CONTROLLER_GEN_PATHS}}" output:crd:artifacts:config={{.PROJECT_ROOT}}/deploy/charts/operator-crds/files/crds
- $(go env GOPATH)/bin/controller-gen crd:allowDangerousTypes=true webhook paths="{{.CONTROLLER_GEN_PATHS}}" output:crd:artifacts:config={{.PROJECT_ROOT}}/deploy/charts/operator-crds/files/crds
# Wrap CRDs with Helm templates for conditional installation
- go run {{.PROJECT_ROOT}}/deploy/charts/operator-crds/crd-helm-wrapper/main.go -source {{.PROJECT_ROOT}}/deploy/charts/operator-crds/files/crds -target {{.PROJECT_ROOT}}/deploy/charts/operator-crds/templates
# - "{{.PROJECT_ROOT}}/deploy/charts/operator-crds/scripts/wrap-crds.sh"
Expand Down
47 changes: 47 additions & 0 deletions cmd/thv-operator/pkg/vmcpconfig/converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -143,6 +144,24 @@ func (c *Converter) Convert(
config.Audit.Component = vmcp.Name
}

// Convert optimizer config - resolve embeddingService to embeddingURL if needed
if vmcp.Spec.Config.Optimizer != nil {
optimizerConfig := vmcp.Spec.Config.Optimizer.DeepCopy()

// If embeddingService is set, resolve it to embeddingURL
if optimizerConfig.EmbeddingService != "" && optimizerConfig.EmbeddingURL == "" {
embeddingURL, err := c.resolveEmbeddingService(ctx, vmcp.Namespace, optimizerConfig.EmbeddingService)
if err != nil {
return nil, fmt.Errorf("failed to resolve embedding service %s: %w", optimizerConfig.EmbeddingService, err)
}
optimizerConfig.EmbeddingURL = embeddingURL
// Clear embeddingService since we've resolved it to URL
optimizerConfig.EmbeddingService = ""
}

config.Optimizer = optimizerConfig
}

// Apply operational defaults (fills missing values)
config.EnsureOperationalDefaults()

Expand Down Expand Up @@ -608,3 +627,31 @@ func validateCompositeToolNames(tools []vmcpconfig.CompositeToolConfig) error {
}
return nil
}

// resolveEmbeddingService looks up the named Kubernetes Service in the given
// namespace and builds an in-cluster URL for it from the Service's first port.
//
// The returned URL has the form
// http://<service-name>.<namespace>.svc.cluster.local:<port>.
// An error is returned when the Service cannot be fetched, when it declares
// no ports, or when its first port is zero.
func (c *Converter) resolveEmbeddingService(ctx context.Context, namespace, serviceName string) (string, error) {
	var service corev1.Service
	lookup := types.NamespacedName{Namespace: namespace, Name: serviceName}
	if err := c.k8sClient.Get(ctx, lookup, &service); err != nil {
		return "", fmt.Errorf("failed to get service %s/%s: %w", namespace, serviceName, err)
	}

	ports := service.Spec.Ports
	if len(ports) == 0 {
		return "", fmt.Errorf("service %s/%s has no ports", namespace, serviceName)
	}

	// Only the first declared port is considered; any further ports are ignored.
	firstPort := ports[0].Port
	if firstPort == 0 {
		return "", fmt.Errorf("service %s/%s has invalid port", namespace, serviceName)
	}

	// Address the Service by its fully qualified cluster DNS name.
	return fmt.Sprintf("http://%s.%s.svc.cluster.local:%d", serviceName, namespace, firstPort), nil
}
4 changes: 3 additions & 1 deletion cmd/vmcp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ The Virtual MCP Server (vmcp) is a standalone binary that aggregates multiple MC

## Features

### Implemented (Phase 1)
### Implemented
- ✅ **Group-Based Backend Management**: Automatic workload discovery from ToolHive groups
- ✅ **Tool Aggregation**: Combines tools from multiple MCP servers with conflict resolution (prefix, priority, manual)
- ✅ **Resource & Prompt Aggregation**: Unified access to resources and prompts from all backends
Expand All @@ -15,12 +15,14 @@ The Virtual MCP Server (vmcp) is a standalone binary that aggregates multiple MC
- ✅ **Health Endpoints**: `/health` and `/ping` for service monitoring
- ✅ **Configuration Validation**: `vmcp validate` command for config verification
- ✅ **Observability**: OpenTelemetry metrics and traces for backend operations and workflow executions
- ✅ **Composite Tools**: Multi-step workflows with elicitation support

### In Progress
- 🚧 **Incoming Authentication** (Issue #165): OIDC, local, anonymous authentication
- 🚧 **Outgoing Authentication** (Issue #160): RFC 8693 token exchange for backend API access
- 🚧 **Token Caching**: Memory and Redis cache providers
- 🚧 **Health Monitoring** (Issue #166): Circuit breakers, backend health checks
- 🚧 **Optimizer**: Support for the MCP optimizer in vMCP, providing context optimization on large toolsets

### Future (Phase 2+)
- 📋 **Authorization**: Cedar policy-based access control
Expand Down
55 changes: 48 additions & 7 deletions cmd/vmcp/app/commands.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
// SPDX-License-Identifier: Apache-2.0

// Package app provides the entry point for the vmcp command-line application.
package app

Expand All @@ -27,7 +24,6 @@ import (
"github.com/stacklok/toolhive/pkg/vmcp/discovery"
"github.com/stacklok/toolhive/pkg/vmcp/health"
"github.com/stacklok/toolhive/pkg/vmcp/k8s"
"github.com/stacklok/toolhive/pkg/vmcp/optimizer"
vmcprouter "github.com/stacklok/toolhive/pkg/vmcp/router"
vmcpserver "github.com/stacklok/toolhive/pkg/vmcp/server"
)
Expand Down Expand Up @@ -431,9 +427,54 @@ func runServe(cmd *cobra.Command, _ []string) error {
Watcher: backendWatcher,
}

if cfg.Optimizer != nil {
// TODO: update this with the real optimizer.
serverCfg.OptimizerFactory = optimizer.NewDummyOptimizer
// Configure optimizer if enabled in YAML config
if cfg.Optimizer != nil && cfg.Optimizer.Enabled {
logger.Info("🔬 Optimizer enabled via configuration (chromem-go)")
hybridRatio := 0.7 // Default
if cfg.Optimizer.HybridSearchRatio != nil {
hybridRatio = *cfg.Optimizer.HybridSearchRatio
}

// embeddingURL should already be resolved from embeddingService by the operator
// If embeddingService is still set (CLI mode), log a warning
if cfg.Optimizer.EmbeddingService != "" {
logger.Warnf("embeddingService is set but not resolved to embeddingURL. This should be handled by the operator. Falling back to default port 11434")
// Simple fallback for CLI/testing scenarios
namespace := os.Getenv("POD_NAMESPACE")
if namespace != "" {
cfg.Optimizer.EmbeddingURL = fmt.Sprintf("http://%s.%s.svc.cluster.local:11434", cfg.Optimizer.EmbeddingService, namespace)
} else {
cfg.Optimizer.EmbeddingURL = fmt.Sprintf("http://%s:11434", cfg.Optimizer.EmbeddingService)
}
}

serverCfg.OptimizerConfig = &vmcpserver.OptimizerConfig{
Enabled: cfg.Optimizer.Enabled,
PersistPath: cfg.Optimizer.PersistPath,
FTSDBPath: cfg.Optimizer.FTSDBPath,
HybridSearchRatio: hybridRatio,
EmbeddingBackend: cfg.Optimizer.EmbeddingBackend,
EmbeddingURL: cfg.Optimizer.EmbeddingURL,
EmbeddingModel: cfg.Optimizer.EmbeddingModel,
EmbeddingDimension: cfg.Optimizer.EmbeddingDimension,
}
persistInfo := "in-memory"
if cfg.Optimizer.PersistPath != "" {
persistInfo = cfg.Optimizer.PersistPath
}
// FTS5 is always enabled with configurable semantic/BM25 ratio
ratio := 0.7 // Default
if cfg.Optimizer.HybridSearchRatio != nil {
ratio = *cfg.Optimizer.HybridSearchRatio
}
searchMode := fmt.Sprintf("hybrid (%.0f%% semantic, %.0f%% BM25)",
ratio*100,
(1-ratio)*100)
logger.Infof("Optimizer configured: backend=%s, dimension=%d, persistence=%s, search=%s",
cfg.Optimizer.EmbeddingBackend,
cfg.Optimizer.EmbeddingDimension,
persistInfo,
searchMode)
}

// Convert composite tool configurations to workflow definitions
Expand Down
2 changes: 2 additions & 0 deletions codecov.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ coverage:
- "**/mocks/**/*"
- "**/mock_*.go"
- "**/zz_generated.deepcopy.go"
- "**/*_test.go"
- "**/*_test_coverage.go"
status:
project:
default:
Expand Down
2 changes: 1 addition & 1 deletion deploy/charts/operator-crds/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ apiVersion: v2
name: toolhive-operator-crds
description: A Helm chart for installing the ToolHive Operator CRDs into Kubernetes.
type: application
version: 0.0.98
version: 0.0.97
appVersion: "0.0.1"
2 changes: 1 addition & 1 deletion deploy/charts/operator-crds/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# ToolHive Operator CRDs Helm Chart

![Version: 0.0.98](https://img.shields.io/badge/Version-0.0.98-informational?style=flat-square)
![Version: 0.0.97](https://img.shields.io/badge/Version-0.0.97-informational?style=flat-square)
![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)

A Helm chart for installing the ToolHive Operator CRDs into Kubernetes.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -677,17 +677,80 @@ spec:
optimizer:
description: |-
Optimizer configures the MCP optimizer for context optimization on large toolsets.
When enabled, vMCP exposes only find_tool and call_tool operations to clients
When enabled, vMCP exposes optim.find_tool and optim.call_tool operations to clients
instead of all backend tools directly. This reduces token usage by allowing
LLMs to discover relevant tools on demand rather than receiving all tool definitions.
properties:
embeddingBackend:
description: |-
EmbeddingBackend specifies the embedding provider: "ollama", "openai-compatible", or "placeholder".
- "ollama": Uses local Ollama HTTP API for embeddings
- "openai-compatible": Uses OpenAI-compatible API (vLLM, OpenAI, etc.)
- "placeholder": Uses deterministic hash-based embeddings (for testing/development)
enum:
- ollama
- openai-compatible
- placeholder
type: string
embeddingDimension:
description: |-
EmbeddingDimension is the dimension of the embedding vectors.
Common values:
- 384: all-MiniLM-L6-v2, BAAI/bge-small-en-v1.5
- 768: nomic-embed-text
- 1536: OpenAI text-embedding-3-small
minimum: 1
type: integer
embeddingModel:
description: |-
EmbeddingModel is the model name to use for embeddings.
Required when EmbeddingBackend is "ollama" or "openai-compatible".
Examples:
- Ollama: "nomic-embed-text", "all-minilm"
- vLLM: "BAAI/bge-small-en-v1.5"
- OpenAI: "text-embedding-3-small"
type: string
embeddingService:
description: |-
EmbeddingService is the name of a Kubernetes Service that provides the embedding service
for semantic tool discovery. The service must implement the optimizer embedding API.
EmbeddingService is the name of a Kubernetes Service that provides embeddings (K8s only).
This is an alternative to EmbeddingURL for in-cluster deployments.
When set, vMCP will resolve the service DNS name for the embedding API.
type: string
embeddingURL:
description: |-
EmbeddingURL is the base URL for the embedding service (Ollama or OpenAI-compatible API).
Required when EmbeddingBackend is "ollama" or "openai-compatible".
Examples:
- Ollama: "http://localhost:11434"
- vLLM: "http://vllm-service:8000/v1"
- OpenAI: "https://api.openai.com/v1"
type: string
enabled:
description: |-
Enabled determines whether the optimizer is active.
When true, vMCP exposes optim.find_tool and optim.call_tool instead of all backend tools.
type: boolean
ftsDBPath:
description: |-
FTSDBPath is the path to the SQLite FTS5 database for BM25 text search.
If empty, defaults to ":memory:" for in-memory FTS5, or "{PersistPath}/fts.db" if PersistPath is set.
Hybrid search (semantic + BM25) is always enabled.
type: string
hybridSearchRatio:
description: |-
HybridSearchRatio controls the mix of semantic vs BM25 results in hybrid search.
Value range: 0.0 (all BM25) to 1.0 (all semantic).
Default: 0.7 (70% semantic, 30% BM25)
Applies whether FTSDBPath is set explicitly or left at its default, since hybrid search is always enabled.
maximum: 1
minimum: 0
type: number
persistPath:
description: |-
PersistPath is the optional filesystem path for persisting the chromem-go database.
If empty, the database will be in-memory only (ephemeral).
When set, tool metadata and embeddings are persisted to disk for faster restarts.
type: string
required:
- embeddingService
type: object
outgoingAuth:
description: |-
Expand Down
Loading