Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 168 additions & 32 deletions cmd/brain/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@ import (
"time"

"github.com/SynapsesOS/synapses-intelligence/config"
"github.com/SynapsesOS/synapses-intelligence/internal/llm"
"github.com/SynapsesOS/synapses-intelligence/internal/store"
"github.com/SynapsesOS/synapses-intelligence/pkg/brain"
"github.com/SynapsesOS/synapses-intelligence/server"
)

const version = "0.4.0"
const version = "0.6.0"

func main() {
if len(os.Args) < 2 {
Expand Down Expand Up @@ -65,6 +66,8 @@ func main() {
cmdPatterns(cfg)
case "reset":
cmdReset(cfg)
case "benchmark":
cmdBenchmark(cfg)
case "version", "--version", "-v":
fmt.Printf("synapses-intelligence v%s\n", version)
case "help", "--help", "-h":
Expand Down Expand Up @@ -560,8 +563,12 @@ func ollamaInstalled() bool {
return err == nil
}

// cmdSetup runs an interactive-free setup wizard: detects RAM, picks a model,
// checks/explains Ollama, pulls the model, and writes brain.json.
// probeMaxDuration is the per-model timeout used during setup and benchmark.
// Models that can't respond within this time are considered too slow for use.
const probeMaxDuration = 90 * time.Second

// cmdSetup runs an interactive-free setup wizard: detects RAM, probes installed
// Ollama models for actual inference latency, picks the fastest, and writes brain.json.
func cmdSetup(cfg config.BrainConfig, cfgPath string) {
fmt.Println("synapses-intelligence setup")
fmt.Println("────────────────────────────")
Expand All @@ -572,32 +579,17 @@ func cmdSetup(cfg config.BrainConfig, cfgPath string) {
fmt.Printf(" System RAM: %d GB\n", ramGB)
} else {
fmt.Println(" System RAM: unknown")
ramGB = 4 // safe default
}

// Step 2: Model recommendation.
model, size := recommendedModel(ramGB)
fmt.Printf(" Recommended: %s (%s)\n", model, size)
fmt.Println()
fmt.Println(" All tiers:")
for _, t := range modelTiers {
marker := " "
if t.model == model {
marker = "→ "
}
fmt.Printf(" %s%-26s %s %s\n", marker, t.model, t.size, t.note)
ramGB = 4
}
fmt.Println()

// Step 3: Ollama check.
// Step 2: Ollama check.
if !ollamaInstalled() {
fmt.Println(" ✗ Ollama not found on PATH.")
fmt.Println()
fmt.Println(" Install Ollama first:")
switch runtime.GOOS {
case "darwin":
fmt.Println(" brew install ollama")
fmt.Println(" # or download from https://ollama.com/download")
case "linux":
fmt.Println(" curl -fsSL https://ollama.com/install.sh | sh")
default:
Expand All @@ -609,35 +601,78 @@ func cmdSetup(cfg config.BrainConfig, cfgPath string) {
}
fmt.Println(" ✓ Ollama installed")

// Step 4: Update config with recommended model if user hasn't already customised.
if cfg.Model == config.DefaultConfig().Model {
cfg.Model = model
ctx := context.Background()

// Step 3: Discover installed models and probe actual latency.
// This is more reliable than RAM-based heuristics — actual measurement
// catches CPU architecture differences that theory cannot predict.
installed, err := llm.ListInstalledModels(ctx, cfg.OllamaURL)
if err != nil {
fmt.Fprintf(os.Stderr, " ✗ Cannot reach Ollama at %s: %v\n", cfg.OllamaURL, err)
fmt.Fprintf(os.Stderr, " Start it with: ollama serve\n")
os.Exit(1)
}

var chosenModel string
var chosenLatency time.Duration

if len(installed) > 0 {
fmt.Printf("\n Probing %d installed model(s) (max %s each)...\n",
len(installed), probeMaxDuration)
chosenModel, chosenLatency = pickFastestModel(ctx, cfg.OllamaURL, installed, probeMaxDuration)
if chosenModel != "" {
fmt.Printf("\n ✓ Fastest model: %s (%s)\n", chosenModel, chosenLatency.Round(time.Millisecond))
} else {
fmt.Println("\n ⚠ No installed model responded within", probeMaxDuration)
}
}

// Step 4: Fall back to RAM-based recommendation if probe found nothing usable.
if chosenModel == "" {
chosenModel, _ = recommendedModel(ramGB)
fmt.Printf(" Falling back to RAM-based recommendation: %s\n", chosenModel)
fmt.Println(" (Run brain benchmark after pulling models to confirm actual speed)")
}

// Step 5: Apply chosen model to all 4 tiers and compute timeout.
cfg.Enabled = true
cfg.Model = chosenModel
cfg.ModelIngest = chosenModel
cfg.ModelGuardian = chosenModel
cfg.ModelEnrich = chosenModel
cfg.ModelOrchestrate = chosenModel

// Set timeout to 3× measured latency (or 60s default when latency unknown).
if chosenLatency > 0 {
cfg.TimeoutMS = int(chosenLatency.Milliseconds() * 3)
if cfg.TimeoutMS < 30000 {
cfg.TimeoutMS = 30000 // minimum 30s
}
} else {
cfg.TimeoutMS = 60000
}
fmt.Printf(" timeout_ms set to %dms (3× measured latency)\n", cfg.TimeoutMS)

// Step 5: Save config.
// Step 6: Save config.
if err := config.SaveFile(cfgPath, cfg); err != nil {
fmt.Fprintf(os.Stderr, "setup: could not write config: %v\n", err)
os.Exit(1)
}
fmt.Printf(" ✓ Config saved to %s\n", cfgPath)

// Step 6: Pull model.
fmt.Printf(" Pulling %s...\n", cfg.Model)
// Step 7: Pull the model if not already installed.
b := brain.New(cfg)
if !b.Available() {
fmt.Fprintf(os.Stderr, "\n ✗ Ollama is installed but not running.\n")
fmt.Fprintf(os.Stderr, "\n ✗ Ollama is not running.\n")
fmt.Fprintf(os.Stderr, " Start it with: ollama serve\n")
fmt.Fprintf(os.Stderr, " Then run: brain setup\n")
os.Exit(1)
}
if err := b.EnsureModel(context.Background(), os.Stderr); err != nil {
if err := b.EnsureModel(ctx, os.Stderr); err != nil {
fmt.Fprintf(os.Stderr, "\n ✗ Pull failed: %v\n", err)
fmt.Fprintf(os.Stderr, " Try manually: ollama pull %s\n", cfg.Model)
os.Exit(1)
}

// Step 7: Done.
fmt.Println()
fmt.Println(" ✓ Model ready")
fmt.Println()
Expand All @@ -653,8 +688,109 @@ func cmdSetup(cfg config.BrainConfig, cfgPath string) {
fmt.Println(" 3. (Re)start synapses:")
fmt.Println(" synapses start --path .")
fmt.Println()
fmt.Printf(" To change model later: brain config model <tag> --pull\n")
fmt.Printf(" Available tags: qwen2.5-coder:1.5b qwen3:1.7b qwen3:4b qwen3:8b\n")
fmt.Println(" Run brain benchmark at any time to re-measure model latency.")
fmt.Println(" Run brain config model <tag> --pull to switch models later.")
}

// pickFastestModel probes each model in order and returns the name and latency
// of the fastest one that responds within maxDuration. Returns ("", 0) if none do.
// Progress is printed to stdout as each model is probed.
func pickFastestModel(ctx context.Context, ollamaURL string, models []string, maxDuration time.Duration) (string, time.Duration) {
	var (
		bestModel   string
		bestLatency time.Duration
	)
	// Each probe gets its own client so the per-model timeout matches maxDuration.
	timeoutMS := int(maxDuration.Milliseconds())
	for _, candidate := range models {
		probe := llm.NewOllamaClient(ollamaURL, candidate, timeoutMS)
		fmt.Printf(" %-35s ", candidate)
		latency, err := probe.ProbeLatency(ctx, maxDuration)
		if err != nil {
			fmt.Printf("❌ (%v)\n", shortErr(err))
			continue
		}
		fmt.Printf("✅ %s\n", latency.Round(time.Millisecond))
		if bestModel == "" || latency < bestLatency {
			bestModel, bestLatency = candidate, latency
		}
	}
	return bestModel, bestLatency
}

// cmdBenchmark probes all installed Ollama models and prints a latency table.
// Use this to decide which model to assign to each brain tier.
func cmdBenchmark(cfg config.BrainConfig) {
	ctx := context.Background()

	fmt.Println("brain benchmark — measuring actual inference latency")
	fmt.Println("────────────────────────────────────────────────────")
	fmt.Printf(" Ollama: %s\n", cfg.OllamaURL)
	fmt.Printf(" Max probe time per model: %s\n\n", probeMaxDuration)

	installed, err := llm.ListInstalledModels(ctx, cfg.OllamaURL)
	if err != nil {
		fmt.Fprintf(os.Stderr, "benchmark: cannot reach Ollama: %v\n", err)
		os.Exit(1)
	}
	if len(installed) == 0 {
		fmt.Println(" No models installed. Run ollama pull <model> first.")
		os.Exit(0)
	}

	// Probe every installed model, tracking the fastest responder as we go.
	var (
		fastestModel   string
		fastestLatency time.Duration
	)
	fmt.Printf(" %-35s %s\n", "MODEL", "LATENCY")
	fmt.Printf(" %s\n", repeat("-", 55))
	for _, candidate := range installed {
		probe := llm.NewOllamaClient(cfg.OllamaURL, candidate, int(probeMaxDuration.Milliseconds()))
		fmt.Printf(" %-35s ", candidate)
		latency, err := probe.ProbeLatency(ctx, probeMaxDuration)
		if err != nil {
			fmt.Printf("timeout / error (%v)\n", shortErr(err))
			continue
		}
		fmt.Printf("%s\n", latency.Round(time.Millisecond))
		if fastestModel == "" || latency < fastestLatency {
			fastestModel, fastestLatency = candidate, latency
		}
	}

	fmt.Println()
	if fastestModel == "" {
		fmt.Println(" No model responded within the probe timeout.")
		fmt.Println(" Consider pulling a smaller model: ollama pull qwen2.5-coder:1.5b")
		return
	}

	// Recommend 3× the measured latency, floored at 30s, mirroring cmdSetup.
	recommendedMS := int(fastestLatency.Milliseconds() * 3)
	if recommendedMS < 30000 {
		recommendedMS = 30000
	}
	fmt.Printf(" Fastest: %s (%s)\n", fastestModel, fastestLatency.Round(time.Millisecond))
	fmt.Printf(" Recommended timeout_ms: %d (3× latency)\n", recommendedMS)
	fmt.Println()
	fmt.Printf(" To apply: brain config model %s\n", fastestModel)
	fmt.Printf(" brain setup (re-runs probe and writes brain.json)\n")
}

// shortErr truncates long error messages for display.
func shortErr(err error) string {
s := err.Error()
if len(s) > 50 {
return s[:47] + "..."
}
return s
}

func truncate(s string, n int) string {
Expand Down
10 changes: 8 additions & 2 deletions internal/contextbuilder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,9 @@ type Request struct {
TaskID string

// Graph topology signals (populated by synapses core):
HasTests bool // whether *_test.go exists for root file
FanIn int // total caller count (may exceed len(CallerNames) when capped)
HasTests bool // whether *_test.go exists for root file
FanIn int // total caller count (may exceed len(CallerNames) when capped)
RootDoc string // AST doc comment; used as fallback when brain.sqlite has no summary
}

// Builder assembles a Context Packet from a Synapses snapshot and brain data.
Expand Down Expand Up @@ -154,8 +155,12 @@ func (b *Builder) Build(ctx context.Context, req Request) (*Packet, error) {
}

// Section 1: Root summary (fast path — SQLite).
// Falls back to the AST doc comment so packet_quality ≥ 0.4 on cold brain.
if sections.RootSummary && req.RootNodeID != "" {
pkt.RootSummary = b.store.GetSummary(req.RootNodeID)
if pkt.RootSummary == "" && req.RootDoc != "" {
pkt.RootSummary = req.RootDoc
}
}

// Section 1b: Dependency summaries (fast path — SQLite).
Expand All @@ -178,6 +183,7 @@ func (b *Builder) Build(ctx context.Context, req Request) (*Packet, error) {
RootID: req.RootNodeID,
RootName: req.RootName,
RootType: req.RootType,
RootFile: req.RootFile,
CalleeNames: req.CalleeNames,
CallerNames: req.CallerNames,
RelatedNames: req.RelatedNames,
Expand Down
34 changes: 33 additions & 1 deletion internal/enricher/enricher.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,21 @@ import (
"github.com/SynapsesOS/synapses-intelligence/internal/store"
)

// domainFocusMap maps file path substrings to domain-specific focus lines.
// When a root file path contains one of these substrings, the corresponding focus
// line is appended to the enricher prompt so the LLM applies domain expertise.
//
// Entries are scanned in order by detectDomain and the first substring match
// wins, so more specific path patterns should be listed before general ones.
//
// NOTE(review): internal/brain/ and internal/scout/ share an identical focus
// line — presumably both are thin fail-silent HTTP clients; confirm before
// letting the two entries diverge.
var domainFocusMap = []struct {
	pattern string // substring matched against the root entity's file path
	focus   string // line appended verbatim to the enricher prompt on match
}{
	{"internal/parser/", "Focus on: AST correctness, language quirks, tree-sitter query patterns, edge cases in public/private detection."},
	{"internal/mcp/", "Focus on: tool contract (fail-silent), handler latency, context.WithTimeout usage, JSON serialization correctness."},
	{"internal/graph/", "Focus on: BFS correctness, edge type semantics, complexity invariants, memory efficiency."},
	{"internal/store/", "Focus on: SQL correctness, migration safety, FTS5 index, CGo-free driver constraints."},
	{"internal/brain/", "Focus on: HTTP timeout handling, fail-silent pattern, client retry, interface contract."},
	{"internal/scout/", "Focus on: HTTP timeout handling, fail-silent pattern, client retry, interface contract."},
}

const (
// maxNamesInPrompt limits how many callee/caller names are sent to the LLM.
// 10 is appropriate for 7b models; reduce to 5 for 1-2b models.
Expand All @@ -42,6 +57,7 @@ type Request struct {
RootID string
RootName string
RootType string
RootFile string // file path of the root entity; used for domain detection
CalleeNames []string
CallerNames []string
RelatedNames []string
Expand Down Expand Up @@ -114,13 +130,29 @@ func (e *Enricher) buildPrompt(req Request) string {
taskSection = "\nTask context: " + req.TaskContext
}

domainSection := ""
if focus := detectDomain(req.RootFile); focus != "" {
domainSection = "\n" + focus
}

return fmt.Sprintf(promptTemplate,
req.RootName, nodeType,
callees, callers,
taskSection,
taskSection+domainSection,
)
}

// detectDomain returns a domain-specific focus line for the given file path,
// or "" if no domain pattern matches. Entries in domainFocusMap are checked
// in declaration order; the first match wins.
func detectDomain(filePath string) string {
	for _, entry := range domainFocusMap {
		if !strings.Contains(filePath, entry.pattern) {
			continue
		}
		return entry.focus
	}
	return ""
}

func parseInsight(raw string) (Response, error) {
extracted := llm.ExtractJSON(raw)
var result insightJSON
Expand Down
Loading
Loading