Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .coverage
Binary file not shown.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,24 @@ uv sync --extra dev
uv run pytest
```

### Running Tests with LLM Integration

By default, LLM integration tests are skipped because `offline_mode` is enabled. To run the full test suite including LLM tests:

```bash
# Set up credentials in .env (copy from example)
cp .env.summoner.example .env
# Edit .env with your LLM proxy credentials:
# - SLOPOMETRY_LLM_PROXY_URL
# - SLOPOMETRY_LLM_PROXY_API_KEY
# - SLOPOMETRY_LLM_RESPONSES_URL

# Run tests with offline mode disabled
SLOPOMETRY_OFFLINE_MODE=false uv run pytest tests/test_llm_integration.py -v
```

The integration tests make real API calls to configured LLM providers and verify that agents return valid responses.

Customize via `.env` file or environment variables:

- `SLOPOMETRY_DATABASE_PATH`: Custom database location (optional)
Expand Down
4,314 changes: 2,227 additions & 2,087 deletions coverage.xml

Large diffs are not rendered by default.

31 changes: 19 additions & 12 deletions src/slopometry/core/complexity_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,21 @@
CALCULATOR_VERSION = "2024.1.4"


def _get_tiktoken_encoder() -> Any:
    """Get tiktoken encoder, falling back if o200k_base encoding not available.

    Returns:
        tiktoken Encoder for token counting
    """
    # Imported lazily inside the helper so importing this module stays cheap
    # for callers that never perform token counting.
    import tiktoken

    try:
        return tiktoken.get_encoding("o200k_base")
    except Exception as e:
        # Fallback for older tiktoken versions where o200k_base is not
        # available; cl100k_base yields approximate (not identical) counts.
        logger.debug(f"Falling back to cl100k_base encoding: {e}")
        return tiktoken.get_encoding("cl100k_base")


def _analyze_single_file_extended(file_path: Path) -> FileAnalysisResult | None:
"""Analyze a single Python file for all metrics.

Expand All @@ -30,12 +45,8 @@ def _analyze_single_file_extended(file_path: Path) -> FileAnalysisResult | None:
"""
import radon.complexity as cc_lib
import radon.metrics as metrics_lib
import tiktoken

try:
encoder = tiktoken.get_encoding("o200k_base")
except Exception:
encoder = tiktoken.get_encoding("cl100k_base")
encoder = _get_tiktoken_encoder()

try:
content = file_path.read_text(encoding="utf-8")
Expand Down Expand Up @@ -109,7 +120,8 @@ def analyze_complexity_with_baseline(self, baseline_dir: Path) -> tuple[Complexi

return current_metrics, delta

except Exception:
except Exception as e:
logger.debug(f"Baseline complexity analysis failed, returning current metrics only: {e}")
current_metrics = self._analyze_directory(self.working_directory)
return current_metrics, ComplexityDelta()

Expand All @@ -125,18 +137,13 @@ def _analyze_directory(self, directory: Path) -> ComplexityMetrics:
ComplexityMetrics with aggregated complexity data.
"""
import radon.complexity as cc_lib
import tiktoken

from slopometry.core.git_tracker import GitTracker

tracker = GitTracker(directory)
python_files = tracker.get_tracked_python_files()

try:
encoder = tiktoken.get_encoding("o200k_base")
except Exception:
# Fallback for older tiktoken versions if o200k_base not available
encoder = tiktoken.get_encoding("cl100k_base")
encoder = _get_tiktoken_encoder()

files_by_complexity = {}
all_complexities = []
Expand Down
22 changes: 15 additions & 7 deletions src/slopometry/core/database.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
"""Database operations for storing hook events."""

import json
import logging
import sqlite3
from collections.abc import Generator
from contextlib import contextmanager
from datetime import datetime, timedelta
from pathlib import Path

from slopometry.core.migrations import MigrationRunner

logger = logging.getLogger(__name__)
from slopometry.core.models import (
ComplexityDelta,
ContextCoverage,
Expand Down Expand Up @@ -568,14 +571,16 @@ def get_session_statistics(self, session_id: str) -> SessionStatistics | None:
transcript_path = Path(stats.transcript_path)
if transcript_path.exists():
stats.plan_evolution.token_usage = analyze_transcript_tokens(transcript_path)
except Exception:
pass
except Exception:
except Exception as e:
logger.debug(f"Failed to analyze transcript tokens for session {session_id}: {e}")
except Exception as e:
logger.debug(f"Failed to calculate plan evolution for session {session_id}: {e}")
stats.plan_evolution = None

try:
stats.context_coverage = self._calculate_context_coverage(stats.transcript_path, stats.working_directory)
except Exception:
except Exception as e:
logger.debug(f"Failed to calculate context coverage for session {session_id}: {e}")
stats.context_coverage = None

return stats
Expand Down Expand Up @@ -631,7 +636,8 @@ def _get_session_complexity_metrics(
try:
baseline_commit_sha = initial_git_state.commit_sha if initial_git_state else None
return self.calculate_extended_complexity_metrics(working_directory, baseline_commit_sha)
except Exception:
except Exception as e2:
logger.debug(f"Failed to compute session complexity metrics (fallback also failed): {e2}")
return None, None

def _calculate_plan_evolution(self, session_id: str) -> PlanEvolution:
Expand Down Expand Up @@ -793,13 +799,15 @@ def calculate_extended_complexity_metrics(
)

shutil.rmtree(baseline_dir, ignore_errors=True)
except Exception:
except Exception as e:
logger.debug(f"Failed to compute complexity delta, cleanup skipped: {e}")
if baseline_dir:
shutil.rmtree(baseline_dir, ignore_errors=True)

return current_extended, complexity_delta

except Exception:
except Exception as e:
logger.debug(f"Failed to compute extended complexity metrics: {e}")
return None, None

def list_sessions(self, limit: int | None = None) -> list[str]:
Expand Down
17 changes: 14 additions & 3 deletions src/slopometry/core/hook_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
)
from slopometry.core.project_tracker import ProjectTracker
from slopometry.core.settings import settings
from slopometry.core.working_tree_state import WorkingTreeStateCalculator
from slopometry.display.formatters import truncate_path

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -97,7 +98,13 @@ def parse_hook_input(raw_data: dict) -> HookInputUnion:
return NotificationInput(**raw_data)

elif "stop_hook_active" in fields:
return SubagentStopInput(**raw_data)
if raw_data.get("stop_hook_active"):
return SubagentStopInput(**raw_data)
return StopInput(**raw_data)

elif "session_id" in fields and "transcript_path" in fields:
# Handle stop-type hooks without stop_hook_active field
return StopInput(**raw_data)

else:
raise ValueError(f"Unknown hook input schema with fields: {fields}")
Expand Down Expand Up @@ -279,8 +286,12 @@ def _compute_feedback_cache_key(working_directory: str, edited_files: set[str],
Cache key string
"""
tracker = GitTracker(Path(working_directory))
commit_sha = tracker.get_current_commit_sha() or "unknown"
working_tree_hash = tracker.get_working_tree_hash() or "unknown"
git_state = tracker.get_git_state()
commit_sha = git_state.commit_sha or "unknown"
wt_calculator = WorkingTreeStateCalculator(working_directory)
working_tree_hash = (
wt_calculator.calculate_working_tree_hash(commit_sha) if git_state.has_uncommitted_changes else "clean"
)
files_key = ",".join(sorted(edited_files))

key_parts = f"{commit_sha}:{working_tree_hash}:{files_key}:{feedback_hash}"
Expand Down
45 changes: 43 additions & 2 deletions src/slopometry/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

import os
import sys
import warnings
from pathlib import Path

from pydantic import Field, field_validator
from dotenv import dotenv_values
from pydantic import Field, field_validator, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict


Expand Down Expand Up @@ -88,12 +90,21 @@ def _ensure_global_config_dir() -> None:

llm_proxy_url: str = ""
llm_proxy_api_key: str = ""
llm_responses_url: str = ""
interactive_rating_enabled: bool = False

hf_token: str = ""
hf_default_repo: str = ""

user_story_agents: list[str] = ["o3", "claude-opus-4", "gemini-2.5-pro"]
offline_mode: bool = Field(
default=True,
description="Disables all external LLM requests from slopometry. Set to False to enable AI features.",
)

user_story_agent: str = Field(
default="gpt_oss_120b",
description="Agent to use for user story generation. Options: gpt_oss_120b, gemini",
)

enable_working_at_microsoft: bool = Field(
default=False, description="Galen Rate feature flag - shows NGMI alert when below 1 Galen productivity target"
Expand Down Expand Up @@ -124,6 +135,36 @@ def validate_database_path(cls, v: str | Path | None) -> Path | None:
return Path(v)
return v

@model_validator(mode="after")
def warn_unknown_prefixed_settings(self) -> "Settings":
    """Warn about unknown SLOPOMETRY_ prefixed environment variables.

    Runs after model construction: collects every SLOPOMETRY_-prefixed key
    from the process environment and from the configured env file(s), and
    emits a UserWarning for any key that does not match a declared Settings
    field (most likely a typo).
    """
    prefix = "SLOPOMETRY_"
    # Declared fields map to env vars as PREFIX + upper-cased field name.
    known_fields = {prefix + name.upper() for name in type(self).model_fields}

    prefixed_env_vars: set[str] = set()
    # Keys are upper-cased before comparison so matching is case-insensitive.
    for key in os.environ:
        if key.upper().startswith(prefix):
            prefixed_env_vars.add(key.upper())

    # Also scan env files listed in model_config for prefixed keys.
    # NOTE(review): assumes model_config["env_file"] is a list/tuple; a bare
    # string here would be iterated character-by-character — confirm.
    for env_file in self.model_config.get("env_file", []):
        env_path = Path(env_file)
        if env_path.exists():
            for key in dotenv_values(env_path):
                if key.upper().startswith(prefix):
                    prefixed_env_vars.add(key.upper())

    unknown = prefixed_env_vars - known_fields
    if unknown:
        unknown_list = ", ".join(sorted(unknown))
        # stacklevel=2 points the warning at the code constructing Settings,
        # not at this validator.
        warnings.warn(
            f"Unknown SLOPOMETRY_ settings will be ignored: {unknown_list}. "
            f"Check spelling or see 'slopometry solo config --list' for valid options.",
            UserWarning,
            stacklevel=2,
        )

    return self

@property
def resolved_database_path(self) -> Path:
"""Get the resolved database path, using default if not set."""
Expand Down
8 changes: 3 additions & 5 deletions src/slopometry/summoner/cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ def complete_nfp_id(ctx: click.Context, param: click.Parameter, incomplete: str)
def complete_feature_id(ctx: click.Context, param: click.Parameter, incomplete: str) -> list[str]:
"""Complete feature IDs from the database."""
try:
from pathlib import Path

from slopometry.core.database import EventDatabase

db = EventDatabase()
Expand All @@ -63,9 +61,9 @@ def complete_feature_id(ctx: click.Context, param: click.Parameter, incomplete:

def complete_user_story_entry_id(ctx: click.Context, param: click.Parameter, incomplete: str) -> list[str]:
"""Complete user story entry IDs from the database."""
try:
from slopometry.core.database import EventDatabase
from slopometry.core.database import EventDatabase

try:
db = EventDatabase()
entry_ids = db.get_user_story_entry_ids_for_completion()
return [eid for eid in entry_ids if eid.startswith(incomplete)]
Expand Down Expand Up @@ -484,7 +482,7 @@ def userstorify(
sys.exit(1)

console.print(f"Repository: {repo_path}")
console.print(f"Using agents: {', '.join(llm_service.get_configured_agents())}")
console.print(f"Using agent: {llm_service.get_configured_agent()}")

commit_info = llm_service.get_commit_info_for_display(base_commit, head_commit)

Expand Down
7 changes: 4 additions & 3 deletions src/slopometry/summoner/services/baseline_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ def _compute_deltas_parallel(
delta = future.result(timeout=120)
if delta:
deltas.append(delta)
except Exception:
pass
except Exception as e:
logger.debug(f"Skipping failed baseline delta analysis: {e}")

return deltas

Expand All @@ -244,7 +244,8 @@ def _get_commit_token_count(self, repo_path: Path, commit_sha: str, analyzer: Co
try:
metrics = analyzer.analyze_extended_complexity(commit_dir)
return metrics.total_tokens
except Exception:
except Exception as e:
logger.debug(f"Failed to analyze token count for commit {commit_sha}: {e}")
return None
finally:
if commit_dir:
Expand Down
3 changes: 3 additions & 0 deletions src/slopometry/summoner/services/current_impact_service.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Current (uncommitted) impact analysis service."""

import logging
import shutil
from datetime import datetime
from pathlib import Path
Expand All @@ -14,6 +15,8 @@
from slopometry.core.working_tree_extractor import WorkingTreeExtractor
from slopometry.summoner.services.impact_calculator import ImpactCalculator

logger = logging.getLogger(__name__)


class CurrentImpactService:
"""Service for analyzing impact of uncommitted changes."""
Expand Down
14 changes: 12 additions & 2 deletions src/slopometry/summoner/services/hf_uploader.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
"""Hugging Face dataset upload functionality."""

import logging
from pathlib import Path

from slopometry.core.settings import settings

logger = logging.getLogger(__name__)
from slopometry.summoner.services.llm_wrapper import OfflineModeError


def upload_to_huggingface(file_path: Path, repo_id: str, token: str | None = None) -> None:
"""Upload dataset to Hugging Face Hub.
Expand All @@ -12,7 +16,13 @@ def upload_to_huggingface(file_path: Path, repo_id: str, token: str | None = Non
file_path: Path to the parquet dataset file
repo_id: HuggingFace dataset repository ID (e.g., 'username/dataset-name')
token: Optional HuggingFace token (defaults to settings or HF_TOKEN env var)

Raises:
OfflineModeError: If offline_mode is enabled
"""
if settings.offline_mode:
raise OfflineModeError()

# Use token from settings if not provided
if token is None and settings.hf_token:
token = settings.hf_token
Expand All @@ -36,8 +46,8 @@ def upload_to_huggingface(file_path: Path, repo_id: str, token: str | None = Non
api = HfApi(token=token)
try:
create_repo(repo_id=repo_id, token=token, repo_type="dataset", exist_ok=True)
except Exception:
pass # Repo might already exist
except Exception as e:
logger.debug(f"Could not create repo {repo_id} (may already exist): {e}")

# Push to hub
dataset_dict.push_to_hub(repo_id, token=token, commit_message="Upload slopometry user story dataset")
Expand Down
Loading
Loading