hyperlight-dev
diff --git a/‎docs/TESTING-USER-SKILLS.md‎
Lines changed: 27 additions & 19 deletions b/‎docs/TESTING-USER-SKILLS.md‎
Lines changed: 27 additions & 19 deletions
diff --git a/‎src/agent/index.ts‎
Lines changed: 41 additions & 7 deletions b/‎src/agent/index.ts‎
Lines changed: 41 additions & 7 deletions
diff --git a/‎src/agent/mcp/plugin-adapter.ts‎
Lines changed: 25 additions & 0 deletions b/‎src/agent/mcp/plugin-adapter.ts‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎src/agent/session-context.ts‎
Lines changed: 18 additions & 1 deletion b/‎src/agent/session-context.ts‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎src/agent/skill-writer.ts‎
Lines changed: 46 additions & 6 deletions b/‎src/agent/skill-writer.ts‎
Lines changed: 46 additions & 6 deletions
@@ -5,14 +5,11 @@ feature lets a user persist what HyperAgent learned in a session as a
 reusable skill at `~/.hyperagent/skills/<name>/SKILL.md`, surviving
 upgrades and overriding system skills with the same name.
 
-Implemented on branch `feat-user-skills` (atop fix
-`fix-marked-v15-renderer` for the markdown renderer crash).
-
 ---
 
 ## Prerequisites
 
-- A working HyperAgent checkout on the `feat-user-skills` branch
+- A working HyperAgent checkout
 - `just setup` already run (Rust addons built, deps installed) — see the
   project [README](../README.md) and [DEVELOPMENT.md](DEVELOPMENT.md)
 - A terminal where `just start` launches the agent successfully
@@ -59,8 +56,10 @@ Let it run to completion. Then ask the agent to save what it learned:
 1. The agent receives a synthetic prompt summarising the session
    context (tools used, MCP servers, modules registered, recent errors)
 2. The LLM calls the `generate_skill(...)` tool
-3. You see an interactive approval prompt with a preview of the
-   `SKILL.md` content
+3. You see an interactive approval prompt showing a **summary** — the
+   skill name, the one-line description, a preview of the first few
+   triggers, the allowed-tools list, and a byte count for the guidance
+   body. (The full content is *not* echoed to stdout.)
 4. Hit `y` to approve
 
 Verify the file landed on disk:
@@ -82,14 +81,19 @@ Exercise every command path. From a fresh `just start`:
 
 ```text
 > /skills                                 # list both system + user skills
-> /skills info code-review                # show full detail for a system skill
+> /skills info kql-expert                 # show full detail for a bundled system skill
 > /save-skill                             # no name → LLM picks one
 > /skills                                 # user skill now shows with 👤
 > /skills info fetch-page-title           # user skill detail
-> /skills edit fetch-page-title           # opens $EDITOR for hand-tuning
+> /skills edit fetch-page-title           # prints the user-skill path; open it in your editor
 > exit
 ```
 
+> `/skills edit <name>` does **not** spawn `$EDITOR`. It just prints
+> the absolute path to the user-skill `SKILL.md` so you can open it
+> in your own editor of choice. Save the file, then restart (or run
+> `/suggest_approach`) and the change takes effect.
+
 Then restart the agent and repeat the original task — the matching
 `/suggest_approach` should surface the saved skill via its triggers.
 
@@ -98,15 +102,17 @@ Then restart the agent and repeat the original task — the matching
 ## 3. Override Test
 
 User skills must override system skills with the same name. Drop a user
-skill that shadows an existing system one:
+skill that shadows an existing system one (pick any skill that `ls
+skills/` shows — here we use `kql-expert`):
 
 ```bash
-mkdir -p "$HYPERAGENT_USER_SKILLS_DIR/code-review"
-cat > "$HYPERAGENT_USER_SKILLS_DIR/code-review/SKILL.md" << 'EOF'
+mkdir -p "$HYPERAGENT_USER_SKILLS_DIR/kql-expert"
+cat > "$HYPERAGENT_USER_SKILLS_DIR/kql-expert/SKILL.md" << 'EOF'
 ---
-name: code-review
-description: My customised code review skill
-triggers: [review, audit]
+name: kql-expert
+description: My customised KQL skill
+triggers: [kql, kusto, query]
+allowed-tools: [execute_javascript]
 ---
 This overrides the system version.
 EOF
@@ -117,11 +123,12 @@ just start
 In the REPL:
 
 ```text
-> /skills info code-review
+> /skills
 ```
 
-**Expected:** the **user** description ("My customised code review
-skill") appears, and an **override** flag/note is present.
+**Expected:** the `kql-expert` row appears with the **`👤 (overrides
+built-in)`** badge in the list view. Running `/skills info kql-expert`
+then shows the **user** description ("My customised KQL skill").
 
 ---
 
@@ -134,7 +141,8 @@ Validation should reject bad input cleanly without crashing the agent:
 | `/save-skill BadName` | Rejected — not kebab-case |
 | `/save-skill ../escape` | Rejected — path traversal |
 | `/save-skill thisnameisreallylongandshouldfailitsbeyondsixtyfourcharactersnowforsure` | Rejected — exceeds 64 chars |
-| `/save-skill fetch-page-title` (second time) | Overwrite confirmation prompt |
+| `/save-skill info` | Rejected — reserved subcommand name |
+| `/save-skill fetch-page-title` (second time, fresh session) | `generate_skill` first errors with "already exists — set overwrite=true"; the LLM retries with `overwrite=true`, and you get an **"Overwrite existing user skill?"** confirmation before the file is replaced |
 
 ---
 
@@ -155,7 +163,7 @@ unset HYPERAGENT_USER_SKILLS_DIR
 | Approval prompt shows a skill summary (name, description, triggers, allowed tools, body size) | Tool handler validation working ✅ |
 | `.md` file lands on disk under `$HYPERAGENT_USER_SKILLS_DIR` | `writeUserSkill()` working ✅ |
 | `/skills` shows the 👤 badge for the new skill | Multi-dir loader + `source` field working ✅ |
-| `/skills info <name>` shows the override flag for shadowed system skills | Name-collision detection working ✅ |
+| `/skills` shows `👤 (overrides built-in)` for shadowed system skills | Name-collision detection working ✅ |
 | Restarting the agent matches the skill on similar prompts | `loadSkillsFromDirs` + boot wiring working ✅ |
 
 ---
 
@@ -1016,6 +1016,14 @@ async function syncPluginsToSandbox(): Promise<void> {
             conn,
             mcpManager,
             mcpWriteSafetyGate,
+            // Session-learning: record any MCP server the LLM
+            // actually exercised — including calls made from inside
+            // `execute_javascript` via `host:mcp-<name>` imports
+            // (which never surface as a top-level `mcp__*` tool name
+            // and so are invisible to onPostToolUse).
+            (serverName: string) => {
+              state.mcpServersUsed.add(serverName);
+            },
           );
           registrations.push(adapter);
         }
@@ -1131,8 +1139,11 @@ async function handleSlashCommand(
     drainAndWarn,
     mcpManager, // Real MCP manager (or null if no config)
     syncPlugins: syncPluginsToSandbox,
-    submitToLLM: (prompt: string) => {
+    submitToLLM: (prompt: string, options?: { skipAutoSuggest?: boolean }) => {
       state.pendingPrompt = prompt;
+      if (options?.skipAutoSuggest) {
+        state.skipNextAutoSuggest = true;
+      }
     },
   };
   return handleSlashCommandImpl(rawInput, rl, slashDeps);
@@ -5574,7 +5585,18 @@ const generateSkillTool = defineTool("generate_skill", {
         params.triggers.length > 5
           ? ` (+${params.triggers.length - 5} more)`
           : "";
-      console.log(`\n  ${C.warn("📚 Save skill:")} ${C.tool(params.name)}`);
+      // Surface the overwrite path explicitly — the LLM passing
+      // `overwrite=true` is necessary but not sufficient.  The user
+      // gets a chance to refuse before we replace existing content.
+      const isOverwrite =
+        params.overwrite === true && userSkillExists(params.name);
+      if (isOverwrite) {
+        console.log(
+          `\n  ${C.warn("⚠️  Overwrite existing user skill:")} ${C.tool(params.name)}`,
+        );
+      } else {
+        console.log(`\n  ${C.warn("📚 Save skill:")} ${C.tool(params.name)}`);
+      }
       console.log(`     ${params.description}`);
       console.log(`     Triggers: ${triggerPreview}${triggerSuffix}`);
       console.log(
@@ -5589,9 +5611,12 @@ const generateSkillTool = defineTool("generate_skill", {
       }
 
       await drainAndWarn(rl);
+      const promptLabel = isOverwrite
+        ? `  ${C.dim("Overwrite skill? [y/n] ")}`
+        : `  ${C.dim("Save skill? [y/n] ")}`;
       const approval = state.autoApprove
         ? "y"
-        : await promptUser(rl, `  ${C.dim("Save skill? [y/n] ")}`);
+        : await promptUser(rl, promptLabel);
       if (approval.trim().toLowerCase() !== "y") {
         console.log(`  ${C.dim("Denied by user.")}`);
         return { success: false, error: "Skill save denied by user" };
@@ -6013,9 +6038,16 @@ function buildSessionConfig() {
         state.currentUserPrompt = input.prompt;
         state.hasCalledListModules = false;
 
+        // Slash commands that queue a synthetic prompt (e.g. /save-skill)
+        // set state.skipNextAutoSuggest so the auto-suggest pass doesn't
+        // match unrelated skills on scaffolding terms like "MCP" or
+        // "SKILL.md".  Consume the flag before the suggest pass below.
+        const skipAutoSuggest = state.skipNextAutoSuggest;
+        state.skipNextAutoSuggest = false;
+
         // Auto-invoke suggest_approach for non-trivial prompts
         const isNonTrivial = input.prompt.length > 25;
-        if (isNonTrivial) {
+        if (isNonTrivial && !skipAutoSuggest) {
           const result = runSuggestApproach(
             input.prompt,
             state.preLoadedSkills,
@@ -6144,9 +6176,11 @@ function buildSessionConfig() {
             state.toolCallHistory.length - MAX_TOOL_HISTORY,
           );
         }
-        // Tools whose name looks like `mcp__<server>__<tool>` count
-        // their server as "used" — that's how the SDK exposes MCP
-        // tools to the LLM.  See manage_mcp + listMCPServersTool.
+        // Top-level MCP tool fallback: if the SDK ever surfaces a
+        // tool as `mcp__<server>__<tool>`, we still count the server.
+        // The primary tracking path is the MCP plugin-adapter's onCall
+        // observer (wired in syncPluginsToSandbox above) which catches
+        // calls made via `host:mcp-<name>` imports too.
         if (success && toolName.startsWith("mcp__")) {
           const server = toolName.split("__")[1];
           if (server) state.mcpServersUsed.add(server);
 
@@ -33,6 +33,22 @@ export type WriteSafetyGate = (
   annotations: MCPToolAnnotations | undefined,
 ) => Promise<boolean>;
 
+/**
+ * Callback invoked for every MCP tool call that the adapter is about
+ * to dispatch.  Used by the agent to track which MCP servers were
+ * actually exercised in this session (independent of whether the LLM
+ * called the tool via a top-level `mcp__*` name or imported it as a
+ * `host:mcp-<name>` module from inside `execute_javascript`).
+ *
+ * Fires after the write-safety gate (if any) has approved, but before
+ * the call is dispatched to the manager.  Synchronous on purpose so
+ * tracking cannot delay the call.
+ *
+ * @param serverName - MCP server name (e.g. "work-iq-mail")
+ * @param toolName   - Tool name being invoked
+ */
+export type MCPCallObserver = (serverName: string, toolName: string) => void;
+
 /**
  * PluginRegistration-compatible interface.
  * Matches the shape expected by src/sandbox/tool.js setPlugins().
@@ -55,12 +71,16 @@ export interface MCPPluginRegistration {
  * @param manager - The client manager for making tool calls.
  * @param gate - Optional write-safety gate. When provided, non-read-only
  *   tools are checked before execution.
+ * @param onCall - Optional observer fired immediately before each tool
+ *   call is dispatched, i.e. after the write-safety gate (if any) has
+ *   approved it.  Used by the agent for session-learning tracking
+ *   (see state.mcpServersUsed).
  * @returns A PluginRegistration that can be passed to setPlugins().
  */
 export function createMCPPluginAdapter(
   conn: MCPConnection,
   manager: MCPClientManager,
   gate?: WriteSafetyGate,
+  onCall?: MCPCallObserver,
 ): MCPPluginRegistration {
   const moduleName = `mcp-${conn.name}`;
 
@@ -93,6 +113,11 @@ export function createMCPPluginAdapter(
             }
           }
 
+          // Notify any observer that we are about to dispatch.  The
+          // observer is intentionally invoked AFTER the gate so denied
+          // calls do not pollute session-learning state.
+          onCall?.(conn.name, tool.name);
+
           return manager.callTool(conn.name, tool.name, toolArgs);
         };
       }
 
@@ -30,6 +30,15 @@ const MAX_ERRORS_REPORTED = 8;
  */
 const MAX_TOP_TOOLS = 10;
 
+/**
+ * Maximum characters of the user's most-recent prompt kept in the
+ * session-context summary.  Anything longer is truncated with an
+ * ellipsis — the LLM only needs the gist of the original task to
+ * anchor the SKILL.md it writes, and a 50-KB paste here would
+ * dominate the prompt and crowd out the actual session history.
+ */
+const MAX_USER_PROMPT_CHARS = 2000;
+
 // ── Types ────────────────────────────────────────────────────────────
 
 /**
@@ -95,8 +104,16 @@ export function extractSessionContext(state: AgentState): SessionContext {
     .sort((a, b) => b.count - a.count)
     .slice(0, MAX_TOP_TOOLS);
 
+  // Truncate the user prompt so a giant paste doesn't dominate the
+  // session-context summary.  We keep the head — the leading phrase
+  // is the strongest signal of intent.
+  const userPrompt =
+    state.currentUserPrompt.length > MAX_USER_PROMPT_CHARS
+      ? state.currentUserPrompt.slice(0, MAX_USER_PROMPT_CHARS) + "…"
+      : state.currentUserPrompt;
+
   return {
-    userPrompt: state.currentUserPrompt,
+    userPrompt,
     topTools,
     mcpServersUsed: Array.from(state.mcpServersUsed).sort(),
     modulesRegistered: [...state.modulesRegistered].sort(),
 
@@ -29,9 +29,11 @@ import { ALLOWED_TOOLS } from "./tool-gating.js";
 /**
  * Root directory for user-created skills.
  *
- * Defaults to `~/.hyperagent/skills/`.  The `HYPERAGENT_USER_SKILLS_DIR`
- * environment variable overrides the default — tests use this to point
- * at a temporary directory without polluting the real user library.
+ * Resolved at MODULE LOAD TIME.  Tests that need to redirect the path
+ * to a tmpdir must set `HYPERAGENT_USER_SKILLS_DIR` and then re-import
+ * this module via `vi.resetModules()` + dynamic `import()` — see
+ * `tests/skill-writer.test.ts` for the pattern.  Setting the env var
+ * AFTER the first import has no effect.
  */
 const DEFAULT_USER_SKILLS_DIR =
   process.env.HYPERAGENT_USER_SKILLS_DIR ??
@@ -49,6 +51,27 @@ const MAX_TRIGGERS = 50;
 /** Kebab-case name pattern (lowercase letters, digits, hyphens). */
 const VALID_NAME_RE = /^[a-z][a-z0-9-]*$/;
 
+/**
+ * Names that double as `/skills` subcommands — accepting them as skill
+ * names would let `/skills <name>` shadow `/skills info|edit|delete|list`
+ * and create confusing CLI behaviour.
+ */
+const RESERVED_SKILL_NAMES: ReadonlySet<string> = new Set([
+  "info",
+  "edit",
+  "delete",
+  "list",
+]);
+
+/**
+ * Pattern matching values that would break a single-line `key: value`
+ * frontmatter representation — any CR/LF character (newlines split
+ * fields) or a value that is exactly `---` (the frontmatter
+ * delimiter).  Used by `validateSkillData` to reject payloads that
+ * would otherwise need heavy YAML escaping in `renderSkillMarkdown`.
+ */
+const YAML_UNSAFE_RE = /[\r\n]|^---$/;
+
 // ── Types ────────────────────────────────────────────────────────────
 
 /** Input data for a new skill, mirroring SKILL.md frontmatter fields. */
@@ -101,7 +124,8 @@ export function getUserSkillsDir(): string {
  * Validate a skill name. Returns an error message string, or null if valid.
  *
  * Rules: kebab-case (lowercase letters, digits, hyphens; must start with a
- * letter), ≤64 characters, no path traversal characters.
+ * letter), ≤64 characters, no path traversal characters, and not one of
+ * the reserved `/skills` subcommand names.
  */
 export function validateSkillName(name: string): string | null {
   if (!name) return "Skill name must not be empty";
@@ -112,6 +136,9 @@ export function validateSkillName(name: string): string | null {
   if (name.includes("..") || name.includes("/") || name.includes("\\")) {
     return "Skill name must not contain path traversal characters";
   }
+  if (RESERVED_SKILL_NAMES.has(name)) {
+    return `Skill name '${name}' is reserved (collides with a /skills subcommand)`;
+  }
   return null;
 }
 
@@ -141,6 +168,10 @@ export function validateSkillData(
     errors.push(
       `Skill description must be ≤${MAX_DESCRIPTION_LENGTH} characters`,
     );
+  } else if (YAML_UNSAFE_RE.test(data.description)) {
+    errors.push(
+      "Skill description must be a single line (no newlines or '---')",
+    );
   }
 
   if (!Array.isArray(data.triggers) || data.triggers.length === 0) {
@@ -153,6 +184,12 @@ export function validateSkillData(
         errors.push("Triggers must be non-empty strings");
         break;
       }
+      if (YAML_UNSAFE_RE.test(t)) {
+        errors.push(
+          "Triggers must be single-line strings (no newlines or '---')",
+        );
+        break;
+      }
     }
   }
 
@@ -252,9 +289,12 @@ export function writeUserSkill(data: SkillData, patternsDir: string): string {
   }
 
   const rendered = renderSkillMarkdown(data);
-  if (rendered.length > MAX_SKILL_SIZE_BYTES) {
+  // Size cap is on disk-bytes (UTF-8) — `.length` would under-count for
+  // multi-byte characters and let a payload sneak past the limit.
+  const renderedBytes = Buffer.byteLength(rendered, "utf-8");
+  if (renderedBytes > MAX_SKILL_SIZE_BYTES) {
     throw new Error(
-      `SKILL.md exceeds maximum size (${rendered.length} bytes > ${MAX_SKILL_SIZE_BYTES} bytes)`,
+      `SKILL.md exceeds maximum size (${renderedBytes} bytes > ${MAX_SKILL_SIZE_BYTES} bytes)`,
     );
   }