code-yeongyu · potb · Jan 25, 2026 · Jan 25, 2026 · Jan 25, 2026 · Jan 25, 2026
diff --git a/.gitignore b/.gitignore
@@ -2,6 +2,9 @@
 .sisyphus/
 node_modules/
 
+# Debug mode artifacts
+.opencode/debug/
+
 # Build output
 dist/
 

diff --git a/src/features/builtin-commands/commands.test.ts b/src/features/builtin-commands/commands.test.ts
@@ -0,0 +1,41 @@
+import { describe, test, expect } from "bun:test"
+import { loadBuiltinCommands } from "./commands"
+
+describe("loadBuiltinCommands", () => {
+  test("should include debug command", () => {
+    // #given - default loading (no disabled commands)
+    // #when
+    const commands = loadBuiltinCommands()
+
+    // #then - debug command should exist with correct structure
+    expect(commands["debug"]).toBeDefined()
+    expect(commands["debug"].name).toBe("debug")
+    expect(commands["debug"].description).toContain("Debug runtime issues")
+    expect(commands["debug"].template).toContain("DEBUG MODE")
+  })
+
+  test("should respect disabled commands", () => {
+    // #given - debug command disabled
+    // #when
+    const commands = loadBuiltinCommands(["debug"])
+
+    // #then - debug command should not exist
+    expect(commands["debug"]).toBeUndefined()
+  })
+
+  test("should have 7 builtin commands total", () => {
+    // #given / #when
+    const commands = loadBuiltinCommands()
+
+    // #then - all 7 commands present
+    const commandNames = Object.keys(commands)
+    expect(commandNames).toHaveLength(7)
+    expect(commandNames).toContain("init-deep")
+    expect(commandNames).toContain("ralph-loop")
+    expect(commandNames).toContain("ulw-loop")
+    expect(commandNames).toContain("cancel-ralph")
+    expect(commandNames).toContain("refactor")
+    expect(commandNames).toContain("start-work")
+    expect(commandNames).toContain("debug")
+  })
+})
diff --git a/src/features/builtin-commands/commands.ts b/src/features/builtin-commands/commands.ts
@@ -4,6 +4,7 @@ import { INIT_DEEP_TEMPLATE } from "./templates/init-deep"
 import { RALPH_LOOP_TEMPLATE, CANCEL_RALPH_TEMPLATE } from "./templates/ralph-loop"
 import { REFACTOR_TEMPLATE } from "./templates/refactor"
 import { START_WORK_TEMPLATE } from "./templates/start-work"
+import { DEBUG_TEMPLATE } from "./templates/debug"
 
 const BUILTIN_COMMAND_DEFINITIONS: Record<BuiltinCommandName, Omit<CommandDefinition, "name">> = {
   "init-deep": {
@@ -68,9 +69,15 @@ Timestamp: $TIMESTAMP
 <user-request>
 $ARGUMENTS
 </user-request>`,
     argumentHint: "[plan-name]",
   },
+  debug: {
+    description: "(builtin) Debug runtime issues with hypothesis-driven instrumentation",
+    template: `<command-instruction>
+${DEBUG_TEMPLATE}
+</command-instruction>`,
+  },
 }
 
 export function loadBuiltinCommands(
   disabledCommands?: BuiltinCommandName[]

diff --git a/src/features/builtin-commands/templates/debug.ts b/src/features/builtin-commands/templates/debug.ts
@@ -0,0 +1,43 @@
+export const DEBUG_TEMPLATE = `
+# Debug Mode
+
+You are now in DEBUG MODE for hypothesis-driven runtime debugging.
+
+## FIRST: Ensure .gitignore is Updated (DO THIS IMMEDIATELY)
+
+Before anything else, check if \`.opencode/debug/\` is in the project's .gitignore:
+1. Read the project's .gitignore file (create if it doesn't exist)
+2. If \`.opencode/debug/\` is NOT present, append it:
+   \`\`\`
+   # Debug mode artifacts (oh-my-opencode)
+   .opencode/debug/
+   \`\`\`
+3. Confirm to user: "✓ Updated .gitignore to exclude debug artifacts"
+
+## Quick Start Workflow
+1. Ask the user to describe the bug they're experiencing
+2. Generate 3-5 specific, testable hypotheses (labeled A, B, C, D, E)
+3. Create the debug server: .opencode/debug/server.js (port 7777)
+4. Start the server: \`node .opencode/debug/server.js &\`
+5. Instrument code with __debugLog(hypothesisId, label, location, message, data?) calls
+6. Ask user to reproduce the bug
+7. Read and analyze .opencode/debug/debug.log (NDJSON format)
+8. Propose a fix based on the evidence
+9. After user verifies fix: remove instrumentation and cleanup
+
+## Detailed Implementation Reference
+For complete details (server code, NDJSON schema, instrumentation patterns for JS/TS/Python/Go), load the **runtime-debugging** skill:
+\`\`\`
+/runtime-debugging
+\`\`\`
+
+## Important
+- Each hypothesis gets its own hypothesisId (A, B, C, etc.)
+- Artifacts go in .opencode/debug/ (automatically added to .gitignore)
+- Cleanup: remove instrumentation calls, stop server, delete .opencode/debug/
+
+## Frontend CSP Note
+If debugging browser code, Content Security Policy (CSP) may block connections to localhost:7777. Check browser console for "Refused to connect" errors. The runtime-debugging skill includes detailed CSP detection and handling instructions.
+
+Start by updating .gitignore (if needed), then ask: "What bug are you experiencing? Please describe what happens and what you expected to happen."
+`
diff --git a/src/features/builtin-commands/types.ts b/src/features/builtin-commands/types.ts
@@ -1,6 +1,6 @@
 import type { CommandDefinition } from "../claude-code-command-loader"
 
-export type BuiltinCommandName = "init-deep" | "ralph-loop" | "cancel-ralph" | "ulw-loop" | "refactor" | "start-work"
+export type BuiltinCommandName = "init-deep" | "ralph-loop" | "cancel-ralph" | "ulw-loop" | "refactor" | "start-work" | "debug"
 
 export interface BuiltinCommandConfig {
   disabled_commands?: BuiltinCommandName[]

diff --git a/src/features/builtin-skills/skills.test.ts b/src/features/builtin-skills/skills.test.ts
@@ -75,15 +75,30 @@ describe("createBuiltinSkills", () => {
 		}
 	})
 
-	test("returns exactly 3 skills regardless of provider", () => {
+	test("returns exactly 5 skills regardless of provider", () => {
 		// #given
 
 		// #when
 		const defaultSkills = createBuiltinSkills()
 		const agentBrowserSkills = createBuiltinSkills({ browserProvider: "agent-browser" })
 
 		// #then
-		expect(defaultSkills).toHaveLength(3)
-		expect(agentBrowserSkills).toHaveLength(3)
+		expect(defaultSkills).toHaveLength(5)
+		expect(agentBrowserSkills).toHaveLength(5)
+	})
+
+	test("includes runtime-debugging skill", () => {
+		// #given
+
+		// #when
+		const skills = createBuiltinSkills()
+
+		// #then
+		const runtimeDebuggingSkill = skills.find((s) => s.name === "runtime-debugging")
+		expect(runtimeDebuggingSkill).toBeDefined()
+		expect(runtimeDebuggingSkill!.description).toContain("runtime")
+		expect(runtimeDebuggingSkill!.template).toContain("Debug Server")
+		expect(runtimeDebuggingSkill!.template).toContain("NDJSON")
+		expect(runtimeDebuggingSkill!.template).toContain("hypothesisId")
 	})
 })