Skip to content

Commit 10db093

Browse files
author
StackMemory Bot (CLI)
committed
feat: add deterministic harness smoke tooling
1 parent 39c1b39 commit 10db093

19 files changed

Lines changed: 1467 additions & 252 deletions

File tree

.husky/pre-commit

Lines changed: 12 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1,43 +1,14 @@
1-
#!/bin/sh
2-
# GITBUTLER_MANAGED_HOOK_V1
3-
# This hook is managed by GitButler to prevent accidental commits on the workspace branch.
4-
# Your original pre-commit hook has been preserved as 'pre-commit-user'.
5-
6-
HOOKS_DIR=$(dirname "$0")
7-
8-
# Run user's hook first if it exists - if it fails, stop here
9-
if [ -x "$HOOKS_DIR/pre-commit-user" ]; then
10-
"$HOOKS_DIR/pre-commit-user" "$@" || exit $?
11-
fi
12-
13-
# Get the current branch name
14-
BRANCH=$(git symbolic-ref --short HEAD 2>/dev/null)
15-
16-
if [ "$BRANCH" = "gitbutler/workspace" ]; then
17-
echo ""
18-
echo "GITBUTLER_ERROR: Cannot commit directly to gitbutler/workspace branch."
19-
echo ""
20-
echo "GitButler manages commits on this branch. Please use GitButler to commit your changes:"
21-
echo " - Use the GitButler app to create commits"
22-
echo " - Or run 'but commit' from the command line"
23-
echo ""
24-
echo "If you want to exit GitButler mode and use normal git:"
25-
echo " - Run 'but teardown' to switch to a regular branch"
26-
echo " - Or directly checkout another branch: git checkout <branch>"
27-
echo ""
28-
echo "If you no longer have the GitButler CLI installed, you can simply remove this hook and checkout another branch:"
29-
printf ' rm "%s/pre-commit"\n' "$HOOKS_DIR"
30-
echo ""
31-
exit 1
32-
fi
33-
34-
# Not on workspace branch - run user's original hook if it exists
35-
if [ -x "$HOOKS_DIR/pre-commit-user" ]; then
36-
echo ""
37-
echo "WARNING: GitButler's pre-commit hook is still installed but you're not on gitbutler/workspace."
38-
echo "If you're no longer using GitButler, you can restore your original hook:"
39-
printf ' mv "%s/pre-commit-user" "%s/pre-commit"\n' "$HOOKS_DIR" "$HOOKS_DIR"
40-
echo ""
# Use the Node version pinned in .nvmrc.
#
# Preferred path: load nvm and let `nvm use` pick the version.
# Fallback path (nvm.sh missing but versions are installed): find a matching
# install under ~/.nvm/versions/node and put its bin directory first in PATH.
export NVM_DIR="$HOME/.nvm"
if [ -s "$NVM_DIR/nvm.sh" ]; then
  . "$NVM_DIR/nvm.sh"
  nvm use 2>/dev/null
elif [ -d "$HOME/.nvm/versions/node" ]; then
  # Default to Node 20 when .nvmrc is missing or empty.
  NODE_VER=$(cat "$(git rev-parse --show-toplevel)/.nvmrc" 2>/dev/null || echo "20")
  # .nvmrc commonly carries a trailing newline/CR and may be written as
  # "v20.11.0"; normalise so the "v${NODE_VER}" glob below does not turn into
  # "vv20.11.0" and silently match nothing.
  NODE_VER=$(printf '%s' "$NODE_VER" | tr -d '[:space:]')
  NODE_VER=${NODE_VER#v}
  [ -n "$NODE_VER" ] || NODE_VER="20"
  # Deliberately NOT named NODE_PATH: that is Node's module search path
  # variable, and assigning to it would overwrite a value the user exports.
  NODE_INSTALL_DIR=$(ls -d "$HOME/.nvm/versions/node/v${NODE_VER}"* 2>/dev/null | head -1)
  if [ -n "$NODE_INSTALL_DIR" ]; then
    export PATH="$NODE_INSTALL_DIR/bin:$PATH"
  fi
fi
4211

43-
exit 0
12+
npx lint-staged
13+
bash scripts/determinism-pre-commit.sh
14+
npm run build

package.json

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,11 @@
114114
"test:run": "vitest run",
115115
"test:pre-publish": "./scripts/test-pre-publish-quick.sh",
116116
"test:pre-commit": "vitest related --run --reporter=dot --silent --bail=1",
117+
"determinism:smoke": "node --import tsx src/cli/index.ts bench determinism --task \"Determinism probe\" --runs 5",
118+
"determinism:watch": "node --import tsx src/cli/index.ts bench determinism --task \"Determinism probe\" --runs 3 --watch",
119+
"determinism:latest": "node --import tsx src/cli/index.ts bench determinism --latest --json",
120+
"determinism:test": "npx vitest run src/orchestrators/multimodal/__tests__/determinism.test.ts --reporter=dot",
121+
"determinism:pre-commit": "bash scripts/determinism-pre-commit.sh",
117122
"prepublishOnly": "npm run build && npm run verify:dist && npm run test:pre-publish",
118123
"quality": "npm run lint && npm run test:run && npm run build",
119124
"dev": "tsx watch src/integrations/mcp/server.ts",

scripts/determinism-pre-commit.sh

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,67 @@
#!/bin/bash
#
# Determinism guard for the pre-commit hook.
#
# When staged files touch the multimodal harness (or the CLI/logger files it
# depends on), run the `bench determinism` smoke check and the determinism
# unit tests. Any score other than 100 blocks the commit.
#
# Environment overrides:
#   STACKMEMORY_DETERMINISM_SKIP=1    skip the guard entirely
#   STACKMEMORY_DETERMINISM_FILES     newline-separated file list to use
#                                     instead of the staged files
#   STACKMEMORY_DETERMINISM_RUNS      number of smoke runs (default: 3)
#   STACKMEMORY_DETERMINISM_TASK      task label passed to the bench command

set -euo pipefail

PROJECT_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
cd "$PROJECT_ROOT"

BLUE='\033[0;34m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'

# printf with %b expands only the colour escapes; the message itself goes
# through %s so backslashes in file names or report text are printed verbatim.
log_info() {
  printf '%b[determinism]%b %s\n' "$BLUE" "$NC" "$1"
}

log_success() {
  printf '%b[determinism]%b %s\n' "$GREEN" "$NC" "$1"
}

log_skip() {
  printf '%b[determinism]%b %s\n' "$YELLOW" "$NC" "$1"
}

# Failures go to stderr so they are not mistaken for a benign skip.
log_fail() {
  printf '%b[determinism]%b %s\n' "$RED" "$NC" "$1" >&2
}

# Succeeds only when $1 is a plain decimal number equal to 100
# (accepts 100, 100.0, 100.00, ...).
score_is_perfect() {
  awk -v score="$1" 'BEGIN { exit !(score ~ /^[0-9]+(\.[0-9]+)?$/ && score + 0 == 100) }'
}

if [ "${STACKMEMORY_DETERMINISM_SKIP:-0}" = "1" ]; then
  log_skip "Skipping because STACKMEMORY_DETERMINISM_SKIP=1"
  exit 0
fi

CHANGED_FILES="${STACKMEMORY_DETERMINISM_FILES:-}"
if [ -z "$CHANGED_FILES" ]; then
  CHANGED_FILES="$(git diff --cached --name-only --diff-filter=ACMR)"
fi

if [ -z "$CHANGED_FILES" ]; then
  log_skip "No staged files detected"
  exit 0
fi

RELEVANT_PATTERN='^(src/orchestrators/multimodal/|src/cli/commands/bench\.ts$|src/cli/index\.ts$|src/core/monitoring/logger\.ts$)'

# Use grep -E rather than ripgrep: a hook must not depend on an optional tool.
# grep exits 1 for "no match" (expected) and >1 for a real error; only the
# former may be treated as "nothing relevant staged".
FILTER_STATUS=0
RELEVANT_FILES="$(printf '%s\n' "$CHANGED_FILES" | grep -E -- "$RELEVANT_PATTERN")" || FILTER_STATUS=$?
if [ "$FILTER_STATUS" -gt 1 ]; then
  log_fail "Could not filter staged files (grep exited with $FILTER_STATUS)"
  exit "$FILTER_STATUS"
fi

if [ -z "$RELEVANT_FILES" ]; then
  log_skip "No harness determinism files staged"
  exit 0
fi

RUNS="${STACKMEMORY_DETERMINISM_RUNS:-3}"
TASK="${STACKMEMORY_DETERMINISM_TASK:-Determinism pre-commit}"

log_info "Running deterministic smoke check for staged harness files"
printf '%s\n' "$RELEVANT_FILES" | sed 's/^/ - /'

REPORT_JSON="$(node --import tsx src/cli/index.ts bench determinism --task "$TASK" --runs "$RUNS" --json)"

# Read the report from stdin and print its `score` field.
EXTRACT_SCORE_JS='let data="";process.stdin.on("data",d=>data+=d);process.stdin.on("end",()=>{const report=JSON.parse(data);process.stdout.write(String(report.score));});'

if ! SCORE="$(printf '%s' "$REPORT_JSON" | node -e "$EXTRACT_SCORE_JS")"; then
  log_fail "Could not parse determinism report as JSON"
  printf '%s\n' "$REPORT_JSON" >&2
  exit 1
fi

if ! score_is_perfect "$SCORE"; then
  log_fail "Determinism smoke failed with score $SCORE/100"
  printf '%s\n' "$REPORT_JSON" >&2
  exit 1
fi

log_info "Running deterministic harness tests"
npx vitest run src/orchestrators/multimodal/__tests__/determinism.test.ts --reporter=dot

log_success "Determinism guard passed ($SCORE/100)"

src/cli/claude-sm.ts

Lines changed: 68 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,17 @@ import { program } from 'commander';
1717
import { v4 as uuidv4 } from 'uuid';
1818
import chalk from 'chalk';
1919
import { initializeTracing, trace } from '../core/trace/index.js';
20+
import { resolveRealCliBin } from './utils/real-cli-bin.js';
21+
import {
22+
type DeterminismWatcherHandle,
23+
startDeterminismWatcher,
24+
stopDeterminismWatcher,
25+
} from './utils/determinism-watcher.js';
2026
import {
2127
canonicalStateStore,
2228
projectIdFromIdentifier,
2329
} from '../core/shared-state/canonical-store.js';
30+
import { loadProjectHandoff } from '../core/session/project-handoff.js';
2431
import {
2532
getModelRouter,
2633
loadModelRouterConfig,
@@ -136,6 +143,8 @@ class ClaudeSM {
136143
private sessionId: string;
137144
private ownsSession: boolean;
138145
private sessionEnded: boolean;
146+
private determinismWatcher: DeterminismWatcherHandle | null;
147+
private skippedHandoffReason: string | null;
139148

140149
constructor() {
141150
// Load persistent defaults
@@ -166,6 +175,8 @@ class ClaudeSM {
166175
this.sessionId = process.env['STACKMEMORY_SESSION'] || uuidv4();
167176
this.ownsSession = !process.env['STACKMEMORY_SESSION'];
168177
this.sessionEnded = false;
178+
this.determinismWatcher = null;
179+
this.skippedHandoffReason = null;
169180

170181
// Ensure config directory exists
171182
if (!fs.existsSync(this.claudeConfigDir)) {
@@ -275,19 +286,16 @@ class ClaudeSM {
275286
}
276287

277288
private resolveClaudeBin(): string | null {
278-
// 1) CLI-specified
279-
if (this.config.claudeBin && this.config.claudeBin.trim()) {
280-
return this.config.claudeBin.trim();
281-
}
282-
// 2) Env override
283-
const envBin = process.env['CLAUDE_BIN'];
284-
if (envBin && envBin.trim()) return envBin.trim();
285-
// 3) PATH detection
286-
try {
287-
execSync('which claude', { stdio: 'ignore' });
288-
return 'claude';
289-
} catch {}
290-
return null;
289+
return resolveRealCliBin({
290+
explicitBin: this.config.claudeBin,
291+
envBin: process.env['CLAUDE_BIN'],
292+
preferredPaths: [
293+
path.join(os.homedir(), '.local', 'bin', 'claude'),
294+
'/usr/local/bin/claude',
295+
'/opt/homebrew/bin/claude',
296+
],
297+
pathCommands: ['claude'],
298+
});
291299
}
292300

293301
private gepaProcesses: ReturnType<typeof spawn>[] = [];
@@ -373,6 +381,30 @@ class ClaudeSM {
373381
this.gepaProcesses = [];
374382
}
375383

/**
 * Starts the determinism watcher for this session and stores the returned
 * handle. When a handle is returned, logs which mode it reports.
 */
private startDeterminismWatcher(): void {
  const handle = startDeterminismWatcher({
    stackmemoryBin: this.stackmemoryPath,
    cwd: process.cwd(),
    task: this.config.task,
    instanceId: this.config.instanceId,
    sessionId: this.sessionId,
    tool: 'claude',
  });
  this.determinismWatcher = handle;

  // No handle means there is nothing to report.
  if (!handle) {
    return;
  }

  // Any mode other than 'targeted' is presented as the repo-root fallback.
  let modeLabel = 'repo-root fallback';
  if (handle.mode === 'targeted') {
    modeLabel = 'targeted';
  }
  console.log(chalk.gray(` Determinism: ${modeLabel}`));
}
402+
/**
 * Stops the determinism watcher held by this instance (the handle may be
 * null) and then clears the stored handle.
 */
private stopDeterminismWatcher(): void {
  const activeHandle = this.determinismWatcher;
  stopDeterminismWatcher(activeHandle);
  this.determinismWatcher = null;
}
407+
376408
private setupWorktree(): string | null {
377409
if (!this.config.useWorktree || !this.isGitRepo()) {
378410
return null;
@@ -489,19 +521,25 @@ class ClaudeSM {
489521
if (!this.config.contextEnabled) return null;
490522

491523
try {
492-
const handoffPath = path.join(
524+
const handoff = loadProjectHandoff(
493525
process.cwd(),
494-
'.stackmemory',
495-
'last-handoff.md'
526+
this.isGitRepo() ? this.getCurrentBranch() : undefined
496527
);
497-
if (fs.existsSync(handoffPath)) {
498-
const content = fs.readFileSync(handoffPath, 'utf8').trim();
499-
if (content.length > 0) {
500-
// Cap at 8000 chars to avoid excessively long system prompts
501-
return content.length > 8000
502-
? content.substring(0, 8000) + '\n\n[...truncated]'
503-
: content;
504-
}
528+
if (!handoff) {
529+
this.skippedHandoffReason = null;
530+
return null;
531+
}
532+
if (!handoff.compatible) {
533+
this.skippedHandoffReason = handoff.mismatchReason || 'stale handoff';
534+
return null;
535+
}
536+
this.skippedHandoffReason = null;
537+
const content = handoff.content.trim();
538+
if (content.length > 0) {
539+
// Cap at 8000 chars to avoid excessively long system prompts
540+
return content.length > 8000
541+
? content.substring(0, 8000) + '\n\n[...truncated]'
542+
: content;
505543
}
506544
} catch {
507545
// Silently continue - handoff loading is optional
@@ -759,6 +797,7 @@ class ClaudeSM {
759797
payload: Record<string, unknown> = {}
760798
): Promise<void> {
761799
this.stopGEPAWatcher();
800+
this.stopDeterminismWatcher();
762801

763802
this.saveContext(
764803
eventType === 'session_end'
@@ -1045,6 +1084,7 @@ class ClaudeSM {
10451084
}
10461085

10471086
await this.publishSessionStart();
1087+
this.startDeterminismWatcher();
10481088
console.log(chalk.gray(`🤖 Instance ID: ${this.config.instanceId}`));
10491089
console.log(chalk.gray(`🧠 Session ID: ${this.sessionId.slice(0, 8)}`));
10501090
console.log(chalk.gray(`📁 Working in: ${process.cwd()}`));
@@ -1133,6 +1173,10 @@ class ClaudeSM {
11331173
if (handoffContent) {
11341174
initialInput = handoffContent;
11351175
console.log(chalk.gray(' Handoff context ready'));
1176+
} else if (this.skippedHandoffReason) {
1177+
console.log(
1178+
chalk.gray(` Handoff skipped: ${this.skippedHandoffReason}`)
1179+
);
11361180
}
11371181

11381182
const theoryContent = this.getTheoryContent();

0 commit comments

Comments
 (0)