Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 26 additions & 15 deletions scripts/evals/run-ricky-evals.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -338,26 +338,37 @@ function readGeneratedArtifactContent(stdout, workingDir) {
const realWorkingDir = safeRealpath(workingDir);
if (!realWorkingDir) return '';
for (const artifactPath of artifactPaths) {
const fullPath = path.resolve(workingDir, artifactPath);
if (!existsSync(fullPath)) continue;
const realFullPath = safeRealpath(fullPath);
if (!realFullPath) continue;
if (realFullPath !== realWorkingDir && !realFullPath.startsWith(`${realWorkingDir}${path.sep}`)) {
continue;
}
try {
if (!statSync(realFullPath).isFile()) continue;
} catch {
continue;
for (const generatedPath of generatedArtifactAndSidecarPaths(artifactPath)) {
const fullPath = path.resolve(workingDir, generatedPath);
if (!existsSync(fullPath)) continue;
const realFullPath = safeRealpath(fullPath);
if (!realFullPath) continue;
if (realFullPath !== realWorkingDir && !realFullPath.startsWith(`${realWorkingDir}${path.sep}`)) {
continue;
}
try {
if (!statSync(realFullPath).isFile()) continue;
} catch {
continue;
}
sections.push([
`\n--- GENERATED ARTIFACT: ${generatedPath} ---`,
readFileSync(realFullPath, 'utf8'),
].join('\n'));
}
sections.push([
`\n--- GENERATED ARTIFACT: ${artifactPath} ---`,
readFileSync(realFullPath, 'utf8'),
].join('\n'));
}
return sections.join('\n');
}

function generatedArtifactAndSidecarPaths(artifactPath) {
const paths = [artifactPath];
if (artifactPath.startsWith('workflows/generated/') && artifactPath.endsWith('.ts')) {
const stem = artifactPath.slice(0, -'.ts'.length);
paths.push(`${stem}.spec.md`, `${stem}.children.json`);
}
return paths;
}

function safeRealpath(value) {
try {
return realpathSync(value);
Expand Down
47 changes: 7 additions & 40 deletions scripts/run-ricky-overnight.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1765,48 +1765,15 @@ repo_has_meaningful_delta() {
}

commit_if_clean_delta() {
# Auto-commit and auto-push directly to origin/main were disabled
# intentionally: this loop was shipping unreviewed changes straight to a
# shared branch (commits authored as "Miya"). Future progress capture
# belongs in a per-run branch + PR. Keep the function as a no-op so the
# callers don't need to change.
local workflow_path="$1"
local push_output_file="$ARTIFACT_DIR/git-push.txt"
local ahead="0"
local behind="0"

if ! repo_has_meaningful_delta; then
log "no tracked/untracked repo delta after $workflow_path"
return 0
fi

validate_repo

local short
local head_advanced="false"
short="$(basename "$workflow_path" .ts)"
if repo_has_captured_head_delta; then
head_advanced="true"
fi

if meaningful_tracked_delta_exists || meaningful_untracked_delta_exists; then
git add -A ':!tmp/' ':!.workflow-artifacts/' ':!.trajectories/'
git commit -m "chore(overnight): capture $short progress" || true
elif [[ "$head_advanced" == "true" ]]; then
log "repo HEAD already advanced during $workflow_path; capturing committed state"
fi

: > "$push_output_file"
if ! git push origin main >"$push_output_file" 2>&1; then
cat "$push_output_file" >&2 || true
git fetch origin main:refs/remotes/origin/main >/dev/null 2>&1 || true
if git show-ref --verify --quiet refs/remotes/origin/main; then
read -r ahead behind < <(git rev-list --left-right --count HEAD...refs/remotes/origin/main)
log "push rejected after $workflow_path; local main diverged from origin/main (ahead=${ahead}, behind=${behind})"
else
log "push rejected after $workflow_path; unable to read refs/remotes/origin/main for divergence details"
fi
inspect_repo_changes
return 1
fi

git rev-parse HEAD > "$LAST_COMMIT_FILE"
log "auto-commit/auto-push disabled; skipping post-workflow capture for $workflow_path"
inspect_repo_changes
return 0
}

workflow_hit_claude_rate_limit() {
Expand Down
27 changes: 26 additions & 1 deletion src/local/entrypoint.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import type { ChildProcess } from 'node:child_process';
import { spawn } from 'node:child_process';
import { createHash, randomUUID } from 'node:crypto';
import { createRequire } from 'node:module';
import { delimiter, dirname, isAbsolute, join, resolve } from 'node:path';
import { delimiter, dirname, isAbsolute, join, resolve, sep } from 'node:path';
import { createInterface } from 'node:readline';
import { pathToFileURL } from 'node:url';

Expand Down Expand Up @@ -924,6 +924,16 @@ function isNoSuchProcessError(error: unknown): boolean {
return typeof error === 'object' && error !== null && 'code' in error && (error as { code?: unknown }).code === 'ESRCH';
}

function resolveRepoContainedOutputPath(cwd: string, outputPath: string): string | undefined {
if (isAbsolute(outputPath)) return undefined;

const root = resolve(cwd);
const resolved = resolve(root, outputPath);
if (resolved === root) return undefined;
if (!resolved.startsWith(`${root}${sep}`)) return undefined;
return resolved;
}

async function workflowSdkLoaderNodeOption(cwd: string): Promise<string | undefined> {
const runtime = await resolveWorkflowSdkRuntime(cwd);
if (!runtime) return undefined;
Expand Down Expand Up @@ -1199,6 +1209,21 @@ export function createLocalExecutor(options: LocalExecutorOptions = {}): LocalEx

onProgress?.(`Writing workflow artifact to ${artifact.artifactPath}...`);
await artifactWriter.writeArtifact(artifact.artifactPath, artifact.content, cwd);
// Sidecar files travel with the master workflow on disk so the
// generated TS can stay small (master-renderer pushes the spec text
// and child source map here to avoid argv-blow-out from spawn E2BIG).
if (artifact.sidecarFiles) {
for (const [sidecarPath, sidecarContent] of Object.entries(artifact.sidecarFiles)) {
const resolvedSidecarPath = resolveRepoContainedOutputPath(cwd, sidecarPath);
if (!resolvedSidecarPath) {
warnings.push(`Skipped unsafe workflow sidecar path outside invocation root: ${sidecarPath}`);
logs.push(`[local] skipped unsafe workflow sidecar: ${sidecarPath}`);
continue;
}
await artifactWriter.writeArtifact(resolvedSidecarPath, sidecarContent, cwd);
logs.push(`[local] wrote workflow sidecar: ${sidecarPath}`);
Comment on lines +1215 to +1224
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Enforce repo-root confinement before writing sidecars.

sidecarPath is written without validating it stays inside the invocation root. A malformed/absolute path can write outside the workspace.

🔒 Proposed hardening
-import { delimiter, dirname, isAbsolute, join, resolve } from 'node:path';
+import { delimiter, dirname, isAbsolute, join, relative, resolve } from 'node:path';
@@
         if (artifact.sidecarFiles) {
+          const repoRoot = resolve(cwd);
           for (const [sidecarPath, sidecarContent] of Object.entries(artifact.sidecarFiles)) {
+            const resolvedSidecarPath = resolve(cwd, sidecarPath);
+            const rel = relative(repoRoot, resolvedSidecarPath);
+            if (rel.startsWith('..') || isAbsolute(rel)) {
+              throw new Error(`Invalid sidecar path outside workspace: ${sidecarPath}`);
+            }
             await artifactWriter.writeArtifact(sidecarPath, sidecarContent, cwd);
             logs.push(`[local] wrote workflow sidecar: ${sidecarPath}`);
           }
         }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if (artifact.sidecarFiles) {
for (const [sidecarPath, sidecarContent] of Object.entries(artifact.sidecarFiles)) {
await artifactWriter.writeArtifact(sidecarPath, sidecarContent, cwd);
logs.push(`[local] wrote workflow sidecar: ${sidecarPath}`);
if (artifact.sidecarFiles) {
const repoRoot = resolve(cwd);
for (const [sidecarPath, sidecarContent] of Object.entries(artifact.sidecarFiles)) {
const resolvedSidecarPath = resolve(cwd, sidecarPath);
const rel = relative(repoRoot, resolvedSidecarPath);
if (rel.startsWith('..') || isAbsolute(rel)) {
throw new Error(`Invalid sidecar path outside workspace: ${sidecarPath}`);
}
await artifactWriter.writeArtifact(sidecarPath, sidecarContent, cwd);
logs.push(`[local] wrote workflow sidecar: ${sidecarPath}`);
}
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/local/entrypoint.ts` around lines 1205 - 1208, The loop writes
artifact.sidecarFiles entries directly via
artifactWriter.writeArtifact(sidecarPath, ...) without validating paths; resolve
each sidecarPath against the invocation root (cwd) using path.resolve(cwd,
sidecarPath) and ensure the resulting absolute path is inside the repo root
(e.g., resolvedPath.startsWith(path.resolve(cwd) + path.sep)) to prevent
absolute paths or ../ traversal; if the check fails, skip or error and log a
warning instead of calling artifactWriter.writeArtifact; update the code around
artifact.sidecarFiles handling and the call site of artifactWriter.writeArtifact
to use the validated resolvedPath.

}
}
if (options.persistGenerationMetadataArtifacts === true) {
await writeGenerationMetadataArtifacts(generationResult, artifactWriter, cwd);
}
Expand Down
101 changes: 93 additions & 8 deletions src/product/generation/master-workflow-renderer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,16 @@ export function renderMasterExecutionWorkflow(input: RenderMasterWorkflowInput):
const gates = buildMasterGates(artifactsDir, plan, input.spec);
const skillApplicationEvidence = buildMasterSkillEvidence(input.skills);
const toolSelections = buildMasterToolSelections(plan);
// Sidecars travel with the generated workflow on disk. Embedding the spec
// and the child-source map inline ballooned the master workflow past
// ARG_MAX (spawn E2BIG) every time `materialize-child-workflows` ran; the
// sidecars push that payload off of argv entirely.
const artifactPathNoExt = artifactPath.replace(/\.ts$/, '');
const specSidecarPath = `${artifactPathNoExt}.spec.md`;
const childrenSidecarPath = `${artifactPathNoExt}.children.json`;
const childSources = Object.fromEntries(
plan.children.map((child) => [child.workflowFilePath, childWorkflowSource(child, input.spec, specSidecarPath)]),
);
const content = renderMasterSource({
spec: input.spec,
pattern: input.pattern,
Expand All @@ -191,6 +201,9 @@ export function renderMasterExecutionWorkflow(input: RenderMasterWorkflowInput):
plan,
skills: input.skills,
skillApplicationEvidence,
childSources,
specSidecarPath,
childrenSidecarPath,
});

return {
Expand All @@ -210,6 +223,10 @@ export function renderMasterExecutionWorkflow(input: RenderMasterWorkflowInput):
skillMatches: input.skills.matches,
toolSelections,
artifactsDir,
sidecarFiles: {
[specSidecarPath]: input.spec.description,
[childrenSidecarPath]: `${JSON.stringify(childSources, null, 2)}\n`,
},
},
};
}
Expand Down Expand Up @@ -240,10 +257,10 @@ function renderMasterSource(input: {
plan: MasterExecutionPlan;
skills: SkillContext;
skillApplicationEvidence: SkillApplicationEvidence[];
childSources: Record<string, string>;
specSidecarPath: string;
childrenSidecarPath: string;
}): string {
const childSources = Object.fromEntries(
input.plan.children.map((child) => [child.workflowFilePath, childWorkflowSource(child, input.spec)]),
);
const planJson = JSON.stringify(input.plan, null, 2);
const skillMatchesJson = JSON.stringify(input.skills.matches, null, 2);
const skillBoundaryJson = JSON.stringify({
Expand All @@ -253,11 +270,14 @@ function renderMasterSource(input: {
loadedSkills: input.skills.applicableSkillNames,
applicationEvidence: input.skillApplicationEvidence,
}, null, 2);
// Read the child source map from disk at runtime instead of inlining the
// JSON into this shell command. Inlining once N children deep produced a
// multi-megabyte argv element and triggered `spawn E2BIG`.
const materializeCommand = [
'node --input-type=module <<\'NODE\'',
'import { mkdirSync, writeFileSync } from \'node:fs\';',
'import { mkdirSync, writeFileSync, readFileSync } from \'node:fs\';',
'import { dirname } from \'node:path\';',
`const childSources = ${JSON.stringify(childSources, null, 2)};`,
`const childSources = JSON.parse(readFileSync(${literal(input.childrenSidecarPath)}, 'utf8'));`,
'for (const [filePath, source] of Object.entries(childSources)) {',
' mkdirSync(dirname(filePath), { recursive: true });',
' writeFileSync(filePath, source, \'utf8\');',
Expand Down Expand Up @@ -330,6 +350,12 @@ function renderMasterSource(input: {
'echo RICKY_MASTER_FINAL_VALIDATION_READY',
];

// The master workflow's `.description()` is sent to runtime agents as
// context. Inlining the full spec text here meant every agent invocation
// carried ~100KB+ of markdown that the agents did not need (the spec lives
// on disk at specSidecarPath and child slices `cp` it locally). The short
// ref keeps the description small without losing the pointer.
const masterDescription = `${firstHeadingOrSummary(input.spec.description)} (full spec on disk at ${input.specSidecarPath}; child workflows read it from there).`;
return `${[
"import { workflow } from '@agent-relay/sdk/workflows';",
'',
Expand All @@ -339,7 +365,7 @@ function renderMasterSource(input: {
'',
'async function main() {',
` const result = await workflow(${literal(input.workflowId)})`,
` .description(${literal(input.spec.description)})`,
` .description(${literal(masterDescription)})`,
` .pattern(${literal(input.pattern.pattern)})`,
` .channel(${literal(input.channel)})`,
' .maxConcurrency(4)',
Expand Down Expand Up @@ -471,7 +497,7 @@ function renderChildRunStep(child: ChildWorkflowPlan): string[] {
];
}

export function childWorkflowSource(child: ChildWorkflowPlan, spec: NormalizedWorkflowSpec): string {
export function childWorkflowSource(child: ChildWorkflowPlan, spec: NormalizedWorkflowSpec, specSidecarPath?: string): string {
const artifactsDir = child.signoffArtifactPath.replace(/\/signoff\.md$/, '');
const validationCommand = child.validationCommands[0] ?? 'npm run typecheck';
const targetScope = child.targetFiles.length > 0 ? child.targetFiles.join(' ') : 'NO_TARGET_FILES_DECLARED';
Expand Down Expand Up @@ -515,7 +541,14 @@ export function childWorkflowSource(child: ChildWorkflowPlan, spec: NormalizedWo
` command: ${literal([
'set -e',
`mkdir -p ${shellQuote(artifactsDir)}`,
`printf '%s\\n' ${shellQuote(spec.description)} > ${shellQuote(`${artifactsDir}/normalized-spec.txt`)}`,
// Copy the spec from its sidecar file rather than embedding it via
// `printf '%s\\n' '<huge spec text>'`. The embedded form pushed every
// child's prepare-context heredoc to ~100KB and made the materialize
// step's argv multi-megabyte. When the sidecar path isn't known
// (legacy call sites), fall back to a no-op so this child still runs.
specSidecarPath
? `cp ${shellQuote(specSidecarPath)} ${shellQuote(`${artifactsDir}/normalized-spec.txt`)}`
: `: > ${shellQuote(`${artifactsDir}/normalized-spec.txt`)}`,
`printf '%s\\n' ${shellQuote(targetScope)} > ${shellQuote(`${artifactsDir}/target-files.txt`)}`,
// Snapshot the worktree's dirty set BEFORE this child touches anything.
// The master executor runs every child in the SAME checkout, so by the
Expand Down Expand Up @@ -899,6 +932,58 @@ function literal(value: string | string[]): string {
return JSON.stringify(value);
}

function firstHeadingOrSummary(specText: string): string {
const candidate = firstMarkdownHeadingOrParagraph(specText)
?? specText.split('\n').find((line) => line.trim().length > 0)?.trim()
?? 'Ricky master workflow';
return candidate.length > 200 ? `${candidate.slice(0, 197)}...` : candidate;
}
Comment on lines +935 to +940
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion | 🟠 Major | ⚡ Quick win

Use mdast traversal for heading extraction, not regex line matching.

This helper currently inspects markdown with split + regex. Switch to fromMarkdown(...) AST traversal to extract the first H1/H2 and fallback text.

As per coding guidelines, "Default to AST walk for the file's grammar (TS, mdast, JSON.parse) before using tokenizers, regex, or substring matching when analyzing source text in Ricky implementation."

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/product/generation/master-workflow-renderer.ts` around lines 935 - 945,
Replace the regex/line-splitting logic in firstHeadingOrSummary with an mdast
parse-and-walk: call fromMarkdown(specText) to build the AST, find the first
heading node with depth 1 or 2 and use its text content (strip/trim), otherwise
fall back to the first non-empty paragraph or plain text node's value, and if
none exist return the existing default label ("Ricky master workflow"); preserve
the existing 200-character cap and truncation behavior when returning the
candidate.


function firstMarkdownHeadingOrParagraph(specText: string): string | undefined {
let root: Nodes;
try {
root = fromMarkdown(specText);
} catch {
return undefined;
}

let fallback: string | undefined;
const visit = (node: Nodes): string | undefined => {
if (node.type === 'heading' && 'depth' in node && (node.depth === 1 || node.depth === 2)) {
const text = markdownNodeText(node);
if (text) return text;
}

if (!fallback && (node.type === 'paragraph' || node.type === 'text')) {
fallback = markdownNodeText(node);
}

if ('children' in node) {
for (const child of node.children) {
const found = visit(child);
if (found) return found;
}
}

return undefined;
};

return visit(root) ?? fallback;
}

function markdownNodeText(node: Nodes): string | undefined {
if ('value' in node && typeof node.value === 'string') {
return node.value.replace(/\s+/g, ' ').trim() || undefined;
}
if (!('children' in node)) return undefined;
const text = node.children
.map((child) => markdownNodeText(child) ?? '')
.join(' ')
.replace(/\s+/g, ' ')
.trim();
return text || undefined;
}

function templateLiteral(value: string): string {
return `\`${value.replace(/\\/g, '\\\\').replace(/`/g, '\\`').replace(/\$\{/g, '\\${')}\``;
}
Expand Down
Loading