Skip to content
25 changes: 22 additions & 3 deletions .github/skills/coc-knowledge/references/ralph.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,31 @@ working-directory-relative `Plans/<area>/<feature>/` when none is given.

Session resume endpoints share infrastructure in
`packages/coc/src/server/routes/ralph-route-utils.ts`.
`/continue` and `/new-loop` both use it for in-flight Ralph task scans,
`additionalIterations` validation/default resolution, resume hard caps, and
best-effort recovery of `workingDirectory` / `folderPath` from the latest
`/continue`, `/new-loop`, and `/resume` all use it for in-flight Ralph task
scans, `additionalIterations` validation/default resolution, resume hard caps,
and best-effort recovery of `workingDirectory` / `folderPath` from the latest
iteration process. Final-check gap-fix loops use the same additional-iteration
resolver so per-repo `maxRalphIterations` fallback stays consistent.

### Resume Stuck Executing Sessions

`POST /api/workspaces/:workspaceId/ralph-sessions/:sessionId/resume`
(`packages/coc/src/server/routes/ralph-resume-routes.ts`) handles sessions
stuck in `phase=executing` with no in-flight task — the typical outcome when
the last iteration's task failed or was cancelled, or when the server crashed
mid-loop.

Eligibility: `phase === 'executing'` AND `currentIteration < maxIterations`
AND no queued/running task for this `sessionId`.

The endpoint appends a resume marker to `progress.md` (via
`appendResumeMarker`) and enqueues iteration `currentIteration + 1` without
changing `maxIterations`. If the session has reached its cap, the endpoint
returns 409 directing the user to `/continue` instead.

The SPA `RalphWorkflowPane` shows an amber "Resume" button when it detects
a stuck executing session (phase is `executing`, iterations > 0, and no
iteration has status `running`). `coc-client` exposes `resumeRalphSession()`
for calling this endpoint.

## Scheduled Ralph Runs

Prompt schedules with `mode='ralph'` seed a repo-scoped Ralph session before
Expand Down
11 changes: 11 additions & 0 deletions .github/skills/coc-knowledge/references/rest-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,17 @@ CoC server exposes HTTP endpoints organized by domain. All routes are registered
| POST | `/api/queue/:id/cancel` | Cancel queued task |
| PATCH | `/api/queue/pause` | Pause/resume queue |

## Ralph Sessions

| Method | Path | Description |
|--------|------|-------------|
| POST | `/api/processes/:id/ralph-start` | Start Ralph execution after grilling |
| POST | `/api/ralph-launch` | Direct Ralph launch (skip grilling) |
| GET | `/api/workspaces/:wsId/ralph-sessions/:sessionId` | Read session journal (record + progress sections) |
| POST | `/api/workspaces/:wsId/ralph-sessions/:sessionId/continue` | Extend completed session (CAP_REACHED or NO_SIGNAL) by N iterations |
| POST | `/api/workspaces/:wsId/ralph-sessions/:sessionId/new-loop` | New goal loop after RALPH_COMPLETE |
| POST | `/api/workspaces/:wsId/ralph-sessions/:sessionId/resume` | Resume stuck executing session (no in-flight task) |

## Schedules

| Method | Path | Description |
Expand Down
16 changes: 12 additions & 4 deletions packages/coc-client/src/contracts/servers.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**
 * Discriminator values for remote server entries; each value matches the
 * `kind` field of one remote server variant.
 */
export type RemoteServerKind = 'url' | 'devtunnel';
export type RemoteServerKind = 'url' | 'devtunnel' | 'ssh';
/** Runtime connection states a remote server can be reported in. */
export type RemoteServerRuntimeStatus = 'idle' | 'connecting' | 'online' | 'offline' | 'failed';

export interface BaseRemoteServer {
Expand All @@ -25,15 +25,23 @@ export interface DevTunnelRemoteServer extends BaseRemoteServer {
tunnelId: string;
}

export type RemoteServer = UrlRemoteServer | DevTunnelRemoteServer;
/**
 * Remote server variant with `kind: 'ssh'`: identified by an SSH config host
 * alias together with the local port that is forwarded for it.
 */
export interface SshRemoteServer extends BaseRemoteServer {
kind: 'ssh';
host: string; // SSH config alias, e.g. "ubuntu-arm"
localPort: number; // forwarded local port, e.g. 4000
}

/** Union of every remote server variant, discriminated by `kind`. */
export type RemoteServer = UrlRemoteServer | DevTunnelRemoteServer | SshRemoteServer;

/**
 * Per-kind input shapes for a remote server: every member carries a `label`
 * plus the connection fields specific to its `kind`.
 * NOTE(review): presumably the create-request payload — confirm against the
 * remote server routes.
 */
export type RemoteServerInput =
| { kind: 'url'; label: string; url: string }
| { kind: 'devtunnel'; label: string; tunnelId: string };
| { kind: 'devtunnel'; label: string; tunnelId: string }
| { kind: 'ssh'; label: string; host: string; localPort: number };

/**
 * Per-kind patch shapes for a remote server: every field is optional,
 * including `kind` itself.
 * NOTE(review): presumably a partial-update payload — confirm against the
 * remote server routes.
 */
export type RemoteServerPatch =
| { label?: string; kind?: 'url'; url?: string }
| { label?: string; kind?: 'devtunnel'; tunnelId?: string };
| { label?: string; kind?: 'devtunnel'; tunnelId?: string }
| { label?: string; kind?: 'ssh'; host?: string; localPort?: number };

export interface RemoteServerHealth {
serverId: string;
Expand Down
9 changes: 9 additions & 0 deletions packages/coc-client/src/contracts/workspaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -394,3 +394,12 @@ export interface RalphNewLoopResponse {
nextIteration: number;
newMaxIterations: number;
}

/**
 * Response body returned by the Ralph session resume endpoint
 * (`POST /workspaces/:workspaceId/ralph-sessions/:sessionId/resume`).
 */
export interface RalphResumeResponse {
// Literal `true`; the type has no `false` variant.
resumed: true;
sessionId: string;
workspaceId: string;
// NOTE(review): presumably the id of the task enqueued for the next iteration — confirm against the route.
taskId: string;
// NOTE(review): presumably `currentIteration + 1`, the iteration that was enqueued — confirm against the route.
nextIteration: number;
// NOTE(review): presumably the session's existing cap, which resume does not change — confirm against the route.
maxIterations: number;
}
16 changes: 16 additions & 0 deletions packages/coc-client/src/domains/workspaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import type {
MyWorkSyncResponse,
RalphContinueResponse,
RalphNewLoopResponse,
RalphResumeResponse,
RalphSessionResponse,
RegisterWorkspaceRequest,
TerminalPinResponse,
Expand Down Expand Up @@ -268,6 +269,21 @@ export class WorkspacesClient {
);
}

/**
 * Ask the server to resume a Ralph session that sits in the `executing`
 * phase without any in-flight task (for instance after a task failure or a
 * server crash). The next iteration is enqueued and the iteration cap is
 * left as it is.
 *
 * @param workspaceId Workspace that owns the session; path-encoded before use.
 * @param sessionId Session to resume; path-encoded before use.
 * @returns The resume response sent back by the server.
 */
resumeRalphSession(
  workspaceId: string,
  sessionId: string,
): Promise<RalphResumeResponse> {
  const workspaceSegment = encodePathSegment(workspaceId);
  const sessionSegment = encodePathSegment(sessionId);
  const resumePath = `/workspaces/${workspaceSegment}/ralph-sessions/${sessionSegment}/resume`;
  return this.transport.request<RalphResumeResponse>(resumePath, { method: 'POST' });
}

syncMyWork(request: MyWorkSyncRequest = {}): Promise<MyWorkSyncResponse> {
return this.transport.request<MyWorkSyncResponse>('/my-work/sync', { method: 'POST', body: { ...request } });
}
Expand Down
9 changes: 8 additions & 1 deletion packages/coc/src/server/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ import { NotesGitTimerManager } from './notes/git/notes-git-timer-manager';
import { migrateWorkspaceRegistryIfNeeded } from './storage/startup-workspace-migration';
import { migrateProcessHistoryIfNeeded } from './storage/startup-process-migration';
import { DevTunnelConnector } from './servers/devtunnel-connector';
import { SshConnector } from './servers/ssh-connector';
import { RemoteServerStore } from './servers/remote-server-store';
import { pruneAllStaleClassifications } from './repos/classification-store';
import { SyncEngine } from './sync/sync-engine';
Expand Down Expand Up @@ -78,6 +79,7 @@ interface CloseHandlerDeps {
terminalWsServer?: { closeAll(): void };
terminalSessionManager?: { destroyAll(): void };
remoteServerConnector: { dispose(): void };
remoteServerSshConnector: { dispose(): void };
loopExecutor?: { shutdownAll(): void };
loopInfraDispose?: () => void;
mcpOauthDispose?: () => void;
Expand Down Expand Up @@ -124,6 +126,7 @@ function buildCloseHandler(deps: CloseHandlerDeps): (opts?: ServerCloseOptions)
deps.terminalSessionManager?.destroyAll();
deps.terminalWsServer?.closeAll();
deps.remoteServerConnector.dispose();
deps.remoteServerSshConnector.dispose();
wsServer.closeAll();
for (const socket of activeSockets) {
socket.destroy();
Expand Down Expand Up @@ -467,6 +470,7 @@ export async function createExecutionServer(options: ExecutionServerOptions = {}
const notesGitTimerManager = new NotesGitTimerManager();
const remoteServerStore = new RemoteServerStore(dataDir);
const remoteServerConnector = new DevTunnelConnector();
const remoteServerSshConnector = new SshConnector();

// Sync engines — one per virtual workspace, only active when gitRemote is configured.
const syncEngines = new Map<string, SyncEngine>();
Expand All @@ -493,6 +497,7 @@ export async function createExecutionServer(options: ExecutionServerOptions = {}
runtimeConfigService,
remoteServerStore,
remoteServerConnector,
remoteServerSshConnector,
loopStore: loopInfra?.loopStore,
loopExecutor: loopInfra?.loopExecutor,
mcpOauthManager: mcpOauthInfra?.manager,
Expand Down Expand Up @@ -619,8 +624,9 @@ export async function createExecutionServer(options: ExecutionServerOptions = {}
await new Promise<void>((resolve, reject) => { server.on('error', reject); server.listen(port, host, resolve); });
try {
void remoteServerConnector.connectConfigured(remoteServerStore.list());
void remoteServerSshConnector.connectConfigured(remoteServerStore.list());
} catch (error) {
process.stderr.write(`[servers] Failed to start DevTunnel connectors: ${error instanceof Error ? error.message : String(error)}\n`);
process.stderr.write(`[servers] Failed to start remote server connectors: ${error instanceof Error ? error.message : String(error)}\n`);
}
cleanupAllStalePasteFiles(dataDir).catch(() => { /* best-effort */ });
try {
Expand Down Expand Up @@ -674,6 +680,7 @@ export async function createExecutionServer(options: ExecutionServerOptions = {}
terminalWsServer: terminalInfra?.terminalWsServer,
terminalSessionManager: terminalInfra?.terminalSessionManager,
remoteServerConnector,
remoteServerSshConnector,
loopExecutor: loopInfra?.loopExecutor,
loopInfraDispose: loopInfra?.dispose,
mcpOauthDispose: mcpOauthInfra?.dispose,
Expand Down
39 changes: 39 additions & 0 deletions packages/coc/src/server/ralph/ralph-session-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,45 @@ export class RalphSessionStore {
await fs.promises.appendFile(progressPath, marker, 'utf-8');
}

/**
 * Write a "Session resumed at <ts> — picking up from iteration <N>" banner
 * at the end of the session's `progress.md`, creating the session directory
 * first if needed.
 *
 * The append is skipped when the last 1024 bytes (at most) of the file
 * already contain a banner with the same timestamp and iteration, so a
 * repeated call with identical arguments does not duplicate it. Any error
 * raised while inspecting the existing file is ignored and the banner is
 * appended anyway.
 *
 * @param workspaceId Workspace that owns the session.
 * @param sessionId Session whose `progress.md` receives the banner.
 * @param lastIteration Iteration number written into the banner.
 * @param nowIso Timestamp to embed; defaults to the current time as an ISO string.
 */
async appendResumeMarker(
  workspaceId: string,
  sessionId: string,
  lastIteration: number,
  nowIso?: string,
): Promise<void> {
  const sessionDir = this.getSessionDir(workspaceId, sessionId);
  await fs.promises.mkdir(sessionDir, { recursive: true });
  const progressPath = this.getProgressPath(workspaceId, sessionId);
  const ts = nowIso ?? new Date().toISOString();
  // Built once: the same heading is both searched for and appended.
  const heading = `## Session resumed at ${ts} — picking up from iteration ${lastIteration}`;
  const marker = `\n---\n${heading}\n`;

  try {
    const { size } = await fs.promises.stat(progressPath);
    const tailLength = Math.min(size, 1024);
    if (tailLength > 0) {
      const handle = await fs.promises.open(progressPath, 'r');
      let tailText = '';
      try {
        const tailBuffer = Buffer.alloc(tailLength);
        await handle.read(tailBuffer, 0, tailLength, size - tailLength);
        tailText = tailBuffer.toString('utf-8');
      } finally {
        await handle.close();
      }
      // Checked only after a successful close; a failing close lands in the
      // catch below and the banner is appended regardless.
      if (tailText.includes(heading)) {
        return;
      }
    }
  } catch {
    // Missing file — fall through and append (which will create it).
  }

  await fs.promises.appendFile(progressPath, marker, 'utf-8');
}

/**
* Append a final-check section to `progress.md`.
*
Expand Down
5 changes: 5 additions & 0 deletions packages/coc/src/server/routes/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,14 @@ import type { TerminalSessionManager } from '../terminal/index';
import { registerRemoteServerRoutes } from '../servers/remote-server-routes';
import { RemoteServerStore } from '../servers/remote-server-store';
import { DevTunnelConnector } from '../servers/devtunnel-connector';
import type { SshConnector } from '../servers/ssh-connector';
import { registerRalphRoutes } from './queue-ralph-routes';
import { registerRalphSessionRoutes } from './ralph-session-routes';
import { registerRalphContinueRoutes } from './ralph-continue-routes';
import { registerRalphNewLoopRoutes } from './ralph-new-loop-routes';
import { registerRalphPromoteRoutes } from './ralph-promote-routes';
import { registerRalphLaunchRoutes } from './ralph-launch-routes';
import { registerRalphResumeRoutes } from './ralph-resume-routes';
import { registerLoopRoutes } from '../loops/loop-handler';
import type { LoopStore } from '../loops/loop-store';
import type { LoopExecutor, LoopEventEmit } from '../loops/loop-executor';
Expand Down Expand Up @@ -140,6 +142,7 @@ export interface RegisterRoutesOptions {
runtimeConfigService?: RuntimeConfigService;
remoteServerStore?: RemoteServerStore;
remoteServerConnector?: DevTunnelConnector;
remoteServerSshConnector?: SshConnector;
loopStore?: LoopStore;
loopExecutor?: LoopExecutor;
mcpOauthManager?: McpOauthManager;
Expand Down Expand Up @@ -186,6 +189,7 @@ export function registerAllRoutes(routes: Route[], opts: RegisterRoutesOptions):
registerRemoteServerRoutes(routes, {
store: opts.remoteServerStore ?? new RemoteServerStore(dataDir),
connector: opts.remoteServerConnector ?? new DevTunnelConnector(),
sshConnector: opts.remoteServerSshConnector,
});
registerProviderRoutes(routes, dataDir);
// Provider SDK install routes (on-demand install of @openai/codex-sdk and @anthropic-ai/claude-agent-sdk).
Expand Down Expand Up @@ -428,6 +432,7 @@ export function registerAllRoutes(routes: Route[], opts: RegisterRoutesOptions):
registerRalphNewLoopRoutes(routes, { bridge, store, dataDir });
registerRalphPromoteRoutes(routes, { bridge, store, dataDir });
registerRalphLaunchRoutes(routes, { bridge, dataDir });
registerRalphResumeRoutes(routes, { bridge, store, dataDir });

// Work item routes
const workItemStore = new FileWorkItemStore({ dataDir });
Expand Down
17 changes: 8 additions & 9 deletions packages/coc/src/server/routes/ralph-continue-routes.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
/**
* POST /api/workspaces/:workspaceId/ralph-sessions/:sessionId/continue
*
* Extends a Ralph session that hit its iteration cap (terminalReason
* `CAP_REACHED`, or `NO_SIGNAL` with `currentIteration === maxIterations`)
* by N additional iterations. Same `sessionId`, same `progress.md` and
* `session.json` — appends a continuation banner and enqueues iteration
* `currentIteration + 1`.
* Extends a completed Ralph session by N additional iterations. Covers:
* - `CAP_REACHED` — agent wanted to continue but hit the iteration cap
* - `NO_SIGNAL` — agent stopped without emitting RALPH_NEXT / RALPH_COMPLETE
* (either at the cap or due to an agent failure mid-run)
*
* Same `sessionId`, same `progress.md` and `session.json` — appends a
* continuation banner and enqueues iteration `currentIteration + 1`.
*/

import { sendJSON, sendError, parseBody } from '../core/api-handler';
Expand Down Expand Up @@ -39,8 +41,7 @@ export function isResumableTerminalState(record: RalphSessionRecord): boolean {
if (record.terminalReason === 'CAP_REACHED') {
return true;
}
if (record.terminalReason === 'NO_SIGNAL'
&& record.currentIteration >= record.maxIterations) {
if (record.terminalReason === 'NO_SIGNAL') {
return true;
}
return false;
Expand Down Expand Up @@ -87,8 +88,6 @@ export function registerRalphContinueRoutes(routes: Route[], ctx: RalphContinueR
detail = 'Session was marked RALPH_COMPLETE; start a new loop instead';
} else if (record.terminalReason === 'CANCELLED') {
detail = 'Session was cancelled; start a new loop instead';
} else if (record.terminalReason === 'NO_SIGNAL') {
detail = 'NO_SIGNAL session has not yet reached its iteration cap';
}
return sendError(res, 409, detail);
}
Expand Down
Loading
Loading