Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
a0ac7d4
🤖 feat: unify bash into task_* tools
ThomasK33 Dec 23, 2025
47e65d3
🤖 fix: stabilize unit tests for bun + window guards
ThomasK33 Dec 23, 2025
f7f74b5
🤖 fix: make task tool schema provider-compatible
ThomasK33 Dec 23, 2025
36b7e0a
🤖 tests: migrate bash tool tests to task(kind="bash")
ThomasK33 Dec 23, 2025
49104da
🤖 tests: make task bash integration tests deterministic
ThomasK33 Dec 23, 2025
c127ea9
fix(task_await): honor timeout_secs=0 for agent tasks
ThomasK33 Dec 24, 2025
4cd46b9
fix(task_await): treat timeout_secs=0 as non-blocking for agents
ThomasK33 Dec 24, 2025
884cec4
fix(task): preserve bash truncation metadata
ThomasK33 Dec 24, 2025
2a78837
tests: satisfy prettier for task tool
ThomasK33 Dec 24, 2025
fdcd566
fix(task_await): make polling guidance tool-aware
ThomasK33 Dec 24, 2025
fce0b71
fix(task_await): allow awaiting cleaned-up agent tasks
ThomasK33 Dec 24, 2025
1e40728
fix(task): allow omitting bash display_name
ThomasK33 Dec 25, 2025
9dfbe63
tests: use vision-capable model for image integration
ThomasK33 Dec 25, 2025
d6ef909
🤖 tests: update reviewRefresh integration test for task
ThomasK33 Dec 26, 2025
5a8e8a6
🤖 fix: refresh ReviewPanel on task tool end
ThomasK33 Dec 26, 2025
ea3dd59
🤖 fix: guard pending task finalization against bash
ThomasK33 Dec 26, 2025
d852396
🤖 feat: include thinking level in workspace activity
ThomasK33 Dec 26, 2025
f7c087c
🤖 fix: split task consumer breakdown by kind
ThomasK33 Dec 26, 2025
ad99f85
🤖 fix: show truncation notice in bash tool output
ThomasK33 Dec 26, 2025
557ce2f
🤖 fix: harden task(kind=bash) integration
ThomasK33 Dec 27, 2025
40d1862
🤖 fix: stop parsing bash output from task reportMarkdown
ThomasK33 Dec 28, 2025
ea14144
🤖 fix: stabilize task-bash helpers and background wrapper
ThomasK33 Dec 28, 2025
f7b6113
🤖 fix: update background wrapper trap test
ThomasK33 Dec 28, 2025
35e7f07
🤖 fix: harden legacy task(kind=bash) parsing
ThomasK33 Dec 28, 2025
cd0ccf8
🤖 fix: keep task_await scope checks after task cleanup
ThomasK33 Dec 28, 2025
fbd1725
🤖 fix: dedupe migrated background bash IDs
ThomasK33 Dec 28, 2025
1885cd7
🤖 fix: avoid duplicate bash output in task_await
ThomasK33 Dec 29, 2025
d7fc8bf
🤖 refactor: split bash from task tool
ThomasK33 Dec 29, 2025
50a85a2
🤖 ci: retry codex comments check
ThomasK33 Dec 29, 2025
82d73f6
🤖 tests: reduce flake in background bash + fork
ThomasK33 Dec 29, 2025
c3acfa1
🤖 fix: add missing task/bash converters for mobile
ThomasK33 Dec 29, 2025
b8dd6dd
🤖 fix: preserve legacy bash failures in converters
ThomasK33 Dec 29, 2025
d1fa4f0
🤖 fix: avoid misclassifying task errors as bash results
ThomasK33 Dec 29, 2025
db508a8
🤖 fix: accept legacy background bash results
ThomasK33 Dec 29, 2025
3057d2b
🤖 ci: harden shfmt install
ThomasK33 Dec 30, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,17 @@ jobs:
key: ${{ runner.os }}-shfmt-latest
- name: Install shfmt
run: |
set -euo pipefail
if [[ ! -f "$HOME/.local/bin/shfmt" ]] || ! "$HOME/.local/bin/shfmt" --version >/dev/null 2>&1; then
curl -sS https://webinstall.dev/shfmt | bash
# webinstall.dev can be flaky; retry and force HTTP/1.1 to avoid HTTP/2 stream errors.
curl --retry 5 --retry-all-errors --retry-delay 2 -LsSf --http1.1 https://webinstall.dev/shfmt | bash
fi

if ! "$HOME/.local/bin/shfmt" --version >/dev/null 2>&1; then
echo "Error: shfmt install failed"
exit 1
fi

echo "$HOME/.local/bin" >> $GITHUB_PATH
- uses: cachix/install-nix-action@v27
with:
Expand Down Expand Up @@ -161,7 +169,16 @@ jobs:
- run: make storybook-build
- run: |
bun x http-server storybook-static -p 6006 &
sleep 5
for i in {1..30}; do
if curl -sf http://127.0.0.1:6006 >/dev/null; then
echo "Storybook ready"
break
fi
echo "Waiting for Storybook... ($i/30)"
sleep 0.5
done
# Fail the step if Storybook never became reachable.
curl -sf http://127.0.0.1:6006 >/dev/null
- run: make test-storybook

test-e2e:
Expand Down
93 changes: 80 additions & 13 deletions mobile/src/messages/tools/toolRenderers.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ import React from "react";
import { View, Text, ScrollView, StyleSheet, Pressable } from "react-native";
import { Link } from "expo-router";
import { parsePatch } from "diff";
import {
coerceBashToolResult,
convertTaskBashResult,
} from "@/common/utils/tools/taskResultConverters";
import { isTaskBashArgs, isTaskBashArgsFromUnknown } from "@/common/utils/tools/taskToolTypeGuards";
import { resolveBashDisplayName } from "@/common/utils/tools/bashDisplayName";
import type { DisplayedMessage } from "@/common/types/message";
import {
FILE_EDIT_TOOL_NAMES,
Expand Down Expand Up @@ -88,6 +94,9 @@ export function renderSpecializedToolCard(message: ToolDisplayedMessage): ToolCa
if (!isTaskToolArgs(message.args)) {
return null;
}
if (isTaskBashArgs(message.args)) {
return buildTaskBashViewModel(message as ToolDisplayedMessage & { args: TaskToolArgs });
}
return buildTaskViewModel(message as ToolDisplayedMessage & { args: TaskToolArgs });
case "task_await":
if (!isTaskAwaitToolArgs(message.args)) {
Expand Down Expand Up @@ -401,6 +410,69 @@ function BashBackgroundTerminateContent({
return <ThemedText>{result.message}</ThemedText>;
}

/**
 * Builds the tool-card view model for a `task` tool call whose args describe a bash script.
 *
 * The task-shaped args and result are adapted to the bash shapes so the card body can be
 * rendered by `BashToolContent`. When the args do not pass `isTaskBashArgs`, a placeholder
 * "Invalid bash task args" card is returned instead.
 *
 * The card starts expanded while the call's status is anything other than "completed", or
 * when the converted result reports `success === false`.
 */
function buildTaskBashViewModel(
  message: ToolDisplayedMessage & { args: TaskToolArgs }
): ToolCardViewModel {
  const taskArgs = message.args;

  // The guard also narrows `taskArgs` so `script` / `display_name` can be read below.
  if (!isTaskBashArgs(taskArgs)) {
    return {
      icon: "💻",
      caption: "bash",
      title: "bash",
      content: <ThemedText variant="muted">Invalid bash task args</ThemedText>,
      defaultExpanded: true,
    };
  }

  const resolvedName = resolveBashDisplayName(taskArgs.script, taskArgs.display_name);

  // Re-shape the task args into what BashToolContent expects.
  const contentArgs: BashToolArgs = {
    script: taskArgs.script,
    timeout_secs: taskArgs.timeout_secs,
    run_in_background: Boolean(taskArgs.run_in_background),
    display_name: resolvedName,
  };

  // NOTE(review): `legacySuccessCheckInclErrorLine` presumably controls how older persisted
  // results are classified as success/failure — confirm against convertTaskBashResult.
  const coercedTaskResult = coerceTaskToolResult(message.result);
  const convertedResult = convertTaskBashResult(coercedTaskResult, {
    legacySuccessCheckInclErrorLine: true,
  });

  // Card title: first line of the trimmed script, capped at 80 characters, else "bash".
  const firstScriptLine = taskArgs.script.trim().split("\n")[0];
  const cardTitle = truncate(firstScriptLine, 80) || "bash";

  const summaryItems: MetadataItem[] = [{ label: "name", value: resolvedName }];

  if (typeof taskArgs.timeout_secs === "number") {
    summaryItems.push({ label: "timeout", value: `${taskArgs.timeout_secs}s` });
  }

  if (convertedResult && convertedResult.exitCode !== undefined) {
    summaryItems.push({ label: "exit code", value: String(convertedResult.exitCode) });
  }

  if (convertedResult && "truncated" in convertedResult && convertedResult.truncated) {
    summaryItems.push({
      label: "truncated",
      value: convertedResult.truncated.reason,
      tone: "warning",
    });
  }

  const notCompleted = message.status !== "completed";
  const reportedFailure = Boolean(convertedResult && convertedResult.success === false);

  const summary = summaryItems.length > 0 ? <MetadataList items={summaryItems} /> : undefined;

  const content = (
    <BashToolContent
      args={contentArgs}
      result={convertedResult}
      status={message.status}
      toolCallId={message.toolCallId}
    />
  );

  return {
    icon: "💻",
    caption: "bash",
    title: cardTitle,
    summary,
    content,
    defaultExpanded: notCompleted || reportedFailure,
  };
}

function buildTaskViewModel(
message: ToolDisplayedMessage & { args: TaskToolArgs }
): ToolCardViewModel {
Expand Down Expand Up @@ -1503,8 +1575,15 @@ function isBashBackgroundTerminateArgs(value: unknown): value is BashBackgroundT
}

function isTaskToolArgs(value: unknown): value is TaskToolArgs {
if (!value || typeof value !== "object") {
return false;
}

if (isTaskBashArgsFromUnknown(value)) {
return true;
}

return (
Boolean(value && typeof value === "object") &&
typeof (value as TaskToolArgs).prompt === "string" &&
typeof (value as TaskToolArgs).title === "string" &&
typeof (value as TaskToolArgs).subagent_type === "string"
Expand Down Expand Up @@ -1544,18 +1623,6 @@ function isFileEditArgsUnion(value: unknown): value is FileEditArgsUnion {
return Boolean(value && typeof (value as FileEditArgsUnion).file_path === "string");
}

/**
 * Narrows an unknown tool result to a `BashToolResult`.
 *
 * @param value - Arbitrary value taken from a tool call's result.
 * @returns The same value (not a copy) when it is a non-null object carrying a boolean
 *   `success` property; otherwise `null`. No other fields are validated.
 */
function coerceBashToolResult(value: unknown): BashToolResult | null {
  // Anything that is not a non-null object cannot be a result.
  if (typeof value !== "object" || value === null) {
    return null;
  }

  if (!("success" in value)) {
    return null;
  }

  const candidate = value as BashToolResult;
  return typeof candidate.success === "boolean" ? candidate : null;
}

function coerceWebFetchToolResult(value: unknown): WebFetchToolResult | null {
if (value && typeof value === "object" && "success" in value) {
return value as WebFetchToolResult;
Expand Down
28 changes: 23 additions & 5 deletions scripts/check_codex_comments.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,29 @@ REPO_INFO=$(gh repo view --json owner,name --jq '{owner: .owner.login, name: .na
OWNER=$(echo "$REPO_INFO" | jq -r '.owner')
REPO=$(echo "$REPO_INFO" | jq -r '.name')

RESULT=$(gh api graphql \
-f query="$GRAPHQL_QUERY" \
-F owner="$OWNER" \
-F repo="$REPO" \
-F pr="$PR_NUMBER")
# Fetch the PR data via GraphQL, tolerating transient failures.
# Depot runners sometimes hit network timeouts to api.github.com, so the request is
# attempted up to MAX_ATTEMPTS times. The wait between attempts starts at BACKOFF_SECS
# seconds and doubles after each failure. If every attempt fails the script exits 1;
# otherwise RESULT holds the response of the first successful call.
MAX_ATTEMPTS=5
BACKOFF_SECS=2

attempt=1
until RESULT=$(gh api graphql \
-f query="$GRAPHQL_QUERY" \
-F owner="$OWNER" \
-F repo="$REPO" \
-F pr="$PR_NUMBER"); do
  # Out of attempts: fail the required check.
  if ((attempt == MAX_ATTEMPTS)); then
    echo "❌ GraphQL query failed after ${MAX_ATTEMPTS} attempts"
    exit 1
  fi

  echo "⚠️ GraphQL query failed (attempt ${attempt}/${MAX_ATTEMPTS}); retrying in ${BACKOFF_SECS}s..."
  sleep "$BACKOFF_SECS"

  # Exponential backoff, then move on to the next attempt.
  BACKOFF_SECS=$((BACKOFF_SECS * 2))
  attempt=$((attempt + 1))
done

# Filter regular comments from bot that aren't minimized, excluding:
# - "Didn't find any major issues" (no issues found)
Expand Down
7 changes: 7 additions & 0 deletions src/browser/components/AppLoader.auth.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,16 @@ void mock.module("@/browser/contexts/API", () => ({
}));

// Replace the AuthTokenModal module with lightweight stubs. This is registered before
// the `AppLoader` import that follows it in this file.
void mock.module("@/browser/components/AuthTokenModal", () => ({
// Note: Module mocks leak between bun test files.
// Export all commonly-used symbols to avoid cross-test import errors.
// Stub component: renders the `error` prop (or "no-error" when it is null/undefined)
// inside a div tagged with the "AuthTokenModalMock" test id.
AuthTokenModal: (props: { error?: string | null }) => (
<div data-testid="AuthTokenModalMock">{props.error ?? "no-error"}</div>
),
// Token storage stubs: the getter always returns null; set/clear do nothing.
getStoredAuthToken: () => null,
// eslint-disable-next-line @typescript-eslint/no-empty-function
setStoredAuthToken: () => {},
// eslint-disable-next-line @typescript-eslint/no-empty-function
clearStoredAuthToken: () => {},
}));

import { AppLoader } from "./AppLoader";
Expand Down
10 changes: 9 additions & 1 deletion src/browser/components/tools/BashToolCall.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,12 @@ export const BashToolCall: React.FC<BashToolCallProps> = ({
const showLiveOutput =
!isBackground && (status === "executing" || (Boolean(liveOutput) && !resultHasOutput));

const truncatedInfo = result && "truncated" in result ? result.truncated : undefined;

const handleToggle = () => {
userToggledRef.current = true;
toggleExpanded();
};

return (
<ToolContainer expanded={expanded}>
<ToolHeader onClick={handleToggle}>
Expand Down Expand Up @@ -260,6 +261,13 @@ export const BashToolCall: React.FC<BashToolCallProps> = ({
</DetailSection>
)}

{truncatedInfo && (
<div className="text-muted px-2 text-[10px] italic">
Output truncated — reason: {truncatedInfo.reason} • totalLines:{" "}
{truncatedInfo.totalLines}
</div>
)}

{"backgroundProcessId" in result ? (
// Background process: show process ID inline with icon (compact, no section wrapper)
<div className="flex items-center gap-2 text-[11px]">
Expand Down
54 changes: 39 additions & 15 deletions src/browser/components/tools/TaskToolCall.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -179,28 +179,29 @@ export const TaskToolCall: React.FC<TaskToolCallProps> = ({ args, result, status
const hasReport = result?.status === "completed" && !!result.reportMarkdown;
const { expanded, toggleExpanded } = useToolExpansion(hasReport);

const isBackground = args.run_in_background ?? false;
const isBackground = args.run_in_background;

const title = args.title ?? "Task";
const prompt = args.prompt ?? "";
const agentType = args.agentId ?? args.subagent_type ?? "unknown";
const prompt = args.prompt;
const title = args.title;
const kindBadge = <AgentTypeBadge type={agentType} />;

// Derive task state from result
const taskId = result?.taskId;
const taskStatus = result?.status;
const reportMarkdown = result?.status === "completed" ? result.reportMarkdown : undefined;
const reportTitle = result?.status === "completed" ? result.title : undefined;

// Show preview of prompt (first line or truncated)
const promptPreview =
prompt.length > 60 ? prompt.slice(0, 60).trim() + "…" : prompt.split("\n")[0];
// Show preview (first line or truncated)
const preview = prompt.length > 60 ? prompt.slice(0, 60).trim() + "…" : prompt.split("\n")[0];

return (
<ToolContainer expanded={expanded}>
<ToolHeader onClick={toggleExpanded}>
<ExpandIcon expanded={expanded}>▶</ExpandIcon>
<TaskIcon toolName="task" />
<ToolName>task</ToolName>
<AgentTypeBadge type={agentType} />
{kindBadge}
{isBackground && (
<span className="text-backgrounded text-[10px] font-medium">background</span>
)}
Expand All @@ -211,18 +212,18 @@ export const TaskToolCall: React.FC<TaskToolCallProps> = ({ args, result, status
<ToolDetails>
{/* Task info surface */}
<div className="task-surface mt-1 rounded-md p-3">
<div className="task-divider mb-2 flex items-center gap-2 border-b pb-2">
<div className="task-divider mb-2 flex flex-wrap items-center gap-2 border-b pb-2">
<span className="text-task-mode text-[12px] font-semibold">
{reportTitle ?? title}
</span>
{taskId && <TaskId id={taskId} />}
{taskStatus && <TaskStatusBadge status={taskStatus} />}
</div>

{/* Prompt section */}
{/* Prompt / script */}
<div className="mb-2">
<div className="text-muted mb-1 text-[10px] tracking-wide uppercase">Prompt</div>
<div className="text-foreground bg-code-bg max-h-[100px] overflow-y-auto rounded-sm p-2 text-[11px] break-words whitespace-pre-wrap">
<div className="text-foreground bg-code-bg max-h-[140px] overflow-y-auto rounded-sm p-2 text-[11px] break-words whitespace-pre-wrap">
{prompt}
</div>
</div>
Expand All @@ -249,7 +250,7 @@ export const TaskToolCall: React.FC<TaskToolCallProps> = ({ args, result, status
)}

{/* Collapsed preview */}
{!expanded && <div className="text-muted mt-1 truncate text-[10px]">{promptPreview}</div>}
{!expanded && <div className="text-muted mt-1 truncate text-[10px]">{preview}</div>}
</ToolContainer>
);
};
Expand All @@ -276,6 +277,12 @@ export const TaskAwaitToolCall: React.FC<TaskAwaitToolCallProps> = ({
const timeoutSecs = args.timeout_secs;
const results = result?.results ?? [];

const showConfigInfo =
taskIds !== undefined ||
timeoutSecs !== undefined ||
args.filter !== undefined ||
args.filter_exclude === true;

// Summary for header
const completedCount = results.filter((r) => r.status === "completed").length;
const totalCount = results.length;
Expand All @@ -298,10 +305,12 @@ export const TaskAwaitToolCall: React.FC<TaskAwaitToolCallProps> = ({
<ToolDetails>
<div className="task-surface mt-1 rounded-md p-3">
{/* Config info */}
{(taskIds ?? timeoutSecs) && (
{showConfigInfo && (
<div className="task-divider text-muted mb-2 flex flex-wrap gap-2 border-b pb-2 text-[10px]">
{taskIds && <span>Waiting for: {taskIds.length} task(s)</span>}
{timeoutSecs && <span>Timeout: {timeoutSecs}s</span>}
{taskIds !== undefined && <span>Waiting for: {taskIds.length} task(s)</span>}
{timeoutSecs !== undefined && <span>Timeout: {timeoutSecs}s</span>}
{args.filter !== undefined && <span>Filter: {args.filter}</span>}
{args.filter_exclude === true && <span>Exclude: true</span>}
</div>
)}

Expand Down Expand Up @@ -335,20 +344,35 @@ const TaskAwaitResult: React.FC<{
const reportMarkdown = isCompleted ? result.reportMarkdown : undefined;
const title = isCompleted ? result.title : undefined;

const output = "output" in result ? result.output : undefined;
const note = "note" in result ? result.note : undefined;
const exitCode = "exitCode" in result ? result.exitCode : undefined;
const elapsedMs = "elapsed_ms" in result ? result.elapsed_ms : undefined;

return (
<div className="bg-code-bg rounded-sm p-2">
<div className="mb-1 flex items-center gap-2">
<div className="mb-1 flex flex-wrap items-center gap-2">
<TaskId id={result.taskId} />
<TaskStatusBadge status={result.status} />
{title && <span className="text-foreground text-[11px] font-medium">{title}</span>}
{exitCode !== undefined && <span className="text-muted text-[10px]">exit {exitCode}</span>}
{elapsedMs !== undefined && <span className="text-muted text-[10px]">{elapsedMs}ms</span>}
</div>

{!isCompleted && output && output.length > 0 && (
<div className="text-foreground bg-code-bg max-h-[140px] overflow-y-auto rounded-sm p-2 text-[11px] break-words whitespace-pre-wrap">
{output}
</div>
)}

{reportMarkdown && (
<div className="mt-2 text-[11px]">
<MarkdownRenderer content={reportMarkdown} />
</div>
)}

{note && <div className="text-muted mt-1 text-[10px]">{note}</div>}

{"error" in result && result.error && (
<div className="text-danger mt-1 text-[11px]">{result.error}</div>
)}
Expand Down
4 changes: 4 additions & 0 deletions src/browser/contexts/API.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,13 @@ void mock.module("@orpc/client/message-port", () => ({
}));

// Replace the AuthTokenModal module with stubs for this test file.
void mock.module("@/browser/components/AuthTokenModal", () => ({
// Note: Module mocks leak between bun test files.
// Export all commonly-used symbols to avoid cross-test import errors.
// Stub component that renders nothing.
AuthTokenModal: () => null,
// Token storage stubs: the getter always returns null; set/clear do nothing.
getStoredAuthToken: () => null,
// eslint-disable-next-line @typescript-eslint/no-empty-function
setStoredAuthToken: () => {},
// eslint-disable-next-line @typescript-eslint/no-empty-function
clearStoredAuthToken: () => {},
}));

Expand Down
Loading
Loading