Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## 2026-05-30

### Changes
- [API Testing] `explorbot api` now reads knowledge files. Planning (Chief) and execution (Curler) prompts now include knowledge that matches the endpoint under test.
- [API Testing] API knowledge lives in `knowledge/api/` and matches endpoints via the `endpoint:` frontmatter field. `api init` and `api know` now write there.
- Knowledge files support a `scope` frontmatter field — `web`, `api`, or `all` — so web exploration and API testing can isolate or share knowledge. Unscoped files default to their directory's mode (`knowledge/` → web, `knowledge/api/` → api); use `scope: all` for shared knowledge such as credentials.

## 2026-05-11

### New CLI Options
Expand Down
10 changes: 9 additions & 1 deletion boat/api-tester/src/ai/chief.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ export class Chief extends ChiefBase {
this.apiClient = apiClient || null;
}

async plan(endpoint: string, opts?: { style?: string; specDefinition?: string }): Promise<Plan> {
async plan(endpoint: string, opts?: { style?: string; specDefinition?: string; knowledge?: string }): Promise<Plan> {
tag('info').log(`Planning API tests for ${endpoint}`);
if (opts?.style) tag('info').log(`Planning style: ${opts.style}`);

Expand All @@ -56,6 +56,14 @@ export class Chief extends ChiefBase {
const sampleData = await this.collectSampleData(endpoint);
const conversation = this.buildConversation(endpoint, opts?.style, sampleData);

if (opts?.knowledge) {
conversation.addUserText(dedent`
<knowledge>
${opts.knowledge}
</knowledge>
`);
}

if (opts?.specDefinition) {
conversation.addUserText(dedent`
<api_spec>
Expand Down
15 changes: 12 additions & 3 deletions boat/api-tester/src/ai/curler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ export class Curler {
this.reporter = reporter;
}

async test(test: Test, opts?: { specDefinition?: string; baseEndpoint?: string; searchSpec?: (query: string) => string }): Promise<{ success: boolean }> {
async test(test: Test, opts?: { specDefinition?: string; baseEndpoint?: string; searchSpec?: (query: string) => string; knowledge?: string }): Promise<{ success: boolean }> {
tag('info').log(`Testing: ${test.scenario}`);
debugLog('Starting test:', test.scenario);

Expand All @@ -37,7 +37,7 @@ export class Curler {
const conversation = this.provider.startConversation(this.buildSystemPrompt(), 'curler', this.provider.getAgenticModel('curler'));
const tools = createCurlerTools(this.apiClient, this.requestState, test, opts?.searchSpec);

const initialPrompt = this.buildTestPrompt(test, opts?.specDefinition, opts?.baseEndpoint);
const initialPrompt = this.buildTestPrompt(test, opts?.specDefinition, opts?.baseEndpoint, opts?.knowledge);
conversation.addUserText(initialPrompt);

await loop(
Expand Down Expand Up @@ -202,7 +202,7 @@ export class Curler {
}
}

private buildTestPrompt(test: Test, specDefinition?: string, baseEndpoint?: string): string {
private buildTestPrompt(test: Test, specDefinition?: string, baseEndpoint?: string, knowledge?: string): string {
let prompt = dedent`
<task>
SCENARIO: ${test.scenario}
Expand Down Expand Up @@ -238,6 +238,15 @@ export class Curler {
prompt += specBlock;
}

if (knowledge) {
prompt += dedent`

<knowledge>
${knowledge}
</knowledge>
`;
}

return prompt;
}

Expand Down
15 changes: 14 additions & 1 deletion boat/api-tester/src/apibot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import path from 'node:path';
import { AIProvider } from '../../../src/ai/provider.ts';
import { RequestStore } from '../../../src/api/request-store.ts';
import { extractEndpointDefinition, loadSpec, searchEndpoints, validateSpecs } from '../../../src/api/spec-reader.ts';
import { KnowledgeTracker } from '../../../src/knowledge-tracker.ts';
import { Reporter } from '../../../src/reporter.ts';
import { Plan } from '../../../src/test-plan.ts';
import { setVerboseMode, tag } from '../../../src/utils/logger.ts';
Expand All @@ -20,6 +21,7 @@ export class ApiBot {
private apiClient!: ApiClient;
private requestState!: RequestStore;
private reporter!: Reporter;
private knowledgeTracker!: KnowledgeTracker;
private options: ApibotOptions;
private apiSpec: any;

Expand Down Expand Up @@ -48,6 +50,9 @@ export class ApiBot {
this.requestState = new RequestStore(outputDir);
this.reporter = new Reporter(this.config.reporter);

const knowledgeDir = this.configParser.getKnowledgeDir();
this.knowledgeTracker = new KnowledgeTracker({ knowledgeDir: path.join(knowledgeDir, 'api'), sharedDir: knowledgeDir, scope: 'api' });

validateSpecs(this.config.api.spec);
this.apiSpec = await loadSpec(this.config.api.spec!, outputDir);
tag('info').log('OpenAPI spec loaded');
Expand Down Expand Up @@ -109,7 +114,8 @@ export class ApiBot {

const chief = this.agentChief();
const specDefinition = this.getEndpointDefinition(endpoint);
this.currentPlan = await chief.plan(endpoint, { style: opts.style, specDefinition });
const knowledge = this.getKnowledgeForEndpoint(endpoint);
this.currentPlan = await chief.plan(endpoint, { style: opts.style, specDefinition, knowledge });
const savedPath = this.savePlan();
if (savedPath) {
tag('info').log(`Plan saved to: ${path.relative(process.cwd(), savedPath)}`);
Expand Down Expand Up @@ -174,6 +180,13 @@ export class ApiBot {
return extractEndpointDefinition(this.apiSpec, endpoint, this.config.api.baseEndpoint);
}

/**
 * Builds the knowledge text to inject into prompts for a given endpoint.
 *
 * Asks the knowledge tracker for every entry matching `endpoint`, trims the
 * surrounding whitespace of each entry's content, and joins the pieces with
 * a blank line between them.
 *
 * @param endpoint - Endpoint passed to the tracker for matching.
 * @returns The combined knowledge text; an empty string when the tracker
 *   returns no entries.
 */
getKnowledgeForEndpoint(endpoint: string): string {
  const matches = this.knowledgeTracker.getMatchingKnowledge(endpoint);
  const sections = matches.map((entry) => entry.content.trim());
  return sections.join('\n\n');
}

/**
 * Searches the loaded API spec for endpoints related to `query`.
 *
 * Delegates to `searchEndpoints`, passing the loaded spec, the query, and the
 * configured base endpoint.
 *
 * @param query - Search text forwarded to `searchEndpoints`.
 * @returns The string produced by `searchEndpoints`.
 */
searchSpec(query: string): string {
  const baseEndpoint = this.config.api.baseEndpoint;
  const matches = searchEndpoints(this.apiSpec, query, baseEndpoint);
  return matches;
}
Expand Down
15 changes: 9 additions & 6 deletions boat/api-tester/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ export function createApiCommands(name = 'api'): Command {
specDefinition,
baseEndpoint: bot.getConfig().api.baseEndpoint,
searchSpec: (query) => bot.searchSpec(query),
knowledge: bot.getKnowledgeForEndpoint(test.startUrl),
});
if (result.success) passed++;
else failed++;
Expand Down Expand Up @@ -146,6 +147,7 @@ export function createApiCommands(name = 'api'): Command {
specDefinition,
baseEndpoint: bot.getConfig().api.baseEndpoint,
searchSpec: (query) => bot.searchSpec(query),
knowledge: bot.getKnowledgeForEndpoint(test.startUrl),
});
totalTests++;
if (result.success) totalPassed++;
Expand Down Expand Up @@ -237,11 +239,11 @@ export default {
console.log(`\nCreated: ${configPath}`);

fs.mkdirSync('output', { recursive: true });
fs.mkdirSync('knowledge', { recursive: true });
fs.mkdirSync(path.join('knowledge', 'api'), { recursive: true });

if (knowledge) {
const knowledgePath = path.resolve('knowledge', 'general.md');
fs.writeFileSync(knowledgePath, `---\nendpoint: "*"\n---\n${knowledge}\n`, 'utf8');
const knowledgePath = path.resolve('knowledge', 'api', 'general.md');
fs.writeFileSync(knowledgePath, `---\nendpoint: "*"\nscope: api\n---\n${knowledge}\n`, 'utf8');
console.log(`Created: ${knowledgePath}`);
}

Expand Down Expand Up @@ -281,12 +283,13 @@ export default {
if (options.path) knowledgeDir = path.join(path.resolve(options.path), 'knowledge');
}

fs.mkdirSync(knowledgeDir, { recursive: true });
const apiKnowledgeDir = path.join(knowledgeDir, 'api');
fs.mkdirSync(apiKnowledgeDir, { recursive: true });

const filename = endpoint.replace(/^\//, '').replace(/[^a-zA-Z0-9]/g, '_') || 'general';
const filePath = path.join(knowledgeDir, `${filename}.md`);
const filePath = path.join(apiKnowledgeDir, `${filename}.md`);

const content = `---\nendpoint: "${endpoint}"\n---\n${description}\n`;
const content = `---\nendpoint: "${endpoint}"\nscope: api\n---\n${description}\n`;

if (fs.existsSync(filePath)) {
fs.appendFileSync(filePath, `\n---\n${description}\n`, 'utf8');
Expand Down
41 changes: 36 additions & 5 deletions docs/knowledge.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,38 @@ Notes:

| Field | Purpose |
|-------|---------|
| `url` | URL pattern to match (required) |
| `url` | URL pattern to match (required for web) |
| `endpoint` | Endpoint pattern to match (used by API testing instead of `url`) |
| `scope` | Which mode the file applies to: `web`, `api`, or `all` (optional) |
| `title` | Human-readable title (optional) |
| Custom fields | Any additional metadata for agents |

## Scope: Web vs API

Web exploration and API testing read separate knowledge so hints don't cross over:

- Web testing reads `knowledge/*.md` (top level) and matches on `url`.
- API testing reads `knowledge/api/*.md` and matches on `endpoint`.

The `scope` field controls which mode a file applies to. When omitted, the scope defaults to the mode of the file's directory (`knowledge/` → `web`, `knowledge/api/` → `api`).

| Scope | Read by |
|-------|---------|
| `web` | Web exploration |
| `api` | API testing |
| `all` | Both |

Use `scope: all` for shared knowledge, such as credentials, when a top-level file should also reach API testing:

```markdown
---
url: '*'
scope: all
---

API token: ${env.API_KEY}
```

## Variables

Knowledge files support variable interpolation using `${namespace.key}` syntax. Variables are resolved when knowledge is loaded.
Expand Down Expand Up @@ -291,10 +319,13 @@ When an agent operates on a page, it receives relevant knowledge based on URL ma

```
./knowledge/
├── login.md # /login page
├── checkout.md # /checkout page
├── general.md # * (all pages)
└── admin_users.md # /admin/users/*
├── login.md # /login page (web)
├── checkout.md # /checkout page (web)
├── general.md # * (all pages, web)
├── admin_users.md # /admin/users/* (web)
└── api/
├── general.md # * (all endpoints, api)
└── users.md # /users endpoint (api)
```

Files are named based on URL pattern. Multiple entries for the same URL are appended to the same file.
Expand Down
18 changes: 12 additions & 6 deletions src/action.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import type { StateManager } from './state-manager.js';
import { extractCodeBlocks } from './utils/code-extractor.js';
import { htmlCombinedSnapshot, minifyHtml } from './utils/html.js';
import { createDebug, log, setStepSpanParent, tag } from './utils/logger.js';
import { withRetry } from './utils/retry.js';
import { safeFilename } from './utils/strings.ts';
import { throttle } from './utils/throttle.ts';

Expand Down Expand Up @@ -79,12 +80,17 @@ class Action {
const page = this.playwrightHelper.page;
const frame = this.playwrightHelper.frame;
await page?.waitForLoadState('domcontentloaded', { timeout: 10000 })?.catch(() => {});
const grabAll = () => Promise.all([(this.actor as any).grabSource(), (this.actor as any).grabTitle(), this.captureBrowserLogs()]);
const [html, title, browserLogs] = await grabAll().catch(async (err: Error) => {
const msg = err instanceof Error ? err.message : String(err);
if (!/navigating and changing the content/i.test(msg)) throw err;
await page?.waitForLoadState('domcontentloaded', { timeout: 10000 })?.catch(() => {});
return grabAll();
const grabAll = async () => {
try {
return await Promise.all([(this.actor as any).grabSource(), (this.actor as any).grabTitle(), this.captureBrowserLogs()]);
} catch (err) {
await recorder.reset();
await recorder.start();
throw err;
}
};
const [html, title, browserLogs] = await withRetry(grabAll, {
retryCondition: (err) => /navigating and changing the content/i.test(err.message),
});
const url = page?.url() || (await (this.actor as any).grabCurrentUrl?.());

Expand Down
1 change: 0 additions & 1 deletion src/ai/session-analyst.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ export class SessionAnalyst implements Agent {
private serializeTest(test: Test, ref: number): string {
const log = test
.getLog()
.slice(-30)
.map((entry) => ` - [${entry.type}] ${entry.content}`)
.join('\n');

Expand Down
12 changes: 11 additions & 1 deletion src/commands/explore-command.ts
Original file line number Diff line number Diff line change
Expand Up @@ -495,8 +495,18 @@ export class ExploreCommand extends BaseCommand {
if (this.dryRun) {
test.start();
test.finish(TestResult.SKIPPED);
} else {
this.testsRun++;
return;
}
try {
await this.explorBot.agentTester().test(test);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
tag('warning').log(`Test failed: ${test.scenario} — ${msg}`);
if (!test.hasFinished) {
test.addNote(`Aborted: ${msg}`, TestResult.FAILED);
test.finish(TestResult.FAILED);
}
}
this.testsRun++;
}
Expand Down
2 changes: 1 addition & 1 deletion src/explorbot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ import { KnowledgeTracker } from './knowledge-tracker.ts';
import { WebPageState } from './state-manager.ts';
import type { Suite } from './suite.ts';
import { Plan, type Test } from './test-plan.ts';
import { parsePlansFromMarkdown } from './utils/test-plan-markdown.ts';
import { setVerboseMode, tag } from './utils/logger.ts';
import { relativeToCwd } from './utils/next-steps.ts';
import { sanitizeFilename } from './utils/strings.ts';
import { parsePlansFromMarkdown } from './utils/test-plan-markdown.ts';

export interface ExplorBotOptions {
from?: string;
Expand Down
2 changes: 1 addition & 1 deletion src/explorer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ declare namespace CodeceptJS {

const debugLog = createDebug('explorbot:explorer');
const FATAL_BROWSER_ERRORS = /Frame was detached|Target closed|Execution context was destroyed|Protocol error|Session closed/i;
const RECOVERABLE_NAVIGATION_ERRORS = /net::ERR_ABORTED|page\.screenshot.*Timeout|waiting for fonts to load/i;
const RECOVERABLE_NAVIGATION_ERRORS = /net::ERR_ABORTED|page\.screenshot.*Timeout|waiting for fonts to load|Unable to retrieve content because the page is navigating/i;

interface TabInfo {
url: string;
Expand Down
Loading