Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
343 changes: 259 additions & 84 deletions boat/doc-collector/src/ai/documentarian.ts

Large diffs are not rendered by default.

544 changes: 544 additions & 0 deletions boat/doc-collector/src/ai/tools.ts

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions boat/doc-collector/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ export function createDocsCommands(name = 'docs'): Command {
maxPages: 100,
output: 'docs',
screenshot: true,
interactive: false,
collapseDynamicPages: true,
scope: 'site',
includePaths: [],
Expand Down
2 changes: 2 additions & 0 deletions boat/doc-collector/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ class DocbotConfigParser {
maxPages: 100,
output: 'docs',
screenshot: true,
interactive: false,
collapseDynamicPages: true,
scope: 'site',
includePaths: [],
Expand Down Expand Up @@ -155,6 +156,7 @@ interface DocbotConfig {
deniedPathSegments?: string[];
minCanActions?: number;
minInteractiveElements?: number;
interactive?: boolean;
};
}

Expand Down
69 changes: 64 additions & 5 deletions boat/doc-collector/src/docbot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class DocBot {
config: this.options.docsConfig,
path: this.options.path,
});
this.documentarian = new Documentarian(this.explorBot.getProvider(), this.config);
this.documentarian = new Documentarian(this.explorBot.getProvider(), this.config, this.explorBot.getExplorer());
this.ensureDirectory(this.configParser.getOutputDir());
this.ensureDirectory(this.getPagesDir());
}
Expand Down Expand Up @@ -128,18 +128,22 @@ class DocBot {
summary: documentation.summary,
canCount: documentation.can.length,
mightCount: documentation.might.length,
interactionCount: (documentation.interactions || []).length,
canActions: documentation.can.map((item) => item.action),
mightActions: documentation.might.map((item) => item.action),
interactionActions: (documentation.interactions || []).map((item) => item.action),
qualityNotes: documentation.qualityNotes || [],
filePath,
});
documented.add(pageKey);

const nextPaths = this.extractNextPaths(state, baseUrl, research);
const nextPaths = this.extractNextPaths(state, baseUrl, research, documentation);
const interactionPriorityPaths = new Set(this.extractInteractionPaths(baseUrl, documentation));
for (const nextPath of nextPaths) {
if (documented.has(this.getPageKey(nextPath))) {
continue;
}
if (stateManager.hasVisitedState(nextPath)) {
if (!interactionPriorityPaths.has(nextPath) && stateManager.hasVisitedState(nextPath)) {
continue;
}
this.enqueuePath(nextPath, queue, queued);
Expand Down Expand Up @@ -185,10 +189,18 @@ class DocBot {
return true;
}

private extractNextPaths(state: WebPageState, baseUrl: string, research: string): string[] {
private extractNextPaths(state: WebPageState, baseUrl: string, research: string, documentation?: PageDocumentation): string[] {
const paths: string[] = [];
const seen = new Set<string>();

for (const interactionPath of this.extractInteractionPaths(baseUrl, documentation)) {
if (seen.has(interactionPath)) {
continue;
}
seen.add(interactionPath);
paths.push(interactionPath);
}

for (const link of state.links || []) {
const nextPath = this.resolveLink(link, baseUrl);
if (!nextPath) {
Expand Down Expand Up @@ -224,11 +236,58 @@ class DocBot {
return paths;
}

/**
 * Collects follow-up crawl paths surfaced by a page's documented interactions.
 *
 * For every interaction, its `targetUrl` (when truthy) is considered first,
 * followed by each entry of `discoveredUrls`. A raw URL is kept only if
 * `resolveRawUrl` returns a path for it, `isEligibleNextPath` accepts that
 * path, and the path has not been collected already.
 *
 * @param baseUrl - Base URL that raw interaction URLs are resolved against.
 * @param documentation - Page documentation whose `interactions` are scanned; may be omitted.
 * @returns Unique accepted paths in first-seen order; empty when there are no interactions.
 */
private extractInteractionPaths(baseUrl: string, documentation?: PageDocumentation): string[] {
  const ordered: string[] = [];
  const known = new Set<string>();

  // Shared pipeline for both URL sources: resolve, check eligibility, then dedupe.
  const consider = (rawUrl: string): void => {
    const candidate = this.resolveRawUrl(rawUrl, baseUrl);
    if (!candidate || !this.isEligibleNextPath(candidate) || known.has(candidate)) {
      return;
    }
    known.add(candidate);
    ordered.push(candidate);
  };

  for (const interaction of documentation?.interactions || []) {
    // The explicit target of an interaction takes precedence over URLs merely discovered during it.
    if (interaction.targetUrl) {
      consider(interaction.targetUrl);
    }

    for (const discoveredUrl of interaction.discoveredUrls || []) {
      consider(discoveredUrl);
    }
  }

  return ordered;
}

/**
 * Decides whether a resolved path may be queued for crawling.
 *
 * A path qualifies only when `shouldCrawlDocPath` accepts it for the current
 * config and `isInScope` accepts it as well. The scope check is not evaluated
 * when the crawl check already rejects the path.
 *
 * @param nextPath - Resolved candidate path.
 * @returns `true` when both checks pass, otherwise `false`.
 */
private isEligibleNextPath(nextPath: string): boolean {
  if (shouldCrawlDocPath(nextPath, this.config) && this.isInScope(nextPath)) {
    return true;
  }
  return false;
}

/**
 * Resolves a page link to a crawl path by delegating its `url` to `resolveRawUrl`.
 *
 * @param link - Link whose `url` should be resolved.
 * @param baseUrl - Base URL passed through to `resolveRawUrl`.
 * @returns Whatever `resolveRawUrl` yields for the link's URL (a path string or `null`).
 */
private resolveLink(link: Link, baseUrl: string): string | null {
  const { url } = link;
  return this.resolveRawUrl(url, baseUrl);
}

private resolveRawUrl(rawUrl: string, baseUrl: string): string | null {
let resolved: URL;

try {
resolved = new URL(link.url, baseUrl);
resolved = new URL(rawUrl, baseUrl);
} catch {
return null;
}
Expand Down
58 changes: 56 additions & 2 deletions boat/doc-collector/src/docs-renderer.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import path from 'node:path';
import type { WebPageState } from '../../../src/state-manager.ts';
import type { PageDocumentation } from './ai/documentarian.ts';
import type { PageDocumentation, StateTransition } from './ai/documentarian.ts';

function renderPageDocumentation(state: WebPageState, documentation: PageDocumentation): string {
const lines: string[] = [];
Expand All @@ -16,6 +16,28 @@ function renderPageDocumentation(state: WebPageState, documentation: PageDocumen
lines.push('');
lines.push(ensureSentence(documentation.summary));
lines.push('');

const interactions = documentation.interactions;
if (interactions && interactions.length > 0) {
lines.push('## State Transitions');
lines.push('');
for (const transition of interactions) {
lines.push(`### ${transition.action}`);
lines.push('');
lines.push(`**Before:** ${transition.before}`);
lines.push('');
lines.push(`**After:** ${transition.after}`);
lines.push('');
if (transition.newCapabilities && transition.newCapabilities.length > 0) {
lines.push('**Observed changes:**');
for (const cap of transition.newCapabilities) {
lines.push(`- ${cap}`);
}
lines.push('');
}
}
}

lines.push('## User Can');
lines.push('');

Expand Down Expand Up @@ -50,6 +72,16 @@ function renderPageDocumentation(state: WebPageState, documentation: PageDocumen
lines.push('');
}

const qualityNotes = documentation.qualityNotes;
if (qualityNotes && qualityNotes.length > 0) {
lines.push('## Coverage Notes');
lines.push('');
for (const note of qualityNotes) {
lines.push(`- ${ensureSentence(note)}`);
}
lines.push('');
}

return `${lines.join('\n').trimEnd()}\n`;
}

Expand Down Expand Up @@ -79,6 +111,9 @@ function renderSpecIndex(outputDir: string, startPath: string, pages: Documented
lines.push(`Purpose: ${ensureSentence(page.summary)}`);
lines.push(`Proven actions: ${page.canCount}`);
lines.push(`Possible actions: ${page.mightCount}`);
if (page.interactionCount > 0) {
lines.push(`Interactive transitions: ${page.interactionCount}`);
}
if (page.title) {
lines.push(`Title: ${normalizeInlineText(page.title)}`);
}
Expand All @@ -99,6 +134,22 @@ function renderSpecIndex(outputDir: string, startPath: string, pages: Documented
}
lines.push('');
}

if (page.interactionActions.length > 0) {
lines.push('Interactive Findings:');
for (const action of page.interactionActions.slice(0, 3)) {
lines.push(`- ${normalizeInlineText(action)}`);
}
lines.push('');
}

if (page.qualityNotes.length > 0) {
lines.push('Coverage Notes:');
for (const note of page.qualityNotes) {
lines.push(`- ${ensureSentence(note)}`);
}
lines.push('');
}
}

if (skipped.length > 0) {
Expand Down Expand Up @@ -173,8 +224,11 @@ interface DocumentedPage {
summary: string;
canCount: number;
mightCount: number;
interactionCount: number;
canActions: string[];
mightActions: string[];
interactionActions: string[];
qualityNotes: string[];
filePath: string;
}

Expand All @@ -184,4 +238,4 @@ interface SkippedPage {
}

export { renderPageDocumentation, renderSpecIndex, ensureSentence, normalizeAction };
export type { DocumentedPage, SkippedPage };
export type { DocumentedPage, SkippedPage, StateTransition };
75 changes: 67 additions & 8 deletions docs/doc-collector.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,69 @@

`doc-collector` crawls pages and generates a lightweight spec:

- `output/docs/spec.md`
- `output/docs/pages/*.md`
- `output/research/*.md`
- `output/docs/spec.md` - Main index
- `output/docs/pages/*.md` - Individual page documentation
- `output/research/*.md` - Research data

Each page is summarized as:

- `Purpose`
- `User Can`
- `User Might`
- `User Can` (proven capabilities)
- `User Might` (assumed capabilities)
- `State Transitions` (when interactive mode is enabled and useful)

## Features

### Static Documentation (Default)

Analyzes pages without interaction:

- ✅ Researches page structure via Researcher agent
- ✅ Identifies UI elements and navigation
- ✅ Generates documentation from static analysis
- ✅ Fast and reliable

### Interactive Documentation

When `interactive: true` in config:

- ✅ Tries selected page interactions before final documentation
- ✅ Captures raw state observations after clicking links, buttons, and tab controls
- ✅ Lets the Documentarian classify observed behavior from before/after evidence
- ✅ Can enqueue URLs discovered from successful interactions
- ✅ Falls back to static documentation when interaction results are weak or unreliable

This mode is intended for cases where static research alone is not enough, for example:

- alternate page states such as tabs
- post-click behavior
- item/detail navigation
- documenting what changed after an interaction

When interaction results are useful, page docs may include:

- `State Transitions`
- `Before`
- `After`
- `Observed changes`
- `Coverage Notes`

Example:

```markdown
## State Transitions

### Clicked tab: Merged
**Before:** 18 elements (tab:3, link:5, text:7)
**After:** Tab content: 21 elements (tab:3, link:8, text:7)

### Clicked "Save" button
**Before:** Form with 8 fields
**After:** Success message appeared, form cleared
**Observed changes:**
- User can create new runs
- User can see run ID after creation
```

## Commands

Expand All @@ -21,7 +75,7 @@ Start from a relative path or a full URL:
```bash
explorbot docs collect /users/sign_in
explorbot docs collect /docs/openapi#tag/project-analytics-tags --max-pages 20
explorbot docs collect https://teleportal.ua/ua/serials/stb/kod --path explorbot-testing --show --session --max-pages 20
explorbot docs collect https://example.com/workspace/projects --path explorbot-testing --show --session --max-pages 20
```

Supported options:
Expand Down Expand Up @@ -74,7 +128,7 @@ export default {
deniedPathSegments: ['callback', 'callbacks', 'logout', 'signout', 'sign_out', 'destroy', 'delete', 'remove'],
minCanActions: 1,
minInteractiveElements: 3,
// prompt: 'Add domain-specific guidance here',
interactive: false,
},
};
```
Expand All @@ -84,6 +138,7 @@ export default {
| `maxPages` | `100` | Maximum pages to document |
| `output` | `'docs'` | Output folder inside `output/` |
| `screenshot` | `true` | Allow screenshot-assisted research |
| `interactive` | `false` | Enable interaction attempts before final documentation |
| `prompt` | unset | Extra instructions for the Documentarian |
| `collapseDynamicPages` | `true` | Collapse dynamic URLs like `/users/123` and `/users/456` into one crawl key |
| `scope` | `'site'` | Crawl breadth mode |
Expand Down Expand Up @@ -130,8 +185,12 @@ Softer boundary than `subtree`: keep the same scope root, its descendants, and c
- same-origin only
- visited pages are tracked through the state manager
- dead loops are stopped
- next targets are discovered from links and research navigation
- next targets are discovered from links, research navigation, and successful interaction results
- low-signal pages can be skipped
- interactive mode does not replace static documentation; it augments it
- static mode is unchanged when `interactive` is disabled
- if interaction-driven generation fails, the collector falls back to static documentation
- output quality still depends on research quality

## Related Docs

Expand Down
Loading
Loading