testomatio · DavertMik · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,17 @@
 # Changelog
 
+## 2026-06-01
+
+### Configuration
+- **`ai.agents.navigator.verifyAttempts`** — Maximum number of assertion checks the Navigator runs when verifying a claim; it stops earlier once the pass/fail outcome is decided. Lower it to make verification faster, raise it for more confidence. Default: `3`.
+- **`ai.agents.navigator.verifyTimeout`** — Timeout in milliseconds for each verification assertion, so a check that won't match fails fast instead of waiting the full page timeout. Default: `1500`.
+
+### Changes
+- [Navigator] Verification is faster — it stops as soon as the outcome is decided instead of running every check, runs fewer assertions, and gives up quickly on checks that won't match rather than waiting the full timeout.
+- [Navigator] Reuses an earlier verification result on the same page instead of checking the same claim again — including when the new claim is worded differently but means the same thing.
+- [Pilot] A scenario whose goal was not actually performed this run no longer passes. Reaching a page, tab, or prompt is treated as a milestone, not success. Scenarios that cannot proceed because a prerequisite is missing — a required control is absent, an integration is not connected, or only a setup/empty-state prompt is shown — are now marked skipped instead of passed.
+- [Reporter] Local HTML and markdown reports are no longer produced automatically — turn them on with `reporter.html: true` and `reporter.markdown: true`. The run group is no longer a hardcoded "Explorbot <date>" default. `explorbot init` now writes a `reporter` block (HTML on, markdown on, and a date-based run group) into the generated config, so report output is visible and editable instead of assumed.
+
 ## 2026-05-25
 
 ### Changes

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -57,6 +57,8 @@ Avoid repetitive code patterns
 Avoid ternary operators!
 Never use `...(condition ? { key: value } : {})` spread pattern — use a plain `if` statement instead
 Avoid creating extra functions that were not explicitly set
+Private methods must be placed after public methods
+Avoid `=== null` / `=== undefined` comparisons when not needed — prefer shorter `if (...)` or `if (!...)` when applicable
 Use dedent when formatting prompts
 Use `mdq()` from `src/utils/markdown-query.ts` for all markdown manipulation (find sections, replace tables, extract text). Never do manual line-splitting/counting on markdown.
 Put types into the end of file

diff --git a/bun.lock b/bun.lock
diff --git a/package.json b/package.json
@@ -65,7 +65,7 @@
     "@opentelemetry/sdk-trace-base": "^2.2.0",
     "@opentelemetry/semantic-conventions": "^1.38.0",
     "@scalar/openapi-parser": "^0.25.6",
-    "@testomatio/reporter": "^2.7.9-beta.3-markdown",
+    "@testomatio/reporter": "^2.8.4",
     "ai": "^6.0.6",
     "axe-core": "^4.11.1",
     "bash-tool": "^1.3.15",
@@ -91,7 +91,7 @@
     "micromatch": "^4.0.8",
     "ora-classic": "^5.4.2",
     "parse5": "^8.0.0",
-    "playwright": "^1.59.0",
+    "playwright": "^1.60",
     "react": "^19.1.1",
     "strip-ansi": "^7.1.2",
     "turndown": "^7.2.1",

diff --git a/src/action-result.ts b/src/action-result.ts
@@ -207,6 +207,28 @@ export class ActionResult implements ActionResultData {
     this.verifications[assertion] = passed;
   }
 
+  /**
+   * Looks up the recorded outcome of an earlier verification.
+   *
+   * @param message - Exact claim text, or a pattern tested against every recorded claim.
+   * @returns The outcome stored for the exact claim (string) or for the first recorded claim the
+   *   pattern matches (RegExp); `null` when there is no such record.
+   */
+  getVerification(message: string | RegExp): boolean | null {
+    if (!this.verifications) return null;
+    if (typeof message === 'string') {
+      // Own-property lookup: a claim such as "constructor" must not resolve to an inherited member.
+      if (!Object.prototype.hasOwnProperty.call(this.verifications, message)) return null;
+      return this.verifications[message] ?? null;
+    }
+    for (const [assertion, passed] of Object.entries(this.verifications)) {
+      // Patterns with the g or y flag resume from `lastIndex`; rewind so every claim is tested from its start.
+      message.lastIndex = 0;
+      if (message.test(assertion)) return passed;
+    }
+    return null;
+  }
+
   isSameUrl(state: WebPageState): boolean {
     if (!this.url || this.url === '') {
       return false;

diff --git a/src/ai/navigator.ts b/src/ai/navigator.ts
@@ -82,6 +82,28 @@ class Navigator implements Agent {
     this.hooksRunner = new HooksRunner(explorer, explorer.getConfig());
   }
 
+  /**
+   * Maximum number of assertion checks run for one verification.
+   * Reads `ai.agents.navigator.verifyAttempts`; a missing, NaN or below-1 value falls back to 3.
+   */
+  private get verifyAttempts(): number {
+    const configured = this.explorer.getConfig().ai?.agents?.navigator?.verifyAttempts;
+    // 0 and NaN are rejected too — a verification needs at least one attempt.
+    if (!configured || configured < 1) return 3;
+    return Math.floor(configured);
+  }
+
+  /**
+   * Timeout in milliseconds applied to each verification assertion.
+   * Reads `ai.agents.navigator.verifyTimeout`; a missing, NaN or non-positive value falls back to 1500.
+   */
+  private get verifyTimeout(): number {
+    const configured = this.explorer.getConfig().ai?.agents?.navigator?.verifyTimeout;
+    // Playwright treats a default timeout of 0 as "no timeout", the opposite of failing fast.
+    if (!configured || configured < 0) return 1500;
+    return configured;
+  }
+
   private getBaseOrigin(): string | null {
     const baseUrl = this.explorer.getConfig().playwright.url;
     try {
@@ -623,6 +631,12 @@ class Navigator implements Agent {
     tag('info').log('AI Navigator verifying state at', actionResult.url);
     debugLog('Verification message:', message);
 
+    const cachedVerification = actionResult.getVerification(message);
+    if (cachedVerification !== null) {
+      tag('substep').log(`Reusing cached verification: ${cachedVerification ? 'PASS' : 'FAIL'}`);
+      return { verified: cachedVerification, successfulCodes: [], assertionSteps: [], totalAttempted: 0 };
+    }
+
     let knowledge = '';
     let experience = '';
 
@@ -645,6 +659,21 @@ class Navigator implements Agent {
       }
     }
 
+    const priorVerifications = Object.entries(actionResult.verifications ?? {});
+    let verificationContext = '';
+    if (priorVerifications.length > 0) {
+      const lines = priorVerifications.map(([claim, passed]) => `- "${claim}" → ${passed ? 'passed' : 'failed'}`).join('\n');
+      verificationContext = dedent`
+        <already_verified>
+        These claims were already checked on this page:
+        ${lines}
+
+        If the claim to verify has the same meaning as one above (even if worded differently), do NOT write any assertion code.
+        Respond with a single line and nothing else: ALREADY_VERIFIED: <exact text of the matching claim>
+        </already_verified>
+      `;
+    }
+
     const prompt = dedent`
       <message>
         ${message}
@@ -658,11 +687,13 @@ class Navigator implements Agent {
         </page_html>
       </page>
 
+      ${verificationContext}
+
       <task>
         Identify what assertion the user wants to verify on the page.
-        Propose different CodeceptJS assertion code blocks to verify the expected state.
+        Propose 2-3 strong, distinct CodeceptJS assertion code blocks that each directly prove the claim.
         Use only data from the <page> context to plan the verification.
-        Try various locators and approaches to verify the assertion.
+        Prefer the fewest, most specific assertions over many variants of the same locator.
 
         IMPORTANT: Each code block must verify the SPECIFIC claim in the message, not just a generic aspect of it.
         Bad: I.seeElement({"role":"button","aria-pressed":"true"}) — matches ANY button, not the specific one
@@ -684,64 +715,98 @@ class Navigator implements Agent {
     const conversation = this.provider.startConversation(this.systemPrompt, 'navigator');
     conversation.addUserText(prompt);
 
+    let alreadyVerified = false;
     const tools = this.buildExperienceTools();
 
     let codeBlocks: string[] = [];
     const successfulCodes: string[] = [];
     const assertionSteps: Array<{ name: string; args: any[] }> = [];
 
     const action = this.explorer.createAction();
+    let failures = 0;
 
-    await loop(
-      async ({ stop, iteration }) => {
-        if (codeBlocks.length === 0) {
-          const result = await this.provider.invokeConversation(conversation, tools);
-          if (!result) return;
-          const aiResponse = result?.response?.text;
-          debugLog('Received AI response:', aiResponse?.length ?? 0, 'characters');
-          tag('step').log('Verifying assertion...');
-          codeBlocks = extractCodeBlocks(aiResponse ?? '');
-        }
+    const page = this.explorer.playwrightHelper?.page;
+    const originalTimeout = this.explorer.playwrightHelper?.options?.timeout ?? 3000;
+    page?.setDefaultTimeout(this.verifyTimeout);
 
-        if (codeBlocks.length === 0) {
-          return;
-        }
+    try {
+      await loop(
+        async ({ stop, iteration }) => {
+          if (codeBlocks.length === 0) {
+            const result = await this.provider.invokeConversation(conversation, tools);
+            if (!result) return;
+            const aiResponse = result?.response?.text ?? '';
+            debugLog('Received AI response:', aiResponse.length, 'characters');
+            tag('step').log('Verifying assertion...');
+
+            if (this.checkAlreadyVerified(aiResponse, actionResult)) {
+              alreadyVerified = true;
+              stop();
+              return;
+            }
 
-        const codeBlock = codeBlocks[iteration - 1];
-        if (!codeBlock) {
-          stop();
-          return;
-        }
+            codeBlocks = extractCodeBlocks(aiResponse);
+          }
 
-        await this.explorer.switchToMainFrame();
+          if (codeBlocks.length === 0) {
+            return;
+          }
 
-        const verified = await action.attempt(codeBlock, message, false);
+          const codeBlock = codeBlocks[iteration - 1];
+          if (!codeBlock) {
+            stop();
+            return;
+          }
 
-        if (verified) {
-          tag('success').log('Verification passed');
-          successfulCodes.push(codeBlock);
-          assertionSteps.push(...action.assertionSteps);
-        }
-      },
-      {
-        maxAttempts: this.MAX_ATTEMPTS,
-        observability: {
-          agent: 'navigator',
-        },
-        catch: async (error) => {
-          debugLog(error);
+          await this.explorer.switchToMainFrame();
+
+          const verified = await action.attempt(codeBlock, message, false);
+
+          if (verified) {
+            tag('success').log('Verification passed');
+            successfulCodes.push(codeBlock);
+            assertionSteps.push(...action.assertionSteps);
+          } else {
+            failures++;
+          }
+
+          const target = Math.min(codeBlocks.length, this.verifyAttempts);
+          const majorityNeeded = Math.floor(target / 2) + 1;
+          if (successfulCodes.length >= majorityNeeded || failures > target - majorityNeeded) {
+            stop();
+          }
         },
-      }
-    );
+        {
+          maxAttempts: this.verifyAttempts,
+          observability: {
+            agent: 'navigator',
+          },
+          catch: async (error) => {
+            debugLog(error);
+          },
+        }
+      );
+    } finally {
+      page?.setDefaultTimeout(originalTimeout);
+    }
 
-    const totalAttempted = Math.min(codeBlocks.length, this.MAX_ATTEMPTS);
-    const verified = totalAttempted <= 1 ? successfulCodes.length > 0 : successfulCodes.length > totalAttempted / 2;
+    const totalAttempted = Math.min(codeBlocks.length, this.verifyAttempts);
+    const majorityNeeded = Math.floor(totalAttempted / 2) + 1;
+    let verified = successfulCodes.length >= majorityNeeded;
+    if (alreadyVerified) verified = true;
 
     actionResult.addVerification(message, verified);
     this.explorer.getStateManager().updateState(actionResult);
 
     return { verified, successfulCodes, assertionSteps, totalAttempted };
   }
+
+  private checkAlreadyVerified(aiResponse: string, actionResult: ActionResult): boolean {
+    // True only when the response cites a recorded claim whose earlier verification passed.
+    const citedClaim = /ALREADY_VERIFIED:\s*(.+)/i.exec(aiResponse)?.[1];
+    if (!citedClaim) return false;
+    return actionResult.getVerification(citedClaim.trim().replace(/^["']|["']$/g, '')) === true;
+  }
 }
 
 export { Navigator };
diff --git a/src/ai/pilot.ts b/src/ai/pilot.ts
@@ -322,6 +322,12 @@ export class Pilot implements Agent {
       - "Delete X" → X must be gone. Clicking delete is NOT enough.
       - "Edit X" → updated value must be persisted (visible in list/detail). Opening edit is NOT enough; redirect after save with the new value visible IS enough.
       - Negative tests ("without a name", "invalid", "duplicate", "unauthorized") → success means the system PREVENTED the action with validation/error.
+      - Navigation-prefixed titles ("Access/Open/Go to X to <do Y>") → the goal is <do Y>; reaching X is
+        only a milestone. A satisfied milestone (tab active, panel/prompt visible, list shown) is NEVER a pass
+        if <do Y> did not occur this run.
+      - If the page reveals the goal cannot be performed here — required control absent, integration not
+        connected, or only a setup/connect/empty-state prompt is shown — vote "skipped" (prerequisites unmet),
+        never "pass".
 
       PROVENANCE: the entity you cite as proof must appear by name in <notes> or
       <session_log> tool inputs for THIS run. Name absent from tester activity = stale

diff --git a/src/commands/init-command.ts b/src/commands/init-command.ts
@@ -30,6 +30,15 @@ const config = {
     // agentic model for decision making
     agenticModel: openrouter('minimax/minimax-m2.5:nitro'),
   },
+
+  reporter: {
+    // Save a local HTML report after each run.
+    html: true,
+    // Save a local markdown report after each run.
+    markdown: true,
+    // Group runs by title in Testomat.io / HTML reports. Defaults to today's date — customize or remove.
+    runGroup: new Date().toISOString().slice(0, 10),
+  },
 };
 
 export default config;

diff --git a/src/config.ts b/src/config.ts
@@ -84,6 +84,8 @@ interface PilotAgentConfig extends AgentConfig {
 interface NavigatorAgentConfig extends AgentConfig {
   addHtmlOnTry?: number;
   maxAttempts?: number;
+  verifyAttempts?: number; // max assertion checks per verification; Navigator falls back to 3 when unset
+  verifyTimeout?: number; // per-assertion timeout in ms during verification; Navigator falls back to 1500 when unset
 }
 
 type HealFn = (ctx: { I: any }) => Promise<void> | void;

diff --git a/src/reporter.ts b/src/reporter.ts
@@ -29,7 +29,7 @@ export class Reporter {
     this.reporterEnabled = Reporter.resolveEnabled(config);
     this.stateManager = stateManager;
 
-    if (this.reporterEnabled && (!process.env.TESTOMATIO || config?.html)) {
+    if (this.reporterEnabled && config?.html) {
       this.configureHtmlPipe();
     }
 
@@ -63,6 +63,7 @@ export class Reporter {
   static resolveEnabled(config?: ReporterConfig): boolean {
     if (config?.enabled === true) return true;
     if (config?.enabled === false) return false;
+    if (config?.html || config?.markdown) return true;
     return Boolean(process.env.TESTOMATIO);
   }
 
@@ -88,12 +89,8 @@ export class Reporter {
 
   private configureRunGroup(runGroup: string | null | undefined): void {
     if (process.env.TESTOMATIO_RUNGROUP_TITLE) return;
-    if (runGroup === null) return;
-    if (runGroup) {
-      process.env.TESTOMATIO_RUNGROUP_TITLE = runGroup;
-      return;
-    }
-    process.env.TESTOMATIO_RUNGROUP_TITLE = `Explorbot ${new Date().toISOString().slice(0, 10)}`;
+    if (!runGroup) return;
+    process.env.TESTOMATIO_RUNGROUP_TITLE = runGroup;
   }
 
   async startRun(): Promise<void> {