patchmemory · patchmemory · Jan 13, 2026 · Jan 13, 2026 · Jan 13, 2026
diff --git a/docs/testing.md b/docs/testing.md
@@ -290,3 +290,104 @@ npm run e2e:headed   # optional, debug mode
 ```
 
 **Note:** E2E relies on BASE_URL from global-setup (spawns Flask). `SCIDK_PROVIDERS` defaults to `local_fs` in CI. The scan E2E uses a real temp directory under the runner OS temp path and triggers a synchronous scan via `/api/scan`.
+
+## E2E Testing Complete Guide
+
+### Quick Start
+
+1. **Install dependencies** (one-time setup):
+   ```bash
+   npm install
+   npm run e2e:install  # Installs Playwright browsers
+   ```
+
+2. **Run all E2E tests**:
+   ```bash
+   npm run e2e          # Headless (recommended for CI/local verification)
+   npm run e2e:headed   # With visible browser (useful for debugging)
+   ```
+
+3. **Run specific test files**:
+   ```bash
+   npm run e2e -- e2e/smoke.spec.ts
+   npm run e2e -- e2e/core-flows.spec.ts
+   npm run e2e -- e2e/negative.spec.ts
+   ```
+
+### Available Test Suites
+
+- **`e2e/smoke.spec.ts`**: Basic page load and navigation smoke tests
+- **`e2e/core-flows.spec.ts`**: Full user workflows (scan → browse → details)
+- **`e2e/scan.spec.ts`**: Directory scanning functionality
+- **`e2e/browse.spec.ts`**: File browsing and navigation
+- **`e2e/negative.spec.ts`**: Error handling, empty states, edge cases
+
+### CI Integration
+
+E2E tests run automatically in GitHub Actions on every push and PR. See `.github/workflows/ci.yml`:
+
+- **Job: `e2e`**: Runs Playwright tests with `SCIDK_PROVIDERS=local_fs`
+- **On failure**: Uploads Playwright report and traces as artifacts
+- **Access artifacts**: Go to Actions → failed run → download `playwright-report`
+
+To view traces locally:
+```bash
+npx playwright show-trace test-results/<test-name>/trace.zip
+```
+
+## Troubleshooting
+
+### E2E Tests
+
+**Problem: `spawn python ENOENT` or Python not found**
+- **Cause**: Playwright global-setup can't find Python executable
+- **Fix**: `e2e/global-setup.ts` launches `python3` on Linux/Mac and `python` on Windows, so make sure that executable is installed and on your `PATH`
+- **Verify**: `which python3` (Linux/Mac) or `where python` (Windows)
+
+**Problem: Tests fail with "element not found" or timeouts**
+- **Cause**: Page load too slow, or elements missing `data-testid` attributes
+- **Fix 1**: Check Flask server logs in test output for errors
+- **Fix 2**: Run with headed mode to see what's happening: `npm run e2e:headed`
+- **Fix 3**: Verify `data-testid` attributes exist in templates (`scidk/ui/templates/`)
+
+**Problem: "Port already in use" error**
+- **Cause**: Previous Flask server didn't shut down cleanly
+- **Fix**: Kill stale processes: `pkill -f "python.*scidk.app"` or `lsof -ti:5000 | xargs kill`
+
+**Problem: Tests pass locally but fail in CI**
+- **Cause**: Different providers enabled, or timing differences
+- **Check**: CI uses `SCIDK_PROVIDERS=local_fs` only (see `.github/workflows/ci.yml`)
+- **Fix**: Run locally with same env: `SCIDK_PROVIDERS=local_fs npm run e2e`
+
+### pytest Tests
+
+**Problem: `ModuleNotFoundError` for scidk package**
+- **Cause**: Package not installed in editable mode
+- **Fix**: `pip install -e .[dev]`
+
+**Problem: Tests fail with "No such file or directory" for temp files**
+- **Cause**: Tests didn't clean up properly, or timing issue with `tmp_path`
+- **Fix**: Use pytest's `tmp_path` fixture, which gives every test its own fresh directory (pytest prunes old temp directories on later runs, not after each test)
+
+**Problem: "RuntimeError: Working outside of application context"**
+- **Cause**: Flask test missing `app` or `client` fixture
+- **Fix**: Add `def test_something(client):` to use Flask test client
+
+**Problem: Neo4j or rclone tests fail**
+- **Cause**: Missing mocks/fakes for external dependencies
+- **Fix**: Use helpers from `tests/helpers/`:
+  - `from tests.helpers.neo4j import inject_fake_neo4j`
+  - `from tests.helpers.rclone import rclone_env`
+
+**Problem: Slow tests or database locks**
+- **Cause**: SQLite WAL mode or concurrent access
+- **Fix**: Use `tmp_path` for isolated test databases, avoid shared state between tests
+
+### General Tips
+
+- **Run tests verbosely**: `python -m pytest -v`; to step through an E2E test in the Playwright Inspector, use `npm run e2e -- --debug`
+- **Run single test**: `python -m pytest tests/test_foo.py::test_bar -v`
+- **Skip slow tests**: `python -m pytest -m "not e2e" -q`
+- **Clear pytest cache**: `rm -rf .pytest_cache`
+- **Check logs**: E2E server logs appear inline with test output
+- **Update snapshots**: If visual regression tests exist, use `npm run e2e -- --update-snapshots`
diff --git a/e2e/core-flows.spec.ts b/e2e/core-flows.spec.ts
@@ -0,0 +1,124 @@
+import { test, expect, request } from '@playwright/test';
+import os from 'os';
+import fs from 'fs';
+import path from 'path';
+
+/**
+ * Core E2E flows for SciDK: scan → browse → file details
+ * Tests user-visible outcomes with stable selectors (data-testid)
+ */
+
+/**
+ * Builds a throwaway directory tree under the OS temp dir for scan/browse tests.
+ *
+ * Layout created: `data.txt`, `notes.md`, and `subdir/nested.txt`.
+ *
+ * @param prefix - Name prefix handed to `fs.mkdtempSync`.
+ * @returns Path of the newly created root directory.
+ */
+function createTestDirectory(prefix = 'scidk-e2e-core-'): string {
+  const root = fs.mkdtempSync(path.join(os.tmpdir(), prefix));
+  const nestedDir = path.join(root, 'subdir');
+  fs.mkdirSync(nestedDir);
+
+  // Seed files: two at the top level, one inside the nested directory
+  const seeds: Array<[string, string]> = [
+    [path.join(root, 'data.txt'), 'sample data'],
+    [path.join(root, 'notes.md'), '# Notes\nTest content'],
+    [path.join(nestedDir, 'nested.txt'), 'nested file'],
+  ];
+  for (const [file, contents] of seeds) {
+    fs.writeFileSync(file, contents);
+  }
+  return root;
+}
+
+/**
+ * Happy-path flow: scan a temp directory through `/api/scan`, confirm the
+ * scanned path shows up on the Home page, open the Files page, and assert
+ * that no console errors were logged along the way.
+ */
+test('complete flow: scan → browse → file details', async ({ page, baseURL, request: pageRequest }) => {
+  // Record every browser console message so errors can be asserted on at the end
+  const consoleMessages: { type: string; text: string }[] = [];
+  page.on('console', (msg) => {
+    consoleMessages.push({ type: msg.type(), text: msg.text() });
+  });
+
+  const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000';
+  const tempDir = createTestDirectory();
+
+  // try/finally guarantees the temp directory is removed even if an assertion fails
+  try {
+    // Step 1: Scan the directory via API
+    const api = pageRequest || (await request.newContext());
+    const scanResp = await api.post(`${base}/api/scan`, {
+      headers: { 'Content-Type': 'application/json' },
+      data: { path: tempDir, recursive: true },
+    });
+    expect(scanResp.ok()).toBeTruthy();
+
+    // Step 2: Navigate to Home and verify scan appears
+    await page.goto(base);
+    await page.waitForLoadState('networkidle');
+
+    // getByTestId returns a Locator synchronously, so it is not awaited
+    const homeScans = page.getByTestId('home-recent-scans');
+    await expect(homeScans).toBeVisible();
+
+    // Verify the scanned path appears on the page
+    const pathOccurrences = await page.getByText(tempDir, { exact: false }).count();
+    expect(pathOccurrences).toBeGreaterThan(0);
+
+    // Step 3: Navigate to Files page
+    await page.getByTestId('nav-files').click();
+    await page.waitForLoadState('networkidle');
+    await expect(page.getByTestId('files-title')).toBeVisible();
+    await expect(page.getByTestId('files-root')).toBeVisible();
+
+    // Step 4: Verify browsing works (the Files root must render some text content)
+    const filesContent = await page.getByTestId('files-root').textContent();
+    expect(filesContent).toBeTruthy();
+
+    // Step 5: Ensure no console errors occurred during the flow
+    await page.waitForTimeout(500); // Brief wait to catch any delayed errors
+    const errors = consoleMessages.filter((m) => m.type === 'error');
+    // Compare against [] (not just the length) so a failure prints the offending messages
+    expect(errors).toEqual([]);
+  } finally {
+    // Cleanup
+    fs.rmSync(tempDir, { recursive: true, force: true });
+  }
+});
+
+/**
+ * API-only check: after a recursive scan of a temp directory, the directory
+ * must be listed by `/api/directories`. No browser page is needed, so only
+ * the `baseURL` and `request` fixtures are requested.
+ */
+test('scan with recursive flag captures nested files', async ({ baseURL, request: pageRequest }) => {
+  const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000';
+  const tempDir = createTestDirectory('scidk-e2e-recursive-');
+
+  // try/finally guarantees the temp directory is removed even if an assertion fails
+  try {
+    const api = pageRequest || (await request.newContext());
+    const scanResp = await api.post(`${base}/api/scan`, {
+      headers: { 'Content-Type': 'application/json' },
+      data: { path: tempDir, recursive: true },
+    });
+    expect(scanResp.ok()).toBeTruthy();
+
+    // Fetch the directory listing produced by the scan
+    const directoriesResp = await api.get(`${base}/api/directories`);
+    expect(directoriesResp.ok()).toBeTruthy();
+    const directories = await directoriesResp.json();
+    expect(Array.isArray(directories)).toBe(true);
+
+    // Check that our scanned directory appears; entries without a string `path` are ignored
+    const hasTempDir = directories.some(
+      (d: { path?: unknown }) => typeof d.path === 'string' && d.path.includes(tempDir)
+    );
+    expect(hasTempDir).toBe(true);
+  } finally {
+    // Cleanup
+    fs.rmSync(tempDir, { recursive: true, force: true });
+  }
+});
+
+/**
+ * Scans a temp directory non-recursively, then opens the Files page via the
+ * nav button and asserts that its stable `data-testid` anchors are visible.
+ */
+test('browse page shows correct file listing structure', async ({ page, baseURL, request: pageRequest }) => {
+  const base = baseURL || process.env.BASE_URL || 'http://127.0.0.1:5000';
+  const tempDir = createTestDirectory('scidk-e2e-browse-');
+
+  // try/finally guarantees the temp directory is removed even if an assertion fails
+  try {
+    // Scan directory first
+    const api = pageRequest || (await request.newContext());
+    const scanResp = await api.post(`${base}/api/scan`, {
+      headers: { 'Content-Type': 'application/json' },
+      data: { path: tempDir, recursive: false },
+    });
+    // Fail fast on a broken scan, matching the other tests in this file
+    expect(scanResp.ok()).toBeTruthy();
+
+    // Navigate to Files/Datasets page (accessible via nav-files button)
+    await page.goto(base);
+    await page.waitForLoadState('networkidle');
+    await page.getByTestId('nav-files').click();
+    await page.waitForLoadState('networkidle');
+
+    // Verify stable selectors are present
+    await expect(page.getByTestId('files-title')).toBeVisible();
+    await expect(page.getByTestId('files-root')).toBeVisible();
+
+    // The page should have rendered with a non-empty document title
+    const title = await page.title();
+    expect(title).toBeTruthy();
+  } finally {
+    // Cleanup
+    fs.rmSync(tempDir, { recursive: true, force: true });
+  }
+});
diff --git a/tests/test_helpers_example.py b/tests/test_helpers_example.py
@@ -0,0 +1,71 @@
+"""
+Example test demonstrating the tests.helpers package.
+
+This file serves as documentation and verification that the helper modules
+are correctly importable and usable in tests.
+"""
+import pytest
+from tests.helpers.rclone import rclone_env
+from tests.helpers.neo4j import inject_fake_neo4j, CypherRecorder
+from tests.helpers.builders import build_tree, write_csv
+from tests.helpers.asserts import assert_json, assert_error
+
+
+def test_rclone_helper_example(monkeypatch):
+    """Example usage of rclone test helper."""
+    env_config = rclone_env(
+        monkeypatch,
+        listremotes=["local", "s3", "gdrive"],
+        version="rclone v1.62.2"
+    )
+    assert env_config["version"] == "rclone v1.62.2"
+    assert "s3" in env_config["listremotes"]
+
+
+def test_neo4j_helper_example(monkeypatch):
+    """Example usage of neo4j test helpers."""
+    # Inject fake credentials to avoid connecting to real Neo4j
+    inject_fake_neo4j(monkeypatch, uri="", user="", password="")
+
+    # Use CypherRecorder to capture queries without executing them
+    recorder = CypherRecorder()
+    recorder.run("CREATE (n:Node {name: $name})", name="test")
+    recorder.run("MATCH (n:Node) RETURN n")
+
+    assert len(recorder.records) == 2
+    assert recorder.last().query == "MATCH (n:Node) RETURN n"
+
+
+def test_builders_helper_example(tmp_path):
+    """Example usage of builders test helpers."""
+    # Create a filesystem tree for testing
+    build_tree(tmp_path, {
+        'data': {
+            'sample.txt': 'hello world',
+            'nested': {
+                'file.txt': 'nested content'
+            }
+        },
+        'output.csv': [['id', 'name'], [1, 'Alice'], [2, 'Bob']]
+    })
+
+    assert (tmp_path / 'data' / 'sample.txt').read_text() == 'hello world'
+    assert (tmp_path / 'data' / 'nested' / 'file.txt').exists()
+    assert (tmp_path / 'output.csv').exists()
+
+    # Write a standalone CSV
+    write_csv(tmp_path / 'users.csv', [['id', 'email'], [1, 'test@example.com']])
+    assert (tmp_path / 'users.csv').exists()
+
+
+def test_asserts_helper_example(client):
+    """Example usage of asserts test helpers."""
+    # Test successful JSON response
+    resp = client.get('/api/providers')
+    data = assert_json(resp, shape=list)
+    assert isinstance(data, list)
+
+    # Test error response
+    resp_err = client.get('/api/scans/nonexistent-id/status')
+    error_data = assert_error(resp_err)
+    assert isinstance(error_data, dict)