Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion docs/roadmap/ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -591,14 +591,18 @@ src/
- ✅ `builder.js` → single `runAnalyses` call replaces 4 sequential blocks + WASM pre-parse
- ✅ Extracted pure computations to `metrics.js` (Halstead derived math, LOC, MI)
- ✅ Extracted shared helpers to `visitor-utils.js` (from dataflow.js)
- 🔲 **CFG visitor rewrite** (see below)
- **CFG visitor rewrite** — `createCfgVisitor` in `ast-analysis/visitors/cfg-visitor.js`, integrated into engine.js unified walk, Mode A/B split eliminated

**Remaining: CFG visitor rewrite.** `buildFunctionCFG` (813 lines) uses a statement-level traversal (`getStatements` + `processStatement` with `loopStack`, `labelMap`, `blockIndex`) that is fundamentally incompatible with the node-level DFS used by `walkWithVisitors`. This is why the engine runs CFG as a separate Mode B pass — the only analysis that can't participate in the shared single-DFS walk.

Rewrite the CFG algorithm as a node-level visitor that builds basic blocks and edges incrementally via `enterNode`/`exitNode` hooks, tracking block boundaries at branch/loop/return nodes the same way the complexity visitor tracks nesting. This eliminates the last redundant tree traversal during build and lets CFG share the exact same DFS pass as complexity, dataflow, and AST extraction. The statement-level `getStatements` helper and per-language `CFG_RULES.statementTypes` can be replaced by detecting block-terminating node types in `enterNode`. Also simplifies `engine.js` by removing the Mode A/B split and WASM pre-parse special-casing for CFG.

**Remaining: Derive cyclomatic complexity from CFG.** Once CFG participates in the unified walk, cyclomatic complexity can be derived directly from CFG edge/block counts (`edges - nodes + 2`) rather than independently computed by the complexity visitor. This creates a single source of truth for control flow metrics and eliminates redundant computation. Can also be done as a simpler SQL-only approach against stored `cfg_blocks`/`cfg_edges` tables (see backlog ID 45).

**Follow-up tasks (post CFG visitor rewrite):**
- ✅ **Derive cyclomatic complexity from CFG** — CFG visitor computes `E - N + 2` per function; engine.js overrides complexity visitor's cyclomatic with CFG-derived value (single source of truth)
- ✅ **Remove `buildFunctionCFG` implementation** — 813-line standalone implementation replaced with thin visitor wrapper (~15 lines); `buildCFGData` WASM fallback uses file-level visitor walk instead of per-function `findFunctionNode` calls

**Affected files:** `src/complexity.js`, `src/cfg.js`, `src/dataflow.js`, `src/ast.js` → split into `src/ast-analysis/`

### 3.2 -- Command/Query Separation ★ Critical 🔄
Expand Down
108 changes: 86 additions & 22 deletions src/ast-analysis/engine.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@
* - CFG construction (basic blocks + edges)
* - Dataflow analysis (define-use chains, arg flows, mutations)
*
* Two modes:
* Mode A (node-level visitor): AST + complexity + dataflow — single DFS per file
* Mode B (statement-level): CFG keeps its own traversal via buildFunctionCFG
* All 4 analyses run as visitors in a single DFS walk via walkWithVisitors.
*
* Optimization strategy: for files with WASM trees, run all applicable visitors
* in a single walkWithVisitors call, then store results in the format that the
* existing buildXxx functions expect as pre-computed data. This eliminates ~3
* redundant tree traversals per file.
* in a single walkWithVisitors call. Store results in the format that buildXxx
* functions already expect as pre-computed data (same fields as native engine
* output). This eliminates redundant tree traversals per file.
*/

import path from 'node:path';
Expand All @@ -32,6 +30,7 @@ import { buildExtensionSet, buildExtToLangMap } from './shared.js';
import { walkWithVisitors } from './visitor.js';
import { functionName as getFuncName } from './visitor-utils.js';
import { createAstStoreVisitor } from './visitors/ast-store-visitor.js';
import { createCfgVisitor } from './visitors/cfg-visitor.js';
import { createComplexityVisitor } from './visitors/complexity-visitor.js';
import { createDataflowVisitor } from './visitors/dataflow-visitor.js';

Expand Down Expand Up @@ -74,25 +73,15 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) {
const extToLang = buildExtToLangMap();

// ── WASM pre-parse for files that need it ───────────────────────────
if (doCfg || doDataflow) {
// CFG now runs as a visitor in the unified walk, so only dataflow
// triggers WASM pre-parse when no tree exists.
if (doDataflow) {
let needsWasmTrees = false;
for (const [relPath, symbols] of fileSymbols) {
if (symbols._tree) continue;
const ext = path.extname(relPath).toLowerCase();

if (doCfg && CFG_EXTENSIONS.has(ext)) {
const fnDefs = (symbols.definitions || []).filter(
(d) => (d.kind === 'function' || d.kind === 'method') && d.line,
);
if (
fnDefs.length > 0 &&
!fnDefs.every((d) => d.cfg === null || Array.isArray(d.cfg?.blocks))
) {
needsWasmTrees = true;
break;
}
}
if (doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext)) {
if (!symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext)) {
needsWasmTrees = true;
break;
}
Expand Down Expand Up @@ -185,6 +174,24 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) {
}
}

// ─ CFG visitor ─
const cfgRulesForLang = CFG_RULES.get(langId);
let cfgVisitor = null;
if (doCfg && cfgRulesForLang && CFG_EXTENSIONS.has(ext)) {
// Only use visitor if some functions lack pre-computed CFG
const needsWasmCfg = defs.some(
(d) =>
(d.kind === 'function' || d.kind === 'method') &&
d.line &&
d.cfg !== null &&
!Array.isArray(d.cfg?.blocks),
);
if (needsWasmCfg) {
cfgVisitor = createCfgVisitor(cfgRulesForLang);
visitors.push(cfgVisitor);
}
}

// ─ Dataflow visitor ─
const dfRules = DATAFLOW_RULES.get(langId);
let dataflowVisitor = null;
Expand Down Expand Up @@ -216,12 +223,21 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) {
for (const r of complexityResults) {
if (r.funcNode) {
const line = r.funcNode.startPosition.row + 1;
resultByLine.set(line, r);
if (!resultByLine.has(line)) resultByLine.set(line, []);
resultByLine.get(line).push(r);
}
}
for (const def of defs) {
if ((def.kind === 'function' || def.kind === 'method') && def.line && !def.complexity) {
const funcResult = resultByLine.get(def.line);
const candidates = resultByLine.get(def.line);
const funcResult = !candidates
? undefined
: candidates.length === 1
? candidates[0]
: (candidates.find((r) => {
const n = r.funcNode.childForFieldName('name');
return n && n.text === def.name;
}) ?? candidates[0]);
if (funcResult) {
const { metrics } = funcResult;
const loc = computeLOCMetrics(funcResult.funcNode, langId);
Expand All @@ -247,6 +263,54 @@ export async function runAnalyses(db, fileSymbols, rootDir, opts, engineOpts) {
}
}

// ─ Store CFG results on definitions (buildCFGData will find def.cfg and skip its walk) ─
if (cfgVisitor) {
const cfgResults = results.cfg || [];
const cfgByLine = new Map();
for (const r of cfgResults) {
if (r.funcNode) {
const line = r.funcNode.startPosition.row + 1;
if (!cfgByLine.has(line)) cfgByLine.set(line, []);
cfgByLine.get(line).push(r);
}
}
for (const def of defs) {
if (
(def.kind === 'function' || def.kind === 'method') &&
def.line &&
!def.cfg?.blocks?.length
) {
const candidates = cfgByLine.get(def.line);
const cfgResult = !candidates
? undefined
: candidates.length === 1
? candidates[0]
: (candidates.find((r) => {
const n = r.funcNode.childForFieldName('name');
return n && n.text === def.name;
}) ?? candidates[0]);
if (cfgResult) {
def.cfg = { blocks: cfgResult.blocks, edges: cfgResult.edges };

// Override complexity's cyclomatic with CFG-derived value (single source of truth)
// and recompute maintainability index to stay consistent
if (def.complexity && cfgResult.cyclomatic != null) {
def.complexity.cyclomatic = cfgResult.cyclomatic;
const { loc, halstead } = def.complexity;
const volume = halstead ? halstead.volume : 0;
const commentRatio = loc?.loc > 0 ? loc.commentLines / loc.loc : 0;
def.complexity.maintainabilityIndex = computeMaintainabilityIndex(
volume,
cfgResult.cyclomatic,
loc?.sloc ?? 0,
commentRatio,
);
}
}
}
}
}

// ─ Store dataflow results (buildDataflowEdges will find symbols.dataflow and skip its walk) ─
if (dataflowVisitor) {
symbols.dataflow = results.dataflow;
Expand Down
Loading