Skip to content

Commit 9f42aa5

Browse files
committed
chore: streamlined legacy-json and ast generation
1 parent 40cead8 commit 9f42aa5

File tree

9 files changed

+166
-58
lines changed

9 files changed

+166
-58
lines changed

bin/commands/generate.mjs

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -6,20 +6,13 @@ import { coerce } from 'semver';
66
import { NODE_CHANGELOG_URL, NODE_VERSION } from '../../src/constants.mjs';
77
import { publicGenerators } from '../../src/generators/index.mjs';
88
import createGenerator from '../../src/generators.mjs';
9+
import logger from '../../src/logger/index.mjs';
910
import { parseChangelog, parseIndex } from '../../src/parsers/markdown.mjs';
1011
import { DEFAULT_TYPE_MAP } from '../../src/utils/parser/constants.mjs';
1112
import { loadFromURL } from '../../src/utils/parser.mjs';
12-
import { loadAndParse } from '../utils.mjs';
1313

1414
const availableGenerators = Object.keys(publicGenerators);
1515

16-
// Half of available logical CPUs guarantees in general all physical CPUs are being used
17-
// which in most scenarios is the best way to maximize performance
18-
// When spawning more than a said number of threads, the overhead of context switching
19-
// and CPU contention starts to degrade performance rather than improve it.
20-
// Therefore, we set the optimal threads to half the number of CPU cores, with a minimum of 6.
21-
const optimalThreads = Math.max(cpus().length, 2);
22-
2316
/**
2417
* @typedef {Object} Options
2518
* @property {Array<string>|string} input - Specifies the glob/path for input files.
@@ -70,7 +63,7 @@ export default {
7063
prompt: {
7164
type: 'text',
7265
message: 'How many threads to allow',
73-
initialValue: String(Math.max(optimalThreads, 2)),
66+
initialValue: String(cpus().length),
7467
},
7568
},
7669
chunkSize: {
@@ -146,15 +139,18 @@ export default {
146139
* @returns {Promise<void>}
147140
*/
148141
async action(opts) {
149-
const docs = await loadAndParse(opts.input, opts.ignore);
142+
logger.debug('Starting doc-kit', opts);
143+
144+
const { runGenerators } = createGenerator();
145+
150146
const releases = await parseChangelog(opts.changelog);
151147

152148
const rawTypeMap = await loadFromURL(opts.typeMap);
153149
const typeMap = JSON.parse(rawTypeMap);
154150

155151
const index = opts.index && (await parseIndex(opts.index));
156152

157-
const { runGenerators } = createGenerator(docs);
153+
logger.debug(`Starting generation with targets: ${opts.target.join(', ')}`);
158154

159155
await runGenerators({
160156
generators: opts.target,

src/generators.mjs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,11 @@ const generatorsLogger = logger.child('generators');
1313
* documentation generators in dependency order, with support for parallel
1414
* processing and streaming results.
1515
*
16-
* @param {ParserOutput} input - The API doc AST tree
1716
* @returns {{ runGenerators: (options: GeneratorOptions) => Promise<unknown[]> }}
1817
*/
19-
const createGenerator = input => {
18+
const createGenerator = () => {
2019
/** @type {{ [key: string]: Promise<unknown> | AsyncGenerator }} */
21-
const cachedGenerators = { ast: Promise.resolve(input) };
20+
const cachedGenerators = {};
2221

2322
const streamingCache = createStreamingCache();
2423

@@ -28,10 +27,14 @@ const createGenerator = input => {
2827
/**
2928
* Gets the collected input from a dependency generator.
3029
*
31-
* @param {string} dependsOn - Dependency generator name
30+
* @param {string | undefined} dependsOn - Dependency generator name
3231
* @returns {Promise<unknown>}
3332
*/
3433
const getDependencyInput = async dependsOn => {
34+
if (!dependsOn) {
35+
return undefined;
36+
}
37+
3538
const result = await cachedGenerators[dependsOn];
3639

3740
if (isAsyncGenerator(result)) {

src/generators/__tests__/index.test.mjs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import semver from 'semver';
55

66
import { allGenerators } from '../index.mjs';
77

8-
const validDependencies = [...Object.keys(allGenerators), 'ast'];
8+
const validDependencies = Object.keys(allGenerators);
99
const generatorEntries = Object.entries(allGenerators);
1010

1111
describe('All Generators', () => {
@@ -34,9 +34,18 @@ describe('All Generators', () => {
3434
if (generator.dependsOn) {
3535
assert.ok(
3636
validDependencies.includes(generator.dependsOn),
37-
`Generator "${key}" depends on "${generator.dependsOn}" which is not a valid generator or 'ast'`
37+
`Generator "${key}" depends on "${generator.dependsOn}" which is not a valid generator`
3838
);
3939
}
4040
});
4141
});
42+
43+
it('should have ast generator as a top-level generator with no dependencies', () => {
44+
assert.ok(allGenerators.ast, 'ast generator should exist');
45+
assert.equal(
46+
allGenerators.ast.dependsOn,
47+
undefined,
48+
'ast generator should have no dependencies'
49+
);
50+
});
4251
});

src/generators/api-links/__tests__/fixtures.test.mjs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { readdir } from 'node:fs/promises';
22
import { cpus } from 'node:os';
33
import { basename, extname, join } from 'node:path';
4-
import { describe, it } from 'node:test';
4+
import { after, before, describe, it } from 'node:test';
55

66
import createWorkerPool from '../../../threading/index.mjs';
77
import createParallelWorker from '../../../threading/parallel.mjs';
@@ -16,12 +16,20 @@ const sourceFiles = fixtures
1616
.map(fixture => join(FIXTURES_DIRECTORY, fixture));
1717

1818
describe('api links', () => {
19+
const threads = cpus().length;
20+
let pool;
21+
22+
before(() => {
23+
pool = createWorkerPool(threads);
24+
});
25+
26+
after(async () => {
27+
await pool.destroy();
28+
});
29+
1930
describe('should work correctly for all fixtures', () => {
2031
sourceFiles.forEach(sourceFile => {
2132
it(`${basename(sourceFile)}`, async t => {
22-
const threads = cpus().length;
23-
const pool = createWorkerPool(threads);
24-
2533
const worker = createParallelWorker('ast-js', pool, {
2634
threads,
2735
chunkSize: 10,
@@ -46,8 +54,6 @@ describe('api links', () => {
4654
}
4755

4856
t.assert.snapshot(actualOutput);
49-
50-
await pool.destroy();
5157
});
5258
});
5359
});

src/generators/ast/index.mjs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
'use strict';
2+
3+
import { readFile } from 'node:fs/promises';
4+
import { extname } from 'node:path';
5+
6+
import { globSync } from 'glob';
7+
import { VFile } from 'vfile';
8+
9+
import createQueries from '../../utils/queries/index.mjs';
10+
import { getRemark } from '../../utils/remark.mjs';
11+
12+
const remarkProcessor = getRemark();
13+
14+
const { updateStabilityPrefixToLink } = createQueries();
15+
16+
/**
17+
* Parses a single markdown file into an AST.
18+
*
19+
* @param {string} filePath - Path to the markdown file
20+
* @returns {Promise<ParserOutput<import('mdast').Root>>}
21+
*/
22+
const parseMarkdownFile = async filePath => {
23+
const fileContents = await readFile(filePath, 'utf-8');
24+
const vfile = new VFile({ path: filePath, value: fileContents });
25+
26+
// Normalizes all the Stability Index prefixes with Markdown links
27+
updateStabilityPrefixToLink(vfile);
28+
29+
// Parses the API doc into an AST tree using `unified` and `remark`
30+
const tree = remarkProcessor.parse(vfile);
31+
32+
return { file: { stem: vfile.stem, basename: vfile.basename }, tree };
33+
};
34+
35+
/**
36+
* This generator parses Markdown API doc files into AST trees.
37+
* It parallelizes the parsing across worker threads for better performance.
38+
*
39+
* @typedef {undefined} Input
40+
*
41+
* @type {GeneratorMetadata<Input, Array<ParserOutput<import('mdast').Root>>>}
42+
*/
43+
export default {
44+
name: 'ast',
45+
46+
version: '1.0.0',
47+
48+
description: 'Parses Markdown API doc files into AST trees',
49+
50+
dependsOn: undefined,
51+
52+
processChunk: Object.assign(
53+
/**
54+
* Process a chunk of markdown files in a worker thread.
55+
* Loads and parses markdown files into AST representations.
56+
*
57+
* @param {string[]} inputSlice - Sliced input paths for this chunk
58+
* @param {number[]} itemIndices - Indices into the sliced array
59+
* @returns {Promise<Array<ParserOutput<import('mdast').Root>>>}
60+
*/
61+
async (inputSlice, itemIndices) => {
62+
const results = [];
63+
64+
for (const idx of itemIndices) {
65+
const parsed = await parseMarkdownFile(inputSlice[idx]);
66+
67+
results.push(parsed);
68+
}
69+
70+
return results;
71+
},
72+
{ sliceInput: true }
73+
),
74+
75+
/**
76+
* Generates AST trees from markdown input files.
77+
*
78+
* @param {Input} _ - Unused (top-level generator)
79+
* @param {Partial<GeneratorOptions>} options
80+
* @returns {AsyncGenerator<Array<ParserOutput<import('mdast').Root>>>}
81+
*/
82+
async *generate(_, { input = [], worker }) {
83+
const files = globSync(input).filter(path => extname(path) === '.md');
84+
85+
// Parse markdown files in parallel using worker threads
86+
for await (const chunkResult of worker.stream(files, files)) {
87+
yield chunkResult;
88+
}
89+
},
90+
};

src/generators/index.mjs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import addonVerify from './addon-verify/index.mjs';
44
import apiLinks from './api-links/index.mjs';
5+
import ast from './ast/index.mjs';
56
import astJs from './ast-js/index.mjs';
67
import jsonSimple from './json-simple/index.mjs';
78
import jsxAst from './jsx-ast/index.mjs';
@@ -32,6 +33,7 @@ export const publicGenerators = {
3233
// These ones are special since they don't produce standard output,
3334
// and hence, we don't expose them to the CLI.
3435
const internalGenerators = {
36+
ast,
3537
metadata,
3638
'jsx-ast': jsxAst,
3739
'ast-js': astJs,

src/generators/jsx-ast/utils/buildContent.mjs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -295,10 +295,7 @@ const buildContent = async (metadataEntries, head, sideBarProps, remark) => {
295295
const ast = await remark.run(root);
296296

297297
// The final MDX content is the expression in the Program's first body node
298-
return {
299-
...ast.body[0].expression,
300-
data: head,
301-
};
298+
return { ...ast.body[0].expression, data: head };
302299
};
303300

304301
export default buildContent;

src/generators/legacy-json/index.mjs

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -30,31 +30,31 @@ export default {
3030

3131
dependsOn: 'metadata',
3232

33-
/**
34-
* Process a chunk of items in a worker thread.
35-
* Builds JSON sections - FS operations happen in generate().
36-
*
37-
* @param {Input} fullInput - Full metadata input for context rebuilding
38-
* @param {number[]} itemIndices - Indices of head nodes to process
39-
* @param {Partial<Omit<GeneratorOptions, 'worker'>>} _options - Serializable options (unused)
40-
* @returns {Promise<import('./types.d.ts').Section[]>} JSON sections for each processed module
41-
*/
42-
async processChunk(fullInput, itemIndices) {
43-
const groupedModules = groupNodesByModule(fullInput);
44-
45-
const headNodes = fullInput.filter(node => node.heading.depth === 1);
46-
47-
const results = [];
48-
49-
for (const idx of itemIndices) {
50-
const head = headNodes[idx];
51-
const nodes = groupedModules.get(head.api);
33+
processChunk: Object.assign(
34+
/**
35+
* Process a chunk of items in a worker thread.
36+
* Builds JSON sections - FS operations happen in generate().
37+
*
38+
* With sliceInput, each item is pre-grouped {head, nodes} - no need to
39+
* recompute groupNodesByModule for every chunk.
40+
*
41+
* @param {Array<{head: ApiDocMetadataEntry, nodes: ApiDocMetadataEntry[]}>} slicedInput - Pre-sliced module data
42+
* @param {number[]} itemIndices - Indices into the sliced array
43+
* @returns {Promise<import('./types.d.ts').Section[]>} JSON sections for each processed module
44+
*/
45+
async (slicedInput, itemIndices) => {
46+
const results = [];
47+
48+
for (const idx of itemIndices) {
49+
const { head, nodes } = slicedInput[idx];
50+
51+
results.push(buildSection(head, nodes));
52+
}
5253

53-
results.push(buildSection(head, nodes));
54-
}
55-
56-
return results;
57-
},
54+
return results;
55+
},
56+
{ sliceInput: true }
57+
),
5858

5959
/**
6060
* Generates a legacy JSON file.
@@ -64,11 +64,18 @@ export default {
6464
* @returns {AsyncGenerator<Array<import('./types.d.ts').Section>>}
6565
*/
6666
async *generate(input, { output, worker }) {
67+
const groupedModules = groupNodesByModule(input);
68+
6769
const headNodes = input.filter(node => node.heading.depth === 1);
6870

69-
const deps = { output };
71+
// Create sliced input: each item contains head + its module's entries
72+
// This avoids sending all 4900+ entries to every worker
73+
const slicedInput = headNodes.map(head => ({
74+
head,
75+
nodes: groupedModules.get(head.api),
76+
}));
7077

71-
for await (const chunkResult of worker.stream(headNodes, input, deps)) {
78+
for await (const chunkResult of worker.stream(slicedInput, slicedInput)) {
7279
if (output) {
7380
for (const section of chunkResult) {
7481
const out = join(output, `${section.api}.json`);

src/generators/types.d.ts

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,14 +101,12 @@ declare global {
101101
* If you pass `createGenerator` with ['react', 'html'], the 'react' generator will be executed first,
102102
* as it is a top-level generator, and then the 'html' generator will be executed after the 'react' generator.
103103
*
104-
* The 'ast' generator is the top-level parser, and if 'ast' is passed to `dependsOn`, then the generator
105-
* will be marked as a top-level generator.
104+
* The 'ast' generator is the top-level parser for markdown files. It has no dependencies.
106105
*
107106
* The `ast-js` generator is the top-level parser for JavaScript files. It
108-
* passes the ASTs for any JavaScript files given in the input. Like `ast`,
109-
* any generator depending on it is marked as a top-level generator.
107+
* passes the ASTs for any JavaScript files given in the input.
110108
*/
111-
dependsOn: keyof AllGenerators | 'ast';
109+
dependsOn: keyof AllGenerators | undefined;
112110

113111
/**
114112
* Generators are abstract, and different generators have different sorts of inputs and outputs.

0 commit comments

Comments
 (0)