Skip to content

Commit f7dd452

Browse files
committed
feat: chunked processing (streaming)
1 parent 26d5760 commit f7dd452

File tree

15 files changed

+661
-212
lines changed

15 files changed

+661
-212
lines changed

bin/commands/generate.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ export default {
7979
prompt: {
8080
type: 'text',
8181
message: 'Items per worker thread',
82-
initialValue: '10',
82+
initialValue: '20',
8383
},
8484
},
8585
version: {

src/generators.mjs

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
'use strict';
22

33
import { allGenerators } from './generators/index.mjs';
4+
import { isAsyncGenerator, createStreamingCache } from './streaming.mjs';
45
import WorkerPool from './threading/index.mjs';
56
import createParallelWorker from './threading/parallel.mjs';
67

@@ -26,12 +27,38 @@ const createGenerator = input => {
2627
/**
2728
* We store all the registered generators to be processed
2829
* within a Record, so we can access their results at any time whenever needed
29-
* (we store the Promises of the generator outputs)
30+
* (we store the Promises of the generator outputs, or AsyncGenerators for streaming)
3031
*
3132
* @type {{ [K in keyof AllGenerators]: ReturnType<AllGenerators[K]['generate']> }}
3233
*/
3334
const cachedGenerators = { ast: Promise.resolve(input) };
3435

36+
/**
37+
* Cache for collected async generator results.
38+
* When a streaming generator is first consumed, we collect all chunks
39+
* and store the promise here so subsequent consumers share the same result.
40+
*/
41+
const streamingCache = createStreamingCache();
42+
43+
/**
44+
* Gets the dependency input, handling both regular promises and async generators.
45+
* For async generators, ensures only one collection happens and result is cached.
46+
* @param {string} dependsOn - Name of the dependency generator
47+
* @returns {Promise<any>}
48+
*/
49+
const getDependencyInput = async dependsOn => {
50+
// First, await the cached promise to get the actual result
51+
const result = await cachedGenerators[dependsOn];
52+
53+
// Check if the result is an async generator (streaming)
54+
if (isAsyncGenerator(result)) {
55+
return streamingCache.getOrCollect(dependsOn, result);
56+
}
57+
58+
// Regular result - return it directly
59+
return result;
60+
};
61+
3562
/**
3663
* Runs the Generator engine with the provided top-level input and the given generator options
3764
*
@@ -57,18 +84,20 @@ const createGenerator = input => {
5784

5885
// Ensure dependency is scheduled (but don't await its result yet)
5986
if (dependsOn && !(dependsOn in cachedGenerators)) {
60-
await runGenerators({ ...options, generators: [dependsOn] });
87+
// Recursively schedule - don't await, just ensure it's in cachedGenerators
88+
runGenerators({ ...options, generators: [dependsOn] });
6189
}
6290

6391
// Create a ParallelWorker for this generator
92+
// The worker supports both batch (map) and streaming (stream) modes
6493
const worker = createParallelWorker(generatorName, chunkPool, options);
6594

6695
/**
6796
* Schedule the generator - it awaits its dependency internally
68-
* his allows multiple generators with the same dependency to run in parallel
97+
* This allows multiple generators with the same dependency to run in parallel
6998
*/
7099
const scheduledGenerator = async () => {
71-
const input = await cachedGenerators[dependsOn];
100+
const input = await getDependencyInput(dependsOn);
72101

73102
return generate(input, { ...options, worker });
74103
};

src/generators/api-links/__tests__/fixtures.test.mjs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,17 @@ describe('api links', () => {
2626
chunkSize: 10,
2727
});
2828

29-
const astJsResult = await astJs.generate(undefined, {
29+
// Collect results from the async generator
30+
const astJsResults = [];
31+
32+
for await (const chunk of astJs.generate(undefined, {
3033
input: [sourceFile],
3134
worker,
32-
});
35+
})) {
36+
astJsResults.push(...chunk);
37+
}
3338

34-
const actualOutput = await apiLinks.generate(astJsResult, {
39+
const actualOutput = await apiLinks.generate(astJsResults, {
3540
gitRef: 'https://github.com/nodejs/node/tree/HEAD',
3641
});
3742

src/generators/ast-js/index.mjs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,20 @@ export default {
5050
},
5151

5252
/**
53-
* @param {Input} _
53+
* @param {Input} i
5454
* @param {Partial<GeneratorOptions>} options
55+
* @returns {AsyncGenerator<Array<object>>}
5556
*/
56-
async generate(_, { input = [], worker }) {
57+
async *generate(i, { input = [], worker }) {
5758
const sourceFiles = globSync(input).filter(
5859
filePath => extname(filePath) === '.js'
5960
);
6061

62+
const deps = { input: sourceFiles };
63+
6164
// Parse the Javascript sources into ASTs in parallel using worker threads
62-
return worker.map(sourceFiles, _, { input: sourceFiles });
65+
for await (const chunkResult of worker.stream(sourceFiles, i, deps)) {
66+
yield chunkResult;
67+
}
6368
},
6469
};

src/generators/jsx-ast/index.mjs

Lines changed: 8 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,9 @@
1-
import { OVERRIDDEN_POSITIONS } from './constants.mjs';
21
import { buildSideBarProps } from './utils/buildBarProps.mjs';
32
import buildContent from './utils/buildContent.mjs';
3+
import { getSortedHeadNodes } from './utils/getSortedHeadNodes.mjs';
44
import { groupNodesByModule } from '../../utils/generators.mjs';
55
import { getRemarkRecma } from '../../utils/remark.mjs';
66

7-
/**
8-
* Sorts entries by OVERRIDDEN_POSITIONS and then heading name.
9-
* @param {Array<ApiDocMetadataEntry>} entries
10-
*/
11-
const getSortedHeadNodes = entries => {
12-
/**
13-
* Sorts entries by OVERRIDDEN_POSITIONS and then heading name.
14-
* @param {ApiDocMetadataEntry} a
15-
* @param {ApiDocMetadataEntry} b
16-
* @returns {number}
17-
*/
18-
const headingSortFn = (a, b) => {
19-
const ai = OVERRIDDEN_POSITIONS.indexOf(a.api);
20-
const bi = OVERRIDDEN_POSITIONS.indexOf(b.api);
21-
22-
if (ai !== -1 && bi !== -1) {
23-
return ai - bi;
24-
}
25-
26-
if (ai !== -1) {
27-
return -1;
28-
}
29-
30-
if (bi !== -1) {
31-
return 1;
32-
}
33-
34-
return a.heading.data.name.localeCompare(b.heading.data.name);
35-
};
36-
37-
return entries.filter(node => node.heading.depth === 1).sort(headingSortFn);
38-
};
39-
407
/**
418
* Generator for converting MDAST to JSX AST.
429
*
@@ -97,11 +64,15 @@ export default {
9764
*
9865
* @param {Input} entries
9966
* @param {Partial<GeneratorOptions>} options
100-
* @returns {Promise<Array<string>>} Array of generated content
67+
* @returns {AsyncGenerator<Array<string>>}
10168
*/
102-
async generate(entries, { index, releases, version, worker }) {
69+
async *generate(entries, { index, releases, version, worker }) {
10370
const headNodes = entries.filter(node => node.heading.depth === 1);
10471

105-
return worker.map(headNodes, entries, { index, releases, version });
72+
const deps = { index, releases, version };
73+
74+
for await (const chunkResult of worker.stream(headNodes, entries, deps)) {
75+
yield chunkResult;
76+
}
10677
},
10778
};
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
'use strict';
2+
3+
import { OVERRIDDEN_POSITIONS } from '../constants.mjs';
4+
5+
/**
6+
* Sorts entries by OVERRIDDEN_POSITIONS and then heading name.
7+
* @param {ApiDocMetadataEntry} a
8+
* @param {ApiDocMetadataEntry} b
9+
* @returns {number}
10+
*/
11+
const headingSortFn = (a, b) => {
12+
const ai = OVERRIDDEN_POSITIONS.indexOf(a.api);
13+
const bi = OVERRIDDEN_POSITIONS.indexOf(b.api);
14+
15+
if (ai !== -1 && bi !== -1) {
16+
return ai - bi;
17+
}
18+
19+
if (ai !== -1) {
20+
return -1;
21+
}
22+
23+
if (bi !== -1) {
24+
return 1;
25+
}
26+
27+
return a.heading.data.name.localeCompare(b.heading.data.name);
28+
};
29+
30+
/**
31+
* Filters and sorts entries by OVERRIDDEN_POSITIONS and then heading name.
32+
* @param {Array<ApiDocMetadataEntry>} entries
33+
* @returns {Array<ApiDocMetadataEntry>}
34+
*/
35+
export const getSortedHeadNodes = entries =>
36+
entries.filter(node => node.heading.depth === 1).sort(headingSortFn);

src/generators/legacy-html-all/index.mjs

Lines changed: 96 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { join, resolve } from 'node:path';
66
import HTMLMinifier from '@minify-html/node';
77

88
import { getRemarkRehype } from '../../utils/remark.mjs';
9-
import dropdowns from '../legacy-html/utils/buildDropdowns.mjs';
9+
import { replaceTemplateValues } from '../legacy-html/utils/replaceTemplateValues.mjs';
1010
import tableOfContents from '../legacy-html/utils/tableOfContents.mjs';
1111

1212
/**
@@ -40,67 +40,109 @@ export default {
4040

4141
dependsOn: 'legacy-html',
4242

43+
/**
44+
* Process a chunk of template values from the dependency.
45+
* Extracts toc and content from each entry for aggregation.
46+
* @param {Input} fullInput
47+
* @param {number[]} itemIndices
48+
*/
49+
processChunk(fullInput, itemIndices) {
50+
const results = [];
51+
52+
for (const idx of itemIndices) {
53+
const entry = fullInput[idx];
54+
55+
// Skip the index entry
56+
if (entry.api === 'index') {
57+
continue;
58+
}
59+
60+
results.push({
61+
api: entry.api,
62+
section: entry.section,
63+
toc: entry.toc,
64+
content: entry.content,
65+
});
66+
}
67+
68+
return results;
69+
},
70+
4371
/**
4472
* Generates the `all.html` file from the `legacy-html` generator
4573
* @param {Input} input
4674
* @param {Partial<GeneratorOptions>} options
75+
* @returns {AsyncGenerator<Array<{api: string; section: string; toc: string; content: string}>>}
4776
*/
48-
async generate(input, { version, releases, output }) {
49-
const inputWithoutIndex = input.filter(entry => entry.api !== 'index');
50-
51-
// Gets a Remark Processor that parses Markdown to minified HTML
52-
const remarkWithRehype = getRemarkRehype();
53-
54-
// Current directory path relative to the `index.mjs` file
55-
// from the `legacy-html` generator, as all the assets are there
56-
const baseDir = resolve(import.meta.dirname, '..', 'legacy-html');
57-
58-
// Reads the API template.html file to be used as a base for the HTML files
59-
const apiTemplate = await readFile(join(baseDir, 'template.html'), 'utf-8');
60-
61-
// Aggregates all individual Table of Contents into one giant string
62-
const aggregatedToC = inputWithoutIndex.map(entry => entry.toc).join('\n');
63-
64-
// Aggregates all individual content into one giant string
65-
const aggregatedContent = inputWithoutIndex
66-
.map(entry => entry.content)
67-
.join('\n');
68-
69-
// Creates a "mimic" of an `ApiDocMetadataEntry` which fulfils the requirements
70-
// for generating the `tableOfContents` with the `tableOfContents.parseNavigationNode` parser
71-
const sideNavigationFromValues = inputWithoutIndex.map(entry => ({
72-
api: entry.api,
73-
heading: { data: { depth: 1, name: entry.section } },
74-
}));
75-
76-
// Generates the global Table of Contents (Sidebar Navigation)
77-
const parsedSideNav = remarkWithRehype.processSync(
78-
tableOfContents(sideNavigationFromValues, {
79-
maxDepth: 1,
80-
parser: tableOfContents.parseNavigationNode,
81-
})
82-
);
83-
84-
const generatedAllTemplate = apiTemplate
85-
.replace('__ID__', 'all')
86-
.replace(/__FILENAME__/g, 'all')
87-
.replace('__SECTION__', 'All')
88-
.replace(/__VERSION__/g, `v${version.version}`)
89-
.replace(/__TOC__/g, tableOfContents.wrapToC(aggregatedToC))
90-
.replace(/__GTOC__/g, parsedSideNav)
91-
.replace('__CONTENT__', aggregatedContent)
92-
.replace(/__TOC_PICKER__/g, dropdowns.buildToC(aggregatedToC))
93-
.replace(/__GTOC_PICKER__/g, '')
94-
.replace('__ALTDOCS__', dropdowns.buildVersions('all', '', releases))
95-
.replace('__EDIT_ON_GITHUB__', '');
96-
97-
// We minify the html result to reduce the file size and keep it "clean"
98-
const minified = HTMLMinifier.minify(Buffer.from(generatedAllTemplate), {});
77+
async *generate(input, { version, releases, output, worker }) {
78+
// Collect all chunks as they stream in
79+
const allChunks = [];
9980

81+
for await (const chunkResult of worker.stream(input, input, {})) {
82+
allChunks.push(...chunkResult);
83+
84+
yield chunkResult;
85+
}
86+
87+
// After all chunks are collected, build and write the final file
10088
if (output) {
89+
// Gets a Remark Processor that parses Markdown to minified HTML
90+
const remarkWithRehype = getRemarkRehype();
91+
92+
// Current directory path relative to the `index.mjs` file
93+
// from the `legacy-html` generator, as all the assets are there
94+
const baseDir = resolve(import.meta.dirname, '..', 'legacy-html');
95+
96+
// Reads the API template.html file to be used as a base for the HTML files
97+
const apiTemplate = await readFile(
98+
join(baseDir, 'template.html'),
99+
'utf-8'
100+
);
101+
102+
// Aggregates all individual Table of Contents into one giant string
103+
const aggregatedToC = allChunks.map(entry => entry.toc).join('\n');
104+
105+
// Aggregates all individual content into one giant string
106+
const aggregatedContent = allChunks
107+
.map(entry => entry.content)
108+
.join('\n');
109+
110+
// Creates a "mimic" of an `ApiDocMetadataEntry` which fulfils the requirements
111+
// for generating the `tableOfContents` with the `tableOfContents.parseNavigationNode` parser
112+
const sideNavigationFromValues = allChunks.map(entry => ({
113+
api: entry.api,
114+
heading: { data: { depth: 1, name: entry.section } },
115+
}));
116+
117+
// Generates the global Table of Contents (Sidebar Navigation)
118+
const parsedSideNav = remarkWithRehype.processSync(
119+
tableOfContents(sideNavigationFromValues, {
120+
maxDepth: 1,
121+
parser: tableOfContents.parseNavigationNode,
122+
})
123+
);
124+
125+
const templateValues = {
126+
api: 'all',
127+
added: '',
128+
section: 'All',
129+
version: `v${version.version}`,
130+
toc: aggregatedToC,
131+
nav: String(parsedSideNav),
132+
content: aggregatedContent,
133+
};
134+
135+
const generatedAllTemplate = replaceTemplateValues(
136+
apiTemplate,
137+
templateValues,
138+
releases,
139+
{ skipGitHub: true, skipGtocPicker: true }
140+
);
141+
142+
// We minify the html result to reduce the file size and keep it "clean"
143+
const minified = HTMLMinifier.minify(Buffer.from(generatedAllTemplate));
144+
101145
await writeFile(join(output, 'all.html'), minified);
102146
}
103-
104-
return minified;
105147
},
106148
};

0 commit comments

Comments
 (0)