Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/cspell-lib/api/api.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ exports[`docValidator suggestions > suggestions 1`] = `
"word": "orangs",
"wordAdjustedToMatchCase": "Orangs",
},
{
"word": "orangey",
"wordAdjustedToMatchCase": "Orangey",
},
{
"word": "orange's",
},
Expand All @@ -26,9 +30,5 @@ exports[`docValidator suggestions > suggestions 1`] = `
"word": "ranges",
"wordAdjustedToMatchCase": "Ranges",
},
{
"word": "orangier",
"wordAdjustedToMatchCase": "Orangier",
},
]
`;
Binary file modified packages/cspell-tools/src/__snapshots__/build.test.ts.snap
Binary file not shown.
2 changes: 2 additions & 0 deletions packages/cspell-tools/src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ export async function run(program: Command, argv: string[], flags?: FeatureFlags
.command('btrie [files...]')
.description('Generate BTrie files from word list files.')
.option('-n, --no-compress', 'By default the files are GZipped, this will turn off GZ compression.')
.option('--no-optimize', 'Do not try to optimize.')
.option('--no-use-string-table', 'Do not use a string table in the BTrie.')
.action(generateBTrie);

program
Expand Down
7 changes: 2 additions & 5 deletions packages/cspell-tools/src/bTrie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,9 @@ import zlib from 'node:zlib';

const gzip = promisify(zlib.gzip);

import type { GenerateBTrieOptions } from './compiler/bTrie.ts';
import { createBTrieFromFile } from './compiler/bTrie.ts';

interface GenerateBTrieOptions {
compress?: boolean;
}

/**
 * Generate a BTrie file for each of the given word list files.
 * Delegates directly to `generateBTrieFromFiles`.
 * @param files - the word list files to process.
 * @param options - generation options, passed through unchanged.
 * @returns a promise that settles when `generateBTrieFromFiles` settles.
 */
export function generateBTrie(files: string[], options: GenerateBTrieOptions): Promise<void> {
return generateBTrieFromFiles(files, options);
}
Expand All @@ -19,7 +16,7 @@ async function generateBTrieFromFiles(files: string[], options: GenerateBTrieOpt
console.log(`Generating BTrie for ${files.length} file(s).`);
for (const file of files) {
console.log(`Processing file: ${file}`);
const btrie = await createBTrieFromFile(file);
const btrie = await createBTrieFromFile(file, options);
let outFile = bTrieFileName(file);
if (compress) {
const gzipped = await gzip(btrie);
Expand Down
12 changes: 9 additions & 3 deletions packages/cspell-tools/src/build.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ import { beforeEach, describe, expect, test } from 'vitest';

import { build } from './build.ts';
import { setLogger } from './compiler/index.ts';
import { readTextFile } from './compiler/readers/readTextFile.ts';
import { readFile, readTextFile } from './compiler/readers/readTextFile.ts';
import { spyOnConsole } from './test/console.ts';
import { createTestHelper } from './test/TestHelper.ts';
import { hexDump } from './util/hexDump.ts';

const helper = createTestHelper(import.meta.url);

Expand Down Expand Up @@ -45,8 +46,9 @@ describe('build action', () => {
const shouldExist = builds.filter((a) => !a.startsWith('!'));
const shouldNotExist = builds.filter((a) => a.startsWith('!')).map((a) => a.slice(1));
for (const build of shouldExist) {
const content = await readTextFile(t(build));
expect(content).toMatchSnapshot();
const content = await readFile(t(build));
const text = isBinary(content) ? hexDump(content) : new TextDecoder('utf-8').decode(content);
expect(text).toMatchSnapshot();
}
for (const build of shouldNotExist) {
const found = await helper.fileExists(t(build));
Expand All @@ -71,3 +73,7 @@ function f(...parts: string[]): string {
function cfgYaml(...parts: string[]): string {
return helper.resolveFixture(...parts, 'cspell-tools.config.yaml');
}

/**
 * Heuristic check for binary content: the data is treated as binary
 * when it contains at least one NUL (0x00) byte.
 * @param data - the raw bytes to inspect.
 * @returns true if a zero byte is present, otherwise false.
 */
function isBinary(data: Uint8Array): boolean {
    const firstNul = data.indexOf(0);
    return firstNul !== -1;
}
10 changes: 8 additions & 2 deletions packages/cspell-tools/src/compiler/bTrie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,16 @@ import { encodeTrieDataToBTrie } from 'cspell-trie-lib';

import { createReader } from './Reader.ts';

export async function createBTrieFromFile(file: string): Promise<Uint8Array> {
export interface GenerateBTrieOptions {
compress?: boolean;
optimize?: boolean;
useStringTable?: boolean;
}

export async function createBTrieFromFile(file: string, buildOptions: GenerateBTrieOptions): Promise<Uint8Array> {
const reader = await createReader(file, {});

const trie = reader.toTrie();

return encodeTrieDataToBTrie(trie.data);
return encodeTrieDataToBTrie(trie.data, buildOptions);
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export async function readHunspellFiles(filename: string, options: ReaderOptions
},
toTrie: () => {
if (trie) return trie;
trie = parseDictionary(lines(), { stripCaseAndAccents: false });
trie = parseDictionary(lines(), { stripCaseAndAccents: false, optimize: true });
return trie;
},
};
Expand Down
16 changes: 8 additions & 8 deletions packages/cspell-tools/src/compiler/readers/readTextFile.ts
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
import assert from 'node:assert';
import { Buffer } from 'node:buffer';
import { promises as fs } from 'node:fs';

import { decompress } from '../../gzip/index.ts';

const isGzFile = /\.gz$/;

/**
 * Read a file and decode its bytes as UTF-8 text.
 * The bytes are obtained through `readFile`.
 * @param filename - path of the file to read.
 * @returns a promise resolving to the decoded text.
 */
export function readTextFile(filename: string): Promise<string> {
    return readFile(filename).then((bytes) => new TextDecoder('utf-8').decode(bytes));
}

export function readFile(filename: string): Promise<Uint8Array<ArrayBuffer>> {
const content = fs
.readFile(filename)
.then(async (buffer) => (isGzFile.test(filename) ? decompress(buffer) : buffer))
.then((buffer) => (assertIsBuffer(buffer), buffer.toString('utf8')));
.then(async (buffer) => (isGzFile.test(filename) ? decompress(buffer) : buffer));
return content;
}

/**
 * Read a text file and break it into lines.
 * Lines are split on `\n` only; any other characters (including `\r`) are left in place.
 * @param filename - path of the file to read.
 * @returns a promise resolving to the array of lines.
 */
export async function readTextFileLines(filename: string): Promise<string[]> {
    return (await readTextFile(filename)).split('\n');
}

function assertIsBuffer(value: unknown): asserts value is Buffer {
assert(Buffer.isBuffer(value));
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ export async function textFileReader(filename: string): Promise<Reader> {
lines: words,
toTrie: () => {
if (trie) return trie;
trie = parseDictionary(words, { stripCaseAndAccents: false });
trie = parseDictionary(words, { stripCaseAndAccents: false, optimize: true });
return trie;
},
};
Expand Down
8 changes: 5 additions & 3 deletions packages/cspell-tools/src/gzip/compressFiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@ function fixOSSystemID(zBuf: Uint8Array, os: OSFlags = OSFlags.Unix): Uint8Array
return zBuf;
}

export async function decompress(buf: Uint8Array | Buffer, encoding?: undefined): Promise<Uint8Array>;
type U8Array = Uint8Array<ArrayBuffer>;

export async function decompress(buf: Uint8Array | Buffer, encoding?: undefined): Promise<U8Array>;
export async function decompress(buf: Uint8Array | Buffer, encoding: 'utf8'): Promise<string>;
export async function decompress(buf: Uint8Array | Buffer, encoding: 'utf8' | undefined): Promise<string | Uint8Array>;
export async function decompress(buf: Uint8Array | Buffer, encoding?: 'utf8'): Promise<string | Uint8Array> {
export async function decompress(buf: Uint8Array | Buffer, encoding: 'utf8' | undefined): Promise<string | U8Array>;
export async function decompress(buf: Uint8Array | Buffer, encoding?: 'utf8'): Promise<string | U8Array> {
const dBuf = gunzip(buf);
if (!encoding) return dBuf;
return (await dBuf).toString(encoding);
Expand Down
16 changes: 16 additions & 0 deletions packages/cspell-tools/src/util/hexDump.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/* eslint-disable unicorn/prefer-code-point */
/**
 * Render a byte buffer as a multi-line hex dump, 16 bytes per line.
 * Each line is produced by `hexLine` and the lines are joined with `\n`.
 * @param buffer - the bytes to dump.
 * @returns the dump text; an empty string for an empty buffer.
 */
export function hexDump(buffer: Uint8Array): string {
    const bytesPerLine = 16;
    const rows: string[] = [];
    let offset = 0;
    while (offset < buffer.length) {
        const slice = buffer.subarray(offset, offset + bytesPerLine);
        rows.push(hexLine(offset, slice));
        offset += bytesPerLine;
    }
    return rows.join('\n');
}

/**
 * Format one line of a hex dump.
 *
 * Layout: the offset as 8 zero-padded hex digits, a space, the hex column
 * padded to 52 characters, a space, then the ASCII column.
 * In the hex column an extra space follows every fourth byte.
 * In the ASCII column, bytes outside 32..126 are shown as `.`.
 * @param offset - position of the first byte of `chunk` within the whole buffer.
 * @param chunk - the bytes to show on this line.
 * @returns the formatted line.
 */
function hexLine(offset: number, chunk: Uint8Array): string {
    const hexCells: string[] = [];
    let printable = '';
    chunk.forEach((byte, index) => {
        const pair = byte.toString(16).padStart(2, '0');
        // Every fourth byte carries a trailing space to visually group the bytes.
        hexCells.push(index % 4 === 3 ? pair + ' ' : pair);
        printable += byte < 32 || byte > 126 ? '.' : String.fromCharCode(byte);
    });
    const address = offset.toString(16).padStart(8, '0');
    const hexColumn = hexCells.join(' ').padEnd(52, ' ');
    return `${address} ${hexColumn} ${printable}`;
}
31 changes: 28 additions & 3 deletions packages/cspell-trie-lib/api/api.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions packages/cspell-trie-lib/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
},
"devDependencies": {
"@cspell/cspell-pipe": "workspace:*",
"@cspell/dict-cpp": "^7.0.2",
"@cspell/dict-en_us": "^4.4.27",
"@cspell/dict-es-es": "^3.0.8",
"@cspell/dict-nl-nl": "^2.4.2",
Expand Down
13 changes: 13 additions & 0 deletions packages/cspell-trie-lib/src/lib/BuildOptions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/**
 * Optional flags that influence how a trie is built.
 * Both flags may be omitted or explicitly set to `undefined`.
 */
export interface BuildOptions {
/**
* Optimize the trie for size by merging duplicate sub-tries and using a String Table.
* NOTE(review): the String Table mention overlaps with `useStringTable` below —
* confirm whether `optimize` alone enables it.
* @default false
*/
optimize?: boolean | undefined;

/**
* Use a string table to reduce memory usage.
* @default false
*/
useStringTable?: boolean | undefined;
}
33 changes: 32 additions & 1 deletion packages/cspell-trie-lib/src/lib/ITrie.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { describe, expect, test } from 'vitest';

import { defaultTrieInfo } from './constants.ts';
import type { ITrie } from './ITrie.ts';
import { ITrieImpl as ITrieClass } from './ITrie.ts';
import { ITrieImpl as ITrieClass, iTrieToStructuredStringLines } from './ITrie.ts';
import type { ITrieNode } from './ITrieNode/ITrieNode.ts';
import { parseDictionary, parseDictionaryLegacy } from './SimpleDictionaryParser.ts';
import type { SuggestionOptions } from './suggestions/genSuggestionsOptions.ts';
Expand Down Expand Up @@ -418,6 +418,37 @@ describe('Validate Trie Class', () => {
expect(trie.find('play+time', true)?.f).toBe(1);
expect(trie.find('play++time', true)?.f).toBe(1);
});

test('iTrieToStringLines', () => {
const words = `
# Sample Word List
!playtime
begin
beginning
end
ending
café
cafe
time
ride
hide
riding
`;

const trie = parseDictionary(words);
const trieForceOptimize = parseDictionary(words, { optimize: true });
const trieWithStringTable = parseDictionary(words, { optimize: true, useStringTable: true });

const expected = iTrieToStructuredStringLines(trie, false);
const expectedWithId = iTrieToStructuredStringLines(trie, true);

expect(iTrieToStructuredStringLines(trieForceOptimize, false)).toEqual(expected);
// small word lists are auto optimized, so we expect the same result
expect(iTrieToStructuredStringLines(trieForceOptimize, true)).toEqual(expectedWithId);
expect(iTrieToStructuredStringLines(trieWithStringTable, false)).toEqual(expected);
// Uses a string table, so we expect a different result
expect(iTrieToStructuredStringLines(trieWithStringTable, true)).not.toEqual(expectedWithId);
});
});

const sampleWords = [
Expand Down
Loading
Loading