@@ -12,8 +12,9 @@ import { LexicalModelTypes } from '@keymanapp/common-types';
 import { deepCopy, KMWString } from "@keymanapp/web-utils";
 
 import { SearchPath } from "./search-path.js";
-import { SearchSpace, TokenInputSource } from "./search-space.js";
+import { SearchSpace, PathInputProperties } from "./search-space.js";
 import { TokenSplitMap } from "./context-tokenization.js";
+import { generateSubsetId } from './tokenization-subsets.js';
 
 import Distribution = LexicalModelTypes.Distribution;
 import LexicalModel = LexicalModelTypes.LexicalModel;
@@ -110,9 +111,13 @@ export class ContextToken {
 
     rawTransformDistributions.forEach((entry) => {
       searchSpace = new SearchPath(searchSpace, entry, {
-        trueTransform: entry[0].sample,
-        inputStartIndex: 0,
-        bestProbFromSet: 1
+        segment: {
+          trueTransform: entry[0].sample,
+          transitionId: entry[0].sample.id,
+          start: 0
+        },
+        bestProbFromSet: 1,
+        subsetId: generateSubsetId()
       });
     });

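For orientation, `PathInputProperties` replaces the old flat `TokenInputSource` shape: the per-keystroke data now sits in a nested `segment` record, alongside set-level metadata. The real declaration lives in search-space.ts and is not shown in this diff; the call sites imply roughly the sketch below (the type names here and all field types are assumptions).

// Sketch only - inferred from this diff's call sites; the actual
// declarations in search-space.ts may differ.
import { LexicalModelTypes } from '@keymanapp/common-types';
import Transform = LexicalModelTypes.Transform;

// Hypothetical name; only the fields below actually appear in the diff.
interface PathSegmentSketch {
  trueTransform: Transform; // the Transform the keystroke actually produced
  transitionId: number;     // mirrors trueTransform.id; the constructor asserts they match
  start: number;            // offset into the transform's insert where this token's
                            // slice begins (successor to the old inputStartIndex)
}

interface PathInputPropertiesSketch {
  segment: PathSegmentSketch;
  bestProbFromSet: number;  // best probability within the represented distribution subset
  subsetId: number;         // from generateSubsetId(); numeric type is an assumption
}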
@@ -124,7 +129,7 @@ export class ContextToken {
    * Call this to record the original keystroke Transforms for the context range
    * corresponding to this token.
    */
-  addInput(inputSource: TokenInputSource, distribution: Distribution<Transform>) {
+  addInput(inputSource: PathInputProperties, distribution: Distribution<Transform>) {
     this._searchSpace = new SearchPath(this._searchSpace, distribution, inputSource);
   }
 
@@ -143,8 +148,8 @@ export class ContextToken {
    * Denotes the original keystroke Transforms comprising the range corresponding
    * to this token.
    */
-  get inputRange() {
-    return this.searchSpace.sourceIdentifiers;
+  get inputSegments() {
+    return this.searchSpace.inputSegments;
   }
 
   /**
@@ -163,15 +168,6 @@ export class ContextToken {
     return this.searchSpace.sourceRangeKey;
   }
 
-  /**
-   * Gets a simple, compact string-based representation of `inputRange`.
-   *
-   * This should only ever be used for debugging purposes.
-   */
-  get sourceText(): string {
-    return this.searchSpace.likeliestSourceText;
-  }
-
   /**
    * Generates text corresponding to the net effects of the most likely inputs
    * received that can correspond to the current instance.
@@ -192,7 +188,7 @@ export class ContextToken {
     // Thus, we don't set the .isWhitespace flag field.
     const resultToken = new ContextToken(lexicalModel);
 
-    let lastSourceInput: TokenInputSource;
+    let lastSourceInput: PathInputProperties;
     let lastInputDistrib: Distribution<Transform>;
     for(const token of tokensToMerge) {
       const inputCount = token.inputCount;
@@ -203,7 +199,7 @@ export class ContextToken {
       }
 
       // Are we re-merging on a previously split transform?
-      if(lastSourceInput?.trueTransform != token.inputRange[0].trueTransform) {
+      if(lastSourceInput?.segment.trueTransform != token.inputSegments[0].segment.trueTransform) {
         if(lastSourceInput) {
           resultToken.addInput(lastSourceInput, lastInputDistrib);
         } // else: there's nothing to add as input
@@ -232,9 +228,9 @@ export class ContextToken {
       // Ignore the last entry for now - it may need to merge with a matching
       // entry in the next token!
       for(let i = startIndex; i < inputCount - 1; i++) {
-        resultToken.addInput(token.inputRange[i], token.searchSpace.inputSequence[i]);
+        resultToken.addInput(token.inputSegments[i], token.searchSpace.inputSequence[i]);
       }
-      lastSourceInput = token.inputRange[inputCount-1];
+      lastSourceInput = token.inputSegments[inputCount-1];
       lastInputDistrib = token.searchSpace.inputSequence[inputCount-1];
     }
 
@@ -257,7 +253,7 @@ export class ContextToken {
 
     // Build an alternate version of the transforms: if we preprocess all deleteLefts,
     // what text remains from each?
-    const alteredSources = preprocessInputSources(this.inputRange);
+    const alteredSources = preprocessInputSources(this.inputSegments);
 
     const blankContext = { left: '', startOfBuffer: true, endOfBuffer: true };
     const splitSpecs = split.matches.slice();
@@ -313,16 +309,18 @@ export class ContextToken {
         };
       });
 
-      const priorSourceInput = overextendedToken.inputRange[lastInputIndex];
+      const priorSourceInput = overextendedToken.inputSegments[lastInputIndex];
       constructingToken.addInput(priorSourceInput, headDistribution);
       tokensFromSplit.push(constructingToken);
 
       constructingToken = new ContextToken(lexicalModel);
       backupToken = new ContextToken(constructingToken);
       constructingToken.addInput({
-        trueTransform: priorSourceInput.trueTransform,
-        inputStartIndex: priorSourceInput.inputStartIndex + extraCharsAdded,
-        bestProbFromSet: priorSourceInput.bestProbFromSet
+        ...priorSourceInput,
+        segment: {
+          ...priorSourceInput.segment,
+          start: priorSourceInput.segment.start + extraCharsAdded
+        }
       }, tailDistribution);
 
       const lenToCommit = lenBeforeLastApply + extraCharsAdded;
@@ -338,34 +336,34 @@ export class ContextToken {
 
       backupToken = new ContextToken(constructingToken);
       lenBeforeLastApply = KMWString.length(currentText.left);
-      currentText = applyTransform(alteredSources[transformIndex].trueTransform, currentText);
-      constructingToken.addInput(this.inputRange[transformIndex], this.searchSpace.inputSequence[transformIndex]);
+      currentText = applyTransform(alteredSources[transformIndex].segment.trueTransform, currentText);
+      constructingToken.addInput(this.inputSegments[transformIndex], this.searchSpace.inputSequence[transformIndex]);
       transformIndex++;
     }
 
     return tokensFromSplit;
   }
 }
 
-export function preprocessInputSources(inputSources: ReadonlyArray<TokenInputSource>) {
+export function preprocessInputSources(inputSources: ReadonlyArray<PathInputProperties>) {
   const alteredSources = deepCopy(inputSources);
   let trickledDeleteLeft = 0;
   for(let i = alteredSources.length - 1; i >= 0; i--) {
     const source = alteredSources[i];
     if(trickledDeleteLeft) {
-      const insLen = KMWString.length(source.trueTransform.insert);
+      const insLen = KMWString.length(source.segment.trueTransform.insert);
       if(insLen <= trickledDeleteLeft) {
-        source.trueTransform.insert = '';
+        source.segment.trueTransform.insert = '';
         trickledDeleteLeft -= insLen;
       } else {
-        source.trueTransform.insert = KMWString.substring(source.trueTransform.insert, 0, insLen - trickledDeleteLeft);
+        source.segment.trueTransform.insert = KMWString.substring(source.segment.trueTransform.insert, 0, insLen - trickledDeleteLeft);
         trickledDeleteLeft = 0;
       }
     }
-    trickledDeleteLeft += source.trueTransform.deleteLeft;
-    source.trueTransform.deleteLeft = 0;
+    trickledDeleteLeft += source.segment.trueTransform.deleteLeft;
+    source.segment.trueTransform.deleteLeft = 0;
   }
 
-  alteredSources[0].trueTransform.deleteLeft = trickledDeleteLeft;
+  alteredSources[0].segment.trueTransform.deleteLeft = trickledDeleteLeft;
   return alteredSources;
}
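To round out this file's changes, here is a hedged walkthrough of the deleteLeft trickling above: each transform's deleteLeft is replayed against earlier inserts, and only an unconsumed remainder survives on the first transform. The `seg` helper and all ids are hypothetical, and the shapes follow the earlier sketch.

import { preprocessInputSources } from './context-token.js'; // path assumed

// Hypothetical helper: wraps a bare Transform in the PathInputProperties shape.
const seg = (id: number, insert: string, deleteLeft: number) => ({
  segment: { trueTransform: { insert, deleteLeft, id }, transitionId: id, start: 0 },
  bestProbFromSet: 1,
  subsetId: 0
});

const altered = preprocessInputSources([
  seg(1, 'ab', 0), // types "ab"
  seg(2, 'c', 0),  // types "c"
  seg(3, 'd', 2)   // erases "bc", then types "d"
]);

// Every deleteLeft is now pre-applied against the earlier inserts:
//   altered[0].segment.trueTransform -> { insert: 'a', deleteLeft: 0 }
//   altered[1].segment.trueTransform -> { insert: '',  deleteLeft: 0 }
//   altered[2].segment.trueTransform -> { insert: 'd', deleteLeft: 0 }
// The net text is unchanged: "ab" + "c" minus two chars plus "d" == "ad".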
@@ -158,16 +158,6 @@ export class ContextTokenization {
     return this.tail.spaceId;
   }
 
-  /**
-   * Returns plain-text strings representing the most probable representation for all
-   * tokens represented by this tokenization instance.
-   *
-   * Intended for debugging use only.
-   */
-  get sourceText() {
-    return this.tokens.map(token => token.sourceText);
-  }
-
   /**
    * Returns a plain-text string representing the most probable representation for all
    * tokens represented by this tokenization instance.
@@ -596,9 +586,13 @@ export class ContextTokenization {
         distribution = distribution.map((mass) => ({sample: { ...mass.sample, deleteLeft: 0 }, p: mass.p }));
       }
       affectedToken.addInput({
-        trueTransform: sourceInput,
-        inputStartIndex: appliedLength,
-        bestProbFromSet: bestProbFromSet
+        segment: {
+          trueTransform: sourceInput,
+          transitionId: sourceInput.id,
+          start: appliedLength
+        },
+        bestProbFromSet: bestProbFromSet,
+        subsetId: pendingTokenization.inputSubsetId
       }, distribution);
       appliedLength += KMWString.length(distribution[0].sample.insert);
 
@@ -10,12 +10,11 @@
 
 import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keymanapp/web-utils';
 import { LexicalModelTypes } from '@keymanapp/common-types';
-import { applyTransform } from '@keymanapp/models-templates';
 
 import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js';
-import { generateSpaceSeed, PathResult, SearchSpace, TokenInputSource } from './search-space.js';
+import { generateSpaceSeed, PathResult, SearchSpace, PathInputProperties } from './search-space.js';
+import { generateSubsetId } from './tokenization-subsets.js';
 
-import Context = LexicalModelTypes.Context;
 import Distribution = LexicalModelTypes.Distribution;
 import LexicalModel = LexicalModelTypes.LexicalModel;
 import ProbabilityMass = LexicalModelTypes.ProbabilityMass;
@@ -30,7 +29,7 @@ export const QUEUE_NODE_COMPARATOR: Comparator<SearchNode> = function(arg1, arg2
 export class SearchPath implements SearchSpace {
   private selectionQueue: PriorityQueue<SearchNode> = new PriorityQueue(QUEUE_NODE_COMPARATOR);
   readonly inputs?: Distribution<Transform>;
-  readonly inputSource?: TokenInputSource;
+  readonly inputSource?: PathInputProperties;
 
   readonly parentSpace: SearchSpace;
   readonly spaceId: number;
@@ -62,6 +61,7 @@ export class SearchPath implements SearchSpace {
   /**
    * Extends an existing SearchSpace (and its correction data) by a keystroke based
    * on a subset of the incoming keystroke's fat-finger distribution.
+   *
    * @param space
    * @param inputs
    * @param srcKeystroke The sample from the incoming distribution that represents data actually
@@ -79,31 +79,35 @@ export class SearchPath implements SearchSpace {
    * @param srcKeystroke Data about the actual context range represented by `inputs` and
    *                     its underlying keystroke.
    */
-  constructor(space: SearchSpace, inputs: Distribution<Transform>, srcKeystroke: TokenInputSource);
-  constructor(arg1: LexicalModel | SearchSpace, inputs?: Distribution<Transform>, inputSource?: TokenInputSource | ProbabilityMass<Transform>) {
+  constructor(space: SearchSpace, inputs: Distribution<Transform>, srcKeystroke: PathInputProperties);
+  constructor(arg1: LexicalModel | SearchSpace, inputs?: Distribution<Transform>, inputSource?: PathInputProperties | ProbabilityMass<Transform>) {
     // If we're taking in a pre-constructed search node, it's got an associated,
     // pre-assigned spaceID - so use that.
     const isExtending = (arg1 instanceof SearchPath);
     this.spaceId = generateSpaceSeed();
 
     // Coerce inputSource to TokenInputSource format.
-    if(inputSource && (inputSource as TokenInputSource).trueTransform == undefined) {
+    if(inputSource && (inputSource as ProbabilityMass<Transform>).sample != undefined) {
       const keystroke = inputSource as ProbabilityMass<Transform>;
       inputSource = {
-        trueTransform: keystroke.sample,
+        segment: {
+          trueTransform: keystroke.sample,
+          transitionId: keystroke.sample.id,
+          start: 0
+        },
         bestProbFromSet: keystroke.p,
-        inputStartIndex: 0
+        subsetId: generateSubsetId()
       }
     };
 
-    const inputSrc = inputSource as TokenInputSource;
+    const inputSrc = inputSource as PathInputProperties;
 
     if(isExtending) {
       const parentSpace = arg1 as SearchSpace;
       const logTierCost = -Math.log(inputSrc.bestProbFromSet);
 
       const transitionId = (inputs?.[0].sample.id);
-      if(transitionId !== undefined && inputSrc.trueTransform.id != transitionId) {
+      if(transitionId !== undefined && inputSrc.segment.transitionId != transitionId) {
         throw new Error("Input distribution and input-source transition IDs must match");
       }

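Given the coercion above, the extending constructor accepts its third argument in two forms. A minimal sketch of both call shapes, assuming dummy ids and probabilities and a pre-existing `space`:

// Paths assumed; the real import locations may differ.
import { SearchPath } from './search-path.js';
import { SearchSpace } from './search-space.js';
import { LexicalModelTypes } from '@keymanapp/common-types';
import Distribution = LexicalModelTypes.Distribution;
import Transform = LexicalModelTypes.Transform;

declare const space: SearchSpace; // any existing search space

// One fat-fingered keystroke: "a" was actually struck; "s" was a near-miss.
const dist: Distribution<Transform> = [
  { sample: { insert: 'a', deleteLeft: 0, id: 11 }, p: 0.9 },
  { sample: { insert: 's', deleteLeft: 0, id: 11 }, p: 0.1 }
];

// Form 1: a bare ProbabilityMass<Transform>; the constructor wraps it into a
// single-segment PathInputProperties with start 0 and a freshly generated subsetId.
const extended = new SearchPath(space, dist, dist[0]);

// Form 2: a pre-built PathInputProperties, e.g. to resume partway (start: 2)
// through a transform whose head an earlier token already consumed.
const resumed = new SearchPath(space, dist, {
  segment: { trueTransform: dist[0].sample, transitionId: 11, start: 2 },
  bestProbFromSet: 0.9,
  subsetId: 42 // would normally carry over the originating subset's id
});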
@@ -198,23 +202,6 @@ export class SearchPath implements SearchSpace {
     }
   }
 
-  get likeliestSourceText(): string {
-    let prefixContext: Context = { left: this.parentSpace?.likeliestSourceText ?? '', startOfBuffer: true, endOfBuffer: true };
-    const inputTransform = this.inputSource?.trueTransform ?? { insert: '', deleteLeft: 0 };
-
-    const excessDeletes = inputTransform.deleteLeft - KMWString.length(prefixContext.left);
-    if(excessDeletes > 0) {
-      prefixContext = {
-        ...prefixContext,
-        // \u{2421} = ␡ (Unicode symbol for Delete)
-        left: '\u{2421}'.repeat(excessDeletes) + prefixContext.left
-      };
-    }
-
-    const result = applyTransform(inputTransform, prefixContext);
-    return result.left;
-  }
-
   get parents() {
     // The SearchPath class may only have a single parent.
     return this.parentSpace ? [this.parentSpace] : [];
@@ -362,15 +349,15 @@ export class SearchPath implements SearchSpace {
     return Object.values(this.returnedValues ?? {}).map(v => new SearchResult(v));
   }
 
-  public get sourceIdentifiers(): TokenInputSource[] {
+  public get inputSegments(): PathInputProperties[] {
     if(!this.parentSpace) {
       return [];
     }
 
-    const parentSources = this.parentSpace.sourceIdentifiers;
+    const parentSources = this.parentSpace.inputSegments;
     if(this.inputSource) {
-      const inputId = this.inputSource.trueTransform.id;
-      if(inputId && parentSources.length > 0 && parentSources[parentSources.length - 1].trueTransform.id == inputId) {
+      const inputId = this.inputSource.segment.transitionId;
+      if(inputId && parentSources.length > 0 && parentSources[parentSources.length - 1].segment.transitionId == inputId) {
         return parentSources;
       }
 
@@ -386,11 +373,11 @@ export class SearchPath implements SearchSpace {
    */
   get sourceRangeKey(): string {
     const components: string[] = [];
-    const sources = this.sourceIdentifiers;
+    const sources = this.inputSegments;
 
     for(const source of sources) {
-      const i = source.inputStartIndex;
-      components.push(`T${source.trueTransform.id}${i != 0 ? '@' + i : ''}`);
+      const i = source.segment.start;
+      components.push(`T${source.segment.transitionId}${i != 0 ? '@' + i : ''}`);
     }
 
     return components.join('+');
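The resulting key is compact: each segment contributes a `T` plus its transition id, and `@offset` is appended only when the segment begins partway through its transform. A standalone mirror of the logic above, for illustration (ids and offsets are hypothetical):

// Mirrors the sourceRangeKey construction above, outside the class.
function rangeKey(segments: { transitionId: number, start: number }[]): string {
  return segments
    .map(s => `T${s.transitionId}${s.start != 0 ? '@' + s.start : ''}`)
    .join('+');
}

// A token built from all of transform 7 plus the tail of transform 9,
// starting at character 2:
console.log(rangeKey([
  { transitionId: 7, start: 0 },
  { transitionId: 9, start: 2 }
])); // -> "T7+T9@2"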