Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions packages/element-selector/jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Jest configuration for this package: start from the shared config two
// directories up and layer the package-specific settings on top of it.
const sharedConfig = require('../../jest.config.js');
const { name: packageName } = require('./package');

// Settings that differ from (or are added to) the shared config.
const packageOverrides = {
  // Label test output with the name declared in this package's manifest.
  displayName: packageName,
  rootDir: '.',
  testEnvironment: 'jsdom',
  // Scaffolding only — coverage gates land alongside the algorithm in subsequent tickets.
  // Explicitly set to undefined so any threshold from the shared config is overridden.
  coverageThreshold: undefined,
};

module.exports = { ...sharedConfig, ...packageOverrides };
45 changes: 45 additions & 0 deletions packages/element-selector/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"name": "@amplitude/element-selector",
"version": "0.1.0",
"private": true,
"description": "Shared element-selector algorithm consumed by autocapture, the Amplitude dashboard, and the Chrome extension visual tagger.",
"author": "Amplitude Inc",
"homepage": "https://github.com/amplitude/Amplitude-TypeScript",
"license": "MIT",
"main": "lib/cjs/index.js",
"module": "lib/esm/index.js",
"types": "lib/esm/index.d.ts",
"sideEffects": false,
"publishConfig": {
"access": "public",
"tag": "latest"
},
"repository": {
"type": "git",
"url": "git+https://github.com/amplitude/Amplitude-TypeScript.git"
},
"scripts": {
"build": "pnpm build:es5 & pnpm build:esm",
"build:es5": "tsc -p ./tsconfig.es5.json",
"build:esm": "tsc -p ./tsconfig.esm.json",
"watch": "tsc -p ./tsconfig.esm.json --watch",
"clean": "rimraf node_modules lib coverage",
"fix": "pnpm fix:eslint & pnpm fix:prettier",
"fix:eslint": "eslint '{src,test}/**/*.ts' --fix",
"fix:prettier": "prettier --write \"{src,test}/**/*.ts\"",
"lint": "pnpm lint:eslint & pnpm lint:prettier",
"lint:eslint": "eslint '{src,test}/**/*.ts'",
"lint:prettier": "prettier --check \"{src,test}/**/*.ts\"",
"test": "jest",
"typecheck": "tsc -p ./tsconfig.json"
},
"bugs": {
"url": "https://github.com/amplitude/Amplitude-TypeScript/issues"
},
"dependencies": {
"tslib": "^2.4.1"
},
"files": [
"lib"
]
}
56 changes: 56 additions & 0 deletions packages/element-selector/src/helpers/describe-relative.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/**
 * Positional descent builder.
 *
 * Takes the trail of elements leading from an anchor element (the result of
 * pass 2's walk in the orchestration PR) down to the original click target and
 * renders it as a CSS child-combinator chain. Every step is produced by
 * `stepFor`, and the steps are joined with `' > '`.
 *
 * Matches ContentSquare's "position within identical markers" convention.
 *
 * @example
 *   describeRelative(anchor, [<section>, <ul>, <li>])
 *   // → "section:nth-of-type(1) > ul:nth-of-type(1) > li:nth-of-type(3)"
 *
 *   // The orchestrator prefixes the anchor selector (`anchor#some-id`):
 *   // "anchor#some-id > section:nth-of-type(1) > ul:nth-of-type(1) > li:nth-of-type(3)"
 *
 * @param _anchor - Currently unused. Accepted for symmetry with the
 *   orchestrator's call site and kept so later work can consult the anchor
 *   when extending the descent format (e.g. dropping a bare
 *   `:nth-of-type(1)` when there is only one element of that type).
 * @param trail - Elements between the anchor and the target, outermost first.
 *   Each element is described relative to its own parent.
 * @returns The descent string; an empty string when `trail` is empty.
 */
export function describeRelative(_anchor: Element, trail: Element[]): string {
  const steps = trail.map((el) => stepFor(el));
  return steps.join(' > ');
}

/**
 * Renders a single descent step for `el` as `<tag>:nth-of-type(<n>)`.
 *
 * The tag comes from `el.localName` rather than a lowercased `tagName`.
 * For HTML elements the two are identical, but type selectors are matched
 * case-sensitively against elements outside the HTML namespace, so
 * lowercasing a camelCase SVG name such as `foreignObject` or
 * `linearGradient` would yield a selector that never matches the element.
 *
 * @param el - The element to describe.
 * @returns `tag:nth-of-type(n)`, or just `tag` when `el` has no parent element.
 */
function stepFor(el: Element): string {
  const tag = el.localName;
  const parent = el.parentElement;
  if (parent === null) {
    // Detached element or root — emit just the tag. The orchestrator shouldn't
    // call us with a detached trail, but we don't want to crash if it does.
    return tag;
  }
  return `${tag}:nth-of-type(${sameTypeIndex(el, parent)})`;
}

/**
 * Computes the 1-based position of `el` among the children of `parent` that
 * share its `tagName`, mirroring `:nth-of-type` semantics.
 *
 * @param el - The element whose position is wanted.
 * @param parent - The element whose `children` are scanned.
 * @returns The 1-based index, or 1 if `el` is not found among the children.
 */
function sameTypeIndex(el: Element, parent: Element): number {
  const siblings = parent.children;
  const wantedTag = el.tagName;
  let position = 0;
  for (let i = 0; i < siblings.length; i++) {
    const candidate = siblings[i];
    if (candidate.tagName !== wantedTag) continue;
    position += 1;
    if (candidate === el) return position;
  }
  // Element wasn't found among its parent's children — shouldn't happen for a
  // live element. Return 1 as a defensive fallback rather than throwing.
  return 1;
}
44 changes: 44 additions & 0 deletions packages/element-selector/src/helpers/get-stable-id.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import { ResolvedSelectorConfig } from '../types';
import { isStableId } from '../patterns/autogenerated-ids';

/**
 * Decides whether an element's id may be used as a selector anchor.
 *
 * Shared by the `stableId` strategy (in this PR) and the `fallback-css-path`
 * walker (in the orchestration PR), so both always agree on which ids are
 * usable — one component can never filter an id that the other still emits.
 *
 * Checks, in order:
 *
 *   1. Suppression: if the customer's explicit-tracking attribute is present
 *      with an empty value (e.g. `data-amp-track-id=""`), the customer is
 *      telling us to ignore this element's id. Result: null.
 *   2. Presence: a missing or empty `id` attribute yields null.
 *   3. Stability: an id rejected by `isStableId` against
 *      `cfg.autogeneratedIdPatterns` yields null, so the algorithm walks
 *      past this element.
 *   4. Anything that survives is returned unchanged.
 *
 * @param el - Element whose id is being considered.
 * @param cfg - Resolved config supplying the explicit-tracking attribute name
 *   and the autogenerated-id patterns.
 * @returns The id when it is usable as an anchor, otherwise null.
 */
export function getStableId(el: Element, cfg: ResolvedSelectorConfig): string | null {
  // Present-but-empty tracking attribute is the suppression signal; an absent
  // attribute (null) or a non-empty value does not suppress.
  const suppressed = el.getAttribute(cfg.explicitTrackingAttribute) === '';
  if (suppressed) {
    return null;
  }

  const candidate = el.getAttribute('id');
  if (candidate === null || candidate === '') {
    return null;
  }

  // Ids matching an autogenerated pattern are treated as if absent.
  return isStableId(candidate, cfg.autogeneratedIdPatterns) ? candidate : null;
}
46 changes: 46 additions & 0 deletions packages/element-selector/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/**
* @amplitude/element-selector
*
* Shared element-selector algorithm consumed by autocapture, the
* app.amplitude.com tagging UI, and the Chrome extension visual tagger.
*
* This file is the package's public API surface. Subsequent PRs land the
* orchestrator, fallback, config resolver, and factory; the strategies +
* primitives below are the foundation everything composes on top of.
*
* See the design doc:
* packages/plugin-autocapture-browser/element-selector-strategy-v1-no-classes.md
*/

// Package identity constants. These are hard-coded literals; the values mirror
// the "name" and "version" fields of this package's package.json.
// NOTE(review): nothing here keeps them in sync automatically — confirm the
// release process updates PACKAGE_VERSION alongside package.json.
export const PACKAGE_NAME = '@amplitude/element-selector';
export const PACKAGE_VERSION = '0.1.0';

// ===== Types =====
// Type-only re-exports (`export type`), so nothing from './types' is emitted
// at runtime by this statement.
export type {
Strategy,
StrategyContext,
ResolvedSelectorConfig,
ElementSelectorRemoteConfig,
SelectorEngine,
} from './types';

// ===== Pattern packs =====
// Both pattern modules export a function named `compile`; each is re-exported
// under a distinct alias so the two can coexist on the public surface.
export {
DEFAULT_AUTOGENERATED_ID_PATTERNS,
compile as compileAutogeneratedIdPatterns,
isStableId,
} from './patterns/autogenerated-ids';

export {
DEFAULT_UNSTABLE_CLASS_PATTERNS,
compile as compileUnstableClassPatterns,
filterClasses,
} from './patterns/unstable-classes';

// ===== Helpers =====
export { getStableId } from './helpers/get-stable-id';
export { describeRelative } from './helpers/describe-relative';

// ===== Strategies =====
export { explicitTrackingAttribute } from './strategies/explicit-tracking-attribute';
// Exposed publicly as `stableIdStrategy` rather than under its module-local
// name `stableId`.
export { stableId as stableIdStrategy } from './strategies/stable-id';
68 changes: 68 additions & 0 deletions packages/element-selector/src/patterns/autogenerated-ids.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/**
* Autogenerated-id pattern pack.
*
* When an element id matches any of these regexes, the algorithm treats the
* element as if it has no id — the stableId strategy returns null and the
* fallback walks past it instead of anchoring on it. Stops selectors from
* pinning to ids that change between page loads or component re-mounts.
*
* Defaults are surfaced to customers in remote config so they can audit what's
* being filtered and add or remove patterns to match their stack.
*/

/**
 * Built-in autogenerated-id patterns. An id that matches ANY entry is treated
 * as unstable. Order doesn't affect correctness — matching stops at the first
 * hit — so entries are simply grouped from most to least specific.
 */
export const DEFAULT_AUTOGENERATED_ID_PATTERNS: ReadonlyArray<RegExp> = [
  // React useId() — ":r0:", ":r1:", ":rk:". Stable for one render; new id on next mount.
  /^:r[0-9a-z]+:$/,
  // Radix UI primitives — "radix-:r3:", "radix-A1B2C3". Prefix match on
  // "radix-" so both the useId-based and the plain-suffix forms are caught
  // (a trailing colon in the pattern would miss the latter).
  /^radix-/,
  // Headless UI (Tailwind Labs) — "headlessui-menu-button-:r5:". Prefix match.
  /^headlessui-/,
  // MUI internal id prefix — "mui-12345". Don't confuse with Mui-* CSS class state markers.
  /^mui-/,
  // Hex-only ids ≥16 chars — UUIDs without hyphens, build hashes. Whole-string match.
  /^[a-f0-9]{16,}$/,
  // Trailing long digit run — "session-1700000000", "row-20250515". Timestamps, counters.
  /-\d{8,}$/,
  // Any 4+ consecutive digits anywhere — "product-2134", "swiper-wrapper-…41039".
  // ContentSquare's heuristic; catches the digit-heavy portions of library-generated ids.
  /\d{4,}/,
];

/**
 * Compile a list of regex pattern strings (e.g. from a remote-config payload)
 * into RegExp objects, preserving input order.
 *
 * Entries are skipped silently when they are:
 *   - not a string (a malformed payload entry; `new RegExp(undefined)` would
 *     otherwise produce a match-everything pattern),
 *   - the empty string (compiles to `/(?:)/`, which matches every id and
 *     would mark all ids as autogenerated), or
 *   - not a valid regular expression.
 *
 * Proper logger integration with @amplitude/analytics-core lands alongside
 * `resolveSelectorConfig` in the orchestration PR.
 *
 * Callers decide whether to merge the result with `DEFAULT_AUTOGENERATED_ID_PATTERNS`
 * or use it as a full replacement. That policy lives in the config resolver,
 * not here.
 *
 * @param patterns - Regex source strings. A missing list is treated as empty.
 * @returns The successfully compiled patterns (no flags applied).
 */
export function compile(patterns: string[]): RegExp[] {
  const compiled: RegExp[] = [];
  if (!patterns) return compiled;
  for (const pattern of patterns) {
    // Guard against payload junk and the match-everything empty pattern.
    if (typeof pattern !== 'string' || pattern === '') continue;
    try {
      compiled.push(new RegExp(pattern));
    } catch (_e) {
      // Invalid regex string — skip. Logger integration lands in the orchestration PR.
    }
  }
  return compiled;
}

/**
 * Check whether `id` is "stable" relative to a set of patterns. An id is
 * stable when it's a non-empty string AND doesn't match any pattern in
 * `patterns`. Null, undefined, and empty-string ids are not stable.
 *
 * Patterns carrying the `g` or `y` flag keep match state in `lastIndex`
 * between `test()` calls. That state is cleared before each test so the
 * answer depends only on `id`, never on what was tested previously.
 *
 * @param id - Candidate id; may be null/undefined when the attribute is absent.
 * @param patterns - Autogenerated-id patterns to test against.
 * @returns true when `id` is non-empty and matches none of `patterns`.
 */
export function isStableId(id: string | null | undefined, patterns: ReadonlyArray<RegExp>): boolean {
  if (id === null || id === undefined || id === '') return false;
  for (const pattern of patterns) {
    // Only written when non-zero, so non-global patterns are never touched.
    if (pattern.lastIndex !== 0) pattern.lastIndex = 0;
    if (pattern.test(id)) return false;
  }
  return true;
}
115 changes: 115 additions & 0 deletions packages/element-selector/src/patterns/unstable-classes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/**
* Unstable-class pattern pack.
*
* The fallback walker filters classes through these regexes before adding them
* to a selector. Classes that match are dropped — they don't participate in
* sibling disambiguation and never appear in the emitted output. Defends
* against three failure modes:
*
* 1. Build-tool / framework utilities (Tailwind, etc.) — class names that
* look stable but change with every design tweak.
* 2. CSS-in-JS / build-hash classes (Emotion, CSS modules, styled-components,
* styled-jsx) — change on every build.
* 3. Library runtime state classes (Swiper, MUI, Radix, Headless UI,
* BEM-style is-active/is-open) — class is stable in name but its presence
* on a given element moves as the user interacts.
*
* Defaults are surfaced to customers in remote config so they can audit what's
* being filtered and add or remove patterns to match their stack.
*/

/**
 * Built-in unstable-class patterns, grouped by category for readability.
 * The grouping is cosmetic: a class is dropped if it matches ANY entry.
 */
export const DEFAULT_UNSTABLE_CLASS_PATTERNS: ReadonlyArray<RegExp> = [
  // ===== Tailwind / build-tool utilities =====

  // Tailwind spacing: p-4, px-2, py-8, pt-1, mt-4, etc.
  /^(p|m|px|py|mx|my|pt|pb|pl|pr|mt|mb|ml|mr)-\d+$/,
  // Tailwind sizing: w-full, h-screen, max-w-[1440px], etc.
  /^(w|h|min-w|max-w|min-h|max-h)-/,
  // Tailwind color / visual: bg-blue-500, text-white, border-gray-200, ring-2, etc.
  /^(text|bg|border|ring|fill|stroke)-/,
  // Tailwind state variants: hover:underline, focus:ring-2, active:bg-transparent.
  /^(hover|focus|active|disabled|group-hover):/,
  // Tailwind breakpoint variants: md:flex, lg:grid-cols-3.
  /^(sm|md|lg|xl|2xl):/,
  // Tailwind z-index utility: z-10, z-50.
  /^z-\d+$/,
  // Tailwind arbitrary data-attribute variants: data-[state=open]:bg-white.
  /^data-\[/,
  // Tailwind arbitrary selector variants: [&_.swiper-slide]:h-auto, [&>div]:p-4.
  /^\[/,

  // ===== CSS-in-JS / build hashes =====

  // Emotion: css-1abcd23, css-9xyzkw0.
  /^css-[a-z0-9]{6,}$/,
  // CSS modules (`File_local__hash`): Button_root__abc123, Card_container__xyz789.
  // Only the trailing hash carries a minimum length; the local class name is
  // often short ("root", "btn"), so it must not be required to be 5+ chars.
  /^[a-zA-Z]+_[a-zA-Z0-9]+__[a-zA-Z0-9]{5,}$/,
  // styled-components: sc-bdVaJa, sc-1jjuPXC0.
  /^sc-[a-zA-Z0-9]{6,}$/,
  // styled-jsx (Next.js): jsx-1234567.
  /^jsx-\d+$/,

  // ===== Library runtime state classes =====

  // Swiper carousel slide states. Move between elements as carousel advances.
  /^swiper-slide-(visible|fully-visible|active|prev|next|duplicate)$/,
  // BEM-style interaction state: is-active, is-open, is-selected, etc.
  /^is-(active|open|selected|hovered|focused|expanded)$/,
  // MUI per-component state: MuiButton-focusVisible, MuiSwitch-checked, etc.
  /^Mui[A-Z][a-zA-Z]+-(focused|selected|disabled|expanded|focusVisible|active|checked)$/,
  // MUI bare state classes: Mui-selected, Mui-disabled.
  /^Mui-(selected|focused|disabled|expanded|focusVisible|active|checked)$/,
  // Radix-style state class mirrors: data-state-open, data-state-checked.
  /^data-state-/,
];

/**
 * Compile a list of regex pattern strings (e.g. from a remote-config payload)
 * into RegExp objects, preserving input order.
 *
 * Entries are skipped silently when they are:
 *   - not a string (a malformed payload entry; `new RegExp(undefined)` would
 *     otherwise produce a match-everything pattern),
 *   - the empty string (compiles to `/(?:)/`, which matches every class and
 *     would cause all classes to be dropped), or
 *   - not a valid regular expression.
 *
 * Proper logger integration with @amplitude/analytics-core lands alongside
 * `resolveSelectorConfig` in the orchestration PR.
 *
 * Callers decide whether to merge the result with `DEFAULT_UNSTABLE_CLASS_PATTERNS`
 * or use it as a full replacement. That policy lives in the config resolver,
 * not here.
 *
 * @param patterns - Regex source strings. A missing list is treated as empty.
 * @returns The successfully compiled patterns (no flags applied).
 */
export function compile(patterns: string[]): RegExp[] {
  const compiled: RegExp[] = [];
  if (!patterns) return compiled;
  for (const pattern of patterns) {
    // Guard against payload junk and the match-everything empty pattern.
    if (typeof pattern !== 'string' || pattern === '') continue;
    try {
      compiled.push(new RegExp(pattern));
    } catch (_e) {
      // Invalid regex string — skip. Logger integration lands in the orchestration PR.
    }
  }
  return compiled;
}

/**
 * Filter a list of class names, dropping any that match a pattern. Returns a
 * new array containing only the survivors, preserving original order. The
 * input array is never mutated.
 *
 * Used by the fallback walker (in the next PR) before adding classes to a
 * selector for sibling disambiguation.
 *
 * Patterns carrying the `g` or `y` flag keep match state in `lastIndex`
 * between `test()` calls. That state is cleared before each test so a class
 * is judged on its own text, not on which class was tested before it.
 *
 * @param classes - Class names to filter. Null / undefined / empty input
 *   yields an empty array; empty-string entries are discarded.
 * @param patterns - Unstable-class patterns; a match on any one drops the class.
 * @returns The class names that matched no pattern.
 */
export function filterClasses(classes: string[], patterns: ReadonlyArray<RegExp>): string[] {
  if (!classes || classes.length === 0) return [];
  const survivors: string[] = [];
  for (const cls of classes) {
    if (!cls) continue;
    let matched = false;
    for (const pattern of patterns) {
      // Only written when non-zero, so non-global patterns are never touched.
      if (pattern.lastIndex !== 0) pattern.lastIndex = 0;
      if (pattern.test(cls)) {
        matched = true;
        break;
      }
    }
    if (!matched) survivors.push(cls);
  }
  return survivors;
}
Loading
Loading