Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 43 additions & 40 deletions src/core/series.ts
Original file line number Diff line number Diff line change
Expand Up @@ -764,8 +764,12 @@ export class Series<T extends Scalar = Scalar> {
let nanCount = 0;
let allNumeric = true;

// Single pass: partition NaN/null and initialise AoS radix entries for finite numerics.
// fvals is indexed by compact slot (finCount) so reads and writes are sequential.
// Clear histograms before the init loop so we can accumulate them inline.
_rxHisto.fill(0);

// Single pass: partition NaN/null, initialise AoS radix entries for finite
// numerics, and accumulate all 8 histograms simultaneously — eliminating the
// separate O(n) histogram scan that the previous implementation required.
for (let i = 0; i < n; i++) {
const v = vals[i];
if (v === null || v === undefined || (typeof v === "number" && Number.isNaN(v))) {
Expand All @@ -791,6 +795,24 @@ export class Series<T extends Scalar = Scalar> {
_rxA[base] = i;
_rxA[base + 1] = lo;
_rxA[base + 2] = hi;
// Accumulate all 8 histogram passes inline — no second scan needed.
let idx: number;
idx = lo & 0xff;
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 256 + ((lo >>> 8) & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 512 + ((lo >>> 16) & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 768 + ((lo >>> 24) & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 1024 + (hi & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 1280 + ((hi >>> 8) & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 1536 + ((hi >>> 16) & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 1792 + ((hi >>> 24) & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
} else {
allNumeric = false;
}
Expand All @@ -807,35 +829,9 @@ export class Series<T extends Scalar = Scalar> {

if (allNumeric && finCount > 0) {
// ── LSD radix sort: 8 passes × 8 bits over IEEE-754 transformed keys ──
// _rxA is already initialised by the merged loop above.
// _rxA and _rxHisto are already initialised by the merged loop above.
// AoS layout: srcBuf[i*3]=origIdx, srcBuf[i*3+1]=loKey, srcBuf[i*3+2]=hiKey.

// Pre-compute all 8 histograms in a single O(n) scan, saving 7 redundant
// count loops. _rxHisto[pass*256 + byte] = count of elements with that byte.
_rxHisto.fill(0);
for (let i = 0; i < finCount; i++) {
const si = i * 3;
const lo = srcBuf[si + 1]!;
const hi = srcBuf[si + 2]!;
let idx: number;
idx = 0 * 256 + (lo & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 1 * 256 + ((lo >>> 8) & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 2 * 256 + ((lo >>> 16) & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 3 * 256 + ((lo >>> 24) & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 4 * 256 + (hi & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 5 * 256 + ((hi >>> 8) & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 6 * 256 + ((hi >>> 16) & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
idx = 7 * 256 + ((hi >>> 24) & 0xff);
_rxHisto[idx] = _rxHisto[idx]! + 1;
}

// Convert each histogram to an exclusive prefix sum (cumulative offsets).
for (let pass = 0; pass < 8; pass++) {
const base = pass * 256;
Expand All @@ -855,8 +851,8 @@ export class Series<T extends Scalar = Scalar> {
const keyOff = pass < 4 ? 1 : 2;
const shift = (pass % 4) * 8;
const histoBase = pass * 256;
for (let i = 0; i < finCount; i++) {
const si = i * 3;
// Use accumulated stride counter (si += 3) to avoid i*3 multiply per element.
for (let i = 0, si = 0; i < finCount; i++, si += 3) {
const bucket = (srcBuf[si + keyOff]! >>> shift) & 0xff;
const p = _rxHisto[histoBase + bucket]!;
_rxHisto[histoBase + bucket] = p + 1;
Expand Down Expand Up @@ -904,15 +900,15 @@ export class Series<T extends Scalar = Scalar> {
}
if (allNumeric) {
if (ascending) {
for (let i = 0; i < finCount; i++) {
const idx = srcBuf[i * 3]!;
for (let i = 0, si = 0; i < finCount; i++, si += 3) {
const idx = srcBuf[si]!;
perm[pos] = idx;
outData[pos] = vals[idx] as T;
pos = pos + 1;
}
} else {
for (let i = finCount - 1; i >= 0; i--) {
const idx = srcBuf[i * 3]!;
for (let i = finCount - 1, si = (finCount - 1) * 3; i >= 0; i--, si -= 3) {
const idx = srcBuf[si]!;
perm[pos] = idx;
outData[pos] = vals[idx] as T;
pos = pos + 1;
Expand All @@ -929,15 +925,15 @@ export class Series<T extends Scalar = Scalar> {
} else {
if (allNumeric) {
if (ascending) {
for (let i = 0; i < finCount; i++) {
const idx = srcBuf[i * 3]!;
for (let i = 0, si = 0; i < finCount; i++, si += 3) {
const idx = srcBuf[si]!;
perm[pos] = idx;
outData[pos] = vals[idx] as T;
pos = pos + 1;
}
} else {
for (let i = finCount - 1; i >= 0; i--) {
const idx = srcBuf[i * 3]!;
for (let i = finCount - 1, si = (finCount - 1) * 3; i >= 0; i--, si -= 3) {
const idx = srcBuf[si]!;
perm[pos] = idx;
outData[pos] = vals[idx] as T;
pos = pos + 1;
Expand All @@ -959,9 +955,16 @@ export class Series<T extends Scalar = Scalar> {
}
}

// RangeIndex fast path: for a default 0-based RangeIndex the output index is
// just perm itself — skip 100k bounds-checked at() calls from index.take().
const outIndex: Index<Label> =
this.index instanceof RangeIndex && this.index.start === 0 && this.index.step === 1
? new Index<Label>(perm, this.index.name)
: this.index.take(perm);

return new Series<T>({
data: outData,
index: this.index.take(perm),
index: outIndex,
dtype: this.dtype,
name: this.name,
});
Expand Down
Loading