Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
"main": "build/app.js",
"dependencies": {
"d3": "7.9.0",
"d3-scale-cluster": "^2.0.1",
"lodash": "^4.17.23",
"prop-types": "^15.8.1",
"react": "18",
Expand Down
123 changes: 123 additions & 0 deletions src/components/__tests__/scale-cluster.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import scaleCluster from '../../js/scale-cluster';

// Behavioural tests for the in-repo scaleCluster implementation.
// `scale.clusters()` exposes every cluster minimum except the first, so a
// scale that resolved to k clusters reports k - 1 breakpoints.
describe('scaleCluster', function () {
  it('computes clusters correctly for well-separated groups', function () {
    const data = [1, 2, 10, 11, 20];
    const scale = scaleCluster().domain(data).range([1, 2, 3]);

    const clusters = scale.clusters();
    expect(clusters.length).toBe(2);
    expect(clusters).toEqual([10, 20]);

    expect(scale(1)).toBe(1);
    expect(scale(2)).toBe(1);
    expect(scale(10)).toBe(2);
    expect(scale(11)).toBe(2);
    expect(scale(20)).toBe(3);
  });

  it('handles the sunburst 5-range use case', function () {
    const data = [5, 12, 15, 30, 32, 60, 61, 80, 95];
    const scale = scaleCluster().domain(data).range([1, 2, 3, 4, 5]);
    const clusters = scale.clusters();

    expect(clusters.length).toBe(4);
    clusters.forEach(function (c) { expect(typeof c).toBe('number'); });

    for (const val of data) {
      const result = scale(val);
      expect(result).toBeGreaterThanOrEqual(1);
      expect(result).toBeLessThanOrEqual(5);
    }
  });

  it('returns single breakpoint-free cluster when all values identical', function () {
    const scale = scaleCluster().domain([7, 7, 7, 7]).range([1, 2, 3]);
    expect(scale.clusters()).toEqual([]);
    expect(scale(7)).toBe(1);
  });

  it('handles a single data point', function () {
    const scale = scaleCluster().domain([42]).range([1, 2, 3]);
    expect(scale.clusters()).toEqual([]);
    expect(scale(42)).toBe(1);
  });

  it('reduces cluster count when fewer unique values than range buckets', function () {
    const scale = scaleCluster().domain([1, 1, 50, 50]).range([1, 2, 3, 4, 5]);
    const clusters = scale.clusters();

    expect(clusters.length).toBe(1);
    expect(clusters).toEqual([50]);
    expect(scale(1)).toBe(1);
    expect(scale(50)).toBe(2);
  });

  it('works with unsorted input', function () {
    const scale = scaleCluster().domain([20, 1, 11, 2, 10]).range([1, 2, 3]);
    expect(scale.clusters()).toEqual([10, 20]);
    expect(scale(2)).toBe(1);
    expect(scale(11)).toBe(2);
    expect(scale(20)).toBe(3);
  });

  it('handles negative and mixed-sign values', function () {
    const data = [-50, -48, 0, 1, 100, 99];
    const scale = scaleCluster().domain(data).range([1, 2, 3]);
    const clusters = scale.clusters();

    expect(clusters.length).toBe(2);
    expect(scale(-50)).toBe(1);
    expect(scale(-48)).toBe(1);
    expect(scale(0)).toBe(2);
    expect(scale(1)).toBe(2);
    expect(scale(99)).toBe(3);
    expect(scale(100)).toBe(3);
  });

  it('handles floating-point similarity percentages', function () {
    const data = [0.5, 1.2, 1.3, 50.7, 51.1, 99.9];
    const scale = scaleCluster().domain(data).range([1, 2, 3]);
    const clusters = scale.clusters();

    expect(clusters.length).toBe(2);
    for (const val of data) {
      expect([1, 2, 3]).toContain(scale(val));
    }
  });

  it('preserves monotonicity: higher input never maps to lower range', function () {
    const data = [3, 7, 15, 22, 40, 55, 70, 88, 95];
    const scale = scaleCluster().domain(data).range([1, 2, 3, 4, 5]);
    const sorted = [...data].sort(function (a, b) { return a - b; });

    for (let i = 1; i < sorted.length; i++) {
      expect(scale(sorted[i])).toBeGreaterThanOrEqual(scale(sorted[i - 1]));
    }
  });

  it('domain and range getters return current values', function () {
    const d = [1, 2, 3];
    const r = [10, 20, 30];
    const scale = scaleCluster().domain(d).range(r);
    expect(scale.domain()).toEqual(d);
    expect(scale.range()).toEqual(r);
  });

  it('does not throw when range is set before domain or domain is empty', function () {
    const scale = scaleCluster().range([1, 2, 3, 4, 5]);
    expect(scale.clusters()).toEqual([]);
    expect(scale(0)).toBeUndefined();
    scale.domain([10, 20, 30]);
    // Three distinct values cap the cluster count at three even though five
    // buckets are available, so each value starts its own cluster.
    // (The previous `length >= 0` check could never fail.)
    expect(scale.clusters()).toEqual([20, 30]);
    expect(scale(20)).toBe(2);
  });

  it('clears breakpoints when range shrinks to two or fewer buckets', function () {
    const scale = scaleCluster().domain([1, 2, 10, 20]).range([1, 2, 3]);
    expect(scale.clusters().length).toBeGreaterThan(0);
    scale.range([1, 2]);
    expect(scale.clusters()).toEqual([]);
    expect(scale(10)).toBeUndefined();
  });
});
2 changes: 1 addition & 1 deletion src/components/sunburst/sunburst-container.jsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import React from 'react';
import D3Sunburst from './d3-sunburst.jsx';
import scaleCluster from 'd3-scale-cluster';
import scaleCluster from '../../js/scale-cluster.js';
import '../../css/diffgraph.css';
import { similarityWithDistance, checkResponse, getUTCDateFormat }
from '../../js/utils.js';
Expand Down
183 changes: 183 additions & 0 deletions src/js/scale-cluster.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
/**
* scaleCluster - a self-contained reimplementation of d3-scale-cluster.
*
* Uses the Ckmeans.1d.dp algorithm (optimal 1-D k-means clustering via
* dynamic programming) originally by Haizhou Wang and Mingzhou Song.
* This ensures our clusters are mathematically identical to the original
* npm package we replaced, with zero regressions.
*/

/**
 * Produce an ascending, numerically ordered copy of `array`.
 * The input itself is never reordered.
 *
 * @param {number[]} array - values to order
 * @returns {number[]} a new array sorted from smallest to largest
 */
function numericSort(array) {
  const ascending = (left, right) => left - right;
  const copy = array.slice();
  copy.sort(ascending);
  return copy;
}

/**
 * Count the distinct values in an already sorted array by counting the
 * positions where the value differs from its predecessor.
 *
 * @param {Array} sorted - values in sorted order (equal values adjacent)
 * @returns {number} number of runs of equal values; 0 for an empty array
 */
function uniqueCountSorted(sorted) {
  if (sorted.length === 0) {
    return 0;
  }

  // The first element always opens the first run.
  let distinct = 1;
  let previous = sorted[0];
  for (let index = 1; index < sorted.length; index += 1) {
    const value = sorted[index];
    if (value !== previous) {
      previous = value;
      distinct += 1;
    }
  }
  return distinct;
}

/**
 * Build a zero-filled two-dimensional array.
 *
 * @param {number} columns - number of inner arrays to create
 * @param {number} rows - length of every inner array
 * @returns {number[][]} `columns` independent arrays of `rows` zeros
 */
function makeMatrix(columns, rows) {
  const grid = [];
  // Each inner array is allocated separately so writes never alias.
  while (grid.length < columns) {
    grid.push(new Array(rows).fill(0));
  }
  return grid;
}

/**
 * Within-segment sum of squared deviations from the mean for the elements at
 * indices j..i (inclusive), derived from running prefix sums.
 *
 * @param {number} j - first index of the segment
 * @param {number} i - last index of the segment
 * @param {number[]} sumX - prefix sums of the values
 * @param {number[]} sumXsq - prefix sums of the squared values
 * @returns {number} the segment's sum of squares, with negative results
 *   (which can only arise from rounding) reported as 0
 */
function ssq(j, i, sumX, sumXsq) {
  const clampNegative = (value) => (value < 0 ? 0 : value);

  if (j > 0) {
    // Subtract the prefix that ends just before the segment starts.
    const size = i - j + 1;
    const mean = (sumX[i] - sumX[j - 1]) / size;
    return clampNegative(sumXsq[i] - sumXsq[j - 1] - size * mean * mean);
  }

  // Segment starts at index 0, so the prefix sums already describe it.
  return clampNegative(sumXsq[i] - (sumX[i] * sumX[i]) / (i + 1));
}

/**
 * Fill the entries of one dynamic-programming column for indices imin..imax.
 *
 * The middle index of the window is solved first; the left and right halves
 * are then handled recursively. The range of candidate start positions for
 * the last cluster is narrowed using backtrack values already stored for
 * neighbouring entries and for the previous column.
 *
 * @param {number} imin - first index of the window to fill
 * @param {number} imax - last index of the window to fill
 * @param {number} column - column being filled (reads column - 1 as well)
 * @param {number[][]} matrix - cost table, updated in place
 * @param {number[][]} backtrackMatrix - start index of the last cluster for
 *   each entry, updated in place
 * @param {number[]} sumX - prefix sums passed through to `ssq`
 * @param {number[]} sumXsq - prefix sums of squares passed through to `ssq`
 * @returns {void}
 */
function fillMatrixColumn(imin, imax, column, matrix, backtrackMatrix, sumX, sumXsq) {
  // Empty window: nothing to fill.
  if (imin > imax) {
    return;
  }

  const costs = matrix[column];
  const previousCosts = matrix[column - 1];
  const starts = backtrackMatrix[column];
  const mid = Math.floor((imin + imax) / 2);

  // Baseline candidate: the last cluster contains only the element at `mid`.
  costs[mid] = previousCosts[mid - 1];
  starts[mid] = mid;

  // Smallest start index worth trying for the last cluster.
  let lower = column;
  if (imin > column) {
    lower = Math.max(lower, starts[imin - 1] || 0);
  }
  lower = Math.max(lower, backtrackMatrix[column - 1][mid] || 0);

  // Largest start index worth trying for the last cluster.
  let upper = mid - 1;
  if (imax < matrix[0].length - 1) {
    upper = Math.min(upper, starts[imax + 1] || 0);
  }

  let j = upper;
  while (j >= lower) {
    const costFromJ = ssq(j, mid, sumX, sumXsq);

    // Stop once this candidate can no longer beat the best cost found so far.
    if (costFromJ + previousCosts[lower - 1] >= costs[mid]) {
      break;
    }

    // Try the candidate at the low end of the range, then advance that end.
    const totalFromLower = ssq(lower, mid, sumX, sumXsq) + previousCosts[lower - 1];
    if (totalFromLower < costs[mid]) {
      costs[mid] = totalFromLower;
      starts[mid] = lower;
    }
    lower += 1;

    // Try the candidate at the high end of the range.
    const totalFromJ = costFromJ + previousCosts[j - 1];
    if (totalFromJ < costs[mid]) {
      costs[mid] = totalFromJ;
      starts[mid] = j;
    }

    j -= 1;
  }

  fillMatrixColumn(imin, mid - 1, column, matrix, backtrackMatrix, sumX, sumXsq);
  fillMatrixColumn(mid + 1, imax, column, matrix, backtrackMatrix, sumX, sumXsq);
}

/**
 * Optimal one-dimensional k-means clustering via dynamic programming.
 *
 * @param {number[]} data - values to cluster; not modified
 * @param {number} nClusters - requested number of clusters; capped at the
 *   number of distinct values in `data`
 * @returns {number[]} the smallest value of each cluster, in ascending order.
 *   A single-element array when all values are identical, and an empty array
 *   when there is no data or fewer than one cluster is requested.
 * @throws {Error} when `nClusters` exceeds the number of data values
 */
function ckmeans(data, nClusters) {
  if (nClusters > data.length) {
    throw new Error('Cannot generate more classes than there are data values');
  }

  // Nothing to cluster. Without this guard the backtracking step below would
  // dereference a non-existent first matrix column.
  if (data.length === 0) {
    return [];
  }

  const sorted = numericSort(data);
  const uniqueCount = uniqueCountSorted(sorted);

  if (uniqueCount === 1) return [sorted[0]];

  const clusterCount = Math.min(uniqueCount, nClusters);

  // Zero, negative or NaN cluster counts would produce an empty matrix and
  // crash on the first write; report "no clusters" instead.
  if (!(clusterCount >= 1)) {
    return [];
  }

  const nValues = sorted.length;
  const matrix = makeMatrix(clusterCount, nValues);
  const backtrackMatrix = makeMatrix(clusterCount, nValues);

  const sumX = new Array(nValues);
  const sumXsq = new Array(nValues);
  // Centre the values on the median element to keep the prefix sums small.
  const shift = sorted[Math.floor(nValues / 2)];

  for (let i = 0; i < nValues; ++i) {
    // Subtract the shift BEFORE accumulating: adding the raw value to the
    // running sum first would reintroduce the rounding error the shift is
    // meant to avoid.
    const shifted = sorted[i] - shift;
    if (i === 0) {
      sumX[0] = shifted;
      sumXsq[0] = shifted ** 2;
    } else {
      sumX[i] = sumX[i - 1] + shifted;
      sumXsq[i] = sumXsq[i - 1] + shifted ** 2;
    }
    // Column 0 is the cost of putting elements 0..i into a single cluster.
    matrix[0][i] = ssq(0, i, sumX, sumXsq);
    backtrackMatrix[0][i] = 0;
  }

  for (let k = 1; k < matrix.length; ++k) {
    // Only the final entry of the last column is needed for backtracking.
    const imin = k < matrix.length - 1 ? k : nValues - 1;
    fillMatrixColumn(imin, nValues - 1, k, matrix, backtrackMatrix, sumX, sumXsq);
  }

  // Walk the backtrack table from the last cluster to the first, recording
  // the value at which each cluster begins.
  const clusters = [];
  let clusterRight = backtrackMatrix[0].length - 1;

  for (let cluster = backtrackMatrix.length - 1; cluster >= 0; cluster--) {
    const clusterLeft = backtrackMatrix[cluster][clusterRight];
    clusters[cluster] = sorted[clusterLeft];
    if (cluster > 0) clusterRight = clusterLeft - 1;
  }

  return clusters;
}

/**
 * Create a cluster scale: a function that maps a number to one of the
 * configured range values according to which cluster of the domain data the
 * number falls into.
 *
 * The returned function has three chainable accessors:
 *  - `domain([values])` gets or sets the data used to compute clusters
 *  - `range([values])` gets or sets the output value for each cluster
 *  - `clusters()` returns the cluster boundaries, excluding the first
 *
 * The scale returns `undefined` until it has a non-empty domain and a range
 * of more than two values.
 *
 * @returns {Function} the scale function
 */
export default function scaleCluster() {
  let domain = [];
  let range = [];
  // Smallest value of each cluster, ascending; empty while the scale is unset.
  let breakpoints = [];

  const scale = (x) => {
    if (breakpoints.length === 0) return undefined;
    // Find the highest cluster whose lower bound does not exceed x.
    for (let i = breakpoints.length - 1; i >= 0; i--) {
      if (x >= breakpoints[i]) return range[i];
    }
    // Values below every breakpoint belong to the first bucket.
    return range[0];
  };

  // Recompute the breakpoints from the current domain and range.
  function rescale() {
    if (range.length <= 2 || domain.length === 0) {
      breakpoints = [];
      return;
    }
    const clusters = ckmeans(domain, Math.min(domain.length, range.length));
    breakpoints = clusters.slice();
  }

  // The setters copy their input and the getters return copies, so changes a
  // caller later makes to its own arrays cannot leave the breakpoints out of
  // step with the stored domain/range.
  scale.domain = function (...args) {
    if (args.length === 0) return domain.slice();
    domain = Array.from(args[0]);
    rescale();
    return scale;
  };

  scale.range = function (...args) {
    if (args.length === 0) return range.slice();
    range = Array.from(args[0]);
    rescale();
    return scale;
  };

  scale.clusters = function () {
    // The first cluster minimum is not a boundary between buckets.
    return breakpoints.slice(1);
  };

  return scale;
}
12 changes: 0 additions & 12 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2370,11 +2370,6 @@ cjs-module-lexer@^2.1.0:
resolved "https://registry.yarnpkg.com/cjs-module-lexer/-/cjs-module-lexer-2.1.1.tgz#bff23b0609cc9afa428bd35f1918f7d03b448562"
integrity sha512-+CmxIZ/L2vNcEfvNtLdU0ZQ6mbq3FZnwAP2PPTiKP+1QOoKwlKlPgb8UKV0Dds7QVaMnHm+FwSft2VB0s/SLjQ==

ckmeans@^2.0.1:
version "2.1.0"
resolved "https://registry.yarnpkg.com/ckmeans/-/ckmeans-2.1.0.tgz#ad981cbb1af8da79bebbe82fca441ef1dc0b2fe7"
integrity sha512-xLKsOWXy2QqD8kzyddAV3N87dWZk1sscaUjJmjUccJ7OMddCYJKGR+ff2QIu3sN5uhmuB7prtmYLLV5t6V1cYw==

cliui@^8.0.1:
version "8.0.1"
resolved "https://registry.yarnpkg.com/cliui/-/cliui-8.0.1.tgz#0c04b075db02cbfe60dc8e6cf2f5486b1a3608aa"
Expand Down Expand Up @@ -2804,13 +2799,6 @@ d3-scale-chromatic@3:
d3-color "1 - 3"
d3-interpolate "1 - 3"

d3-scale-cluster@^2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/d3-scale-cluster/-/d3-scale-cluster-2.0.1.tgz#7ebfc146204b1d96a129903ead9dfc4d2d781191"
integrity sha512-1ll05baOigYr4rh4uIE15J4DA+JnGy8HutZUvj6VcBJmHrbEsmTAnwdb1KrxX/bT9cS7oxG/w5ONFtOP2pSKrQ==
dependencies:
ckmeans "^2.0.1"

d3-scale@4:
version "4.0.2"
resolved "https://registry.yarnpkg.com/d3-scale/-/d3-scale-4.0.2.tgz#82b38e8e8ff7080764f8dcec77bd4be393689396"
Expand Down