Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 35 additions & 6 deletions lib/doc/pivot-table.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,14 @@ function makePivotTable(worksheet, model) {
let {rows, columns, values, pages = []} = model;
const {metric, pageDefaults = {}} = model;

// Generate sharedItems for ALL fields in the source, not just the ones used by this pivot table
// This ensures Excel can properly display any field configuration
// Generate sharedItems only for fields used as pivot axes (rows, columns, pages).
// Value fields and unused fields use lightweight sharedItems (no enumeration) — Excel
// flags pivot caches that materialize sharedItems for unused/long-text fields with a
// "Repaired Records: PivotTable report from /xl/pivotCache/pivotCacheDefinition*.xml"
// warning on open (see protobi/exceljs#42).
const allHeaderNames = sourceSheet.getRow(1).values.slice(1);
const cacheFields = makeCacheFields(sourceSheet, allHeaderNames);
const axialFieldNames = [...rows, ...columns, ...pages];
const cacheFields = makeCacheFields(sourceSheet, axialFieldNames, allHeaderNames);

// let {rows, columns, values, pages} use indices instead of names;
// names can then be accessed via `pivotTable.cacheFields[index].name`.
Expand Down Expand Up @@ -140,8 +144,10 @@ function validate(worksheet, model) {
}
}

function makeCacheFields(worksheet, fieldNamesWithSharedItems) {
function makeCacheFields(worksheet, fieldNamesWithSharedItems, allFieldNames /* reserved */) {
// Cache fields are used in pivot tables to reference source data.
// `fieldNamesWithSharedItems` = axial fields (rows/columns/pages) that need full enumeration.
// `allFieldNames` is reserved for future use; iteration uses worksheet.getRow(1).
//
// Example
// -------
Expand Down Expand Up @@ -193,12 +199,35 @@ function makeCacheFields(worksheet, fieldNamesWithSharedItems) {
return toSortedArray(uniqueValues);
};

// Inspect a column's body values to derive lightweight sharedItems type hints
// WITHOUT enumerating every unique value. Used for non-axial fields — Excel can
// rebuild the index on refresh from the inline values in pivotCacheRecords.
const inspect = columnIndex => {
const columnValues = worksheet.getColumn(columnIndex).values.slice(2);
const hints = {containsBlank: false, containsString: false, containsNumber: false};
for (const value of columnValues) {
if (value === null || value === undefined) {
hints.containsBlank = true;
} else if (typeof value === 'number') {
hints.containsNumber = true;
} else {
hints.containsString = true;
}
}
return hints;
};

// make result
const result = [];
for (const columnIndex of range(1, names.length)) {
const name = names[columnIndex];
const sharedItems = nameToHasSharedItems[name] ? aggregate(columnIndex) : null;
result.push({name, sharedItems});
if (nameToHasSharedItems[name]) {
// Axial field — full enumeration so pivotCacheRecords can reference values by index.
result.push({name, sharedItems: aggregate(columnIndex)});
} else {
// Non-axial field — lightweight sharedItems with type hints only.
result.push({name, sharedItems: null, hints: inspect(columnIndex)});
}
}
return result;
}
Expand Down
49 changes: 31 additions & 18 deletions lib/xlsx/xform/pivot-table/cache-field.js
Original file line number Diff line number Diff line change
@@ -1,22 +1,18 @@
class CacheField {
constructor({name, sharedItems}) {
// string type
constructor({name, sharedItems, hints}) {
// Axial field (used as pivot row/column/page):
// { name: 'A', sharedItems: ['a1', 'a2', 'a3'] }
//
// {
// 'name': 'A',
// 'sharedItems': ['a1', 'a2', 'a3']
// }
//
// or
//
// integer type
//
// {
// 'name': 'D',
// 'sharedItems': null
// }
// Non-axial field (value field or unused) — sharedItems is null and `hints`
// describes the column's value types so we emit a lightweight
// <sharedItems .../> without enumerating every unique value.
// This avoids triggering Excel's "Repaired Records: PivotTable report"
// warning on pivot caches with high-cardinality or long-text non-axial
// fields (see protobi/exceljs#42).
// { name: 'D', sharedItems: null, hints: { containsBlank, containsString, containsNumber } }
this.name = name;
this.sharedItems = sharedItems;
this.hints = hints || null;
}

// Helper function to escape XML special characters
Expand All @@ -34,11 +30,28 @@ class CacheField {
// PivotCache Field: http://www.datypic.com/sc/ooxml/e-ssml_cacheField-1.html
// Shared Items: http://www.datypic.com/sc/ooxml/e-ssml_sharedItems-1.html

// integer types
// Non-axial field — emit lightweight <sharedItems> based on type hints.
// pivotCacheRecords inlines values for these fields (no <x v="N"/> indices).
if (this.sharedItems === null) {
// TK(2023-07-18): left out attributes... minValue="5" maxValue="45"
const hints = this.hints || {};
const attrs = [];
if (hints.containsNumber && hints.containsString) {
attrs.push('containsSemiMixedTypes="1"');
attrs.push('containsNumber="1"');
attrs.push('containsString="1"');
} else if (hints.containsNumber) {
attrs.push('containsSemiMixedTypes="0"');
attrs.push('containsString="0"');
attrs.push('containsNumber="1"');
} else if (hints.containsString) {
attrs.push('containsSemiMixedTypes="0"');
attrs.push('containsNumber="0"');
attrs.push('containsString="1"');
}
if (hints.containsBlank) attrs.push('containsBlank="1"');
const attrStr = attrs.length ? ` ${attrs.join(' ')}` : '';
return `<cacheField name="${this.escapeXml(this.name)}" numFmtId="0">
<sharedItems containsSemiMixedTypes="0" containsString="0" containsNumber="1" containsInteger="1" />
<sharedItems${attrStr} />
</cacheField>`;
}

Expand Down
9 changes: 8 additions & 1 deletion lib/xlsx/xform/pivot-table/pivot-cache-definition-xform.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ class PivotCacheDefinitionXform extends BaseXform {
render(xmlStream, model) {
const {sourceSheet, cacheFields} = model;

// recordCount must match the count attribute of <pivotCacheRecords> — the
// number of data rows in the source sheet (header excluded). Excel uses
// this to validate the cache; a mismatch triggers the
// "Repaired Records: PivotTable report from /xl/pivotCache/
// pivotCacheDefinition*.xml" warning (see protobi/exceljs#42).
const recordCount = Math.max(0, sourceSheet.getSheetValues().length - 2);

xmlStream.openXml(XmlStream.StdDocAttributes);
xmlStream.openNode(this.tag, {
...PivotCacheDefinitionXform.PIVOT_CACHE_DEFINITION_ATTRIBUTES,
Expand All @@ -31,7 +38,7 @@ class PivotCacheDefinitionXform extends BaseXform {
createdVersion: '8',
refreshedVersion: '8',
minRefreshableVersion: '3',
recordCount: cacheFields.length + 1,
recordCount,
});

xmlStream.openNode('cacheSource', {type: 'worksheet'});
Expand Down
121 changes: 121 additions & 0 deletions spec/integration/issues/issue-42-pivot-cache-repair.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Regression test for protobi/exceljs#42:
// "[BUG] Pivot cache triggers Excel repair"
//
// When a workbook contains a pivot table whose source sheet has fields that
// are NOT used as pivot axes (rows/columns/pages) and contain high-cardinality
// or long-text values, Excel opens the file with:
//
// "Repaired Records: PivotTable report from /xl/pivotCache/
// pivotCacheDefinition1.xml part (PivotTable cache)"
//
// Two bugs caused this:
// 1. `recordCount` on pivotCacheDefinition was set to `cacheFields.length + 1`
// instead of the number of data rows. Excel validates this matches the
// `<pivotCacheRecords count="...">` attribute.
// 2. `makeCacheFields` enumerated sharedItems for ALL source fields, including
// non-axial value/unused fields. Excel flags full sharedItems on non-axial
// fields as a repair condition for high-cardinality or long-text columns.
//
// Fix: only enumerate sharedItems for axial fields (rows/columns/pages).
// Non-axial fields get a lightweight <sharedItems /> with type hints.

const fs = require('fs');
const {promisify} = require('util');
const fsReadFileAsync = promisify(fs.readFile);
const JSZip = require('jszip');

const ExcelJS = verquire('exceljs');

const TEST_XLSX_FILEPATH = './spec/out/wb.issue-42.xlsx';

function findCacheField(xml, name) {
const re = new RegExp(`<cacheField name="${name}"[^>]*>[\\s\\S]*?</cacheField>`);
return xml.match(re);
}

describe('Workbook', () => {
describe('Pivot Tables (issue #42 — Excel repair on cache definition)', () => {
let cacheDefXml;
let cacheRecordsXml;
const ROW_COUNT = 20;

before(async () => {
const workbook = new ExcelJS.Workbook();
const dataSheet = workbook.addWorksheet('Data');
dataSheet.columns = [
{header: 'Repo', key: 'repo', width: 20},
{header: 'Severity', key: 'severity', width: 20},
// Description: long-text, high-cardinality, NOT used as a pivot axis.
{header: 'Description', key: 'description', width: 80},
{header: 'Counter', key: 'count', width: 10},
];

for (let i = 0; i < ROW_COUNT; i++) {
dataSheet.addRow({
repo: `repo-${i % 5}`,
severity: ['HIGH', 'LOW'][i % 2],
description: `long-description-${i}-${'x'.repeat(100)}`,
count: 1,
});
}

const pivotSheet = workbook.addWorksheet('Summary');
pivotSheet.addPivotTable({
sourceSheet: dataSheet,
rows: ['Repo'],
columns: ['Severity'],
values: ['Counter'],
metric: 'sum',
});

await workbook.xlsx.writeFile(TEST_XLSX_FILEPATH);
const buffer = await fsReadFileAsync(TEST_XLSX_FILEPATH);
const zip = await JSZip.loadAsync(buffer);
cacheDefXml = await zip.file('xl/pivotCache/pivotCacheDefinition1.xml').async('string');
cacheRecordsXml = await zip.file('xl/pivotCache/pivotCacheRecords1.xml').async('string');
});

it('emits a pivot cache definition', () => {
expect(cacheDefXml).to.be.a('string');
expect(cacheDefXml).to.include('<pivotCacheDefinition');
});

it('declares recordCount equal to the source data row count', () => {
// Was previously `cacheFields.length + 1` (= 5 for this fixture);
// Excel expects it to match <pivotCacheRecords count="..."> which is ROW_COUNT.
const match = cacheDefXml.match(/recordCount="(\d+)"/);
expect(match).to.not.be.null();
expect(Number(match[1])).to.equal(ROW_COUNT);

const recordsCountMatch = cacheRecordsXml.match(/<pivotCacheRecords[^>]*count="(\d+)"/);
expect(recordsCountMatch).to.not.be.null();
expect(Number(recordsCountMatch[1])).to.equal(ROW_COUNT);
});

it('does NOT enumerate sharedItems for the non-axial Description field', () => {
const descMatch = findCacheField(cacheDefXml, 'Description');
expect(descMatch, 'Description cacheField missing').to.not.be.null();
const descBlock = descMatch[0];
// Must not contain inline <s v="long-description-..."> children.
expect(descBlock).to.not.include('<s v="long-description-');
// Should be a self-closing sharedItems element.
expect(descBlock).to.match(/<sharedItems[^>]*\/>/);
});

it('keeps full sharedItems enumeration for axial Repo and Severity fields', () => {
for (const fieldName of ['Repo', 'Severity']) {
const block = findCacheField(cacheDefXml, fieldName);
expect(block, `${fieldName} cacheField missing`).to.not.be.null();
expect(block[0]).to.include('</sharedItems>');
expect(block[0]).to.match(/<s v="/);
}
});

it('round-trip-loads the resulting workbook without throwing', async () => {
const reloaded = new ExcelJS.Workbook();
await reloaded.xlsx.readFile(TEST_XLSX_FILEPATH);
expect(reloaded.worksheets.map(ws => ws.name)).to.include('Data');
expect(reloaded.worksheets.map(ws => ws.name)).to.include('Summary');
});
});
});