Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 48 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,55 @@ HighTable uses a data model called `DataFrame`, which defines how data is fetche
- `numRows`: The total number of rows in the dataset.
- `getRowNumber`: A function that returns the row number for a given row index. If not resolved yet, it returns undefined.
- `getCell`: A function that returns the value of a cell at a specific row and column. If not resolved yet, it returns undefined.
- `eventTarget`: An optional event target which must dispatch the event `resolve` when a cell or row number is resolved. This can be used to trigger re-renders or other side effects.
- `fetch`: An optional asynchronous function that fetches cells and row numbers, for a range of rows and columns. It should only fetch the missing data, and once the data is fetched, `getRowNumber` and `getCell` should return the resolved values. It is responsible for dispaching the `resolve` event (once or multiple times) on the `eventTarget` when the data is ready.
- `fetch`: An optional asynchronous function that fetches cells and row numbers, for a range of rows and columns. It should only fetch the missing data, and once the data is fetched, `getRowNumber` and `getCell` should return the resolved values. When using `useDataFrameCache`, call `setCell` to store fetched data.

## Usage

Here's a basic example of how to use HighTable in your React application:

```jsx
import HighTable from 'hightable'
import HighTable, { useDataFrameCache } from 'hightable'

function App() {
const { dataframe, setCell } = useDataFrameCache({
numRows: 1000000,
columnDescriptors: [{name: 'ID'}, {name: 'Name'}, {name: 'Email'}],
async fetch({ rowStart, rowEnd, columns, orderBy, signal }) {
// fetch cell data from your data source here
for (let row = rowStart; row < rowEnd; row++) {
for (const column of columns ?? []) {
// Simulate async data fetching
const value = await fetchCellData(row, column)
// store the fetched data in the cache
setCell(row, column, value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you have to call setRowNumber too, otherwise, the row numbers will never appear (if I understand well)

}
}
}
})

return (
<HighTable
data={dataframe}
onError={console.error}
/>
)
}

// Example fetch function
async function fetchCellData(row, column) {
// Your data fetching logic here
return column === 'ID' ? `row-${row}` :
column === 'Name' ? `User ${row}` :
`${column.toLowerCase()}${row}@example.com`
}
```

### Alternative: Manual Cache Management

If you need more control over caching, you can still manage the cache manually:

```jsx
import HighTable, { createGetRowNumber } from 'hightable'

const eventTarget = new EventTarget()
const cache = new Map()
Expand All @@ -61,15 +101,16 @@ const store = (cells) => {
}
}
}

const dataframe = {
columnDescriptors: [{name: 'ID'}, {name: 'Name'}, {name: 'Email'}],
numRows: 1000000,
getRowNumber: ({ row }) => ({ value: rowIndex }),
getCell: ({ row, col }) => cache.get(col).get(row),
getRowNumber: createGetRowNumber({ numRows: 1000000 }),
getCell: ({ row, column }) => cache.get(column)?.get(row),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well spot (col -> column)

eventTarget,
async fetch({row, column}) {
async fetch({ rowStart, rowEnd, columns, orderBy, signal }) {
// fetch cell data from your data source here
const cells = await fetchCellData(row, col)
const cells = await fetchCellData(rowStart, rowEnd, columns)
// store the fetched data in the cache
store(cells)
// dispatch the resolve event to notify the table that the data is ready
Expand Down
43 changes: 43 additions & 0 deletions src/components/HighTable/HighTable.stories.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import type { Fetch, ResolvedValue } from '../../helpers/dataframe/types.js'
import type { Selection } from '../../helpers/selection.js'
import type { OrderBy } from '../../helpers/sort.js'
import { createEventTarget } from '../../helpers/typedEventTarget.js'
import { useDataFrameCache } from '../../hooks/useDataFrameCache.js'
import type { CellContentProps } from './HighTable.js'
import HighTable from './HighTable.js'

Expand Down Expand Up @@ -390,6 +391,48 @@ export const ReadOnlySelection: Story = {
},
}

function AsyncDataWithHook() {
const columnDescriptors = ['ID', 'Count'].map(name => ({ name }))
const numRows = 500

const { dataframe, setCell } = useDataFrameCache({
numRows,
columnDescriptors,
fetch: async ({ rowStart, rowEnd, columns, orderBy, signal }) => {
checkSignal(signal)
validateFetchParams({ rowStart, rowEnd, columns, orderBy, data: { numRows, columnDescriptors } })

for (const column of columns ?? []) {
for (let row = rowStart; row < rowEnd; row++) {
// Simulate variable delay times
const rowMs = row % 3 === 0 ? 10 * Math.floor(10 * Math.random()) :
row % 3 === 1 ? 20 * Math.floor(10 * Math.random()) :
500
const ms = rowMs * (column === 'ID' ? 1 : 2)
const resolvedValue = column === 'ID' ? `row ${row}` : numRows - row

delay(resolvedValue, ms).then((value) => {
checkSignal(signal)
setCell(row, column, value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to call setRowNumber here too

}).catch((error) => {
if (error instanceof DOMException && error.name === 'AbortError') {
// Ignore abort errors
return
}
throw error
})
}
}
},
}

return <HighTable data={dataframe} />
}

export const AsyncDataWithCache: Story = {
render: () => <AsyncDataWithHook />,
}

export const LegacySortable: Story = {
render: (args) => {
const [selection, onSelectionChange] = useState<Selection>({
Expand Down
130 changes: 130 additions & 0 deletions src/helpers/dataframe/cache.ts
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea to provide a cache API + an implementation

Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import type { ResolvedValue } from './types.js'

export type CellListener = () => void

export interface DataFrameCache {
getCell(row: number, column: string): ResolvedValue | undefined
getRowNumber(row: number): ResolvedValue<number> | undefined
setCell(row: number, column: string, value: any): void
setRowNumber(row: number, rowNumber: number): void
registerCellListener(callback: CellListener): () => void
clearCache(): void
getCachedColumns(): string[]
getCachedRowsForColumn(column: string): number[]
}

/**
* Creates a cache for DataFrame cell data that automatically notifies listeners
* when cells are modified.
*/
export function createDataFrameCache(): DataFrameCache {
const cellCache = new Map<string, Map<number, ResolvedValue>>()
const rowNumberCache = new Map<number, ResolvedValue<number>>()
const listeners = new Set<CellListener>()

function notifyListeners(): void {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Conceptually, we might want to use the same cache for two dataframes, for example, one that covers the first 1,000 rows of a dataset, and another one that covers the first 1M rows. But any update to the second range of rows would also notify the first listener, right?

Maybe we could pass arguments, like the row and the column (in the case of setCell), to the listeners?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(and maybe also pass the value... not sure if it's needed, but would it hurt?)

// Call all registered listeners
listeners.forEach(listener => {
try {
listener()
} catch (error) {
console.error('Error in DataFrameCache listener:', error)
}
})
}

return {

/**
* Get a cell value from the cache.
* Returns undefined if the cell hasn't been cached yet.
*/
getCell(row: number, column: string): ResolvedValue | undefined {
return cellCache.get(column)?.get(row)
},

/**
* Get a row number from the cache.
* Returns undefined if the row number hasn't been cached yet.
*/
getRowNumber(row: number): ResolvedValue<number> | undefined {
return rowNumberCache.get(row)
},

/**
* Set a cell value in the cache and trigger event listeners.
*/
setCell(row: number, column: string, value: any): void {
if (!cellCache.has(column)) {
cellCache.set(column, new Map())
}

const columnCache = cellCache.get(column)
if (!columnCache) {
throw new Error(`Column "${column}" not found in cache`)
}
Comment on lines +58 to +65
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit, but I generally prefer using let for this annoying case instead of throwing an error that will never occur:

Suggested change
if (!cellCache.has(column)) {
cellCache.set(column, new Map())
}
const columnCache = cellCache.get(column)
if (!columnCache) {
throw new Error(`Column "${column}" not found in cache`)
}
let columnCache = cellCache.get(column)
if (columnCache === undefined) {
columnCache = new Map()
cellCache.set(column, columnCache)
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(but if you think the other version is better, I'm interested in knowing why, because this construction is always a pain)

const existingCell = columnCache.get(row)
const resolvedValue: ResolvedValue = { value }

// Only update and notify if the value has actually changed
if (!existingCell || existingCell.value !== value) {
columnCache.set(row, resolvedValue)
notifyListeners()
}
},

/**
* Set a row number in the cache and trigger event listeners.
*/
setRowNumber(row: number, rowNumber: number): void {
const existingRowNumber = rowNumberCache.get(row)
const resolvedValue: ResolvedValue<number> = { value: rowNumber }

// Only update and notify if the value has actually changed
if (!existingRowNumber || existingRowNumber.value !== rowNumber) {
rowNumberCache.set(row, resolvedValue)
notifyListeners()
}
},

/**
* Register a listener to be called when cells are updated.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: as we call the listeners when the row number changes (which is not exactly a cell), maybe don't mention 'cell' here and in the method name

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe "registerUpdateListener"?

* Returns a function to unregister the listener.
*/
registerCellListener(callback: CellListener): () => void {
listeners.add(callback)
return () => {
listeners.delete(callback)
}
},

/**
* Clear all cached data.
*/
clearCache(): void {
const hadData = cellCache.size > 0 || rowNumberCache.size > 0
cellCache.clear()
rowNumberCache.clear()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TIL about Map.clear()


// Notify listeners if we had data before clearing
if (hadData) {
notifyListeners()
}
},

/**
* Get all cached columns.
*/
getCachedColumns(): string[] {
return Array.from(cellCache.keys())
},

/**
* Get all cached rows for a specific column.
*/
getCachedRowsForColumn(column: string): number[] {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same for the row numbers?

const columnCache = cellCache.get(column)
return columnCache ? Array.from(columnCache.keys()) : []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we use an array, should we sort?

Or we might prefer to return a Set

},
}
}
24 changes: 5 additions & 19 deletions src/helpers/dataframe/filter.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { createEventTarget } from '../typedEventTarget.js'
import { checkSignal, getContinuousRanges, validateFetchParams, validateGetCellParams, validateGetRowNumberParams, validateRow } from './helpers.js'
import type { DataFrame, DataFrameEvents, Fetch, Obj } from './types.js'
import type { DataFrame, Fetch, Obj } from './types.js'
import type { OrderBy } from '../sort.js'

// return an unsortable data frame: we can call sortableDataFrame on it later, so that we sort on a small subset of the data
Expand Down Expand Up @@ -45,28 +44,15 @@ export function filterDataFrame<M extends Obj, C extends Obj>(
}

if (upstreamFetch !== undefined) {
const eventTarget = createEventTarget<DataFrameEvents>()
const fetch: Fetch = async function({ rowStart, rowEnd, columns, orderBy, signal }: { rowStart: number, rowEnd: number, columns?: string[], orderBy?: OrderBy, signal?: AbortSignal }) {
validateFetchParams({ rowStart, rowEnd, columns, orderBy, data: { numRows, columnDescriptors } })
checkSignal(signal)

function callback() {
eventTarget.dispatchEvent(new CustomEvent('resolve'))
}
try {
data.eventTarget?.addEventListener('resolve', callback)
// The upstream rows are ordered, so we can fetch them by continuous ranges.
const ranges = getContinuousRanges(upstreamRows.slice(rowStart, rowEnd))
const promises = ranges.map((range) => upstreamFetch({ ...range, columns, signal }).then(() => {
checkSignal(signal)
eventTarget.dispatchEvent(new CustomEvent('resolve'))
}))
await Promise.all(promises)
} finally {
data.eventTarget?.removeEventListener('resolve', callback)
}
// The upstream rows are ordered, so we can fetch them by continuous ranges.
const ranges = getContinuousRanges(upstreamRows.slice(rowStart, rowEnd))
const promises = ranges.map((range) => upstreamFetch({ ...range, columns, signal }))
await Promise.all(promises)
}
df.eventTarget = eventTarget
df.fetch = fetch
}

Expand Down
2 changes: 2 additions & 0 deletions src/helpers/dataframe/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
export { arrayDataFrame } from './array.js'
export { createDataFrameCache } from './cache.js'
export type { CellListener, DataFrameCache } from './cache.js'
export { filterDataFrame } from './filter.js'
export { checkSignal, createGetRowNumber, validateColumn, validateFetchParams, validateGetCellParams, validateGetRowNumberParams, validateOrderBy, validateRow } from './helpers.js'
export { convertV1ToDataFrame } from './legacy/convert.js'
Expand Down
8 changes: 1 addition & 7 deletions src/helpers/dataframe/legacy/convert.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { createEventTarget } from '../../typedEventTarget.js'
import { checkSignal, getContinuousRanges, validateFetchParams, validateGetCellParams, validateGetRowNumberParams } from '../helpers.js'
import { DataFrame, DataFrameEvents, ResolvedValue, sortableDataFrame } from '../index.js'
import { DataFrame, ResolvedValue, sortableDataFrame } from '../index.js'
import { DataFrameV1 } from './dataframeV1.js'
import type { OrderBy } from '../../sort.js'

Expand All @@ -19,8 +18,6 @@ export function convertV1ToUnsortableDataFrame(data: DataFrameV1): DataFrame {
const columnDescriptors = data.header.map(name => ({ name }))
const { numRows } = data

const eventTarget = createEventTarget<DataFrameEvents>()

// Create a local cache for the row numbers and the cells.
const rowNumbersCache = new Map<number, ResolvedValue<number>>()
const cellsCache = new Map<string, Map<number, ResolvedValue>>()
Expand Down Expand Up @@ -62,7 +59,6 @@ export function convertV1ToUnsortableDataFrame(data: DataFrameV1): DataFrame {
const cachedRowNumber = rowNumbersCache.get(rowIndex)
if (!cachedRowNumber || cachedRowNumber.value !== rowNumber) {
rowNumbersCache.set(rowIndex, { value: rowNumber })
eventTarget.dispatchEvent(new CustomEvent('resolve'))
}
}))
for (const column of columnDescriptors.map(c => c.name)) {
Expand All @@ -78,7 +74,6 @@ export function convertV1ToUnsortableDataFrame(data: DataFrameV1): DataFrame {
const cachedCell = cellsCache.get(column)?.get(rowIndex)
if (!cachedCell || cachedCell.value !== value) {
cellsCache.get(column)?.set(rowIndex, { value })
eventTarget.dispatchEvent(new CustomEvent('resolve'))
}
}))
}
Expand All @@ -93,6 +88,5 @@ export function convertV1ToUnsortableDataFrame(data: DataFrameV1): DataFrame {
getRowNumber,
getCell,
fetch,
eventTarget,
}
}
Loading
Loading