-
Notifications
You must be signed in to change notification settings - Fork 5
Initial version of internal cache, I don't love it yet #265
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -41,15 +41,55 @@ HighTable uses a data model called `DataFrame`, which defines how data is fetche | |
| - `numRows`: The total number of rows in the dataset. | ||
| - `getRowNumber`: A function that returns the row number for a given row index. If not resolved yet, it returns undefined. | ||
| - `getCell`: A function that returns the value of a cell at a specific row and column. If not resolved yet, it returns undefined. | ||
| - `eventTarget`: An optional event target which must dispatch the event `resolve` when a cell or row number is resolved. This can be used to trigger re-renders or other side effects. | ||
| - `fetch`: An optional asynchronous function that fetches cells and row numbers, for a range of rows and columns. It should only fetch the missing data, and once the data is fetched, `getRowNumber` and `getCell` should return the resolved values. It is responsible for dispaching the `resolve` event (once or multiple times) on the `eventTarget` when the data is ready. | ||
| - `fetch`: An optional asynchronous function that fetches cells and row numbers, for a range of rows and columns. It should only fetch the missing data, and once the data is fetched, `getRowNumber` and `getCell` should return the resolved values. When using `useDataFrameCache`, call `setCell` to store fetched data. | ||
|
|
||
| ## Usage | ||
|
|
||
| Here's a basic example of how to use HighTable in your React application: | ||
|
|
||
| ```jsx | ||
| import HighTable from 'hightable' | ||
| import HighTable, { useDataFrameCache } from 'hightable' | ||
|
|
||
| function App() { | ||
| const { dataframe, setCell } = useDataFrameCache({ | ||
| numRows: 1000000, | ||
| columnDescriptors: [{name: 'ID'}, {name: 'Name'}, {name: 'Email'}], | ||
| async fetch({ rowStart, rowEnd, columns, orderBy, signal }) { | ||
| // fetch cell data from your data source here | ||
| for (let row = rowStart; row < rowEnd; row++) { | ||
| for (const column of columns ?? []) { | ||
| // Simulate async data fetching | ||
| const value = await fetchCellData(row, column) | ||
| // store the fetched data in the cache | ||
| setCell(row, column, value) | ||
| } | ||
| } | ||
| } | ||
| }) | ||
|
|
||
| return ( | ||
| <HighTable | ||
| data={dataframe} | ||
| onError={console.error} | ||
| /> | ||
| ) | ||
| } | ||
|
|
||
| // Example fetch function | ||
| async function fetchCellData(row, column) { | ||
| // Your data fetching logic here | ||
| return column === 'ID' ? `row-${row}` : | ||
| column === 'Name' ? `User ${row}` : | ||
| `${column.toLowerCase()}${row}@example.com` | ||
| } | ||
| ``` | ||
|
|
||
| ### Alternative: Manual Cache Management | ||
|
|
||
| If you need more control over caching, you can still manage the cache manually: | ||
|
|
||
| ```jsx | ||
| import HighTable, { createGetRowNumber } from 'hightable' | ||
|
|
||
| const eventTarget = new EventTarget() | ||
| const cache = new Map() | ||
|
|
@@ -61,15 +101,16 @@ const store = (cells) => { | |
| } | ||
| } | ||
| } | ||
|
|
||
| const dataframe = { | ||
| columnDescriptors: [{name: 'ID'}, {name: 'Name'}, {name: 'Email'}], | ||
| numRows: 1000000, | ||
| getRowNumber: ({ row }) => ({ value: rowIndex }), | ||
| getCell: ({ row, col }) => cache.get(col).get(row), | ||
| getRowNumber: createGetRowNumber({ numRows: 1000000 }), | ||
| getCell: ({ row, column }) => cache.get(column)?.get(row), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. well spot (col -> column) |
||
| eventTarget, | ||
| async fetch({row, column}) { | ||
| async fetch({ rowStart, rowEnd, columns, orderBy, signal }) { | ||
| // fetch cell data from your data source here | ||
| const cells = await fetchCellData(row, col) | ||
| const cells = await fetchCellData(rowStart, rowEnd, columns) | ||
| // store the fetched data in the cache | ||
| store(cells) | ||
| // dispatch the resolve event to notify the table that the data is ready | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,7 @@ import type { Fetch, ResolvedValue } from '../../helpers/dataframe/types.js' | |
| import type { Selection } from '../../helpers/selection.js' | ||
| import type { OrderBy } from '../../helpers/sort.js' | ||
| import { createEventTarget } from '../../helpers/typedEventTarget.js' | ||
| import { useDataFrameCache } from '../../hooks/useDataFrameCache.js' | ||
| import type { CellContentProps } from './HighTable.js' | ||
| import HighTable from './HighTable.js' | ||
|
|
||
|
|
@@ -390,6 +391,48 @@ export const ReadOnlySelection: Story = { | |
| }, | ||
| } | ||
|
|
||
| function AsyncDataWithHook() { | ||
| const columnDescriptors = ['ID', 'Count'].map(name => ({ name })) | ||
| const numRows = 500 | ||
|
|
||
| const { dataframe, setCell } = useDataFrameCache({ | ||
| numRows, | ||
| columnDescriptors, | ||
| fetch: async ({ rowStart, rowEnd, columns, orderBy, signal }) => { | ||
| checkSignal(signal) | ||
| validateFetchParams({ rowStart, rowEnd, columns, orderBy, data: { numRows, columnDescriptors } }) | ||
|
|
||
| for (const column of columns ?? []) { | ||
| for (let row = rowStart; row < rowEnd; row++) { | ||
| // Simulate variable delay times | ||
| const rowMs = row % 3 === 0 ? 10 * Math.floor(10 * Math.random()) : | ||
| row % 3 === 1 ? 20 * Math.floor(10 * Math.random()) : | ||
| 500 | ||
| const ms = rowMs * (column === 'ID' ? 1 : 2) | ||
| const resolvedValue = column === 'ID' ? `row ${row}` : numRows - row | ||
|
|
||
| delay(resolvedValue, ms).then((value) => { | ||
| checkSignal(signal) | ||
| setCell(row, column, value) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need to call setRowNumber here too |
||
| }).catch((error) => { | ||
| if (error instanceof DOMException && error.name === 'AbortError') { | ||
| // Ignore abort errors | ||
| return | ||
| } | ||
| throw error | ||
| }) | ||
| } | ||
| } | ||
| }, | ||
| } | ||
|
|
||
| return <HighTable data={dataframe} /> | ||
| } | ||
|
|
||
| export const AsyncDataWithCache: Story = { | ||
| render: () => <AsyncDataWithHook />, | ||
| } | ||
|
|
||
| export const LegacySortable: Story = { | ||
| render: (args) => { | ||
| const [selection, onSelectionChange] = useState<Selection>({ | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good idea to provide a cache API + an implementation |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,130 @@ | ||||||||||||||||||||||||||||
| import type { ResolvedValue } from './types.js' | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| export type CellListener = () => void | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| export interface DataFrameCache { | ||||||||||||||||||||||||||||
| getCell(row: number, column: string): ResolvedValue | undefined | ||||||||||||||||||||||||||||
| getRowNumber(row: number): ResolvedValue<number> | undefined | ||||||||||||||||||||||||||||
| setCell(row: number, column: string, value: any): void | ||||||||||||||||||||||||||||
| setRowNumber(row: number, rowNumber: number): void | ||||||||||||||||||||||||||||
| registerCellListener(callback: CellListener): () => void | ||||||||||||||||||||||||||||
| clearCache(): void | ||||||||||||||||||||||||||||
| getCachedColumns(): string[] | ||||||||||||||||||||||||||||
| getCachedRowsForColumn(column: string): number[] | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||
| * Creates a cache for DataFrame cell data that automatically notifies listeners | ||||||||||||||||||||||||||||
| * when cells are modified. | ||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||
| export function createDataFrameCache(): DataFrameCache { | ||||||||||||||||||||||||||||
| const cellCache = new Map<string, Map<number, ResolvedValue>>() | ||||||||||||||||||||||||||||
| const rowNumberCache = new Map<number, ResolvedValue<number>>() | ||||||||||||||||||||||||||||
| const listeners = new Set<CellListener>() | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| function notifyListeners(): void { | ||||||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Conceptually, we might want to use the same cache for two dataframes, for example, one that covers the first 1,000 rows of a dataset, and another one that covers the first 1M rows. But any update to the second range of rows would also notify the first listener, right? Maybe we could pass arguments, like the row and the column (in the case of setCell), to the listeners?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (and maybe also pass the value... not sure if it's needed, but would it hurt?) |
||||||||||||||||||||||||||||
| // Call all registered listeners | ||||||||||||||||||||||||||||
| listeners.forEach(listener => { | ||||||||||||||||||||||||||||
| try { | ||||||||||||||||||||||||||||
| listener() | ||||||||||||||||||||||||||||
| } catch (error) { | ||||||||||||||||||||||||||||
| console.error('Error in DataFrameCache listener:', error) | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| }) | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| return { | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||
| * Get a cell value from the cache. | ||||||||||||||||||||||||||||
| * Returns undefined if the cell hasn't been cached yet. | ||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||
| getCell(row: number, column: string): ResolvedValue | undefined { | ||||||||||||||||||||||||||||
| return cellCache.get(column)?.get(row) | ||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||
| * Get a row number from the cache. | ||||||||||||||||||||||||||||
| * Returns undefined if the row number hasn't been cached yet. | ||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||
| getRowNumber(row: number): ResolvedValue<number> | undefined { | ||||||||||||||||||||||||||||
| return rowNumberCache.get(row) | ||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||
| * Set a cell value in the cache and trigger event listeners. | ||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||
| setCell(row: number, column: string, value: any): void { | ||||||||||||||||||||||||||||
| if (!cellCache.has(column)) { | ||||||||||||||||||||||||||||
| cellCache.set(column, new Map()) | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| const columnCache = cellCache.get(column) | ||||||||||||||||||||||||||||
| if (!columnCache) { | ||||||||||||||||||||||||||||
| throw new Error(`Column "${column}" not found in cache`) | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
|
Comment on lines
+58
to
+65
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit, but I generally prefer using
Suggested change
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (but if you think the other version is better, I'm interested in knowing why, because this construction is always a pain) |
||||||||||||||||||||||||||||
| const existingCell = columnCache.get(row) | ||||||||||||||||||||||||||||
| const resolvedValue: ResolvedValue = { value } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| // Only update and notify if the value has actually changed | ||||||||||||||||||||||||||||
| if (!existingCell || existingCell.value !== value) { | ||||||||||||||||||||||||||||
| columnCache.set(row, resolvedValue) | ||||||||||||||||||||||||||||
| notifyListeners() | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||
| * Set a row number in the cache and trigger event listeners. | ||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||
| setRowNumber(row: number, rowNumber: number): void { | ||||||||||||||||||||||||||||
| const existingRowNumber = rowNumberCache.get(row) | ||||||||||||||||||||||||||||
| const resolvedValue: ResolvedValue<number> = { value: rowNumber } | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| // Only update and notify if the value has actually changed | ||||||||||||||||||||||||||||
| if (!existingRowNumber || existingRowNumber.value !== rowNumber) { | ||||||||||||||||||||||||||||
| rowNumberCache.set(row, resolvedValue) | ||||||||||||||||||||||||||||
| notifyListeners() | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||
| * Register a listener to be called when cells are updated. | ||||||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: as we call the listeners when the row number changes (which is not exactly a cell), maybe don't mention 'cell' here and in the method name
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe "registerUpdateListener"? |
||||||||||||||||||||||||||||
| * Returns a function to unregister the listener. | ||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||
| registerCellListener(callback: CellListener): () => void { | ||||||||||||||||||||||||||||
| listeners.add(callback) | ||||||||||||||||||||||||||||
| return () => { | ||||||||||||||||||||||||||||
| listeners.delete(callback) | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||
| * Clear all cached data. | ||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||
| clearCache(): void { | ||||||||||||||||||||||||||||
| const hadData = cellCache.size > 0 || rowNumberCache.size > 0 | ||||||||||||||||||||||||||||
| cellCache.clear() | ||||||||||||||||||||||||||||
| rowNumberCache.clear() | ||||||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TIL about Map.clear() |
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| // Notify listeners if we had data before clearing | ||||||||||||||||||||||||||||
| if (hadData) { | ||||||||||||||||||||||||||||
| notifyListeners() | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||
| * Get all cached columns. | ||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||
| getCachedColumns(): string[] { | ||||||||||||||||||||||||||||
| return Array.from(cellCache.keys()) | ||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /** | ||||||||||||||||||||||||||||
| * Get all cached rows for a specific column. | ||||||||||||||||||||||||||||
| */ | ||||||||||||||||||||||||||||
| getCachedRowsForColumn(column: string): number[] { | ||||||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same for the row numbers? |
||||||||||||||||||||||||||||
| const columnCache = cellCache.get(column) | ||||||||||||||||||||||||||||
| return columnCache ? Array.from(columnCache.keys()) : [] | ||||||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we use an array, should we sort? Or we might prefer to return a Set |
||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you have to call setRowNumber too, otherwise, the row numbers will never appear (if I understand well)