Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
6.0-alpha2
* BTree.FastBuilder.reset() fails to clear savedBuffer and savedNextKey, causing ClassCastException and SSTable header corruption during schema disagreement (CASSANDRA-21216, CASSANDRA-21260)
* Restore option to avoid hint transfer during decommission (CASSANDRA-21341)
* Add an offline cluster metadata tool (CASSANDRA-19151)
* Accord: Tail Latency Improvements (CASSANDRA-21361)
Expand Down
132 changes: 59 additions & 73 deletions src/java/org/apache/cassandra/utils/btree/BTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -3286,7 +3286,7 @@ && areIdentical(buffer, MAX_KEYS, sourceNode, count, count + 1))
* was constructed from for the contents of {@code buffer}.
* <p>
* For {@link FastBuilder} these are mostly the same, so they are fetched from a global cache and
* resized accordingly, but for {@link AbstractUpdater} we maintain a buffer of sizes.
* resized accordingly, but for {@link Updater} we maintain a buffer of sizes.
*/
int setDrainSizeMap(Object[] original, int keysInOriginal, Object[] branch, int keysInBranch)
{
Expand Down Expand Up @@ -3315,7 +3315,7 @@ int setDrainSizeMap(Object[] original, int keysInOriginal, Object[] branch, int
* was constructed from for the contents of {@code savedBuffer}.
* <p>
* For {@link FastBuilder} these are always the same size, so they are fetched from a global cache,
* but for {@link AbstractUpdater} we maintain a buffer of sizes.
* but for {@link Updater} we maintain a buffer of sizes.
*
* @return the size of {@code branch}
*/
Expand Down Expand Up @@ -3346,7 +3346,7 @@ int setOverflowSizeMap(Object[] branch, int keys)
* was constructed from the contents of both {@code savedBuffer} and {@code buffer}
* <p>
* For {@link FastBuilder} these are mostly the same size, so they are fetched from a global cache
* and only the last items updated, but for {@link AbstractUpdater} we maintain a buffer of sizes.
* and only the last items updated, but for {@link Updater} we maintain a buffer of sizes.
*/
void setRedistributedSizeMap(Object[] branch, int steal)
{
Expand Down Expand Up @@ -3485,11 +3485,60 @@ final LeafBuilder leaf()

/**
* Clear any references we might still retain, to avoid holding onto memory.
* <p>
* While this method is not strictly necessary, it exists to
* ensure the implementing classes are aware they must handle it.
*/
abstract void reset();
void reset()
{
leaf().count = 0;
clearLeafBuffer(leaf().buffer);
if (leaf().savedBuffer != null)
clearLeafBuffer(leaf().savedBuffer);
leaf().savedNextKey = null;
BranchBuilder branch = leaf().parent;
while (branch != null && branch.inUse)
{
branch.count = 0;
branch.hasRightChild = false;
clearBranchBuffer(branch.buffer);
if (branch.savedBuffer != null)
clearBranchBuffer(branch.savedBuffer);
branch.savedNextKey = null;
branch.clearSourceNode();
branch.inUse = false;
branch = branch.parent;
}
Invariants.require(branch == null || (branch.count == 0 && !branch.hasRightChild));
}

/**
* Clear the contents of a leaf buffer, aborting once we encounter a null entry
* to save time on small trees
*/
private void clearLeafBuffer(Object[] array)
{
if (array[0] == null)
return;
// find first null entry; loop from beginning, to amortise cost over size of working set
int i = 1;
while (i < array.length && array[i] != null)
++i;
Arrays.fill(array, 0, i, null);
}

/**
* Clear the contents of a branch buffer, aborting once we encounter a null entry
* to save time on small trees
*/
private void clearBranchBuffer(Object[] array)
{
if (array[0] == null && array[MAX_KEYS] == null)
return;
// find first null entry; loop from beginning, to amortise cost over size of working set
int i = 1;
while (i < MAX_KEYS && array[i] != null)
++i;
Arrays.fill(array, 0, i, null);
Arrays.fill(array, MAX_KEYS, MAX_KEYS + i + 1, null);
}
}

/**
Expand Down Expand Up @@ -3544,14 +3593,7 @@ public void close()
@Override
public void reset()
{
Arrays.fill(leaf().buffer, null);
leaf().count = 0;
BranchBuilder branch = leaf().parent;
while (branch != null && branch.inUse)
{
branch.reset();
branch = branch.parent;
}
super.reset();
}

public boolean validateEmpty()
Expand Down Expand Up @@ -3580,62 +3622,6 @@ private static boolean hasOnlyNulls(Object[] buffer)
}
}

private static abstract class AbstractUpdater extends AbstractFastBuilder implements AutoCloseable
{
void reset()
{
assert leaf().count == 0;
clearLeafBuffer(leaf().buffer);
if (leaf().savedBuffer != null)
Arrays.fill(leaf().savedBuffer, null);

BranchBuilder branch = leaf().parent;
while (branch != null && branch.inUse)
{
branch.count = 0;
branch.hasRightChild = false;
clearBranchBuffer(branch.buffer);
if (branch.savedBuffer != null && branch.savedBuffer[0] != null)
Arrays.fill(branch.savedBuffer, null); // by definition full, if non-empty
branch.inUse = false;
branch = branch.parent;
}
Invariants.require(branch == null || (branch.count == 0 && !branch.hasRightChild));
}

/**
* Clear the contents of a branch buffer, aborting once we encounter a null entry
* to save time on small trees
*/
private void clearLeafBuffer(Object[] array)
{
if (array[0] == null)
return;
// find first null entry; loop from beginning, to amortise cost over size of working set
int i = 1;
while (i < array.length && array[i] != null)
++i;
Arrays.fill(array, 0, i, null);
}

/**
* Clear the contents of a branch buffer, aborting once we encounter a null entry
* to save time on small trees
*/
private void clearBranchBuffer(Object[] array)
{
if (array[0] == null)
return;

// find first null entry; loop from beginning, to amortise cost over size of working set
int i = 1;
while (i < MAX_KEYS && array[i] != null)
++i;
Arrays.fill(array, 0, i, null);
Arrays.fill(array, MAX_KEYS, MAX_KEYS + i + 1, null);
}
}

/**
* A pooled object for modifying an existing tree with a new (typically smaller) tree.
* <p>
Expand All @@ -3650,7 +3636,7 @@ private void clearBranchBuffer(Object[] array)
* Searches within both trees to accelerate the process of modification, instead of performing a simple
* iteration over the new tree.
*/
static class Updater<Compare, Existing extends Compare, Insert extends Compare> extends AbstractUpdater implements AutoCloseable
static class Updater<Compare, Existing extends Compare, Insert extends Compare> extends AbstractFastBuilder implements AutoCloseable
{
static final TinyThreadLocalPool<Updater> POOL = new TinyThreadLocalPool<>();
TinyThreadLocalPool.TinyPool pool;
Expand Down Expand Up @@ -3869,7 +3855,7 @@ static int searchResultToComparison(int searchResult)
* <p>
* The approach taken here hopefully balances simplicity, garbage generation and execution time.
*/
static abstract class AbstractSeekingTransformer<I, O> extends AbstractUpdater implements AutoCloseable
static abstract class AbstractSeekingTransformer<I, O> extends AbstractFastBuilder implements AutoCloseable
{
/**
* An iterator over the tree we are updating
Expand Down
20 changes: 20 additions & 0 deletions test/unit/org/apache/cassandra/utils/btree/BTreeTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,26 @@ private static void checkResult(int count, Object[] btree, BTree.Dir dir)
assertEquals(count, i);
}

@Test
public void testFastBuilderResetClearsSavedState()
{
// Add >31 items to trigger overflow (savedBuffer/savedNextKey population)
try (BTree.FastBuilder<Integer> builder = BTree.fastBuilder())
{
for (int i = 0; i < 40; i++)
builder.add(i);
// Simulate an abandoned builder by closing without calling build().
// close() calls reset() then returns the builder to the pool.
}

// Reuse the pooled builder — it should be clean
try (BTree.FastBuilder<Integer> builder = BTree.fastBuilder())
{
assertTrue("FastBuilder should be empty after reset, but savedBuffer/savedNextKey leaked",
builder.validateEmpty());
}
}

/**
* <code>UpdateFunction</code> that count the number of call made to apply for each value.
*/
Expand Down