Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.KeepDeletedCells;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.ScanInfo;
Expand All @@ -31,6 +32,14 @@
@InterfaceAudience.Private
public abstract class NormalUserScanQueryMatcher extends UserScanQueryMatcher {

/**
* Number of consecutive range delete markers (DeleteColumn/DeleteFamily) that must be seen before
* the matcher switches from skipping to seeking; the marker that reaches this count triggers the
* seek. Seeking is more expensive than skipping for a single marker, but much faster when
* markers accumulate. This threshold avoids the seek overhead for the common case (one delete per
* row) while still kicking in when markers pile up.
*/
private static final int SEEK_ON_DELETE_MARKER_THRESHOLD = 3;

/** Keeps track of deletes */
private final DeleteTracker deletes;

Expand All @@ -40,12 +49,20 @@ public abstract class NormalUserScanQueryMatcher extends UserScanQueryMatcher {
/** whether time range queries can see rows "behind" a delete */
protected final boolean seePastDeleteMarkers;

/** Whether seek optimization for range delete markers is applicable */
private final boolean canSeekOnDeleteMarker;

/** Count of consecutive range delete markers seen in the current row */
private int rangeDeleteCount;

protected NormalUserScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker columns,
boolean hasNullColumn, DeleteTracker deletes, long oldestUnexpiredTS, long now) {
super(scan, scanInfo, columns, hasNullColumn, oldestUnexpiredTS, now);
this.deletes = deletes;
this.get = scan.isGetScan();
this.seePastDeleteMarkers = scanInfo.getKeepDeletedCells() != KeepDeletedCells.FALSE;
this.canSeekOnDeleteMarker =
!seePastDeleteMarkers && deletes.getClass() == ScanDeleteTracker.class;
}

@Override
Expand All @@ -70,9 +87,26 @@ public MatchCode match(ExtendedCell cell) throws IOException {
seePastDeleteMarkers ? tr.withinTimeRange(timestamp) : tr.withinOrAfterTimeRange(timestamp);
if (includeDeleteMarker) {
this.deletes.add(cell);
// A DeleteColumn or DeleteFamily masks all remaining cells for this column/family.
// Seek past them instead of skipping one cell at a time, but only after seeing
// enough consecutive markers to justify the seek overhead.
// Only safe with plain ScanDeleteTracker. Not safe with newVersionBehavior (sequence
// IDs determine visibility), visibility labels (delete/put label mismatch), or
// seePastDeleteMarkers (KEEP_DELETED_CELLS).
if (
canSeekOnDeleteMarker && (typeByte == KeyValue.Type.DeleteColumn.getCode()
|| typeByte == KeyValue.Type.DeleteFamily.getCode())
) {
if (++rangeDeleteCount >= SEEK_ON_DELETE_MARKER_THRESHOLD) {
return columns.getNextRowOrNextColumn(cell);
}
} else {
rangeDeleteCount = 0;
}
}
return MatchCode.SKIP;
}
rangeDeleteCount = 0;
returnCode = checkDeleted(deletes, cell);
if (returnCode != null) {
return returnCode;
Expand All @@ -83,6 +117,7 @@ public MatchCode match(ExtendedCell cell) throws IOException {
/**
 * Resets the delete tracker and zeroes the running count of consecutive range delete markers.
 */
@Override
protected void reset() {
  this.deletes.reset();
  this.rangeDeleteCount = 0;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,11 @@ public void setToNewRow(ExtendedCell currentRow) {
public abstract boolean moreRowsMayExistAfter(ExtendedCell cell);

public ExtendedCell getKeyForNextColumn(ExtendedCell cell) {
// We aren't sure whether any DeleteFamily cells exist, so we can't skip to next column.
// TODO: Current way disable us to seek to next column quickly. Is there any better solution?
// see HBASE-18471 for more details
// see TestFromClientSide3#testScanAfterDeletingSpecifiedRow
// see TestFromClientSide3#testScanAfterDeletingSpecifiedRowV2
if (cell.getQualifierLength() == 0) {
// For cells with empty qualifier, we generally can't skip to the next column because
// DeleteFamily cells might exist that we haven't seen yet (see HBASE-18471).
// However, if the cell itself IS a DeleteFamily marker, we know we've already processed it,
// so we can safely seek to the next real column.
if (cell.getQualifierLength() == 0 && !PrivateCellUtil.isDeleteFamily(cell)) {
ExtendedCell nextKey = PrivateCellUtil.createNextOnRowCol(cell);
if (nextKey != cell) {
return nextKey;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeepDeletedCells;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FilterBase;
Expand Down Expand Up @@ -396,4 +397,42 @@ scanWithFilter, new ScanInfo(this.conf, fam2, 0, 5, ttl, KeepDeletedCells.FALSE,
Cell nextCell = qm.getKeyForNextColumn(lastCell);
assertArrayEquals(nextCell.getQualifierArray(), col4);
}

/**
 * Checks the match codes returned for runs of identical delete markers: range deletes
 * (DeleteColumn, DeleteFamily) yield SKIP twice and then SEEK_NEXT_COL, whereas point deletes and
 * any run under KEEP_DELETED_CELLS=TRUE yield SKIP every time.
 */
@Test
public void testSeekOnRangeDelete() throws IOException {
  // Two markers are skipped; the third one reaches the threshold and triggers the seek.
  final MatchCode[] seekOnThirdMarker =
    { MatchCode.SKIP, MatchCode.SKIP, MatchCode.SEEK_NEXT_COL };
  // Five markers in a row, none of which may turn into a seek.
  final MatchCode[] skipEveryMarker =
    { MatchCode.SKIP, MatchCode.SKIP, MatchCode.SKIP, MatchCode.SKIP, MatchCode.SKIP };

  // Range deletes at column and at family level share the same threshold behavior.
  assertDeleteMatchCodes(KeepDeletedCells.FALSE, Type.DeleteColumn, seekOnThirdMarker);
  assertDeleteMatchCodes(KeepDeletedCells.FALSE, Type.DeleteFamily, seekOnThirdMarker);

  // A version delete is a point delete, not a range delete, so it never seeks.
  assertDeleteMatchCodes(KeepDeletedCells.FALSE, Type.Delete, skipEveryMarker);

  // With KEEP_DELETED_CELLS=TRUE even a range delete is only ever skipped.
  assertDeleteMatchCodes(KeepDeletedCells.TRUE, Type.DeleteColumn, skipEveryMarker);
}

/**
 * Feeds a fresh matcher one delete marker of the given type per expected code, each one
 * millisecond older than the previous, and asserts the match code returned for every marker.
 * @param keepDeletedCells KEEP_DELETED_CELLS mode used to build the matcher's ScanInfo
 * @param type             delete marker type to generate; family-level types get an empty
 *                         qualifier, all others use col1
 * @param expected         match code expected for each successive marker, in order
 * @throws IOException if creating the matcher or matching a cell fails
 */
private void assertDeleteMatchCodes(KeepDeletedCells keepDeletedCells, Type type,
  MatchCode... expected) throws IOException {
  final long currentTime = EnvironmentEdgeManager.currentTime();
  final ScanInfo info = new ScanInfo(this.conf, fam1, 0, 1, ttl, keepDeletedCells,
    HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false);
  final UserScanQueryMatcher matcher =
    UserScanQueryMatcher.create(scan, info, null, currentTime - ttl, currentTime, null);

  // Family-level markers carry no qualifier; column-level ones target col1.
  final byte[] qualifier;
  if (type == Type.DeleteFamily || type == Type.DeleteFamilyVersion) {
    qualifier = HConstants.EMPTY_BYTE_ARRAY;
  } else {
    qualifier = col1;
  }

  matcher.setToNewRow(new KeyValue(row1, fam1, qualifier, currentTime, type));

  int index = 0;
  for (MatchCode want : expected) {
    // Timestamps decrease with each marker so the cells arrive newest first.
    KeyValue marker = new KeyValue(row1, fam1, qualifier, currentTime - index, type);
    assertEquals("Mismatch at index " + index, want, matcher.match(marker));
    index++;
  }
}
}