apache · junegunn · Mar 28, 2026 · Mar 30, 2026
diff --git a/...in/java/org/apache/hadoop/hbase/regionserver/querymatcher/NormalUserScanQueryMatcher.java b/...in/java/org/apache/hadoop/hbase/regionserver/querymatcher/NormalUserScanQueryMatcher.java
@@ -20,6 +20,7 @@
 import java.io.IOException;
 import org.apache.hadoop.hbase.ExtendedCell;
 import org.apache.hadoop.hbase.KeepDeletedCells;
+import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.PrivateCellUtil;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.regionserver.ScanInfo;
@@ -31,6 +32,14 @@
 @InterfaceAudience.Private
 public abstract class NormalUserScanQueryMatcher extends UserScanQueryMatcher {
 
+  /**
+   * Number of consecutive range delete markers (DeleteColumn/DeleteFamily) to skip before switching
+   * to seek. Seeking is more expensive than skipping for a single marker, but much faster when
+   * markers accumulate. This threshold avoids the seek overhead for the common case (one delete per
+   * row) while still kicking in when markers pile up.
+   */
+  private static final int SEEK_ON_DELETE_MARKER_THRESHOLD = 3;
+
   /** Keeps track of deletes */
   private final DeleteTracker deletes;
 
@@ -40,12 +49,20 @@ public abstract class NormalUserScanQueryMatcher extends UserScanQueryMatcher {
   /** whether time range queries can see rows "behind" a delete */
   protected final boolean seePastDeleteMarkers;
 
+  /** Whether seek optimization for range delete markers is applicable */
+  private final boolean canSeekOnDeleteMarker;
+
+  /** Count of consecutive range delete markers seen in the current row */
+  private int rangeDeleteCount;
+
   protected NormalUserScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker columns,
     boolean hasNullColumn, DeleteTracker deletes, long oldestUnexpiredTS, long now) {
     super(scan, scanInfo, columns, hasNullColumn, oldestUnexpiredTS, now);
     this.deletes = deletes;
     this.get = scan.isGetScan();
     this.seePastDeleteMarkers = scanInfo.getKeepDeletedCells() != KeepDeletedCells.FALSE;
+    this.canSeekOnDeleteMarker =
+      !seePastDeleteMarkers && deletes.getClass() == ScanDeleteTracker.class;
   }
 
   @Override
@@ -70,9 +87,26 @@ public MatchCode match(ExtendedCell cell) throws IOException {
         seePastDeleteMarkers ? tr.withinTimeRange(timestamp) : tr.withinOrAfterTimeRange(timestamp);
       if (includeDeleteMarker) {
         this.deletes.add(cell);
+        // A DeleteColumn or DeleteFamily masks all remaining cells for this column/family.
+        // Seek past them instead of skipping one cell at a time, but only after seeing
+        // enough consecutive markers to justify the seek overhead.
+        // Only safe with plain ScanDeleteTracker. Not safe with newVersionBehavior (sequence
+        // IDs determine visibility), visibility labels (delete/put label mismatch), or
+        // seePastDeleteMarkers (KEEP_DELETED_CELLS).
+        if (
+          canSeekOnDeleteMarker && (typeByte == KeyValue.Type.DeleteColumn.getCode()
+            || typeByte == KeyValue.Type.DeleteFamily.getCode())
+        ) {
+          if (++rangeDeleteCount >= SEEK_ON_DELETE_MARKER_THRESHOLD) {
+            return columns.getNextRowOrNextColumn(cell);
+          }
+        } else {
+          rangeDeleteCount = 0;
+        }
       }
       return MatchCode.SKIP;
     }
+    rangeDeleteCount = 0;
     returnCode = checkDeleted(deletes, cell);
     if (returnCode != null) {
       return returnCode;
@@ -83,6 +117,7 @@ public MatchCode match(ExtendedCell cell) throws IOException {
   @Override
   protected void reset() {
     deletes.reset();
+    rangeDeleteCount = 0;
   }
 
   @Override

diff --git a/...ver/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanQueryMatcher.java b/...ver/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanQueryMatcher.java
@@ -292,12 +292,11 @@ public void setToNewRow(ExtendedCell currentRow) {
   public abstract boolean moreRowsMayExistAfter(ExtendedCell cell);
 
   public ExtendedCell getKeyForNextColumn(ExtendedCell cell) {
-    // We aren't sure whether any DeleteFamily cells exist, so we can't skip to next column.
-    // TODO: Current way disable us to seek to next column quickly. Is there any better solution?
-    // see HBASE-18471 for more details
-    // see TestFromClientSide3#testScanAfterDeletingSpecifiedRow
-    // see TestFromClientSide3#testScanAfterDeletingSpecifiedRowV2
-    if (cell.getQualifierLength() == 0) {
+    // For cells with empty qualifier, we generally can't skip to the next column because
+    // DeleteFamily cells might exist that we haven't seen yet (see HBASE-18471).
+    // However, if the cell itself IS a DeleteFamily marker, we know we've already processed it,
+    // so we can safely seek to the next real column.
+    if (cell.getQualifierLength() == 0 && !PrivateCellUtil.isDeleteFamily(cell)) {
       ExtendedCell nextKey = PrivateCellUtil.createNextOnRowCol(cell);
       if (nextKey != cell) {
         return nextKey;

diff --git a/...test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java b/...test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java
@@ -30,6 +30,7 @@
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeepDeletedCells;
 import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValue.Type;
 import org.apache.hadoop.hbase.PrivateCellUtil;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.filter.FilterBase;
@@ -396,4 +397,42 @@ scanWithFilter, new ScanInfo(this.conf, fam2, 0, 5, ttl, KeepDeletedCells.FALSE,
     Cell nextCell = qm.getKeyForNextColumn(lastCell);
     assertArrayEquals(nextCell.getQualifierArray(), col4);
   }
+
+  /**
+   * After enough consecutive range delete markers, the matcher should switch from SKIP to
+   * SEEK_NEXT_COL. Point deletes and KEEP_DELETED_CELLS always SKIP.
+   */
+  @Test
+  public void testSeekOnRangeDelete() throws IOException {
+    // DeleteColumn: first two SKIP, third triggers SEEK_NEXT_COL
+    assertDeleteMatchCodes(KeepDeletedCells.FALSE, Type.DeleteColumn, MatchCode.SKIP,
+      MatchCode.SKIP, MatchCode.SEEK_NEXT_COL);
+
+    // DeleteFamily: same threshold behavior
+    assertDeleteMatchCodes(KeepDeletedCells.FALSE, Type.DeleteFamily, MatchCode.SKIP,
+      MatchCode.SKIP, MatchCode.SEEK_NEXT_COL);
+
+    // Delete (version): always SKIP (point delete, not range)
+    assertDeleteMatchCodes(KeepDeletedCells.FALSE, Type.Delete, MatchCode.SKIP, MatchCode.SKIP,
+      MatchCode.SKIP, MatchCode.SKIP, MatchCode.SKIP);
+
+    // KEEP_DELETED_CELLS=TRUE: always SKIP
+    assertDeleteMatchCodes(KeepDeletedCells.TRUE, Type.DeleteColumn, MatchCode.SKIP, MatchCode.SKIP,
+      MatchCode.SKIP, MatchCode.SKIP, MatchCode.SKIP);
+  }
+
+  private void assertDeleteMatchCodes(KeepDeletedCells keepDeletedCells, Type type,
+    MatchCode... expected) throws IOException {
+    long now = EnvironmentEdgeManager.currentTime();
+    UserScanQueryMatcher qm =
+      UserScanQueryMatcher.create(scan, new ScanInfo(this.conf, fam1, 0, 1, ttl, keepDeletedCells,
+        HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false), null, now - ttl, now, null);
+    boolean familyLevel = type == Type.DeleteFamily || type == Type.DeleteFamilyVersion;
+    byte[] qual = familyLevel ? HConstants.EMPTY_BYTE_ARRAY : col1;
+    qm.setToNewRow(new KeyValue(row1, fam1, qual, now, type));
+    for (int i = 0; i < expected.length; i++) {
+      KeyValue kv = new KeyValue(row1, fam1, qual, now - i, type);
+      assertEquals("Mismatch at index " + i, expected[i], qm.match(kv));
+    }
+  }
 }