DistinctPrefixFilter.java
@@ -19,6 +19,7 @@

import java.io.DataInput;
import java.io.DataOutput;
import java.io.EOFException;
import java.io.IOException;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValueUtil;
@@ -33,6 +34,7 @@
import org.apache.phoenix.schema.ValueSchema.Field;
import org.apache.phoenix.schema.types.PDataType;
import org.apache.phoenix.util.ByteUtil;
import org.apache.phoenix.util.ScanUtil;

public class DistinctPrefixFilter extends FilterBase implements Writable {
private static byte VERSION = 1;
@@ -44,26 +46,48 @@ public class DistinctPrefixFilter extends FilterBase implements Writable {
private int lastPosition;
private final ImmutableBytesWritable lastKey =
new ImmutableBytesWritable(ByteUtil.EMPTY_BYTE_ARRAY, -1, -1);
private byte[] emptyCF;
private byte[] emptyCQ;

public DistinctPrefixFilter() {
}

public DistinctPrefixFilter(RowKeySchema schema, int prefixLength) {
this.schema = schema;
this.prefixLength = prefixLength;
this.emptyCF = null;
this.emptyCQ = null;
}

public DistinctPrefixFilter(RowKeySchema schema, int prefixLength, byte[] emptyCF,
byte[] emptyCQ) {
this(schema, prefixLength);
this.emptyCF = emptyCF;
this.emptyCQ = emptyCQ;
}

public void setOffset(int offset) {
this.offset = offset;
}

// This is used when the DistinctPrefixFilter is present on a scan on an uncovered index
public void reinitialize() {
lastKey.set(ByteUtil.EMPTY_BYTE_ARRAY, -1, -1);
lastPosition = -1;
filterAll = false;
}

// No @Override for HBase 3 compatibility
public ReturnCode filterKeyValue(Cell v) throws IOException {
return filterCell(v);
}

@Override
public ReturnCode filterCell(Cell v) throws IOException {
if (emptyCF != null && emptyCQ != null && !ScanUtil.isEmptyColumn(v, emptyCF, emptyCQ)) {
// wait for the empty column
return ReturnCode.NEXT_COL;
}
ImmutableBytesWritable ptr = new ImmutableBytesWritable();

// First determine the prefix based on the schema
@@ -151,6 +175,12 @@ public void write(DataOutput out) throws IOException {
out.writeByte(VERSION);
schema.write(out);
out.writeInt(prefixLength);
if (emptyCF != null && emptyCQ != null) {
out.writeInt(emptyCF.length);
out.write(emptyCF);
out.writeInt(emptyCQ.length);
out.write(emptyCQ);
}
}

@Override
@@ -159,6 +189,18 @@ public void readFields(DataInput in) throws IOException {
schema = new RowKeySchema();
schema.readFields(in);
prefixLength = in.readInt();
try {
int length = in.readInt();
emptyCF = new byte[length];
in.readFully(emptyCF, 0, length);
length = in.readInt();
emptyCQ = new byte[length];
in.readFully(emptyCQ, 0, length);
} catch (EOFException e) {
// Older client doesn't send empty column information
emptyCF = null;
emptyCQ = null;
}
}

@Override
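The write()/readFields() changes above keep the filter wire-compatible across versions: an older client never writes the two length-prefixed byte arrays, so a newer server's extra reads simply run off the end of the payload and fall back to null. A minimal, self-contained sketch of that fallback (a hypothetical harness, with a single int standing in for the serialized RowKeySchema):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;

public class EmptyColumnWireCompatSketch {
  public static void main(String[] args) throws IOException {
    // Old-format payload: version, schema stand-in, prefixLength, and nothing else.
    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(buf);
    out.writeByte(1); // VERSION
    out.writeInt(42); // stand-in for schema.write(out)
    out.writeInt(2);  // prefixLength
    out.close();

    DataInputStream in = new DataInputStream(new ByteArrayInputStream(buf.toByteArray()));
    in.readByte(); // version
    in.readInt();  // schema stand-in
    in.readInt();  // prefixLength
    byte[] emptyCF;
    try {
      int length = in.readInt(); // the old writer never wrote this...
      emptyCF = new byte[length];
      in.readFully(emptyCF, 0, length);
    } catch (EOFException e) {
      emptyCF = null; // ...so we fall back, exactly as readFields() does
    }
    System.out.println("emptyCF = " + emptyCF); // prints "emptyCF = null"
  }
}

Catching EOFException on optional trailing fields is the lightweight alternative to bumping VERSION, and it is safe here because the new fields are always written last.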
EmptyColumnOnlyFilter.java
@@ -39,7 +39,6 @@ public class EmptyColumnOnlyFilter extends FilterBase implements Writable {
private byte[] emptyCQ;
private boolean found = false;
private boolean first = true;
private Cell emptyColumnCell = null;

public EmptyColumnOnlyFilter() {
}
@@ -55,7 +54,6 @@ public EmptyColumnOnlyFilter(byte[] emptyCF, byte[] emptyCQ) {
public void reset() throws IOException {
found = false;
first = true;
emptyColumnCell = null;
}

// No @Override for HBase 3 compatibility
@@ -70,7 +68,6 @@ public ReturnCode filterCell(final Cell cell) throws IOException {
}
if (ScanUtil.isEmptyColumn(cell, emptyCF, emptyCQ)) {
found = true;
emptyColumnCell = cell;
return ReturnCode.INCLUDE;
}
if (first) {
@@ -82,22 +79,8 @@

  @Override
  public void filterRowCells(List<Cell> kvs) throws IOException {
-    if (kvs.size() > 1) {
-      kvs.remove(0);
-    } else if (kvs.size() == 1) {
-      // we only have 1 cell, check if it is the empty column cell or not
-      // since the empty column cell could have been excluded by another filter like the
-      // DistinctPrefixFilter.
-      Cell cell = kvs.get(0);
-      if (found && !ScanUtil.isEmptyColumn(cell, emptyCF, emptyCQ)) {
-        // we found the empty cell, but it was not included so replace the existing cell
-        // with the empty column cell
-        kvs.remove(0);
-        kvs.add(emptyColumnCell);
-      }
-    }
+    if (kvs.size() > 2) {
+      throw new IOException("EmptyColumnOnlyFilter got unexpected cells: " + kvs.size());
+    } else if (kvs.size() == 2) {
+      // remove the first cell and only return the empty column cell
+      kvs.remove(0);
+    }
  }

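Dropping the cached emptyColumnCell works because the DistinctPrefixFilter now waits for the empty column itself (the NEXT_COL branch in the previous file) instead of excluding it, so filterRowCells() can assume at most two cells per row: the first cell and the empty column cell. A small sketch of the resulting trim (hypothetical row and qualifiers; "_0" is the non-encoded empty column qualifier):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;

public class EmptyColumnTrimSketch {
  public static void main(String[] args) {
    byte[] row = "k1".getBytes();
    byte[] cf = "0".getBytes();
    List<Cell> kvs = new ArrayList<>();
    kvs.add(new KeyValue(row, cf, "V1".getBytes(), 1L, "x".getBytes())); // first cell
    kvs.add(new KeyValue(row, cf, "_0".getBytes(), 1L, new byte[0]));    // empty column
    // The simplified filterRowCells(): keep only the empty column cell.
    if (kvs.size() == 2) {
      kvs.remove(0);
    }
    System.out.println(kvs.size()); // 1
  }
}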
@@ -336,9 +336,12 @@ private static void initializeScan(QueryPlan plan, Integer perScanLimit, Integer
&& groupBy.isOrderPreserving()
&& (context.getAggregationManager().isEmpty() || groupBy.isUngroupedAggregate())
) {

ScanUtil.andFilterAtEnd(scan,
new DistinctPrefixFilter(plan.getTableRef().getTable().getRowKeySchema(), cols));
byte[] ecf = SchemaUtil.getEmptyColumnFamily(table);
byte[] ecq = table.getEncodingScheme() == NON_ENCODED_QUALIFIERS
? QueryConstants.EMPTY_COLUMN_BYTES
: table.getEncodingScheme().encode(QueryConstants.ENCODED_EMPTY_COLUMN_NAME);
ScanUtil.andFilterAtEnd(scan, new DistinctPrefixFilter(
plan.getTableRef().getTable().getRowKeySchema(), cols, ecf, ecq));
if (!groupBy.isUngroupedAggregate() && plan.getLimit() != null) {
// We can push the limit to the server, but for UngroupedAggregate
// we cannot push the limit.
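The GROUP BY push-down now has to tell the DistinctPrefixFilter what the empty column looks like: for tables with encoded qualifiers the empty column name is encoded with the table's scheme, otherwise the literal "_0" qualifier is used. The same resolution pulled out into a helper, as a sketch (the wrapper class is hypothetical; the imported Phoenix classes are the real ones):

import static org.apache.phoenix.schema.PTable.QualifierEncodingScheme.NON_ENCODED_QUALIFIERS;

import org.apache.phoenix.query.QueryConstants;
import org.apache.phoenix.schema.PTable;
import org.apache.phoenix.util.SchemaUtil;

final class EmptyColumnResolver {
  // Returns { empty column family, empty column qualifier } for the table.
  static byte[][] resolve(PTable table) {
    byte[] ecf = SchemaUtil.getEmptyColumnFamily(table);
    byte[] ecq = table.getEncodingScheme() == NON_ENCODED_QUALIFIERS
        ? QueryConstants.EMPTY_COLUMN_BYTES // the literal "_0" qualifier
        : table.getEncodingScheme().encode(QueryConstants.ENCODED_EMPTY_COLUMN_NAME);
    return new byte[][] { ecf, ecq };
  }
}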
ScanUtil.java
@@ -1900,6 +1900,33 @@ public static SkipScanFilter removeSkipScanFilter(Scan scan) {
return null;
}

public static DistinctPrefixFilter findDistinctPrefixFilter(Scan scan) {
Filter filter = scan.getFilter();
if (filter instanceof PagingFilter) {
filter = ((PagingFilter) filter).getDelegateFilter();
}
return findDistinctPrefixFilter(filter);
}

public static DistinctPrefixFilter findDistinctPrefixFilter(Filter filter) {
if (filter == null) {
return null;
}
if (filter instanceof DistinctPrefixFilter) {
return (DistinctPrefixFilter) filter;
}
if (filter instanceof FilterList) {
Iterator<Filter> filterIterator = ((FilterList) filter).getFilters().iterator();
while (filterIterator.hasNext()) {
DistinctPrefixFilter distinctFilter = findDistinctPrefixFilter(filterIterator.next());
if (distinctFilter != null) {
return distinctFilter;
}
}
}
return null;
}

/**
* Verify whether the given row key is in the scan boundaries i.e. scan start and end keys.
* @param ptr row key.
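Because ScanUtil.andFilterAtEnd() can leave the DistinctPrefixFilter nested inside one or more FilterLists, and the whole chain may sit behind a PagingFilter, the lookup above has to unwrap recursively. A quick sketch of the Filter overload in action (hypothetical harness; the empty RowKeySchema is just a placeholder):

import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.phoenix.filter.DistinctPrefixFilter;
import org.apache.phoenix.schema.RowKeySchema;
import org.apache.phoenix.util.ScanUtil;

public class FindDistinctFilterSketch {
  public static void main(String[] args) {
    DistinctPrefixFilter distinct = new DistinctPrefixFilter(new RowKeySchema(), 2);
    // Nest it the way filter composition can: two levels of MUST_PASS_ALL lists.
    Filter composed = new FilterList(FilterList.Operator.MUST_PASS_ALL,
        new FilterList(distinct));
    System.out.println(ScanUtil.findDistinctPrefixFilter(composed) == distinct); // true
  }
}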
CDCGlobalIndexRegionScanner.java
@@ -91,9 +91,9 @@ public CDCGlobalIndexRegionScanner(final RegionScanner innerScanner, final Regio
final Scan scan, final RegionCoprocessorEnvironment env, final Scan dataTableScan,
final TupleProjector tupleProjector, final IndexMaintainer indexMaintainer,
final byte[][] viewConstants, final ImmutableBytesWritable ptr, final long pageSizeMs,
final long queryLimit) throws IOException {
final long queryLimit, boolean isDistinct) throws IOException {
super(innerScanner, region, scan, env, dataTableScan, tupleProjector, indexMaintainer,
viewConstants, ptr, pageSizeMs, queryLimit);
viewConstants, ptr, pageSizeMs, queryLimit, isDistinct);
CDCUtil.setupScanForCDC(dataTableScan);
cdcDataTableInfo = CDCTableInfo
.createFromProto(CDCInfoProtos.CDCTableDef.parseFrom(scan.getAttribute(CDC_DATA_TABLE_DEF)));
UncoveredGlobalIndexRegionScanner.java
@@ -88,9 +88,9 @@ public UncoveredGlobalIndexRegionScanner(final RegionScanner innerScanner, final
final Scan scan, final RegionCoprocessorEnvironment env, final Scan dataTableScan,
final TupleProjector tupleProjector, final IndexMaintainer indexMaintainer,
final byte[][] viewConstants, final ImmutableBytesWritable ptr, final long pageSizeMs,
final long queryLimit) throws IOException {
final long queryLimit, boolean isDistinct) throws IOException {
super(innerScanner, region, scan, env, dataTableScan, tupleProjector, indexMaintainer,
viewConstants, ptr, pageSizeMs, queryLimit);
viewConstants, ptr, pageSizeMs, queryLimit, isDistinct);
final Configuration config = env.getConfiguration();
hTableFactory = IndexWriterUtils.getDefaultDelegateHTableFactory(env);
rowCountPerTask =
UncoveredIndexRegionScanner.java
@@ -113,16 +113,25 @@ public UncoveredIndexRegionScanner(final RegionScanner innerScanner, final Regio
final Scan scan, final RegionCoprocessorEnvironment env, final Scan dataTableScan,
final TupleProjector tupleProjector, final IndexMaintainer indexMaintainer,
final byte[][] viewConstants, final ImmutableBytesWritable ptr, final long pageSizeMs,
final long queryLimit) {
final long queryLimit, boolean isDistinct) {
super(innerScanner);
final Configuration config = env.getConfiguration();

byte[] pageSizeFromScan = scan.getAttribute(INDEX_PAGE_ROWS);
if (pageSizeFromScan != null) {
pageSizeInRows = (int) Bytes.toLong(pageSizeFromScan);
if (isDistinct) {
// If the scan has a DistinctPrefix filter, set the page size to 1. This is because we
// don't want to skip rows without first checking that each row is valid and passes any
// additional filters evaluated after merging with the data table. A page size of 1 is
// fine when the distinct prefix filter is used since, once a row is valid, we jump to
// the next unique prefix, so ideally we scan very few rows.
pageSizeInRows = 1;
} else {
pageSizeInRows = (int) config.getLong(INDEX_PAGE_SIZE_IN_ROWS,
QueryServicesOptions.DEFAULT_INDEX_PAGE_SIZE_IN_ROWS);
byte[] pageSizeFromScan = scan.getAttribute(INDEX_PAGE_ROWS);
if (pageSizeFromScan != null) {
pageSizeInRows = (int) Bytes.toLong(pageSizeFromScan);
} else {
pageSizeInRows = (int) config.getLong(INDEX_PAGE_SIZE_IN_ROWS,
QueryServicesOptions.DEFAULT_INDEX_PAGE_SIZE_IN_ROWS);
}
}
if (queryLimit != -1) {
pageSizeInRows = Long.min(pageSizeInRows, queryLimit);
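Restated as a pure function, the precedence above is: DISTINCT pins the page to a single row; otherwise the scan attribute wins over the configured default; and a query limit caps whatever was chosen. A sketch with assumed names, not the PR's code:

final class PageSizeSketch {
  static long effectivePageSizeInRows(boolean isDistinct, Long pageSizeFromScan,
      long configuredDefault, long queryLimit) {
    long pageSize = isDistinct ? 1L
        : (pageSizeFromScan != null ? pageSizeFromScan : configuredDefault);
    // A query limit smaller than the page size caps it, matching the code above.
    return queryLimit != -1 ? Long.min(pageSize, queryLimit) : pageSize;
  }
}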
UncoveredLocalIndexRegionScanner.java
@@ -49,9 +49,9 @@ public UncoveredLocalIndexRegionScanner(final RegionScanner innerScanner, final
final Scan scan, final RegionCoprocessorEnvironment env, final Scan dataTableScan,
final TupleProjector tupleProjector, final IndexMaintainer indexMaintainer,
final byte[][] viewConstants, final ImmutableBytesWritable ptr, final long pageSizeMs,
final int offset, final byte[] actualStartKey, final long queryLimit) {
final int offset, final byte[] actualStartKey, final long queryLimit, boolean isDistinct) {
super(innerScanner, region, scan, env, dataTableScan, tupleProjector, indexMaintainer,
viewConstants, ptr, pageSizeMs, queryLimit);
viewConstants, ptr, pageSizeMs, queryLimit, isDistinct);
this.offset = offset;
this.actualStartKey = actualStartKey;
}
@@ -48,8 +48,6 @@
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.regionserver.PhoenixScannerContext;
@@ -63,7 +61,6 @@
import org.apache.phoenix.coprocessor.DataTableScanMetrics;
import org.apache.phoenix.coprocessor.DelegateRegionScanner;
import org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants;
import org.apache.phoenix.filter.EmptyColumnOnlyFilter;
import org.apache.phoenix.filter.PagingFilter;
import org.apache.phoenix.filter.UnverifiedRowFilter;
import org.apache.phoenix.hbase.index.covered.update.ColumnReference;
@@ -237,23 +234,7 @@ private void init() throws IOException {
}

private boolean shouldCreateUnverifiedRowFilter(Filter delegateFilter) {
if (delegateFilter == null) {
return false;
}
Filter wrappedFilter = delegateFilter;
if (delegateFilter instanceof FilterList) {
List<Filter> filters = ((FilterList) delegateFilter).getFilters();
wrappedFilter = filters.get(0);
}
// Optimization since FirstKeyOnlyFilter and EmptyColumnOnlyFilter
// always include the empty column in the scan result
if (
wrappedFilter instanceof FirstKeyOnlyFilter
|| wrappedFilter instanceof EmptyColumnOnlyFilter
) {
return false;
}
return true;
return delegateFilter != null && !indexMaintainer.isUncovered();
}

public boolean next(List<Cell> result, boolean raw, ScannerContext scannerContext)
@@ -630,7 +611,7 @@ private boolean verifyRowAndRepairIfNecessary(List<Cell> cellList) throws IOExce
long repairStart = EnvironmentEdgeManager.currentTimeMillis();

byte[] rowKey = CellUtil.cloneRow(cell);
long ts = cellList.get(0).getTimestamp();
long ts = getMaxTimestamp(cellList);
cellList.clear();
long repairTime;
try {
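The repair path now keys the timestamp off the newest cell in the row rather than whichever cell happens to be first, since the cells in cellList are not guaranteed to share one timestamp. getMaxTimestamp() itself is not shown in this hunk; a straightforward implementation consistent with its use here would be:

import java.util.List;
import org.apache.hadoop.hbase.Cell;

final class RepairTimestamps {
  // Sketch: pick the newest cell timestamp in the row as the repair timestamp.
  static long getMaxTimestamp(List<Cell> cellList) {
    long maxTs = 0;
    for (Cell cell : cellList) {
      maxTs = Math.max(maxTs, cell.getTimestamp());
    }
    return maxTs;
  }
}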
@@ -52,6 +52,7 @@
import org.apache.phoenix.expression.Expression;
import org.apache.phoenix.expression.ExpressionType;
import org.apache.phoenix.expression.KeyValueColumnExpression;
import org.apache.phoenix.filter.DistinctPrefixFilter;
import org.apache.phoenix.hbase.index.covered.update.ColumnReference;
import org.apache.phoenix.index.IndexMaintainer;
import org.apache.phoenix.query.QueryConstants;
@@ -117,6 +118,7 @@ public RegionScanner getWrappedScanner(final RegionCoprocessorEnvironment env,
final long pageSizeMs = ScanUtil.getPageSizeMsForRegionScanner(scan);
Expression extraWhere = null;
long extraLimit = -1;
DistinctPrefixFilter distinctFilter = null;

{
// for indexes construct the row filter for uncovered columns if it exists
@@ -169,19 +171,22 @@
dataTableScan.addColumn(column.getFamily(), column.getQualifier());
}
}
// If the DistinctPrefix filter is present on the scan, we set the page size to 1
// when scanning uncovered index rows
distinctFilter = ScanUtil.findDistinctPrefixFilter(scan);
if (ScanUtil.isLocalIndex(scan)) {
s = new UncoveredLocalIndexRegionScanner(regionScanner, dataRegion, scan, env,
dataTableScan, tupleProjector, indexMaintainer, viewConstants, ptr, pageSizeMs,
offset, actualStartKey, extraLimit);
offset, actualStartKey, extraLimit, distinctFilter != null);
} else {
if (scan.getAttribute(CDC_DATA_TABLE_DEF) != null) {
s = new CDCGlobalIndexRegionScanner(regionScanner, dataRegion, scan, env,
dataTableScan, tupleProjector, indexMaintainer, viewConstants, ptr, pageSizeMs,
extraLimit);
extraLimit, distinctFilter != null);
} else {
s = new UncoveredGlobalIndexRegionScanner(regionScanner, dataRegion, scan, env,
dataTableScan, tupleProjector, indexMaintainer, viewConstants, ptr, pageSizeMs,
extraLimit);
extraLimit, distinctFilter != null);
}
}
}
@@ -253,6 +258,11 @@ public boolean nextRaw(List<Cell> result, ScannerContext scannerContext) throws
return true;
}
if (result.size() == 0) {
if (distinctFilter != null) {
// We got an orphaned uncovered index row; just reinitialize the distinct filter
// and move on to the next row
distinctFilter.reinitialize();
}
return next;
}
if ((ScanUtil.isLocalOrUncoveredGlobalIndex(scan)) && !ScanUtil.isAnalyzeTable(scan)) {
@@ -274,6 +284,12 @@ public boolean nextRaw(List<Cell> result, ScannerContext scannerContext) throws
extraWhere.evaluate(merged, ptr);
if (!Boolean.TRUE.equals(extraWhere.getDataType().toObject(ptr))) {
result.clear();
if (distinctFilter != null) {
// The current row was rejected after evaluating the extra where conditions.
// We can't skip to the next unique key prefix, as that could skip valid
// results, so reinitialize the distinct filter and move on to the next row
distinctFilter.reinitialize();
}
return next;
}
}
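Putting the pieces together: on the index side, the DistinctPrefixFilter seeks past every row sharing the current prefix as soon as one row matches. If that row is later discarded (an orphaned index row, or one rejected by the extra WHERE), the seek may already have skipped prefixes that still contain valid rows, which is why both paths above call reinitialize(). A condensed, compilable sketch of the control flow (the abstract helpers are hypothetical stand-ins for the real merge and filter steps):

import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.phoenix.filter.DistinctPrefixFilter;

abstract class DistinctUncoveredFlowSketch {
  DistinctPrefixFilter distinctFilter; // null when the query is not DISTINCT

  abstract boolean scanIndexRow(List<Cell> result);        // index-side nextRaw()
  abstract boolean mergeWithDataTable(List<Cell> result);  // false => orphaned row
  abstract boolean passesExtraWhere(List<Cell> result);    // uncovered WHERE conditions

  boolean nextRow(List<Cell> result) {
    boolean hasMore = scanIndexRow(result);
    if (!mergeWithDataTable(result) || !passesExtraWhere(result)) {
      result.clear();
      if (distinctFilter != null) {
        // The filter already seeked past this row's whole prefix on the index
        // side; that skip may have been premature, so forget the last prefix
        // and treat the next index row as fresh.
        distinctFilter.reinitialize();
      }
    }
    return hasMore;
  }
}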