Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ API Changes

* GITHUB#15295: Switched to a fixed CFS threshold (Shubham Sharma)

* GITHUB#15480: Deprecate SortField#setMissingValue and add migration test toward immutability. (Syed Mohammad Saad)

* GITHUB#15740: Add FieldMinMax utility to retrieve global min/max values of a numeric field across an IndexReader, unifying PointValues and DocValuesSkipper semantics and returning null when no values exist. (Syed Mohammad Saad)

New Features
---------------------

Expand Down
158 changes: 158 additions & 0 deletions lucene/core/src/java/org/apache/lucene/index/FieldMinMax.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;

import java.io.IOException;
import org.apache.lucene.util.NumericUtils;

/**
 * Utility to retrieve the global min/max values of a numeric field across an {@link IndexReader}.
 *
 * <p>The following sources are consulted in order; the first one that yields a result wins:
 *
 * <ol>
 *   <li>{@link PointValues} min/max packed values (e.g. IntPoint / LongPoint)
 *   <li>{@link DocValuesSkipper} metadata, when every segment that has the field exposes a skipper
 *   <li>A full {@link NumericDocValues} scan (slow but always available for numeric doc values)
 * </ol>
 *
 * <p>Only single-dimensional points that use 4 or 8 bytes per dimension are decoded.
 *
 * <p><b>NOTE:</b> the byte width is the only thing this class checks when decoding points. Float
 * and double points use the same 4/8-byte widths as int and long points, so they cannot be
 * detected and rejected here; they would be decoded as if they were int/long values. Callers must
 * only pass integral fields.
 *
 * <p><b>NOTE:</b> none of the three sources consults live docs, so values that belong to deleted
 * but not yet merged-away documents may be reflected in the result.
 *
 * <p>{@link #get(IndexReader, String)} returns {@code null} when:
 *
 * <ul>
 *   <li>the field does not exist in any segment
 *   <li>the field only has multi-dimensional points, or points of an unsupported width, and no
 *       numeric doc values
 *   <li>no segment contains a value for the field
 * </ul>
 */
public final class FieldMinMax {

  private FieldMinMax() {}

  /** Immutable holder for global minimum and maximum values. */
  public static final class MinMax {

    /** The minimum value across all documents. */
    public final long min;

    /** The maximum value across all documents. */
    public final long max;

    /**
     * Creates a new {@link MinMax} instance.
     *
     * @param min the minimum value
     * @param max the maximum value
     */
    public MinMax(long min, long max) {
      this.min = min;
      this.max = max;
    }

    @Override
    public boolean equals(Object obj) {
      if (this == obj) {
        return true;
      }
      if (!(obj instanceof MinMax)) {
        return false;
      }
      MinMax other = (MinMax) obj;
      return min == other.min && max == other.max;
    }

    @Override
    public int hashCode() {
      return 31 * Long.hashCode(min) + Long.hashCode(max);
    }

    @Override
    public String toString() {
      return "MinMax(min=" + min + ", max=" + max + ")";
    }
  }

  /**
   * Returns the global min/max of {@code field} across all leaves of {@code reader}.
   *
   * @param reader the reader whose segments are inspected
   * @param field the name of the numeric field
   * @return the global range, or {@code null} if no source could provide one
   * @throws IOException if reading index structures fails
   */
  public static MinMax get(IndexReader reader, String field) throws IOException {
    // 1. Point values: min/max are stored per segment, no iteration over documents needed.
    MinMax range = fromPointValues(reader, field);
    if (range != null) {
      return range;
    }

    // 2. Doc-values skippers: per-segment metadata, also without iterating documents.
    range = fromDocValuesSkippers(reader, field);
    if (range != null) {
      return range;
    }

    // 3. Fallback: visit every numeric doc value.
    return scanNumericDocValues(reader, field);
  }

  /** Aggregates per-segment point min/max; returns null if no segment had decodable points. */
  private static MinMax fromPointValues(IndexReader reader, String field) throws IOException {
    boolean found = false;
    long globalMin = Long.MAX_VALUE;
    long globalMax = Long.MIN_VALUE;

    for (LeafReaderContext ctx : reader.leaves()) {
      PointValues values = ctx.reader().getPointValues(field);
      if (values == null || values.getNumDimensions() != 1) {
        continue;
      }

      int bytesPerDim = values.getBytesPerDimension();
      if (bytesPerDim != Integer.BYTES && bytesPerDim != Long.BYTES) {
        continue; // not an int- or long-sized encoding
      }

      byte[] minPacked = values.getMinPackedValue();
      byte[] maxPacked = values.getMaxPackedValue();
      if (minPacked == null || maxPacked == null) {
        continue;
      }

      found = true;
      globalMin = Math.min(globalMin, decodeIntegral(minPacked, bytesPerDim));
      globalMax = Math.max(globalMax, decodeIntegral(maxPacked, bytesPerDim));
    }

    return found ? new MinMax(globalMin, globalMax) : null;
  }

  /**
   * Decodes a sortable-bytes point value. The caller guarantees that {@code bytesPerDim} is either
   * {@link Integer#BYTES} or {@link Long#BYTES}.
   */
  private static long decodeIntegral(byte[] packed, int bytesPerDim) {
    if (bytesPerDim == Integer.BYTES) {
      return NumericUtils.sortableBytesToInt(packed, 0);
    }
    return NumericUtils.sortableBytesToLong(packed, 0);
  }

  /**
   * Aggregates per-segment skipper min/max. Returns null when no segment has the field, or when
   * any segment that has the field lacks a skipper (the metadata would then be incomplete and the
   * caller must fall back to a scan).
   *
   * <p>The skippers are read leaf by leaf instead of through the static {@code
   * DocValuesSkipper.globalMinValue/globalMaxValue} helpers: those report "unavailable" through
   * {@code Long.MIN_VALUE}/{@code Long.MAX_VALUE} sentinels that cannot be told apart from real
   * values. NOTE(review): those helpers also appear to be declared against IndexSearcher rather
   * than IndexReader; confirm against the target branch.
   *
   * <p>NOTE(review): the skipper is used regardless of the field's doc-values type; presumably a
   * skipper on a non-numeric doc-values field would report ordinals. Confirm whether a type check
   * is wanted here.
   */
  private static MinMax fromDocValuesSkippers(IndexReader reader, String field)
      throws IOException {
    boolean found = false;
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;

    for (LeafReaderContext ctx : reader.leaves()) {
      LeafReader leaf = ctx.reader();
      if (leaf.getFieldInfos().fieldInfo(field) == null) {
        continue; // field absent from this segment, nothing to contribute
      }

      DocValuesSkipper skipper = leaf.getDocValuesSkipper(field);
      if (skipper == null) {
        return null; // incomplete metadata
      }
      if (skipper.docCount() == 0) {
        continue; // skipper exists but covers no documents
      }

      found = true;
      min = Math.min(min, skipper.minValue());
      max = Math.max(max, skipper.maxValue());
    }

    return found ? new MinMax(min, max) : null;
  }

  /** Full scan fallback over {@link NumericDocValues}; returns null if no value was seen. */
  private static MinMax scanNumericDocValues(IndexReader reader, String field) throws IOException {
    boolean found = false;
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;

    for (LeafReaderContext ctx : reader.leaves()) {
      NumericDocValues values = ctx.reader().getNumericDocValues(field);
      if (values == null) {
        continue;
      }

      while (values.nextDoc() != NumericDocValues.NO_MORE_DOCS) {
        long v = values.longValue();
        found = true;
        min = Math.min(min, v);
        max = Math.max(max, v);
      }
    }

    return found ? new MinMax(min, max) : null;
  }
}
13 changes: 12 additions & 1 deletion lucene/core/src/java/org/apache/lucene/search/SortField.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ public enum Type {
private FieldComparatorSource comparatorSource;

// Used for 'sortMissingFirst/Last'
protected final Object missingValue;
protected Object missingValue;

// Indicates if sort should be optimized with indexed data. Set to true by default.
@Deprecated private boolean optimizeSortWithIndexedData = true;
Expand Down Expand Up @@ -332,6 +332,17 @@ public Object getMissingValue() {
return missingValue;
}

/**
* Sets the value to use for documents that don't have a value for the sort field.
*
* <p>The argument is stored as-is: this method performs no validation of its own and simply
* replaces the missing value currently held by this instance.
*
* @param missingValue the new missing value
* @deprecated Use {@link #SortField(String, Type, boolean, Object)} to supply missing values at
* construction time. This method will be removed in Lucene 11.
*/
@Deprecated
public void setMissingValue(Object missingValue) {
this.missingValue = missingValue;
}

// Sets field & type, and ensures field is not NULL unless
// type is SCORE or DOC
private void validateField(String field, Type type, Object missingValue) {
Expand Down
154 changes: 154 additions & 0 deletions lucene/core/src/test/org/apache/lucene/index/TestFieldMinMax.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;

/** Tests for {@link FieldMinMax}. */
public class TestFieldMinMax extends LuceneTestCase {

  /** Builds a document that holds exactly the given field. */
  private static Document docWith(IndexableField field) {
    Document doc = new Document();
    doc.add(field);
    return doc;
  }

  /** Opens a reader on {@code dir} and checks the range reported for {@code field}. */
  private static void assertMinMax(Directory dir, String field, long expectedMin, long expectedMax)
      throws Exception {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      FieldMinMax.MinMax mm = FieldMinMax.get(reader, field);
      assertNotNull(mm);
      assertEquals(expectedMin, mm.min);
      assertEquals(expectedMax, mm.max);
    }
  }

  public void testMissingField() throws Exception {
    // try-with-resources so the directory is released even when an assertion fails
    try (Directory dir = newDirectory()) {
      try (RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        w.addDocument(new Document());
      }

      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        assertNull(FieldMinMax.get(reader, "age"));
      }
    }
  }

  public void testIntPointMinMax() throws Exception {
    try (Directory dir = newDirectory()) {
      try (RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        w.addDocument(docWith(new IntPoint("age", 10)));
        w.addDocument(docWith(new IntPoint("age", 50)));
        w.addDocument(docWith(new IntPoint("age", 30)));
      }

      assertMinMax(dir, "age", 10, 50);
    }
  }

  public void testNegativeIntPointMinMax() throws Exception {
    try (Directory dir = newDirectory()) {
      try (RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        w.addDocument(docWith(new IntPoint("delta", -5)));
        w.addDocument(docWith(new IntPoint("delta", 3)));
        w.addDocument(docWith(new IntPoint("delta", -40)));
      }

      assertMinMax(dir, "delta", -40, 3);
    }
  }

  public void testDocValuesMinMax() throws Exception {
    try (Directory dir = newDirectory()) {
      try (RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        // Plain NumericDocValuesField instances: no skip index is requested for this field, so
        // this test is expected to exercise the NumericDocValues scan fallback.
        w.addDocument(docWith(new NumericDocValuesField("score", 5)));
        w.addDocument(docWith(new NumericDocValuesField("score", 100)));
        w.addDocument(docWith(new NumericDocValuesField("score", 42)));

        w.commit();
        w.forceMerge(1);
      }

      assertMinMax(dir, "score", 5, 100);
    }
  }

  public void testDocValuesSkipperMinMax() throws Exception {
    try (Directory dir = newDirectory()) {
      try (RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        // indexedField requests a doc-values skip index, which makes skipper metadata available
        w.addDocument(docWith(NumericDocValuesField.indexedField("score", 5)));
        w.addDocument(docWith(NumericDocValuesField.indexedField("score", 100)));
        w.addDocument(docWith(NumericDocValuesField.indexedField("score", 42)));
      }

      assertMinMax(dir, "score", 5, 100);
    }
  }

  public void testMixedSegments() throws Exception {
    try (Directory dir = newDirectory()) {
      try (RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        w.addDocument(docWith(new IntPoint("age", 7)));

        w.commit(); // flush so the next document goes to a different segment

        w.addDocument(docWith(new IntPoint("age", 70)));
      }

      assertMinMax(dir, "age", 7, 70);
    }
  }

  public void testEmptySegmentIgnored() throws Exception {
    try (Directory dir = newDirectory()) {
      try (RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
        w.addDocument(new Document()); // document without the field
        w.addDocument(docWith(new IntPoint("age", 25)));
      }

      assertMinMax(dir, "age", 25, 25);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.search;

import org.apache.lucene.tests.util.LuceneTestCase;

/** Tests the deprecated {@link SortField#setMissingValue(Object)} migration path. */
public class TestSortFieldMissingValue extends LuceneTestCase {

  /** A value supplied through the deprecated setter must behave like a constructor-supplied one. */
  @SuppressWarnings("deprecation")
  public void testDeprecatedSetterStillWorks() {
    SortField a = new SortField("age", SortField.Type.INT);
    a.setMissingValue(0);

    SortField b = new SortField("age", SortField.Type.INT, false, 0);

    assertEquals(Integer.valueOf(0), a.getMissingValue());
    assertEquals(b, a);
    assertEquals(a.hashCode(), b.hashCode());
  }

  /** An explicit null missing value is equivalent to not supplying one at all. */
  public void testNullMissingValue() {
    SortField a = new SortField("age", SortField.Type.INT);
    SortField b = new SortField("age", SortField.Type.INT, false, null);

    assertNull(a.getMissingValue());
    assertNull(b.getMissingValue());
    assertEquals(b, a);
  }
}
Loading