-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Upgrade t-digest from 3.2 to 3.3 with error rate fix #18103
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,8 +32,10 @@ | |
| import org.apache.commons.configuration2.Configuration; | ||
| import org.apache.commons.lang3.builder.ToStringBuilder; | ||
| import org.apache.commons.lang3.builder.ToStringStyle; | ||
| import org.apache.pinot.segment.local.aggregator.PercentileTDigestValueAggregator; | ||
| import org.apache.pinot.segment.spi.AggregationFunctionType; | ||
| import org.apache.pinot.segment.spi.ColumnMetadata; | ||
| import org.apache.pinot.segment.spi.Constants; | ||
| import org.apache.pinot.segment.spi.SegmentMetadata; | ||
| import org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair; | ||
| import org.apache.pinot.segment.spi.index.startree.AggregationSpec; | ||
|
|
@@ -80,7 +82,8 @@ public static StarTreeV2BuilderConfig fromIndexConfig(StarTreeIndexConfig indexC | |
| AggregationFunctionColumnPair.resolveToStoredType(aggregationFunctionColumnPair); | ||
| // If there is already an equivalent functionColumnPair in the map, do not load another. | ||
| // This prevents the duplication of the aggregation when the StarTree is constructed. | ||
| aggregationSpecs.putIfAbsent(storedType, AggregationSpec.DEFAULT); | ||
| aggregationSpecs.putIfAbsent(storedType, | ||
| getDefaultAggregationSpec(aggregationFunctionColumnPair.getFunctionType())); | ||
| } | ||
| } | ||
| if (indexConfig.getAggregationConfigs() != null) { | ||
|
|
@@ -323,4 +326,22 @@ public String toString() { | |
| .append("skipStarNodeCreation", _skipStarNodeCreationForDimensions) | ||
| .append("aggregationSpecs", _aggregationSpecs).append("maxLeafRecords", _maxLeafRecords).toString(); | ||
| } | ||
|
|
||
| /** | ||
| * Returns the default {@link AggregationSpec} for the given function type. For PERCENTILETDIGEST and | ||
| * PERCENTILERAWTDIGEST, the default compression factor is explicitly included in the function parameters so that | ||
| * it is persisted in star-tree metadata. This allows {@code canUseStarTree()} to distinguish old segments (built | ||
| * with the previous default of 100, which have no compression metadata) from new segments. | ||
| */ | ||
| private static AggregationSpec getDefaultAggregationSpec(AggregationFunctionType functionType) { | ||
| switch (functionType) { | ||
| case PERCENTILETDIGEST: | ||
| case PERCENTILERAWTDIGEST: | ||
| return new AggregationSpec(null, null, null, null, null, | ||
| Map.of(Constants.PERCENTILETDIGEST_COMPRESSION_FACTOR_KEY, | ||
| PercentileTDigestValueAggregator.DEFAULT_TDIGEST_COMPRESSION)); | ||
| default: | ||
| return AggregationSpec.DEFAULT; | ||
| } | ||
|
Comment on lines
+330
to
+345
|
||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment here says injecting the default compression will be “stored in star-tree metadata” and used by canUseStarTree() to distinguish old vs new segments. However, this method only returns ExpressionContext arguments for ValueAggregatorFactory; star-tree metadata persistence is based on AggregationSpec.getFunctionParameters() (see StarTreeV2Metadata.writeMetadata). If AggregationSpec.functionParameters is empty (e.g., aggregationConfigs without functionParameters), metadata will still omit compressionFactor and canUseStarTree() will treat it as default=100 while the builder will create digests at DEFAULT_TDIGEST_COMPRESSION=500. Consider fixing this by ensuring AggregationSpec.functionParameters always includes compressionFactor for PERCENTILETDIGEST/PERCENTILERAWTDIGEST when absent, and adjust/remove this comment accordingly.