23 changes: 20 additions & 3 deletions ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java
@@ -47,6 +47,7 @@
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.io.DateWritableV2;
import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
import org.apache.hive.common.util.HiveStringUtils;
@@ -199,13 +200,23 @@ public static String[] extractColumnValues(FieldSchema column, boolean isColumnS
}
} else if (statsData.isSetLongStats()) {
LongColumnStatsData longStats = statsData.getLongStats();
String lowVal = longStats.isSetLowValue() ? "" + longStats.getLowValue() : "";
String highVal = longStats.isSetHighValue() ? "" + longStats.getHighValue() : "";
ColumnStatisticsData._Fields field = statsData.getSetField();
String lowVal;
String highVal;
if (serdeConstants.TIMESTAMP_TYPE_NAME.equals(column.getType())) {
// if the long stats represent a timestamp, format it as a timestamp
lowVal = longStats.isSetLowValue() ? convertTimestampToString(longStats.getLowValue()) : "";
highVal = longStats.isSetHighValue() ? convertTimestampToString(longStats.getHighValue()) : "";
field = ColumnStatisticsData._Fields.TIMESTAMP_STATS;
} else {
lowVal = longStats.isSetLowValue() ? "" + longStats.getLowValue() : "";
highVal = longStats.isSetHighValue() ? "" + longStats.getHighValue() : "";
}
values.addAll(Lists.newArrayList(lowVal, highVal,
"" + longStats.getNumNulls(), "" + longStats.getNumDVs(), "", "", "", "",
convertToString(longStats.getBitVectors())));
if (histogramEnabled) {
values.add(convertHistogram(statsData.getLongStats().getHistogram(), statsData.getSetField()));
values.add(convertHistogram(statsData.getLongStats().getHistogram(), field));
}
} else if (statsData.isSetDateStats()) {
DateColumnStatsData dateStats = statsData.getDateStats();
@@ -297,6 +308,12 @@ private static String convertToString(byte[] buffer) {
return new String(Arrays.copyOfRange(buffer, 0, 2));
}

public static String convertTimestampToString(long val) {
TimestampWritableV2 writableValue = new TimestampWritableV2(
Timestamp.ofEpochSecond(val));
return writableValue.toString();
}

public static String convertToString(org.apache.hadoop.hive.metastore.api.Timestamp val) {
if (val == null) {
return "";
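As a side note on the new helper above: a minimal, hypothetical usage sketch (the demo class and sample value are not part of this PR; it assumes hive-exec on the classpath and that Hive's Timestamp.ofEpochSecond interprets the long as UTC seconds since the epoch):

import org.apache.hadoop.hive.ql.ddl.ShowUtils;

public class TimestampStatDemo {
  public static void main(String[] args) {
    // 946684800 seconds since the epoch, i.e. 2000-01-01 00:00:00 UTC.
    long epochSeconds = 946684800L;
    // Renders a legacy long-encoded timestamp stat the way SHOW/DESC output
    // now does; expected to print "2000-01-01 00:00:00".
    System.out.println(ShowUtils.convertTimestampToString(epochSeconds));
  }
}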
@@ -198,7 +198,9 @@ private void getColumnDataColPathSpecified(Table table, Partition part, List<Fie
Map<String, String> tableProps = table.getParameters() == null ?
new HashMap<>() : table.getParameters();
if (table.isPartitionKey(colNames.get(0))) {
getColumnDataForPartitionKeyColumn(table, cols, colStats, colNames, tableProps);
boolean timestampAsLong =
MetastoreConf.getBoolVar(context.getConf(), MetastoreConf.ConfVars.HIVE_STATS_LEGACY_TIMESTAMP_AS_LONG);
getColumnDataForPartitionKeyColumn(table, cols, colStats, colNames, tableProps, timestampAsLong);
} else {
getColumnsForNotPartitionKeyColumn(table, cols, colStats, deserializer, colNames, tableProps);
}
@@ -222,7 +224,8 @@ private void getColumnDataColPathSpecified(Table table, Partition part, List<Fie
}

private void getColumnDataForPartitionKeyColumn(Table table, List<FieldSchema> cols,
List<ColumnStatisticsObj> colStats, List<String> colNames, Map<String, String> tableProps)
List<ColumnStatisticsObj> colStats, List<String> colNames, Map<String, String> tableProps,
boolean timestampAsLong)
throws HiveException, MetaException {
FieldSchema partCol = table.getPartColByName(colNames.get(0));
cols.add(partCol);
Expand All @@ -232,7 +235,7 @@ private void getColumnDataForPartitionKeyColumn(Table table, List<FieldSchema> c
TypeInfoUtils.getTypeInfoFromTypeString(partCol.getType()), null, false);
ColStatistics cs = StatsUtils.getColStatsForPartCol(ci, parts, context.getConf());
ColumnStatisticsData data = new ColumnStatisticsData();
StatsUtils.fillColumnStatisticsData(data, cs, partCol.getType());
StatsUtils.fillColumnStatisticsData(data, cs, partCol.getType(), timestampAsLong);
ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data);
colStats.add(cso);
StatsSetupConst.setColumnStatsState(tableProps, colNames);
@@ -34,6 +34,7 @@
import org.apache.hadoop.hive.ql.metadata.UniqueConstraint;
import org.apache.hadoop.hive.ql.metadata.formatting.MapBuilder;
import org.apache.hadoop.hive.ql.parse.TransformSpec;
import org.apache.hadoop.hive.serde.serdeConstants;

import java.io.DataOutputStream;
import java.util.ArrayList;
@@ -112,7 +113,12 @@ private static Map<String, Object> createColumnInfo(FieldSchema column, ColumnSt
} else if (statistics.isSetDoubleStats()) {
addDoubleStats(statistics, result);
} else if (statistics.isSetLongStats()) {
addLongStats(statistics, result);
if (serdeConstants.TIMESTAMP_TYPE_NAME.equals(column.getType())) {
// if the long stat represents a timestamp, format it as a timestamp
addLongTimeStampStats(statistics, result);
} else {
addLongStats(statistics, result);
}
} else if (statistics.isSetDateStats()) {
addDateStats(statistics, result);
} else if (statistics.isSetTimestampStats()) {
@@ -237,6 +243,21 @@ private static void addTimeStampStats(ColumnStatisticsData statistics, Map<Strin
}
}

private static void addLongTimeStampStats(ColumnStatisticsData statistics, Map<String, Object> result) {
// guard on the long stats union field, which actually carries these values
if (statistics.getLongStats().isSetLowValue()) {
result.put(COLUMN_MIN, ShowUtils.convertTimestampToString(statistics.getLongStats().getLowValue()));
}
if (statistics.getLongStats().isSetHighValue()) {
result.put(COLUMN_MAX, ShowUtils.convertTimestampToString(statistics.getLongStats().getHighValue()));
}
if (statistics.getLongStats().isSetNumNulls()) {
result.put(COLUMN_NUM_NULLS, statistics.getLongStats().getNumNulls());
}
if (statistics.getLongStats().isSetNumDVs()) {
result.put(COLUMN_DISTINCT_COUNT, statistics.getLongStats().getNumDVs());
}
}

private void addExtendedInfo(Table table, Partition partition, MapBuilder builder) {
if (partition != null) {
builder.put("partitionInfo", partition.getTPartition());
@@ -47,6 +47,7 @@
import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector;
import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector;
import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.metadata.Hive;
@@ -100,12 +101,16 @@ private ColumnStatistics constructColumnStatsFromInput()

statsObj.setColType(columnType);

boolean timestampAsLong =
MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.HIVE_STATS_LEGACY_TIMESTAMP_AS_LONG);

ColumnStatisticsData statsData = new ColumnStatisticsData();

if (columnType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
|| columnType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
|| columnType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
|| columnType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
|| columnType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
|| (timestampAsLong && columnType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME))) {
LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
longStats.setNumNullsIsSet(false);
longStats.setNumDVsIsSet(false);
@@ -272,7 +277,7 @@ private ColumnStatistics constructColumnStatsFromInput()
}
statsData.setDateStats(dateStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
} else if (!timestampAsLong && columnType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector();
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
26 changes: 17 additions & 9 deletions ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -832,13 +832,21 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String col
cs.setNumNulls(csd.getBinaryStats().getNumNulls());
} else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
cs.setNumNulls(csd.getTimestampStats().getNumNulls());
Long lowVal = (csd.getTimestampStats().getLowValue() != null) ? csd.getTimestampStats().getLowValue()
.getSecondsSinceEpoch() : null;
Long highVal = (csd.getTimestampStats().getHighValue() != null) ? csd.getTimestampStats().getHighValue()
.getSecondsSinceEpoch() : null;
cs.setRange(lowVal, highVal);
cs.setHistogram(csd.getTimestampStats().getHistogram());
if (csd.isSetTimestampStats()) {
cs.setNumNulls(csd.getTimestampStats().getNumNulls());
Long lowVal = (csd.getTimestampStats().getLowValue() != null) ? csd.getTimestampStats().getLowValue()
.getSecondsSinceEpoch() : null;
Long highVal = (csd.getTimestampStats().getHighValue() != null) ? csd.getTimestampStats().getHighValue()
.getSecondsSinceEpoch() : null;
cs.setRange(lowVal, highVal);
cs.setHistogram(csd.getTimestampStats().getHistogram());
} else if (csd.isSetLongStats()) {
cs.setNumNulls(csd.getLongStats().getNumNulls());
Long lowVal = csd.getLongStats().isSetLowValue() ? csd.getLongStats().getLowValue() : null;
Long highVal = csd.getLongStats().isSetHighValue() ? csd.getLongStats().getHighValue() : null;
cs.setRange(lowVal, highVal);
cs.setHistogram(csd.getLongStats().getHistogram());
}
} else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) {
cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
} else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
@@ -879,13 +887,13 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String col
}

public static void fillColumnStatisticsData(ColumnStatisticsData data, ColStatistics cs,
String colType) throws MetaException {
String colType, boolean timestampAsLong) throws MetaException {
ColStatistics.Range r = cs.getRange();
Object lowValue = (r != null) ? r.minValue : null;
Object highValue = (r != null) ? r.maxValue : null;
StatObjectConverter.fillColumnStatisticsData(colType, data, lowValue, highValue,
cs.getNumNulls(), cs.getCountDistint(), cs.getBitVectors(), cs.getHistogram(),
cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses());
cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses(), timestampAsLong);
}

private static void fillColStatisticsFromLongStatsData(ColStatistics cs, LongColumnStatsData longStats,
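To make the dual-read branch above concrete, here is a small sketch of the two ColumnStatisticsData shapes that getColStatistics now accepts for a timestamp column (the demo class and sample values are illustrative; constructors follow the usual Thrift-generated signatures, with numNulls and numDVs as required fields):

import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.Timestamp;
import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData;

public class TimestampStatsShapes {
  public static void main(String[] args) {
    // Legacy shape: epoch seconds carried in the long stats union field.
    LongColumnStatsData legacy = new LongColumnStatsData(0L, 10L);
    legacy.setLowValue(946684800L);   // 2000-01-01
    legacy.setHighValue(947462400L);  // 2000-01-10
    ColumnStatisticsData legacyData = new ColumnStatisticsData();
    legacyData.setLongStats(legacy);

    // Current shape: the dedicated timestamp stats union field.
    TimestampColumnStatsData current = new TimestampColumnStatsData(0L, 10L);
    current.setLowValue(new Timestamp(946684800L));
    current.setHighValue(new Timestamp(947462400L));
    ColumnStatisticsData currentData = new ColumnStatisticsData();
    currentData.setTimestampStats(current);

    // getColStatistics derives the same low/high range from either shape.
    System.out.println(legacyData.isSetLongStats() + " " + currentData.isSetTimestampStats());
  }
}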
@@ -102,7 +102,7 @@ void testGetColumnDataForPartitionKeyColumnDifferentRanges(
// Capture the ColStatistics passed to fillColumnStatisticsData
ArgumentCaptor<ColStatistics> colStatsCaptor = ArgumentCaptor.forClass(ColStatistics.class);
statsUtils.when(() -> StatsUtils.fillColumnStatisticsData(any(ColumnStatisticsData.class),
colStatsCaptor.capture(), any(String.class))).thenCallRealMethod();
colStatsCaptor.capture(), any(String.class), any(Boolean.class))).thenCallRealMethod();

DescTableOperation operation = new DescTableOperation(mockContext, mockDesc);

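For anyone updating similar tests, the widened static signature stubs the same way as before, with one extra matcher; a sketch assuming mockito-inline and the usual Hive import paths (the wrapper class and method are illustrative):

import static org.mockito.ArgumentMatchers.any;

import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.mockito.ArgumentCaptor;
import org.mockito.MockedStatic;
import org.mockito.Mockito;

public class FillColumnStatsStubbingSketch {
  void stubWithTimestampAsLong() {
    try (MockedStatic<StatsUtils> statsUtils = Mockito.mockStatic(StatsUtils.class)) {
      ArgumentCaptor<ColStatistics> captor = ArgumentCaptor.forClass(ColStatistics.class);
      // The trailing any(Boolean.class) matches the new timestampAsLong parameter.
      statsUtils.when(() -> StatsUtils.fillColumnStatisticsData(any(ColumnStatisticsData.class),
          captor.capture(), any(String.class), any(Boolean.class))).thenCallRealMethod();
      // ... exercise DescTableOperation here, then inspect captor.getValue() ...
    }
  }
}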
@@ -20,9 +20,15 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;

import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.ql.DriverFactory;
import org.apache.hadoop.hive.ql.IDriver;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
@@ -31,6 +37,7 @@
import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hive.testutils.HiveTestEnvSetup;
import org.apache.thrift.TException;
import org.hamcrest.Matchers;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -54,9 +61,12 @@ public static void beforeClass() throws Exception {
dropTables(driver);
String cmds[] = {
// @formatter:off
"create table t2(a integer, b string) STORED AS ORC",
"insert into t2 values (1, 'A1'),(2, 'A2'),(3, 'A3'),(4, 'A4'),(5, 'A5')," +
"(6, 'B1'),(7, 'B2'),(8, 'B3'),(9, 'B4'),(10, 'B5')",
"create table t2(a integer, b string, c timestamp) STORED AS ORC",
"insert into t2 values " +
"(1, 'A1', '2000-01-01'),(2, 'A2', '2000-01-02'),(3, 'A3', '2000-01-03')," +
"(4, 'A4', '2000-01-04'),(5, 'A5', '2000-01-05')," +
"(6, 'B1', '2000-01-06'),(7, 'B2', '2000-01-07'),(8, 'B3', '2000-01-08')," +
"(9, 'B4', '2000-01-09'),(10, 'B5', '2000-01-10')",
"analyze table t2 compute statistics for columns"
// @formatter:on
};
@@ -134,6 +144,37 @@ public void testFilterIntIn() throws ParseException, CommandProcessorException {

}

/** Test case for HIVE-29398. */
@Test
public void testTimestampAsLong() throws TException {
readStatsAndCheckTimestampField(true);
}

/** Test case for HIVE-29398. */
@Test
public void testTimestampAsTimestamp() throws TException {
readStatsAndCheckTimestampField(false);
}

private static void readStatsAndCheckTimestampField(boolean timestampAsLong) throws TException {
HiveConf conf = env_setup.getTestCtx().hiveConf;
boolean oldSetting = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.HIVE_STATS_LEGACY_TIMESTAMP_AS_LONG);
try {
MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.HIVE_STATS_LEGACY_TIMESTAMP_AS_LONG, timestampAsLong);

try (IMetaStoreClient client = new HiveMetaStoreClient(conf)) {
List<ColumnStatisticsObj> tableColumnStatistics =
client.getTableColumnStatistics("default", "t2", Collections.singletonList("c"), "hive");
ColumnStatisticsObj columnStatisticsObj = tableColumnStatistics.get(0);
ColumnStatisticsData statsData = columnStatisticsObj.getStatsData();
assertEquals(timestampAsLong, statsData.isSetLongStats());
assertEquals(!timestampAsLong, statsData.isSetTimestampStats());
}
} finally {
MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.HIVE_STATS_LEGACY_TIMESTAMP_AS_LONG, oldSetting);
}
}

private static IDriver createDriver() {
HiveConf conf = env_setup.getTestCtx().hiveConf;

@@ -1505,6 +1505,11 @@ public enum ConfVars {
STATS_AUTO_UPDATE_WORKER_COUNT("metastore.stats.auto.analyze.worker.count",
"hive.metastore.stats.auto.analyze.worker.count", 1,
"Number of parallel analyze commands to run for background stats update."),
HIVE_STATS_LEGACY_TIMESTAMP_AS_LONG("metastore.stats.legacy.timestamp.as.long",
"hive.metastore.stats.legacy.timestamp.as.long", false,
"If true, store the timestamp stats in the long stats field, " +
"instead of the newer timestamp stats field.\nUse only if a dependent client" +
" (e.g. Impala) does not yet support the timestamp stats field.\nThe default value is false."),
STORAGE_SCHEMA_READER_IMPL("metastore.storage.schema.reader.impl", "metastore.storage.schema.reader.impl",
SERDE_STORAGE_SCHEMA_READER_CLASS,
"The class to use to read schemas from storage. It must implement " +
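As a usage illustration of the new knob, a client can flip it programmatically the same way the new test does (the demo class is illustrative; MetastoreConf.newMetastoreConf() is the standard factory for a metastore Configuration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;

public class LegacyTimestampStatsToggle {
  public static void main(String[] args) {
    Configuration conf = MetastoreConf.newMetastoreConf();
    // Opt in to the legacy encoding: timestamp column stats are then written to
    // and read from the long stats union field instead of the timestamp stats field.
    MetastoreConf.setBoolVar(conf,
        MetastoreConf.ConfVars.HIVE_STATS_LEGACY_TIMESTAMP_AS_LONG, true);
    System.out.println(MetastoreConf.getBoolVar(conf,
        MetastoreConf.ConfVars.HIVE_STATS_LEGACY_TIMESTAMP_AS_LONG)); // prints true
  }
}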
@@ -70,6 +70,7 @@ class DirectSqlAggrStats {
private static final Logger LOG = LoggerFactory.getLogger(DirectSqlAggrStats.class);
private final PersistenceManager pm;
private final int batchSize;
private final boolean timestampAsLong;

@java.lang.annotation.Target(java.lang.annotation.ElementType.FIELD)
@java.lang.annotation.Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
@@ -88,6 +89,7 @@ public DirectSqlAggrStats(PersistenceManager pm, Configuration conf, String sche
configBatchSize = dbType.needsInBatching() ? 1000 : NO_BATCHING;
}
this.batchSize = configBatchSize;
this.timestampAsLong = MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.HIVE_STATS_LEGACY_TIMESTAMP_AS_LONG);
ImmutableMap.Builder<String, String> fieldNameToTableNameBuilder =
new ImmutableMap.Builder<>();

@@ -446,8 +448,8 @@ private ColumnStatisticsObj prepareCSObjWithAdjustedNDV(
StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data, row[LONG_LOW_VALUE.idx()],
row[LONG_HIGH_VALUE.idx()], row[DOUBLE_LOW_VALUE.idx()], row[DOUBLE_HIGH_VALUE.idx()], row[BIG_DECIMAL_LOW_VALUE.idx()], row[BIG_DECIMAL_HIGH_VALUE.idx()],
row[NUM_NULLS.idx()], row[NUM_DISTINCTS.idx()], row[AVG_COL_LEN.idx()], row[MAX_COL_LEN.idx()], row[NUM_TRUES.idx()], row[NUM_FALSES.idx()],
avgLong, avgDouble, avgDecimal, row[SUM_NUM_DISTINCTS.idx()],
useDensityFunctionForNDVEstimation, ndvTuner);
avgLong, avgDouble, avgDecimal, row[SUM_NUM_DISTINCTS.idx()], useDensityFunctionForNDVEstimation, ndvTuner,
timestampAsLong);
return cso;
}

@@ -457,8 +459,8 @@ private ColumnStatisticsObj prepareCSObj(Object[] row, int i) throws MetaExcepti
Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++], bitVector = row[i++],
histogram = row[i++], avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i];
StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, bitVector, histogram, avglen, maxlen, trues, falses);
StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data, llow, lhigh, dlow, dhigh, declow, dechigh,
nulls, dist, bitVector, histogram, avglen, maxlen, trues, falses, timestampAsLong);
return cso;
}

@@ -1805,7 +1805,6 @@ public List<Long> run(List<String> inputPartNames) throws MetaException {
return partsFound;
}


@SuppressWarnings("unchecked")
private <T> T executeWithArray(Query query, Object[] params, String sql) throws MetaException {
return executeWithArray(query, params, sql, -1);