Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,5 +187,55 @@ Check out [Pinot documentation](https://docs.pinot.apache.org/) for a complete d
- [Pinot Architecture](https://docs.pinot.apache.org/basics/architecture)
- [Pinot Query Language](https://docs.pinot.apache.org/users/user-guide-query/pinot-query-language)

### UUID Logical Type

Pinot supports a logical `UUID` type for single-value columns. In v1, Pinot stores `UUID` values using the existing
16-byte `BYTES` representation, while schema definitions and query results use canonical lowercase RFC 4122 strings.

Schema example:
```json
{
"schemaName": "events",
"dimensionFieldSpecs": [
{
"name": "eventId",
"dataType": "UUID"
}
]
}
```

Query example:
```sql
SELECT eventId
FROM events
WHERE eventId = CAST('550e8400-e29b-41d4-a716-446655440000' AS UUID)
```

UUID conversion helpers:
```sql
SELECT
TO_UUID('550E8400-E29B-41D4-A716-446655440000'),
UUID_TO_STRING(eventId),
UUID_TO_BYTES(eventId),
BYTES_TO_UUID(eventIdBytes),
IS_UUID(eventIdBytes)
FROM events
```

Behavior notes:
- Pinot accepts canonical RFC 4122 UUID strings in either upper or lower case, both during ingestion and as arguments to functions and casts.
- Pinot always renders `UUID` results as canonical lowercase strings.
- `CAST(... AS UUID)` accepts canonical strings and 16-byte `BYTES` values.

Migration notes:
- Existing `BYTES` columns keep returning hex strings. Pinot only renders canonical UUID strings for columns declared as `UUID`.
- Pinot does not support changing the data type of an existing column in place. To adopt `UUID` for existing
`STRING` or `BYTES` UUID-shaped data, create a new `UUID` column or a new table/schema and reingest/backfill the
data into it.
- The `UUID` type itself does not require a segment-format or wire-format version bump in v1, but migrating existing
data still requires a rebuild or reingestion because changing a column's data type in place is unsupported.
- Multi-value UUID columns are not supported in v1.

## License
Apache Pinot is under [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import javax.annotation.Nullable;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
Expand Down Expand Up @@ -52,6 +53,7 @@ private FunctionUtils() {
put(Timestamp.class, PinotDataType.TIMESTAMP);
put(String.class, PinotDataType.STRING);
put(byte[].class, PinotDataType.BYTES);
put(UUID.class, PinotDataType.UUID);
put(int[].class, PinotDataType.PRIMITIVE_INT_ARRAY);
put(long[].class, PinotDataType.PRIMITIVE_LONG_ARRAY);
put(float[].class, PinotDataType.PRIMITIVE_FLOAT_ARRAY);
Expand All @@ -75,6 +77,7 @@ private FunctionUtils() {
put(Timestamp.class, PinotDataType.TIMESTAMP);
put(String.class, PinotDataType.STRING);
put(byte[].class, PinotDataType.BYTES);
put(UUID.class, PinotDataType.UUID);
put(int[].class, PinotDataType.PRIMITIVE_INT_ARRAY);
put(Integer[].class, PinotDataType.INTEGER_ARRAY);
put(long[].class, PinotDataType.PRIMITIVE_LONG_ARRAY);
Expand Down Expand Up @@ -103,6 +106,7 @@ private FunctionUtils() {
put(Timestamp.class, DataType.TIMESTAMP);
put(String.class, DataType.STRING);
put(byte[].class, DataType.BYTES);
put(UUID.class, DataType.UUID);
put(int[].class, DataType.INT);
put(long[].class, DataType.LONG);
put(float[].class, DataType.FLOAT);
Expand All @@ -125,6 +129,7 @@ private FunctionUtils() {
put(Timestamp.class, ColumnDataType.TIMESTAMP);
put(String.class, ColumnDataType.STRING);
put(byte[].class, ColumnDataType.BYTES);
put(UUID.class, ColumnDataType.UUID);
put(int[].class, ColumnDataType.INT_ARRAY);
put(long[].class, ColumnDataType.LONG_ARRAY);
put(float[].class, ColumnDataType.FLOAT_ARRAY);
Expand Down Expand Up @@ -197,6 +202,8 @@ public static RelDataType getRelDataType(RelDataTypeFactory typeFactory, Class<?
case STRING:
case JSON:
return typeFactory.createSqlType(SqlTypeName.VARCHAR);
case UUID:
return typeFactory.createSqlType(SqlTypeName.UUID);
case BYTES:
return typeFactory.createSqlType(SqlTypeName.VARBINARY);
case INT_ARRAY:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

import com.google.common.annotations.VisibleForTesting;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.Base64;
Expand All @@ -30,6 +29,7 @@
import org.apache.pinot.common.utils.URIUtils;
import org.apache.pinot.spi.annotations.ScalarFunction;
import org.apache.pinot.spi.utils.JsonUtils;
import org.apache.pinot.spi.utils.UuidUtils;


/**
Expand Down Expand Up @@ -442,12 +442,8 @@ public static String fromAscii(byte[] input) {
@ScalarFunction
public static byte[] toUUIDBytes(String input) {
try {
UUID uuid = UUID.fromString(input);
ByteBuffer bb = ByteBuffer.wrap(new byte[16]);
bb.putLong(uuid.getMostSignificantBits());
bb.putLong(uuid.getLeastSignificantBits());
return bb.array();
} catch (IllegalArgumentException e) {
return UuidUtils.toBytes(UUID.fromString(input));
} catch (Exception e) {
return null;
}
}
Expand All @@ -459,10 +455,7 @@ public static byte[] toUUIDBytes(String input) {
*/
@ScalarFunction
public static String fromUUIDBytes(byte[] input) {
ByteBuffer bb = ByteBuffer.wrap(input);
long firstLong = bb.getLong();
long secondLong = bb.getLong();
return new UUID(firstLong, secondLong).toString();
return UuidUtils.toString(input);
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.common.function.scalar.uuid;

import javax.annotation.Nullable;
import org.apache.pinot.common.function.FunctionInfo;
import org.apache.pinot.common.function.PinotScalarFunction;
import org.apache.pinot.common.utils.DataSchema.ColumnDataType;


/**
 * Shared skeleton for UUID scalar functions whose single argument may be either STRING or BYTES.
 *
 * <p>Concrete functions expose one {@link FunctionInfo} per overload through the two abstract accessors; this class
 * owns the logic that picks between them.
 */
abstract class AbstractStringOrBytesUuidFunction implements PinotScalarFunction {

  /** Returns the {@link FunctionInfo} describing the overload that takes a {@code String}. */
  protected abstract FunctionInfo getStringFunctionInfo();

  /** Returns the {@link FunctionInfo} describing the overload that takes a {@code byte[]}. */
  protected abstract FunctionInfo getBytesFunctionInfo();

  /**
   * Resolves the overload for a typed argument list.
   *
   * @param argumentTypes column data types of the call's arguments
   * @return the STRING or BYTES overload when exactly one argument of that type is supplied, otherwise {@code null}
   */
  @Nullable
  @Override
  public FunctionInfo getFunctionInfo(ColumnDataType[] argumentTypes) {
    FunctionInfo resolved = null;
    if (argumentTypes.length == 1) {
      switch (argumentTypes[0]) {
        case STRING:
          resolved = getStringFunctionInfo();
          break;
        case BYTES:
          resolved = getBytesFunctionInfo();
          break;
        default:
          // Any other argument type has no matching overload.
          break;
      }
    }
    return resolved;
  }

  /**
   * Resolves the overload when only the argument count is known.
   *
   * @param numArguments number of arguments in the call
   * @return the {@code String} overload for a single-argument call, otherwise {@code null}
   */
  @Nullable
  @Override
  public FunctionInfo getFunctionInfo(int numArguments) {
    if (numArguments != 1) {
      return null;
    }
    // Without type information the String overload is the one handed out.
    return getStringFunctionInfo();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.common.function.scalar.uuid;

import java.util.List;
import java.util.Set;
import javax.annotation.Nullable;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.pinot.common.function.FunctionInfo;
import org.apache.pinot.common.function.sql.PinotSqlFunction;
import org.apache.pinot.spi.annotations.ScalarFunction;
import org.apache.pinot.spi.utils.UuidUtils;


/**
 * Polymorphic scalar function that validates string or bytes values as UUID inputs.
 *
 * <p>The function is registered as {@code IS_UUID}; {@link #getNames()} additionally reports the underscore-free
 * spelling {@code ISUUID}. Choosing between the {@code String} and {@code byte[]} overloads is inherited from
 * {@link AbstractStringOrBytesUuidFunction}.
 *
 * <p>This implementation is stateless and thread-safe.
 */
@ScalarFunction(names = {"IS_UUID"})
public class IsUuidScalarFunction extends AbstractStringOrBytesUuidFunction {
  private static final String FUNCTION_NAME = "IS_UUID";
  private static final FunctionInfo STRING_FUNCTION_INFO;
  private static final FunctionInfo BYTES_FUNCTION_INFO;

  static {
    try {
      // NOTE(review): the trailing 'true' is presumably FunctionInfo's nullable-parameters flag, meaning null
      // arguments reach the isUuid overloads below - confirm against FunctionInfo. The overloads guard null for
      // that reason.
      STRING_FUNCTION_INFO =
          new FunctionInfo(IsUuidScalarFunction.class.getMethod("isUuid", String.class), IsUuidScalarFunction.class,
              true);
      BYTES_FUNCTION_INFO =
          new FunctionInfo(IsUuidScalarFunction.class.getMethod("isUuid", byte[].class), IsUuidScalarFunction.class,
              true);
    } catch (NoSuchMethodException e) {
      // Both overloads are declared in this class, so reaching here means the class itself is inconsistent.
      throw new RuntimeException(e);
    }
  }

  @Override
  protected FunctionInfo getStringFunctionInfo() {
    return STRING_FUNCTION_INFO;
  }

  @Override
  protected FunctionInfo getBytesFunctionInfo() {
    return BYTES_FUNCTION_INFO;
  }

  @Override
  public String getName() {
    return FUNCTION_NAME;
  }

  @Override
  public Set<String> getNames() {
    return Set.of(FUNCTION_NAME, "ISUUID");
  }

  /**
   * Describes the function to the SQL layer: a BOOLEAN result from one operand of the CHARACTER or BINARY family.
   */
  @Nullable
  @Override
  public PinotSqlFunction toPinotSqlFunction() {
    return new PinotSqlFunction(FUNCTION_NAME, ReturnTypes.BOOLEAN,
        OperandTypes.or(OperandTypes.family(List.of(SqlTypeFamily.CHARACTER)),
            OperandTypes.family(List.of(SqlTypeFamily.BINARY))));
  }

  /**
   * Checks whether a string is a valid UUID input.
   *
   * @param value candidate string, may be {@code null}
   * @return {@code true} when {@code value} is non-null and {@link UuidUtils#isUuid(String)} accepts it;
   *     {@code false} otherwise
   */
  public static boolean isUuid(@Nullable String value) {
    // A null value is never a UUID; answer here instead of depending on how UuidUtils treats null.
    return value != null && UuidUtils.isUuid(value);
  }

  /**
   * Checks whether a byte array is a valid UUID input.
   *
   * @param value candidate bytes, may be {@code null}
   * @return {@code true} when {@code value} is non-null and {@link UuidUtils#isUuid(byte[])} accepts it;
   *     {@code false} otherwise
   */
  public static boolean isUuid(@Nullable byte[] value) {
    // A null value is never a UUID; answer here instead of depending on how UuidUtils treats null.
    return value != null && UuidUtils.isUuid(value);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.common.function.scalar.uuid;

import java.util.List;
import java.util.Set;
import java.util.UUID;
import javax.annotation.Nullable;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.pinot.common.function.FunctionInfo;
import org.apache.pinot.common.function.sql.PinotSqlFunction;
import org.apache.pinot.spi.annotations.ScalarFunction;
import org.apache.pinot.spi.utils.UuidUtils;


/**
 * Polymorphic scalar function that converts string or bytes inputs into Pinot's logical UUID type.
 *
 * <p>The function is registered as {@code TO_UUID}; {@link #getNames()} additionally reports the underscore-free
 * spelling {@code TOUUID}. Choosing between the {@code String} and {@code byte[]} overloads is inherited from
 * {@link AbstractStringOrBytesUuidFunction}.
 *
 * <p>This implementation is stateless and thread-safe.
 */
@ScalarFunction(names = {"TO_UUID"})
public class ToUuidScalarFunction extends AbstractStringOrBytesUuidFunction {
  private static final String FUNCTION_NAME = "TO_UUID";
  private static final FunctionInfo STRING_FUNCTION_INFO;
  private static final FunctionInfo BYTES_FUNCTION_INFO;

  static {
    try {
      // NOTE(review): the trailing 'true' is presumably FunctionInfo's nullable-parameters flag, which would be why
      // the toUuid overloads below handle null themselves - confirm against FunctionInfo.
      STRING_FUNCTION_INFO =
          new FunctionInfo(ToUuidScalarFunction.class.getMethod("toUuid", String.class), ToUuidScalarFunction.class,
              true);
      BYTES_FUNCTION_INFO =
          new FunctionInfo(ToUuidScalarFunction.class.getMethod("toUuid", byte[].class), ToUuidScalarFunction.class,
              true);
    } catch (NoSuchMethodException e) {
      // Both overloads are declared in this class, so reaching here means the class itself is inconsistent.
      throw new RuntimeException(e);
    }
  }

  @Override
  protected FunctionInfo getStringFunctionInfo() {
    return STRING_FUNCTION_INFO;
  }

  @Override
  protected FunctionInfo getBytesFunctionInfo() {
    return BYTES_FUNCTION_INFO;
  }

  @Override
  public String getName() {
    return FUNCTION_NAME;
  }

  @Override
  public Set<String> getNames() {
    return Set.of(FUNCTION_NAME, "TOUUID");
  }

  /**
   * Describes the function to the SQL layer: a UUID result from one operand of the CHARACTER or BINARY family.
   */
  @Nullable
  @Override
  public PinotSqlFunction toPinotSqlFunction() {
    return new PinotSqlFunction(FUNCTION_NAME, ReturnTypes.explicit(org.apache.calcite.sql.type.SqlTypeName.UUID),
        OperandTypes.or(OperandTypes.family(List.of(SqlTypeFamily.CHARACTER)),
            OperandTypes.family(List.of(SqlTypeFamily.BINARY))));
  }

  /**
   * Converts a string to a {@link UUID}.
   *
   * @param value string to convert, may be {@code null}
   * @return the result of {@link UuidUtils#toUUID(String)}, or {@code null} when {@code value} is {@code null}
   */
  @Nullable
  public static UUID toUuid(@Nullable String value) {
    if (value == null) {
      return null;
    }
    return UuidUtils.toUUID(value);
  }

  /**
   * Converts a byte array to a {@link UUID}.
   *
   * @param value bytes to convert, may be {@code null}
   * @return the result of {@link UuidUtils#toUUID(byte[])}, or {@code null} when {@code value} is {@code null}
   */
  @Nullable
  public static UUID toUuid(@Nullable byte[] value) {
    if (value == null) {
      return null;
    }
    return UuidUtils.toUUID(value);
  }
}
Loading
Loading