Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/sql-ref-datatypes.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ Spark SQL and DataFrames support the following data types:
- `TimestampNTZType`: Timestamp without time zone (TIMESTAMP_NTZ). It represents values comprising the fields year, month, day,
hour, minute, and second. All operations are performed without taking any time zone into account.
- Note: TIMESTAMP in Spark is a user-specified alias for either the TIMESTAMP_LTZ or the TIMESTAMP_NTZ variation. Users can set the default timestamp type to `TIMESTAMP_LTZ` (the default value) or `TIMESTAMP_NTZ` via the configuration `spark.sql.timestampType`.
- `TimestampNTZNanosType(precision)` / `TimestampLTZNanosType(precision)`: Preview nanosecond-capable variants of `TIMESTAMP_NTZ` and `TIMESTAMP_LTZ` with fractional seconds precision `precision` in `[7, 9]`. Unparameterized `TIMESTAMP`, `TIMESTAMP_NTZ`, and `TIMESTAMP_LTZ` remain microsecond types. Enable the preview feature with `SET spark.sql.timestampNanosTypes.enabled=true;` before using these types in schemas or SQL.

* Interval types
- `YearMonthIntervalType(startField, endField)`: Represents a year-month interval which is made up of a contiguous subset of the following fields:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,13 +286,17 @@ private[sql] object DataTypeErrors extends DataTypeErrorsBase {

def checkTimestampNanosTypesEnabled(): Unit = {
if (!SqlApiConf.get.timestampNanosTypesEnabled) {
throw new SparkException(
errorClass = "FEATURE_NOT_ENABLED",
messageParameters = Map(
"featureName" -> "Nanosecond-precision timestamp types",
"configKey" -> "spark.sql.timestampNanosTypes.enabled",
"configValue" -> "true"),
cause = null)
throw timestampNanosTypesNotEnabledError()
}
}

/**
 * Builds the error that callers raise when a nanosecond-precision timestamp type is used
 * while the feature is not enabled. The exception is returned, not thrown, so the call site
 * decides where to `throw` it.
 *
 * @return a `SparkException` with error class `FEATURE_NOT_ENABLED` whose message parameters
 *         name the feature and the config key/value that turns it on
 */
def timestampNanosTypesNotEnabledError(): Throwable = {
  // Message parameters consumed by the FEATURE_NOT_ENABLED error template.
  val params = Map(
    "featureName" -> "Nanosecond-precision timestamp types",
    "configKey" -> "spark.sql.timestampNanosTypes.enabled",
    "configValue" -> "true")
  new SparkException(
    errorClass = "FEATURE_NOT_ENABLED",
    messageParameters = params,
    cause = null)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import java.util.TimeZone
import scala.util.Try

import org.apache.spark.sql.types.{AtomicType, TimestampType}
import org.apache.spark.util.SparkClassUtils
import org.apache.spark.util.{SparkClassUtils, SparkEnvUtils}

/**
* Configuration for all objects that are placed in the `sql/api` project. The normal way of
Expand Down Expand Up @@ -113,5 +113,5 @@ private[sql] object DefaultSqlApiConf extends SqlApiConf {
override def legacyParameterSubstitutionConstantsOnly: Boolean = false
override def legacyIdentifierClauseOnly: Boolean = false
override def typesFrameworkEnabled: Boolean = false
override def timestampNanosTypesEnabled: Boolean = false
override def timestampNanosTypesEnabled: Boolean = SparkEnvUtils.isTesting
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
import org.apache.spark.sql.catalyst.expressions.{Expression, RowOrdering}
import org.apache.spark.sql.catalyst.expressions.st.STExpressionUtils.isGeoSpatialType
import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalNumericType}
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase}
import org.apache.spark.sql.errors.{DataTypeErrors, QueryCompilationErrors, QueryErrorsBase}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._

Expand Down Expand Up @@ -139,10 +139,20 @@ object TypeUtils extends QueryErrorsBase {
if (dataType.existsRecursively(isInterval)) f
}

/**
 * Returns true when `dataType` itself, or any type nested inside it, is one of the
 * nanosecond-capable timestamp types (`TimestampNTZNanosType` or `TimestampLTZNanosType`).
 */
private def containsTimestampNanosType(dataType: DataType): Boolean = {
  // Predicate applied to every type visited by the recursive search.
  val isNanosTimestamp: DataType => Boolean = {
    case _: TimestampNTZNanosType | _: TimestampLTZNanosType => true
    case _ => false
  }
  dataType.existsRecursively(isNanosTimestamp)
}

def failUnsupportedDataType(dataType: DataType, conf: SQLConf): Unit = {
if (!conf.isTimeTypeEnabled && dataType.existsRecursively(_.isInstanceOf[TimeType])) {
throw QueryCompilationErrors.unsupportedTimeTypeError()
}
if (!conf.timestampNanosTypesEnabled && containsTimestampNanosType(dataType)) {
throw DataTypeErrors.timestampNanosTypesNotEnabledError()
}
if (!conf.geospatialEnabled && dataType.existsRecursively(isGeoSpatialType)) {
throw new org.apache.spark.sql.AnalysisException(
errorClass = "UNSUPPORTED_FEATURE.GEOSPATIAL_DISABLED",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -647,15 +647,19 @@ object SQLConf {
val TIMESTAMP_NANOS_TYPES_ENABLED =
buildConf("spark.sql.timestampNanosTypes.enabled")
.internal()
.doc("When true, the parameterized nanosecond-precision timestamp types " +
"TIMESTAMP_NTZ(p) / TIMESTAMP_LTZ(p) for p in [7, 9] are recognized as " +
"Spark SQL data types at user-facing entry points. Default is false because " +
"downstream execution paths (Cast, PhysicalDataType, AnyTimestampType, encoders, " +
"Connect proto) are not yet wired for these types. See SPARK-56822.")
.doc("When true, allows nanosecond-capable timestamp types TIMESTAMP_NTZ(p) and " +
"TIMESTAMP_LTZ(p) with fractional seconds precision p in [7, 9] at user-facing " +
"entry points, including the SQL parser, schemas, and analyzed plans. This is a " +
"preview feature under SPARK-56822 and may change in future releases. The default is " +
"false in production; tests enable it by default via Utils.isTesting. " +
"Unparameterized TIMESTAMP, TIMESTAMP_NTZ, and TIMESTAMP_LTZ remain microsecond " +
"types. Enabling this flag does not guarantee full SQL support: casts, Parquet read, " +
"typed literals, and other operations may still fail until their respective features " +
"are implemented.")
.version("4.2.0")
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@HyukjinKwon @cloud-fan @dongjoon-hyun I merged this config to master, will it be released in 4.2.0? Should I merge new features to branch-4.x?

.withBindingPolicy(ConfigBindingPolicy.SESSION)
.booleanConf
.createWithDefault(false)
.createWithDefault(Utils.isTesting)

val EXTENDED_EXPLAIN_PROVIDERS = buildConf("spark.sql.extendedExplainProviders")
.doc("A comma-separated list of classes that implement the" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,29 +211,31 @@ class DataTypeParserSuite extends SparkFunSuite with SQLHelper {
}
}

test("nanos timestamp parser surface is gated by SQL conf, disabled by default") {
test("nanos timestamp parser surface is gated by SQL conf when disabled") {
val gatedSpellings = Seq(
"TIMESTAMP_NTZ(7)",
"TIMESTAMP_LTZ(9)",
"TIMESTAMP(9) WITHOUT TIME ZONE",
"TIMESTAMP(9) WITH LOCAL TIME ZONE",
"TIMESTAMP(9)")
gatedSpellings.foreach { spelling =>
checkError(
exception = intercept[SparkException] {
CatalystSqlParser.parseDataType(spelling)
},
condition = "FEATURE_NOT_ENABLED",
parameters = Map(
"featureName" -> "Nanosecond-precision timestamp types",
"configKey" -> "spark.sql.timestampNanosTypes.enabled",
"configValue" -> "true"))
withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "false") {
gatedSpellings.foreach { spelling =>
checkError(
exception = intercept[SparkException] {
CatalystSqlParser.parseDataType(spelling)
},
condition = "FEATURE_NOT_ENABLED",
parameters = Map(
"featureName" -> "Nanosecond-precision timestamp types",
"configKey" -> "spark.sql.timestampNanosTypes.enabled",
"configValue" -> "true"))
}
// Bare unparameterized forms remain accepted even with the gate off.
assert(parse("TIMESTAMP_NTZ") === TimestampNTZType)
assert(parse("TIMESTAMP_LTZ") === TimestampType)
assert(parse("TIMESTAMP WITHOUT TIME ZONE") === TimestampNTZType)
assert(parse("TIMESTAMP WITH LOCAL TIME ZONE") === TimestampType)
}
// Bare unparameterized forms remain accepted even with the gate off.
assert(parse("TIMESTAMP_NTZ") === TimestampNTZType)
assert(parse("TIMESTAMP_LTZ") === TimestampType)
assert(parse("TIMESTAMP WITHOUT TIME ZONE") === TimestampNTZType)
assert(parse("TIMESTAMP WITH LOCAL TIME ZONE") === TimestampType)
}

// DataType parser accepts certain reserved keywords.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@

package org.apache.spark.sql.catalyst.util

import org.apache.spark.SparkFunSuite
import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess}
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils

class TypeUtilsSuite extends SparkFunSuite {
class TypeUtilsSuite extends SparkFunSuite with SQLHelper {

private def typeCheckPass(types: Seq[DataType]): Unit = {
assert(TypeUtils.checkForSameTypeInputExpr(types, "a") == TypeCheckSuccess)
Expand All @@ -44,4 +47,61 @@ class TypeUtilsSuite extends SparkFunSuite {
typeCheckPass(ArrayType(StringType, containsNull = true) ::
ArrayType(StringType, containsNull = false) :: Nil)
}

test("TIMESTAMP_NANOS_TYPES_ENABLED defaults to Utils.isTesting") {
  // With no explicit override, the active conf should mirror the testing flag.
  val expected = Utils.isTesting
  val actual = SQLConf.get.timestampNanosTypesEnabled
  assert(actual === expected)
}

test("failUnsupportedDataType rejects timestamp nanos types when preview is disabled") {
  // Message parameters expected on the FEATURE_NOT_ENABLED error for every rejected type.
  val expectedParams = Map(
    "featureName" -> "Nanosecond-precision timestamp types",
    "configKey" -> "spark.sql.timestampNanosTypes.enabled",
    "configValue" -> "true")

  // Both nanos variants at the top level, plus one nested inside a struct field.
  val rejectedTypes = Seq[DataType](
    TimestampNTZNanosType(9),
    TimestampLTZNanosType(9),
    StructType(StructField("ts", TimestampNTZNanosType(9)) :: Nil))

  withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "false") {
    val conf = SQLConf.get
    rejectedTypes.foreach { dataType =>
      checkError(
        exception = intercept[SparkException] {
          TypeUtils.failUnsupportedDataType(dataType, conf)
        },
        condition = "FEATURE_NOT_ENABLED",
        parameters = expectedParams)
    }
  }
}

test("failUnsupportedDataType allows timestamp nanos types when preview is enabled") {
  // Both nanos variants at the top level, plus one nested as an array element type.
  val acceptedTypes = Seq[DataType](
    TimestampNTZNanosType(9),
    TimestampLTZNanosType(9),
    ArrayType(TimestampLTZNanosType(9)))

  withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") {
    // Passing means no exception is thrown for any of the types.
    acceptedTypes.foreach { dataType =>
      TypeUtils.failUnsupportedDataType(dataType, SQLConf.get)
    }
  }
}

test("failUnsupportedDataType does not reject microsecond timestamp types") {
  // The existing microsecond timestamp types must pass even with the nanos flag off.
  val microsecondTypes = Seq[DataType](TimestampType, TimestampNTZType)

  withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "false") {
    microsecondTypes.foreach { dataType =>
      TypeUtils.failUnsupportedDataType(dataType, SQLConf.get)
    }
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1555,19 +1555,21 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper {
}

test("SPARK-56965: JSON parser rejects nanos timestamp types when preview flag is off") {
Seq(
"\"timestamp_ltz(7)\"" -> "Nanosecond-precision timestamp types",
"\"timestamp_ntz(9)\"" -> "Nanosecond-precision timestamp types").foreach {
case (json, featureName) =>
checkError(
exception = intercept[SparkException] {
DataType.fromJson(json)
},
condition = "FEATURE_NOT_ENABLED",
parameters = Map(
"featureName" -> featureName,
"configKey" -> "spark.sql.timestampNanosTypes.enabled",
"configValue" -> "true"))
withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "false") {
Seq(
"\"timestamp_ltz(7)\"" -> "Nanosecond-precision timestamp types",
"\"timestamp_ntz(9)\"" -> "Nanosecond-precision timestamp types").foreach {
case (json, featureName) =>
checkError(
exception = intercept[SparkException] {
DataType.fromJson(json)
},
condition = "FEATURE_NOT_ENABLED",
parameters = Map(
"featureName" -> featureName,
"configKey" -> "spark.sql.timestampNanosTypes.enabled",
"configValue" -> "true"))
}
}
}

Expand Down