-
Notifications
You must be signed in to change notification settings - Fork 29.2k
[SPARK-55440][SQL] Types Framework - Phase 1a - Core Type System Foundation #54223
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9794339
7102f42
68b3033
98ca6df
cfd5506
81b71ef
a192d6f
ef9784b
cf25684
4cb221a
9d640c2
8ac05f0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.types.ops | ||
|
|
||
| import org.apache.spark.sql.catalyst.encoders.AgnosticEncoder | ||
| import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.LocalTimeEncoder | ||
| import org.apache.spark.sql.catalyst.util.{FractionTimeFormatter, TimeFormatter} | ||
| import org.apache.spark.sql.types.{DataType, TimeType} | ||
|
|
||
| /** | ||
| * Client-side (spark-api) operations for TimeType. | ||
| * | ||
| * This class implements all TypeApiOps methods for the TIME data type: | ||
| * - String formatting: uses FractionTimeFormatter for consistent output | ||
| * - Row encoding: uses LocalTimeEncoder for java.time.LocalTime | ||
| * | ||
| * RELATIONSHIP TO TimeTypeOps: TimeTypeOps (in catalyst package) extends this class to inherit | ||
| * client-side operations while adding server-side operations (physical type, literals, etc.). | ||
| * | ||
| * @param t | ||
| * The TimeType with precision information | ||
| * @since 4.2.0 | ||
| */ | ||
/**
 * Client-side (spark-api) operations for TimeType.
 *
 * Implements every [[TypeApiOps]] method for the TIME data type:
 *   - String formatting: delegates to FractionTimeFormatter for consistent output
 *   - Row encoding: exposes LocalTimeEncoder for java.time.LocalTime
 *
 * RELATIONSHIP TO TimeTypeOps: TimeTypeOps (in the catalyst package) extends this class so it
 * inherits these client-side operations while adding the server-side ones (physical type,
 * literals, etc.).
 *
 * @param t
 *   The TimeType carrying the precision information
 * @since 4.2.0
 */
class TimeTypeApiOps(val t: TimeType) extends TypeApiOps {

  override def dataType: DataType = t

  // ==================== String Formatting ====================

  // Built lazily and marked @transient so serialized instances recreate it on demand.
  @transient
  private lazy val timeFormatter: TimeFormatter = new FractionTimeFormatter()

  // Internal TIME values are Longs (nanoseconds); NOTE(review): the unchecked cast assumes
  // callers always hand in a Long — confirm the upstream invariant.
  override def format(v: Any): String = timeFormatter.format(v.asInstanceOf[Long])

  override def toSQLValue(v: Any): String = s"TIME '${format(v)}'"

  // ==================== Row Encoding ====================

  override def getEncoder: AgnosticEncoder[_] = LocalTimeEncoder
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.types.ops | ||
|
|
||
| import org.apache.spark.sql.catalyst.encoders.AgnosticEncoder | ||
| import org.apache.spark.sql.internal.SqlApiConf | ||
| import org.apache.spark.sql.types.{DataType, TimeType} | ||
| import org.apache.spark.unsafe.types.UTF8String | ||
|
|
||
| /** | ||
| * Client-side (spark-api) type operations for the Types Framework. | ||
| * | ||
| * This trait consolidates all client-side operations that a data type must implement to be usable | ||
| * in the Spark SQL API layer. All methods are mandatory because a type cannot function correctly | ||
| * without string formatting (needed for CAST to STRING, EXPLAIN, SHOW) or encoding (needed for | ||
| * Dataset[T] operations). | ||
| * | ||
| * This single-interface design was chosen over separate FormatTypeOps/EncodeTypeOps traits to | ||
| * make it clear what a new type must implement - there is one mandatory interface, and it | ||
| * contains everything required. Optional capabilities (e.g., proto, Arrow, JDBC) are defined as | ||
| * separate traits that can be mixed in incrementally. | ||
| * | ||
| * RELATIONSHIP TO TypeOps: | ||
| * - TypeOps (catalyst): Server-side operations - physical types, literals, conversions | ||
| * - TypeApiOps (spark-api): Client-side operations - formatting, encoding | ||
| * | ||
| * The split exists because sql/api cannot depend on sql/catalyst. For TimeType, TimeTypeOps | ||
| * (catalyst) extends TimeTypeApiOps (sql-api) to inherit both sets of operations. | ||
| * | ||
| * @see | ||
| * TimeTypeApiOps for reference implementation | ||
| * @since 4.2.0 | ||
| */ | ||
/**
 * Client-side (spark-api) type operations for the Types Framework.
 *
 * Consolidates every client-side operation a data type must implement to be usable in the
 * Spark SQL API layer. All members are mandatory: a type cannot function without string
 * formatting (CAST to STRING, EXPLAIN, SHOW) or encoding (Dataset[T] operations).
 *
 * A single mandatory interface was chosen over separate FormatTypeOps/EncodeTypeOps traits so
 * that a new type's obligations are obvious — one interface holds everything required.
 * Optional capabilities (e.g., proto, Arrow, JDBC) live in separate traits that can be mixed
 * in incrementally.
 *
 * RELATIONSHIP TO TypeOps:
 *   - TypeOps (catalyst): Server-side operations - physical types, literals, conversions
 *   - TypeApiOps (spark-api): Client-side operations - formatting, encoding
 *
 * The split exists because sql/api cannot depend on sql/catalyst. For TimeType, TimeTypeOps
 * (catalyst) extends TimeTypeApiOps (sql-api) to inherit both sets of operations.
 *
 * @see
 *   TimeTypeApiOps for reference implementation
 * @since 4.2.0
 */
trait TypeApiOps extends Serializable {

  /** The DataType this Ops instance handles. */
  def dataType: DataType

  // ==================== String Formatting ====================

  /**
   * Formats an internal value as a display string.
   *
   * Used by CAST to STRING, EXPLAIN output, SHOW commands.
   *
   * @param v
   *   the internal value (e.g., Long nanoseconds for TimeType)
   * @return
   *   formatted string (e.g., "10:30:45.123456")
   */
  def format(v: Any): String

  /**
   * Formats an internal value as a UTF8String.
   *
   * The default simply wraps format(); override it when a faster path is available.
   */
  def formatUTF8(v: Any): UTF8String = {
    val rendered = format(v)
    UTF8String.fromString(rendered)
  }

  /**
   * Formats an internal value as a SQL literal string.
   *
   * @param v
   *   the internal value
   * @return
   *   SQL literal string (e.g., "TIME '10:30:00'")
   */
  def toSQLValue(v: Any): String

  // ==================== Row Encoding ====================

  /**
   * Returns the AgnosticEncoder for this type.
   *
   * Used by RowEncoder for Dataset[T] operations.
   *
   * @return
   *   AgnosticEncoder instance (e.g., LocalTimeEncoder for TimeType)
   */
  def getEncoder: AgnosticEncoder[_]
}
|
|
||
| /** | ||
| * Factory object for creating TypeApiOps instances. | ||
| * | ||
| * Returns Option to serve as both lookup and existence check - callers use getOrElse to fall | ||
| * through to legacy handling. The feature flag check is inside apply(), so callers don't need to | ||
| * check it separately. | ||
| */ | ||
| object TypeApiOps { | ||
|
|
||
| /** | ||
| * Returns a TypeApiOps instance for the given DataType, if supported by the framework. | ||
| * | ||
| * Returns None if the type is not supported or the framework is disabled. This is the single | ||
| * registration point for all client-side type operations. | ||
| * | ||
| * @param dt | ||
| * the DataType to get operations for | ||
| * @return | ||
| * Some(TypeApiOps) if supported, None otherwise | ||
| */ | ||
| def apply(dt: DataType): Option[TypeApiOps] = { | ||
| if (!SqlApiConf.get.typesFrameworkEnabled) return None | ||
| dt match { | ||
| case tt: TimeType => Some(new TimeTypeApiOps(tt)) | ||
| // Add new types here - single registration point | ||
| case _ => None | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.