Skip to content

Commit 8fb7905

Browse files
committed
fix: apply support-level consistency fixes surfaced by datetime audit
Mechanical fixes for the support-level / reason alignment issues found during the datetime audit. No behavioural changes; the only observable effect is that EXPLAIN-time fallback messages now include the specific reason instead of a generic "not fully compatible with Spark". - TimeFieldSerde companion (new) hoists the shared TimestampNTZ reason string used by CometHour, CometMinute, and CometSecond, mirroring the existing UTCTimestampSerde pattern. The three serdes now share one reason and one support-level helper. - CometTruncDate extracts the duplicated reason strings into private vals and corrects the wording drift (the inline reason said "Invalid" while the docs reason said "Non-literal"; they now match). - CometTruncTimestamp adds the missing non-literal-format reason to getIncompatibleReasons, adds the missing getUnsupportedReasons override for unsupported format literals, and extracts both reasons into private vals. - CometSecondsToTimestamp adds the missing getUnsupportedReasons override so the Compatibility Guide reflects which input types are supported. - CometHours and CometDays add getSupportLevel and getUnsupportedReasons overrides so the unsupported-input-type fallback surfaces in EXPLAIN output and the Compatibility Guide; the dispatcher now handles the fall-back uniformly and the withInfo call in convert is no longer needed for those branches. - CometFromUnixTime moves the format-pattern check out of convert into getSupportLevel (returning Unsupported for non-default patterns and Incompatible for the DataFusion timestamp-range issue on default patterns). Reasons are shared via private vals; getUnsupportedReasons and getIncompatibleReasons both populated. As a side effect the fallback message for non-default formats now includes the specific reason ("Only the default datetime format pattern...") rather than the generic "not fully compatible with Spark"; updated the existing from_unix_time.sql expect_fallback assertions accordingly. The CometDateFormat and CometUnixTimestamp findings need deeper semantics analysis and are left for follow-up.
1 parent 5cc97d4 commit 8fb7905

3 files changed

Lines changed: 110 additions & 86 deletions

File tree

spark/src/main/scala/org/apache/comet/serde/datetime.scala

Lines changed: 88 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import java.util.Locale
2323

2424
import org.apache.spark.sql.catalyst.expressions.{AddMonths, Attribute, ConvertTimezone, DateAdd, DateDiff, DateFormatClass, DateFromUnixDate, DateSub, DayOfMonth, DayOfWeek, DayOfYear, Days, FromUTCTimestamp, GetDateField, Hour, Hours, LastDay, Literal, MakeDate, MakeTimestamp, MicrosToTimestamp, MillisToTimestamp, Minute, Month, MonthsBetween, NextDay, Quarter, Second, SecondsToTimestamp, ToUnixTimestamp, ToUTCTimestamp, TruncDate, TruncTimestamp, UnixDate, UnixMicros, UnixMillis, UnixSeconds, UnixTimestamp, WeekDay, WeekOfYear, Year}
2525
import org.apache.spark.sql.internal.SQLConf
26-
import org.apache.spark.sql.types.{DateType, DoubleType, FloatType, IntegerType, LongType, StringType, TimestampNTZType, TimestampType}
26+
import org.apache.spark.sql.types.{DataType, DateType, DoubleType, FloatType, IntegerType, LongType, StringType, TimestampNTZType, TimestampType}
2727
import org.apache.spark.unsafe.types.UTF8String
2828

2929
import org.apache.comet.CometConf
@@ -179,23 +179,24 @@ object CometQuarter extends CometExpressionSerde[Quarter] with CometExprGetDateF
179179
}
180180
}
181181

182-
object CometHour extends CometExpressionSerde[Hour] {
182+
private object TimeFieldSerde {
183+
val timestampNtzIncompatReason: String =
184+
"Incorrectly applies timezone conversion to TimestampNTZ inputs" +
185+
" (https://github.com/apache/datafusion-comet/issues/3180)"
183186

184-
val incompatReason: String = "Incorrectly applies timezone conversion to TimestampNTZ inputs" +
185-
" (https://github.com/apache/datafusion-comet/issues/3180)"
187+
def supportLevelForChild(childType: DataType): SupportLevel = childType match {
188+
case TimestampNTZType => Incompatible(Some(timestampNtzIncompatReason))
189+
case _ => Compatible()
190+
}
191+
}
186192

187-
override def getIncompatibleReasons(): Seq[String] = Seq(incompatReason)
193+
object CometHour extends CometExpressionSerde[Hour] {
188194

189-
override def getSupportLevel(expr: Hour): SupportLevel = {
190-
if (expr.child.dataType == TimestampNTZType) {
191-
Incompatible(
192-
Some(
193-
"Incorrectly applies timezone conversion to TimestampNTZ inputs" +
194-
" (https://github.com/apache/datafusion-comet/issues/3180)"))
195-
} else {
196-
Compatible()
197-
}
198-
}
195+
override def getIncompatibleReasons(): Seq[String] =
196+
Seq(TimeFieldSerde.timestampNtzIncompatReason)
197+
198+
override def getSupportLevel(expr: Hour): SupportLevel =
199+
TimeFieldSerde.supportLevelForChild(expr.child.dataType)
199200

200201
override def convert(
201202
expr: Hour,
@@ -224,20 +225,11 @@ object CometHour extends CometExpressionSerde[Hour] {
224225

225226
object CometMinute extends CometExpressionSerde[Minute] {
226227

227-
override def getIncompatibleReasons(): Seq[String] = Seq(
228-
"Incorrectly applies timezone conversion to TimestampNTZ inputs" +
229-
" (https://github.com/apache/datafusion-comet/issues/3180)")
230-
231-
override def getSupportLevel(expr: Minute): SupportLevel = {
232-
if (expr.child.dataType == TimestampNTZType) {
233-
Incompatible(
234-
Some(
235-
"Incorrectly applies timezone conversion to TimestampNTZ inputs" +
236-
" (https://github.com/apache/datafusion-comet/issues/3180)"))
237-
} else {
238-
Compatible()
239-
}
240-
}
228+
override def getIncompatibleReasons(): Seq[String] =
229+
Seq(TimeFieldSerde.timestampNtzIncompatReason)
230+
231+
override def getSupportLevel(expr: Minute): SupportLevel =
232+
TimeFieldSerde.supportLevelForChild(expr.child.dataType)
241233

242234
override def convert(
243235
expr: Minute,
@@ -266,20 +258,11 @@ object CometMinute extends CometExpressionSerde[Minute] {
266258

267259
object CometSecond extends CometExpressionSerde[Second] {
268260

269-
override def getIncompatibleReasons(): Seq[String] = Seq(
270-
"Incorrectly applies timezone conversion to TimestampNTZ inputs" +
271-
" (https://github.com/apache/datafusion-comet/issues/3180)")
272-
273-
override def getSupportLevel(expr: Second): SupportLevel = {
274-
if (expr.child.dataType == TimestampNTZType) {
275-
Incompatible(
276-
Some(
277-
"Incorrectly applies timezone conversion to TimestampNTZ inputs" +
278-
" (https://github.com/apache/datafusion-comet/issues/3180)"))
279-
} else {
280-
Compatible()
281-
}
282-
}
261+
override def getIncompatibleReasons(): Seq[String] =
262+
Seq(TimeFieldSerde.timestampNtzIncompatReason)
263+
264+
override def getSupportLevel(expr: Second): SupportLevel =
265+
TimeFieldSerde.supportLevelForChild(expr.child.dataType)
283266

284267
override def convert(
285268
expr: Second,
@@ -437,6 +420,11 @@ object CometMakeDate extends CometScalarFunction[MakeDate]("make_date")
437420

438421
object CometSecondsToTimestamp
439422
extends CometScalarFunction[SecondsToTimestamp]("seconds_to_timestamp") {
423+
424+
override def getUnsupportedReasons(): Seq[String] = Seq(
425+
"Only `IntegerType`, `LongType`, `FloatType`, and `DoubleType` inputs are supported." +
426+
" `DecimalType`, `ByteType`, and `ShortType` fall back to Spark.")
427+
440428
override def getSupportLevel(expr: SecondsToTimestamp): SupportLevel =
441429
expr.child.dataType match {
442430
case IntegerType | LongType | FloatType | DoubleType => Compatible()
@@ -482,8 +470,14 @@ object CometTruncDate extends CometExpressionSerde[TruncDate] {
482470
val supportedFormats: Seq[String] =
483471
Seq("year", "yyyy", "yy", "quarter", "mon", "month", "mm", "week")
484472

485-
override def getIncompatibleReasons(): Seq[String] = Seq(
486-
"Non-literal format strings will throw an exception instead of returning NULL")
473+
private val nonLiteralFormatIncompatReason: String =
474+
"Non-literal format strings will throw an exception instead of returning NULL"
475+
476+
private def unsupportedFormatReason(fmt: Any): String =
477+
s"Format $fmt is not supported. Only the following formats are supported: " +
478+
supportedFormats.mkString(", ")
479+
480+
override def getIncompatibleReasons(): Seq[String] = Seq(nonLiteralFormatIncompatReason)
487481

488482
override def getUnsupportedReasons(): Seq[String] = Seq(
489483
"Only the following formats are supported: " + supportedFormats.mkString(", "))
@@ -494,11 +488,10 @@ object CometTruncDate extends CometExpressionSerde[TruncDate] {
494488
if (supportedFormats.contains(fmt.toString.toLowerCase(Locale.ROOT))) {
495489
Compatible()
496490
} else {
497-
Unsupported(Some(s"Format $fmt is not supported"))
491+
Unsupported(Some(unsupportedFormatReason(fmt)))
498492
}
499493
case _ =>
500-
Incompatible(
501-
Some("Invalid format strings will throw an exception instead of returning NULL"))
494+
Incompatible(Some(nonLiteralFormatIncompatReason))
502495
}
503496
}
504497

@@ -521,10 +514,6 @@ object CometTruncDate extends CometExpressionSerde[TruncDate] {
521514

522515
object CometTruncTimestamp extends CometExpressionSerde[TruncTimestamp] {
523516

524-
override def getIncompatibleReasons(): Seq[String] = Seq(
525-
"Produces incorrect results when used with non-UTC timezones. Compatible when timezone is" +
526-
" UTC. (https://github.com/apache/datafusion-comet/issues/2649)")
527-
528517
val supportedFormats: Seq[String] =
529518
Seq(
530519
"year",
@@ -543,6 +532,23 @@ object CometTruncTimestamp extends CometExpressionSerde[TruncTimestamp] {
543532
"millisecond",
544533
"microsecond")
545534

535+
private val nonUtcIncompatReason: String =
536+
"Produces incorrect results when used with non-UTC timezones. Compatible when timezone is" +
537+
" UTC. (https://github.com/apache/datafusion-comet/issues/2649)"
538+
539+
private val nonLiteralFormatIncompatReason: String =
540+
"Non-literal format strings will throw an exception instead of returning NULL"
541+
542+
private def unsupportedFormatReason(fmt: Any): String =
543+
s"Format $fmt is not supported. Only the following formats are supported: " +
544+
supportedFormats.mkString(", ")
545+
546+
override def getIncompatibleReasons(): Seq[String] =
547+
Seq(nonUtcIncompatReason, nonLiteralFormatIncompatReason)
548+
549+
override def getUnsupportedReasons(): Seq[String] = Seq(
550+
"Only the following formats are supported: " + supportedFormats.mkString(", "))
551+
546552
override def getSupportLevel(expr: TruncTimestamp): SupportLevel = {
547553
val timezone = expr.timeZoneId.getOrElse("UTC")
548554
val isUtc = timezone == "UTC" || timezone == "Etc/UTC"
@@ -552,17 +558,13 @@ object CometTruncTimestamp extends CometExpressionSerde[TruncTimestamp] {
552558
if (isUtc) {
553559
Compatible()
554560
} else {
555-
Incompatible(
556-
Some(
557-
s"Incorrect results in non-UTC timezone '$timezone'" +
558-
" (https://github.com/apache/datafusion-comet/issues/2649)"))
561+
Incompatible(Some(nonUtcIncompatReason))
559562
}
560563
} else {
561-
Unsupported(Some(s"Format $fmt is not supported"))
564+
Unsupported(Some(unsupportedFormatReason(fmt)))
562565
}
563566
case _ =>
564-
Incompatible(
565-
Some("Invalid format strings will throw an exception instead of returning NULL"))
567+
Incompatible(Some(nonLiteralFormatIncompatReason))
566568
}
567569
}
568570

@@ -700,24 +702,27 @@ object CometDateFormat extends CometExpressionSerde[DateFormatClass] {
700702
* without applying any session timezone offset.
701703
*/
702704
object CometHours extends CometExpressionSerde[Hours] {
705+
706+
override def getUnsupportedReasons(): Seq[String] = Seq(
707+
"Only `TimestampType` and `TimestampNTZType` inputs are supported.")
708+
709+
override def getSupportLevel(expr: Hours): SupportLevel = expr.child.dataType match {
710+
case TimestampType | TimestampNTZType => Compatible()
711+
case other => Unsupported(Some(s"Hours does not support input type: $other"))
712+
}
713+
703714
override def convert(
704715
expr: Hours,
705716
inputs: Seq[Attribute],
706717
binding: Boolean): Option[ExprOuterClass.Expr] = {
707-
val optExpr = expr.child.dataType match {
708-
case TimestampType | TimestampNTZType =>
709-
exprToProtoInternal(expr.child, inputs, binding).map { childExpr =>
710-
val builder = ExprOuterClass.HoursTransform.newBuilder()
711-
builder.setChild(childExpr)
718+
val optExpr = exprToProtoInternal(expr.child, inputs, binding).map { childExpr =>
719+
val builder = ExprOuterClass.HoursTransform.newBuilder()
720+
builder.setChild(childExpr)
712721

713-
ExprOuterClass.Expr
714-
.newBuilder()
715-
.setHoursTransform(builder)
716-
.build()
717-
}
718-
case other =>
719-
withInfo(expr, s"Hours does not support input type: $other")
720-
None
722+
ExprOuterClass.Expr
723+
.newBuilder()
724+
.setHoursTransform(builder)
725+
.build()
721726
}
722727
optExprWithInfo(optExpr, expr, expr.child)
723728
}
@@ -734,6 +739,16 @@ object CometHours extends CometExpressionSerde[Hours] {
734739
* The first cast respects the session timezone to correctly determine the date boundary.
735740
*/
736741
object CometDays extends CometExpressionSerde[Days] {
742+
743+
override def getUnsupportedReasons(): Seq[String] = Seq(
744+
"Only `DateType` and `TimestampType` inputs are supported." +
745+
" `TimestampNTZType` is not supported.")
746+
747+
override def getSupportLevel(expr: Days): SupportLevel = expr.child.dataType match {
748+
case DateType | TimestampType => Compatible()
749+
case other => Unsupported(Some(s"Days does not support input type: $other"))
750+
}
751+
737752
override def convert(
738753
expr: Days,
739754
inputs: Seq[Attribute],
@@ -748,9 +763,7 @@ object CometDays extends CometExpressionSerde[Days] {
748763
childExpr.flatMap { child =>
749764
CometCast.castToProto(expr, Some(timezone), DateType, child, CometEvalMode.LEGACY)
750765
}
751-
case other =>
752-
withInfo(expr, s"Days does not support input type: $other")
753-
None
766+
case _ => None
754767
}
755768

756769
// Convert DateType to IntegerType (days since epoch)

spark/src/main/scala/org/apache/comet/serde/unixtime.scala

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,26 @@ import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithIn
2929
// https://github.com/apache/datafusion/issues/16594
3030
object CometFromUnixTime extends CometExpressionSerde[FromUnixTime] {
3131

32-
override def getIncompatibleReasons(): Seq[String] = Seq(
33-
"Only supports the default datetime format pattern `yyyy-MM-dd HH:mm:ss`." +
34-
" DataFusion's valid timestamp range differs from Spark" +
35-
" (https://github.com/apache/datafusion/issues/16594)")
32+
private val incompatReason: String =
33+
"DataFusion's valid timestamp range differs from Spark" +
34+
" (https://github.com/apache/datafusion/issues/16594)"
3635

37-
override def getSupportLevel(expr: FromUnixTime): SupportLevel = Incompatible(None)
36+
private val unsupportedFormatReason: String =
37+
"Only the default datetime format pattern `yyyy-MM-dd HH:mm:ss` is supported;" +
38+
" other patterns fall back to Spark" +
39+
" (https://github.com/apache/datafusion/issues/16577)"
40+
41+
override def getIncompatibleReasons(): Seq[String] = Seq(incompatReason)
42+
43+
override def getUnsupportedReasons(): Seq[String] = Seq(unsupportedFormatReason)
44+
45+
override def getSupportLevel(expr: FromUnixTime): SupportLevel = {
46+
if (expr.format != Literal(TimestampFormatter.defaultPattern)) {
47+
Unsupported(Some(unsupportedFormatReason))
48+
} else {
49+
Incompatible(Some(incompatReason))
50+
}
51+
}
3852

3953
override def convert(
4054
expr: FromUnixTime,
@@ -48,10 +62,7 @@ object CometFromUnixTime extends CometExpressionSerde[FromUnixTime] {
4862
val formatExpr = exprToProtoInternal(Literal("%Y-%m-%d %H:%M:%S"), inputs, binding)
4963
val timeZone = exprToProtoInternal(Literal(expr.timeZoneId.orNull), inputs, binding)
5064

51-
if (expr.format != Literal(TimestampFormatter.defaultPattern)) {
52-
withInfo(expr, "Datetime pattern format is unsupported")
53-
None
54-
} else if (secExpr.isDefined && formatExpr.isDefined) {
65+
if (secExpr.isDefined && formatExpr.isDefined) {
5566
val timestampExpr =
5667
scalarFunctionExprToProto("from_unixtime", Seq(secExpr, timeZone): _*)
5768
val optExpr = scalarFunctionExprToProto("to_char", Seq(timestampExpr, formatExpr): _*)

spark/src/test/resources/sql-tests/expressions/datetime/from_unix_time.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@ INSERT INTO test_from_unix_time VALUES (0), (1718451045), (-1), (NULL), (2147483
2424
query expect_fallback(not fully compatible with Spark)
2525
SELECT from_unixtime(t) FROM test_from_unix_time
2626

27-
query expect_fallback(not fully compatible with Spark)
27+
query expect_fallback(Only the default datetime format pattern)
2828
SELECT from_unixtime(t, 'yyyy-MM-dd') FROM test_from_unix_time
2929

3030
-- literal arguments
3131
query expect_fallback(not fully compatible with Spark)
3232
SELECT from_unixtime(0)
3333

34-
query expect_fallback(not fully compatible with Spark)
34+
query expect_fallback(Only the default datetime format pattern)
3535
SELECT from_unixtime(1718451045, 'yyyy-MM-dd')

0 commit comments

Comments
 (0)