native/core/src/execution/jni_api.rs (2 additions, 0 deletions)

@@ -55,6 +55,7 @@ use datafusion_spark::function::math::hex::SparkHex;
 use datafusion_spark::function::math::width_bucket::SparkWidthBucket;
 use datafusion_spark::function::string::char::CharFunc;
 use datafusion_spark::function::string::concat::SparkConcat;
+use datafusion_spark::function::string::ilike::SparkILike;
 use futures::poll;
 use futures::stream::StreamExt;
 use jni::objects::JByteBuffer;

@@ -400,6 +401,7 @@ fn register_datafusion_spark_function(session_ctx: &SessionContext) {
     session_ctx.register_udf(ScalarUDF::new_from_impl(SparkWidthBucket::default()));
     session_ctx.register_udf(ScalarUDF::new_from_impl(MapFromEntries::default()));
     session_ctx.register_udf(ScalarUDF::new_from_impl(SparkCrc32::default()));
+    session_ctx.register_udf(ScalarUDF::new_from_impl(SparkILike::default()));
 }

 /// Prepares arrow arrays for output.

@@ -154,6 +154,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
     classOf[Concat] -> CometConcat,
     classOf[Contains] -> CometScalarFunction("contains"),
     classOf[EndsWith] -> CometScalarFunction("ends_with"),
+    classOf[ILike] -> CometILike,
     classOf[InitCap] -> CometInitCap,
     classOf[Length] -> CometLength,
     classOf[Like] -> CometLike,

spark/src/main/scala/org/apache/comet/serde/strings.scala (14 additions, 1 deletion)

@@ -21,7 +21,7 @@ package org.apache.comet.serde

 import java.util.Locale

-import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Concat, ConcatWs, Expression, If, InitCap, IsNull, Left, Length, Like, Literal, Lower, RegExpReplace, Right, RLike, StringLPad, StringRepeat, StringRPad, StringSplit, Substring, Upper}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Concat, ConcatWs, Expression, If, ILike, InitCap, IsNull, Left, Length, Like, Literal, Lower, RegExpReplace, Right, RLike, StringLPad, StringRepeat, StringRPad, StringSplit, Substring, Upper}
 import org.apache.spark.sql.types.{BinaryType, DataTypes, LongType, StringType}
 import org.apache.spark.unsafe.types.UTF8String

@@ -238,6 +238,19 @@ object CometLike extends CometExpressionSerde[Like] {
   }
 }

+object CometILike extends CometExpressionSerde[ILike] {
+
+  override def convert(expr: ILike, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = {
+    if (expr.escapeChar != '\\') {
+      withInfo(expr, s"custom escape character ${expr.escapeChar} not supported in ILIKE")
+      return None
+    }
+    val childExpr = expr.children.map(exprToProtoInternal(_, inputs, binding))
+    val optExpr = scalarFunctionExprToProto("ilike", childExpr: _*)
+    optExprWithInfo(optExpr, expr, expr.children: _*)
+  }
+}
+
 object CometRLike extends CometExpressionSerde[RLike] {

   override def convert(expr: RLike, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = {

@@ -148,6 +148,31 @@ class CometStringExpressionSuite extends CometTestBase {
     }
   }

+  test("ilike") {
+    withSQLConf("spark.comet.caseConversion.enabled" -> "true") {
+      withParquetTable(Seq("Hello", "WORLD", "hello", "FooBar", null).map(Tuple1(_)), "tbl") {
+        checkSparkAnswerAndOperator("SELECT _1 ILIKE '%hello%' FROM tbl")
+        checkSparkAnswerAndOperator("SELECT _1 ILIKE 'H_llo' FROM tbl")
+        checkSparkAnswerAndOperator("SELECT _1 ILIKE '%WORLD%' FROM tbl")
+        checkSparkAnswerAndOperator("SELECT NULL ILIKE '%test%' FROM tbl")
+      }
+    }
+  }
+
+  test("ilike falls back when caseConversion is disabled") {
+    // ILIKE requires case-insensitive comparison which uses locale-specific
+    // case conversion (upper/lower). Rust's to_lowercase() follows Unicode
+    // default rules while Java uses locale-specific rules (e.g. Turkish I),
+    // so Comet falls back when caseConversion is disabled.
+    withSQLConf("spark.comet.caseConversion.enabled" -> "false") {
+      withParquetTable(Seq("Hello", "WORLD").map(Tuple1(_)), "tbl") {
+        checkSparkAnswerAndFallbackReason(
+          "SELECT _1 ILIKE '%hello%' FROM tbl",
+          "Comet is not compatible with Spark for case conversion")
+      }
+    }
+  }
+
   test("split string basic") {
     withSQLConf("spark.comet.expression.StringSplit.allowIncompatible" -> "true") {
       withParquetTable((0 until 5).map(i => (s"value$i,test$i", i)), "tbl") {
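
The fallback test's comment attributes the restriction to locale-sensitive case mapping on the JVM side. A minimal, illustrative sketch of that divergence, not part of this change: the object name is made up, and it assumes a JVM with Turkish locale data available.

import java.util.Locale

// Hypothetical demo (not in the PR): why case-insensitive matching cannot simply
// be delegated to Rust's Unicode-default lowercasing when Spark may apply
// locale-specific rules on the JVM side.
object IlikeCaseConversionDemo extends App {
  val turkish = new Locale("tr", "TR")
  // Under the Turkish locale, uppercase "I" lowercases to dotless "ı" (U+0131).
  println("TITLE".toLowerCase(turkish))     // tıtle
  // Under the root locale (the Unicode default mapping, which Rust's
  // str::to_lowercase also follows), "I" lowercases to plain "i".
  println("TITLE".toLowerCase(Locale.ROOT)) // title
}

With caseConversion disabled, as in the second test, Comet falls back to Spark for ILIKE rather than risk a mismatch between the two lowercasing behaviours.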