Skip to content

Commit 1227bc7

Browse files
authored
[Chore](Variant) set enable_variant_schema_auto_cast default true and enhance tests (#60628)
1. Set `enable_variant_schema_auto_cast` to **true** by default. 2. Improve the implementation and expand test coverage for **auto-cast** (see PR #60362). 3. Add support for **multi-cast** scenarios when validating `match` expressions. 4. Update `test_predefine_typed_to_sparse` to explicitly set `enable_variant_schema_auto_cast = false`.
1 parent 26e98e9 commit 1227bc7

8 files changed

Lines changed: 168 additions & 17 deletions

File tree

be/src/vec/common/variant_util.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,9 @@ struct GlobRegexCacheEntry {
136136
std::list<std::string>::iterator lru_it;
137137
};
138138

139-
std::mutex g_glob_regex_cache_mutex;
140-
std::list<std::string> g_glob_regex_cache_lru;
141-
std::unordered_map<std::string, GlobRegexCacheEntry> g_glob_regex_cache;
139+
static std::mutex g_glob_regex_cache_mutex;
140+
static std::list<std::string> g_glob_regex_cache_lru;
141+
static std::unordered_map<std::string, GlobRegexCacheEntry> g_glob_regex_cache;
142142

143143
std::shared_ptr<RE2> get_or_build_re2(const std::string& glob_pattern) {
144144
{

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ private boolean containsVariantTypeOutsideCast(Expression expr) {
217217
}
218218

219219
private boolean containsVariantTypeOutsideCast(Expression expr, boolean underCast) {
220-
boolean nextUnderCast = underCast || expr instanceof Cast;
220+
boolean nextUnderCast = underCast || (expr instanceof Cast && !expr.getDataType().isVariantType());
221221
if (!nextUnderCast && expr.getDataType().isVariantType()) {
222222
return true;
223223
}

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -735,10 +735,7 @@ private boolean isEnableVariantSchemaAutoCast(ExpressionRewriteContext context)
735735
return false;
736736
}
737737
SessionVariable sessionVariable = context.cascadesContext.getConnectContext().getSessionVariable();
738-
if (sessionVariable == null || !sessionVariable.isEnableVariantSchemaAutoCast()) {
739-
return false;
740-
}
741-
return sessionVariable.isEnableVariantSchemaAutoCast();
738+
return sessionVariable != null && sessionVariable.isEnableVariantSchemaAutoCast();
742739
}
743740

744741
private Expression wrapVariantElementAtWithCast(Expression expr) {
@@ -808,6 +805,9 @@ private Expression maybeCastAliasExpression(Alias alias, ExpressionRewriteContex
808805
return alias;
809806
}
810807
Expression child = alias.child();
808+
if (!(child instanceof ElementAt)) {
809+
return alias;
810+
}
811811
Expression casted = wrapVariantElementAtWithCast(child);
812812
if (casted == child) {
813813
return alias;

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CheckMatchExpression.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,7 @@ private Plan checkChildren(LogicalFilter<? extends Plan> filter) {
4949
for (Expression expr : expressions) {
5050
if (expr instanceof Match) {
5151
Match matchExpression = (Match) expr;
52-
boolean isSlotReference = matchExpression.left() instanceof SlotReference;
53-
boolean isCastChildWithSlotReference = (matchExpression.left() instanceof Cast
54-
&& matchExpression.left().child(0) instanceof SlotReference);
55-
if (!(isSlotReference || isCastChildWithSlotReference)
52+
if (!isSlotOrCastChainOnSlot(matchExpression.left())
5653
|| !(matchExpression.right() instanceof Literal)) {
5754
throw new AnalysisException(String.format("Only support match left operand is SlotRef,"
5855
+ " right operand is Literal. But meet expression %s", matchExpression));
@@ -61,4 +58,12 @@ private Plan checkChildren(LogicalFilter<? extends Plan> filter) {
6158
}
6259
return filter;
6360
}
61+
62+
private boolean isSlotOrCastChainOnSlot(Expression expression) {
63+
Expression current = expression;
64+
while (current instanceof Cast) {
65+
current = current.child(0);
66+
}
67+
return current instanceof SlotReference;
68+
}
6469
}

fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2452,7 +2452,7 @@ public boolean isEnableHboNonStrictMatchingMode() {
24522452
+ "因为外表会存在表的 schema 中 char 或者 varchar 列的最大长度和底层 parquet 或者 orc 文件中的 schema 不一致"
24532453
+ "的情况。此时开启改选项,会按照表的 schema 中的最大长度进行截断。",
24542454
"Whether to truncate char or varchar columns according to the table's schema. "
2455-
+ "The default is false.\n"
2455+
+ "The default is true.\n"
24562456
+ "Because the maximum length of the char or varchar column in the schema of the table"
24572457
+ " is inconsistent with the schema in the underlying parquet or orc file."
24582458
+ " At this time, if the option is turned on, it will be truncated according to the maximum length"
@@ -3334,12 +3334,12 @@ public boolean isEnableESParallelScroll() {
33343334
needForward = true,
33353335
affectQueryResultInExecution = true,
33363336
description = {
3337-
"是否启用基于 schema template 的 variant 自动 cast,默认关闭。",
3337+
"是否启用基于 schema template 的 variant 自动 cast,默认开启。",
33383338
"Whether to enable schema-template-based auto cast for variant expressions. "
3339-
+ "The default is false."
3339+
+ "The default is true."
33403340
}
33413341
)
3342-
public boolean enableVariantSchemaAutoCast = false;
3342+
public boolean enableVariantSchemaAutoCast = true;
33433343

33443344
@VariableMgr.VarAttr(
33453345
name = DEFAULT_VARIANT_ENABLE_TYPED_PATHS_TO_SPARSE,

regression-test/data/variant_p0/predefine/test_schema_template_auto_cast.out

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,3 +299,44 @@ charlie 50
299299
-- !leaf_having_mixed --
300300
3033333 3033333
301301
4044444 4044444
302+
303+
-- !glob_wild_match --
304+
2 3
305+
306+
-- !glob_literal_match --
307+
2
308+
309+
-- !nonleaf_auto_cast_on --
310+
\\N
311+
312+
-- !nonleaf_auto_cast_off --
313+
{"level1_num_1":1011111,"level1_num_2":102}
314+
315+
-- !explicit_cast_chain_select_2 --
316+
10
317+
30
318+
50
319+
15
320+
321+
-- !explicit_cast_chain_where_3 --
322+
2
323+
3
324+
325+
-- !explicit_cast_chain_order_by_4 --
326+
3
327+
2
328+
4
329+
1
330+
331+
-- !explicit_cast_chain_group_having_4 --
332+
15 1
333+
30 1
334+
50 1
335+
336+
-- !explicit_cast_chain_match_2 --
337+
1
338+
4
339+
340+
-- !explicit_cast_chain_match_4 --
341+
1
342+
4

regression-test/suites/variant_p0/predefine/predefined_typed_to_sparse.groovy

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
suite("test_predefine_typed_to_sparse", "p0"){
1818
sql """ set enable_common_expr_pushdown = true """
1919
sql """ set default_variant_enable_doc_mode = false """
20+
sql """ set enable_variant_schema_auto_cast = false """
2021
def count = new Random().nextInt(10) + 1
2122

2223
def load_json_data = {table_name, file_name ->

regression-test/suites/variant_p0/predefine/test_schema_template_auto_cast.groovy

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,4 +307,108 @@ suite("test_schema_template_auto_cast", "p0") {
307307
ORDER BY data['int_nested.level1_num_1'] """
308308

309309
sql "DROP TABLE IF EXISTS ${leafTable}"
310-
}
310+
311+
// Test 16: backslash escaping in schema template pattern
312+
def globWildTable = "test_variant_schema_auto_cast_glob_wild"
313+
def globLiteralTable = "test_variant_schema_auto_cast_glob_literal"
314+
def globLiteralPattern = "a\\*b" // SQL sees a\*b, glob sees a\*b (literal *)
315+
316+
sql "DROP TABLE IF EXISTS ${globWildTable}"
317+
sql "DROP TABLE IF EXISTS ${globLiteralTable}"
318+
319+
sql """CREATE TABLE ${globWildTable} (
320+
`id` bigint NULL,
321+
`data` variant<'a*b': BIGINT> NOT NULL
322+
) ENGINE=OLAP DUPLICATE KEY(`id`)
323+
DISTRIBUTED BY HASH(`id`) BUCKETS 1
324+
PROPERTIES ( "replication_allocation" = "tag.location.default: 1")"""
325+
326+
sql """CREATE TABLE ${globLiteralTable} (
327+
`id` bigint NULL,
328+
`data` variant<'${globLiteralPattern}': BIGINT> NOT NULL
329+
) ENGINE=OLAP DUPLICATE KEY(`id`)
330+
DISTRIBUTED BY HASH(`id`) BUCKETS 1
331+
PROPERTIES ( "replication_allocation" = "tag.location.default: 1")"""
332+
333+
sql """insert into ${globWildTable} values(1, '{\"a*b\": 1, \"axb\": 2}')"""
334+
sql """insert into ${globLiteralTable} values(1, '{\"a*b\": 1, \"axb\": 2}')"""
335+
336+
// wildcard a*b matches both a*b and axb
337+
qt_glob_wild_match """ SELECT data['a*b'] + 1 AS v1, data['axb'] + 1 AS v2
338+
FROM ${globWildTable} ORDER BY id """
339+
340+
// literal a\*b matches only a*b
341+
qt_glob_literal_match """ SELECT data['a*b'] + 1 AS v1 FROM ${globLiteralTable} ORDER BY id """
342+
test {
343+
sql """ SELECT data['axb'] + 1 FROM ${globLiteralTable} """
344+
exception "Cannot cast from variant"
345+
}
346+
347+
sql "DROP TABLE IF EXISTS ${globWildTable}"
348+
sql "DROP TABLE IF EXISTS ${globLiteralTable}"
349+
350+
351+
// Test 17: non-leaf path auto cast limitation
352+
def nonleafTable = "test_variant_schema_auto_cast_nonleaf_limit"
353+
sql "DROP TABLE IF EXISTS ${nonleafTable}"
354+
sql """CREATE TABLE ${nonleafTable} (
355+
`id` int NULL,
356+
`data` variant<'int_*': INT> NOT NULL
357+
) ENGINE=OLAP DUPLICATE KEY(`id`)
358+
DISTRIBUTED BY HASH(`id`) BUCKETS 1
359+
PROPERTIES ( "replication_allocation" = "tag.location.default: 1")"""
360+
361+
sql """insert into ${nonleafTable} values(
362+
1, '{"int_1": 1, "int_nested": {"level1_num_1": 1011111, "level1_num_2": 102}}')"""
363+
364+
// auto cast enabled: non-leaf path matches int_* and returns NULL
365+
sql "set enable_variant_schema_auto_cast = true"
366+
qt_nonleaf_auto_cast_on """ SELECT data['int_nested'] FROM ${nonleafTable} ORDER BY id """
367+
368+
// auto cast disabled: return original object
369+
sql "set enable_variant_schema_auto_cast = false"
370+
qt_nonleaf_auto_cast_off """ SELECT data['int_nested'] FROM ${nonleafTable} ORDER BY id """
371+
372+
// restore default
373+
sql "set enable_variant_schema_auto_cast = true"
374+
sql "DROP TABLE IF EXISTS ${nonleafTable}"
375+
376+
377+
// Test 18: multi-layer explicit cast chain (2~4), including MATCH clause
378+
def castChainTable = "test_variant_schema_auto_cast_cast_chain"
379+
sql "DROP TABLE IF EXISTS ${castChainTable}"
380+
sql """CREATE TABLE ${castChainTable} (
381+
`id` bigint NULL,
382+
`data` variant<'num_*': BIGINT, 'str_*': STRING> NOT NULL
383+
) ENGINE=OLAP DUPLICATE KEY(`id`)
384+
DISTRIBUTED BY HASH(`id`) BUCKETS 1
385+
PROPERTIES ( "replication_allocation" = "tag.location.default: 1")"""
386+
387+
sql """insert into ${castChainTable} values(1, '{\"num_a\": 10, \"num_b\": 20, \"str_name\": \"alice\"}')"""
388+
sql """insert into ${castChainTable} values(2, '{\"num_a\": 30, \"num_b\": 40, \"str_name\": \"bob\"}')"""
389+
sql """insert into ${castChainTable} values(3, '{\"num_a\": 50, \"num_b\": 60, \"str_name\": \"charlie\"}')"""
390+
sql """insert into ${castChainTable} values(4, '{\"num_a\": 15, \"num_b\": 25, \"str_name\": \"alice\"}')"""
391+
392+
qt_explicit_cast_chain_select_2 """ SELECT CAST(CAST(data['num_a'] AS BIGINT) AS BIGINT)
393+
FROM ${castChainTable} ORDER BY id """
394+
qt_explicit_cast_chain_where_3 """ SELECT id FROM ${castChainTable}
395+
WHERE CAST(CAST(CAST(data['num_a'] AS BIGINT) AS BIGINT) AS BIGINT) > 20 ORDER BY id """
396+
qt_explicit_cast_chain_order_by_4 """ SELECT id FROM ${castChainTable}
397+
ORDER BY CAST(CAST(CAST(CAST(data['num_b'] AS BIGINT) AS BIGINT) AS BIGINT) AS BIGINT) DESC, id """
398+
qt_explicit_cast_chain_group_having_4 """ SELECT
399+
CAST(CAST(CAST(CAST(data['num_a'] AS BIGINT) AS BIGINT) AS BIGINT) AS BIGINT) AS v, COUNT(*)
400+
FROM ${castChainTable}
401+
GROUP BY CAST(CAST(CAST(CAST(data['num_a'] AS BIGINT) AS BIGINT) AS BIGINT) AS BIGINT)
402+
HAVING CAST(CAST(CAST(CAST(data['num_a'] AS BIGINT) AS BIGINT) AS BIGINT) AS BIGINT) >= 15
403+
ORDER BY v """
404+
405+
sql """ set enable_match_without_inverted_index = true """
406+
qt_explicit_cast_chain_match_2 """ SELECT id FROM ${castChainTable}
407+
WHERE CAST(CAST(data['str_name'] AS STRING) AS VARCHAR) MATCH 'alice' ORDER BY id """
408+
qt_explicit_cast_chain_match_4 """ SELECT id FROM ${castChainTable}
409+
WHERE CAST(CAST(CAST(CAST(data['str_name'] AS STRING) AS VARCHAR) AS STRING) AS VARCHAR) MATCH 'alice' ORDER BY id """
410+
sql """ set enable_match_without_inverted_index = false """
411+
412+
sql "DROP TABLE IF EXISTS ${castChainTable}"
413+
414+
}

0 commit comments

Comments
 (0)