Skip to content

Commit d959cd9

Browse files
PostgreSQL: Support SQL standard ARRAY keyword in type declarations
1 parent 182eae8 commit d959cd9

12 files changed

Lines changed: 101 additions & 50 deletions

src/ast/data_type.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,8 @@ impl fmt::Display for DataType {
719719
ArrayElemTypeDef::SquareBracket(t, Some(size)) => write!(f, "{t}[{size}]"),
720720
ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"),
721721
ArrayElemTypeDef::Parenthesis(t) => write!(f, "Array({t})"),
722+
ArrayElemTypeDef::Keyword(t, None) => write!(f, "{t} ARRAY"),
723+
ArrayElemTypeDef::Keyword(t, Some(size)) => write!(f, "{t} ARRAY[{size}]"),
722724
},
723725
DataType::Custom(ty, modifiers) => {
724726
if modifiers.is_empty() {
@@ -1146,6 +1148,8 @@ pub enum ArrayElemTypeDef {
11461148
SquareBracket(Box<DataType>, Option<u64>),
11471149
/// Parenthesis style, e.g. `Array(Int64)`.
11481150
Parenthesis(Box<DataType>),
1151+
/// Keyword style with an optional size, e.g. `INT ARRAY` or `INT ARRAY[4]`.
1152+
Keyword(Box<DataType>, Option<u64>),
11491153
}
11501154

11511155
/// Represents different types of geometric shapes which are commonly used in

src/ast/mod.rs

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,14 +1088,10 @@ pub enum Expr {
10881088
kind: CastKind,
10891089
/// Expression being cast.
10901090
expr: Box<Expr>,
1091-
/// Target data type.
1091+
/// Target data type. A trailing `ARRAY` keyword (e.g.
1092+
/// `CAST(... AS UNSIGNED ARRAY)`) is captured as [`DataType::Array`]
1093+
/// with [`ArrayElemTypeDef::Keyword`].
10921094
data_type: DataType,
1093-
/// [MySQL] allows CAST(... AS type ARRAY) in functional index definitions for InnoDB
1094-
/// multi-valued indices. It's not really a datatype, and is only allowed in `CAST` in key
1095-
/// specifications, so it's a flag here.
1096-
///
1097-
/// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html#function_cast
1098-
array: bool,
10991095
/// Optional CAST(string_expression AS type FORMAT format_string_expression) as used by [BigQuery]
11001096
///
11011097
/// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax
@@ -1957,14 +1953,10 @@ impl fmt::Display for Expr {
19571953
kind,
19581954
expr,
19591955
data_type,
1960-
array,
19611956
format,
19621957
} => match kind {
19631958
CastKind::Cast => {
19641959
write!(f, "CAST({expr} AS {data_type}")?;
1965-
if *array {
1966-
write!(f, " ARRAY")?;
1967-
}
19681960
if let Some(format) = format {
19691961
write!(f, " FORMAT {format}")?;
19701962
}

src/ast/spans.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1591,7 +1591,6 @@ impl Spanned for Expr {
15911591
kind: _,
15921592
expr,
15931593
data_type: _,
1594-
array: _,
15951594
format: _,
15961595
} => expr.span(),
15971596
Expr::AtTimeZone {

src/dialect/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1398,6 +1398,13 @@ pub trait Dialect: Debug + Any {
13981398
fn supports_array_typedef_with_brackets(&self) -> bool {
13991399
false
14001400
}
1401+
/// Returns true if the dialect supports array type definition with the
1402+
/// `ARRAY` keyword and an optional size. For example:
1403+
/// ```CREATE TABLE my_table (arr1 INT ARRAY, arr2 INT ARRAY[3])```
1404+
/// ```SELECT x::INT ARRAY[3]```
1405+
fn supports_array_typedef_with_keyword(&self) -> bool {
1406+
false
1407+
}
14011408
/// Returns true if the dialect supports geometric types.
14021409
///
14031410
/// Postgres: <https://www.postgresql.org/docs/9.5/functions-geometry.html>

src/dialect/postgresql.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,11 @@ impl Dialect for PostgreSqlDialect {
274274
true
275275
}
276276

277+
/// See: <https://www.postgresql.org/docs/current/arrays.html#ARRAYS-DECLARATION>
278+
fn supports_array_typedef_with_keyword(&self) -> bool {
279+
true
280+
}
281+
277282
fn supports_geometric_types(&self) -> bool {
278283
true
279284
}

src/parser/mod.rs

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1765,7 +1765,6 @@ impl<'a> Parser<'a> {
17651765
kind: CastKind::Cast,
17661766
expr: Box::new(parser.parse_expr()?),
17671767
data_type: DataType::Binary(None),
1768-
array: false,
17691768
format: None,
17701769
})
17711770
}
@@ -2807,15 +2806,19 @@ impl<'a> Parser<'a> {
28072806
self.expect_token(&Token::LParen)?;
28082807
let expr = self.parse_expr()?;
28092808
self.expect_keyword_is(Keyword::AS)?;
2810-
let data_type = self.parse_data_type()?;
2811-
let array = self.parse_keyword(Keyword::ARRAY);
2809+
let mut data_type = self.parse_data_type()?;
2810+
// A trailing `ARRAY` keyword makes the target an array type, e.g. MySQL's
2811+
// `CAST(... AS UNSIGNED ARRAY)`. PostgreSQL already consumes it while
2812+
// parsing the data type, so the guard avoids wrapping it twice.
2813+
if !matches!(data_type, DataType::Array(_)) && self.parse_keyword(Keyword::ARRAY) {
2814+
data_type = DataType::Array(ArrayElemTypeDef::Keyword(Box::new(data_type), None));
2815+
}
28122816
let format = self.parse_optional_cast_format()?;
28132817
self.expect_token(&Token::RParen)?;
28142818
Ok(Expr::Cast {
28152819
kind,
28162820
expr: Box::new(expr),
28172821
data_type,
2818-
array,
28192822
format,
28202823
})
28212824
}
@@ -4097,7 +4100,6 @@ impl<'a> Parser<'a> {
40974100
kind: CastKind::DoubleColon,
40984101
expr: Box::new(expr),
40994102
data_type: self.parse_data_type()?,
4100-
array: false,
41014103
format: None,
41024104
})
41034105
} else if Token::ExclamationMark == *tok && self.dialect.supports_factorial_operator() {
@@ -4345,7 +4347,6 @@ impl<'a> Parser<'a> {
43454347
kind: CastKind::DoubleColon,
43464348
expr: Box::new(expr),
43474349
data_type: self.parse_data_type()?,
4348-
array: false,
43494350
format: None,
43504351
})
43514352
}
@@ -12825,6 +12826,22 @@ impl<'a> Parser<'a> {
1282512826
data = DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(data), size))
1282612827
}
1282712828
}
12829+
12830+
// Keyword form, e.g. `INT ARRAY` or `INT ARRAY[3]`. It is one-dimensional,
12831+
// so only a single optional size is accepted (multidimensional arrays use
12832+
// the bracket form above).
12833+
if self.dialect.supports_array_typedef_with_keyword() && self.parse_keyword(Keyword::ARRAY)
12834+
{
12835+
let size = if self.consume_token(&Token::LBracket) {
12836+
let size = self.maybe_parse(|p| p.parse_literal_uint())?;
12837+
self.expect_token(&Token::RBracket)?;
12838+
size
12839+
} else {
12840+
None
12841+
};
12842+
data = DataType::Array(ArrayElemTypeDef::Keyword(Box::new(data), size));
12843+
}
12844+
1282812845
Ok((data, trailing_bracket))
1282912846
}
1283012847

tests/sqlparser_common.rs

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3146,7 +3146,6 @@ fn parse_cast() {
31463146
kind: CastKind::Cast,
31473147
expr: Box::new(Expr::Identifier(Ident::new("id"))),
31483148
data_type: DataType::BigInt(None),
3149-
array: false,
31503149
format: None,
31513150
},
31523151
expr_from_projection(only(&select.projection))
@@ -3159,7 +3158,6 @@ fn parse_cast() {
31593158
kind: CastKind::Cast,
31603159
expr: Box::new(Expr::Identifier(Ident::new("id"))),
31613160
data_type: DataType::TinyInt(None),
3162-
array: false,
31633161
format: None,
31643162
},
31653163
expr_from_projection(only(&select.projection))
@@ -3191,7 +3189,6 @@ fn parse_cast() {
31913189
length: 50,
31923190
unit: None,
31933191
})),
3194-
array: false,
31953192
format: None,
31963193
},
31973194
expr_from_projection(only(&select.projection))
@@ -3204,7 +3201,6 @@ fn parse_cast() {
32043201
kind: CastKind::Cast,
32053202
expr: Box::new(Expr::Identifier(Ident::new("id"))),
32063203
data_type: DataType::Clob(None),
3207-
array: false,
32083204
format: None,
32093205
},
32103206
expr_from_projection(only(&select.projection))
@@ -3217,7 +3213,6 @@ fn parse_cast() {
32173213
kind: CastKind::Cast,
32183214
expr: Box::new(Expr::Identifier(Ident::new("id"))),
32193215
data_type: DataType::Clob(Some(50)),
3220-
array: false,
32213216
format: None,
32223217
},
32233218
expr_from_projection(only(&select.projection))
@@ -3230,7 +3225,6 @@ fn parse_cast() {
32303225
kind: CastKind::Cast,
32313226
expr: Box::new(Expr::Identifier(Ident::new("id"))),
32323227
data_type: DataType::Binary(Some(50)),
3233-
array: false,
32343228
format: None,
32353229
},
32363230
expr_from_projection(only(&select.projection))
@@ -3243,7 +3237,6 @@ fn parse_cast() {
32433237
kind: CastKind::Cast,
32443238
expr: Box::new(Expr::Identifier(Ident::new("id"))),
32453239
data_type: DataType::Varbinary(Some(BinaryLength::IntegerLength { length: 50 })),
3246-
array: false,
32473240
format: None,
32483241
},
32493242
expr_from_projection(only(&select.projection))
@@ -3256,7 +3249,6 @@ fn parse_cast() {
32563249
kind: CastKind::Cast,
32573250
expr: Box::new(Expr::Identifier(Ident::new("id"))),
32583251
data_type: DataType::Blob(None),
3259-
array: false,
32603252
format: None,
32613253
},
32623254
expr_from_projection(only(&select.projection))
@@ -3269,7 +3261,6 @@ fn parse_cast() {
32693261
kind: CastKind::Cast,
32703262
expr: Box::new(Expr::Identifier(Ident::new("id"))),
32713263
data_type: DataType::Blob(Some(50)),
3272-
array: false,
32733264
format: None,
32743265
},
32753266
expr_from_projection(only(&select.projection))
@@ -3282,7 +3273,6 @@ fn parse_cast() {
32823273
kind: CastKind::Cast,
32833274
expr: Box::new(Expr::Identifier(Ident::new("details"))),
32843275
data_type: DataType::JSONB,
3285-
array: false,
32863276
format: None,
32873277
},
32883278
expr_from_projection(only(&select.projection))
@@ -3298,7 +3288,6 @@ fn parse_try_cast() {
32983288
kind: CastKind::TryCast,
32993289
expr: Box::new(Expr::Identifier(Ident::new("id"))),
33003290
data_type: DataType::BigInt(None),
3301-
array: false,
33023291
format: None,
33033292
},
33043293
expr_from_projection(only(&select.projection))
@@ -6641,7 +6630,6 @@ fn interval_disallow_interval_expr_double_colon() {
66416630
fractional_seconds_precision: None,
66426631
})),
66436632
data_type: DataType::Text,
6644-
array: false,
66456633
format: None,
66466634
}
66476635
)
@@ -6659,7 +6647,6 @@ fn parse_text_type_modifier_double_colon_cast() {
66596647
ObjectName::from(vec![Ident::new("TEXT")]),
66606648
vec!["16777216".to_string()]
66616649
),
6662-
array: false,
66636650
format: None,
66646651
}
66656652
);
@@ -9412,7 +9399,6 @@ fn parse_double_colon_cast_at_timezone() {
94129399
.with_empty_span()
94139400
)),
94149401
data_type: DataType::Timestamp(None, TimezoneInfo::None),
9415-
array: false,
94169402
format: None
94179403
}),
94189404
time_zone: Box::new(Expr::Value(
@@ -13812,7 +13798,6 @@ fn test_dictionary_syntax() {
1381213798
(Value::SingleQuotedString("2023-04-01".to_owned())).with_empty_span(),
1381313799
)),
1381413800
data_type: DataType::Timestamp(None, TimezoneInfo::None),
13815-
array: false,
1381613801
format: None,
1381713802
}),
1381813803
},
@@ -13824,7 +13809,6 @@ fn test_dictionary_syntax() {
1382413809
(Value::SingleQuotedString("2023-04-05".to_owned())).with_empty_span(),
1382513810
)),
1382613811
data_type: DataType::Timestamp(None, TimezoneInfo::None),
13827-
array: false,
1382813812
format: None,
1382913813
}),
1383013814
},
@@ -14121,7 +14105,6 @@ fn test_extract_seconds_ok() {
1412114105
fields: None,
1412214106
precision: None
1412314107
},
14124-
array: false,
1412514108
format: None,
1412614109
}),
1412714110
}
@@ -14152,7 +14135,6 @@ fn test_extract_seconds_ok() {
1415214135
fields: None,
1415314136
precision: None,
1415414137
},
14155-
array: false,
1415614138
format: None,
1415714139
}),
1415814140
})],
@@ -14210,7 +14192,6 @@ fn test_extract_seconds_single_quote_ok() {
1421014192
fields: None,
1421114193
precision: None
1421214194
},
14213-
array: false,
1421414195
format: None,
1421514196
}),
1421614197
}

tests/sqlparser_databricks.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,6 @@ fn data_type_timestamp_ntz() {
421421
"created_at".into()
422422
)))),
423423
data_type: DataType::TimestampNtz(None),
424-
array: false,
425424
format: None
426425
}
427426
);

tests/sqlparser_duckdb.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,6 @@ fn test_duckdb_specific_int_types() {
388388
Value::Number("123".parse().unwrap(), false).with_empty_span()
389389
)),
390390
data_type: data_type.clone(),
391-
array: false,
392391
format: None,
393392
},
394393
expr_from_projection(&select.projection[0])

tests/sqlparser_mysql.rs

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -886,7 +886,6 @@ fn test_functional_key_part() {
886886
)),
887887
}),
888888
data_type: DataType::Unsigned,
889-
array: false,
890889
format: None,
891890
})),
892891
);
@@ -903,8 +902,10 @@ fn test_functional_key_part() {
903902
Value::SingleQuotedString("$.fields".to_string()).with_empty_span()
904903
)),
905904
}),
906-
data_type: DataType::Unsigned,
907-
array: true,
905+
data_type: DataType::Array(ArrayElemTypeDef::Keyword(
906+
Box::new(DataType::Unsigned),
907+
None,
908+
)),
908909
format: None,
909910
})),
910911
);
@@ -4278,12 +4279,47 @@ fn parse_cast_integers() {
42784279

42794280
#[test]
42804281
fn parse_cast_array() {
4281-
mysql().verified_expr("CAST(foo AS SIGNED ARRAY)");
4282+
// The element type may be any type accepted by CAST().
4283+
for ty in [
4284+
"SIGNED",
4285+
"UNSIGNED",
4286+
"CHAR",
4287+
"CHAR(10)",
4288+
"BINARY",
4289+
"BINARY(5)",
4290+
"DATE",
4291+
"TIME",
4292+
"DATETIME",
4293+
"DECIMAL",
4294+
"DECIMAL(10,2)",
4295+
"DOUBLE",
4296+
"FLOAT",
4297+
"YEAR",
4298+
] {
4299+
mysql().verified_expr(&format!("CAST(foo AS {ty} ARRAY)"));
4300+
}
4301+
4302+
// `ARRAY` on its own is not a valid CAST target type.
42824303
mysql()
42834304
.run_parser_method("CAST(foo AS ARRAY)", |p| p.parse_expr())
42844305
.expect_err("ARRAY alone is not a type");
42854306
}
42864307

4308+
#[test]
4309+
fn parse_multi_valued_index() {
4310+
// `CAST(... AS <type> ARRAY)` key part in CREATE TABLE, CREATE INDEX, and
4311+
// ALTER TABLE. See https://dev.mysql.com/doc/refman/8.0/en/create-index.html
4312+
mysql_and_generic().verified_stmt(
4313+
"CREATE TABLE customers (id BIGINT, custinfo JSON, INDEX zips ((CAST(custinfo -> '$.zipcode' AS UNSIGNED ARRAY))))",
4314+
);
4315+
mysql_and_generic().verified_stmt(
4316+
"CREATE INDEX zips ON customers((CAST(custinfo -> '$.zipcode' AS UNSIGNED ARRAY)))",
4317+
);
4318+
mysql_and_generic().verified_stmt(
4319+
"ALTER TABLE customers ADD INDEX zips ((CAST(custinfo -> '$.zipcode' AS UNSIGNED ARRAY)))",
4320+
);
4321+
}
4322+
42874323
#[test]
42884324
fn parse_match_against_with_alias() {
42894325
let sql = "SELECT tbl.ProjectID FROM surveys.tbl1 AS tbl WHERE MATCH (tbl.ReferenceID) AGAINST ('AAA' IN BOOLEAN MODE)";

0 commit comments

Comments
 (0)