Skip to content

Commit 62c053a

Browse files
Merge branch 'pathological2' into pathological-combined
2 parents 2705a7d + 381dec5 commit 62c053a

5 files changed

Lines changed: 94 additions & 19 deletions

File tree

sqlparser_bench/benches/sqlparser_bench.rs

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use criterion::{criterion_group, criterion_main, Criterion};
19-
use sqlparser::dialect::GenericDialect;
19+
use sqlparser::dialect::{GenericDialect, PostgreSqlDialect};
2020
use sqlparser::keywords::Keyword;
2121
use sqlparser::parser::Parser;
2222
use sqlparser::tokenizer::{Span, Word};
@@ -177,11 +177,37 @@ fn parse_compound_chain(c: &mut Criterion) {
177177
group.finish();
178178
}
179179

180+
/// Benchmark parsing pathological named-arg chains that previously caused
181+
/// 2^N work in `parse_function_args` on dialects with expression-named
182+
/// function arguments (PostgreSQL, MSSQL). Each `--<newline>` swallows the
183+
/// trailing `,i)`, leaving a chain of unterminated function calls that the
184+
/// previous `maybe_parse(parse_expr)` re-walked on rollback.
185+
fn parse_named_arg_chain(c: &mut Criterion) {
186+
let mut group = c.benchmark_group("parse_named_arg_chain");
187+
let dialect = PostgreSqlDialect {};
188+
189+
for &n in &[5usize, 10, 15] {
190+
let body = std::iter::repeat_n(".foo(t--,i)", n)
191+
.collect::<Vec<_>>()
192+
.join("\n");
193+
let sql = format!("SELECT Y\n{body}");
194+
195+
group.bench_function(format!("chain_{n}"), |b| {
196+
b.iter(|| {
197+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
198+
});
199+
});
200+
}
201+
202+
group.finish();
203+
}
204+
180205
criterion_group!(
181206
benches,
182207
basic_queries,
183208
word_to_ident,
184209
parse_many_identifiers,
185-
parse_compound_chain
210+
parse_compound_chain,
211+
parse_named_arg_chain
186212
);
187213
criterion_main!(benches);

src/dialect/mysql.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,11 @@ impl Dialect for MySqlDialect {
216216
fn supports_key_column_option(&self) -> bool {
217217
true
218218
}
219+
220+
/// See: <https://dev.mysql.com/doc/refman/9.7/en/group-by-modifiers.html>
221+
fn supports_group_by_with_modifier(&self) -> bool {
222+
true
223+
}
219224
}
220225

221226
/// `LOCK TABLES`

src/parser/mod.rs

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18357,19 +18357,8 @@ impl<'a> Parser<'a> {
1835718357

1835818358
/// Parse a single function argument, handling named and unnamed variants.
1835918359
pub fn parse_function_args(&mut self) -> Result<FunctionArg, ParserError> {
18360-
let arg = if self.dialect.supports_named_fn_args_with_expr_name() {
18361-
self.maybe_parse(|p| {
18362-
let name = p.parse_expr()?;
18363-
let operator = p.parse_function_named_arg_operator()?;
18364-
let arg = p.parse_wildcard_expr()?.into();
18365-
Ok(FunctionArg::ExprNamed {
18366-
name,
18367-
arg,
18368-
operator,
18369-
})
18370-
})?
18371-
} else {
18372-
self.maybe_parse(|p| {
18360+
if !self.dialect.supports_named_fn_args_with_expr_name() {
18361+
if let Some(arg) = self.maybe_parse(|p| {
1837318362
let name = p.parse_identifier()?;
1837418363
let operator = p.parse_function_named_arg_operator()?;
1837518364
let arg = p.parse_wildcard_expr()?.into();
@@ -18378,12 +18367,35 @@ impl<'a> Parser<'a> {
1837818367
arg,
1837918368
operator,
1838018369
})
18381-
})?
18382-
};
18383-
if let Some(arg) = arg {
18384-
return Ok(arg);
18370+
})? {
18371+
return Ok(arg);
18372+
}
1838518373
}
18374+
18375+
// Parse the leading expression once, then speculatively parse the
18376+
// named-arg tail `<op> <expr>`. The previous implementation also
18377+
// speculated on the name itself via `maybe_parse(parse_expr ...)`,
18378+
// which produced ~2^N work on chains like
18379+
// `SELECT Y\n.foo(t--\n.foo(t--\n...` because rollback re-walked
18380+
// deeply nested function calls. Same family of bug as #2344.
1838618381
let wildcard_expr = self.parse_wildcard_expr()?;
18382+
if self.dialect.supports_named_fn_args_with_expr_name()
18383+
&& !matches!(wildcard_expr, Expr::Wildcard(_))
18384+
{
18385+
if let Some((operator, arg)) = self.maybe_parse(|p| {
18386+
Ok((
18387+
p.parse_function_named_arg_operator()?,
18388+
p.parse_wildcard_expr()?,
18389+
))
18390+
})? {
18391+
return Ok(FunctionArg::ExprNamed {
18392+
name: wildcard_expr,
18393+
arg: arg.into(),
18394+
operator,
18395+
});
18396+
}
18397+
}
18398+
1838718399
let arg_expr: FunctionArgExpr = match wildcard_expr {
1838818400
Expr::Wildcard(ref token) if self.dialect.supports_select_wildcard_exclude() => {
1838918401
// Support `* EXCLUDE(col1, col2, ...)` inside function calls (e.g. Snowflake's

tests/sqlparser_common.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19004,3 +19004,30 @@ fn parse_compound_chain_no_exponential_blowup() {
1900419004
rx.recv_timeout(Duration::from_secs(5))
1900519005
.expect("parser should reject this quickly, not loop exponentially");
1900619006
}
19007+
19008+
/// Regression test for the 2^N parse-time blowup in `parse_function_args` on
19009+
/// dialects that allow `<expr> <op> <expr>` named args (PostgreSQL, MSSQL).
19010+
/// Each `--<newline>` swallows the trailing `,i)`, leaving a chain of
19011+
/// unterminated function calls that the previous `maybe_parse(parse_expr)`
19012+
/// re-walked on rollback. Post-fix the parser returns `Err` in well under
19013+
/// a millisecond.
19014+
#[test]
19015+
fn parse_named_arg_chain_no_exponential_blowup() {
19016+
use std::sync::mpsc;
19017+
use std::thread;
19018+
use std::time::Duration;
19019+
19020+
let body: String = std::iter::repeat_n(".foo(t--,i)", 25)
19021+
.collect::<Vec<_>>()
19022+
.join("\n");
19023+
let sql = format!("SELECT Y\n{body}");
19024+
19025+
let (tx, rx) = mpsc::channel();
19026+
thread::spawn(move || {
19027+
let _ = Parser::parse_sql(&PostgreSqlDialect {}, &sql);
19028+
let _ = tx.send(());
19029+
});
19030+
19031+
rx.recv_timeout(Duration::from_secs(5))
19032+
.expect("PostgreSQL parser should reject this quickly, not loop exponentially");
19033+
}

tests/sqlparser_mysql.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4900,3 +4900,8 @@ fn parse_adjacent_string_literal_concatenation() {
49004900
let sql = r#"SELECT 'M' "y" 'S' "q" 'l'"#;
49014901
mysql().one_statement_parses_to(sql, r"SELECT 'MySql'");
49024902
}
4903+
4904+
#[test]
4905+
fn parse_group_by_with_rollup() {
4906+
mysql().verified_stmt("SELECT * FROM tbl GROUP BY col1, col2 WITH ROLLUP");
4907+
}

0 commit comments

Comments
 (0)