Skip to content

Commit 2705a7d

Browse files
Parser: fix exponential parse time on compound chains (apache#2344)
1 parent e999d3d commit 2705a7d

3 files changed

Lines changed: 67 additions & 7 deletions

File tree

sqlparser_bench/benches/sqlparser_bench.rs

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,5 +152,36 @@ fn parse_many_identifiers(c: &mut Criterion) {
152152
group.finish();
153153
}
154154

155-
criterion_group!(benches, basic_queries, word_to_ident, parse_many_identifiers);
155+
/// Benchmark parsing pathological compound chains that previously caused 2^N
156+
/// work in `parse_compound_expr`. The input `IF a0.a1...aN.#` is rejected at the
157+
/// trailing `#`, which used to force exponential backtracking through
158+
/// the chain.
159+
fn parse_compound_chain(c: &mut Criterion) {
160+
let mut group = c.benchmark_group("parse_compound_chain");
161+
let dialect = GenericDialect {};
162+
163+
for &n in &[10usize, 20, 30] {
164+
let chain = (0..n)
165+
.map(|i| format!("a{i}"))
166+
.collect::<Vec<_>>()
167+
.join(".");
168+
let sql = format!("IF {chain}.#");
169+
170+
group.bench_function(format!("chain_{n}"), |b| {
171+
b.iter(|| {
172+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
173+
});
174+
});
175+
}
176+
177+
group.finish();
178+
}
179+
180+
criterion_group!(
181+
benches,
182+
basic_queries,
183+
word_to_ident,
184+
parse_many_identifiers,
185+
parse_compound_chain
186+
);
156187
criterion_main!(benches);

src/parser/mod.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2028,14 +2028,16 @@ impl<'a> Parser<'a> {
20282028
chain.push(AccessExpr::Dot(expr));
20292029
self.advance_token(); // The consumed placeholder
20302030
}
2031-
// Fallback to parsing an arbitrary expression, but restrict to expression
2032-
// types that are valid after the dot operator. This ensures that e.g.
2033-
// `T.interval` is parsed as a compound identifier, not as an interval
2034-
// expression.
2031+
// Parse a single field component, restricted to expression types valid
2032+
// after `.` (so e.g. `T.interval` is a compound identifier, not an
2033+
// interval expression). Using `parse_prefix` here rather than
2034+
// `parse_subexpr` avoids 2^N work on inputs like `IF a.b.c...x.#`:
2035+
// the outer loop already consumes successive `.field` segments, so a
2036+
// recursive `parse_subexpr` would re-walk the rest of the chain at
2037+
// every dot.
20352038
_ => {
20362039
let expr = self.maybe_parse(|parser| {
2037-
let expr = parser
2038-
.parse_subexpr(parser.dialect.prec_value(Precedence::Period))?;
2040+
let expr = parser.parse_prefix()?;
20392041
match &expr {
20402042
Expr::CompoundFieldAccess { .. }
20412043
| Expr::CompoundIdentifier(_)

tests/sqlparser_common.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18977,3 +18977,30 @@ fn parse_non_pg_dialects_keep_xml_names_as_regular_identifiers() {
1897718977
let dialects = all_dialects_except(|d| d.supports_xml_expressions());
1897818978
dialects.verified_only_select("SELECT xml FROM t");
1897918979
}
18980+
18981+
/// Regression test for the 2^N parse-time blowup in `parse_compound_expr` on
18982+
/// inputs like `IF a0.a1...aN.#`. The parse is run on a worker thread and the
18983+
/// test thread asserts that it reports back within a generous timeout. Post-fix
18984+
/// the parser returns `Err` in well under a millisecond, so the timeout is a
18985+
/// hang guard, not a perf threshold.
18986+
#[test]
18987+
fn parse_compound_chain_no_exponential_blowup() {
18988+
use std::sync::mpsc;
18989+
use std::thread;
18990+
use std::time::Duration;
18991+
18992+
let chain: String = (0..30)
18993+
.map(|i| format!("a{i}"))
18994+
.collect::<Vec<_>>()
18995+
.join(".");
18996+
let sql = format!("IF {chain}.#");
18997+
18998+
let (tx, rx) = mpsc::channel();
18999+
thread::spawn(move || {
19000+
let _ = Parser::parse_sql(&GenericDialect {}, &sql);
19001+
let _ = tx.send(());
19002+
});
19003+
19004+
rx.recv_timeout(Duration::from_secs(5))
19005+
.expect("parser should reject this quickly, not loop exponentially");
19006+
}

0 commit comments

Comments
 (0)