Skip to content

Commit d1d6e5a

Browse files
Parser: add regression test and bench for compound keyword-chain blowup
1 parent 2705a7d commit d1d6e5a

2 files changed

Lines changed: 50 additions & 1 deletion

File tree

sqlparser_bench/benches/sqlparser_bench.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,11 +177,35 @@ fn parse_compound_chain(c: &mut Criterion) {
177177
group.finish();
178178
}
179179

180+
/// Benchmark parsing pathological compound chains with a reserved keyword in
181+
/// field position, like `SELECT x.not-b.not-b...`. The `.not-b` shape used to
182+
/// cause 2^N work in `parse_compound_expr` because `parse_prefix` descended
183+
/// into `parse_not` -> `parse_subexpr`, re-walking the remaining chain at
184+
/// every segment.
185+
fn parse_compound_keyword_chain(c: &mut Criterion) {
186+
let mut group = c.benchmark_group("parse_compound_keyword_chain");
187+
let dialect = GenericDialect {};
188+
189+
for &n in &[5usize, 10, 15] {
190+
let body = std::iter::repeat_n(".not-b", n).collect::<String>();
191+
let sql = format!("SELECT x{body}");
192+
193+
group.bench_function(format!("chain_{n}"), |b| {
194+
b.iter(|| {
195+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
196+
});
197+
});
198+
}
199+
200+
group.finish();
201+
}
202+
180203
criterion_group!(
181204
benches,
182205
basic_queries,
183206
word_to_ident,
184207
parse_many_identifiers,
185-
parse_compound_chain
208+
parse_compound_chain,
209+
parse_compound_keyword_chain
186210
);
187211
criterion_main!(benches);

tests/sqlparser_common.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19004,3 +19004,28 @@ fn parse_compound_chain_no_exponential_blowup() {
1900419004
rx.recv_timeout(Duration::from_secs(5))
1900519005
.expect("parser should reject this quickly, not loop exponentially");
1900619006
}
19007+
19008+
/// Hang guard for the 2^N parse-time blowup in `parse_compound_expr` on chains
/// like `x.not-b.not-b...`.
///
/// A `NOT` keyword in field position drives `parse_prefix` -> `parse_not` ->
/// `parse_subexpr`, which re-walks the remaining chain at every segment and
/// doubles the work. Post-fix the parser handles 25 segments in well under a
/// millisecond, so the 5 second timeout only detects a hang; it is not a
/// performance threshold.
#[test]
fn parse_compound_keyword_chain_no_exponential_blowup() {
    use std::sync::mpsc;
    use std::thread;
    use std::time::Duration;

    const SEGMENTS: usize = 25;
    let sql = format!("SELECT x{}", ".not-b".repeat(SEGMENTS));

    // Parse on a worker thread so this test can give up waiting instead of
    // hanging forever if the exponential behaviour ever comes back.
    let (done_tx, done_rx) = mpsc::channel();
    thread::spawn(move || {
        // Only completion matters; the parse outcome itself is ignored.
        let _ = Parser::parse_sql(&GenericDialect {}, &sql);
        // The receiver may already be gone after a timeout, so a failed send
        // is ignored too.
        let _ = done_tx.send(());
    });

    let deadline = Duration::from_secs(5);
    done_rx
        .recv_timeout(deadline)
        .expect("parser should handle this quickly, not loop exponentially");
}

0 commit comments

Comments
 (0)