Skip to content

Commit f63e42f

Browse files
Merge branch 'pathological4' into pathological-combined
2 parents ce62a47 + a741dbd commit f63e42f

3 files changed

Lines changed: 80 additions & 1 deletion

File tree

sqlparser_bench/benches/sqlparser_bench.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,13 +225,38 @@ fn parse_compound_keyword_chain(c: &mut Criterion) {
225225
group.finish();
226226
}
227227

228+
/// Benchmark parsing pathological chains of `<ident>-NOT-<ident>.` segments
229+
/// that previously triggered 2^N speculative `parse_not` work. The input
230+
/// `SELECT x-not-b.x-not-b...` rejects at the trailing `.`. Each `-NOT-`
231+
/// segment used to recurse into `parse_not` which re-walks the rest of the
232+
/// chain, doubling the work at every level. Post-fix the cost is linear in
233+
/// the number of segments.
234+
fn parse_not_chain(c: &mut Criterion) {
235+
let mut group = c.benchmark_group("parse_not_chain");
236+
let dialect = GenericDialect {};
237+
238+
for &n in &[10usize, 20, 30] {
239+
let body: String = std::iter::repeat_n("x-not-b.", n).collect();
240+
let sql = format!("SELECT {body}");
241+
242+
group.bench_function(format!("chain_{n}"), |b| {
243+
b.iter(|| {
244+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
245+
});
246+
});
247+
}
248+
249+
group.finish();
250+
}
251+
228252
criterion_group!(
229253
benches,
230254
basic_queries,
231255
word_to_ident,
232256
parse_many_identifiers,
233257
parse_compound_chain,
234258
parse_named_arg_chain,
235-
parse_compound_keyword_chain
259+
parse_compound_keyword_chain,
260+
parse_not_chain
236261
);
237262
criterion_main!(benches);

src/parser/mod.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
//! SQL Parser
1414

15+
#[cfg(not(feature = "std"))]
16+
use alloc::collections::BTreeSet;
1517
#[cfg(not(feature = "std"))]
1618
use alloc::{
1719
boxed::Box,
@@ -25,6 +27,8 @@ use core::{
2527
str::FromStr,
2628
};
2729
use helpers::attached_token::AttachedToken;
30+
#[cfg(feature = "std")]
31+
use std::collections::BTreeSet;
2832

2933
use log::debug;
3034

@@ -359,6 +363,14 @@ pub struct Parser<'a> {
359363
options: ParserOptions,
360364
/// Ensures the stack does not overflow by limiting recursion depth.
361365
recursion_counter: RecursionCounter,
366+
/// Token indices where a speculative attempt to parse `NOT` as a unary
367+
/// prefix operator has already failed during this parse. Re-entering
368+
/// `parse_not` at the same position would repeat the same work and fail
369+
/// again, so the second visit short-circuits to identifier interpretation.
370+
/// Without this cache, inputs like `SELECT x-not-b.x-not-b...` (chains of
371+
/// `<ident>-NOT-<ident>.` ending in a parse error) trigger 2^N exploration
372+
/// because each `-NOT-` segment otherwise re-walks the rest of the chain.
373+
failed_unary_not_positions: BTreeSet<usize>,
362374
}
363375

364376
impl<'a> Parser<'a> {
@@ -385,6 +397,7 @@ impl<'a> Parser<'a> {
385397
dialect,
386398
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
387399
options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()),
400+
failed_unary_not_positions: BTreeSet::new(),
388401
}
389402
}
390403

@@ -446,6 +459,7 @@ impl<'a> Parser<'a> {
446459
pub fn with_tokens_with_locations(mut self, tokens: Vec<TokenWithSpan>) -> Self {
447460
self.tokens = tokens;
448461
self.index = 0;
462+
self.failed_unary_not_positions.clear();
449463
self
450464
}
451465

@@ -1801,6 +1815,17 @@ impl<'a> Parser<'a> {
18011815
// We first try to parse the word and following tokens as a special expression, and if that fails,
18021816
// we rollback and try to parse it as an identifier.
18031817
let w = w.clone();
1818+
// If we already tried to parse `NOT` as a unary prefix operator
1819+
// at this exact position and it failed, skip the speculative
1820+
// path and fall back to identifier interpretation directly. The
1821+
// speculative `parse_not` re-walks the rest of the expression,
1822+
// so repeating it on every visit causes 2^N work on chains
1823+
// like `x-not-b.x-not-b...` that end in a parse error.
1824+
if w.keyword == Keyword::NOT
1825+
&& self.failed_unary_not_positions.contains(&self.index)
1826+
{
1827+
return self.parse_expr_prefix_by_unreserved_word(&w, span);
1828+
}
18041829
match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) {
18051830
// This word indicated an expression prefix and parsing was successful
18061831
Ok(Some(expr)) => Ok(expr),
@@ -1815,6 +1840,9 @@ impl<'a> Parser<'a> {
18151840
// we rollback and return the parsing error we got from trying to parse a
18161841
// special expression (to maintain backwards compatibility of parsing errors).
18171842
Err(e) => {
1843+
if w.keyword == Keyword::NOT {
1844+
self.failed_unary_not_positions.insert(self.index);
1845+
}
18181846
if !self.dialect.is_reserved_for_identifier(w.keyword) {
18191847
if let Ok(Some(expr)) = self.maybe_parse(|parser| {
18201848
parser.parse_expr_prefix_by_unreserved_word(&w, span)

tests/sqlparser_common.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19056,3 +19056,29 @@ fn parse_compound_keyword_chain_no_exponential_blowup() {
1905619056
rx.recv_timeout(Duration::from_secs(5))
1905719057
.expect("parser should handle this quickly, not loop exponentially");
1905819058
}
19059+
19060+
/// Regression test for the 2^N parse-time blowup on chains of
19061+
/// `<ident>-NOT-<ident>.` segments ending in a parse error. Each `-NOT-`
19062+
/// segment used to trigger a speculative `parse_not` that re-walks the rest of
19063+
/// the chain, so re-entering it at every `NOT` doubled the work. The fix
19064+
/// caches positions where the speculative `parse_not` already failed and
19065+
/// skips repeating it. Post-fix the parser rejects this in well under a
19066+
/// millisecond, so the timeout is a hang guard, not a perf threshold.
19067+
#[test]
19068+
fn parse_not_chain_no_exponential_blowup() {
19069+
use std::sync::mpsc;
19070+
use std::thread;
19071+
use std::time::Duration;
19072+
19073+
let body: String = std::iter::repeat_n("x-not-b.", 30).collect();
19074+
let sql = format!("SELECT {body}");
19075+
19076+
let (tx, rx) = mpsc::channel();
19077+
thread::spawn(move || {
19078+
let _ = Parser::parse_sql(&GenericDialect {}, &sql);
19079+
let _ = tx.send(());
19080+
});
19081+
19082+
rx.recv_timeout(Duration::from_secs(5))
19083+
.expect("parser should reject this quickly, not loop exponentially");
19084+
}

0 commit comments

Comments
 (0)