Skip to content

Commit 306abb5

Browse files
Parser: fix exponential parse time on speculative prefix parsing
1 parent f63e42f commit 306abb5

3 files changed

Lines changed: 143 additions & 3 deletions

File tree

sqlparser_bench/benches/sqlparser_bench.rs

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use criterion::{criterion_group, criterion_main, Criterion};
19-
use sqlparser::dialect::{GenericDialect, PostgreSqlDialect};
19+
use sqlparser::dialect::{GenericDialect, PostgreSqlDialect, SQLiteDialect};
2020
use sqlparser::keywords::Keyword;
2121
use sqlparser::parser::Parser;
2222
use sqlparser::tokenizer::{Span, Word};
@@ -249,6 +249,51 @@ fn parse_not_chain(c: &mut Criterion) {
249249
group.finish();
250250
}
251251

252+
/// Benchmark parsing pathological `IF(<keyword-fn>(<keyword-fn>(...x` chains
253+
/// that previously caused 2^N work in `parse_prefix`. Each nested
254+
/// `current_time(` segment used to be explored twice at every level (once via
255+
/// the speculative reserved-word arm, once via the unreserved-word fallback),
256+
/// doubling work per level. Post-fix the cost is linear in chain length.
257+
fn parse_prefix_keyword_call_chain(c: &mut Criterion) {
258+
let mut group = c.benchmark_group("parse_prefix_keyword_call_chain");
259+
let dialect = PostgreSqlDialect {};
260+
261+
for &n in &[10usize, 20, 30] {
262+
let sql = String::from("if(") + &"current_time(".repeat(n) + "x";
263+
264+
group.bench_function(format!("chain_{n}"), |b| {
265+
b.iter(|| {
266+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
267+
});
268+
});
269+
}
270+
271+
group.finish();
272+
}
273+
274+
/// Benchmark parsing pathological `case-case-case-...c` chains that
275+
/// previously caused 2^N work in `parse_prefix`. Each `case` token used to
276+
/// trigger a speculative `parse_case_expr` that recursively descends the
277+
/// chain, but the unreserved-word fallback returns `Identifier(case)` so the
278+
/// overall `parse_prefix` succeeds and the outer failure cache never fires.
279+
/// Post-fix the per-arm cache short-circuits the speculative descent.
280+
fn parse_prefix_case_chain(c: &mut Criterion) {
281+
let mut group = c.benchmark_group("parse_prefix_case_chain");
282+
let dialect = SQLiteDialect {};
283+
284+
for &n in &[10usize, 20, 30] {
285+
let sql = "case\t-".repeat(n) + "c";
286+
287+
group.bench_function(format!("chain_{n}"), |b| {
288+
b.iter(|| {
289+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
290+
});
291+
});
292+
}
293+
294+
group.finish();
295+
}
296+
252297
criterion_group!(
253298
benches,
254299
basic_queries,
@@ -257,6 +302,8 @@ criterion_group!(
257302
parse_compound_chain,
258303
parse_named_arg_chain,
259304
parse_compound_keyword_chain,
260-
parse_not_chain
305+
parse_not_chain,
306+
parse_prefix_keyword_call_chain,
307+
parse_prefix_case_chain
261308
);
262309
criterion_main!(benches);

src/parser/mod.rs

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use alloc::collections::BTreeSet;
1717
#[cfg(not(feature = "std"))]
1818
use alloc::{
1919
boxed::Box,
20+
collections::BTreeMap,
2021
format,
2122
string::{String, ToString},
2223
vec,
@@ -26,6 +27,9 @@ use core::{
2627
fmt::{self, Display},
2728
str::FromStr,
2829
};
30+
#[cfg(feature = "std")]
31+
use std::collections::BTreeMap;
32+
2933
use helpers::attached_token::AttachedToken;
3034
#[cfg(feature = "std")]
3135
use std::collections::BTreeSet;
@@ -371,6 +375,12 @@ pub struct Parser<'a> {
371375
/// `<ident>-NOT-<ident>.` ending in a parse error) trigger 2^N exploration
372376
/// because each `-NOT-` segment otherwise re-walks the rest of the chain.
373377
failed_unary_not_positions: BTreeSet<usize>,
378+
/// Cached errors (keyed by starting token index) from `parse_prefix` calls that returned `Err`. See
379+
/// [`Parser::parse_prefix`] for the 2^N patterns this guards.
380+
failed_prefix_positions: BTreeMap<usize, ParserError>,
381+
/// Cached errors (keyed by token index) from the speculative reserved-word prefix arm. See
382+
/// [`Parser::parse_prefix`] for the 2^N patterns this guards.
383+
failed_reserved_word_prefix_positions: BTreeMap<usize, ParserError>,
374384
}
375385

376386
impl<'a> Parser<'a> {
@@ -398,6 +408,8 @@ impl<'a> Parser<'a> {
398408
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
399409
options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()),
400410
failed_unary_not_positions: BTreeSet::new(),
411+
failed_prefix_positions: BTreeMap::new(),
412+
failed_reserved_word_prefix_positions: BTreeMap::new(),
401413
}
402414
}
403415

@@ -460,6 +472,8 @@ impl<'a> Parser<'a> {
460472
self.tokens = tokens;
461473
self.index = 0;
462474
self.failed_unary_not_positions.clear();
475+
self.failed_prefix_positions.clear();
476+
self.failed_reserved_word_prefix_positions.clear();
463477
self
464478
}
465479

@@ -1731,6 +1745,23 @@ impl<'a> Parser<'a> {
17311745
return prefix;
17321746
}
17331747

1748+
// Memoize parse_prefix failures to break 2^N speculation when both
1749+
// prefix arms fail at every level (e.g. `IF(current_time(...x`).
1750+
// The per-arm cache in `parse_prefix_inner` complements this for
1751+
// chains where the reserved arm fails but the unreserved fallback
1752+
// succeeds (e.g. `case-case-...c`).
1753+
let start_index = self.index;
1754+
if let Some(cached) = self.failed_prefix_positions.get(&start_index) {
1755+
return Err(cached.clone());
1756+
}
1757+
let result = self.parse_prefix_inner();
1758+
if let Err(ref e) = result {
1759+
self.failed_prefix_positions.insert(start_index, e.clone());
1760+
}
1761+
result
1762+
}
1763+
1764+
fn parse_prefix_inner(&mut self) -> Result<Expr, ParserError> {
17341765
// PostgreSQL allows any string literal to be preceded by a type name, indicating that the
17351766
// string literal represents a literal of that type. Some examples:
17361767
//
@@ -1826,7 +1857,21 @@ impl<'a> Parser<'a> {
18261857
{
18271858
return self.parse_expr_prefix_by_unreserved_word(&w, span);
18281859
}
1829-
match self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span)) {
1860+
// Memoize failed speculative reserved-word parses. When
1861+
// the reserved arm (CASE, CURRENT_TIME, etc.) does
1862+
// exponential work but the unreserved fallback ultimately
1863+
// succeeds, the overall `parse_prefix` returns `Ok` and the
1864+
// outer cache never fires. Chains like `case-case-...c`
1865+
// need this per-arm cache to break the doubling.
1866+
let try_parse_result = if let Some(cached) = self
1867+
.failed_reserved_word_prefix_positions
1868+
.get(&next_token_index)
1869+
{
1870+
Err(cached.clone())
1871+
} else {
1872+
self.try_parse(|parser| parser.parse_expr_prefix_by_reserved_word(&w, span))
1873+
};
1874+
match try_parse_result {
18301875
// This word indicated an expression prefix and parsing was successful
18311876
Ok(Some(expr)) => Ok(expr),
18321877

@@ -1843,6 +1888,8 @@ impl<'a> Parser<'a> {
18431888
if w.keyword == Keyword::NOT {
18441889
self.failed_unary_not_positions.insert(self.index);
18451890
}
1891+
self.failed_reserved_word_prefix_positions
1892+
.insert(next_token_index, e.clone());
18461893
if !self.dialect.is_reserved_for_identifier(w.keyword) {
18471894
if let Ok(Some(expr)) = self.maybe_parse(|parser| {
18481895
parser.parse_expr_prefix_by_unreserved_word(&w, span)

tests/sqlparser_common.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19082,3 +19082,49 @@ fn parse_not_chain_no_exponential_blowup() {
1908219082
rx.recv_timeout(Duration::from_secs(5))
1908319083
.expect("parser should reject this quickly, not loop exponentially");
1908419084
}
19085+
19086+
/// Regression test for the 2^N parse-time blowup in `parse_prefix` on inputs
19087+
/// like `IF(current_time(current_time(...x`. Each nested `current_time(` used
19088+
/// to be explored twice at every level (once via the speculative reserved-word
19089+
/// arm, once via the unreserved-word fallback), doubling work per level.
19090+
/// Post-fix the failing parse short-circuits via the position-keyed cache.
19091+
#[test]
19092+
fn parse_prefix_keyword_call_chain_no_exponential_blowup() {
19093+
use std::sync::mpsc;
19094+
use std::thread;
19095+
use std::time::Duration;
19096+
19097+
let sql = String::from("if(") + &"current_time(".repeat(30) + "x";
19098+
19099+
let (tx, rx) = mpsc::channel();
19100+
thread::spawn(move || {
19101+
let _ = Parser::parse_sql(&PostgreSqlDialect {}, &sql);
19102+
let _ = tx.send(());
19103+
});
19104+
19105+
rx.recv_timeout(Duration::from_secs(5))
19106+
.expect("parser should reject this quickly, not loop exponentially");
19107+
}
19108+
19109+
/// Regression test for the 2^N parse-time blowup in `parse_prefix` on inputs
19110+
/// like `case-case-case-...c`. Each `case` token triggers a speculative
19111+
/// `parse_case_expr` that fails, but the unreserved-word fallback returns
19112+
/// `Identifier(case)`, so the outer failure cache never fires. Post-fix the
19113+
/// per-arm cache short-circuits the speculative descent.
19114+
#[test]
19115+
fn parse_prefix_case_chain_no_exponential_blowup() {
19116+
use std::sync::mpsc;
19117+
use std::thread;
19118+
use std::time::Duration;
19119+
19120+
let sql = "case\t-".repeat(30) + "c";
19121+
19122+
let (tx, rx) = mpsc::channel();
19123+
thread::spawn(move || {
19124+
let _ = Parser::parse_sql(&SQLiteDialect {}, &sql);
19125+
let _ = tx.send(());
19126+
});
19127+
19128+
rx.recv_timeout(Duration::from_secs(5))
19129+
.expect("parser should reject this quickly, not loop exponentially");
19130+
}

0 commit comments

Comments
 (0)