Skip to content

Commit ce62a47

Browse files
Merge branch 'pathological3' into pathological-combined
# Conflicts: # sqlparser_bench/benches/sqlparser_bench.rs # tests/sqlparser_common.rs
2 parents 62c053a + 02ea2d6 commit ce62a47

3 files changed

Lines changed: 79 additions & 15 deletions

File tree

sqlparser_bench/benches/sqlparser_bench.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,12 +202,36 @@ fn parse_named_arg_chain(c: &mut Criterion) {
202202
group.finish();
203203
}
204204

205+
/// Benchmark parsing pathological compound chains with a reserved keyword in
206+
/// field position, like `SELECT x.not-b.not-b...`. The `.not-b` shape used to
207+
/// cause 2^N work in `parse_compound_expr` because `parse_prefix` descended
208+
/// into `parse_not` -> `parse_subexpr`, re-walking the remaining chain at
209+
/// every segment.
210+
fn parse_compound_keyword_chain(c: &mut Criterion) {
211+
let mut group = c.benchmark_group("parse_compound_keyword_chain");
212+
let dialect = GenericDialect {};
213+
214+
for &n in &[5usize, 10, 15] {
215+
let body = std::iter::repeat_n(".not-b", n).collect::<String>();
216+
let sql = format!("SELECT x{body}");
217+
218+
group.bench_function(format!("chain_{n}"), |b| {
219+
b.iter(|| {
220+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
221+
});
222+
});
223+
}
224+
225+
group.finish();
226+
}
227+
205228
criterion_group!(
206229
benches,
207230
basic_queries,
208231
word_to_ident,
209232
parse_many_identifiers,
210233
parse_compound_chain,
211-
parse_named_arg_chain
234+
parse_named_arg_chain,
235+
parse_compound_keyword_chain
212236
);
213237
criterion_main!(benches);

src/parser/mod.rs

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2036,20 +2036,35 @@ impl<'a> Parser<'a> {
20362036
// recursive `parse_subexpr` would re-walk the rest of the chain at
20372037
// every dot.
20382038
_ => {
2039-
let expr = self.maybe_parse(|parser| {
2040-
let expr = parser.parse_prefix()?;
2041-
match &expr {
2042-
Expr::CompoundFieldAccess { .. }
2043-
| Expr::CompoundIdentifier(_)
2044-
| Expr::Identifier(_)
2045-
| Expr::Value(_)
2046-
| Expr::Function(_) => Ok(expr),
2047-
_ => parser.expected_ref(
2048-
"an identifier or value",
2049-
parser.peek_token_ref(),
2050-
),
2051-
}
2052-
})?;
2039+
// For a plain `Word` field (not followed by `(`), skip the
2040+
// speculative `parse_prefix`. The only result the validator
2041+
// below would accept is `Identifier`, which `parse_identifier`
2042+
// in the None branch produces directly. This avoids 2^N work
2043+
// on chains like `.not-b.not-b...` where `parse_prefix` would
2044+
// descend into `parse_not` and re-walk the remaining chain at
2045+
// every segment.
2046+
let word_field_no_lparen =
2047+
matches!(self.peek_token_ref().token, Token::Word(_))
2048+
&& self.peek_nth_token_ref(1).token != Token::LParen;
2049+
2050+
let expr = if word_field_no_lparen {
2051+
None
2052+
} else {
2053+
self.maybe_parse(|parser| {
2054+
let expr = parser.parse_prefix()?;
2055+
match &expr {
2056+
Expr::CompoundFieldAccess { .. }
2057+
| Expr::CompoundIdentifier(_)
2058+
| Expr::Identifier(_)
2059+
| Expr::Value(_)
2060+
| Expr::Function(_) => Ok(expr),
2061+
_ => parser.expected_ref(
2062+
"an identifier or value",
2063+
parser.peek_token_ref(),
2064+
),
2065+
}
2066+
})?
2067+
};
20532068

20542069
match expr {
20552070
// If we get back a compound field access or identifier,

tests/sqlparser_common.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19031,3 +19031,28 @@ fn parse_named_arg_chain_no_exponential_blowup() {
1903119031
rx.recv_timeout(Duration::from_secs(5))
1903219032
.expect("PostgreSQL parser should reject this quickly, not loop exponentially");
1903319033
}
19034+
19035+
/// Regression test for the 2^N parse-time blowup in `parse_compound_expr` on
19036+
/// chains like `x.not-b.not-b...`. The `NOT` keyword in field position drives
19037+
/// `parse_prefix` -> `parse_not` -> `parse_subexpr`, which re-walks the
19038+
/// remaining chain at every segment and doubles the work. Post-fix the parser
19039+
/// handles 25 segments in well under a millisecond, so the timeout is a hang
19040+
/// guard, not a perf threshold.
19041+
#[test]
19042+
fn parse_compound_keyword_chain_no_exponential_blowup() {
19043+
use std::sync::mpsc;
19044+
use std::thread;
19045+
use std::time::Duration;
19046+
19047+
let body: String = std::iter::repeat_n(".not-b", 25).collect();
19048+
let sql = format!("SELECT x{body}");
19049+
19050+
let (tx, rx) = mpsc::channel();
19051+
thread::spawn(move || {
19052+
let _ = Parser::parse_sql(&GenericDialect {}, &sql);
19053+
let _ = tx.send(());
19054+
});
19055+
19056+
rx.recv_timeout(Duration::from_secs(5))
19057+
.expect("parser should handle this quickly, not loop exponentially");
19058+
}

0 commit comments

Comments
 (0)