Skip to content

Commit e7f4532

Browse files
refactor: Function and AllTableColumn lookahead
Signed-off-by: Andreas Reichel <andreas@manticore-projects.com> Signed-off-by: manticore-projects <andreas@manticore-projects.com>
1 parent a7a1d12 commit e7f4532

File tree

2 files changed

+184
-21
lines changed

2 files changed

+184
-21
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ As per March-2026, the productions `Condition()`, `RegularCondition()` and `AndE
8080

8181
```text
8282
Benchmark (version) Mode Cnt Score Error Units
83-
JSQLParserBenchmark.parseSQLStatements latest avgt 15 33.995 ± 0.764 ms/op <-- March/26
83+
JSQLParserBenchmark.parseSQLStatements latest avgt 15 15.908 ± 0.446 ms/op <-- March/26
8484
JSQLParserBenchmark.parseSQLStatements 5.3 avgt 15 84.687 ± 3.321 ms/op
8585
JSQLParserBenchmark.parseSQLStatements 5.1 avgt 15 86.592 ± 5.781 ms/op
8686
```

src/main/jjtree/net/sf/jsqlparser/parser/JSqlParserCC.jjt

Lines changed: 183 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,8 @@ public class CCJSqlParser extends AbstractJSqlParser<CCJSqlParser> {
195195
}
196196
switch (token.image.charAt(0)) {
197197
case '>': // >, >=
198-
case '=': // =, =*
198+
case '=': // =, =* but not => Oracle/PostgreSQL named parameter syntax
199+
return !token.image.equals("=>");
199200
case '~': // ~, ~*
200201
return true;
201202
case '<': // <, <=, <>, <@, <->, <#>, <=>, <&
@@ -222,6 +223,163 @@ public class CCJSqlParser extends AbstractJSqlParser<CCJSqlParser> {
222223
|| nextKind == K_VALUES
223224
|| nextKind == K_FROM;
224225
}
226+
227+
/**
228+
* Tokens that have dedicated branches in PrimaryExpression AFTER the Function branch.
229+
* If isFunctionAhead() returns true for these, Function() would consume them and fail.
230+
*/
231+
private boolean isNonFunctionKeyword(Token t) {
232+
switch (t.kind) {
233+
case K_CONNECT_BY_ROOT: // CONNECT_BY_ROOT (expr)
234+
case K_PRIOR: // PRIOR expr
235+
case K_STRUCT: // STRUCT(...)
236+
return true;
237+
default:
238+
return false;
239+
}
240+
}
241+
242+
/**
243+
* Scans ahead through a dotted identifier chain and checks if '(' follows.
244+
* Distinguishes function calls like func(), schema.func(), a.b.c.func()
245+
* from column references like col, schema.col, a.b.c.col.
246+
*
247+
* Replaces LOOKAHEAD(16) on Function() with a targeted O(chain-length) check.
248+
*/
249+
protected boolean isFunctionAhead() {
250+
int i = 1;
251+
Token t = getToken(i);
252+
253+
// JDBC escape function: {fn ...} — must check for FN keyword
254+
if (t.image.equals("{")) {
255+
return getToken(2).kind == K_FN;
256+
}
257+
258+
// Optional APPROXIMATE keyword
259+
if (t.kind == K_APPROXIMATE) {
260+
i++;
261+
t = getToken(i);
262+
}
263+
264+
// Exclude tokens that have their own dedicated branches
265+
// after Function() in PrimaryExpression
266+
if (isNonFunctionKeyword(t)) {
267+
return false;
268+
}
269+
270+
// First token must not be a literal, bracket, or EOF
271+
if (t.kind == S_LONG || t.kind == S_DOUBLE || t.kind == S_HEX
272+
|| t.kind == S_CHAR_LITERAL || t.kind == OPENING_BRACKET
273+
|| t.kind == CLOSING_BRACKET || t.kind == EOF) {
274+
return false;
275+
}
276+
i++;
277+
278+
// Walk through dotted name chain
279+
while (true) {
280+
t = getToken(i);
281+
if (t.image.equals(".") || t.image.equals("..")
282+
|| t.image.equals("...") || t.image.equals(":")) {
283+
i++; // skip delimiter
284+
i++; // skip next name part
285+
} else {
286+
break;
287+
}
288+
}
289+
290+
// Must be followed by (
291+
if (getToken(i).kind != OPENING_BRACKET) {
292+
return false;
293+
}
294+
295+
// Exclude Oracle join syntax: column(+)
296+
if (getToken(i + 1).image.equals("+")
297+
&& getToken(i + 2).kind == CLOSING_BRACKET) {
298+
return false;
299+
}
300+
301+
return true;
302+
}
303+
304+
/**
305+
* Scans ahead through a dotted identifier chain and checks if '*' follows.
306+
* Identifies table.* patterns for AllTableColumns.
307+
*/
308+
protected boolean isAllTableColumnsAhead() {
309+
int i = 1;
310+
Token t = getToken(i);
311+
312+
// Must start with a name-like token
313+
if (t.kind == S_LONG || t.kind == S_DOUBLE || t.kind == S_HEX
314+
|| t.kind == S_CHAR_LITERAL || t.kind == OPENING_BRACKET
315+
|| t.kind == CLOSING_BRACKET || t.kind == EOF) {
316+
return false;
317+
}
318+
i++;
319+
320+
// Walk through dotted name chain
321+
while (true) {
322+
t = getToken(i);
323+
if (t.image.equals(".") || t.image.equals("..")
324+
|| t.image.equals("...")) {
325+
i++; // skip delimiter
326+
i++; // skip next part (could be "*")
327+
} else {
328+
break;
329+
}
330+
}
331+
332+
// It's AllTableColumns if the chain ended on "*"
333+
// i.e., the last name part we skipped over was "*"
334+
// Back up: the last token consumed was at (i-1)
335+
return getToken(i - 1).image.equals("*");
336+
}
337+
338+
/**
339+
* Checks if the next token can start a condition suffix
340+
* (comparison, IN, BETWEEN, LIKE, IS NULL, etc.)
341+
*
342+
* Used as the entry guard for the entire optional condition-suffix block
343+
* in Condition(), eliminating choice conflicts.
344+
*/
345+
protected boolean isConditionSuffixAhead() {
346+
if (isComparisonOperatorAhead()) {
347+
return true;
348+
}
349+
Token t = getToken(1);
350+
switch (t.kind) {
351+
// Each suffix's start token:
352+
case K_OVERLAPS: // OVERLAPS
353+
case K_IN: // IN
354+
case K_GLOBAL: // GLOBAL ... IN
355+
case K_EXCLUDES: // EXCLUDES (...)
356+
case K_INCLUDES: // INCLUDES (...)
357+
case K_BETWEEN: // BETWEEN
358+
case K_MEMBER: // MEMBER OF
359+
case K_IS: // IS [NOT] NULL / TRUE / FALSE / UNKNOWN / DISTINCT
360+
case K_ISNULL: // ISNULL
361+
case K_NOTNULL: // NOTNULL
362+
case K_LIKE: // LIKE
363+
case K_ILIKE: // ILIKE
364+
case K_RLIKE: // RLIKE
365+
case K_REGEXP_LIKE: // REGEXP_LIKE
366+
case K_REGEXP: // REGEXP
367+
case K_SIMILAR_TO: // SIMILAR TO (in LikeExpression)
368+
case K_SIMILAR: // SIMILAR TO (in SimilarToExpression)
369+
case K_MATCH_ANY: // MATCH_ANY
370+
case K_MATCH_ALL: // MATCH_ALL
371+
case K_MATCH_PHRASE: // MATCH_PHRASE
372+
case K_MATCH_PHRASE_PREFIX: // MATCH_PHRASE_PREFIX
373+
case K_MATCH_REGEXP: // MATCH_REGEXP
374+
case K_NOT: // NOT IN / NOT BETWEEN / NOT LIKE / NOT ISNULL / NOT SIMILAR
375+
return true;
376+
// Oracle (+) before IN: col(+) IN (...)
377+
case OPENING_BRACKET:
378+
return getToken(2).image.equals("+");
379+
default:
380+
return false;
381+
}
382+
}
225383
}
226384

227385
PARSER_END(CCJSqlParser)
@@ -6105,23 +6263,26 @@ Expression Condition():
61056263
}
61066264
]
61076265

6108-
[
6109-
LOOKAHEAD({ isComparisonOperatorAhead() }) result = RegularConditionRHS(left, oracleJoin)
6110-
|
6111-
LOOKAHEAD(2, <K_OVERLAPS>) result = OverlapsCondition(left)
6112-
|
6113-
LOOKAHEAD(3, {!interrupted}) result=InExpression(left)
6114-
| LOOKAHEAD(3) result=ExcludesExpression(left)
6115-
| LOOKAHEAD(3) result=IncludesExpression(left)
6116-
| LOOKAHEAD(2) result=Between(left)
6117-
| result = MemberOfExpression(left)
6118-
| LOOKAHEAD(3) result=IsNullExpression(left)
6119-
| LOOKAHEAD(3) result=IsBooleanExpression(left)
6120-
| LOOKAHEAD(3) result=IsUnknownExpression(left)
6121-
| LOOKAHEAD(2) result=LikeExpression(left)
6122-
| LOOKAHEAD(3) result=IsDistinctExpression(left)
6123-
| result=SimilarToExpression(left)
6124-
]
6266+
// Single guard: only enter if next token can start a condition suffix
6267+
[
6268+
LOOKAHEAD({ isConditionSuffixAhead() })
6269+
(
6270+
LOOKAHEAD({ isComparisonOperatorAhead() })
6271+
result = RegularConditionRHS(left, oracleJoin)
6272+
| LOOKAHEAD(2) result = OverlapsCondition(left)
6273+
| LOOKAHEAD(3) result=InExpression(left)
6274+
| LOOKAHEAD(3) result=ExcludesExpression(left)
6275+
| LOOKAHEAD(3) result=IncludesExpression(left)
6276+
| LOOKAHEAD(2) result=Between(left)
6277+
| LOOKAHEAD(2) result = MemberOfExpression(left)
6278+
| LOOKAHEAD(3) result=IsNullExpression(left)
6279+
| LOOKAHEAD(3) result=IsBooleanExpression(left)
6280+
| LOOKAHEAD(3) result=IsUnknownExpression(left)
6281+
| LOOKAHEAD(2) result=LikeExpression(left)
6282+
| LOOKAHEAD(3) result=IsDistinctExpression(left)
6283+
| result=SimilarToExpression(left)
6284+
)
6285+
]
61256286
)
61266287
{
61276288
if (oraclePrior == EqualsTo.ORACLE_PRIOR_START
@@ -6966,7 +7127,8 @@ Expression PrimaryExpression() #PrimaryExpression:
69667127

69677128
| LOOKAHEAD(2, {!interrupted}) retval= CastExpression()
69687129

6969-
| LOOKAHEAD(16) retval = Function() [ LOOKAHEAD(2) retval = AnalyticExpression( (Function) retval ) ]
7130+
| LOOKAHEAD({ !interrupted && isFunctionAhead() })
7131+
retval = Function() [ LOOKAHEAD(2) retval = AnalyticExpression( (Function) retval ) ]
69707132

69717133
| LOOKAHEAD(2) retval = DateUnitExpression()
69727134

@@ -6980,7 +7142,8 @@ Expression PrimaryExpression() #PrimaryExpression:
69807142

69817143
| LOOKAHEAD(3) retval=AllColumns(true)
69827144

6983-
| LOOKAHEAD(16) retval=AllTableColumns(true)
7145+
| LOOKAHEAD({ !interrupted && isAllTableColumnsAhead() })
7146+
retval=AllTableColumns(true)
69847147

69857148
// See issue #2207
69867149
// there is a huge! performance deterioration from this production

0 commit comments

Comments
 (0)