Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public static void register(BuiltinFunctionRepository repository) {
repository.register(greater());
repository.register(gte());
repository.register(like());
repository.register(ilike());
repository.register(notLike());
repository.register(regexp());
}
Expand Down Expand Up @@ -391,6 +392,12 @@ private static DefaultFunctionResolver like() {
impl(nullMissingHandling(OperatorUtils::matches3), BOOLEAN, STRING, STRING, BOOLEAN));
}

/**
 * Defines the resolver for the {@code ILIKE} built-in function.
 *
 * <p>A single implementation is declared: {@code OperatorUtils::matches2} wrapped in {@code
 * nullMissingHandling}, with the type list {@code BOOLEAN, STRING, STRING}.
 *
 * <p>NOTE(review): by analogy with the sibling resolvers, the first type is presumably the return
 * type and the remaining two the argument types (text and pattern) - confirm against {@code impl}.
 *
 * @return the resolver defined under the name of {@code BuiltinFunctionName.ILIKE}
 */
private static DefaultFunctionResolver ilike() {
return define(
BuiltinFunctionName.ILIKE.getName(),
impl(nullMissingHandling(OperatorUtils::matches2), BOOLEAN, STRING, STRING));
}

private static DefaultFunctionResolver regexp() {
return define(
BuiltinFunctionName.REGEXP.getName(),
Expand Down
65 changes: 65 additions & 0 deletions docs/user/ppl/functions/condition.md
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,71 @@ fetched rows / total rows = 1/1
+-----+
```

## CONTAINS

### Description

Usage: `field contains 'substring'` returns TRUE if the field value contains the given substring (case-insensitive), FALSE otherwise.

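The `contains` operator is a CloudWatch-style comparison operator that performs case-insensitive substring matching. It is syntactic sugar for an `ilike` comparison against `%substring%`; any `%`, `_`, or `\` characters in the substring are escaped, so they are matched literally rather than as wildcards.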

Syntax: `<field> contains '<string_literal>'`

- The left-hand side must be a field reference.
- The right-hand side must be a string literal. Any other right-hand side, such as a field reference or a non-string literal, raises a semantic error.
- Matching is case-insensitive.

**Argument type:** `STRING`
**Return type:** `BOOLEAN`

### Example

Basic substring filter:

```ppl
source=accounts
| where firstname contains 'mbe'
| fields firstname, age
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------+-----+
| firstname | age |
|-----------+-----|
| Amber | 32 |
+-----------+-----+
```

Both the `contains` keyword and the substring are case-insensitive, so all of the following are equivalent:

```ppl ignore
source=accounts | where firstname contains 'mbe'
source=accounts | where firstname CONTAINS 'MBE'
source=accounts | where firstname Contains 'Mbe'
```

Combining with other conditions:

```ppl
source=accounts
| where employer contains 'ami' AND age > 30
| fields firstname, employer, age
```

Expected output:

```text
fetched rows / total rows = 1/1
+-----------+----------+-----+
| firstname | employer | age |
|-----------+----------+-----|
| Amber | Pyrami | 32 |
+-----------+----------+-----+
```

## REGEXP_MATCH

### Description
Expand Down
1 change: 1 addition & 0 deletions docs/user/ppl/functions/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ PPL supports a wide range of built-in functions for data processing and analysis
- [EARLIEST](condition.md/#earliest)
- [LATEST](condition.md/#latest)
- [REGEXP_MATCH](condition.md/#regexp_match)
- [CONTAINS](condition.md/#contains)

- [Type Conversion Functions](conversion.md)
- [CAST](conversion.md/#cast)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,34 @@ public void testLikeOperatorCaseInsensitive() throws IOException {
verifyDataRows(result3, rows("Amber"));
}

@Test
public void testContainsOperator() throws IOException {
JSONObject result =
executeQuery(
String.format(
"source=%s | where firstname contains 'mbe' | fields firstname",
Copy link
Member

@LantaoJin LantaoJin Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the contains case-sensitive or insensitive? Need more tests for this.
Can you link the cloudwatch doc link in description?
And this PR should include user document updates. maybe in condition.md

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think contains is case-insensitive since it's implemented with ILIKE with %value% wrapping.
If a user writes firstname contains '%', the wrapped pattern becomes %%% which would match everything, which is likely unintended. I'll fix this and add more test cases.
Here is the doc provided: https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/CWL_QuerySyntax-Filter.html

TEST_INDEX_ACCOUNT));
verifyDataRows(result, rows("Amber"), rows("Chambers"));

result =
executeQuery(
String.format(
"source=%s | where firstname contains 'zzz' | fields firstname",
TEST_INDEX_ACCOUNT));
assertEquals(0, result.getInt("total"));
}

@Test
public void testContainsOperatorCaseInsensitive() throws IOException {
  // The needle is upper-case on purpose: contains is evaluated with ilike semantics,
  // so it must still match the mixed-case first names.
  String query =
      String.format(
          "source=%s | where firstname contains 'MBE' | fields firstname", TEST_INDEX_ACCOUNT);
  JSONObject response = executeQuery(query);
  verifyDataRows(response, rows("Amber"), rows("Chambers"));
}

@Test
public void testIsNullFunction() throws IOException {
JSONObject result =
Expand Down
1 change: 1 addition & 0 deletions language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ REDUCE: 'REDUCE';

// BOOL FUNCTIONS
LIKE: 'LIKE';
CONTAINS: 'CONTAINS';
ISNULL: 'ISNULL';
ISNOTNULL: 'ISNOTNULL';
BETWEEN: 'BETWEEN';
Expand Down
1 change: 1 addition & 0 deletions language-grammar/src/main/antlr4/OpenSearchPPLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -945,6 +945,7 @@ geoIpProperty
| GREATER
| NOT_GREATER
| REGEXP
| CONTAINS
;

singleFieldRelevanceFunctionName
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ public ScriptQueryUnSupportedException(String message) {
.put(BuiltinFunctionName.LTE.getName(), new RangeQuery(Comparison.LTE))
.put(BuiltinFunctionName.GTE.getName(), new RangeQuery(Comparison.GTE))
.put(BuiltinFunctionName.LIKE.getName(), new LikeQuery())
.put(BuiltinFunctionName.ILIKE.getName(), new LikeQuery())
.put(BuiltinFunctionName.MATCH.getName(), new MatchQuery())
.put(BuiltinFunctionName.MATCH_PHRASE.getName(), new MatchPhraseQuery())
.put(BuiltinFunctionName.MATCHPHRASE.getName(), new MatchPhraseQuery())
Expand Down
1 change: 1 addition & 0 deletions ppl/src/main/antlr/OpenSearchPPLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,7 @@ CAST: 'CAST';
// BOOL FUNCTIONS
LIKE: 'LIKE';
ILIKE: 'ILIKE';
CONTAINS: 'CONTAINS';
ISNULL: 'ISNULL';
ISNOTNULL: 'ISNOTNULL';
CIDRMATCH: 'CIDRMATCH';
Expand Down
2 changes: 2 additions & 0 deletions ppl/src/main/antlr/OpenSearchPPLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -1444,6 +1444,7 @@ positionFunctionName
| REGEXP
| LIKE
| ILIKE
| CONTAINS
;

singleFieldRelevanceFunctionName
Expand Down Expand Up @@ -1609,6 +1610,7 @@ searchableKeyWord
| ELSE
| ARROW
| BETWEEN
| CONTAINS
| EXISTS
| SOURCE
| INDEX
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,18 @@ public UnresolvedExpression visitCompareExpr(CompareExprContext ctx) {
String operator = ctx.comparisonOperator().getText();
if ("==".equals(operator)) {
operator = EQUAL.getName().getFunctionName();
} else if ("contains".equalsIgnoreCase(operator)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: In principle this should be a const like LIKE but it's probably fine

We already broke the const in the previous if with "==", if someone doesn't like it then they can review this file for consts in another PR.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense to me. Thank you for reviewing it!

UnresolvedExpression left = visit(ctx.left);
UnresolvedExpression right = visit(ctx.right);
if (!(right instanceof Literal) || ((Literal) right).getType() != DataType.STRING) {
throw new SemanticCheckException(
"The right-hand side of 'contains' must be a string literal");
}
String raw = ((Literal) right).getValue().toString();
String escaped = raw.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_");
String wrapped = "%" + escaped + "%";
return new Compare(
ILIKE.getName().getFunctionName(), left, new Literal(wrapped, DataType.STRING));
} else if (LIKE.getName().getFunctionName().equalsIgnoreCase(operator)
&& UnresolvedPlanHelper.isCalciteEnabled(astBuilder.getSettings())) {
operator =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
import org.opensearch.sql.ast.tree.Chart;
import org.opensearch.sql.calcite.plan.OpenSearchConstants;
import org.opensearch.sql.common.antlr.SyntaxCheckException;
import org.opensearch.sql.exception.SemanticCheckException;

public class AstExpressionBuilderTest extends AstBuilderTest {
@Test
Expand Down Expand Up @@ -227,6 +228,59 @@ public void testLikeOperatorCaseInsensitive() {
filter(relation("t"), compare("ilike", field("a"), stringLiteral("pattern"))));
}

@Test
public void testContainsOperatorExpr() {
  // contains is parsed into an ilike comparison whose literal is wrapped in % wildcards.
  Node helloFilter =
      filter(relation("t"), compare("ilike", field("a"), stringLiteral("%hello%")));
  assertEqual("source=t | where a contains 'hello'", helloFilter);

  // Same rewrite for a different field name and needle.
  Node errFilter =
      filter(relation("t"), compare("ilike", field("message"), stringLiteral("%err%")));
  assertEqual("source=t | where message contains 'err'", errFilter);
}

@Test
public void testContainsOperatorCaseInsensitive() {
  // The keyword itself is case-insensitive: each spelling must yield the same ilike filter.
  String[] queries = {
    "source=t | where a CONTAINS 'hello'", "source=t | where a Contains 'hello'",
  };
  for (String query : queries) {
    assertEqual(
        query, filter(relation("t"), compare("ilike", field("a"), stringLiteral("%hello%"))));
  }
}

@Test
public void testContainsOperatorNonLiteralRhsThrows() {
  // A field reference on the right-hand side must be rejected with a semantic error; the
  // expected plan is never compared, so null is passed for it.
  String query = "source=t | where a contains b";
  assertThrows(SemanticCheckException.class, () -> assertEqual(query, (Node) null));
}

@Test
public void testContainsOperatorEscapesSpecialChars() {
  // Each row pairs a PPL query with the ilike pattern its needle must be rewritten to.
  String[][] cases = {
    // % must be escaped so it is treated as a literal character, not a wildcard
    {"source=t | where a contains '%'", "%\\%%"},
    // _ must be escaped so it is treated as a literal character, not a single-char wildcard
    {"source=t | where a contains '_'", "%\\_%"},
    // backslash in PPL is written as '\\'; it unquotes to \ and is then escaped to \\ in the
    // pattern. The Java literal below produces the PPL text: source=t | where a contains '\\'
    {"source=t | where a contains '\\\\'", "%\\\\%"},
    // mixed special characters are all escaped
    {"source=t | where a contains 'foo%bar_baz'", "%foo\\%bar\\_baz%"},
  };

  for (String[] testCase : cases) {
    String query = testCase[0];
    String expectedPattern = testCase[1];
    assertEqual(
        query,
        filter(relation("t"), compare("ilike", field("a"), stringLiteral(expectedPattern))));
  }
}

@Test
public void testBooleanIsNullFunction() {
assertEqual(
Expand Down
Loading