Skip to content

Commit 5677765

Browse files
ahkcsmanasvinibs
andauthored
Add replace command with Calcite (opensearch-project#4451)
* Add replace command with Calcite Signed-off-by: Manasvini B S <manasvis@amazon.com> # Conflicts: # core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java # docs/category.json # Conflicts: # docs/category.json * fix anonymizer test and add explainIT Signed-off-by: Kai Huang <ahkcs@amazon.com> # Conflicts: # integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java * fix category.json Signed-off-by: Kai Huang <ahkcs@amazon.com> * fix category.json Signed-off-by: Kai Huang <ahkcs@amazon.com> * update doc Signed-off-by: Kai Huang <ahkcs@amazon.com> * add nullable Signed-off-by: Kai Huang <ahkcs@amazon.com> * fixes Signed-off-by: Kai Huang <ahkcs@amazon.com> * fix Signed-off-by: Kai Huang <ahkcs@amazon.com> * change new_ handling to in-place replacement Signed-off-by: Kai Huang <ahkcs@amazon.com> * update doctest Signed-off-by: Kai Huang <ahkcs@amazon.com> * update explain Signed-off-by: Kai Huang <ahkcs@amazon.com> * update test Signed-off-by: Kai Huang <ahkcs@amazon.com> * fixes Signed-off-by: Kai Huang <ahkcs@amazon.com> * doc update Signed-off-by: Kai Huang <ahkcs@amazon.com> * update comma support Signed-off-by: Kai Huang <ahkcs@amazon.com> * remove validation logic since enforced by antlr Signed-off-by: Kai Huang <ahkcs@amazon.com> * update doc Signed-off-by: Kai Huang <ahkcs@amazon.com> * update validation logic Signed-off-by: Kai Huang <ahkcs@amazon.com> * update Signed-off-by: Kai Huang <ahkcs@amazon.com> --------- Signed-off-by: Kai Huang <ahkcs@amazon.com> Co-authored-by: Manasvini B S <manasvis@amazon.com>
1 parent e3ab9d0 commit 5677765

File tree

20 files changed

+1070
-1
lines changed

20 files changed

+1070
-1
lines changed

core/src/main/java/org/opensearch/sql/analysis/Analyzer.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
import org.opensearch.sql.ast.tree.Relation;
8686
import org.opensearch.sql.ast.tree.RelationSubquery;
8787
import org.opensearch.sql.ast.tree.Rename;
88+
import org.opensearch.sql.ast.tree.Replace;
8889
import org.opensearch.sql.ast.tree.Reverse;
8990
import org.opensearch.sql.ast.tree.Rex;
9091
import org.opensearch.sql.ast.tree.SPath;
@@ -801,6 +802,11 @@ public LogicalPlan visitCloseCursor(CloseCursor closeCursor, AnalysisContext con
801802
return new LogicalCloseCursor(closeCursor.getChild().get(0).accept(this, context));
802803
}
803804

805+
@Override
806+
public LogicalPlan visitReplace(Replace node, AnalysisContext context) {
807+
throw getOnlyForCalciteException("Replace");
808+
}
809+
804810
@Override
805811
public LogicalPlan visitJoin(Join node, AnalysisContext context) {
806812
throw getOnlyForCalciteException("Join");

core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
import org.opensearch.sql.ast.tree.Relation;
7474
import org.opensearch.sql.ast.tree.RelationSubquery;
7575
import org.opensearch.sql.ast.tree.Rename;
76+
import org.opensearch.sql.ast.tree.Replace;
7677
import org.opensearch.sql.ast.tree.Reverse;
7778
import org.opensearch.sql.ast.tree.Rex;
7879
import org.opensearch.sql.ast.tree.SPath;
@@ -245,6 +246,10 @@ public T visitRename(Rename node, C context) {
245246
return visitChildren(node, context);
246247
}
247248

249+
public T visitReplace(Replace node, C context) {
250+
return visitChildren(node, context);
251+
}
252+
248253
public T visitEval(Eval node, C context) {
249254
return visitChildren(node, context);
250255
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.ast.tree;
7+
8+
import com.google.common.collect.ImmutableList;
9+
import java.util.List;
10+
import java.util.Set;
11+
import lombok.EqualsAndHashCode;
12+
import lombok.Getter;
13+
import lombok.Setter;
14+
import lombok.ToString;
15+
import org.jetbrains.annotations.Nullable;
16+
import org.opensearch.sql.ast.AbstractNodeVisitor;
17+
import org.opensearch.sql.ast.expression.Field;
18+
19+
@Getter
20+
@Setter
21+
@ToString
22+
@EqualsAndHashCode(callSuper = false)
23+
public class Replace extends UnresolvedPlan {
24+
private final List<ReplacePair> replacePairs;
25+
private final Set<Field> fieldList;
26+
@Nullable private UnresolvedPlan child;
27+
28+
/**
29+
* Constructor with multiple pattern/replacement pairs.
30+
*
31+
* @param replacePairs List of pattern/replacement pairs
32+
* @param fieldList Set of fields to apply replacements to
33+
*/
34+
public Replace(List<ReplacePair> replacePairs, Set<Field> fieldList) {
35+
this.replacePairs = replacePairs;
36+
this.fieldList = fieldList;
37+
}
38+
39+
@Override
40+
public Replace attach(UnresolvedPlan child) {
41+
if (null == this.child) {
42+
this.child = child;
43+
} else {
44+
this.child.attach(child);
45+
}
46+
return this;
47+
}
48+
49+
@Override
50+
public List<UnresolvedPlan> getChild() {
51+
return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child);
52+
}
53+
54+
@Override
55+
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
56+
return nodeVisitor.visitReplace(this, context);
57+
}
58+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.ast.tree;
7+
8+
import lombok.AllArgsConstructor;
9+
import lombok.EqualsAndHashCode;
10+
import lombok.Getter;
11+
import lombok.ToString;
12+
import org.opensearch.sql.ast.expression.Literal;
13+
14+
/** A pair of pattern and replacement literals for the Replace command. */
15+
@Getter
16+
@AllArgsConstructor
17+
@EqualsAndHashCode
18+
@ToString
19+
public class ReplacePair {
20+
private final Literal pattern;
21+
private final Literal replacement;
22+
}

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@
121121
import org.opensearch.sql.ast.tree.Regex;
122122
import org.opensearch.sql.ast.tree.Relation;
123123
import org.opensearch.sql.ast.tree.Rename;
124+
import org.opensearch.sql.ast.tree.Replace;
125+
import org.opensearch.sql.ast.tree.ReplacePair;
124126
import org.opensearch.sql.ast.tree.Rex;
125127
import org.opensearch.sql.ast.tree.SPath;
126128
import org.opensearch.sql.ast.tree.Search;
@@ -2411,6 +2413,51 @@ public RelNode visitValues(Values values, CalcitePlanContext context) {
24112413
}
24122414
}
24132415

2416+
@Override
2417+
public RelNode visitReplace(Replace node, CalcitePlanContext context) {
2418+
visitChildren(node, context);
2419+
2420+
List<String> fieldNames = context.relBuilder.peek().getRowType().getFieldNames();
2421+
2422+
// Create a set of field names to replace for quick lookup
2423+
Set<String> fieldsToReplace =
2424+
node.getFieldList().stream().map(f -> f.getField().toString()).collect(Collectors.toSet());
2425+
2426+
// Validate that all fields to replace exist by calling field() on each
2427+
// This leverages relBuilder.field()'s built-in validation which throws
2428+
// IllegalArgumentException if any field doesn't exist
2429+
for (String fieldToReplace : fieldsToReplace) {
2430+
context.relBuilder.field(fieldToReplace);
2431+
}
2432+
2433+
List<RexNode> projectList = new ArrayList<>();
2434+
2435+
// Project all fields, replacing specified ones in-place
2436+
for (String fieldName : fieldNames) {
2437+
if (fieldsToReplace.contains(fieldName)) {
2438+
// Replace this field in-place with all pattern/replacement pairs applied sequentially
2439+
RexNode fieldRef = context.relBuilder.field(fieldName);
2440+
2441+
// Apply all replacement pairs sequentially (nested REPLACE calls)
2442+
for (ReplacePair pair : node.getReplacePairs()) {
2443+
RexNode patternNode = rexVisitor.analyze(pair.getPattern(), context);
2444+
RexNode replacementNode = rexVisitor.analyze(pair.getReplacement(), context);
2445+
fieldRef =
2446+
context.relBuilder.call(
2447+
SqlStdOperatorTable.REPLACE, fieldRef, patternNode, replacementNode);
2448+
}
2449+
2450+
projectList.add(fieldRef);
2451+
} else {
2452+
// Keep original field unchanged
2453+
projectList.add(context.relBuilder.field(fieldName));
2454+
}
2455+
}
2456+
2457+
context.relBuilder.project(projectList, fieldNames);
2458+
return context.relBuilder.peek();
2459+
}
2460+
24142461
private void buildParseRelNode(Parse node, CalcitePlanContext context) {
24152462
RexNode sourceField = rexVisitor.analyze(node.getSourceField(), context);
24162463
ParseMethod parseMethod = node.getParseMethod();

docs/category.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
"user/ppl/cmd/rare.rst",
4141
"user/ppl/cmd/regex.rst",
4242
"user/ppl/cmd/rename.rst",
43+
"user/ppl/cmd/replace.rst",
4344
"user/ppl/cmd/rex.rst",
4445
"user/ppl/cmd/search.rst",
4546
"user/ppl/cmd/showdatasources.rst",
@@ -68,4 +69,4 @@
6869
"bash_settings": [
6970
"user/ppl/admin/settings.rst"
7071
]
71-
}
72+
}

docs/user/ppl/cmd/replace.rst

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
=============
2+
replace
3+
=============
4+
5+
.. rubric:: Table of contents
6+
7+
.. contents::
8+
:local:
9+
:depth: 2
10+
11+
12+
Description
13+
============
14+
Use the ``replace`` command to replace text in one or more fields of the search result.
15+
16+
Note: This command is only available when Calcite engine is enabled.
17+
18+
19+
Syntax
20+
============
21+
replace '<pattern>' WITH '<replacement>' [, '<pattern>' WITH '<replacement>']... IN <field-name>[, <field-name>]...
22+
23+
24+
Parameters
25+
==========
26+
* **pattern**: mandatory. The text pattern you want to replace. Currently supports only plain text literals (no wildcards or regular expressions).
27+
* **replacement**: mandatory. The text you want to replace with.
28+
* **field-name**: mandatory. One or more field names where the replacement should occur.
29+
30+
31+
Examples
32+
========
33+
34+
Example 1: Replace text in one field
35+
------------------------------------
36+
37+
The example shows replacing text in one field.
38+
39+
PPL query::
40+
41+
os> source=accounts | replace "IL" WITH "Illinois" IN state | fields state;
42+
fetched rows / total rows = 4/4
43+
+----------+
44+
| state |
45+
|----------|
46+
| Illinois |
47+
| TN |
48+
| VA |
49+
| MD |
50+
+----------+
51+
52+
53+
Example 2: Replace text in multiple fields
54+
------------------------------------------
55+
56+
The example shows replacing text in multiple fields.
57+
58+
PPL query::
59+
60+
os> source=accounts | replace "IL" WITH "Illinois" IN state, address | fields state, address;
61+
fetched rows / total rows = 4/4
62+
+----------+----------------------+
63+
| state | address |
64+
|----------+----------------------|
65+
| Illinois | 880 Holmes Lane |
66+
| TN | 671 Bristol Street |
67+
| VA | 789 Madison Street |
68+
| MD | 467 Hutchinson Court |
69+
+----------+----------------------+
70+
71+
72+
Example 3: Replace with other commands in a pipeline
73+
----------------------------------------------------
74+
75+
The example shows using replace with other commands in a query pipeline.
76+
77+
PPL query::
78+
79+
os> source=accounts | replace "IL" WITH "Illinois" IN state | where age > 30 | fields state, age;
80+
fetched rows / total rows = 3/3
81+
+----------+-----+
82+
| state | age |
83+
|----------+-----|
84+
| Illinois | 32 |
85+
| TN | 36 |
86+
| MD | 33 |
87+
+----------+-----+
88+
89+
Example 4: Replace with multiple pattern/replacement pairs
90+
----------------------------------------------------------
91+
92+
The example shows using multiple pattern/replacement pairs in a single replace command. The replacements are applied sequentially.
93+
94+
PPL query::
95+
96+
os> source=accounts | replace "IL" WITH "Illinois", "TN" WITH "Tennessee" IN state | fields state;
97+
fetched rows / total rows = 4/4
98+
+-----------+
99+
| state |
100+
|-----------|
101+
| Illinois |
102+
| Tennessee |
103+
| VA |
104+
| MD |
105+
+-----------+
106+
107+
Example 5: Pattern matching with LIKE and replace
108+
------------------------------------
109+
110+
Because the ``replace`` command only supports plain string literals, you can combine it with the ``LIKE`` function in a ``where`` command when you need pattern matching.
111+
112+
PPL query::
113+
114+
os> source=accounts | where LIKE(address, '%Holmes%') | replace "Holmes" WITH "HOLMES" IN address | fields address, state, gender, age, city;
115+
fetched rows / total rows = 1/1
116+
+-----------------+-------+--------+-----+--------+
117+
| address | state | gender | age | city |
118+
|-----------------+-------+--------+-----+--------|
119+
| 880 HOLMES Lane | IL | M | 32 | Brogan |
120+
+-----------------+-------+--------+-----+--------+
121+
122+
123+
Limitations
124+
===========
125+
* Only supports plain text literals for pattern matching. Wildcards and regular expressions are not supported.
126+
* Pattern and replacement values must be string literals.
127+
* The replace command modifies the specified fields in-place.

docs/user/ppl/functions/string.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,14 @@ Argument type: STRING, STRING (regex pattern), STRING (replacement)
215215

216216
Return type: STRING
217217

218+
**Important - Regex Special Characters**: The pattern is interpreted as a regular expression. Characters like ``.``, ``*``, ``+``, ``[``, ``]``, ``(``, ``)``, ``{``, ``}``, ``^``, ``$``, ``|``, ``?``, and ``\`` have special meaning in regex. To match them literally, escape with backslashes:
219+
220+
* To match ``example.com``: use ``'example\\.com'`` (escape the dots)
221+
* To match ``value*``: use ``'value\\*'`` (escape the asterisk)
222+
* To match ``price+tax``: use ``'price\\+tax'`` (escape the plus)
223+
224+
For strings with many special characters, use ``\\Q...\\E`` to quote the entire literal string (e.g., ``'\\Qhttps://example.com/path?id=123\\E'`` matches that exact URL).
225+
218226
Literal String Replacement Examples::
219227

220228
os> source=people | eval `REPLACE('helloworld', 'world', 'universe')` = REPLACE('helloworld', 'world', 'universe'), `REPLACE('helloworld', 'invalid', 'universe')` = REPLACE('helloworld', 'invalid', 'universe') | fields `REPLACE('helloworld', 'world', 'universe')`, `REPLACE('helloworld', 'invalid', 'universe')`
@@ -225,6 +233,16 @@ Literal String Replacement Examples::
225233
| hellouniverse | helloworld |
226234
+--------------------------------------------+----------------------------------------------+
227235

236+
Escaping Special Characters Examples::
237+
238+
os> source=people | eval `Replace domain` = REPLACE('api.example.com', 'example\\.com', 'newsite.org'), `Replace with quote` = REPLACE('https://api.example.com/v1', '\\Qhttps://api.example.com\\E', 'http://localhost:8080') | fields `Replace domain`, `Replace with quote`
239+
fetched rows / total rows = 1/1
240+
+-----------------+--------------------------+
241+
| Replace domain | Replace with quote |
242+
|-----------------+--------------------------|
243+
| api.newsite.org | http://localhost:8080/v1 |
244+
+-----------------+--------------------------+
245+
228246
Regex Pattern Examples::
229247

230248
os> source=people | eval `Remove digits` = REPLACE('test123', '\\d+', ''), `Collapse spaces` = REPLACE('hello world', ' +', ' '), `Remove special` = REPLACE('hello@world!', '[^a-zA-Z]', '') | fields `Remove digits`, `Collapse spaces`, `Remove special`

docs/user/ppl/index.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ The query start with search command and then flowing a set of command delimited
124124

125125
- `trendline command <cmd/trendline.rst>`_
126126

127+
- `replace command <cmd/replace.rst>`_
128+
127129
- `where command <cmd/where.rst>`_
128130

129131
* **Functions**

integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
CalciteRegexCommandIT.class,
9090
CalciteRexCommandIT.class,
9191
CalciteRenameCommandIT.class,
92+
CalciteReplaceCommandIT.class,
9293
CalciteResourceMonitorIT.class,
9394
CalciteSearchCommandIT.class,
9495
CalciteSettingsIT.class,

0 commit comments

Comments
 (0)