Skip to content

Commit ce9cfdf

Browse files
committed
Add new operators support
1 parent 9efd4aa commit ce9cfdf

File tree

19 files changed

+325
-82
lines changed

19 files changed

+325
-82
lines changed

translator/app/translator/const.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
from os.path import abspath, dirname
2+
from typing import Union, List
23

34
APP_PATH = dirname(abspath(__file__))
45

56
CTI_MIN_LIMIT_QUERY = 10000
7+
8+
# Type accepted by the value-rendering modifiers: a single int/str or a
# homogeneous list of them. (A nested Union[Union[...]] collapses to this
# same type, so the extra wrapper was pure noise.)
DEFAULT_VALUE_TYPE = Union[int, str, List[int], List[str]]

translator/app/translator/core/render.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from abc import ABC
2121
from typing import Union, List, Dict
2222

23+
from app.translator.const import DEFAULT_VALUE_TYPE
2324
from app.translator.core.exceptions.core import NotImplementedException, StrictPlatformException
2425
from app.translator.core.exceptions.parser import UnsupportedOperatorException
2526
from app.translator.core.functions import PlatformFunctions
@@ -37,6 +38,11 @@ class BaseQueryFieldValue(ABC):
3738
def __init__(self, or_token):
3839
self.field_value = {
3940
OperatorType.EQ: self.equal_modifier,
41+
OperatorType.LT: self.less_modifier,
42+
OperatorType.LTE: self.less_or_equal_modifier,
43+
OperatorType.GT: self.greater_modifier,
44+
OperatorType.GTE: self.greater_or_equal_modifier,
45+
OperatorType.NEQ: self.not_equal_modifier,
4046
OperatorType.CONTAINS: self.contains_modifier,
4147
OperatorType.ENDSWITH: self.endswith_modifier,
4248
OperatorType.STARTSWITH: self.startswith_modifier,
@@ -45,22 +51,37 @@ def __init__(self, or_token):
4551
}
4652
self.or_token = f" {or_token} "
4753

48-
def equal_modifier(self, field, value):
54+
def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
4955
raise NotImplementedException
5056

51-
def contains_modifier(self, field, value):
57+
def less_modifier(self, field: str, value: Union[int, str]) -> str:
5258
raise NotImplementedException
5359

54-
def endswith_modifier(self, field, value):
60+
def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
5561
raise NotImplementedException
5662

57-
def startswith_modifier(self, field, value):
63+
def greater_modifier(self, field: str, value: Union[int, str]) -> str:
5864
raise NotImplementedException
5965

60-
def regex_modifier(self, field, value):
66+
def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
6167
raise NotImplementedException
6268

63-
def keywords(self, field, value):
69+
def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
70+
raise NotImplementedException
71+
72+
def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
73+
raise NotImplementedException
74+
75+
def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
76+
raise NotImplementedException
77+
78+
def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
79+
raise NotImplementedException
80+
81+
def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
82+
raise NotImplementedException
83+
84+
def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
6485
raise NotImplementedException
6586

6687
def apply_field_value(self, field, operator, value):

translator/app/translator/core/tokenizer.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class QueryTokenizer(BaseTokenizer):
4646
field_pattern = r"(?P<field_name>[a-zA-Z\._\-]+)"
4747
operator_pattern = r"\s?(?P<operator>and|or|not|AND|OR|NOT)\s?"
4848
field_value_pattern = r"""^___field___\s*___match_operator___\s*___value___"""
49-
match_operator_pattern = r"""(?:___field___\s?(?P<match_operator>ilike|contains|endswith|startswith|in|==|=|=~|!=|:|\:))\s?"""
49+
match_operator_pattern = r"""(?:___field___\s?(?P<match_operator>ilike|contains|endswith|startswith|in|>=|<=|==|>|<|=~|!=|=|:|\:))\s?"""
5050
base_value_pattern = r"(?:___value_pattern___)"
5151
_value_pattern = r"""(?:\"|\')*(?P<value>[:a-zA-Z\*0-9=+%#\-_\/\\'\,.&^@!\(\s]*)(?:\*|\'|\"|\s|\$)*"""
5252
value_pattern = base_value_pattern.replace('___value_pattern___', _value_pattern)
@@ -60,6 +60,11 @@ class QueryTokenizer(BaseTokenizer):
6060
operators_map = {
6161
"=": OperatorType.EQ,
6262
"in": OperatorType.EQ,
63+
"<": OperatorType.LT,
64+
"<=": OperatorType.LTE,
65+
">": OperatorType.GT,
66+
">=": OperatorType.GTE,
67+
"!=": OperatorType.NEQ,
6368
"contains": OperatorType.CONTAINS,
6469
"startswith": OperatorType.STARTSWITH,
6570
"endswith": OperatorType.ENDSWITH

translator/app/translator/platforms/athena/renders/athena.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
limitations under the License.
1717
-----------------------------------------------------------------
1818
"""
19+
from typing import Union
1920

21+
from app.translator.const import DEFAULT_VALUE_TYPE
2022
from app.translator.platforms.athena.const import athena_details
2123
from app.translator.platforms.athena.mapping import AthenaMappings, athena_mappings
2224
from app.translator.core.exceptions.render import UnsupportedRenderMethod
@@ -28,32 +30,49 @@
2830
class AthenaFieldValue(BaseQueryFieldValue):
2931
details: PlatformDetails = athena_details
3032

31-
def equal_modifier(self, field, value):
33+
def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
3234
if isinstance(value, list):
3335
return f"({self.or_token.join([self.equal_modifier(field=field, value=v) for v in value])})"
3436
return f"{field} = '{value}'"
3537

36-
def contains_modifier(self, field, value):
38+
def less_modifier(self, field: str, value: Union[int, str]) -> str:
39+
return f"{field} < '{value}'"
40+
41+
def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
42+
return f"{field} <= '{value}'"
43+
44+
def greater_modifier(self, field: str, value: Union[int, str]) -> str:
45+
return f"{field} > '{value}'"
46+
47+
def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
48+
return f"{field} >= '{value}'"
49+
50+
def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
    """Render a ``!=`` comparison for ``field``.

    A scalar becomes ``field != 'value'``. A list becomes a parenthesised
    group with one ``!=`` clause per element.
    """
    if isinstance(value, list):
        clauses = [self.not_equal_modifier(field=field, value=v) for v in value]
        # The clauses must be AND-ed: "f != 'a' OR f != 'b'" is satisfied by
        # every row whenever the listed values differ from each other.
        joined = " AND ".join(clauses)
        return f"({joined})"
    return f"{field} != '{value}'"
54+
55+
def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
3756
if isinstance(value, list):
3857
return f"({self.or_token.join(self.contains_modifier(field=field, value=v) for v in value)})"
3958
return f"{field} ILIKE '%{value}%' ESCAPE '\\'"
4059

41-
def endswith_modifier(self, field, value):
60+
def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
4261
if isinstance(value, list):
4362
return f"({self.or_token.join(self.endswith_modifier(field=field, value=v) for v in value)})"
4463
return f"{field} ILIKE '%{value}' ESCAPE '\\'"
4564

46-
def startswith_modifier(self, field, value):
65+
def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
4766
if isinstance(value, list):
4867
return f"({self.or_token.join(self.startswith_modifier(field=field, value=v) for v in value)})"
4968
return f"{field} ILIKE '{value}%' ESCAPE '\\'"
5069

51-
def regex_modifier(self, field, value):
70+
def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
5271
if isinstance(value, list):
5372
return f"({self.or_token.join(self.regex_modifier(field=field, value=v) for v in value)})"
5473
return f"{field} ILIKE '{value}' ESCAPE '\\'"
5574

56-
def keywords(self, field, value):
75+
def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
5776
raise UnsupportedRenderMethod(platform_name=self.details.name, method="Keywords")
5877

5978

translator/app/translator/platforms/athena/tokenizer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828
class AthenaTokenizer(QueryTokenizer):
2929
field_pattern = r'(?P<field_name>"[a-zA-Z\._\-\s]+"|[a-zA-Z\._\-]+)'
30-
match_operator_pattern = r"""(?:___field___\s?(?P<match_operator>like|in|=|>|<|>=|<=|<>|!=))\s?"""
30+
# Regex alternation picks the first alternative that matches, so every
# two-character operator has to be listed before its one-character prefix
# ("<=" and "<>" before "<", ">=" before ">", "==" and "!=" before "=").
match_operator_pattern = r"""(?:___field___\s?(?P<match_operator>like|in|<=|>=|==|<>|>|<|!=|=))\s?"""
3131
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
3232
bool_value_pattern = r"(?P<bool_value>true|false)\s*"
3333
single_quotes_value_pattern = r"""'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*)'"""
@@ -66,7 +66,7 @@ def search_field_value(self, query):
6666
should_process_value_wildcard_symbols = self.should_process_value_wildcard_symbols(operator)
6767
query, operator, value = self.search_value(query=query, operator=operator, field_name=field_name)
6868

69-
operator_token = Identifier(token_type=OperatorType.EQ)
69+
operator_token = Identifier(token_type=operator)
7070
if should_process_value_wildcard_symbols:
7171
value, operator_token = self.process_value_wildcard_symbols(
7272
value=value,
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
2+
# Range-style operator prefixes, keyed by the literal operator text.
# For each prefix:
#   "replace"    - regex fragments describing the range syntax around the value
#   "default_op" - the comparison operator the range stands for
# The fragments are raw strings so that "\[", "\*" and "\s" reach the regex
# engine untouched instead of being read as (invalid) string escapes.
COMPARISON_OPERATORS_MAP = {
    ":[* TO": {
        "replace": [r":\[\*\sTO"],
        "default_op": "<=",
    },
    ":[": {
        "replace": [r":\[", r"TO\s\*"],
        "default_op": ">=",
    },
}

translator/app/translator/platforms/base/lucene/renders/lucene.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
"""
1919
from typing import Union
2020

21+
from app.translator.const import DEFAULT_VALUE_TYPE
2122
from app.translator.core.render import BaseQueryRender
2223
from app.translator.core.render import BaseQueryFieldValue
2324

@@ -27,39 +28,57 @@ class LuceneFieldValue(BaseQueryFieldValue):
2728
def apply_value(self, value: Union[str, int]):
2829
return value
2930

30-
def equal_modifier(self, field, value):
31+
def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
3132
if isinstance(value, list):
3233
values = self.or_token.join(self.apply_value(f'{v}') for v in value)
3334
return f"{field}:({values})"
3435
return f'{field}:{self.apply_value(value)}'
3536

36-
def contains_modifier(self, field, value):
37+
def less_modifier(self, field: str, value: Union[int, str]) -> str:
38+
return f'{field}:<{self.apply_value(value)}'
39+
40+
def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
41+
return f'{field}:[* TO {self.apply_value(value)}]'
42+
43+
def greater_modifier(self, field: str, value: Union[int, str]) -> str:
44+
return f'{field}:>{self.apply_value(value)}'
45+
46+
def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
47+
return f'{field}:[{self.apply_value(value)} TO *]'
48+
49+
def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
    """Render a negated match for ``field``.

    A scalar becomes ``NOT (field:value)``. A list becomes a single negated
    group, ``NOT (field:(v1 <or_token> v2 ...))``. Every value is passed
    through ``self.apply_value`` before it is emitted.
    """
    if isinstance(value, list):
        values = self.or_token.join(self.apply_value(f"{v}") for v in value)
        # Two groups are opened here (the value list and the NOT wrapper),
        # so two closing parentheses are required.
        return f"NOT ({field}:({values}))"
    # Same "field:value" form that equal_modifier produces, wrapped in NOT.
    return f"NOT ({field}:{self.apply_value(value)})"
54+
55+
def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
3756
if isinstance(value, list):
3857
values = self.or_token.join(self.apply_value(f'*{v}*') for v in value)
3958
return f"{field}:({values})"
4059
prepared_value = self.apply_value(f"*{value}*")
4160
return f'{field}:{prepared_value}'
4261

43-
def endswith_modifier(self, field, value):
62+
def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
4463
if isinstance(value, list):
4564
values = self.or_token.join(self.apply_value(f'*{v}') for v in value)
4665
return f"{field}:({values})"
4766
prepared_value = self.apply_value(f"*{value}")
4867
return f'{field}:{prepared_value}'
4968

50-
def startswith_modifier(self, field, value):
69+
def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
5170
if isinstance(value, list):
5271
values = self.or_token.join(self.apply_value(f'{v}*') for v in value)
5372
return f"{field}:({values})"
5473
prepared_value = self.apply_value(f"{value}*")
5574
return f'{field}:{prepared_value}'
5675

57-
def regex_modifier(self, field, value):
76+
def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
5877
if isinstance(value, list):
5978
return f"({self.or_token.join(self.regex_modifier(field=field, value=v) for v in value)})"
6079
return f'{field}:/{value}/'
6180

62-
def keywords(self, field, value):
81+
def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
6382
if isinstance(value, list):
6483
return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})"
6584
return self.apply_value(f"*{value}*")

translator/app/translator/platforms/base/lucene/tokenizer.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@
2626
from app.translator.core.tokenizer import QueryTokenizer
2727
from app.translator.core.custom_types.tokens import OperatorType
2828
from app.translator.tools.utils import get_match_group
29+
from app.translator.platforms.base.lucene.const import COMPARISON_OPERATORS_MAP
2930

3031

3132
class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
3233
field_pattern = r"(?P<field_name>[a-zA-Z\.\-_]+)"
33-
match_operator_pattern = r"(?:___field___\s*(?P<match_operator>:))\s*"
34-
34+
match_operator_pattern = r"(?:___field___\s*(?P<match_operator>:\[\*\sTO|:\[|:<|:>|:))\s*"
3535
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
3636
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
3737
no_quotes_value_pattern = r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*"
@@ -46,6 +46,8 @@ class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
4646

4747
operators_map = {
4848
":": OperatorType.EQ,
49+
":>": OperatorType.GT,
50+
":<": OperatorType.LT
4951
}
5052

5153
def __init__(self):
@@ -77,9 +79,11 @@ def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.E
7779
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
7880
return operator, d_q_value
7981

80-
return super().get_operator_and_value(match)
82+
return super().get_operator_and_value(match, operator)
8183

8284
def search_value(self, query: str, operator: str, field_name: str) -> Tuple[str, str, Union[str, List[str]]]:
85+
if operator in COMPARISON_OPERATORS_MAP.keys():
86+
return self.search_value_gte_lte(query, operator, field_name)
8387
check_pattern = self.multi_value_check_pattern
8488
check_regex = check_pattern.replace('___field___', field_name).replace('___operator___', operator)
8589
if re.match(check_regex, query):
@@ -96,11 +100,19 @@ def search_value(self, query: str, operator: str, field_name: str) -> Tuple[str,
96100
if field_value_search is None:
97101
raise TokenizerGeneralException(error=f"Value couldn't be found in query part: {query}")
98102

99-
operator, value = self.get_operator_and_value(field_value_search)
103+
operator, value = self.get_operator_and_value(field_value_search, self.map_operator(operator))
100104
value = [self.clean_quotes(v) for v in re.split(r"\s+OR\s+", value)] if is_multi else value
101105
pos = field_value_search.end()
102106
return query[pos:], operator, value
103107

108+
def search_value_gte_lte(self, query: str, operator: str, field_name: str) -> Tuple[str, str, Union[str, List[str]]]:
    """Extract the bound of a bracketed range clause at the start of ``query``.

    Everything up to the first ``]`` is treated as the range clause. The
    field name and every "replace" fragment from COMPARISON_OPERATORS_MAP
    are removed from it; what is left, stripped, is the value.

    Returns a tuple of (rest of the query after the first ``]``, the
    "default_op" registered for ``operator`` or None if it is not
    registered, the extracted value).
    """
    # Cut at the first "]" only: later range clauses keep their own
    # closing brackets for the next tokenizer pass.
    range_clause, _, remainder = query.partition("]")
    fragments = [
        fragment
        for spec in COMPARISON_OPERATORS_MAP.values()
        for fragment in spec["replace"]
    ]
    # The field name is literal text, not a pattern; escape it so "." in a
    # dotted field name cannot match arbitrary characters of the value.
    fragments.append(re.escape(field_name))
    value = re.sub("|".join(fragments), "", range_clause)
    return remainder, COMPARISON_OPERATORS_MAP.get(operator, {}).get("default_op"), value.strip()
115+
104116
def search_keyword(self, query: str) -> Tuple[Keyword, str]:
105117
keyword_search = re.search(self.keyword_pattern, query)
106118
_, value = self.get_operator_and_value(keyword_search)

translator/app/translator/platforms/base/spl/renders/spl.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,39 +16,58 @@
1616
limitations under the License.
1717
-----------------------------------------------------------------
1818
"""
19+
from typing import Union
1920

21+
from app.translator.const import DEFAULT_VALUE_TYPE
2022
from app.translator.core.exceptions.render import UnsupportedRenderMethod
2123
from app.translator.core.render import BaseQueryRender, BaseQueryFieldValue
2224

2325

2426
class SplFieldValue(BaseQueryFieldValue):
2527

26-
def equal_modifier(self, field, value):
28+
def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
2729
if isinstance(value, list):
2830
return f"({self.or_token.join([self.equal_modifier(field=field, value=v) for v in value])})"
2931
return f'{field}="{value}"'
3032

31-
def contains_modifier(self, field, value):
33+
def less_modifier(self, field: str, value: Union[int, str]) -> str:
34+
return f'{field}<"{value}"'
35+
36+
def less_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
37+
return f'{field}<="{value}"'
38+
39+
def greater_modifier(self, field: str, value: Union[int, str]) -> str:
40+
return f'{field}>"{value}"'
41+
42+
def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
43+
return f'{field}>="{value}"'
44+
45+
def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
    """Render a ``!=`` comparison for ``field``.

    A scalar becomes ``field!="value"``. A list becomes a parenthesised
    group with one ``!=`` clause per element.
    """
    if isinstance(value, list):
        clauses = [self.not_equal_modifier(field=field, value=v) for v in value]
        # The clauses must be AND-ed: 'f!="a" OR f!="b"' is satisfied by
        # every event whenever the listed values differ from each other.
        joined = " AND ".join(clauses)
        return f"({joined})"
    return f'{field}!="{value}"'
49+
50+
def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
3251
if isinstance(value, list):
3352
return f"({self.or_token.join([self.contains_modifier(field=field, value=v) for v in value])})"
3453
return f'{field}="*{value}*"'
3554

36-
def endswith_modifier(self, field, value):
55+
def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
3756
if isinstance(value, list):
3857
return f"({self.or_token.join([self.endswith_modifier(field=field, value=v) for v in value])})"
3958
return f'{field}="*{value}"'
4059

41-
def startswith_modifier(self, field, value):
60+
def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
4261
if isinstance(value, list):
4362
return f"({self.or_token.join([self.startswith_modifier(field=field, value=v) for v in value])})"
4463
return f'{field}="{value}*"'
4564

46-
def keywords(self, field, value):
65+
def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
4766
if isinstance(value, list):
4867
return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})"
4968
return f'"{value}"'
5069

51-
def regex_modifier(self, field, value):
70+
def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
5271
raise UnsupportedRenderMethod(platform_name=self.details.name, method="Regex Expression")
5372

5473

0 commit comments

Comments
 (0)