Skip to content

Commit b76150f

Browse files
committed
unsupported query entry extraction
1 parent d10cc06 commit b76150f

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

uncoder-core/app/translator/core/tokenizer.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,16 @@ class QueryTokenizer(BaseTokenizer):
6464
fields_operator_map: ClassVar[dict[str, str]] = {}
6565
operators_map: ClassVar[dict[str, str]] = {} # used to generate re pattern. so the keys order is important
6666

67-
logical_operator_pattern = r"^(?P<logical_operator>and|or|not|AND|OR|NOT)\s+"
67+
# Accepted logical-operator spellings mapped to their canonical token types.
# NOTE: insertion order matters — it defines the alternation order of the
# regex pattern derived below.
logical_operators_map: ClassVar[dict[str, str]] = {
    spelling: canonical
    for word, canonical in (
        ("and", LogicalOperatorType.AND),
        ("or", LogicalOperatorType.OR),
        ("not", LogicalOperatorType.NOT),
    )
    for spelling in (word, word.upper())
}
# Bare named-group alternation over every accepted spelling; reused by
# helpers that need the operator without the start-of-string anchor.
_logical_operator_pattern = f"(?P<logical_operator>{'|'.join(logical_operators_map)})"
# Anchored form used when tokenizing: the operator must open the remaining
# query text and be followed by whitespace.
logical_operator_pattern = rf"^{_logical_operator_pattern}\s+"
6877
field_value_pattern = r"""^___field___\s*___operator___\s*___value___"""
6978
base_value_pattern = r"(?:___value_pattern___)"
7079

@@ -302,7 +311,12 @@ def _get_next_token(
302311
if self.keyword_pattern and re.match(self.keyword_pattern, query):
303312
return self.search_keyword(query)
304313

305-
raise TokenizerGeneralException("Unsupported query entry")
314+
unsupported_query_entry = self._get_unsupported_query_entry(query)
315+
raise TokenizerGeneralException(f"Unsupported query entry: {unsupported_query_entry}")
316+
317+
def _get_unsupported_query_entry(self, query: str) -> str:
    """Return the leading fragment of *query* up to (not including) the
    first whitespace-delimited logical operator.

    Used to build a readable "Unsupported query entry" error message:
    only the offending head of the query is reported, not the whole rest
    of it. If no logical operator occurs, the full query is returned.
    """
    separator = rf"\s+{self._logical_operator_pattern}\s+"
    # maxsplit=1: only the first operator matters. re.split also emits the
    # captured operator group, but unpacking keeps just the head fragment.
    head, *_ = re.split(separator, query, maxsplit=1)
    return head
306320

307321
@staticmethod
308322
def _validate_parentheses(tokens: list[QUERY_TOKEN_TYPE]) -> None:

0 commit comments

Comments
 (0)