Skip to content

Commit 4b2c89c

Browse files
authored
Merge pull request #108 from UncoderIO/gis-7510
add special characters parsing to kql tokenizer
2 parents fe2aa2f + cc3dd70 commit 4b2c89c

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

uncoder-core/app/translator/platforms/microsoft/tokenizer.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -46,13 +46,15 @@ class MicrosoftSentinelTokenizer(QueryTokenizer, OperatorBasedMixin):
46 46
field_pattern = r"(?P<field_name>[a-zA-Z\.\-_]+)"
47 47
bool_value_pattern = rf"(?P<{MicrosoftValueType.bool_value}>true|false)\s*"
48 48
num_value_pattern = rf"(?P<{MicrosoftValueType.number_value}>\d+(?:\.\d+)*)\s*"
49-
double_quotes_value_pattern = rf'"(?P<{MicrosoftValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\s]|\\\"|\\\\)*)"\s*' # noqa: E501
50-
single_quotes_value_pattern = rf"'(?P<{MicrosoftValueType.single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{{\}}\s]|\\\'|\\\\)*)'\s*" # noqa: E501
51-
verbatim_double_quotes_value_pattern = rf'@"(?P<{MicrosoftValueType.verbatim_double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\s\\]|"")*)"\s*' # noqa: E501
52-
verbatim_single_quotes_value_pattern = rf"@'(?P<{MicrosoftValueType.verbatim_single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{{\}}\s\\]|'')*)'\s*" # noqa: E501
49+
double_quotes_value_pattern = rf'"(?P<{MicrosoftValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\[\];<>?`~\s]|\\\"|\\\\)*)"\s*' # noqa: E501
50+
single_quotes_value_pattern = rf"'(?P<{MicrosoftValueType.single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{{\}}\[\];<>?`~\s]|\\\'|\\\\)*)'\s*" # noqa: E501
51+
verbatim_double_quotes_value_pattern = rf'@"(?P<{MicrosoftValueType.verbatim_double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{{\}}\[\];<>?`~\s\\]|"")*)"\s*' # noqa: E501
52+
verbatim_single_quotes_value_pattern = rf"@'(?P<{MicrosoftValueType.verbatim_single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{{\}}\[\];<>?`~\s\\]|'')*)'\s*" # noqa: E501
53 53
str_value_pattern = rf"""{double_quotes_value_pattern}|{single_quotes_value_pattern}|{verbatim_double_quotes_value_pattern}|{verbatim_single_quotes_value_pattern}""" # noqa: E501
54 54
_value_pattern = rf"""{bool_value_pattern}|{num_value_pattern}|{str_value_pattern}"""
55-
multi_value_pattern = rf"""\((?P<{MicrosoftValueType.multi_value}>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]+)\)"""
55+
multi_value_pattern = (
56+
rf"""\((?P<{MicrosoftValueType.multi_value}>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\];<>?`~\s]+)\)"""
57+
)
56 58
keyword_pattern = rf"\*\s+contains\s+(?:{str_value_pattern})"
57 59

58 60
escape_manager = microsoft_escape_manager

0 commit comments

Comments
 (0)