Commit d3dba4e

Merge pull request #112 from UncoderIO/gis-7814
Improve AQL mapping method is_suitable
2 parents 2b3836c + d4614b8 commit d3dba4e

3 files changed: +224 -18 lines changed

uncoder-core/app/translator/core/render.py

Lines changed: 25 additions & 18 deletions
@@ -126,6 +126,7 @@ def apply_field_value(self, field: str, operator: Identifier, value: DEFAULT_VAL
 
 class QueryRender(ABC):
     comment_symbol: str = None
+    details: PlatformDetails = None
     is_single_line_comment: bool = False
     unsupported_functions_text = "Unsupported functions were excluded from the result query:"
 
@@ -146,7 +147,6 @@ def generate(self, query_container: Union[RawQueryContainer, TokenizedQueryConta
 
 class PlatformQueryRender(QueryRender):
     mappings: BasePlatformMappings = None
-    details: PlatformDetails = None
     is_strict_mapping: bool = False
 
     or_token = "or"
@@ -295,28 +295,35 @@ def generate_raw_log_fields(self, fields: list[Field], source_mapping: SourceMap
 
     def _generate_from_tokenized_query_container(self, query_container: TokenizedQueryContainer) -> str:
         queries_map = {}
+        errors = []
         source_mappings = self._get_source_mappings(query_container.meta_info.source_mapping_ids)
 
         for source_mapping in source_mappings:
             prefix = self.generate_prefix(source_mapping.log_source_signature)
-            if source_mapping.raw_log_fields:
-                defined_raw_log_fields = self.generate_raw_log_fields(
-                    fields=query_container.meta_info.query_fields, source_mapping=source_mapping
+            try:
+                if source_mapping.raw_log_fields:
+                    defined_raw_log_fields = self.generate_raw_log_fields(
+                        fields=query_container.meta_info.query_fields, source_mapping=source_mapping
+                    )
+                    prefix += f"\n{defined_raw_log_fields}\n"
+                result = self.generate_query(tokens=query_container.tokens, source_mapping=source_mapping)
+            except StrictPlatformException as err:
+                errors.append(err)
+                continue
+            else:
+                rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping)
+                not_supported_functions = query_container.functions.not_supported + rendered_functions.not_supported
+                finalized_query = self.finalize_query(
+                    prefix=prefix,
+                    query=result,
+                    functions=rendered_functions.rendered,
+                    not_supported_functions=not_supported_functions,
+                    meta_info=query_container.meta_info,
+                    source_mapping=source_mapping,
                 )
-                prefix += f"\n{defined_raw_log_fields}\n"
-            result = self.generate_query(tokens=query_container.tokens, source_mapping=source_mapping)
-            rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping)
-            not_supported_functions = query_container.functions.not_supported + rendered_functions.not_supported
-            finalized_query = self.finalize_query(
-                prefix=prefix,
-                query=result,
-                functions=rendered_functions.rendered,
-                not_supported_functions=not_supported_functions,
-                meta_info=query_container.meta_info,
-                source_mapping=source_mapping,
-            )
-            queries_map[source_mapping.source_id] = finalized_query
-
+                queries_map[source_mapping.source_id] = finalized_query
+        if not queries_map and errors:
+            raise errors[0]
         return self.finalize(queries_map)
 
     def generate(self, query_container: Union[RawQueryContainer, TokenizedQueryContainer]) -> str:
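
To see what this render.py change does in isolation: a strict-mapping failure no longer aborts the whole translation. Each StrictPlatformException is collected, the offending source mapping is skipped, and the first collected error is raised only if no mapping produced a query. The sketch below is not part of the commit and only distills that control flow; render_for_mappings, render_one and the locally defined StrictPlatformException are illustrative stand-ins (the real exception class lives elsewhere in the Uncoder codebase).

# Illustrative stand-in for the real exception class imported by render.py.
class StrictPlatformException(Exception):
    pass


def render_for_mappings(source_mappings, render_one):
    """Render one query per source mapping, tolerating per-mapping strict-mapping failures."""
    queries_map, errors = {}, []
    for source_mapping in source_mappings:
        try:
            result = render_one(source_mapping)  # may raise StrictPlatformException
        except StrictPlatformException as err:
            errors.append(err)  # remember the failure and try the next mapping
            continue
        else:
            queries_map[source_mapping] = result
    if not queries_map and errors:
        raise errors[0]  # nothing rendered at all: surface the first failure
    return queries_map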
uncoder-core/app/translator/platforms/base/aql/mapping.py

Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
from typing import Optional

from app.translator.core.mapping import DEFAULT_MAPPING_NAME, BasePlatformMappings, LogSourceSignature, SourceMapping


class AQLLogSourceSignature(LogSourceSignature):
    def __init__(
        self,
        device_types: Optional[list[int]],
        categories: Optional[list[int]],
        qids: Optional[list[int]],
        qid_event_categories: Optional[list[int]],
        default_source: dict,
    ):
        self.device_types = set(device_types or [])
        self.categories = set(categories or [])
        self.qids = set(qids or [])
        self.qid_event_categories = set(qid_event_categories or [])
        self._default_source = default_source or {}

    def is_suitable(
        self,
        devicetype: Optional[list[int]],
        category: Optional[list[int]],
        qid: Optional[list[int]],
        qideventcategory: Optional[list[int]],
    ) -> bool:
        device_type_match = set(devicetype).issubset(self.device_types) if devicetype else None
        category_match = set(category).issubset(self.categories) if category else None
        qid_match = set(qid).issubset(self.qids) if qid else None
        qid_event_category_match = set(qideventcategory).issubset(self.qid_event_categories) if qideventcategory else None
        return all(
            condition for condition in (
                device_type_match, category_match,
                qid_match, qid_event_category_match)
            if condition is not None
        )

    def __str__(self) -> str:
        return self._default_source.get("table", "events")

    @property
    def extra_condition(self) -> str:
        default_source = self._default_source
        return " AND ".join((f"{key}={value}" for key, value in default_source.items() if key != "table" and value))


class AQLMappings(BasePlatformMappings):
    def prepare_log_source_signature(self, mapping: dict) -> AQLLogSourceSignature:
        log_source = mapping.get("log_source", {})
        default_log_source = mapping["default_log_source"]
        return AQLLogSourceSignature(
            device_types=log_source.get("devicetype"),
            categories=log_source.get("category"),
            qids=log_source.get("qid"),
            qid_event_categories=log_source.get("qideventcategory"),
            default_source=default_log_source,
        )

    def get_suitable_source_mappings(
        self,
        field_names: list[str],
        devicetype: Optional[list[int]] = None,
        category: Optional[list[int]] = None,
        qid: Optional[list[int]] = None,
        qideventcategory: Optional[list[int]] = None,
    ) -> list[SourceMapping]:
        suitable_source_mappings = []
        for source_mapping in self._source_mappings.values():
            if source_mapping.source_id == DEFAULT_MAPPING_NAME:
                continue

            log_source_signature: AQLLogSourceSignature = source_mapping.log_source_signature
            if log_source_signature.is_suitable(devicetype, category, qid, qideventcategory):
                if source_mapping.fields_mapping.is_suitable(field_names):
                    suitable_source_mappings.append(source_mapping)
            elif source_mapping.fields_mapping.is_suitable(field_names):
                suitable_source_mappings.append(source_mapping)

        if not suitable_source_mappings:
            suitable_source_mappings = [self._source_mappings[DEFAULT_MAPPING_NAME]]

        return suitable_source_mappings


aql_mappings = AQLMappings(platform_dir="qradar")
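
As a quick illustration of the improved is_suitable semantics, here is a minimal sketch (not part of the commit, assuming the uncoder-core package is on the import path): every log source key that appears in the parsed query must be a subset of the values listed in the mapping, while keys absent from the query are ignored. The device type and category numbers below are made up.

from app.translator.platforms.base.aql.mapping import AQLLogSourceSignature

signature = AQLLogSourceSignature(
    device_types=[70, 71],  # hypothetical mapping values
    categories=None,
    qids=None,
    qid_event_categories=None,
    default_source={"table": "events", "devicetype": 70},
)

print(signature.is_suitable(devicetype=[70], category=None, qid=None, qideventcategory=None))      # True: subset of mapped device types
print(signature.is_suitable(devicetype=[70, 72], category=None, qid=None, qideventcategory=None))  # False: 72 is not mapped
print(signature.is_suitable(devicetype=None, category=[4003], qid=None, qideventcategory=None))    # False: no categories mapped
print(signature.is_suitable(devicetype=None, category=None, qid=None, qideventcategory=None))      # True: nothing to check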
uncoder-core/app/translator/platforms/base/aql/parser.py

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
"""
Uncoder IO Commercial Edition License
-----------------------------------------------------------------
Copyright (c) 2024 SOC Prime, Inc.

This file is part of the Uncoder IO Commercial Edition ("CE") and is
licensed under the Uncoder IO Non-Commercial License (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://github.com/UncoderIO/UncoderIO/blob/main/LICENSE

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-----------------------------------------------------------------
"""

import re
from typing import Union

from app.translator.core.models.query_container import RawQueryContainer, TokenizedQueryContainer
from app.translator.core.parser import PlatformQueryParser
from app.translator.platforms.base.aql.const import NUM_VALUE_PATTERN, SINGLE_QUOTES_VALUE_PATTERN
from app.translator.platforms.base.aql.mapping import AQLMappings, aql_mappings
from app.translator.platforms.base.aql.tokenizer import AQLTokenizer
from app.translator.tools.utils import get_match_group


class AQLQueryParser(PlatformQueryParser):
    tokenizer = AQLTokenizer()
    mappings: AQLMappings = aql_mappings

    log_source_functions = ("LOGSOURCENAME", "LOGSOURCEGROUPNAME", "LOGSOURCETYPENAME", "CATEGORYNAME")
    log_source_function_pattern = r"\(?(?P<key>___func_name___\([a-zA-Z]+\))(?:\s+like\s+|\s+ilike\s+|\s*=\s*)'(?P<value>[%a-zA-Z\s]+)'\s*\)?\s+(?:and|or)?\s"  # noqa: E501

    log_source_key_types = ("devicetype", "category", "qid", "qideventcategory")
    log_source_pattern = rf"___source_type___(?:\s+like\s+|\s+ilike\s+|\s*=\s*)(?:{SINGLE_QUOTES_VALUE_PATTERN}|{NUM_VALUE_PATTERN})(?:\s+(?:and|or)\s+|\s+)?"  # noqa: E501
    num_value_pattern = r"[0-9]+"
    multi_num_log_source_pattern = (
        rf"___source_type___\s+in\s+\((?P<value>(?:{num_value_pattern}(?:\s*,\s*)?)+)\)(?:\s+(?:and|or)\s+|\s+)?"
    )
    str_value_pattern = r"""(?:')(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')+)(?:')"""
    multi_str_log_source_pattern = (
        rf"""___source_type___\s+in\s+\((?P<value>(?:{str_value_pattern}(?:\s*,\s*)?)+)\)(?:\s+(?:and|or)\s+|\s+)?"""
    )

    table_pattern = r"\sFROM\s(?P<table>[a-zA-Z\.\-\*]+)\sWHERE\s"

    def __clean_query(self, query: str) -> str:
        for func_name in self.log_source_functions:
            pattern = self.log_source_function_pattern.replace("___func_name___", func_name)
            while search := re.search(pattern, query, flags=re.IGNORECASE):
                pos_start = search.start()
                pos_end = search.end()
                query = query[:pos_start] + query[pos_end:]

        return query

    @staticmethod
    def __parse_multi_value_log_source(
        match: re.Match, query: str, pattern: str
    ) -> tuple[str, Union[list[str], list[int]]]:
        value = match.group("value")
        pos_start = match.start()
        pos_end = match.end()
        query = query[:pos_start] + query[pos_end:]
        return query, re.findall(pattern, value)

    def __parse_log_sources(self, query: str) -> tuple[dict[str, Union[list[str], list[int]]], str]:
        log_sources = {}

        if search := re.search(self.table_pattern, query, flags=re.IGNORECASE):
            pos_end = search.end()
            query = query[pos_end:]

        for log_source_key in self.log_source_key_types:
            pattern = self.log_source_pattern.replace("___source_type___", log_source_key)
            while search := re.search(pattern, query, flags=re.IGNORECASE):
                num_value = get_match_group(search, group_name="num_value")
                str_value = get_match_group(search, group_name="s_q_value")
                value = num_value and int(num_value) or str_value
                log_sources.setdefault(log_source_key, []).append(value)
                pos_start = search.start()
                pos_end = search.end()
                query = query[:pos_start] + query[pos_end:]

            pattern = self.multi_num_log_source_pattern.replace("___source_type___", log_source_key)
            if search := re.search(pattern, query, flags=re.IGNORECASE):
                query, values = self.__parse_multi_value_log_source(search, query, self.num_value_pattern)
                values = [int(v) for v in values]
                log_sources.setdefault(log_source_key, []).extend(values)

            pattern = self.multi_str_log_source_pattern.replace("___source_type___", log_source_key)
            if search := re.search(pattern, query, flags=re.IGNORECASE):
                query, values = self.__parse_multi_value_log_source(search, query, self.str_value_pattern)
                log_sources.setdefault(log_source_key, []).extend(values)

        return log_sources, query

    def _parse_query(self, text: str) -> tuple[str, dict[str, Union[list[str], list[int]]]]:
        query = self.__clean_query(text)
        log_sources, query = self.__parse_log_sources(query)
        return query, log_sources

    def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
        query, log_sources = self._parse_query(raw_query_container.query)
        tokens, source_mappings = self.get_tokens_and_source_mappings(query, log_sources)
        fields_tokens = self.get_fields_tokens(tokens=tokens)
        meta_info = raw_query_container.meta_info
        meta_info.query_fields = fields_tokens
        meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
        return TokenizedQueryContainer(tokens=tokens, meta_info=meta_info)
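
To make the log source extraction concrete, the standalone sketch below (not part of the commit) reproduces only the multi-value numeric branch of __parse_log_sources, using the same patterns the parser defines; the AQL condition string and the resulting call shape are invented for illustration.

import re

num_value_pattern = r"[0-9]+"
multi_num_log_source_pattern = (
    rf"___source_type___\s+in\s+\((?P<value>(?:{num_value_pattern}(?:\s*,\s*)?)+)\)(?:\s+(?:and|or)\s+|\s+)?"
)

# Hypothetical query tail, i.e. what remains after table_pattern has stripped "... FROM events WHERE ".
query = "devicetype in (70, 71) and LOWER(UTF8(payload)) like '%suspicious%'"
pattern = multi_num_log_source_pattern.replace("___source_type___", "devicetype")

if search := re.search(pattern, query, flags=re.IGNORECASE):
    values = [int(v) for v in re.findall(num_value_pattern, search.group("value"))]
    query = query[:search.start()] + query[search.end():]
    print(values)  # [70, 71] -> these values feed get_suitable_source_mappings(devicetype=[70, 71], ...)
    print(query)   # "LOWER(UTF8(payload)) like '%suspicious%'" : the log source condition is consumed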
