
Commit d3dba4e

Merge pull request #112 from UncoderIO/gis-7814
Improve AQL mapping method is_suitable
2 parents 2b3836c + d4614b8 commit d3dba4e

File tree: 3 files changed, +224 −18 lines changed

uncoder-core/app/translator/core/render.py

Lines changed: 25 additions & 18 deletions

```diff
@@ -126,6 +126,7 @@ def apply_field_value(self, field: str, operator: Identifier, value: DEFAULT_VAL
 
 class QueryRender(ABC):
     comment_symbol: str = None
+    details: PlatformDetails = None
     is_single_line_comment: bool = False
     unsupported_functions_text = "Unsupported functions were excluded from the result query:"
 
@@ -146,7 +147,6 @@ def generate(self, query_container: Union[RawQueryContainer, TokenizedQueryConta
 
 class PlatformQueryRender(QueryRender):
     mappings: BasePlatformMappings = None
-    details: PlatformDetails = None
     is_strict_mapping: bool = False
 
     or_token = "or"
@@ -295,28 +295,35 @@ def generate_raw_log_fields(self, fields: list[Field], source_mapping: SourceMap
 
     def _generate_from_tokenized_query_container(self, query_container: TokenizedQueryContainer) -> str:
         queries_map = {}
+        errors = []
         source_mappings = self._get_source_mappings(query_container.meta_info.source_mapping_ids)
 
         for source_mapping in source_mappings:
             prefix = self.generate_prefix(source_mapping.log_source_signature)
-            if source_mapping.raw_log_fields:
-                defined_raw_log_fields = self.generate_raw_log_fields(
-                    fields=query_container.meta_info.query_fields, source_mapping=source_mapping
+            try:
+                if source_mapping.raw_log_fields:
+                    defined_raw_log_fields = self.generate_raw_log_fields(
+                        fields=query_container.meta_info.query_fields, source_mapping=source_mapping
+                    )
+                    prefix += f"\n{defined_raw_log_fields}\n"
+                result = self.generate_query(tokens=query_container.tokens, source_mapping=source_mapping)
+            except StrictPlatformException as err:
+                errors.append(err)
+                continue
+            else:
+                rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping)
+                not_supported_functions = query_container.functions.not_supported + rendered_functions.not_supported
+                finalized_query = self.finalize_query(
+                    prefix=prefix,
+                    query=result,
+                    functions=rendered_functions.rendered,
+                    not_supported_functions=not_supported_functions,
+                    meta_info=query_container.meta_info,
+                    source_mapping=source_mapping,
                 )
-                prefix += f"\n{defined_raw_log_fields}\n"
-            result = self.generate_query(tokens=query_container.tokens, source_mapping=source_mapping)
-            rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping)
-            not_supported_functions = query_container.functions.not_supported + rendered_functions.not_supported
-            finalized_query = self.finalize_query(
-                prefix=prefix,
-                query=result,
-                functions=rendered_functions.rendered,
-                not_supported_functions=not_supported_functions,
-                meta_info=query_container.meta_info,
-                source_mapping=source_mapping,
-            )
-            queries_map[source_mapping.source_id] = finalized_query
-
+                queries_map[source_mapping.source_id] = finalized_query
+        if not queries_map and errors:
+            raise errors[0]
         return self.finalize(queries_map)
 
     def generate(self, query_container: Union[RawQueryContainer, TokenizedQueryContainer]) -> str:
```
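The reworked loop makes per-mapping failures non-fatal: a StrictPlatformException raised while rendering one source mapping is recorded and the loop moves on, and the first recorded error is re-raised only if no mapping produced a query at all. A minimal standalone sketch of that pattern (the exception class and the render_for callback below are illustrative stand-ins, not the repository's actual API):

```python
class StrictPlatformException(Exception):
    """Stand-in for the translator's strict-mapping error."""


def render_all(source_mappings: list[str], render_for) -> dict[str, str]:
    queries_map: dict[str, str] = {}
    errors: list[Exception] = []
    for source_mapping in source_mappings:
        try:
            # Rendering a single mapping may fail under strict mapping rules.
            queries_map[source_mapping] = render_for(source_mapping)
        except StrictPlatformException as err:
            errors.append(err)  # remember the failure, keep going
            continue
    # Fail only when every mapping failed; otherwise return the partial result.
    if not queries_map and errors:
        raise errors[0]
    return queries_map


def render_for(mapping: str) -> str:
    if mapping == "unmapped_source":
        raise StrictPlatformException(f"no field mapping for {mapping}")
    return f"SELECT * FROM events  /* rendered for {mapping} */"


print(render_all(["windows_security", "unmapped_source"], render_for))
# {'windows_security': 'SELECT * FROM events  /* rendered for windows_security */'}
```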
Lines changed: 86 additions & 0 deletions (new file)

```python
from typing import Optional

from app.translator.core.mapping import DEFAULT_MAPPING_NAME, BasePlatformMappings, LogSourceSignature, SourceMapping


class AQLLogSourceSignature(LogSourceSignature):
    def __init__(
        self,
        device_types: Optional[list[int]],
        categories: Optional[list[int]],
        qids: Optional[list[int]],
        qid_event_categories: Optional[list[int]],
        default_source: dict,
    ):
        self.device_types = set(device_types or [])
        self.categories = set(categories or [])
        self.qids = set(qids or [])
        self.qid_event_categories = set(qid_event_categories or [])
        self._default_source = default_source or {}

    def is_suitable(
        self,
        devicetype: Optional[list[int]],
        category: Optional[list[int]],
        qid: Optional[list[int]],
        qideventcategory: Optional[list[int]],
    ) -> bool:
        device_type_match = set(devicetype).issubset(self.device_types) if devicetype else None
        category_match = set(category).issubset(self.categories) if category else None
        qid_match = set(qid).issubset(self.qids) if qid else None
        qid_event_category_match = set(qideventcategory).issubset(self.qid_event_categories) if qideventcategory else None
        return all(
            condition for condition in (
                device_type_match, category_match,
                qid_match, qid_event_category_match)
            if condition is not None
        )

    def __str__(self) -> str:
        return self._default_source.get("table", "events")

    @property
    def extra_condition(self) -> str:
        default_source = self._default_source
        return " AND ".join((f"{key}={value}" for key, value in default_source.items() if key != "table" and value))


class AQLMappings(BasePlatformMappings):
    def prepare_log_source_signature(self, mapping: dict) -> AQLLogSourceSignature:
        log_source = mapping.get("log_source", {})
        default_log_source = mapping["default_log_source"]
        return AQLLogSourceSignature(
            device_types=log_source.get("devicetype"),
            categories=log_source.get("category"),
            qids=log_source.get("qid"),
            qid_event_categories=log_source.get("qideventcategory"),
            default_source=default_log_source,
        )

    def get_suitable_source_mappings(
        self,
        field_names: list[str],
        devicetype: Optional[list[int]] = None,
        category: Optional[list[int]] = None,
        qid: Optional[list[int]] = None,
        qideventcategory: Optional[list[int]] = None,
    ) -> list[SourceMapping]:
        suitable_source_mappings = []
        for source_mapping in self._source_mappings.values():
            if source_mapping.source_id == DEFAULT_MAPPING_NAME:
                continue

            log_source_signature: AQLLogSourceSignature = source_mapping.log_source_signature
            if log_source_signature.is_suitable(devicetype, category, qid, qideventcategory):
                if source_mapping.fields_mapping.is_suitable(field_names):
                    suitable_source_mappings.append(source_mapping)
            elif source_mapping.fields_mapping.is_suitable(field_names):
                suitable_source_mappings.append(source_mapping)

        if not suitable_source_mappings:
            suitable_source_mappings = [self._source_mappings[DEFAULT_MAPPING_NAME]]

        return suitable_source_mappings


aql_mappings = AQLMappings(platform_dir="qradar")
```
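In the improved is_suitable, each log-source dimension from the parsed query (devicetype, category, qid, qideventcategory) is checked independently: values present in the query must form a subset of the values configured for the mapping, while dimensions the query does not mention contribute no condition at all, so a query with no log-source hints matches any mapping. A rough standalone illustration of that subset rule, using plain sets instead of the real LogSourceSignature base class (the configured values are invented for the example):

```python
from typing import Optional


def is_suitable_sketch(
    configured: dict[str, set[int]],
    devicetype: Optional[list[int]] = None,
    category: Optional[list[int]] = None,
    qid: Optional[list[int]] = None,
    qideventcategory: Optional[list[int]] = None,
) -> bool:
    # A dimension absent from the query (None or empty) yields no condition;
    # a dimension that is present must be a subset of the configured values.
    checks = (
        set(devicetype).issubset(configured["devicetype"]) if devicetype else None,
        set(category).issubset(configured["category"]) if category else None,
        set(qid).issubset(configured["qid"]) if qid else None,
        set(qideventcategory).issubset(configured["qideventcategory"]) if qideventcategory else None,
    )
    return all(check for check in checks if check is not None)


# Hypothetical mapping configuration (values made up for illustration).
windows_mapping = {"devicetype": {12}, "category": {4003, 5007}, "qid": set(), "qideventcategory": set()}

print(is_suitable_sketch(windows_mapping, devicetype=[12], category=[4003]))  # True: both subsets match
print(is_suitable_sketch(windows_mapping, devicetype=[12], qid=[5000001]))    # False: qid not configured
print(is_suitable_sketch(windows_mapping))                                    # True: no conditions to check
```

Note that all() over an empty generator is True, which is what lets a query without log-source clauses stay compatible with every mapping before the field-name check in get_suitable_source_mappings narrows the candidates.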
Lines changed: 113 additions & 0 deletions (new file)

```python
"""
Uncoder IO Commercial Edition License
-----------------------------------------------------------------
Copyright (c) 2024 SOC Prime, Inc.

This file is part of the Uncoder IO Commercial Edition ("CE") and is
licensed under the Uncoder IO Non-Commercial License (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

https://github.com/UncoderIO/UncoderIO/blob/main/LICENSE

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-----------------------------------------------------------------
"""

import re
from typing import Union

from app.translator.core.models.query_container import RawQueryContainer, TokenizedQueryContainer
from app.translator.core.parser import PlatformQueryParser
from app.translator.platforms.base.aql.const import NUM_VALUE_PATTERN, SINGLE_QUOTES_VALUE_PATTERN
from app.translator.platforms.base.aql.mapping import AQLMappings, aql_mappings
from app.translator.platforms.base.aql.tokenizer import AQLTokenizer
from app.translator.tools.utils import get_match_group


class AQLQueryParser(PlatformQueryParser):
    tokenizer = AQLTokenizer()
    mappings: AQLMappings = aql_mappings

    log_source_functions = ("LOGSOURCENAME", "LOGSOURCEGROUPNAME", "LOGSOURCETYPENAME", "CATEGORYNAME")
    log_source_function_pattern = r"\(?(?P<key>___func_name___\([a-zA-Z]+\))(?:\s+like\s+|\s+ilike\s+|\s*=\s*)'(?P<value>[%a-zA-Z\s]+)'\s*\)?\s+(?:and|or)?\s"  # noqa: E501

    log_source_key_types = ("devicetype", "category", "qid", "qideventcategory")
    log_source_pattern = rf"___source_type___(?:\s+like\s+|\s+ilike\s+|\s*=\s*)(?:{SINGLE_QUOTES_VALUE_PATTERN}|{NUM_VALUE_PATTERN})(?:\s+(?:and|or)\s+|\s+)?"  # noqa: E501
    num_value_pattern = r"[0-9]+"
    multi_num_log_source_pattern = (
        rf"___source_type___\s+in\s+\((?P<value>(?:{num_value_pattern}(?:\s*,\s*)?)+)\)(?:\s+(?:and|or)\s+|\s+)?"
    )
    str_value_pattern = r"""(?:')(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')+)(?:')"""
    multi_str_log_source_pattern = (
        rf"""___source_type___\s+in\s+\((?P<value>(?:{str_value_pattern}(?:\s*,\s*)?)+)\)(?:\s+(?:and|or)\s+|\s+)?"""
    )

    table_pattern = r"\sFROM\s(?P<table>[a-zA-Z\.\-\*]+)\sWHERE\s"

    def __clean_query(self, query: str) -> str:
        for func_name in self.log_source_functions:
            pattern = self.log_source_function_pattern.replace("___func_name___", func_name)
            while search := re.search(pattern, query, flags=re.IGNORECASE):
                pos_start = search.start()
                pos_end = search.end()
                query = query[:pos_start] + query[pos_end:]

        return query

    @staticmethod
    def __parse_multi_value_log_source(
        match: re.Match, query: str, pattern: str
    ) -> tuple[str, Union[list[str], list[int]]]:
        value = match.group("value")
        pos_start = match.start()
        pos_end = match.end()
        query = query[:pos_start] + query[pos_end:]
        return query, re.findall(pattern, value)

    def __parse_log_sources(self, query: str) -> tuple[dict[str, Union[list[str], list[int]]], str]:
        log_sources = {}

        if search := re.search(self.table_pattern, query, flags=re.IGNORECASE):
            pos_end = search.end()
            query = query[pos_end:]

        for log_source_key in self.log_source_key_types:
            pattern = self.log_source_pattern.replace("___source_type___", log_source_key)
            while search := re.search(pattern, query, flags=re.IGNORECASE):
                num_value = get_match_group(search, group_name="num_value")
                str_value = get_match_group(search, group_name="s_q_value")
                value = num_value and int(num_value) or str_value
                log_sources.setdefault(log_source_key, []).append(value)
                pos_start = search.start()
                pos_end = search.end()
                query = query[:pos_start] + query[pos_end:]

            pattern = self.multi_num_log_source_pattern.replace("___source_type___", log_source_key)
            if search := re.search(pattern, query, flags=re.IGNORECASE):
                query, values = self.__parse_multi_value_log_source(search, query, self.num_value_pattern)
                values = [int(v) for v in values]
                log_sources.setdefault(log_source_key, []).extend(values)

            pattern = self.multi_str_log_source_pattern.replace("___source_type___", log_source_key)
            if search := re.search(pattern, query, flags=re.IGNORECASE):
                query, values = self.__parse_multi_value_log_source(search, query, self.str_value_pattern)
                log_sources.setdefault(log_source_key, []).extend(values)

        return log_sources, query

    def _parse_query(self, text: str) -> tuple[str, dict[str, Union[list[str], list[int]]]]:
        query = self.__clean_query(text)
        log_sources, query = self.__parse_log_sources(query)
        return query, log_sources

    def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
        query, log_sources = self._parse_query(raw_query_container.query)
        tokens, source_mappings = self.get_tokens_and_source_mappings(query, log_sources)
        fields_tokens = self.get_fields_tokens(tokens=tokens)
        meta_info = raw_query_container.meta_info
        meta_info.query_fields = fields_tokens
        meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
        return TokenizedQueryContainer(tokens=tokens, meta_info=meta_info)
```
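__parse_log_sources peels the FROM <table> WHERE prefix off the query and then repeatedly cuts out devicetype/category/qid/qideventcategory clauses, returning them separately from the remaining condition text. A rough end-to-end illustration of that extraction step, with simplified inline patterns rather than the constants imported from app.translator.platforms.base.aql.const (the sample query and field name are made up):

```python
import re

TABLE_PATTERN = r"\sFROM\s(?P<table>[a-zA-Z\.\-\*]+)\sWHERE\s"
MULTI_NUM_PATTERN = r"devicetype\s+in\s+\((?P<value>(?:[0-9]+(?:\s*,\s*)?)+)\)(?:\s+(?:and|or)\s+|\s+)?"

query = "SELECT UTF8(payload) FROM events WHERE devicetype IN (70, 71) AND \"EventID\" = '4688'"

# 1. Drop everything up to and including "FROM <table> WHERE".
if table_match := re.search(TABLE_PATTERN, query, flags=re.IGNORECASE):
    query = query[table_match.end():]

# 2. Cut the multi-value devicetype clause out of the query and keep its values.
log_sources: dict[str, list[int]] = {}
if match := re.search(MULTI_NUM_PATTERN, query, flags=re.IGNORECASE):
    log_sources["devicetype"] = [int(v) for v in re.findall(r"[0-9]+", match.group("value"))]
    query = query[:match.start()] + query[match.end():]

print(log_sources)  # {'devicetype': [70, 71]}
print(query)        # "EventID" = '4688'  -- only the field condition is left for tokenizing
```

The extracted log sources are what get_suitable_source_mappings receives, while the stripped query is what the AQL tokenizer sees.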

0 commit comments
