From 5481f1ead85f6c04b7edd16627d78cd1d2879d7b Mon Sep 17 00:00:00 2001
From: Viktor Hrebeniuk <76157115+saltar-ua@users.noreply.github.com>
Date: Tue, 21 May 2024 16:10:33 +0300
Subject: [PATCH 1/2] Improve AQL mapping method is_suitable

---
Review notes (ignored by git-am, not part of the commit message):
- parsers/aql.py: the named groups in the regex literals had lost their
  "<name>" part ("(?P" followed directly by the group body), which makes
  re.compile() raise and breaks match.group("value"). Names restored:
  "value" is the one read by __parse_multi_value_log_source, "s_q_value"
  mirrors the name requested via get_match_group; "key" and "table" are
  not read by name anywhere in this patch.
- parsers/aql.py: "num_value and int(num_value) or str_value" dropped a
  numeric log source value of 0 (int("0") is falsy, so the expression fell
  through to str_value). Replaced by a conditional expression.
- Line counts of every hunk are unchanged, so the hunk headers and the
  diffstat below still hold.

 uncoder-core/app/translator/core/render.py    |  38 +++---
 .../translator/platforms/base/aql/mapping.py  |  86 +++++++++++++
 .../platforms/base/aql/parsers/aql.py         | 113 ++++++++++++++++++
 3 files changed, 220 insertions(+), 17 deletions(-)
 create mode 100644 uncoder-core/app/translator/platforms/base/aql/mapping.py
 create mode 100644 uncoder-core/app/translator/platforms/base/aql/parsers/aql.py

diff --git a/uncoder-core/app/translator/core/render.py b/uncoder-core/app/translator/core/render.py
index 7074ab1c..b1b51fb6 100644
--- a/uncoder-core/app/translator/core/render.py
+++ b/uncoder-core/app/translator/core/render.py
@@ -126,6 +126,7 @@ def apply_field_value(self, field: str, operator: Identifier, value: DEFAULT_VAL
 
 class QueryRender(ABC):
     comment_symbol: str = None
+    details: PlatformDetails = None
     is_single_line_comment: bool = False
     unsupported_functions_text = "Unsupported functions were excluded from the result query:"
 
@@ -146,7 +147,6 @@ def generate(self, query_container: Union[RawQueryContainer, TokenizedQueryConta
 
 class PlatformQueryRender(QueryRender):
     mappings: BasePlatformMappings = None
-    details: PlatformDetails = None
     is_strict_mapping: bool = False
 
     or_token = "or"
@@ -299,23 +299,27 @@ def _generate_from_tokenized_query_container(self, query_container: TokenizedQue
 
         for source_mapping in source_mappings:
             prefix = self.generate_prefix(source_mapping.log_source_signature)
-            if source_mapping.raw_log_fields:
-                defined_raw_log_fields = self.generate_raw_log_fields(
-                    fields=query_container.meta_info.query_fields, source_mapping=source_mapping
+            try:
+                if source_mapping.raw_log_fields:
+                    defined_raw_log_fields = self.generate_raw_log_fields(
+                        fields=query_container.meta_info.query_fields, source_mapping=source_mapping
+                    )
+                    prefix += f"\n{defined_raw_log_fields}\n"
+                result = self.generate_query(tokens=query_container.tokens, source_mapping=source_mapping)
+            except StrictPlatformException:
+                continue
+            else:
+                rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping)
+                not_supported_functions = query_container.functions.not_supported + rendered_functions.not_supported
+                finalized_query = self.finalize_query(
+                    prefix=prefix,
+                    query=result,
+                    functions=rendered_functions.rendered,
+                    not_supported_functions=not_supported_functions,
+                    meta_info=query_container.meta_info,
+                    source_mapping=source_mapping,
                 )
-                prefix += f"\n{defined_raw_log_fields}\n"
-            result = self.generate_query(tokens=query_container.tokens, source_mapping=source_mapping)
-            rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping)
-            not_supported_functions = query_container.functions.not_supported + rendered_functions.not_supported
-            finalized_query = self.finalize_query(
-                prefix=prefix,
-                query=result,
-                functions=rendered_functions.rendered,
-                not_supported_functions=not_supported_functions,
-                meta_info=query_container.meta_info,
-                source_mapping=source_mapping,
-            )
-            queries_map[source_mapping.source_id] = finalized_query
+                queries_map[source_mapping.source_id] = finalized_query
 
         return self.finalize(queries_map)
 
diff --git a/uncoder-core/app/translator/platforms/base/aql/mapping.py b/uncoder-core/app/translator/platforms/base/aql/mapping.py
new file mode 100644
index 00000000..2ff93b23
--- /dev/null
+++ b/uncoder-core/app/translator/platforms/base/aql/mapping.py
@@ -0,0 +1,86 @@
+from typing import Optional
+
+from app.translator.core.mapping import DEFAULT_MAPPING_NAME, BasePlatformMappings, LogSourceSignature, SourceMapping
+
+
+class AQLLogSourceSignature(LogSourceSignature):
+    def __init__(
+        self,
+        device_types: Optional[list[int]],
+        categories: Optional[list[int]],
+        qids: Optional[list[int]],
+        qid_event_categories: Optional[list[int]],
+        default_source: dict,
+    ):
+        self.device_types = set(device_types or [])
+        self.categories = set(categories or [])
+        self.qids = set(qids or [])
+        self.qid_event_categories = set(qid_event_categories or [])
+        self._default_source = default_source or {}
+
+    def is_suitable(
+        self,
+        devicetype: Optional[list[int]],
+        category: Optional[list[int]],
+        qid: Optional[list[int]],
+        qideventcategory: Optional[list[int]],
+    ) -> bool:
+        device_type_match = set(devicetype).issubset(self.device_types) if devicetype else None
+        category_match = set(category).issubset(self.categories) if category else None
+        qid_match = set(qid).issubset(self.qids) if qid else None
+        qid_event_category_match = set(qideventcategory).issubset(self.qid_event_categories) if qideventcategory else None
+        return all(
+            condition for condition in (
+                device_type_match, category_match,
+                qid_match, qid_event_category_match)
+            if condition is not None
+        )
+
+    def __str__(self) -> str:
+        return self._default_source.get("table", "events")
+
+    @property
+    def extra_condition(self) -> str:
+        default_source = self._default_source
+        return " AND ".join((f"{key}={value}" for key, value in default_source.items() if key != "table" and value))
+
+
+class AQLMappings(BasePlatformMappings):
+    def prepare_log_source_signature(self, mapping: dict) -> AQLLogSourceSignature:
+        log_source = mapping.get("log_source", {})
+        default_log_source = mapping["default_log_source"]
+        return AQLLogSourceSignature(
+            device_types=log_source.get("devicetype"),
+            categories=log_source.get("category"),
+            qids=log_source.get("qid"),
+            qid_event_categories=log_source.get("qideventcategory"),
+            default_source=default_log_source,
+        )
+
+    def get_suitable_source_mappings(
+        self,
+        field_names: list[str],
+        devicetype: Optional[list[int]] = None,
+        category: Optional[list[int]] = None,
+        qid: Optional[list[int]] = None,
+        qideventcategory: Optional[list[int]] = None,
+    ) -> list[SourceMapping]:
+        suitable_source_mappings = []
+        for source_mapping in self._source_mappings.values():
+            if source_mapping.source_id == DEFAULT_MAPPING_NAME:
+                continue
+
+            log_source_signature: AQLLogSourceSignature = source_mapping.log_source_signature
+            if log_source_signature.is_suitable(devicetype, category, qid, qideventcategory):
+                if source_mapping.fields_mapping.is_suitable(field_names):
+                    suitable_source_mappings.append(source_mapping)
+            elif source_mapping.fields_mapping.is_suitable(field_names):
+                suitable_source_mappings.append(source_mapping)
+
+        if not suitable_source_mappings:
+            suitable_source_mappings = [self._source_mappings[DEFAULT_MAPPING_NAME]]
+
+        return suitable_source_mappings
+
+
+aql_mappings = AQLMappings(platform_dir="qradar")
diff --git a/uncoder-core/app/translator/platforms/base/aql/parsers/aql.py b/uncoder-core/app/translator/platforms/base/aql/parsers/aql.py
new file mode 100644
index 00000000..a42293f9
--- /dev/null
+++ b/uncoder-core/app/translator/platforms/base/aql/parsers/aql.py
@@ -0,0 +1,113 @@
+"""
+Uncoder IO Commercial Edition License
+-----------------------------------------------------------------
+Copyright (c) 2024 SOC Prime, Inc.
+
+This file is part of the Uncoder IO Commercial Edition ("CE") and is
+licensed under the Uncoder IO Non-Commercial License (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://github.com/UncoderIO/UncoderIO/blob/main/LICENSE
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-----------------------------------------------------------------
+"""
+
+import re
+from typing import Union
+
+from app.translator.core.models.query_container import RawQueryContainer, TokenizedQueryContainer
+from app.translator.core.parser import PlatformQueryParser
+from app.translator.platforms.base.aql.const import NUM_VALUE_PATTERN, SINGLE_QUOTES_VALUE_PATTERN
+from app.translator.platforms.base.aql.mapping import AQLMappings, aql_mappings
+from app.translator.platforms.base.aql.tokenizer import AQLTokenizer
+from app.translator.tools.utils import get_match_group
+
+
+class AQLQueryParser(PlatformQueryParser):
+    tokenizer = AQLTokenizer()
+    mappings: AQLMappings = aql_mappings
+
+    log_source_functions = ("LOGSOURCENAME", "LOGSOURCEGROUPNAME", "LOGSOURCETYPENAME", "CATEGORYNAME")
+    log_source_function_pattern = r"\(?(?P<key>___func_name___\([a-zA-Z]+\))(?:\s+like\s+|\s+ilike\s+|\s*=\s*)'(?P<value>[%a-zA-Z\s]+)'\s*\)?\s+(?:and|or)?\s"  # noqa: E501
+
+    log_source_key_types = ("devicetype", "category", "qid", "qideventcategory")
+    log_source_pattern = rf"___source_type___(?:\s+like\s+|\s+ilike\s+|\s*=\s*)(?:{SINGLE_QUOTES_VALUE_PATTERN}|{NUM_VALUE_PATTERN})(?:\s+(?:and|or)\s+|\s+)?"  # noqa: E501
+    num_value_pattern = r"[0-9]+"
+    multi_num_log_source_pattern = (
+        rf"___source_type___\s+in\s+\((?P<value>(?:{num_value_pattern}(?:\s*,\s*)?)+)\)(?:\s+(?:and|or)\s+|\s+)?"
+    )
+    str_value_pattern = r"""(?:')(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')+)(?:')"""
+    multi_str_log_source_pattern = (
+        rf"""___source_type___\s+in\s+\((?P<value>(?:{str_value_pattern}(?:\s*,\s*)?)+)\)(?:\s+(?:and|or)\s+|\s+)?"""
+    )
+
+    table_pattern = r"\sFROM\s(?P<table>[a-zA-Z\.\-\*]+)\sWHERE\s"
+
+    def __clean_query(self, query: str) -> str:
+        for func_name in self.log_source_functions:
+            pattern = self.log_source_function_pattern.replace("___func_name___", func_name)
+            while search := re.search(pattern, query, flags=re.IGNORECASE):
+                pos_start = search.start()
+                pos_end = search.end()
+                query = query[:pos_start] + query[pos_end:]
+
+        return query
+
+    @staticmethod
+    def __parse_multi_value_log_source(
+        match: re.Match, query: str, pattern: str
+    ) -> tuple[str, Union[list[str], list[int]]]:
+        value = match.group("value")
+        pos_start = match.start()
+        pos_end = match.end()
+        query = query[:pos_start] + query[pos_end:]
+        return query, re.findall(pattern, value)
+
+    def __parse_log_sources(self, query: str) -> tuple[dict[str, Union[list[str], list[int]]], str]:
+        log_sources = {}
+
+        if search := re.search(self.table_pattern, query, flags=re.IGNORECASE):
+            pos_end = search.end()
+            query = query[pos_end:]
+
+        for log_source_key in self.log_source_key_types:
+            pattern = self.log_source_pattern.replace("___source_type___", log_source_key)
+            while search := re.search(pattern, query, flags=re.IGNORECASE):
+                num_value = get_match_group(search, group_name="num_value")
+                str_value = get_match_group(search, group_name="s_q_value")
+                value = int(num_value) if num_value else str_value
+                log_sources.setdefault(log_source_key, []).append(value)
+                pos_start = search.start()
+                pos_end = search.end()
+                query = query[:pos_start] + query[pos_end:]
+
+            pattern = self.multi_num_log_source_pattern.replace("___source_type___", log_source_key)
+            if search := re.search(pattern, query, flags=re.IGNORECASE):
+                query, values = self.__parse_multi_value_log_source(search, query, self.num_value_pattern)
+                values = [int(v) for v in values]
+                log_sources.setdefault(log_source_key, []).extend(values)
+
+            pattern = self.multi_str_log_source_pattern.replace("___source_type___", log_source_key)
+            if search := re.search(pattern, query, flags=re.IGNORECASE):
+                query, values = self.__parse_multi_value_log_source(search, query, self.str_value_pattern)
+                log_sources.setdefault(log_source_key, []).extend(values)
+
+        return log_sources, query
+
+    def _parse_query(self, text: str) -> tuple[str, dict[str, Union[list[str], list[int]]]]:
+        query = self.__clean_query(text)
+        log_sources, query = self.__parse_log_sources(query)
+        return query, log_sources
+
+    def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
+        query, log_sources = self._parse_query(raw_query_container.query)
+        tokens, source_mappings = self.get_tokens_and_source_mappings(query, log_sources)
+        fields_tokens = self.get_fields_tokens(tokens=tokens)
+        meta_info = raw_query_container.meta_info
+        meta_info.query_fields = fields_tokens
+        meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
+        return TokenizedQueryContainer(tokens=tokens, meta_info=meta_info)

From d4614b8ff788a60db1a8ea03019959eb90f9a565 Mon Sep 17 00:00:00 2001
From: Viktor Hrebeniuk <76157115+saltar-ua@users.noreply.github.com>
Date: Tue, 21 May 2024 17:00:16 +0300
Subject: [PATCH 2/2] Improve AQL mapping method is_suitable

---
 uncoder-core/app/translator/core/render.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/uncoder-core/app/translator/core/render.py b/uncoder-core/app/translator/core/render.py
index b1b51fb6..a6fcbcb5 100644
--- a/uncoder-core/app/translator/core/render.py
+++ b/uncoder-core/app/translator/core/render.py
@@ -295,6 +295,7 @@ def generate_raw_log_fields(self, fields: list[Field], source_mapping: SourceMap
 
     def _generate_from_tokenized_query_container(self, query_container: TokenizedQueryContainer) -> str:
         queries_map = {}
+        errors = []
         source_mappings = self._get_source_mappings(query_container.meta_info.source_mapping_ids)
 
         for source_mapping in source_mappings:
@@ -306,7 +307,8 @@ def _generate_from_tokenized_query_container(self, query_container: TokenizedQue
                     )
                     prefix += f"\n{defined_raw_log_fields}\n"
                 result = self.generate_query(tokens=query_container.tokens, source_mapping=source_mapping)
-            except StrictPlatformException:
+            except StrictPlatformException as err:
+                errors.append(err)
                 continue
             else:
                 rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping)
@@ -320,7 +322,8 @@ def _generate_from_tokenized_query_container(self, query_container: TokenizedQue
                     source_mapping=source_mapping,
                 )
                 queries_map[source_mapping.source_id] = finalized_query
-
+        if not queries_map and errors:
+            raise errors[0]
         return self.finalize(queries_map)
 
     def generate(self, query_container: Union[RawQueryContainer, TokenizedQueryContainer]) -> str:
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy