From bd9db47961a0d7a3c885fa799e1975845a1d8310 Mon Sep 17 00:00:00 2001
From: "oleksandr.volha"
Date: Mon, 2 Dec 2024 12:54:25 +0200
Subject: [PATCH] separate field tokens

---
 uncoder-core/app/translator/core/functions.py |  4 +++
 uncoder-core/app/translator/core/mapping.py   | 28 +++++++++++++++----
 .../translator/core/models/query_container.py |  4 +++
 uncoder-core/app/translator/core/parser.py    | 15 ++++++----
 uncoder-core/app/translator/core/render.py    |  8 ++++--
 .../platforms/base/aql/parsers/aql.py         | 10 +++++--
 .../platforms/base/lucene/parsers/lucene.py   |  6 ++--
 .../platforms/base/spl/functions/__init__.py  |  3 +-
 .../platforms/base/spl/parsers/spl.py         | 14 ++++++----
 .../platforms/base/sql/parsers/sql.py         |  6 ++--
 .../platforms/chronicle/parsers/chronicle.py  |  6 ++--
 .../parsers/elasticsearch_eql.py              |  6 ++--
 .../forti_siem/renders/forti_siem_rule.py     |  5 +++-
 .../renders/logrhythm_axon_query.py           |  5 +++-
 .../platforms/logscale/parsers/logscale.py    | 10 +++++--
 .../platforms/microsoft/functions/__init__.py |  3 +-
 .../microsoft/parsers/microsoft_sentinel.py   | 10 +++++--
 17 files changed, 99 insertions(+), 44 deletions(-)

diff --git a/uncoder-core/app/translator/core/functions.py b/uncoder-core/app/translator/core/functions.py
index 728ddc0e..1ac217bb 100644
--- a/uncoder-core/app/translator/core/functions.py
+++ b/uncoder-core/app/translator/core/functions.py
@@ -164,6 +164,10 @@ def order_to_render(self) -> dict[str, int]:
         return {}
 
+    @property
+    def supported_render_names(self) -> set[str]:
+        return set(self._renders_map)
+
 
 class PlatformFunctions:
     dir_path: str = None
diff --git a/uncoder-core/app/translator/core/mapping.py b/uncoder-core/app/translator/core/mapping.py
index 2a06147d..afd9973f 100644
--- a/uncoder-core/app/translator/core/mapping.py
+++ b/uncoder-core/app/translator/core/mapping.py
@@ -188,13 +188,22 @@ def get_source_mapping(self, source_id: str) -> Optional[SourceMapping]:
     def default_mapping(self) -> SourceMapping:
         return self._source_mappings[DEFAULT_MAPPING_NAME]
 
-    def check_fields_mapping_existence(self, field_tokens: list[Field], source_mapping: SourceMapping) -> list[str]:
+    def check_fields_mapping_existence(
+        self,
+        query_field_tokens: list[Field],
+        function_field_tokens_map: dict[str, list[Field]],
+        supported_func_render_names: set[str],
+        source_mapping: SourceMapping,
+    ) -> list[str]:
         unmapped = []
-        for field in field_tokens:
-            generic_field_name = field.get_generic_field_name(source_mapping.source_id)
-            mapped_field = source_mapping.fields_mapping.get_platform_field_name(generic_field_name=generic_field_name)
-            if not mapped_field and field.source_name not in unmapped:
-                unmapped.append(field.source_name)
+
+        for field in query_field_tokens:
+            self._check_field_mapping_existence(field, source_mapping, unmapped)
+
+        for func_name, function_field_tokens in function_field_tokens_map.items():
+            if func_name in supported_func_render_names:
+                for field in function_field_tokens:
+                    self._check_field_mapping_existence(field, source_mapping, unmapped)
 
         if self.is_strict_mapping and unmapped:
             raise StrictPlatformException(
@@ -203,6 +212,13 @@ def check_fields_mapping_existence(self, field_tokens: list[Field], source_mappi
 
         return unmapped
 
+    @staticmethod
+    def _check_field_mapping_existence(field: Field, source_mapping: SourceMapping, unmapped: list[str]) -> None:
+        generic_field_name = field.get_generic_field_name(source_mapping.source_id)
+        mapped_field = source_mapping.fields_mapping.get_platform_field_name(generic_field_name=generic_field_name)
+        if not mapped_field and field.source_name not in unmapped:
+            unmapped.append(field.source_name)
+
     @staticmethod
     def map_field(field: Field, source_mapping: SourceMapping) -> list[str]:
         generic_field_name = field.get_generic_field_name(source_mapping.source_id)
diff --git a/uncoder-core/app/translator/core/models/query_container.py b/uncoder-core/app/translator/core/models/query_container.py
index bb95f9b4..6434d4e0 100644
--- a/uncoder-core/app/translator/core/models/query_container.py
+++ b/uncoder-core/app/translator/core/models/query_container.py
@@ -65,6 +65,8 @@ def __init__(
         date: Optional[str] = None,
         output_table_fields: Optional[list[Field]] = None,
         query_fields: Optional[list[Field]] = None,
+        function_fields: Optional[list[Field]] = None,
+        function_fields_map: Optional[dict[str, list[Field]]] = None,
         license_: Optional[str] = None,
         severity: Optional[str] = None,
         references: Optional[list[str]] = None,
@@ -90,6 +92,8 @@ def __init__(
         self.date = date or datetime.now().date().strftime("%Y-%m-%d")
         self.output_table_fields = output_table_fields or []
         self.query_fields = query_fields or []
+        self.function_fields = function_fields or []
+        self.function_fields_map = function_fields_map or {}
         self.license = license_ or "DRL 1.1"
         self.severity = severity or SeverityType.low
         self.references = references or []
diff --git a/uncoder-core/app/translator/core/parser.py b/uncoder-core/app/translator/core/parser.py
index 0ad509d1..da7330eb 100644
--- a/uncoder-core/app/translator/core/parser.py
+++ b/uncoder-core/app/translator/core/parser.py
@@ -65,16 +65,19 @@ def get_query_tokens(self, query: str) -> list[QUERY_TOKEN_TYPE]:
     @staticmethod
     def get_field_tokens(
         query_tokens: list[QUERY_TOKEN_TYPE], functions: Optional[list[Function]] = None
-    ) -> list[Field]:
-        field_tokens = []
+    ) -> tuple[list[Field], list[Field], dict[str, list[Field]]]:
+        query_field_tokens = []
+        function_field_tokens = []
+        function_field_tokens_map = {}
         for token in query_tokens:
             if isinstance(token, (FieldField, FieldValue, FunctionValue)):
-                field_tokens.extend(token.fields)
+                query_field_tokens.extend(token.fields)
 
-        if functions:
-            field_tokens.extend([field for func in functions for field in func.fields])
+        for func in functions or []:
+            function_field_tokens.extend(func.fields)
+            function_field_tokens_map[func.name] = func.fields
 
-        return field_tokens
+        return query_field_tokens, function_field_tokens, function_field_tokens_map
 
     def get_source_mappings(
         self, field_tokens: list[Field], log_sources: dict[str, list[Union[int, str]]]
diff --git a/uncoder-core/app/translator/core/render.py b/uncoder-core/app/translator/core/render.py
index 97709dd0..857c2516 100644
--- a/uncoder-core/app/translator/core/render.py
+++ b/uncoder-core/app/translator/core/render.py
@@ -428,14 +428,18 @@ def _generate_from_tokenized_query_container_by_source_mapping(
         self, query_container: TokenizedQueryContainer, source_mapping: SourceMapping
     ) -> str:
         unmapped_fields = self.mappings.check_fields_mapping_existence(
-            query_container.meta_info.query_fields, source_mapping
+            query_container.meta_info.query_fields,
+            query_container.meta_info.function_fields_map,
+            self.platform_functions.manager.supported_render_names,
+            source_mapping,
         )
         rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping)
         prefix = self.generate_prefix(source_mapping.log_source_signature, rendered_functions.rendered_prefix)
 
         if source_mapping.raw_log_fields:
             defined_raw_log_fields = self.generate_raw_log_fields(
-                fields=query_container.meta_info.query_fields, source_mapping=source_mapping
+                fields=query_container.meta_info.query_fields + query_container.meta_info.function_fields,
+                source_mapping=source_mapping,
             )
             prefix += f"\n{defined_raw_log_fields}"
         query = self.generate_query(tokens=query_container.tokens, source_mapping=source_mapping)
diff --git a/uncoder-core/app/translator/platforms/base/aql/parsers/aql.py b/uncoder-core/app/translator/platforms/base/aql/parsers/aql.py
index 5b3a7041..0dad8283 100644
--- a/uncoder-core/app/translator/platforms/base/aql/parsers/aql.py
+++ b/uncoder-core/app/translator/platforms/base/aql/parsers/aql.py
@@ -115,9 +115,13 @@ def _parse_query(self, text: str) -> tuple[str, dict[str, Union[list[str], list[
     def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
         query, log_sources, functions = self._parse_query(raw_query_container.query)
         query_tokens = self.get_query_tokens(query)
-        field_tokens = self.get_field_tokens(query_tokens, functions.functions)
-        source_mappings = self.get_source_mappings(field_tokens, log_sources)
+        query_field_tokens, function_field_tokens, function_field_tokens_map = self.get_field_tokens(
+            query_tokens, functions.functions
+        )
+        source_mappings = self.get_source_mappings(query_field_tokens + function_field_tokens, log_sources)
         meta_info = raw_query_container.meta_info
-        meta_info.query_fields = field_tokens
+        meta_info.query_fields = query_field_tokens
+        meta_info.function_fields = function_field_tokens
+        meta_info.function_fields_map = function_field_tokens_map
         meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
         return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info, functions=functions)
diff --git a/uncoder-core/app/translator/platforms/base/lucene/parsers/lucene.py b/uncoder-core/app/translator/platforms/base/lucene/parsers/lucene.py
index 5fb57284..49f05c98 100644
--- a/uncoder-core/app/translator/platforms/base/lucene/parsers/lucene.py
+++ b/uncoder-core/app/translator/platforms/base/lucene/parsers/lucene.py
@@ -48,9 +48,9 @@ def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]]]:
     def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
         query, log_sources = self._parse_query(raw_query_container.query)
         query_tokens = self.get_query_tokens(query)
-        field_tokens = self.get_field_tokens(query_tokens)
-        source_mappings = self.get_source_mappings(field_tokens, log_sources)
+        query_field_tokens, _, _ = self.get_field_tokens(query_tokens)
+        source_mappings = self.get_source_mappings(query_field_tokens, log_sources)
         meta_info = raw_query_container.meta_info
-        meta_info.query_fields = field_tokens
+        meta_info.query_fields = query_field_tokens
         meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
         return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info)
diff --git a/uncoder-core/app/translator/platforms/base/spl/functions/__init__.py b/uncoder-core/app/translator/platforms/base/spl/functions/__init__.py
index 1ef86248..9dc715f5 100644
--- a/uncoder-core/app/translator/platforms/base/spl/functions/__init__.py
+++ b/uncoder-core/app/translator/platforms/base/spl/functions/__init__.py
@@ -26,7 +26,8 @@ def parse(self, query: str) -> tuple[str, ParsedFunctions]:
         functions = query.split(self.function_delimiter)
         result_query = self.prepare_query(functions[0])
         for func in functions[1:]:
-            split_func = func.strip().split(" ")
+            func = func.strip()
+            split_func = func.split(" ")
             func_name, func_body = split_func[0], " ".join(split_func[1:])
             try:
                 func_parser = self.manager.get_hof_parser(func_name)
diff --git a/uncoder-core/app/translator/platforms/base/spl/parsers/spl.py b/uncoder-core/app/translator/platforms/base/spl/parsers/spl.py
index 7818b4ac..f56af913 100644
--- a/uncoder-core/app/translator/platforms/base/spl/parsers/spl.py
+++ b/uncoder-core/app/translator/platforms/base/spl/parsers/spl.py
@@ -29,7 +29,7 @@ class SplQueryParser(PlatformQueryParser):
 
     log_source_pattern = r"^___source_type___\s*=\s*(?:\"(?P[%a-zA-Z_*:0-9\-/]+)\"|(?P[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?"  # noqa: E501
-    rule_name_pattern = r"`(?P(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s])*)`"
+    rule_name_pattern = r"`(?P(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s])*)`"  # noqa: RUF001
     log_source_key_types = ("index", "source", "sourcetype", "sourcecategory")
 
     platform_functions: SplFunctions = None
@@ -56,7 +56,7 @@ def _parse_log_sources(self, query: str) -> tuple[dict[str, list[str]], str]:
     def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]], ParsedFunctions]:
         if re.match(self.rule_name_pattern, query):
             search = re.search(self.rule_name_pattern, query, flags=re.IGNORECASE)
-            query = query[:search.start()] + query[search.end():]
+            query = query[: search.start()] + query[search.end() :]
             query = query.strip()
         log_sources, query = self._parse_log_sources(query)
         query, functions = self.platform_functions.parse(query)
@@ -72,9 +72,13 @@
 
         query, log_sources, functions = self._parse_query(raw_query_container.query)
         query_tokens = self.get_query_tokens(query)
-        field_tokens = self.get_field_tokens(query_tokens, functions.functions)
-        source_mappings = self.get_source_mappings(field_tokens, log_sources)
+        query_field_tokens, function_field_tokens, function_field_tokens_map = self.get_field_tokens(
+            query_tokens, functions.functions
+        )
+        source_mappings = self.get_source_mappings(query_field_tokens + function_field_tokens, log_sources)
         meta_info = raw_query_container.meta_info
-        meta_info.query_fields = field_tokens
+        meta_info.query_fields = query_field_tokens
+        meta_info.function_fields = function_field_tokens
+        meta_info.function_fields_map = function_field_tokens_map
         meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
         return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info, functions=functions)
diff --git a/uncoder-core/app/translator/platforms/base/sql/parsers/sql.py b/uncoder-core/app/translator/platforms/base/sql/parsers/sql.py
index 735f95c6..01be3500 100644
--- a/uncoder-core/app/translator/platforms/base/sql/parsers/sql.py
+++ b/uncoder-core/app/translator/platforms/base/sql/parsers/sql.py
@@ -43,9 +43,9 @@ def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]]]:
     def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
         query, log_sources = self._parse_query(raw_query_container.query)
         query_tokens = self.get_query_tokens(query)
-        field_tokens = self.get_field_tokens(query_tokens)
-        source_mappings = self.get_source_mappings(field_tokens, log_sources)
+        query_field_tokens, _, _ = self.get_field_tokens(query_tokens)
+        source_mappings = self.get_source_mappings(query_field_tokens, log_sources)
         meta_info = raw_query_container.meta_info
-        meta_info.query_fields = field_tokens
+        meta_info.query_fields = query_field_tokens
         meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
         return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info)
diff --git a/uncoder-core/app/translator/platforms/chronicle/parsers/chronicle.py b/uncoder-core/app/translator/platforms/chronicle/parsers/chronicle.py
index 7c50cb06..0cc1af82 100644
--- a/uncoder-core/app/translator/platforms/chronicle/parsers/chronicle.py
+++ b/uncoder-core/app/translator/platforms/chronicle/parsers/chronicle.py
@@ -35,9 +35,9 @@ class ChronicleQueryParser(PlatformQueryParser):
 
     def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
         query_tokens = self.get_query_tokens(raw_query_container.query)
-        field_tokens = self.get_field_tokens(query_tokens)
-        source_mappings = self.get_source_mappings(field_tokens, {})
+        query_field_tokens, _, _ = self.get_field_tokens(query_tokens)
+        source_mappings = self.get_source_mappings(query_field_tokens, {})
         meta_info = raw_query_container.meta_info
-        meta_info.query_fields = field_tokens
+        meta_info.query_fields = query_field_tokens
         meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
         return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info)
diff --git a/uncoder-core/app/translator/platforms/elasticsearch/parsers/elasticsearch_eql.py b/uncoder-core/app/translator/platforms/elasticsearch/parsers/elasticsearch_eql.py
index 9ee7e0d4..377b1e08 100644
--- a/uncoder-core/app/translator/platforms/elasticsearch/parsers/elasticsearch_eql.py
+++ b/uncoder-core/app/translator/platforms/elasticsearch/parsers/elasticsearch_eql.py
@@ -29,9 +29,9 @@ def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]]]:
     def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
         query, log_sources = self._parse_query(raw_query_container.query)
         query_tokens = self.get_query_tokens(query)
-        field_tokens = self.get_field_tokens(query_tokens)
-        source_mappings = self.get_source_mappings(field_tokens, log_sources)
+        query_field_tokens, _, _ = self.get_field_tokens(query_tokens)
+        source_mappings = self.get_source_mappings(query_field_tokens, log_sources)
         meta_info = raw_query_container.meta_info
-        meta_info.query_fields = field_tokens
+        meta_info.query_fields = query_field_tokens
         meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
         return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info)
diff --git a/uncoder-core/app/translator/platforms/forti_siem/renders/forti_siem_rule.py b/uncoder-core/app/translator/platforms/forti_siem/renders/forti_siem_rule.py
index 138e56c6..f9b3e942 100644
--- a/uncoder-core/app/translator/platforms/forti_siem/renders/forti_siem_rule.py
+++ b/uncoder-core/app/translator/platforms/forti_siem/renders/forti_siem_rule.py
@@ -232,7 +232,10 @@ def _generate_from_tokenized_query_container_by_source_mapping(
         self, query_container: TokenizedQueryContainer, source_mapping: SourceMapping
     ) -> str:
         unmapped_fields = self.mappings.check_fields_mapping_existence(
-            query_container.meta_info.query_fields, source_mapping
+            query_container.meta_info.query_fields,
+            query_container.meta_info.function_fields_map,
+            self.platform_functions.manager.supported_render_names,
+            source_mapping,
         )
         is_event_type_set = False
         field_values = [token for token in query_container.tokens if isinstance(token, FieldValue)]
diff --git a/uncoder-core/app/translator/platforms/logrhythm_axon/renders/logrhythm_axon_query.py b/uncoder-core/app/translator/platforms/logrhythm_axon/renders/logrhythm_axon_query.py
index b81f5453..c9172b58 100644
--- a/uncoder-core/app/translator/platforms/logrhythm_axon/renders/logrhythm_axon_query.py
+++ b/uncoder-core/app/translator/platforms/logrhythm_axon/renders/logrhythm_axon_query.py
@@ -244,7 +244,10 @@ def _generate_from_tokenized_query_container_by_source_mapping(
         self, query_container: TokenizedQueryContainer, source_mapping: SourceMapping
     ) -> str:
         unmapped_fields = self.mappings.check_fields_mapping_existence(
-            query_container.meta_info.query_fields, source_mapping
+            query_container.meta_info.query_fields,
+            query_container.meta_info.function_fields_map,
+            self.platform_functions.manager.supported_render_names,
+            source_mapping,
         )
         prefix = self.generate_prefix(source_mapping.log_source_signature)
         if "product" in query_container.meta_info.parsed_logsources:
diff --git a/uncoder-core/app/translator/platforms/logscale/parsers/logscale.py b/uncoder-core/app/translator/platforms/logscale/parsers/logscale.py
index 4f6fb9d9..ddf2fcd1 100644
--- a/uncoder-core/app/translator/platforms/logscale/parsers/logscale.py
+++ b/uncoder-core/app/translator/platforms/logscale/parsers/logscale.py
@@ -43,9 +43,13 @@ def _parse_query(self, query: str) -> tuple[str, ParsedFunctions]:
     def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
         query, functions = self._parse_query(query=raw_query_container.query)
         query_tokens = self.get_query_tokens(query)
-        field_tokens = self.get_field_tokens(query_tokens, functions.functions)
-        source_mappings = self.get_source_mappings(field_tokens, {})
+        query_field_tokens, function_field_tokens, function_field_tokens_map = self.get_field_tokens(
+            query_tokens, functions.functions
+        )
+        source_mappings = self.get_source_mappings(query_field_tokens + function_field_tokens, {})
         meta_info = raw_query_container.meta_info
-        meta_info.query_fields = field_tokens
+        meta_info.query_fields = query_field_tokens
+        meta_info.function_fields = function_field_tokens
+        meta_info.function_fields_map = function_field_tokens_map
         meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
         return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info, functions=functions)
diff --git a/uncoder-core/app/translator/platforms/microsoft/functions/__init__.py b/uncoder-core/app/translator/platforms/microsoft/functions/__init__.py
index b28b7880..e0742815 100644
--- a/uncoder-core/app/translator/platforms/microsoft/functions/__init__.py
+++ b/uncoder-core/app/translator/platforms/microsoft/functions/__init__.py
@@ -22,7 +22,8 @@ def parse(self, query: str) -> tuple[str, str, ParsedFunctions]:
         table = split_query[0].strip()
         query_parts = []
         for func in split_query[1:]:
-            split_func = func.strip(" ").split(" ")
+            func = func.strip()
+            split_func = func.split(" ")
             func_name, func_body = split_func[0], " ".join(split_func[1:])
             if func_name == KQLFunctionType.where:
                 query_parts.append(func_body)
diff --git a/uncoder-core/app/translator/platforms/microsoft/parsers/microsoft_sentinel.py b/uncoder-core/app/translator/platforms/microsoft/parsers/microsoft_sentinel.py
index 24d522e9..680f3e2d 100644
--- a/uncoder-core/app/translator/platforms/microsoft/parsers/microsoft_sentinel.py
+++ b/uncoder-core/app/translator/platforms/microsoft/parsers/microsoft_sentinel.py
@@ -44,9 +44,13 @@ def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]], ParsedFun
     def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
         query, log_sources, functions = self._parse_query(query=raw_query_container.query)
         query_tokens = self.get_query_tokens(query)
-        field_tokens = self.get_field_tokens(query_tokens, functions.functions)
-        source_mappings = self.get_source_mappings(field_tokens, log_sources)
+        query_field_tokens, function_field_tokens, function_field_tokens_map = self.get_field_tokens(
+            query_tokens, functions.functions
+        )
+        source_mappings = self.get_source_mappings(query_field_tokens + function_field_tokens, log_sources)
         meta_info = raw_query_container.meta_info
-        meta_info.query_fields = field_tokens
+        meta_info.query_fields = query_field_tokens
+        meta_info.function_fields = function_field_tokens
+        meta_info.function_fields_map = function_field_tokens_map
         meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
         return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info, functions=functions)
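
Reviewer note, with a minimal sketch (stand-in classes and names, not the real
uncoder-core types): get_field_tokens() now returns query-level fields,
function-level fields, and a per-function field map separately, and
check_fields_mapping_existence() only checks the fields of functions the target
platform can actually render, so a function a platform cannot render no longer
produces spurious "unmapped field" results.

    from dataclasses import dataclass, field as dc_field


    @dataclass
    class Field:
        source_name: str


    @dataclass
    class Function:
        name: str
        fields: list[Field] = dc_field(default_factory=list)


    def get_field_tokens(query_fields, functions=None):
        # Mirror of the patched parser contract: fields are split by origin,
        # and the map records which function contributed which fields.
        query_field_tokens = list(query_fields)
        function_field_tokens, function_field_tokens_map = [], {}
        for func in functions or []:
            function_field_tokens.extend(func.fields)
            function_field_tokens_map[func.name] = func.fields
        return query_field_tokens, function_field_tokens, function_field_tokens_map


    # Hypothetical usage: a platform whose render supports no functions at all.
    q_fields, f_fields, f_map = get_field_tokens(
        [Field("src_ip")], [Function("stats", [Field("dest_port")])]
    )
    supported_render_names = set()  # assumed: nothing renderable on this platform

    # Only fields of renderable functions join the mapping-existence check.
    to_check = q_fields + [
        fld
        for name, flds in f_map.items()
        if name in supported_render_names
        for fld in flds
    ]
    print([fld.source_name for fld in to_check])  # ['src_ip'], dest_port is skipped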
