From ecf3fdf3d6d3a14f08f4c5631168a614951ad752 Mon Sep 17 00:00:00 2001 From: Oleksandr Volha Date: Fri, 10 Nov 2023 17:00:32 +0200 Subject: [PATCH] elastic and opensearch regex fixes, sigma source mapping fix --- .../elasticsearch/parsers/elasticsearch.py | 5 +++-- .../converter/backends/elasticsearch/tokenizer.py | 4 ++-- .../backends/opensearch/parsers/opensearch.py | 5 +++-- .../converter/backends/opensearch/tokenizer.py | 4 ++-- .../app/converter/backends/sigma/renders/sigma.py | 15 +++++++++++---- 5 files changed, 21 insertions(+), 12 deletions(-) diff --git a/siem-converter/app/converter/backends/elasticsearch/parsers/elasticsearch.py b/siem-converter/app/converter/backends/elasticsearch/parsers/elasticsearch.py index 0a339460..72dbb438 100644 --- a/siem-converter/app/converter/backends/elasticsearch/parsers/elasticsearch.py +++ b/siem-converter/app/converter/backends/elasticsearch/parsers/elasticsearch.py @@ -32,7 +32,7 @@ class ElasticSearchParser(Parser): mappings: ElasticSearchMappings = elasticsearch_mappings tokenizer = ElasticSearchTokenizer() - log_source_pattern = r"___source_type___\s*(:|=)\s*(?:\"?(?P<d_q_value>[%a-zA-Z_*:0-9\-/]+)\"|(?P<value>[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?" + log_source_pattern = r"___source_type___\s*(?:[:=])\s*(?:\"?(?P<d_q_value>[%a-zA-Z_*:0-9\-/]+)\"|(?P<value>[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?" 
log_source_key_types = ("index", "event\.category") def _parse_log_sources(self, query: str) -> Tuple[str, Dict[str, List[str]]]: @@ -40,7 +40,8 @@ def _parse_log_sources(self, query: str) -> Tuple[str, Dict[str, List[str]]]: for source_type in self.log_source_key_types: pattern = self.log_source_pattern.replace('___source_type___', source_type) while search := re.search(pattern, query, flags=re.IGNORECASE): - value = search.group(1) + group_dict = search.groupdict() + value = group_dict.get("d_q_value") or group_dict.get("value") log_sources.setdefault(source_type, []).append(value) pos_start = search.start() pos_end = search.end() diff --git a/siem-converter/app/converter/backends/elasticsearch/tokenizer.py b/siem-converter/app/converter/backends/elasticsearch/tokenizer.py index 1e22b810..236fd701 100644 --- a/siem-converter/app/converter/backends/elasticsearch/tokenizer.py +++ b/siem-converter/app/converter/backends/elasticsearch/tokenizer.py @@ -32,13 +32,13 @@ class ElasticSearchTokenizer(QueryTokenizer): match_operator_pattern = r"(?:___field___\s*(?P<match_operator>:))\s*" num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*" - double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)+)"\s*' + double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*' no_quotes_value_pattern = r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*" re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\[\]\s?]+)/\s*" _value_pattern = fr"{num_value_pattern}|{re_value_pattern}|{no_quotes_value_pattern}|{double_quotes_value_pattern}" keyword_pattern = r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{|\\\}|\\\:|\\)+)(?:\s+|\)|$)" - multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]*)\)""" + multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)""" multi_value_check_pattern = r"___field___\s*___operator___\s*\(" wildcard_symbol = "*" diff --git 
a/siem-converter/app/converter/backends/opensearch/parsers/opensearch.py b/siem-converter/app/converter/backends/opensearch/parsers/opensearch.py index 2a259f56..1293176f 100644 --- a/siem-converter/app/converter/backends/opensearch/parsers/opensearch.py +++ b/siem-converter/app/converter/backends/opensearch/parsers/opensearch.py @@ -32,7 +32,7 @@ class OpenSearchParser(Parser): mappings: OpenSearchMappings = opensearch_mappings tokenizer = OpenSearchTokenizer() - log_source_pattern = r"___source_type___\s*(:|=)\s*(?:\"?(?P<d_q_value>[%a-zA-Z_*:0-9\-/]+)\"|(?P<value>[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?" + log_source_pattern = r"___source_type___\s*(?:[:=])\s*(?:\"?(?P<d_q_value>[%a-zA-Z_*:0-9\-/]+)\"|(?P<value>[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?" log_source_key_types = ("index", "event\.category") def _parse_log_sources(self, query: str) -> Tuple[str, Dict[str, List[str]]]: @@ -40,7 +40,8 @@ def _parse_log_sources(self, query: str) -> Tuple[str, Dict[str, List[str]]]: for source_type in self.log_source_key_types: pattern = self.log_source_pattern.replace('___source_type___', source_type) while search := re.search(pattern, query, flags=re.IGNORECASE): - value = search.group(1) + group_dict = search.groupdict() + value = group_dict.get("d_q_value") or group_dict.get("value") log_sources.setdefault(source_type, []).append(value) pos_start = search.start() pos_end = search.end() diff --git a/siem-converter/app/converter/backends/opensearch/tokenizer.py b/siem-converter/app/converter/backends/opensearch/tokenizer.py index 0c0a1e92..6ba28c67 100644 --- a/siem-converter/app/converter/backends/opensearch/tokenizer.py +++ b/siem-converter/app/converter/backends/opensearch/tokenizer.py @@ -32,13 +32,13 @@ class OpenSearchTokenizer(QueryTokenizer): match_operator_pattern = r"(?:___field___\s*(?P<match_operator>:))\s*" num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*" - double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)+)"\s*' + double_quotes_value_pattern = 
r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*' no_quotes_value_pattern = r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*" re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\[\]\s?]+)/\s*" _value_pattern = fr"{num_value_pattern}|{re_value_pattern}|{no_quotes_value_pattern}|{double_quotes_value_pattern}" keyword_pattern = r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{|\\\}|\\\:|\\)+)(?:\s+|\)|$)" - multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]*)\)""" + multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)""" multi_value_check_pattern = r"___field___\s*___operator___\s*\(" wildcard_symbol = "*" diff --git a/siem-converter/app/converter/backends/sigma/renders/sigma.py b/siem-converter/app/converter/backends/sigma/renders/sigma.py index 77dabd43..651a0437 100644 --- a/siem-converter/app/converter/backends/sigma/renders/sigma.py +++ b/siem-converter/app/converter/backends/sigma/renders/sigma.py @@ -17,7 +17,7 @@ """ import copy -from typing import Any +from typing import Any, List import yaml @@ -25,7 +25,7 @@ from app.converter.backends.sigma.mapping import SigmaMappings, sigma_mappings, SigmaLogSourceSignature from app.converter.core.compiler import DataStructureCompiler from app.converter.core.exceptions.core import StrictPlatformFieldException -from app.converter.core.mapping import SourceMapping +from app.converter.core.mapping import SourceMapping, DEFAULT_MAPPING_NAME from app.converter.core.models.field import Field, Keyword from app.converter.core.models.functions.types import ParsedFunctions from app.converter.core.models.group import Group @@ -229,11 +229,18 @@ def generate_detection(self, data: Any, source_mapping: SourceMapping) -> dict: self.reset_counters() return detection + + def __get_source_mapping(self, source_mapping_ids: List[str]) -> SourceMapping: + for source_mapping_id in source_mapping_ids: + if source_mapping := 
self.mappings.get_source_mapping(source_mapping_id): + return source_mapping + + return self.mappings.get_source_mapping(DEFAULT_MAPPING_NAME) def generate(self, query, meta_info: MetaInfoContainer, functions: ParsedFunctions): self.reset_counters() - source_mapping = self.mappings.get_source_mapping(meta_info.source_mapping_ids[0]) + source_mapping = self.__get_source_mapping(meta_info.source_mapping_ids) log_source_signature: SigmaLogSourceSignature = source_mapping.log_source_signature sigma_condition = copy.deepcopy(query) prepared_data_structure = DataStructureCompiler().generate(tokens=sigma_condition) @@ -243,7 +250,7 @@ def generate(self, query, meta_info: MetaInfoContainer, functions: ParsedFunctio "id": meta_info.id, "description": meta_info.description, "status": "experimental", - "author": "", + "author": meta_info.author, "references": meta_info.references, "tags": meta_info.tags, "logsource": log_source_signature.log_sources,
