
Commit 416f5ca

Merge pull request #199 from UncoderIO/gis-8639
Gis 8639 add ElasticSearchEQLQueryParser
2 parents 25a55d0 + 608a1f4 commit 416f5ca

File tree

7 files changed: +129 -3 lines

uncoder-core/app/translator/platforms/elasticsearch/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -3,6 +3,7 @@
     ElasticSearchRuleTOMLParser,  # noqa: F401
 )
 from app.translator.platforms.elasticsearch.parsers.elasticsearch import ElasticSearchQueryParser  # noqa: F401
+from app.translator.platforms.elasticsearch.parsers.elasticsearch_eql import ElasticSearchEQLQueryParser  # noqa: F401
 from app.translator.platforms.elasticsearch.renders.detection_rule import ElasticSearchRuleRender  # noqa: F401
 from app.translator.platforms.elasticsearch.renders.elast_alert import ElastAlertRuleRender  # noqa: F401
 from app.translator.platforms.elasticsearch.renders.elasticsearch import ElasticSearchQueryRender  # noqa: F401

uncoder-core/app/translator/platforms/elasticsearch/const.py

Lines changed: 9 additions & 0 deletions

@@ -11,6 +11,7 @@
 _ELASTIC_WATCHER_RULE = "elastic-watcher-rule"
 _ELASTIC_ESQL_QUERY = "elastic-esql-query"
 _ELASTIC_ESQL_RULE = "elastic-esql-rule"
+_ELASTIC_EQL_QUERY = "elastic-eql-query"

 ELASTIC_QUERY_TYPES = {
     _ELASTIC_LUCENE_QUERY,
@@ -83,6 +84,13 @@
     **PLATFORM_DETAILS,
 }

+ELASTICSEARCH_EQL_QUERY_DETAILS = {
+    "platform_id": _ELASTIC_EQL_QUERY,
+    "name": "Elasticsearch EQL Query",
+    "platform_name": "Query (EQL)",
+    **PLATFORM_DETAILS,
+}
+
 elasticsearch_lucene_query_details = PlatformDetails(**ELASTICSEARCH_LUCENE_QUERY_DETAILS)
 elasticsearch_esql_query_details = PlatformDetails(**ELASTICSEARCH_ESQL_QUERY_DETAILS)
 elasticsearch_esql_rule_details = PlatformDetails(**ELASTICSEARCH_ESQL_RULE_DETAILS)
@@ -91,6 +99,7 @@
 elastalert_details = PlatformDetails(**ELASTALERT_DETAILS)
 kibana_rule_details = PlatformDetails(**KIBANA_DETAILS)
 xpack_watcher_details = PlatformDetails(**XPACK_WATCHER_DETAILS)
+elastic_eql_query_details = PlatformDetails(**ELASTICSEARCH_EQL_QUERY_DETAILS)

 ELASTICSEARCH_DETECTION_RULE = {
     "description": "Autogenerated ElasticSearch Detection Rule.",

uncoder-core/app/translator/platforms/elasticsearch/mapping.py

Lines changed: 2 additions & 0 deletions

@@ -2,6 +2,7 @@
 from app.translator.platforms.elasticsearch.const import (
     elastalert_details,
     elasticsearch_esql_query_details,
+    elastic_eql_query_details,
     elasticsearch_lucene_query_details,
     elasticsearch_rule_details,
     kibana_rule_details,
@@ -17,6 +18,7 @@
 elastalert_mappings = LuceneMappings(platform_dir="elasticsearch", platform_details=elastalert_details)
 kibana_rule_mappings = LuceneMappings(platform_dir="elasticsearch", platform_details=kibana_rule_details)
 xpack_watcher_mappings = LuceneMappings(platform_dir="elasticsearch", platform_details=xpack_watcher_details)
+elastic_eql_query_mappings = LuceneMappings(platform_dir="elasticsearch", platform_details=elastic_eql_query_details)


 class ElasticESQLMappings(LuceneMappings):
uncoder-core/app/translator/platforms/elasticsearch/parsers/elasticsearch_eql.py

Lines changed: 37 additions & 0 deletions

@@ -0,0 +1,37 @@
+import re
+
+from app.translator.core.models.platform_details import PlatformDetails
+from app.translator.core.models.query_container import RawQueryContainer, TokenizedQueryContainer
+from app.translator.core.parser import PlatformQueryParser
+from app.translator.managers import parser_manager
+from app.translator.platforms.base.lucene.mapping import LuceneMappings
+from app.translator.platforms.elasticsearch.const import elastic_eql_query_details
+from app.translator.platforms.elasticsearch.mapping import elastic_eql_query_mappings
+from app.translator.platforms.elasticsearch.tokenizer import ElasticSearchEQLTokenizer
+
+
+@parser_manager.register_supported_by_roota
+class ElasticSearchEQLQueryParser(PlatformQueryParser):
+    details: PlatformDetails = elastic_eql_query_details
+    tokenizer = ElasticSearchEQLTokenizer()
+    mappings: LuceneMappings = elastic_eql_query_mappings
+    query_delimiter_pattern = r"\swhere\s"
+
+    def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]]]:
+        log_source = {"category": []}
+        if re.search(self.query_delimiter_pattern, query, flags=re.IGNORECASE):
+            sp_query = re.split(self.query_delimiter_pattern, query, flags=re.IGNORECASE)
+            if sp_query[0].lower() != "all":
+                log_source["category"].append(sp_query[0])
+            return sp_query[1], log_source
+        return query, log_source
+
+    def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
+        query, log_sources = self._parse_query(raw_query_container.query)
+        query_tokens = self.get_query_tokens(query)
+        field_tokens = self.get_field_tokens(query_tokens)
+        source_mappings = self.get_source_mappings(field_tokens, log_sources)
+        meta_info = raw_query_container.meta_info
+        meta_info.query_fields = field_tokens
+        meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
+        return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info)
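
For reference, the split below mirrors the _parse_query step above: the EQL category before the "where" keyword becomes a log-source hint (unless it is "all"), and the remainder is what gets tokenized. A minimal standalone sketch; the sample queries are illustrative and not part of the commit:

import re

# Mirrors ElasticSearchEQLQueryParser._parse_query from the diff above.
QUERY_DELIMITER_PATTERN = r"\swhere\s"

def split_eql_query(query: str) -> tuple[str, dict[str, list[str]]]:
    log_source = {"category": []}
    if re.search(QUERY_DELIMITER_PATTERN, query, flags=re.IGNORECASE):
        sp_query = re.split(QUERY_DELIMITER_PATTERN, query, flags=re.IGNORECASE)
        if sp_query[0].lower() != "all":
            log_source["category"].append(sp_query[0])
        return sp_query[1], log_source
    return query, log_source

print(split_eql_query('process where process.name : "cmd.exe"'))
# -> ('process.name : "cmd.exe"', {'category': ['process']})
print(split_eql_query('all where process.pid == 4'))
# -> ('process.pid == 4', {'category': []})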

uncoder-core/app/translator/platforms/elasticsearch/renders/esql.py

Lines changed: 2 additions & 2 deletions

@@ -29,13 +29,13 @@
 from app.translator.platforms.elasticsearch.mapping import ElasticESQLMappings, esql_query_mappings
 from app.translator.platforms.elasticsearch.str_value_manager import (
     ESQLQueryStrValueManager,
-    esql_query_str_value_manager
+    esql_str_value_manager
 )


 class ESQLFieldValueRender(BaseFieldValueRender):
     details: PlatformDetails = elasticsearch_esql_query_details
-    str_value_manager: ESQLQueryStrValueManager = esql_query_str_value_manager
+    str_value_manager: ESQLQueryStrValueManager = esql_str_value_manager

     @staticmethod
     def _make_case_insensitive(value: str) -> str:

uncoder-core/app/translator/platforms/elasticsearch/str_value_manager.py

Lines changed: 12 additions & 1 deletion

@@ -23,6 +23,8 @@
     ReDigitalSymbol,
     ReWhiteSpaceSymbol,
     ReWordSymbol,
+    SingleSymbolWildCard,
+    StrValue,
     StrValueManager,
 )
 from app.translator.platforms.elasticsearch.escape_manager import ESQLQueryEscapeManager, esql_query_escape_manager
@@ -37,4 +39,13 @@ class ESQLQueryStrValueManager(StrValueManager):
     }


-esql_query_str_value_manager = ESQLQueryStrValueManager()
+class EQLStrValueManager(StrValueManager):
+    str_spec_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = {"*": SingleSymbolWildCard}
+
+    def from_str_to_container(self, value: str) -> StrValue:
+        split = [self.str_spec_symbols_map[char]() if char in self.str_spec_symbols_map else char for char in value]
+        return StrValue(value, self._concat(split))
+
+
+esql_str_value_manager = ESQLQueryStrValueManager()
+eql_str_value_manager = EQLStrValueManager()
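
To illustrate the new EQLStrValueManager: its str_spec_symbols_map turns every "*" in a raw EQL value into a wildcard spec-symbol object while keeping all other characters as literals, so wildcards can be handled explicitly downstream. A standalone sketch using a simplified stand-in class (SingleSymbolWildCard below is a placeholder, not the real core class); the sample values are illustrative only:

# Simplified stand-in; the real SingleSymbolWildCard/StrValue come from the core
# str_value_manager module and are not reproduced here.
class SingleSymbolWildCard:
    def __repr__(self) -> str:
        return "<SingleSymbolWildCard>"

STR_SPEC_SYMBOLS_MAP = {"*": SingleSymbolWildCard}

def split_value(value: str) -> list:
    # Same comprehension as EQLStrValueManager.from_str_to_container: each "*"
    # becomes a spec-symbol instance, every other character stays a literal.
    return [STR_SPEC_SYMBOLS_MAP[char]() if char in STR_SPEC_SYMBOLS_MAP else char for char in value]

print(split_value("cmd*"))   # ['c', 'm', 'd', <SingleSymbolWildCard>]
print(split_value("*.exe"))  # [<SingleSymbolWildCard>, '.', 'e', 'x', 'e']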

uncoder-core/app/translator/platforms/elasticsearch/tokenizer.py

Lines changed: 66 additions & 0 deletions

@@ -15,9 +15,75 @@
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 -----------------------------------------------------------------
 """
+import re
+from typing import Any, ClassVar, Optional, Union

+from app.translator.core.custom_types.tokens import OperatorType
+from app.translator.core.custom_types.values import ValueType
+from app.translator.core.models.query_tokens.field_value import FieldValue
+from app.translator.core.models.query_tokens.identifier import Identifier
+from app.translator.core.tokenizer import QueryTokenizer
 from app.translator.platforms.base.lucene.tokenizer import LuceneTokenizer
+from app.translator.platforms.elasticsearch.str_value_manager import eql_str_value_manager
+from app.translator.tools.utils import get_match_group


 class ElasticSearchTokenizer(LuceneTokenizer):
     pass
+
+
+class ElasticSearchEQLTokenizer(QueryTokenizer):
+    single_value_operators_map: ClassVar[dict[str, str]] = {
+        ":": OperatorType.EQ,
+        "==": OperatorType.EQ,
+        "<=": OperatorType.LTE,
+        "<": OperatorType.LT,
+        ">=": OperatorType.GTE,
+        ">": OperatorType.GT,
+        "!=": OperatorType.NOT_EQ,
+        "regex~": OperatorType.REGEX,
+        "regex": OperatorType.REGEX,
+    }
+
+    multi_value_operators_map: ClassVar[dict[str, str]] = {
+        "in": OperatorType.EQ,
+        "in~": OperatorType.EQ,
+        ":": OperatorType.EQ,
+    }
+    wildcard_symbol = "*"
+    field_pattern = r"(?P<field_name>[a-zA-Z\.\-_`]+)"
+    re_value_pattern = (
+        rf'"(?P<{ValueType.regex_value}>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s]|\\\"|\\)*)\[\^[z|Z]\]\.\?"'  # noqa: RUF001
+    )
+    double_quotes_value_pattern = (
+        rf'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s]|\\\"|\\)*)"'  # noqa: RUF001
+    )
+    _value_pattern = rf"{re_value_pattern}|{double_quotes_value_pattern}"
+    multi_value_pattern = rf"""\((?P<{ValueType.multi_value}>[:a-zA-Z\"\*0-9=+%#№;\-_\/\\'\,.$&^@!\(\[\]\s|]+)\)"""
+    multi_value_check_pattern = r"___field___\s*___operator___\s*\("
+    keyword_pattern = (
+        rf'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s]|\\\"|\\)*)"'  # noqa: RUF001
+    )
+
+    str_value_manager = eql_str_value_manager
+
+    def get_operator_and_value(
+        self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None
+    ) -> tuple[str, Any]:
+        if (re_value := get_match_group(match, group_name=ValueType.regex_value)) is not None:
+            return OperatorType.REGEX, self.str_value_manager.from_re_str_to_container(re_value)
+
+        if (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None:
+            return mapped_operator, self.str_value_manager.from_str_to_container(d_q_value)
+
+        return super().get_operator_and_value(match, mapped_operator, operator)
+
+    def is_multi_value_flow(self, field_name: str, operator: str, query: str) -> bool:
+        check_pattern = self.multi_value_check_pattern
+        check_regex = check_pattern.replace("___field___", field_name).replace("___operator___", operator)
+        return bool(re.match(check_regex, query))
+
+    @staticmethod
+    def create_field_value(field_name: str, operator: Identifier, value: Union[str, list]) -> FieldValue:
+        field_name = field_name.replace("`", "")
+        return FieldValue(source_name=field_name, operator=operator, value=value)
0 commit comments