Skip to content

Gis 8639 add ElasticSearchEQLQueryParser #199

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
ElasticSearchRuleTOMLParser, # noqa: F401
)
from app.translator.platforms.elasticsearch.parsers.elasticsearch import ElasticSearchQueryParser # noqa: F401
from app.translator.platforms.elasticsearch.parsers.elasticsearch_eql import ElasticSearchEQLQueryParser # noqa: F401
from app.translator.platforms.elasticsearch.renders.detection_rule import ElasticSearchRuleRender # noqa: F401
from app.translator.platforms.elasticsearch.renders.elast_alert import ElastAlertRuleRender # noqa: F401
from app.translator.platforms.elasticsearch.renders.elasticsearch import ElasticSearchQueryRender # noqa: F401
Expand Down
9 changes: 9 additions & 0 deletions uncoder-core/app/translator/platforms/elasticsearch/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
_ELASTIC_WATCHER_RULE = "elastic-watcher-rule"
_ELASTIC_ESQL_QUERY = "elastic-esql-query"
_ELASTIC_ESQL_RULE = "elastic-esql-rule"
_ELASTIC_EQL_QUERY = "elastic-eql-query"

ELASTIC_QUERY_TYPES = {
_ELASTIC_LUCENE_QUERY,
Expand Down Expand Up @@ -83,6 +84,13 @@
**PLATFORM_DETAILS,
}

ELASTICSEARCH_EQL_QUERY_DETAILS = {
"platform_id": _ELASTIC_EQL_QUERY,
"name": "Elasticsearch EQL Query",
"platform_name": "Query (EQL)",
**PLATFORM_DETAILS,
}

elasticsearch_lucene_query_details = PlatformDetails(**ELASTICSEARCH_LUCENE_QUERY_DETAILS)
elasticsearch_esql_query_details = PlatformDetails(**ELASTICSEARCH_ESQL_QUERY_DETAILS)
elasticsearch_esql_rule_details = PlatformDetails(**ELASTICSEARCH_ESQL_RULE_DETAILS)
Expand All @@ -91,6 +99,7 @@
elastalert_details = PlatformDetails(**ELASTALERT_DETAILS)
kibana_rule_details = PlatformDetails(**KIBANA_DETAILS)
xpack_watcher_details = PlatformDetails(**XPACK_WATCHER_DETAILS)
elastic_eql_query_details = PlatformDetails(**ELASTICSEARCH_EQL_QUERY_DETAILS)

ELASTICSEARCH_DETECTION_RULE = {
"description": "Autogenerated ElasticSearch Detection Rule.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from app.translator.platforms.elasticsearch.const import (
elastalert_details,
elasticsearch_esql_query_details,
elastic_eql_query_details,
elasticsearch_lucene_query_details,
elasticsearch_rule_details,
kibana_rule_details,
Expand All @@ -17,6 +18,7 @@
elastalert_mappings = LuceneMappings(platform_dir="elasticsearch", platform_details=elastalert_details)
kibana_rule_mappings = LuceneMappings(platform_dir="elasticsearch", platform_details=kibana_rule_details)
xpack_watcher_mappings = LuceneMappings(platform_dir="elasticsearch", platform_details=xpack_watcher_details)
elastic_eql_query_mappings = LuceneMappings(platform_dir="elasticsearch", platform_details=elastic_eql_query_details)


class ElasticESQLMappings(LuceneMappings):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import re

from app.translator.core.models.platform_details import PlatformDetails
from app.translator.core.models.query_container import RawQueryContainer, TokenizedQueryContainer
from app.translator.core.parser import PlatformQueryParser
from app.translator.managers import parser_manager
from app.translator.platforms.base.lucene.mapping import LuceneMappings
from app.translator.platforms.elasticsearch.const import elastic_eql_query_details
from app.translator.platforms.elasticsearch.mapping import elastic_eql_query_mappings
from app.translator.platforms.elasticsearch.tokenizer import ElasticSearchEQLTokenizer


@parser_manager.register_supported_by_roota
class ElasticSearchEQLQueryParser(PlatformQueryParser):
    """Parser for Elasticsearch EQL queries of the form ``<category> where <condition>``."""

    details: PlatformDetails = elastic_eql_query_details
    tokenizer = ElasticSearchEQLTokenizer()
    mappings: LuceneMappings = elastic_eql_query_mappings
    # Case-insensitive delimiter between the event category and the condition.
    query_delimiter_pattern = r"\swhere\s"

    def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]]]:
        """Split *query* into its condition text and extracted log sources.

        Returns the condition part of the query and a log-source dict whose
        "category" list holds the event category. The category "all" means
        any event category and is not recorded; a query without a ``where``
        delimiter is returned unchanged with an empty category list.
        """
        log_source = {"category": []}
        if re.search(self.query_delimiter_pattern, query, flags=re.IGNORECASE):
            # maxsplit=1: only the first ``where`` separates the category from
            # the condition; later occurrences (e.g. inside quoted values or
            # nested expressions) belong to the condition and must not be
            # split on — without maxsplit the condition would be truncated.
            sp_query = re.split(self.query_delimiter_pattern, query, maxsplit=1, flags=re.IGNORECASE)
            if sp_query[0].lower() != "all":
                log_source["category"].append(sp_query[0])
            return sp_query[1], log_source
        return query, log_source

    def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
        """Tokenize a raw EQL query and enrich its meta info with field tokens and source mapping ids."""
        query, log_sources = self._parse_query(raw_query_container.query)
        query_tokens = self.get_query_tokens(query)
        field_tokens = self.get_field_tokens(query_tokens)
        source_mappings = self.get_source_mappings(field_tokens, log_sources)
        meta_info = raw_query_container.meta_info
        meta_info.query_fields = field_tokens
        meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
        return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info)
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@
from app.translator.platforms.elasticsearch.mapping import ElasticESQLMappings, esql_query_mappings
from app.translator.platforms.elasticsearch.str_value_manager import (
ESQLQueryStrValueManager,
esql_query_str_value_manager
esql_str_value_manager
)


class ESQLFieldValueRender(BaseFieldValueRender):
details: PlatformDetails = elasticsearch_esql_query_details
str_value_manager: ESQLQueryStrValueManager = esql_query_str_value_manager
str_value_manager: ESQLQueryStrValueManager = esql_str_value_manager

@staticmethod
def _make_case_insensitive(value: str) -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
ReDigitalSymbol,
ReWhiteSpaceSymbol,
ReWordSymbol,
SingleSymbolWildCard,
StrValue,
StrValueManager,
)
from app.translator.platforms.elasticsearch.escape_manager import ESQLQueryEscapeManager, esql_query_escape_manager
Expand All @@ -37,4 +39,13 @@ class ESQLQueryStrValueManager(StrValueManager):
}


esql_query_str_value_manager = ESQLQueryStrValueManager()
class EQLStrValueManager(StrValueManager):
    """String-value manager for EQL: treats ``*`` as a single-symbol wildcard."""

    str_spec_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = {"*": SingleSymbolWildCard}

    def from_str_to_container(self, value: str) -> StrValue:
        """Wrap *value* in a StrValue, replacing special characters with their symbol objects."""
        parts = []
        for char in value:
            spec_symbol_cls = self.str_spec_symbols_map.get(char)
            parts.append(spec_symbol_cls() if spec_symbol_cls is not None else char)
        return StrValue(value, self._concat(parts))


esql_str_value_manager = ESQLQueryStrValueManager()
eql_str_value_manager = EQLStrValueManager()
66 changes: 66 additions & 0 deletions uncoder-core/app/translator/platforms/elasticsearch/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,75 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-----------------------------------------------------------------
"""
import re
from typing import Any, ClassVar, Optional, Union

from app.translator.core.custom_types.tokens import OperatorType
from app.translator.core.custom_types.values import ValueType
from app.translator.core.models.query_tokens.field_value import FieldValue
from app.translator.core.models.query_tokens.identifier import Identifier
from app.translator.core.tokenizer import QueryTokenizer
from app.translator.platforms.base.lucene.tokenizer import LuceneTokenizer
from app.translator.platforms.elasticsearch.str_value_manager import eql_str_value_manager
from app.translator.tools.utils import get_match_group


class ElasticSearchTokenizer(LuceneTokenizer):
    """Tokenizer for Elasticsearch Lucene queries; inherits all behavior from LuceneTokenizer unchanged."""

    pass


class ElasticSearchEQLTokenizer(QueryTokenizer):
    """Tokenizer for the condition part of Elasticsearch EQL queries."""

    # Operators taking a single right-hand value; "~" marks case-insensitive variants.
    single_value_operators_map: ClassVar[dict[str, str]] = {
        ":": OperatorType.EQ,
        "==": OperatorType.EQ,
        "<=": OperatorType.LTE,
        "<": OperatorType.LT,
        ">=": OperatorType.GTE,
        ">": OperatorType.GT,
        "!=": OperatorType.NOT_EQ,
        "regex~": OperatorType.REGEX,
        "regex": OperatorType.REGEX,
    }

    # Operators taking a parenthesized list of values.
    multi_value_operators_map: ClassVar[dict[str, str]] = {
        "in": OperatorType.EQ,
        "in~": OperatorType.EQ,
        ":": OperatorType.EQ,
    }
    wildcard_symbol = "*"
    # Field names may be dotted and wrapped in backticks (stripped in create_field_value).
    field_pattern = r"(?P<field_name>[a-zA-Z\.\-_`]+)"
    re_value_pattern = (
        rf'"(?P<{ValueType.regex_value}>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s]|\\\"|\\)*)\[\^[z|Z]\]\.\?"'  # noqa: RUF001
    )
    double_quotes_value_pattern = (
        rf'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s]|\\\"|\\)*)"'  # noqa: RUF001
    )
    _value_pattern = rf"{re_value_pattern}|{double_quotes_value_pattern}"
    multi_value_pattern = rf"""\((?P<{ValueType.multi_value}>[:a-zA-Z\"\*0-9=+%#№;\-_\/\\'\,.$&^@!\(\[\]\s|]+)\)"""
    # Template probed in is_multi_value_flow; placeholders are substituted per call.
    multi_value_check_pattern = r"___field___\s*___operator___\s*\("
    keyword_pattern = (
        rf'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s]|\\\"|\\)*)"'  # noqa: RUF001
    )

    str_value_manager = eql_str_value_manager

    def get_operator_and_value(
        self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None
    ) -> tuple[str, Any]:
        """Extract the operator and parsed value from a regex *match*.

        Regex-literal values force the REGEX operator; double-quoted values keep
        the mapped operator; anything else is delegated to the base tokenizer.
        """
        if (re_value := get_match_group(match, group_name=ValueType.regex_value)) is not None:
            return OperatorType.REGEX, self.str_value_manager.from_re_str_to_container(re_value)

        if (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None:
            return mapped_operator, self.str_value_manager.from_str_to_container(d_q_value)

        return super().get_operator_and_value(match, mapped_operator, operator)

    def is_multi_value_flow(self, field_name: str, operator: str, query: str) -> bool:
        """Return True if *query* starts with ``<field> <operator> (`` — a multi-value list."""
        check_pattern = self.multi_value_check_pattern
        # re.escape: field names may contain regex metacharacters (e.g. "." in
        # dotted fields), which otherwise match any character and can yield
        # false positives when building the check regex via substitution.
        check_regex = check_pattern.replace("___field___", re.escape(field_name)).replace(
            "___operator___", re.escape(operator)
        )
        return bool(re.match(check_regex, query))

    @staticmethod
    def create_field_value(field_name: str, operator: Identifier, value: Union[str, list]) -> FieldValue:
        """Build a FieldValue token, stripping EQL backtick quoting from the field name."""
        field_name = field_name.replace("`", "")
        return FieldValue(source_name=field_name, operator=operator, value=value)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy