
Commit 416f5ca

Merge pull request #199 from UncoderIO/gis-8639
Gis 8639 add ElasticSearchEQLQueryParser
2 parents 25a55d0 + 608a1f4 commit 416f5ca

File tree

7 files changed: +129 -3 lines

uncoder-core/app/translator/platforms/elasticsearch/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -3,6 +3,7 @@
     ElasticSearchRuleTOMLParser,  # noqa: F401
 )
 from app.translator.platforms.elasticsearch.parsers.elasticsearch import ElasticSearchQueryParser  # noqa: F401
+from app.translator.platforms.elasticsearch.parsers.elasticsearch_eql import ElasticSearchEQLQueryParser  # noqa: F401
 from app.translator.platforms.elasticsearch.renders.detection_rule import ElasticSearchRuleRender  # noqa: F401
 from app.translator.platforms.elasticsearch.renders.elast_alert import ElastAlertRuleRender  # noqa: F401
 from app.translator.platforms.elasticsearch.renders.elasticsearch import ElasticSearchQueryRender  # noqa: F401

uncoder-core/app/translator/platforms/elasticsearch/const.py

Lines changed: 9 additions & 0 deletions

@@ -11,6 +11,7 @@
 _ELASTIC_WATCHER_RULE = "elastic-watcher-rule"
 _ELASTIC_ESQL_QUERY = "elastic-esql-query"
 _ELASTIC_ESQL_RULE = "elastic-esql-rule"
+_ELASTIC_EQL_QUERY = "elastic-eql-query"

 ELASTIC_QUERY_TYPES = {
     _ELASTIC_LUCENE_QUERY,
@@ -83,6 +84,13 @@
     **PLATFORM_DETAILS,
 }

+ELASTICSEARCH_EQL_QUERY_DETAILS = {
+    "platform_id": _ELASTIC_EQL_QUERY,
+    "name": "Elasticsearch EQL Query",
+    "platform_name": "Query (EQL)",
+    **PLATFORM_DETAILS,
+}
+
 elasticsearch_lucene_query_details = PlatformDetails(**ELASTICSEARCH_LUCENE_QUERY_DETAILS)
 elasticsearch_esql_query_details = PlatformDetails(**ELASTICSEARCH_ESQL_QUERY_DETAILS)
 elasticsearch_esql_rule_details = PlatformDetails(**ELASTICSEARCH_ESQL_RULE_DETAILS)
@@ -91,6 +99,7 @@
 elastalert_details = PlatformDetails(**ELASTALERT_DETAILS)
 kibana_rule_details = PlatformDetails(**KIBANA_DETAILS)
 xpack_watcher_details = PlatformDetails(**XPACK_WATCHER_DETAILS)
+elastic_eql_query_details = PlatformDetails(**ELASTICSEARCH_EQL_QUERY_DETAILS)

 ELASTICSEARCH_DETECTION_RULE = {
     "description": "Autogenerated ElasticSearch Detection Rule.",

uncoder-core/app/translator/platforms/elasticsearch/mapping.py

Lines changed: 2 additions & 0 deletions

@@ -2,6 +2,7 @@
 from app.translator.platforms.elasticsearch.const import (
     elastalert_details,
     elasticsearch_esql_query_details,
+    elastic_eql_query_details,
     elasticsearch_lucene_query_details,
     elasticsearch_rule_details,
     kibana_rule_details,
@@ -17,6 +18,7 @@
 elastalert_mappings = LuceneMappings(platform_dir="elasticsearch", platform_details=elastalert_details)
 kibana_rule_mappings = LuceneMappings(platform_dir="elasticsearch", platform_details=kibana_rule_details)
 xpack_watcher_mappings = LuceneMappings(platform_dir="elasticsearch", platform_details=xpack_watcher_details)
+elastic_eql_query_mappings = LuceneMappings(platform_dir="elasticsearch", platform_details=elastic_eql_query_details)


 class ElasticESQLMappings(LuceneMappings):
uncoder-core/app/translator/platforms/elasticsearch/parsers/elasticsearch_eql.py

Lines changed: 37 additions & 0 deletions

@@ -0,0 +1,37 @@
+import re
+
+from app.translator.core.models.platform_details import PlatformDetails
+from app.translator.core.models.query_container import RawQueryContainer, TokenizedQueryContainer
+from app.translator.core.parser import PlatformQueryParser
+from app.translator.managers import parser_manager
+from app.translator.platforms.base.lucene.mapping import LuceneMappings
+from app.translator.platforms.elasticsearch.const import elastic_eql_query_details
+from app.translator.platforms.elasticsearch.mapping import elastic_eql_query_mappings
+from app.translator.platforms.elasticsearch.tokenizer import ElasticSearchEQLTokenizer
+
+
+@parser_manager.register_supported_by_roota
+class ElasticSearchEQLQueryParser(PlatformQueryParser):
+    details: PlatformDetails = elastic_eql_query_details
+    tokenizer = ElasticSearchEQLTokenizer()
+    mappings: LuceneMappings = elastic_eql_query_mappings
+    query_delimiter_pattern = r"\swhere\s"
+
+    def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]]]:
+        log_source = {"category": []}
+        if re.search(self.query_delimiter_pattern, query, flags=re.IGNORECASE):
+            sp_query = re.split(self.query_delimiter_pattern, query, flags=re.IGNORECASE)
+            if sp_query[0].lower() != "all":
+                log_source["category"].append(sp_query[0])
+            return sp_query[1], log_source
+        return query, log_source
+
+    def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
+        query, log_sources = self._parse_query(raw_query_container.query)
+        query_tokens = self.get_query_tokens(query)
+        field_tokens = self.get_field_tokens(query_tokens)
+        source_mappings = self.get_source_mappings(field_tokens, log_sources)
+        meta_info = raw_query_container.meta_info
+        meta_info.query_fields = field_tokens
+        meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
+        return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info)
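
For reference, the split below mirrors the _parse_query step above: the EQL category before the "where" keyword becomes a log-source hint (unless it is "all"), and the remainder is what gets tokenized. A minimal standalone sketch; the sample queries are illustrative and not part of the commit:

import re

# Mirrors ElasticSearchEQLQueryParser._parse_query from the diff above.
QUERY_DELIMITER_PATTERN = r"\swhere\s"

def split_eql_query(query: str) -> tuple[str, dict[str, list[str]]]:
    log_source = {"category": []}
    if re.search(QUERY_DELIMITER_PATTERN, query, flags=re.IGNORECASE):
        sp_query = re.split(QUERY_DELIMITER_PATTERN, query, flags=re.IGNORECASE)
        if sp_query[0].lower() != "all":
            log_source["category"].append(sp_query[0])
        return sp_query[1], log_source
    return query, log_source

print(split_eql_query('process where process.name : "cmd.exe"'))
# -> ('process.name : "cmd.exe"', {'category': ['process']})
print(split_eql_query('all where process.pid == 4'))
# -> ('process.pid == 4', {'category': []})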

uncoder-core/app/translator/platforms/elasticsearch/renders/esql.py

Lines changed: 2 additions & 2 deletions

@@ -29,13 +29,13 @@
 from app.translator.platforms.elasticsearch.mapping import ElasticESQLMappings, esql_query_mappings
 from app.translator.platforms.elasticsearch.str_value_manager import (
     ESQLQueryStrValueManager,
-    esql_query_str_value_manager
+    esql_str_value_manager
 )


 class ESQLFieldValueRender(BaseFieldValueRender):
     details: PlatformDetails = elasticsearch_esql_query_details
-    str_value_manager: ESQLQueryStrValueManager = esql_query_str_value_manager
+    str_value_manager: ESQLQueryStrValueManager = esql_str_value_manager

     @staticmethod
     def _make_case_insensitive(value: str) -> str:

uncoder-core/app/translator/platforms/elasticsearch/str_value_manager.py

Lines changed: 12 additions & 1 deletion

@@ -23,6 +23,8 @@
     ReDigitalSymbol,
     ReWhiteSpaceSymbol,
     ReWordSymbol,
+    SingleSymbolWildCard,
+    StrValue,
     StrValueManager,
 )
 from app.translator.platforms.elasticsearch.escape_manager import ESQLQueryEscapeManager, esql_query_escape_manager
@@ -37,4 +39,13 @@ class ESQLQueryStrValueManager(StrValueManager):
     }


-esql_query_str_value_manager = ESQLQueryStrValueManager()
+class EQLStrValueManager(StrValueManager):
+    str_spec_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = {"*": SingleSymbolWildCard}
+
+    def from_str_to_container(self, value: str) -> StrValue:
+        split = [self.str_spec_symbols_map[char]() if char in self.str_spec_symbols_map else char for char in value]
+        return StrValue(value, self._concat(split))
+
+
+esql_str_value_manager = ESQLQueryStrValueManager()
+eql_str_value_manager = EQLStrValueManager()
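
To illustrate the new EQLStrValueManager: its str_spec_symbols_map turns every "*" in a raw EQL value into a wildcard spec-symbol object while keeping all other characters as literals, so wildcards can be handled explicitly downstream. A standalone sketch using a simplified stand-in class (SingleSymbolWildCard below is a placeholder, not the real core class); the sample values are illustrative only:

# Simplified stand-in; the real SingleSymbolWildCard/StrValue come from the core
# str_value_manager module and are not reproduced here.
class SingleSymbolWildCard:
    def __repr__(self) -> str:
        return "<SingleSymbolWildCard>"

STR_SPEC_SYMBOLS_MAP = {"*": SingleSymbolWildCard}

def split_value(value: str) -> list:
    # Same comprehension as EQLStrValueManager.from_str_to_container: each "*"
    # becomes a spec-symbol instance, every other character stays a literal.
    return [STR_SPEC_SYMBOLS_MAP[char]() if char in STR_SPEC_SYMBOLS_MAP else char for char in value]

print(split_value("cmd*"))   # ['c', 'm', 'd', <SingleSymbolWildCard>]
print(split_value("*.exe"))  # [<SingleSymbolWildCard>, '.', 'e', 'x', 'e']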

uncoder-core/app/translator/platforms/elasticsearch/tokenizer.py

Lines changed: 66 additions & 0 deletions

@@ -15,9 +15,75 @@
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 -----------------------------------------------------------------
 """
+import re
+from typing import Any, ClassVar, Optional, Union

+from app.translator.core.custom_types.tokens import OperatorType
+from app.translator.core.custom_types.values import ValueType
+from app.translator.core.models.query_tokens.field_value import FieldValue
+from app.translator.core.models.query_tokens.identifier import Identifier
+from app.translator.core.tokenizer import QueryTokenizer
 from app.translator.platforms.base.lucene.tokenizer import LuceneTokenizer
+from app.translator.platforms.elasticsearch.str_value_manager import eql_str_value_manager
+from app.translator.tools.utils import get_match_group


 class ElasticSearchTokenizer(LuceneTokenizer):
     pass
+
+
+class ElasticSearchEQLTokenizer(QueryTokenizer):
+    single_value_operators_map: ClassVar[dict[str, str]] = {
+        ":": OperatorType.EQ,
+        "==": OperatorType.EQ,
+        "<=": OperatorType.LTE,
+        "<": OperatorType.LT,
+        ">=": OperatorType.GTE,
+        ">": OperatorType.GT,
+        "!=": OperatorType.NOT_EQ,
+        "regex~": OperatorType.REGEX,
+        "regex": OperatorType.REGEX,
+    }
+
+    multi_value_operators_map: ClassVar[dict[str, str]] = {
+        "in": OperatorType.EQ,
+        "in~": OperatorType.EQ,
+        ":": OperatorType.EQ,
+    }
+    wildcard_symbol = "*"
+    field_pattern = r"(?P<field_name>[a-zA-Z\.\-_`]+)"
+    re_value_pattern = (
+        rf'"(?P<{ValueType.regex_value}>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s]|\\\"|\\)*)\[\^[z|Z]\]\.\?"'  # noqa: RUF001
+    )
+    double_quotes_value_pattern = (
+        rf'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s]|\\\"|\\)*)"'  # noqa: RUF001
+    )
+    _value_pattern = rf"{re_value_pattern}|{double_quotes_value_pattern}"
+    multi_value_pattern = rf"""\((?P<{ValueType.multi_value}>[:a-zA-Z\"\*0-9=+%#№;\-_\/\\'\,.$&^@!\(\[\]\s|]+)\)"""
+    multi_value_check_pattern = r"___field___\s*___operator___\s*\("
+    keyword_pattern = (
+        rf'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s]|\\\"|\\)*)"'  # noqa: RUF001
+    )
+
+    str_value_manager = eql_str_value_manager
+
+    def get_operator_and_value(
+        self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None
+    ) -> tuple[str, Any]:
+        if (re_value := get_match_group(match, group_name=ValueType.regex_value)) is not None:
+            return OperatorType.REGEX, self.str_value_manager.from_re_str_to_container(re_value)
+
+        if (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None:
+            return mapped_operator, self.str_value_manager.from_str_to_container(d_q_value)
+
+        return super().get_operator_and_value(match, mapped_operator, operator)
+
+    def is_multi_value_flow(self, field_name: str, operator: str, query: str) -> bool:
+        check_pattern = self.multi_value_check_pattern
+        check_regex = check_pattern.replace("___field___", field_name).replace("___operator___", operator)
+        return bool(re.match(check_regex, query))
+
+    @staticmethod
+    def create_field_value(field_name: str, operator: Identifier, value: Union[str, list]) -> FieldValue:
+        field_name = field_name.replace("`", "")
+        return FieldValue(source_name=field_name, operator=operator, value=value)
0 commit comments