Skip to content

Commit 825b00c

Browse files
authored
Merge pull request #71 from UncoderIO/lucene-improvements
lucene improvements
2 parents 104ac22 + a814152 commit 825b00c

File tree

9 files changed: 57 additions (+57) and 30 deletions (-30).

translator/app/translator/platforms/base/lucene/mapping.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,10 @@ def prepare_log_source_signature(self, mapping: dict) -> LuceneLogSourceSignatur
2222
return LuceneLogSourceSignature(indices=indices, default_source=default_log_source)
2323

2424
def get_suitable_source_mappings(
25-
self, field_names: list[str], index: Optional[list[str]] = None
25+
self,
26+
field_names: list[str],
27+
index: Optional[list[str]] = None,
28+
**kwargs, # noqa: ARG002
2629
) -> list[SourceMapping]:
2730
suitable_source_mappings = []
2831
for source_mapping in self._source_mappings.values():

translator/app/translator/platforms/base/lucene/renders/lucene.py

Lines changed: 18 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -42,13 +42,13 @@ def _pre_process_values_list(
4242
) -> list[str]:
4343
value_type = self.__get_value_type(field, value_type)
4444
processed = []
45-
for v in values:
46-
if isinstance(v, StrValue):
47-
processed.append(self.str_value_manager.from_container_to_str(v, value_type))
48-
elif isinstance(v, str):
49-
processed.append(self.str_value_manager.escape_manager.escape(v, value_type))
45+
for val in values:
46+
if isinstance(val, StrValue):
47+
processed.append(self.str_value_manager.from_container_to_str(val, value_type))
48+
elif isinstance(val, str):
49+
processed.append(self.str_value_manager.escape_manager.escape(val, value_type))
5050
else:
51-
processed.append(str(v))
51+
processed.append(str(val))
5252
return processed
5353

5454
def _pre_process_value(
@@ -87,25 +87,32 @@ def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
8787

8888
def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
8989
if isinstance(value, list):
90-
values = self.or_token.join(f"*{v}*" for v in self._pre_process_values_list(field, value))
90+
values = self.or_token.join(f"*{val}*" for val in self._pre_process_values_list(field, value))
9191
return f"{field}:({values})"
9292
return f"{field}:*{self._pre_process_value(field, value)}*"
9393

9494
def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
9595
if isinstance(value, list):
96-
values = self.or_token.join(f"*{v}" for v in self._pre_process_values_list(field, value))
96+
values = self.or_token.join(f"*{val}" for val in self._pre_process_values_list(field, value))
9797
return f"{field}:({values})"
9898
return f"{field}:*{self._pre_process_value(field, value)}"
9999

100100
def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
101101
if isinstance(value, list):
102-
values = self.or_token.join(f"{v}*" for v in self._pre_process_values_list(field, value))
102+
values = self.or_token.join(f"{val}*" for val in self._pre_process_values_list(field, value))
103103
return f"{field}:({values})"
104104
return f"{field}:{self._pre_process_value(field, value)}*"
105105

106106
def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
107107
if isinstance(value, list):
108-
return f"({self.or_token.join(self.regex_modifier(field=field, value=v) for v in value)})"
108+
values = []
109+
for val in value:
110+
values.append(
111+
f"/{self._pre_process_value(field, val, value_type=ValueType.regex_value)}/"
112+
if isinstance(val, StrValue)
113+
else f"/{val}/"
114+
)
115+
return f"{field}:({self.or_token.join(values)})"
109116

110117
if isinstance(value, StrValue):
111118
return f"{field}:/{self._pre_process_value(field, value, value_type=ValueType.regex_value)}/"
@@ -114,7 +121,7 @@ def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
114121

115122
def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
116123
if isinstance(value, list):
117-
return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})"
124+
return f"({self.or_token.join(self.keywords(field=field, value=val) for val in value)})"
118125
return f"*{self._pre_process_value(field, value)}*"
119126

120127

translator/app/translator/platforms/base/spl/tokenizer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ class SplTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
4242

4343
field_pattern = r"(?P<field_name>[a-zA-Z0-9\.\-_\{\}]+)"
4444
num_value_pattern = rf"(?P<{ValueType.number_value}>\d+(?:\.\d+)*)(?=$|\s|\))"
45-
double_quotes_value_pattern = rf'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,;`\?~‘○×\'\.<>$&^@!\]\[\(\)\{{\}}\s]|\\\"|\\)*)"\s*' # noqa: E501
45+
double_quotes_value_pattern = rf'"(?P<{ValueType.double_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,;`\?~‘○×\'\.<>$&^@!\]\[\(\)\{{\}}\s]|\\\"|\\)*)"\s*' # noqa: E501, RUF001
4646
single_quotes_value_pattern = (
4747
rf"'(?P<{ValueType.single_quotes_value}>(?:[:a-zA-Z\*0-9=+%#\-_/,;\"\.<>$&^@!\(\)\{{\}}\s]|\\\'|\\)*)'\s*"
4848
)

translator/app/translator/platforms/elasticsearch/renders/detection_rule.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,10 @@
1818
"""
1919

2020
import copy
21-
import json
2221
from typing import Optional, Union
2322

23+
import ujson
24+
2425
from app.translator.core.mapping import SourceMapping
2526
from app.translator.core.mitre import MitreConfig
2627
from app.translator.core.models.parser_output import MetaInfoContainer
@@ -113,7 +114,7 @@ def finalize_query(
113114
"false_positives": meta_info.false_positives,
114115
}
115116
)
116-
rule_str = json.dumps(rule, indent=4, sort_keys=False, ensure_ascii=False)
117+
rule_str = ujson.dumps(rule, indent=4, sort_keys=False, ensure_ascii=False)
117118
if not_supported_functions:
118119
rendered_not_supported = self.render_not_supported_functions(not_supported_functions)
119120
return rule_str + rendered_not_supported

translator/app/translator/platforms/elasticsearch/renders/kibana.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,10 @@
1717
-----------------------------------------------------------------
1818
"""
1919
import copy
20-
import json
2120
from typing import Optional
2221

22+
import ujson
23+
2324
from app.translator.core.mapping import SourceMapping
2425
from app.translator.core.models.parser_output import MetaInfoContainer
2526
from app.translator.core.models.platform_details import PlatformDetails
@@ -56,7 +57,7 @@ def finalize_query(
5657
query = super().finalize_query(prefix=prefix, query=query, functions=functions)
5758
search_source = copy.deepcopy(KIBANA_SEARCH_SOURCE_JSON)
5859
search_source["query"]["query_string"]["query"] = query
59-
dumped_rule = json.dumps(search_source, sort_keys=False)
60+
dumped_rule = ujson.dumps(search_source, sort_keys=False, escape_forward_slashes=False)
6061
rule = copy.deepcopy(KIBANA_RULE)
6162
rule["_source"]["kibanaSavedObjectMeta"]["searchSourceJSON"] = dumped_rule
6263
rule["_source"]["title"] = meta_info.title
@@ -67,7 +68,7 @@ def finalize_query(
6768
license_=meta_info.license,
6869
references=meta_info.references,
6970
)
70-
rule_str = json.dumps(rule, indent=4, sort_keys=False)
71+
rule_str = ujson.dumps(rule, indent=4, sort_keys=False)
7172
if not_supported_functions:
7273
rendered_not_supported = self.render_not_supported_functions(not_supported_functions)
7374
return rule_str + rendered_not_supported

translator/app/translator/platforms/elasticsearch/renders/xpack_watcher.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,10 @@
1717
-----------------------------------------------------------------
1818
"""
1919
import copy
20-
import json
2120
from typing import Optional
2221

22+
import ujson
23+
2324
from app.translator.core.mapping import SourceMapping
2425
from app.translator.core.models.parser_output import MetaInfoContainer
2526
from app.translator.core.models.platform_details import PlatformDetails
@@ -72,7 +73,7 @@ def finalize_query(
7273
indices = source_mapping and [str(source_mapping.log_source_signature)] or []
7374
rule["input"]["search"]["request"]["indices"] = indices
7475
rule["actions"]["send_email"]["email"]["subject"] = meta_info.title
75-
rule_str = json.dumps(rule, indent=4, sort_keys=False)
76+
rule_str = ujson.dumps(rule, indent=4, sort_keys=False)
7677
if not_supported_functions:
7778
rendered_not_supported = self.render_not_supported_functions(not_supported_functions)
7879
return rule_str + rendered_not_supported

translator/app/translator/platforms/opensearch/renders/opensearch.py

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
from app.translator.const import DEFAULT_VALUE_TYPE
2222
from app.translator.core.custom_types.values import ValueType
2323
from app.translator.core.models.platform_details import PlatformDetails
24+
from app.translator.core.str_value_manager import StrValue
2425
from app.translator.platforms.base.lucene.renders.lucene import LuceneFieldValue, LuceneQueryRender
2526
from app.translator.platforms.opensearch.const import opensearch_query_details
2627
from app.translator.platforms.opensearch.mapping import OpenSearchMappings, opensearch_mappings
@@ -31,7 +32,7 @@ class OpenSearchFieldValue(LuceneFieldValue):
3132

3233
def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
3334
if isinstance(value, list):
34-
values = self.or_token.join(f'"{v}"' for v in self._pre_process_values_list(field, value))
35+
values = self.or_token.join(f'"{val}"' for val in self._pre_process_values_list(field, value))
3536
return f"{field}:({values})"
3637
return f'{field}:"{self._pre_process_value(field, value)}"'
3738

@@ -49,36 +50,47 @@ def greater_or_equal_modifier(self, field: str, value: Union[int, str]) -> str:
4950

5051
def not_equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
5152
if isinstance(value, list):
52-
values = self.or_token.join(f'"{v}"' for v in self._pre_process_values_list(field, value))
53+
values = self.or_token.join(f'"{val}"' for val in self._pre_process_values_list(field, value))
5354
return f"NOT ({field} = ({values})"
5455
return f'NOT ({field} = "{self._pre_process_value(field, value)}")'
5556

5657
def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
5758
if isinstance(value, list):
58-
values = self.or_token.join(f'"*{v}*"' for v in self._pre_process_values_list(field, value))
59+
values = self.or_token.join(f'"*{val}*"' for val in self._pre_process_values_list(field, value))
5960
return f"{field}:({values})"
6061
return f'{field}:"*{self._pre_process_value(field, value)}*"'
6162

6263
def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
6364
if isinstance(value, list):
64-
values = self.or_token.join(f'"*{v}"' for v in self._pre_process_values_list(field, value))
65+
values = self.or_token.join(f'"*{val}"' for val in self._pre_process_values_list(field, value))
6566
return f"{field}:({values})"
6667
return f'{field}:"*{self._pre_process_value(field, value)}"'
6768

6869
def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
6970
if isinstance(value, list):
70-
values = self.or_token.join(f'"{v}*"' for v in self._pre_process_values_list(field, value))
71+
values = self.or_token.join(f'"{val}*"' for val in self._pre_process_values_list(field, value))
7172
return f"{field}:({values})"
7273
return f'{field}:"{self._pre_process_value(field, value)}*"'
7374

7475
def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
7576
if isinstance(value, list):
76-
return f"({self.or_token.join(self.regex_modifier(field=field, value=v) for v in value)})"
77-
return f'{field}:"/{self._pre_process_value(field, value, value_type=ValueType.regex_value)}/"'
77+
values = []
78+
for val in value:
79+
values.append(
80+
f'"/{self._pre_process_value(field, val, value_type=ValueType.regex_value)}/"'
81+
if isinstance(val, StrValue)
82+
else f'"/{val}/"'
83+
)
84+
return f"{field}:({self.or_token.join(values)})"
85+
86+
if isinstance(value, StrValue):
87+
return f'{field}:"/{self._pre_process_value(field, value, value_type=ValueType.regex_value)}/"'
88+
89+
return f'{field}:"/{value}/"'
7890

7991
def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
8092
if isinstance(value, list):
81-
return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})"
93+
return f"({self.or_token.join(self.keywords(field=field, value=val) for val in value)})"
8294
return f'"*{self._pre_process_value(field, value)}*"'
8395

8496

translator/app/translator/platforms/opensearch/renders/opensearch_rule.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,10 @@
1717
-----------------------------------------------------------------
1818
"""
1919
import copy
20-
import json
2120
from typing import Optional
2221

22+
import ujson
23+
2324
from app.translator.core.custom_types.meta_info import SeverityType
2425
from app.translator.core.mapping import SourceMapping
2526
from app.translator.core.models.parser_output import MetaInfoContainer
@@ -63,7 +64,7 @@ def finalize_query(
6364
rule["inputs"][0]["search"]["query"]["query"]["bool"]["must"][0]["query_string"]["query"] = query
6465
rule["triggers"][0]["name"] = meta_info.title
6566
rule["triggers"][0]["severity"] = _SEVERITIES_MAP[meta_info.severity]
66-
rule_str = json.dumps(rule, indent=4, sort_keys=False)
67+
rule_str = ujson.dumps(rule, indent=4, sort_keys=False)
6768
if not_supported_functions:
6869
rendered_not_supported = self.render_not_supported_functions(not_supported_functions)
6970
return rule_str + rendered_not_supported

translator/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,3 +4,4 @@ pydantic~=1.10.13
44
PyYAML~=6.0.1
55
colorama~=0.4.6
66
ruff==0.1.13
7+
ujson==5.9.0

0 commit comments

Comments (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy