Skip to content

Commit 9992004

Browse files
authored
Merge pull request #9 from UncoderIO/tokenizer-and-render-fixes
parser, tokenizer, render fixes
2 parents c026f1d + c34e622 commit 9992004

File tree

19 files changed: 71 additions (+71) and 63 deletions (-63).

siem-converter/app/converter/backends/athena/renders/athena.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,8 @@ class AthenaQueryRender(BaseQueryRender):
6767

6868
field_value_map = AthenaFieldValue(or_token=or_token)
6969
query_pattern = "{prefix} WHERE {query} {functions}"
70+
comment_symbol = "--"
71+
is_multi_line_comment = True
7072

7173
def generate_prefix(self, log_source_signature: LogSourceSignature) -> str:
7274
table = str(log_source_signature) if str(log_source_signature) else "eventlog"

siem-converter/app/converter/backends/athena/tokenizer.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ class AthenaTokenizer(QueryTokenizer):
3030
match_operator_pattern = r"""(?:___field___\s?(?P<match_operator>like|in|=|>|<|>=|<=|<>|!=))\s?"""
3131
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
3232
bool_value_pattern = r"(?P<bool_value>true|false)\s*"
33-
single_quotes_value_pattern = r"""'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')+)'"""
33+
single_quotes_value_pattern = r"""'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*)'"""
3434
_value_pattern = fr"{num_value_pattern}|{bool_value_pattern}|{single_quotes_value_pattern}"
3535
multi_value_pattern = r"""\((?P<value>\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*')*)\)"""
3636

@@ -49,13 +49,13 @@ def should_process_value_wildcard_symbols(operator: str) -> bool:
4949
return operator.lower() in ("like",)
5050

5151
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
52-
if num_value := get_match_group(match, group_name='num_value'):
52+
if (num_value := get_match_group(match, group_name='num_value')) is not None:
5353
return operator, num_value
5454

55-
elif bool_value := get_match_group(match, group_name='bool_value'):
55+
elif (bool_value := get_match_group(match, group_name='bool_value')) is not None:
5656
return operator, bool_value
5757

58-
elif s_q_value := get_match_group(match, group_name='s_q_value'):
58+
elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None:
5959
return operator, s_q_value
6060

6161
return super().get_operator_and_value(match, operator)

siem-converter/app/converter/backends/chronicle/tokenizer.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -31,22 +31,22 @@ class ChronicleQueryTokenizer(QueryTokenizer):
3131
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
3232
bool_value_pattern = r"(?P<bool_value>true|false)\s*"
3333
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\\\)*)"\s*(?:nocase)?'
34-
re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?]*)/\s*(?:nocase)?"
34+
re_value_pattern = r"/(?P<re_value>(?:\\\/|[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?])+)/\s*(?:nocase)?"
3535
_value_pattern = fr"{num_value_pattern}|{bool_value_pattern}|{double_quotes_value_pattern}|{re_value_pattern}"
3636

3737
wildcard_symbol = ".*"
3838

3939
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
40-
if num_value := get_match_group(match, group_name='num_value'):
40+
if (num_value := get_match_group(match, group_name='num_value')) is not None:
4141
return operator, num_value
4242

43-
elif bool_value := get_match_group(match, group_name='bool_value'):
43+
elif (bool_value := get_match_group(match, group_name='bool_value')) is not None:
4444
return operator, bool_value
4545

46-
elif d_q_value := get_match_group(match, group_name='d_q_value'):
46+
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
4747
return operator, d_q_value
4848

49-
elif re_value := get_match_group(match, group_name='re_value'):
49+
elif (re_value := get_match_group(match, group_name='re_value')) is not None:
5050
return OperatorType.REGEX, re_value
5151

5252
return super().get_operator_and_value(match, operator)
@@ -94,10 +94,10 @@ def search_field_value(self, query):
9494
return super().search_field_value(query=query)
9595

9696
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
97-
if d_q_value := get_match_group(match, group_name='d_q_value'):
97+
if (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
9898
return operator, d_q_value
9999

100-
elif b_q_value := get_match_group(match, group_name='b_q_value'):
100+
elif (b_q_value := get_match_group(match, group_name='b_q_value')) is not None:
101101
return operator, b_q_value
102102

103103
return super().get_operator_and_value(match, operator)

siem-converter/app/converter/backends/elasticsearch/renders/elasticsearch.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,8 @@ class ElasticSearchQueryRender(BaseQueryRender):
8282

8383
field_value_map = ElasticSearchFieldValue(or_token=or_token)
8484
query_pattern = "{query} {functions}"
85+
comment_symbol = "//"
86+
is_multi_line_comment = True
8587

8688
def generate_prefix(self, logsource: dict) -> str:
8789
return ""

siem-converter/app/converter/backends/elasticsearch/tokenizer.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -64,16 +64,16 @@ def clean_quotes(value: Union[str, int]):
6464
return value
6565

6666
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
67-
if num_value := get_match_group(match, group_name='num_value'):
67+
if (num_value := get_match_group(match, group_name='num_value')) is not None:
6868
return operator, num_value
6969

70-
elif re_value := get_match_group(match, group_name='re_value'):
70+
elif (re_value := get_match_group(match, group_name='re_value')) is not None:
7171
return OperatorType.REGEX, re_value
7272

73-
elif n_q_value := get_match_group(match, group_name='n_q_value'):
73+
elif (n_q_value := get_match_group(match, group_name='n_q_value')) is not None:
7474
return operator, n_q_value
7575

76-
elif d_q_value := get_match_group(match, group_name='d_q_value'):
76+
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
7777
return operator, d_q_value
7878

7979
return super().get_operator_and_value(match)

siem-converter/app/converter/backends/logscale/tokenizer.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,20 +30,20 @@ class LogScaleTokenizer(QueryTokenizer):
3030
match_operator_pattern = r"""(?:___field___\s?(?P<match_operator>=|!=))\s?"""
3131
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
3232
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
33-
re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?]*)/i?\s*"
33+
re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?]+)/i?\s*"
3434
_value_pattern = fr"""{num_value_pattern}|{re_value_pattern}|{double_quotes_value_pattern}"""
3535
keyword_pattern = double_quotes_value_pattern
3636

3737
wildcard_symbol = "*"
3838

3939
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
40-
if num_value := get_match_group(match, group_name='num_value'):
40+
if (num_value := get_match_group(match, group_name='num_value')) is not None:
4141
return operator, num_value
4242

43-
elif d_q_value := get_match_group(match, group_name='d_q_value'):
43+
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
4444
return operator, d_q_value
4545

46-
elif re_value := get_match_group(match, group_name='re_value'):
46+
elif (re_value := get_match_group(match, group_name='re_value')) is not None:
4747
return OperatorType.REGEX, re_value
4848

4949
return super().get_operator_and_value(match, operator)

siem-converter/app/converter/backends/microsoft/renders/microsoft_sentinel.py

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
limitations under the License.
1717
-----------------------------------------------------------------
1818
"""
19+
from typing import Union
1920

2021
from app.converter.backends.microsoft.const import microsoft_sentinel_query_details
2122
from app.converter.backends.microsoft.mapping import MicrosoftSentinelMappings, microsoft_sentinel_mappings
@@ -28,32 +29,36 @@
2829
class MicrosoftSentinelFieldValue(BaseQueryFieldValue):
2930
details: PlatformDetails = microsoft_sentinel_query_details
3031

32+
@staticmethod
33+
def __escape_value(value: Union[int, str]) -> Union[int, str]:
34+
return value.replace("'", "''") if isinstance(value, str) else value
35+
3136
def equal_modifier(self, field, value):
3237
if isinstance(value, str):
33-
return f"{field} =~ @'{value}'"
38+
return f"{field} =~ @'{self.__escape_value(value)}'"
3439
elif isinstance(value, list):
35-
prepared_values = ", ".join(f"@'{v}'" for v in value)
40+
prepared_values = ", ".join(f"@'{self.__escape_value(v)}'" for v in value)
3641
operator = "in~" if all(isinstance(v, str) for v in value) else "in"
3742
return f'{field} {operator} ({prepared_values})'
3843
return f'{field} == {value}'
3944

4045
def contains_modifier(self, field, value):
4146
if isinstance(value, list):
4247
return f"({self.or_token.join(self.contains_modifier(field=field, value=v) for v in value)})"
43-
return f"{field} contains @'{value}'"
48+
return f"{field} contains @'{self.__escape_value(value)}'"
4449

4550
def endswith_modifier(self, field, value):
4651
if isinstance(value, list):
4752
return f"({self.or_token.join(self.endswith_modifier(field=field, value=v) for v in value)})"
48-
return f"{field} endswith @'{value}'"
53+
return f"{field} endswith @'{self.__escape_value(value)}'"
4954

5055
def startswith_modifier(self, field, value):
5156
if isinstance(value, list):
5257
return f"({self.or_token.join(self.startswith_modifier(field=field, value=v) for v in value)})"
53-
return f"{field} startswith @'{value}'"
58+
return f"{field} startswith @'{self.__escape_value(value)}'"
5459

5560
def __regex_modifier(self, field, value):
56-
return f"{field} matches regex @'(?i){value}'"
61+
return f"{field} matches regex @'(?i){self.__escape_value(value)}'"
5762

5863
def regex_modifier(self, field, value):
5964
if isinstance(value, list):
@@ -63,7 +68,7 @@ def regex_modifier(self, field, value):
6368
def keywords(self, field, value):
6469
if isinstance(value, list):
6570
return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})"
66-
return f"* contains @'{value}'"
71+
return f"* contains @'{self.__escape_value(value)}'"
6772

6873

6974
class MicrosoftSentinelQueryRender(BaseQueryRender):
@@ -78,14 +83,11 @@ class MicrosoftSentinelQueryRender(BaseQueryRender):
7883

7984
mappings: MicrosoftSentinelMappings = microsoft_sentinel_mappings
8085
comment_symbol = "//"
86+
is_multi_line_comment = True
8187

8288
def generate_prefix(self, log_source_signature: LogSourceSignature) -> str:
8389
return str(log_source_signature)
8490

85-
def render_not_supported_functions(self, not_supported_functions: list) -> str:
86-
render_not_suported = "\n".join([f'// {i}' for i in not_supported_functions])
87-
return "\n\n" + f"// {self.unsupported_functions_text}" + render_not_suported
88-
8991
def generate_functions(self, functions: list) -> str:
9092
if not functions:
9193
return ""

siem-converter/app/converter/backends/microsoft/tokenizer.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ class MicrosoftSentinelTokenizer(QueryTokenizer, OperatorBasedMixin):
3434
single_quotes_value_pattern = r"@?'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{\}\s]|\\\'|\\\\)*)'\s*"
3535
str_value_pattern = fr"""{double_quotes_value_pattern}|{single_quotes_value_pattern}"""
3636
_value_pattern = fr"""{bool_value_pattern}|{num_value_pattern}|{str_value_pattern}"""
37-
multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]*)\)"""
37+
multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]+)\)"""
3838
keyword_pattern = fr"\*\s+contains\s+(?:{str_value_pattern})"
3939

4040
multi_value_operators = ("in", "in~")
@@ -50,16 +50,16 @@ def __init__(self, *args, **kwargs):
5050
self.operators_map.update(super().operators_map)
5151

5252
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
53-
if num_value := get_match_group(match, group_name='num_value'):
53+
if (num_value := get_match_group(match, group_name='num_value')) is not None:
5454
return operator, num_value
5555

56-
elif bool_value := get_match_group(match, group_name='bool_value'):
56+
elif (bool_value := get_match_group(match, group_name='bool_value')) is not None:
5757
return operator, bool_value
5858

59-
elif d_q_value := get_match_group(match, group_name='d_q_value'):
59+
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
6060
return operator, d_q_value
6161

62-
elif s_q_value := get_match_group(match, group_name='s_q_value'):
62+
elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None:
6363
return operator, s_q_value
6464

6565
return super().get_operator_and_value(match, operator)

siem-converter/app/converter/backends/opensearch/renders/opensearch.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,8 @@ class OpenSearchQueryRender(BaseQueryRender):
7171

7272
field_value_map = OpenSearchFieldValue(or_token=or_token)
7373
query_pattern = "{query} {functions}"
74+
comment_symbol = "//"
75+
is_multi_line_comment = True
7476

7577
def generate_prefix(self, logsource: dict) -> str:
7678
return ""

siem-converter/app/converter/backends/opensearch/tokenizer.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -64,16 +64,16 @@ def clean_quotes(value: Union[str, int]):
6464
return value
6565

6666
def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
67-
if num_value := get_match_group(match, group_name='num_value'):
67+
if (num_value := get_match_group(match, group_name='num_value')) is not None:
6868
return operator, num_value
6969

70-
elif re_value := get_match_group(match, group_name='re_value'):
70+
elif (re_value := get_match_group(match, group_name='re_value')) is not None:
7171
return OperatorType.REGEX, re_value
7272

73-
elif n_q_value := get_match_group(match, group_name='n_q_value'):
73+
elif (n_q_value := get_match_group(match, group_name='n_q_value')) is not None:
7474
return operator, n_q_value
7575

76-
elif d_q_value := get_match_group(match, group_name='d_q_value'):
76+
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
7777
return operator, d_q_value
7878

7979
return super().get_operator_and_value(match)

0 commit comments

Comments (0)
pFad - Phonifier reborn

pFad - The Proxy pFad © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy