Skip to content

parser, tokenizer, render fixes #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class AthenaQueryRender(BaseQueryRender):

field_value_map = AthenaFieldValue(or_token=or_token)
query_pattern = "{prefix} WHERE {query} {functions}"
comment_symbol = "--"
is_multi_line_comment = True

def generate_prefix(self, log_source_signature: LogSourceSignature) -> str:
table = str(log_source_signature) if str(log_source_signature) else "eventlog"
Expand Down
8 changes: 4 additions & 4 deletions siem-converter/app/converter/backends/athena/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class AthenaTokenizer(QueryTokenizer):
match_operator_pattern = r"""(?:___field___\s?(?P<match_operator>like|in|=|>|<|>=|<=|<>|!=))\s?"""
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
bool_value_pattern = r"(?P<bool_value>true|false)\s*"
single_quotes_value_pattern = r"""'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')+)'"""
single_quotes_value_pattern = r"""'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*)'"""
_value_pattern = fr"{num_value_pattern}|{bool_value_pattern}|{single_quotes_value_pattern}"
multi_value_pattern = r"""\((?P<value>\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*')*)\)"""

Expand All @@ -49,13 +49,13 @@ def should_process_value_wildcard_symbols(operator: str) -> bool:
return operator.lower() in ("like",)

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
if num_value := get_match_group(match, group_name='num_value'):
if (num_value := get_match_group(match, group_name='num_value')) is not None:
return operator, num_value

elif bool_value := get_match_group(match, group_name='bool_value'):
elif (bool_value := get_match_group(match, group_name='bool_value')) is not None:
return operator, bool_value

elif s_q_value := get_match_group(match, group_name='s_q_value'):
elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None:
return operator, s_q_value

return super().get_operator_and_value(match, operator)
Expand Down
14 changes: 7 additions & 7 deletions siem-converter/app/converter/backends/chronicle/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,22 @@ class ChronicleQueryTokenizer(QueryTokenizer):
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
bool_value_pattern = r"(?P<bool_value>true|false)\s*"
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\\\)*)"\s*(?:nocase)?'
re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?]*)/\s*(?:nocase)?"
re_value_pattern = r"/(?P<re_value>(?:\\\/|[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?])+)/\s*(?:nocase)?"
_value_pattern = fr"{num_value_pattern}|{bool_value_pattern}|{double_quotes_value_pattern}|{re_value_pattern}"

wildcard_symbol = ".*"

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
if num_value := get_match_group(match, group_name='num_value'):
if (num_value := get_match_group(match, group_name='num_value')) is not None:
return operator, num_value

elif bool_value := get_match_group(match, group_name='bool_value'):
elif (bool_value := get_match_group(match, group_name='bool_value')) is not None:
return operator, bool_value

elif d_q_value := get_match_group(match, group_name='d_q_value'):
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
return operator, d_q_value

elif re_value := get_match_group(match, group_name='re_value'):
elif (re_value := get_match_group(match, group_name='re_value')) is not None:
return OperatorType.REGEX, re_value

return super().get_operator_and_value(match, operator)
Expand Down Expand Up @@ -94,10 +94,10 @@ def search_field_value(self, query):
return super().search_field_value(query=query)

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
if d_q_value := get_match_group(match, group_name='d_q_value'):
if (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
return operator, d_q_value

elif b_q_value := get_match_group(match, group_name='b_q_value'):
elif (b_q_value := get_match_group(match, group_name='b_q_value')) is not None:
return operator, b_q_value

return super().get_operator_and_value(match, operator)
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ class ElasticSearchQueryRender(BaseQueryRender):

field_value_map = ElasticSearchFieldValue(or_token=or_token)
query_pattern = "{query} {functions}"
comment_symbol = "//"
is_multi_line_comment = True

def generate_prefix(self, logsource: dict) -> str:
return ""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,16 @@ def clean_quotes(value: Union[str, int]):
return value

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
if num_value := get_match_group(match, group_name='num_value'):
if (num_value := get_match_group(match, group_name='num_value')) is not None:
return operator, num_value

elif re_value := get_match_group(match, group_name='re_value'):
elif (re_value := get_match_group(match, group_name='re_value')) is not None:
return OperatorType.REGEX, re_value

elif n_q_value := get_match_group(match, group_name='n_q_value'):
elif (n_q_value := get_match_group(match, group_name='n_q_value')) is not None:
return operator, n_q_value

elif d_q_value := get_match_group(match, group_name='d_q_value'):
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
return operator, d_q_value

return super().get_operator_and_value(match)
Expand Down
8 changes: 4 additions & 4 deletions siem-converter/app/converter/backends/logscale/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,20 @@ class LogScaleTokenizer(QueryTokenizer):
match_operator_pattern = r"""(?:___field___\s?(?P<match_operator>=|!=))\s?"""
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?]*)/i?\s*"
re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\s?]+)/i?\s*"
_value_pattern = fr"""{num_value_pattern}|{re_value_pattern}|{double_quotes_value_pattern}"""
keyword_pattern = double_quotes_value_pattern

wildcard_symbol = "*"

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
if num_value := get_match_group(match, group_name='num_value'):
if (num_value := get_match_group(match, group_name='num_value')) is not None:
return operator, num_value

elif d_q_value := get_match_group(match, group_name='d_q_value'):
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
return operator, d_q_value

elif re_value := get_match_group(match, group_name='re_value'):
elif (re_value := get_match_group(match, group_name='re_value')) is not None:
return OperatorType.REGEX, re_value

return super().get_operator_and_value(match, operator)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
limitations under the License.
-----------------------------------------------------------------
"""
from typing import Union

from app.converter.backends.microsoft.const import microsoft_sentinel_query_details
from app.converter.backends.microsoft.mapping import MicrosoftSentinelMappings, microsoft_sentinel_mappings
Expand All @@ -28,32 +29,36 @@
class MicrosoftSentinelFieldValue(BaseQueryFieldValue):
details: PlatformDetails = microsoft_sentinel_query_details

@staticmethod
def __escape_value(value: Union[int, str]) -> Union[int, str]:
return value.replace("'", "''") if isinstance(value, str) else value

def equal_modifier(self, field, value):
if isinstance(value, str):
return f"{field} =~ @'{value}'"
return f"{field} =~ @'{self.__escape_value(value)}'"
elif isinstance(value, list):
prepared_values = ", ".join(f"@'{v}'" for v in value)
prepared_values = ", ".join(f"@'{self.__escape_value(v)}'" for v in value)
operator = "in~" if all(isinstance(v, str) for v in value) else "in"
return f'{field} {operator} ({prepared_values})'
return f'{field} == {value}'

def contains_modifier(self, field, value):
if isinstance(value, list):
return f"({self.or_token.join(self.contains_modifier(field=field, value=v) for v in value)})"
return f"{field} contains @'{value}'"
return f"{field} contains @'{self.__escape_value(value)}'"

def endswith_modifier(self, field, value):
if isinstance(value, list):
return f"({self.or_token.join(self.endswith_modifier(field=field, value=v) for v in value)})"
return f"{field} endswith @'{value}'"
return f"{field} endswith @'{self.__escape_value(value)}'"

def startswith_modifier(self, field, value):
if isinstance(value, list):
return f"({self.or_token.join(self.startswith_modifier(field=field, value=v) for v in value)})"
return f"{field} startswith @'{value}'"
return f"{field} startswith @'{self.__escape_value(value)}'"

def __regex_modifier(self, field, value):
return f"{field} matches regex @'(?i){value}'"
return f"{field} matches regex @'(?i){self.__escape_value(value)}'"

def regex_modifier(self, field, value):
if isinstance(value, list):
Expand All @@ -63,7 +68,7 @@ def regex_modifier(self, field, value):
def keywords(self, field, value):
if isinstance(value, list):
return f"({self.or_token.join(self.keywords(field=field, value=v) for v in value)})"
return f"* contains @'{value}'"
return f"* contains @'{self.__escape_value(value)}'"


class MicrosoftSentinelQueryRender(BaseQueryRender):
Expand All @@ -78,14 +83,11 @@ class MicrosoftSentinelQueryRender(BaseQueryRender):

mappings: MicrosoftSentinelMappings = microsoft_sentinel_mappings
comment_symbol = "//"
is_multi_line_comment = True

def generate_prefix(self, log_source_signature: LogSourceSignature) -> str:
return str(log_source_signature)

def render_not_supported_functions(self, not_supported_functions: list) -> str:
render_not_suported = "\n".join([f'// {i}' for i in not_supported_functions])
return "\n\n" + f"// {self.unsupported_functions_text}" + render_not_suported

def generate_functions(self, functions: list) -> str:
if not functions:
return ""
Expand Down
10 changes: 5 additions & 5 deletions siem-converter/app/converter/backends/microsoft/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class MicrosoftSentinelTokenizer(QueryTokenizer, OperatorBasedMixin):
single_quotes_value_pattern = r"@?'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{\}\s]|\\\'|\\\\)*)'\s*"
str_value_pattern = fr"""{double_quotes_value_pattern}|{single_quotes_value_pattern}"""
_value_pattern = fr"""{bool_value_pattern}|{num_value_pattern}|{str_value_pattern}"""
multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]*)\)"""
multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]+)\)"""
keyword_pattern = fr"\*\s+contains\s+(?:{str_value_pattern})"

multi_value_operators = ("in", "in~")
Expand All @@ -50,16 +50,16 @@ def __init__(self, *args, **kwargs):
self.operators_map.update(super().operators_map)

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
if num_value := get_match_group(match, group_name='num_value'):
if (num_value := get_match_group(match, group_name='num_value')) is not None:
return operator, num_value

elif bool_value := get_match_group(match, group_name='bool_value'):
elif (bool_value := get_match_group(match, group_name='bool_value')) is not None:
return operator, bool_value

elif d_q_value := get_match_group(match, group_name='d_q_value'):
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
return operator, d_q_value

elif s_q_value := get_match_group(match, group_name='s_q_value'):
elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None:
return operator, s_q_value

return super().get_operator_and_value(match, operator)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ class OpenSearchQueryRender(BaseQueryRender):

field_value_map = OpenSearchFieldValue(or_token=or_token)
query_pattern = "{query} {functions}"
comment_symbol = "//"
is_multi_line_comment = True

def generate_prefix(self, logsource: dict) -> str:
return ""
Expand Down
8 changes: 4 additions & 4 deletions siem-converter/app/converter/backends/opensearch/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,16 @@ def clean_quotes(value: Union[str, int]):
return value

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
if num_value := get_match_group(match, group_name='num_value'):
if (num_value := get_match_group(match, group_name='num_value')) is not None:
return operator, num_value

elif re_value := get_match_group(match, group_name='re_value'):
elif (re_value := get_match_group(match, group_name='re_value')) is not None:
return OperatorType.REGEX, re_value

elif n_q_value := get_match_group(match, group_name='n_q_value'):
elif (n_q_value := get_match_group(match, group_name='n_q_value')) is not None:
return operator, n_q_value

elif d_q_value := get_match_group(match, group_name='d_q_value'):
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
return operator, d_q_value

return super().get_operator_and_value(match)
Expand Down
2 changes: 1 addition & 1 deletion siem-converter/app/converter/backends/qradar/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
}

NUM_VALUE_PATTERN = r"(?P<num_value>\d+(?:\.\d+)*)"
SINGLE_QUOTES_VALUE_PATTERN = r"""'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')+)'"""
SINGLE_QUOTES_VALUE_PATTERN = r"""'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{\}\s]|'')*)'"""


qradar_query_details = PlatformDetails(**QRADAR_QUERY_DETAILS)
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,3 @@ def generate_prefix(self, log_source_signature: QradarLogSourceSignature) -> str

def generate_functions(self, functions: list):
return ""

6 changes: 3 additions & 3 deletions siem-converter/app/converter/backends/qradar/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,13 @@ def should_process_value_wildcard_symbols(operator: str) -> bool:
return operator.lower() in ("like", "ilike")

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
if num_value := get_match_group(match, group_name='num_value'):
if (num_value := get_match_group(match, group_name='num_value')) is not None:
return operator, num_value

elif bool_value := get_match_group(match, group_name='bool_value'):
elif (bool_value := get_match_group(match, group_name='bool_value')) is not None:
return operator, bool_value

elif s_q_value := get_match_group(match, group_name='s_q_value'):
elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None:
return operator, s_q_value

return super().get_operator_and_value(match, operator)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,3 @@ def generate_functions(self, functions: list):

def wrap_with_comment(self, value: str) -> str:
return f"{self.comment_symbol} {value} {self.comment_symbol}"

def render_not_supported_functions(self, not_supported_functions):
render_not_suported = "\n".join(not_supported_functions)
return f'\n\n""" {self.unsupported_functions_text}' + render_not_suported + '"""'
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class SplunkAlertFieldValue(SplunkFieldValue):
class SplunkAlertRender(SplunkQueryRender):
details: PlatformDetails = splunk_alert_details
or_token = "OR"
field_value_map = SplunkFieldValue(or_token=or_token)
field_value_map = SplunkAlertFieldValue(or_token=or_token)

def finalize_query(self, prefix: str, query: str, functions: str, meta_info: MetaInfoContainer,
source_mapping: SourceMapping = None, not_supported_functions: list = None):
Expand Down
16 changes: 8 additions & 8 deletions siem-converter/app/converter/backends/splunk/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,29 +25,29 @@


class SplunkTokenizer(QueryTokenizer):
field_pattern = r"(?P<field_name>[a-zA-Z\.\-]+)"
field_pattern = r"(?P<field_name>[a-zA-Z\.\-_\{\}]+)"
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
single_quotes_value_pattern = r"'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\"\.$&^@!\(\)\{\}\s]|\\\'|\\)*)'\s*"
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,;\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
single_quotes_value_pattern = r"'(?P<s_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,;\"\.$&^@!\(\)\{\}\s]|\\\'|\\)*)'\s*"
no_quotes_value = r"(?P<no_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\.\\$&^@!])+)\s*"
_value_pattern = fr"{num_value_pattern}|{no_quotes_value}|{double_quotes_value_pattern}|{single_quotes_value_pattern}"
multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\s]+)\)"""
multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,;.$&^@!\{\}\(\s]+)\)"""
keyword_pattern = double_quotes_value_pattern

multi_value_operators = ("in",)
wildcard_symbol = "*"

def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.EQ) -> Tuple[str, Any]:
if num_value := get_match_group(match, group_name='num_value'):
if (num_value := get_match_group(match, group_name='num_value')) is not None:
return operator, num_value

elif no_q_value := get_match_group(match, group_name='no_q_value'):
elif (no_q_value := get_match_group(match, group_name='no_q_value')) is not None:
return operator, no_q_value

elif d_q_value := get_match_group(match, group_name='d_q_value'):
elif (d_q_value := get_match_group(match, group_name='d_q_value')) is not None:
return operator, d_q_value

elif s_q_value := get_match_group(match, group_name='s_q_value'):
elif (s_q_value := get_match_group(match, group_name='s_q_value')) is not None:
return operator, s_q_value

return super().get_operator_and_value(match)
2 changes: 1 addition & 1 deletion siem-converter/app/converter/core/models/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __add_value(self, value: Union[int, str, list, tuple]):
self.values.extend(value)
elif value and isinstance(value, str) and value.isnumeric():
self.values.append(int(value))
elif value and isinstance(value, (int, str)):
elif value is not None and isinstance(value, (int, str)):
self.values.append(value)

def __add__(self, other):
Expand Down
10 changes: 6 additions & 4 deletions siem-converter/app/converter/core/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ class BaseQueryRender:
query_pattern = '{table} {query} {functions}'

comment_symbol: str = None
unsupported_functions_text = 'Unsupported functions were excluded from the result query:\n'
is_multi_line_comment: bool = False
unsupported_functions_text = 'Unsupported functions were excluded from the result query:'

def __init__(self):
self.operator_map = {
Expand Down Expand Up @@ -153,11 +154,12 @@ def finalize_query(self,
return query

def render_not_supported_functions(self, not_supported_functions: list) -> str:
render_not_supported = "\n".join(f"//{i}" for i in not_supported_functions)
return "\n\n" + f"// {self.unsupported_functions_text}" + render_not_supported
line_template = f"{self.comment_symbol} " if self.comment_symbol and self.is_multi_line_comment else ""
not_supported_functions_str = "\n".join(line_template + func for func in not_supported_functions)
return "\n\n" + self.wrap_with_comment(f"{self.unsupported_functions_text}\n{not_supported_functions_str}")

def wrap_with_comment(self, value: str) -> str:
return f"{self.comment_symbol}{value}"
return f"{self.comment_symbol} {value}"

def finalize(self, queries_map: Dict[str, str]) -> str:
unique_queries = set(queries_map.values())
Expand Down
Loading
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy