
Interpret a space as and #18


Merged (1 commit, Dec 4, 2023)
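
This change introduces an ANDLogicOperatorMixin that scans a token stream for places where a Field or Keyword is immediately followed by another token with no logical operator or closing parenthesis in between, and inserts an explicit AND identifier there. The mixin is wired into the Lucene, SPL, and LogScale tokenizers, so a bare space between query terms is now interpreted as a logical AND.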
27 changes: 27 additions & 0 deletions siem-converter/app/converter/core/mixins/logic.py
@@ -0,0 +1,27 @@
+from typing import List, Union
+
+from app.converter.core.models.field import Field, Keyword
+from app.converter.core.models.identifier import Identifier
+from app.converter.core.custom_types.tokens import LogicalOperatorType, GroupType
+
+
+class ANDLogicOperatorMixin:
+
+    @staticmethod
+    def get_missed_and_token_indices(tokens: List[Union[Field, Keyword, Identifier]]) -> List[int]:
+        missed_and_indices = []
+        for index in range(len(tokens) - 1):
+            token = tokens[index]
+            next_token = tokens[index + 1]
+            if (isinstance(token, (Field, Keyword))
+                    and not (isinstance(next_token, Identifier) and (
+                        next_token.token_type in LogicalOperatorType
+                        or next_token.token_type == GroupType.R_PAREN))):
+                missed_and_indices.append(index + 1)
+        return list(reversed(missed_and_indices))
+
+    def add_and_token_if_missed(self, tokens: List[Union[Field, Keyword, Identifier]]) -> List[Union[Field, Keyword, Identifier]]:
+        indices = self.get_missed_and_token_indices(tokens=tokens)
+        for index in indices:
+            tokens.insert(index, Identifier(token_type=LogicalOperatorType.AND))
+        return tokens
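
For illustration, the mixin's algorithm can be exercised outside the codebase. Below is a minimal, self-contained sketch of the same logic with stand-in token classes; the names add_missing_ands, AND, R_PAREN, and LOGICAL_OPERATOR_TYPES are illustrative and not part of this repository.

from dataclasses import dataclass
from typing import List, Union

@dataclass
class Field:          # stand-in for app.converter.core.models.field.Field
    name: str

@dataclass
class Identifier:     # stand-in for app.converter.core.models.identifier.Identifier
    token_type: str

AND = "and"
R_PAREN = ")"
LOGICAL_OPERATOR_TYPES = {"and", "or", "not"}

def add_missing_ands(tokens: List[Union[Field, Identifier]]) -> List[Union[Field, Identifier]]:
    # Collect positions where a field is followed by something other
    # than a logical operator or a closing parenthesis.
    missed = []
    for index in range(len(tokens) - 1):
        token, next_token = tokens[index], tokens[index + 1]
        if isinstance(token, Field) and not (
            isinstance(next_token, Identifier)
            and (next_token.token_type in LOGICAL_OPERATOR_TYPES
                 or next_token.token_type == R_PAREN)
        ):
            missed.append(index + 1)
    # Insert right-to-left so earlier indices are not shifted by the inserts.
    for index in reversed(missed):
        tokens.insert(index, Identifier(token_type=AND))
    return tokens

print(add_missing_ands([Field("src_ip"), Field("dst_ip")]))
# [Field(name='src_ip'), Identifier(token_type='and'), Field(name='dst_ip')]

Collecting all indices first and then inserting from the right is why the mixin returns the indices reversed: in-place inserts at earlier positions would otherwise invalidate the later insertion points.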
@@ -20,14 +20,15 @@
 from typing import Tuple, Union, List, Any
 
 from app.converter.core.exceptions.parser import TokenizerGeneralException
+from app.converter.core.mixins.logic import ANDLogicOperatorMixin
 from app.converter.core.models.field import Keyword, Field
 from app.converter.core.models.identifier import Identifier
 from app.converter.core.tokenizer import QueryTokenizer
 from app.converter.core.custom_types.tokens import OperatorType
 from app.converter.tools.utils import get_match_group
 
 
-class LuceneTokenizer(QueryTokenizer):
+class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
     field_pattern = r"(?P<field_name>[a-zA-Z\.\-_]+)"
     match_operator_pattern = r"(?:___field___\s*(?P<match_operator>:))\s*"
 
@@ -107,3 +108,7 @@ def search_keyword(self, query: str) -> Tuple[Keyword, str]:
         keyword = Keyword(value=value)
         pos = keyword_search.end() - 1
         return keyword, query[pos:]
+
+    def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]:
+        tokens = super().tokenize(query=query)
+        return self.add_and_token_if_missed(tokens=tokens)
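
With the override in place, an implicit-AND Lucene query should tokenize the same as its explicit form. A hedged usage sketch follows; the import path is an assumption (the file header for this diff was not captured), and it presumes LuceneTokenizer can be constructed with no arguments.

# Sketch only: the module path and no-arg construction are assumptions.
from app.converter.platforms.base.lucene.tokenizer import LuceneTokenizer

tokenizer = LuceneTokenizer()
# In Lucene syntax a bare space between clauses acts as an implicit AND,
# so both forms should now produce equivalent token streams.
implicit = tokenizer.tokenize(query='event.category:process event.action:start')
explicit = tokenizer.tokenize(query='event.category:process AND event.action:start')
assert len(implicit) == len(explicit)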
11 changes: 9 additions & 2 deletions siem-converter/app/converter/platforms/base/spl/tokenizer.py
@@ -17,14 +17,17 @@
"""

import re
from typing import Tuple, Any
from typing import Tuple, Any, List, Union

from app.converter.core.mixins.logic import ANDLogicOperatorMixin
from app.converter.core.models.field import Field, Keyword
from app.converter.core.models.identifier import Identifier
from app.converter.core.tokenizer import QueryTokenizer
from app.converter.core.custom_types.tokens import OperatorType
from app.converter.tools.utils import get_match_group


class SplTokenizer(QueryTokenizer):
class SplTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
field_pattern = r"(?P<field_name>[a-zA-Z\.\-_\{\}]+)"
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,;\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
@@ -51,3 +54,7 @@ def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.E
             return operator, s_q_value
 
         return super().get_operator_and_value(match)
+
+    def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]:
+        tokens = super().tokenize(query=query)
+        return self.add_and_token_if_missed(tokens=tokens)
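
The same override gives SPL its expected semantics: in Splunk searches, space-separated field comparisons are implicitly ANDed. A sketch under the same assumptions (package importable, no-arg construction):

# Sketch only: assumes the siem-converter package is importable as-is.
from app.converter.platforms.base.spl.tokenizer import SplTokenizer

tokenizer = SplTokenizer()
# 'EventCode=4688 Image="powershell.exe"' carries an implicit AND between
# the two comparisons; tokenize() now materializes it as an Identifier token.
tokens = tokenizer.tokenize(query='EventCode=4688 Image="powershell.exe"')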
9 changes: 5 additions & 4 deletions siem-converter/app/converter/platforms/logscale/tokenizer.py
@@ -17,16 +17,17 @@
"""

import re
from typing import Tuple, Any
from typing import Tuple, Any, List, Union

from app.converter.core.mixins.logic import ANDLogicOperatorMixin
from app.converter.core.models.field import Keyword, Field
from app.converter.core.models.identifier import Identifier
from app.converter.core.custom_types.tokens import GroupType, LogicalOperatorType, OperatorType
from app.converter.core.tokenizer import QueryTokenizer
from app.converter.tools.utils import get_match_group


class LogScaleTokenizer(QueryTokenizer):
class LogScaleTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
match_operator_pattern = r"""(?:___field___\s?(?P<match_operator>=|!=))\s?"""
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
@@ -65,7 +66,7 @@ def __get_identifier(self, query: str) -> (list, str):
         else:
             return self.search_field_value(query)
 
-    def tokenize(self, query: str) -> list:
+    def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]:
         tokenized = []
         while query:
             identifier, query = self.__get_identifier(query=query)
@@ -78,4 +79,4 @@ def tokenize(self, query: str) -> list:
                 tokenized.append(Identifier(token_type=LogicalOperatorType.AND))
             tokenized.append(identifier)
         self._validate_parentheses(tokenized)
-        return tokenized
+        return self.add_and_token_if_missed(tokens=tokenized)
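
Note the design difference here: LogScaleTokenizer already injected AND between space-separated identifiers while scanning (the branch visible above), and the new return statement adds the mixin's pass as a final sweep for any field or keyword adjacency the scanner missed. A sketch under the same assumptions as before:

# Sketch only: assumes the siem-converter package is importable as-is.
from app.converter.platforms.logscale.tokenizer import LogScaleTokenizer

tokenizer = LogScaleTokenizer()
# In LogScale, adjacent filters such as 'method="GET" status=404' are
# implicitly ANDed; the returned token list now contains the AND explicitly.
tokens = tokenizer.tokenize(query='method="GET" status=404')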