Commit 441545e

Merge pull request #18 from UncoderIO/space-as-and

Interpret a space as and: a bare space between two query tokens is now treated as an implicit logical AND, so a query such as status:200 method:GET is tokenized as if it were status:200 AND method:GET.

2 parents 5bde75e + f72ca55 commit 441545e

File tree: 4 files changed (+47 -7 lines changed)

siem-converter/app/converter/core/mixins/logic.py

Lines changed: 27 additions & 0 deletions

@@ -0,0 +1,27 @@
+from typing import List, Union
+
+from app.converter.core.models.field import Field, Keyword
+from app.converter.core.models.identifier import Identifier
+from app.converter.core.custom_types.tokens import LogicalOperatorType, GroupType
+
+
+class ANDLogicOperatorMixin:
+
+    @staticmethod
+    def get_missed_and_token_indices(tokens: List[Union[Field, Keyword, Identifier]]) -> List[int]:
+        missed_and_indices = []
+        for index in range(len(tokens) - 1):
+            token = tokens[index]
+            next_token = tokens[index + 1]
+            if (isinstance(token, (Field, Keyword))
+                    and not (isinstance(next_token, Identifier) and (
+                        next_token.token_type in LogicalOperatorType
+                        or next_token.token_type == GroupType.R_PAREN))):
+                missed_and_indices.append(index + 1)
+        return reversed(missed_and_indices)
+
+    def add_and_token_if_missed(self, tokens: List[Union[Field, Keyword, Identifier]]) -> List[Union[Field, Keyword, Identifier]]:
+        indices = self.get_missed_and_token_indices(tokens=tokens)
+        for index in indices:
+            tokens.insert(index, Identifier(token_type=LogicalOperatorType.AND))
+        return tokens
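
The mixin scans the token stream for a Field or Keyword whose successor is neither a logical operator nor a closing parenthesis, and inserts an AND identifier at each such gap. The indices are visited in reverse so that each insertion does not shift the positions still pending (note the helper returns a reversed iterator, even though it is annotated List[int]). Below is a minimal, self-contained sketch of the same pass; the Field, Keyword, and Identifier classes and the token-type constants are simplified stand-ins for the project's real models, not its actual API:

# A minimal, self-contained sketch of the implicit-AND pass.
# Field, Keyword, Identifier, and the token-type constants are simplified
# stand-ins for the project's real models, not its actual API.
from dataclasses import dataclass
from typing import List, Union

AND, OR, NOT, R_PAREN = "and", "or", "not", ")"
LOGICAL_OPERATORS = {AND, OR, NOT}


@dataclass
class Field:
    name: str
    value: str


@dataclass
class Keyword:
    value: str


@dataclass
class Identifier:
    token_type: str


Token = Union[Field, Keyword, Identifier]


def add_and_token_if_missed(tokens: List[Token]) -> List[Token]:
    # A gap is a field/keyword whose successor is neither a logical
    # operator nor a closing parenthesis.
    missed = [
        index + 1
        for index in range(len(tokens) - 1)
        if isinstance(tokens[index], (Field, Keyword))
        and not (isinstance(tokens[index + 1], Identifier)
                 and (tokens[index + 1].token_type in LOGICAL_OPERATORS
                      or tokens[index + 1].token_type == R_PAREN))
    ]
    # Insert from the back so earlier indices stay valid after each insert.
    for index in reversed(missed):
        tokens.insert(index, Identifier(token_type=AND))
    return tokens


# 'status:200 method:GET' yields two fields with nothing between them:
tokens = [Field("status", "200"), Field("method", "GET")]
print(add_and_token_if_missed(tokens))
# [Field(...), Identifier(token_type='and'), Field(...)]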

siem-converter/app/converter/platforms/base/lucene/tokenizer.py

Lines changed: 6 additions & 1 deletion

@@ -20,14 +20,15 @@
 from typing import Tuple, Union, List, Any

 from app.converter.core.exceptions.parser import TokenizerGeneralException
+from app.converter.core.mixins.logic import ANDLogicOperatorMixin
 from app.converter.core.models.field import Keyword, Field
 from app.converter.core.models.identifier import Identifier
 from app.converter.core.tokenizer import QueryTokenizer
 from app.converter.core.custom_types.tokens import OperatorType
 from app.converter.tools.utils import get_match_group


-class LuceneTokenizer(QueryTokenizer):
+class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
     field_pattern = r"(?P<field_name>[a-zA-Z\.\-_]+)"
     match_operator_pattern = r"(?:___field___\s*(?P<match_operator>:))\s*"

@@ -107,3 +108,7 @@ def search_keyword(self, query: str) -> Tuple[Keyword, str]:
         keyword = Keyword(value=value)
         pos = keyword_search.end() - 1
         return keyword, query[pos:]
+
+    def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]:
+        tokens = super().tokenize(query=query)
+        return self.add_and_token_if_missed(tokens=tokens)
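
With the mixin in place, space-separated Lucene terms gain an explicit AND during tokenization. A hedged usage sketch, illustrative only (it assumes LuceneTokenizer takes no constructor arguments, which this diff does not show):

# Illustrative only; constructor arguments, if any, are not shown in the diff.
tokenizer = LuceneTokenizer()
tokens = tokenizer.tokenize(query='status:200 method:GET')
# The resulting token stream is equivalent to that of 'status:200 AND method:GET'.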

siem-converter/app/converter/platforms/base/spl/tokenizer.py

Lines changed: 9 additions & 2 deletions

@@ -17,14 +17,17 @@
 """

 import re
-from typing import Tuple, Any
+from typing import Tuple, Any, List, Union

+from app.converter.core.mixins.logic import ANDLogicOperatorMixin
+from app.converter.core.models.field import Field, Keyword
+from app.converter.core.models.identifier import Identifier
 from app.converter.core.tokenizer import QueryTokenizer
 from app.converter.core.custom_types.tokens import OperatorType
 from app.converter.tools.utils import get_match_group


-class SplTokenizer(QueryTokenizer):
+class SplTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
     field_pattern = r"(?P<field_name>[a-zA-Z\.\-_\{\}]+)"
     num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
     double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,;\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'

@@ -51,3 +54,7 @@ def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.E
             return operator, s_q_value

         return super().get_operator_and_value(match)
+
+    def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]:
+        tokens = super().tokenize(query=query)
+        return self.add_and_token_if_missed(tokens=tokens)
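
Splunk's SPL already treats adjacent search terms as implicitly ANDed, so this override brings SplTokenizer in line with that idiom. A hedged sketch under the same assumption of a no-argument constructor:

# Illustrative only; constructor arguments, if any, are not shown in the diff.
tokenizer = SplTokenizer()
tokens = tokenizer.tokenize(query='EventCode=4688 Image="cmd.exe"')
# Equivalent to the token stream of 'EventCode=4688 AND Image="cmd.exe"'.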

siem-converter/app/converter/platforms/logscale/tokenizer.py

Lines changed: 5 additions & 4 deletions

@@ -17,16 +17,17 @@
 """

 import re
-from typing import Tuple, Any
+from typing import Tuple, Any, List, Union

+from app.converter.core.mixins.logic import ANDLogicOperatorMixin
 from app.converter.core.models.field import Keyword, Field
 from app.converter.core.models.identifier import Identifier
 from app.converter.core.custom_types.tokens import GroupType, LogicalOperatorType, OperatorType
 from app.converter.core.tokenizer import QueryTokenizer
 from app.converter.tools.utils import get_match_group


-class LogScaleTokenizer(QueryTokenizer):
+class LogScaleTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
     match_operator_pattern = r"""(?:___field___\s?(?P<match_operator>=|!=))\s?"""
     num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
     double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'

@@ -65,7 +66,7 @@ def __get_identifier(self, query: str) -> (list, str):
         else:
             return self.search_field_value(query)

-    def tokenize(self, query: str) -> list:
+    def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]:
         tokenized = []
         while query:
             identifier, query = self.__get_identifier(query=query)

@@ -78,4 +79,4 @@ def tokenize(self, query: str) -> list:
                 tokenized.append(Identifier(token_type=LogicalOperatorType.AND))
             tokenized.append(identifier)
         self._validate_parentheses(tokenized)
-        return tokenized
+        return self.add_and_token_if_missed(tokens=tokenized)
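
LogScaleTokenizer differs from the other two tokenizers: its tokenize() already inserts an AND inline while consuming the query. The change keeps that logic but routes the finished list through the mixin as a final pass, which may catch gaps the inline check misses (for example, after keyword tokens), and tightens the return annotation to match. A hedged sketch, again assuming a no-argument constructor:

# Illustrative only; constructor arguments, if any, are not shown in the diff.
tokenizer = LogScaleTokenizer()
tokens = tokenizer.tokenize(query='method=GET status=200')
# Equivalent to the token stream of 'method=GET AND status=200'.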
