From f72ca55d65c8080d34d78248c0f28a37bda5337c Mon Sep 17 00:00:00 2001
From: vh
Date: Mon, 4 Dec 2023 12:05:18 +0200
Subject: [PATCH] Interpret a space as and

---
 .../app/converter/core/mixins/logic.py        | 27 +++++++++++++++++++
 .../platforms/base/lucene/tokenizer.py        |  7 ++++-
 .../converter/platforms/base/spl/tokenizer.py | 11 ++++++--
 .../converter/platforms/logscale/tokenizer.py |  9 ++++---
 4 files changed, 47 insertions(+), 7 deletions(-)
 create mode 100644 siem-converter/app/converter/core/mixins/logic.py

diff --git a/siem-converter/app/converter/core/mixins/logic.py b/siem-converter/app/converter/core/mixins/logic.py
new file mode 100644
index 00000000..84b26a8e
--- /dev/null
+++ b/siem-converter/app/converter/core/mixins/logic.py
@@ -0,0 +1,27 @@
+from typing import List, Union
+
+from app.converter.core.models.field import Field, Keyword
+from app.converter.core.models.identifier import Identifier
+from app.converter.core.custom_types.tokens import LogicalOperatorType, GroupType
+
+
+class ANDLogicOperatorMixin:
+
+    @staticmethod
+    def get_missed_and_token_indices(tokens: List[Union[Field, Keyword, Identifier]]) -> List[int]:
+        missed_and_indices = []
+        for index in range(len(tokens) - 1):
+            token = tokens[index]
+            next_token = tokens[index + 1]
+            if (isinstance(token, (Field, Keyword))
+                    and not (isinstance(next_token, Identifier) and (
+                        next_token.token_type in LogicalOperatorType
+                        or next_token.token_type == GroupType.R_PAREN))):
+                missed_and_indices.append(index + 1)
+        return list(reversed(missed_and_indices))
+
+    def add_and_token_if_missed(self, tokens: List[Union[Field, Keyword, Identifier]]) -> List[Union[Field, Keyword, Identifier]]:
+        indices = self.get_missed_and_token_indices(tokens=tokens)
+        for index in indices:
+            tokens.insert(index, Identifier(token_type=LogicalOperatorType.AND))
+        return tokens
diff --git a/siem-converter/app/converter/platforms/base/lucene/tokenizer.py b/siem-converter/app/converter/platforms/base/lucene/tokenizer.py
index 0ac47881..d48acfb5 100644
--- a/siem-converter/app/converter/platforms/base/lucene/tokenizer.py
+++ b/siem-converter/app/converter/platforms/base/lucene/tokenizer.py
@@ -20,6 +20,7 @@
 from typing import Tuple, Union, List, Any
 
 from app.converter.core.exceptions.parser import TokenizerGeneralException
+from app.converter.core.mixins.logic import ANDLogicOperatorMixin
 from app.converter.core.models.field import Keyword, Field
 from app.converter.core.models.identifier import Identifier
 from app.converter.core.tokenizer import QueryTokenizer
@@ -27,7 +28,7 @@
 from app.converter.tools.utils import get_match_group
 
 
-class LuceneTokenizer(QueryTokenizer):
+class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
     field_pattern = r"(?P<field_name>[a-zA-Z\.\-_]+)"
     match_operator_pattern = r"(?:___field___\s*(?P<match_operator>:))\s*"
 
@@ -107,3 +108,7 @@ def search_keyword(self, query: str) -> Tuple[Keyword, str]:
         keyword = Keyword(value=value)
         pos = keyword_search.end() - 1
         return keyword, query[pos:]
+
+    def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]:
+        tokens = super().tokenize(query=query)
+        return self.add_and_token_if_missed(tokens=tokens)
diff --git a/siem-converter/app/converter/platforms/base/spl/tokenizer.py b/siem-converter/app/converter/platforms/base/spl/tokenizer.py
index e0207cd7..f4f2f127 100644
--- a/siem-converter/app/converter/platforms/base/spl/tokenizer.py
+++ b/siem-converter/app/converter/platforms/base/spl/tokenizer.py
@@ -17,14 +17,17 @@
 """
 import re
-from typing import Tuple, Any
+from typing import Tuple, Any, List, Union
 
+from app.converter.core.mixins.logic import ANDLogicOperatorMixin
+from app.converter.core.models.field import Field, Keyword
+from app.converter.core.models.identifier import Identifier
 from app.converter.core.tokenizer import QueryTokenizer
 from app.converter.core.custom_types.tokens import OperatorType
 from app.converter.tools.utils import get_match_group
 
 
-class SplTokenizer(QueryTokenizer):
+class SplTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
     field_pattern = r"(?P<field_name>[a-zA-Z\.\-_\{\}]+)"
     num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
     double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,;\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
@@ -51,3 +54,7 @@ def get_operator_and_value(self, match: re.Match, operator: str = OperatorType.E
             return operator, s_q_value
 
         return super().get_operator_and_value(match)
+
+    def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]:
+        tokens = super().tokenize(query=query)
+        return self.add_and_token_if_missed(tokens=tokens)
diff --git a/siem-converter/app/converter/platforms/logscale/tokenizer.py b/siem-converter/app/converter/platforms/logscale/tokenizer.py
index dd665c7b..cba94b07 100644
--- a/siem-converter/app/converter/platforms/logscale/tokenizer.py
+++ b/siem-converter/app/converter/platforms/logscale/tokenizer.py
@@ -17,8 +17,9 @@
 """
 import re
-from typing import Tuple, Any
+from typing import Tuple, Any, List, Union
 
+from app.converter.core.mixins.logic import ANDLogicOperatorMixin
 from app.converter.core.models.field import Keyword, Field
 from app.converter.core.models.identifier import Identifier
 from app.converter.core.custom_types.tokens import GroupType, LogicalOperatorType, OperatorType
@@ -26,7 +27,7 @@
 from app.converter.tools.utils import get_match_group
 
 
-class LogScaleTokenizer(QueryTokenizer):
+class LogScaleTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
     match_operator_pattern = r"""(?:___field___\s?(?P<match_operator>=|!=))\s?"""
     num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
     double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
@@ -65,7 +66,7 @@ def __get_identifier(self, query: str) -> (list, str):
         else:
             return self.search_field_value(query)
 
-    def tokenize(self, query: str) -> list:
+    def tokenize(self, query: str) -> List[Union[Field, Keyword, Identifier]]:
         tokenized = []
         while query:
             identifier, query = self.__get_identifier(query=query)
@@ -78,4 +79,4 @@
                 tokenized.append(Identifier(token_type=LogicalOperatorType.AND))
             tokenized.append(identifier)
         self._validate_parentheses(tokenized)
-        return tokenized
+        return self.add_and_token_if_missed(tokens=tokenized)
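
Note for reviewers: below is a minimal, self-contained sketch of what the new ANDLogicOperatorMixin does to a token stream, not the project's actual code. The Field and Identifier classes and the string token-type constants here are hypothetical stand-ins for the models in app.converter.core.models and for LogicalOperatorType/GroupType. The logic mirrors the mixin: whenever a field or keyword token is immediately followed by anything other than a logical operator or a closing parenthesis, an explicit AND identifier is inserted, which is how a bare space between conditions gets interpreted as AND.

from typing import List, Union


class Field:
    """Stand-in for app.converter.core.models.field.Field."""
    def __init__(self, name: str):
        self.name = name


class Identifier:
    """Stand-in for app.converter.core.models.identifier.Identifier."""
    def __init__(self, token_type: str):
        self.token_type = token_type


AND, OR, NOT, R_PAREN = "and", "or", "not", ")"
LOGICAL_OPERATORS = {AND, OR, NOT}


def add_and_token_if_missed(tokens: List[Union[Field, Identifier]]) -> List[Union[Field, Identifier]]:
    # Collect the positions where a field token is followed by anything
    # other than a logical operator or a closing parenthesis.
    missed = []
    for index in range(len(tokens) - 1):
        next_token = tokens[index + 1]
        next_is_operator_or_rparen = isinstance(next_token, Identifier) and (
            next_token.token_type in LOGICAL_OPERATORS or next_token.token_type == R_PAREN
        )
        if isinstance(tokens[index], Field) and not next_is_operator_or_rparen:
            missed.append(index + 1)
    # Insert right-to-left so earlier indices stay valid as the list grows.
    for index in reversed(missed):
        tokens.insert(index, Identifier(token_type=AND))
    return tokens


# A query like 'event_id=1 image="cmd.exe"' tokenizes to two adjacent
# field tokens; the bare space between them becomes an explicit AND.
tokens = add_and_token_if_missed([Field("event_id"), Field("image")])
print([t.name if isinstance(t, Field) else t.token_type for t in tokens])
# -> ['event_id', 'and', 'image']

Inserting from the right is also why get_missed_and_token_indices returns the collected indices reversed: each insertion shifts every later position by one, so walking the positions right-to-left keeps the remaining indices valid.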
