remove comments from incoming data #102

Merged 8 commits on May 8, 2024
9 changes: 7 additions & 2 deletions uncoder-core/app/translator/core/parser.py
@@ -15,14 +15,14 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-----------------------------------------------------------------
"""

import re
from abc import ABC, abstractmethod
from typing import Union

from app.translator.core.exceptions.parser import TokenizerGeneralException
from app.translator.core.functions import PlatformFunctions
from app.translator.core.mapping import BasePlatformMappings, SourceMapping
from app.translator.core.models.field import FieldValue, Field, FieldValue, Keyword
from app.translator.core.models.field import Field, FieldValue, Keyword
from app.translator.core.models.functions.base import ParsedFunctions
from app.translator.core.models.identifier import Identifier
from app.translator.core.models.platform_details import PlatformDetails
@@ -31,6 +31,11 @@


class QueryParser(ABC):
wrapped_with_comment_pattern: str = None

def remove_comments(self, text: str) -> str:
return re.sub(self.wrapped_with_comment_pattern, "\n", text, flags=re.MULTILINE).strip()

def parse_raw_query(self, text: str, language: str) -> RawQueryContainer:
return RawQueryContainer(query=text, language=language)
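
For illustration, a minimal standalone sketch of how this hook behaves, using the same single-line // pattern that the Chronicle, Lucene, and Sentinel parsers declare later in this diff (the demo subclass and query below are hypothetical):

import re
from abc import ABC


# Minimal stand-in for the QueryParser base class above; the real class lives in
# uncoder-core/app/translator/core/parser.py and has more members.
class QueryParser(ABC):
    wrapped_with_comment_pattern: str = None

    def remove_comments(self, text: str) -> str:
        return re.sub(self.wrapped_with_comment_pattern, "\n", text, flags=re.MULTILINE).strip()


class DemoParser(QueryParser):  # hypothetical subclass, not part of this PR
    wrapped_with_comment_pattern = r"^\s*//.*(?:\n|$)"


raw = "// keep failed logons only\nEventID=4625 and LogonType=10"
print(DemoParser().remove_comments(raw))  # -> EventID=4625 and LogonType=10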

4 changes: 2 additions & 2 deletions uncoder-core/app/translator/core/render.py
@@ -126,13 +126,13 @@ def apply_field_value(self, field: str, operator: Identifier, value: DEFAULT_VAL

class QueryRender(ABC):
comment_symbol: str = None
is_multi_line_comment: bool = False
is_single_line_comment: bool = False
unsupported_functions_text = "Unsupported functions were excluded from the result query:"

platform_functions: PlatformFunctions = PlatformFunctions()

def render_not_supported_functions(self, not_supported_functions: list) -> str:
line_template = f"{self.comment_symbol} " if self.comment_symbol and self.is_multi_line_comment else ""
line_template = f"{self.comment_symbol} " if self.comment_symbol and self.is_single_line_comment else ""
not_supported_functions_str = "\n".join(line_template + func.lstrip() for func in not_supported_functions)
return "\n\n" + self.wrap_with_comment(f"{self.unsupported_functions_text}\n{not_supported_functions_str}")

2 changes: 1 addition & 1 deletion uncoder-core/app/translator/platforms/__init__.py
@@ -41,9 +41,9 @@
from app.translator.platforms.microsoft.renders.microsoft_sentinel_rule import MicrosoftSentinelRuleRender
from app.translator.platforms.opensearch.parsers.opensearch import OpenSearchQueryParser
from app.translator.platforms.opensearch.renders.opensearch import OpenSearchQueryRender
from app.translator.platforms.palo_alto.renders.cortex_xsiam import CortexXQLQueryRender
from app.translator.platforms.opensearch.renders.opensearch_cti import OpenSearchCTI
from app.translator.platforms.opensearch.renders.opensearch_rule import OpenSearchRuleRender
from app.translator.platforms.palo_alto.renders.cortex_xsiam import CortexXQLQueryRender
from app.translator.platforms.qradar.parsers.qradar import QradarQueryParser
from app.translator.platforms.qradar.renders.qradar import QradarQueryRender
from app.translator.platforms.qradar.renders.qradar_cti import QRadarCTI
@@ -34,6 +34,8 @@ class AthenaQueryParser(PlatformQueryParser):
query_delimiter_pattern = r"\sFROM\s\S*\sWHERE\s"
table_pattern = r"\sFROM\s(?P<table>[a-zA-Z\.\-\*]+)\sWHERE\s"

wrapped_with_comment_pattern = r"^\s*--.*(?:\n|$)"

def _parse_query(self, query: str) -> tuple[str, dict[str, Optional[str]]]:
log_source = {"table": None}
if re.search(self.query_delimiter_pattern, query, flags=re.IGNORECASE):
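
A quick illustration of this pattern on a hypothetical query: with re.MULTILINE, only lines that begin with -- are dropped, while a trailing -- comment on a code line is left untouched:

import re

wrapped_with_comment_pattern = r"^\s*--.*(?:\n|$)"
query = (
    "-- RDP logons\n"
    "SELECT * FROM eventlog\n"
    "WHERE event_id = '4624' AND logon_type = '10'  -- inline note stays\n"
)
print(re.sub(wrapped_with_comment_pattern, "\n", query, flags=re.MULTILINE).strip())
# SELECT * FROM eventlog
# WHERE event_id = '4624' AND logon_type = '10'  -- inline note stays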
@@ -87,7 +87,7 @@ class AthenaQueryRender(PlatformQueryRender):
field_value_map = AthenaFieldValue(or_token=or_token)
query_pattern = "{prefix} WHERE {query} {functions}"
comment_symbol = "--"
is_multi_line_comment = True
is_single_line_comment = True

def generate_prefix(self, log_source_signature: LogSourceSignature) -> str:
table = str(log_source_signature) if str(log_source_signature) else "eventlog"
@@ -29,6 +29,8 @@ class LuceneQueryParser(PlatformQueryParser):
log_source_pattern = r"___source_type___\s*(?:[:=])\s*(?:\"?(?P<d_q_value>[%a-zA-Z_*:0-9\-/]+)\"|(?P<value>[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?" # noqa: E501
log_source_key_types = ("index", "event\.category")

wrapped_with_comment_pattern = r"^\s*//.*(?:\n|$)"

def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]]]:
log_sources = {}
for source_type in self.log_source_key_types:
@@ -133,7 +133,7 @@ class LuceneQueryRender(PlatformQueryRender):
query_pattern = "{query} {functions}"

comment_symbol = "//"
is_multi_line_comment = True
is_single_line_comment = True

def generate_prefix(self, log_source_signature: LuceneLogSourceSignature) -> str: # noqa: ARG002
return ""
2 changes: 2 additions & 0 deletions uncoder-core/app/translator/platforms/base/spl/parsers/spl.py
@@ -34,6 +34,8 @@ class SplQueryParser(PlatformQueryParser):
platform_functions: SplFunctions = None
tokenizer = SplTokenizer()

wrapped_with_comment_pattern = r"^\s*```(?:|\n|.)*```"

def _parse_log_sources(self, query: str) -> tuple[dict[str, list[str]], str]:
log_sources = {}
for source_type in self.log_source_key_types:
@@ -30,6 +30,8 @@ class ChronicleQueryParser(PlatformQueryParser):
tokenizer: ChronicleQueryTokenizer = ChronicleQueryTokenizer()
details: PlatformDetails = chronicle_query_details

wrapped_with_comment_pattern = r"^\s*//.*(?:\n|$)"

def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
tokens, source_mappings = self.get_tokens_and_source_mappings(raw_query_container.query, {})
fields_tokens = self.get_fields_tokens(tokens=tokens)
@@ -16,7 +16,6 @@
limitations under the License.
-----------------------------------------------------------------
"""

from typing import Union

from app.translator.const import DEFAULT_VALUE_TYPE
@@ -109,4 +108,5 @@ class ChronicleQueryRender(PlatformQueryRender):

field_value_map = ChronicleFieldValue(or_token=or_token)
query_pattern = "{query} {functions}"
comment_symbol = r"//"
comment_symbol = "//"
is_single_line_comment = True
@@ -15,7 +15,6 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-----------------------------------------------------------------
"""

from app.translator.core.models.platform_details import PlatformDetails
from app.translator.platforms.base.spl.parsers.spl import SplQueryParser
from app.translator.platforms.crowdstrike.const import crowdstrike_query_details
@@ -31,3 +30,5 @@ class CrowdStrikeQueryParser(SplQueryParser):

mappings: CrowdstrikeMappings = crowdstrike_mappings
platform_functions: CrowdStrikeFunctions = crowd_strike_functions

wrapped_with_comment_pattern = r"^\s*`(?:|\n|.)*`"
@@ -205,7 +205,7 @@ class LogRhythmAxonQueryRender(PlatformQueryRender):

mappings: LogRhythmAxonMappings = logrhythm_axon_mappings
comment_symbol = "//"
is_multi_line_comment = True
is_single_line_comment = True
is_strict_mapping = True

def generate_prefix(self, log_source_signature: LogSourceSignature) -> str:
@@ -16,6 +16,7 @@
-----------------------------------------------------------------
"""


from app.translator.core.models.functions.base import ParsedFunctions
from app.translator.core.models.platform_details import PlatformDetails
from app.translator.core.models.query_container import RawQueryContainer, TokenizedQueryContainer
@@ -32,6 +33,8 @@ class LogScaleQueryParser(PlatformQueryParser):
tokenizer = LogScaleTokenizer()
mappings: LogScaleMappings = logscale_mappings

wrapped_with_comment_pattern = r"^\s*/\*(?:|\n|.)*\*/"

def _parse_query(self, query: str) -> tuple[str, ParsedFunctions]:
functions, query = self.platform_functions.parse(query)
return query, functions
@@ -16,6 +16,7 @@
-----------------------------------------------------------------
"""


from app.translator.core.models.functions.base import ParsedFunctions
from app.translator.core.models.platform_details import PlatformDetails
from app.translator.core.models.query_container import RawQueryContainer, TokenizedQueryContainer
@@ -32,6 +33,8 @@ class MicrosoftSentinelQueryParser(PlatformQueryParser):
tokenizer = MicrosoftSentinelTokenizer()
details: PlatformDetails = microsoft_sentinel_query_details

wrapped_with_comment_pattern = r"^\s*//.*(?:\n|$)"

def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]], ParsedFunctions]:
table, query, functions = self.platform_functions.parse(query)
log_sources = {"table": [table]}
@@ -130,7 +130,7 @@ class MicrosoftSentinelQueryRender(PlatformQueryRender):

mappings: MicrosoftSentinelMappings = microsoft_sentinel_mappings
comment_symbol = "//"
is_multi_line_comment = True
is_single_line_comment = True

def __init__(self):
super().__init__()
@@ -20,12 +20,15 @@

from app.translator.const import DEFAULT_VALUE_TYPE
from app.translator.core.exceptions.render import UnsupportedRenderMethod
from app.translator.core.mapping import LogSourceSignature
from app.translator.core.models.platform_details import PlatformDetails
from app.translator.core.render import BaseQueryFieldValue, PlatformQueryRender
from app.translator.platforms.palo_alto.const import cortex_xql_query_details
from app.translator.platforms.palo_alto.escape_manager import cortex_xql_escape_manager
from app.translator.platforms.palo_alto.mapping import CortexXSIAMMappings, cortex_xsiam_mappings
from app.translator.platforms.palo_alto.mapping import (
CortexXSIAMLogSourceSignature,
CortexXSIAMMappings,
cortex_xsiam_mappings,
)


class CortexXSIAMFieldValue(BaseQueryFieldValue):
@@ -71,7 +74,8 @@ def endswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:

def startswith_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
if isinstance(value, list):
return f"({self.or_token.join(self.startswith_modifier(field=field, value=self.apply_value(v)) for v in value)})"
clause = self.or_token.join(self.startswith_modifier(field=field, value=self.apply_value(v)) for v in value)
return f"({clause})"
return f'{field} ~= "{self.apply_value(value)}.*"'

def regex_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
@@ -108,9 +112,9 @@ class CortexXQLQueryRender(PlatformQueryRender):
field_value_map = CortexXSIAMFieldValue(or_token=or_token)
query_pattern = "{prefix} | filter {query} {functions}"
comment_symbol = "//"
is_multi_line_comment = False
is_single_line_comment = False

def generate_prefix(self, log_source_signature: LogSourceSignature) -> str:
def generate_prefix(self, log_source_signature: CortexXSIAMLogSourceSignature) -> str:
preset = (
f"preset = {log_source_signature._default_source.get('preset')}"
if log_source_signature._default_source.get("preset")
@@ -49,6 +49,8 @@ class QradarQueryParser(PlatformQueryParser):

table_pattern = r"\sFROM\s(?P<table>[a-zA-Z\.\-\*]+)\sWHERE\s"

wrapped_with_comment_pattern = r"^\s*/\*(?:|\n|.)*\*/"

def __clean_query(self, query: str) -> str:
for func_name in self.log_source_functions:
pattern = self.log_source_function_pattern.replace("___func_name___", func_name)
2 changes: 2 additions & 0 deletions uncoder-core/app/translator/platforms/roota/parsers/roota.py
@@ -38,6 +38,8 @@ class RootAParser(QueryParser, YamlRuleMixin):
"license",
}

wrapped_with_comment_pattern = r"^\s*#.*(?:\n|$)"

def __parse_meta_info(self, rule: dict) -> MetaInfoContainer:
mitre_attack = rule.get("mitre-attack") or []
mitre_tags = [i.strip("") for i in mitre_attack.split(",")] if isinstance(mitre_attack, str) else mitre_attack
14 changes: 10 additions & 4 deletions uncoder-core/app/translator/platforms/sigma/parsers/sigma.py
@@ -23,21 +23,24 @@
from app.translator.core.exceptions.core import SigmaRuleValidationException
from app.translator.core.mixins.rule import YamlRuleMixin
from app.translator.core.models.field import FieldValue, Field
from app.translator.core.models.query_container import MetaInfoContainer, TokenizedQueryContainer
from app.translator.core.models.query_container import MetaInfoContainer, TokenizedQueryContainer, RawQueryContainer
from app.translator.core.models.platform_details import PlatformDetails
from app.translator.core.parser import QueryParser
from app.translator.core.tokenizer import QueryTokenizer
from app.translator.platforms.sigma.const import SIGMA_RULE_DETAILS
from app.translator.platforms.sigma.mapping import SigmaMappings, sigma_mappings
from app.translator.platforms.sigma.tokenizer import SigmaConditionTokenizer, SigmaTokenizer


class SigmaParser(YamlRuleMixin):
class SigmaParser(QueryParser, YamlRuleMixin):
details: PlatformDetails = PlatformDetails(**SIGMA_RULE_DETAILS)
condition_tokenizer = SigmaConditionTokenizer()
tokenizer: SigmaTokenizer = SigmaTokenizer()
mappings: SigmaMappings = sigma_mappings
mandatory_fields = {"title", "description", "logsource", "detection"}

wrapped_with_comment_pattern = r"^\s*#.*(?:\n|$)"

@staticmethod
def __parse_false_positives(false_positives: Union[str, list[str], None]) -> list:
if isinstance(false_positives, str):
@@ -75,8 +78,11 @@ def __validate_rule(self, rule: dict):
if missing_fields := self.mandatory_fields.difference(set(rule.keys())):
raise SigmaRuleValidationException(missing_fields=list(missing_fields))

def parse(self, text: str) -> TokenizedQueryContainer:
sigma_rule = self.load_rule(text=text)
def parse_raw_query(self, text: str, language: str) -> RawQueryContainer:
return RawQueryContainer(query=text, language=language)

def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
sigma_rule = self.load_rule(text=raw_query_container.query)
self.__validate_rule(rule=sigma_rule)
log_sources = {
key: [value]
@@ -46,7 +46,7 @@ class SigmaRender(QueryRender):
keyword_num = 0

comment_symbol = "#"
is_multi_line_comment = True
is_single_line_comment = True

mappings: SigmaMappings = sigma_mappings
details: PlatformDetails = PlatformDetails(**SIGMA_RULE_DETAILS)
6 changes: 2 additions & 4 deletions uncoder-core/app/translator/translator.py
@@ -44,11 +44,9 @@ def __is_one_vendor_translation(source: str, target: str) -> bool:
@handle_translation_exceptions
def __parse_incoming_data(
self, text: str, source: str, target: Optional[str] = None
) -> tuple[Optional[RawQueryContainer], Optional[TokenizedQueryContainer]]:
) -> tuple[RawQueryContainer, Optional[TokenizedQueryContainer]]:
parser = self.__get_parser(source)
if isinstance(parser, SigmaParser):
return None, parser.parse(text)

text = parser.remove_comments(text)
raw_query_container = parser.parse_raw_query(text, language=source)
tokenized_query_container = None
if not (target and self.__is_one_vendor_translation(raw_query_container.language, target)):
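
Taken together, the parsing flow after this change looks roughly like the sketch below; the free function and the same_vendor flag are simplified stand-ins for the Translator's private helpers, and SigmaParser now goes through the same path as every other parser:

from typing import Optional


def parse_incoming_data(parser, text: str, source: str, target: Optional[str] = None, same_vendor: bool = False):
    # Mirrors Translator.__parse_incoming_data after this PR: comments are stripped for
    # every platform (SigmaParser included) before the raw query is built and tokenized.
    text = parser.remove_comments(text)
    raw_query_container = parser.parse_raw_query(text, language=source)
    tokenized_query_container = None
    if not (target and same_vendor):  # same_vendor stands in for __is_one_vendor_translation(...)
        tokenized_query_container = parser.parse(raw_query_container)
    return raw_query_container, tokenized_query_container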