Skip to content

Commit c868b92

Browse files
authored
Merge pull request #208 from UncoderIO/field-tokens-separation
separate field tokens
2 parents 7ec3852 + bd9db47 commit c868b92

File tree

17 files changed

+99
-44
lines changed

17 files changed

+99
-44
lines changed

uncoder-core/app/translator/core/functions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,10 @@ def order_to_render(self) -> dict[str, int]:
164164

165165
return {}
166166

167+
@property
168+
def supported_render_names(self) -> set[str]:
169+
return set(self._renders_map)
170+
167171

168172
class PlatformFunctions:
169173
dir_path: str = None

uncoder-core/app/translator/core/mapping.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -188,13 +188,22 @@ def get_source_mapping(self, source_id: str) -> Optional[SourceMapping]:
188188
def default_mapping(self) -> SourceMapping:
189189
return self._source_mappings[DEFAULT_MAPPING_NAME]
190190

191-
def check_fields_mapping_existence(self, field_tokens: list[Field], source_mapping: SourceMapping) -> list[str]:
191+
def check_fields_mapping_existence(
192+
self,
193+
query_field_tokens: list[Field],
194+
function_field_tokens_map: dict[str, list[Field]],
195+
supported_func_render_names: set[str],
196+
source_mapping: SourceMapping,
197+
) -> list[str]:
192198
unmapped = []
193-
for field in field_tokens:
194-
generic_field_name = field.get_generic_field_name(source_mapping.source_id)
195-
mapped_field = source_mapping.fields_mapping.get_platform_field_name(generic_field_name=generic_field_name)
196-
if not mapped_field and field.source_name not in unmapped:
197-
unmapped.append(field.source_name)
199+
200+
for field in query_field_tokens:
201+
self._check_field_mapping_existence(field, source_mapping, unmapped)
202+
203+
for func_name, function_field_tokens in function_field_tokens_map.items():
204+
if func_name in supported_func_render_names:
205+
for field in function_field_tokens:
206+
self._check_field_mapping_existence(field, source_mapping, unmapped)
198207

199208
if self.is_strict_mapping and unmapped:
200209
raise StrictPlatformException(
@@ -203,6 +212,13 @@ def check_fields_mapping_existence(self, field_tokens: list[Field], source_mappi
203212

204213
return unmapped
205214

215+
@staticmethod
216+
def _check_field_mapping_existence(field: Field, source_mapping: SourceMapping, unmapped: list[str]) -> None:
217+
generic_field_name = field.get_generic_field_name(source_mapping.source_id)
218+
mapped_field = source_mapping.fields_mapping.get_platform_field_name(generic_field_name=generic_field_name)
219+
if not mapped_field and field.source_name not in unmapped:
220+
unmapped.append(field.source_name)
221+
206222
@staticmethod
207223
def map_field(field: Field, source_mapping: SourceMapping) -> list[str]:
208224
generic_field_name = field.get_generic_field_name(source_mapping.source_id)

uncoder-core/app/translator/core/models/query_container.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ def __init__(
6565
date: Optional[str] = None,
6666
output_table_fields: Optional[list[Field]] = None,
6767
query_fields: Optional[list[Field]] = None,
68+
function_fields: Optional[list[Field]] = None,
69+
function_fields_map: Optional[dict[str, list[Field]]] = None,
6870
license_: Optional[str] = None,
6971
severity: Optional[str] = None,
7072
references: Optional[list[str]] = None,
@@ -90,6 +92,8 @@ def __init__(
9092
self.date = date or datetime.now().date().strftime("%Y-%m-%d")
9193
self.output_table_fields = output_table_fields or []
9294
self.query_fields = query_fields or []
95+
self.function_fields = function_fields or []
96+
self.function_fields_map = function_fields_map or {}
9397
self.license = license_ or "DRL 1.1"
9498
self.severity = severity or SeverityType.low
9599
self.references = references or []

uncoder-core/app/translator/core/parser.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,19 @@ def get_query_tokens(self, query: str) -> list[QUERY_TOKEN_TYPE]:
6565
@staticmethod
6666
def get_field_tokens(
6767
query_tokens: list[QUERY_TOKEN_TYPE], functions: Optional[list[Function]] = None
68-
) -> list[Field]:
69-
field_tokens = []
68+
) -> tuple[list[Field], list[Field], dict[str, list[Field]]]:
69+
query_field_tokens = []
70+
function_field_tokens = []
71+
function_field_tokens_map = {}
7072
for token in query_tokens:
7173
if isinstance(token, (FieldField, FieldValue, FunctionValue)):
72-
field_tokens.extend(token.fields)
74+
query_field_tokens.extend(token.fields)
7375

74-
if functions:
75-
field_tokens.extend([field for func in functions for field in func.fields])
76+
for func in functions or []:
77+
function_field_tokens.extend(func.fields)
78+
function_field_tokens_map[func.name] = func.fields
7679

77-
return field_tokens
80+
return query_field_tokens, function_field_tokens, function_field_tokens_map
7881

7982
def get_source_mappings(
8083
self, field_tokens: list[Field], log_sources: dict[str, list[Union[int, str]]]

uncoder-core/app/translator/core/render.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -428,14 +428,18 @@ def _generate_from_tokenized_query_container_by_source_mapping(
428428
self, query_container: TokenizedQueryContainer, source_mapping: SourceMapping
429429
) -> str:
430430
unmapped_fields = self.mappings.check_fields_mapping_existence(
431-
query_container.meta_info.query_fields, source_mapping
431+
query_container.meta_info.query_fields,
432+
query_container.meta_info.function_fields_map,
433+
self.platform_functions.manager.supported_render_names,
434+
source_mapping,
432435
)
433436
rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping)
434437
prefix = self.generate_prefix(source_mapping.log_source_signature, rendered_functions.rendered_prefix)
435438

436439
if source_mapping.raw_log_fields:
437440
defined_raw_log_fields = self.generate_raw_log_fields(
438-
fields=query_container.meta_info.query_fields, source_mapping=source_mapping
441+
fields=query_container.meta_info.query_fields + query_container.meta_info.function_fields,
442+
source_mapping=source_mapping,
439443
)
440444
prefix += f"\n{defined_raw_log_fields}"
441445
query = self.generate_query(tokens=query_container.tokens, source_mapping=source_mapping)

uncoder-core/app/translator/platforms/base/aql/parsers/aql.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,13 @@ def _parse_query(self, text: str) -> tuple[str, dict[str, Union[list[str], list[
115115
def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
116116
query, log_sources, functions = self._parse_query(raw_query_container.query)
117117
query_tokens = self.get_query_tokens(query)
118-
field_tokens = self.get_field_tokens(query_tokens, functions.functions)
119-
source_mappings = self.get_source_mappings(field_tokens, log_sources)
118+
query_field_tokens, function_field_tokens, function_field_tokens_map = self.get_field_tokens(
119+
query_tokens, functions.functions
120+
)
121+
source_mappings = self.get_source_mappings(query_field_tokens + function_field_tokens, log_sources)
120122
meta_info = raw_query_container.meta_info
121-
meta_info.query_fields = field_tokens
123+
meta_info.query_fields = query_field_tokens
124+
meta_info.function_fields = function_field_tokens
125+
meta_info.function_fields_map = function_field_tokens_map
122126
meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
123127
return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info, functions=functions)

uncoder-core/app/translator/platforms/base/lucene/parsers/lucene.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]]]:
4848
def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
4949
query, log_sources = self._parse_query(raw_query_container.query)
5050
query_tokens = self.get_query_tokens(query)
51-
field_tokens = self.get_field_tokens(query_tokens)
52-
source_mappings = self.get_source_mappings(field_tokens, log_sources)
51+
query_field_tokens, _, _ = self.get_field_tokens(query_tokens)
52+
source_mappings = self.get_source_mappings(query_field_tokens, log_sources)
5353
meta_info = raw_query_container.meta_info
54-
meta_info.query_fields = field_tokens
54+
meta_info.query_fields = query_field_tokens
5555
meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
5656
return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info)

uncoder-core/app/translator/platforms/base/spl/functions/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ def parse(self, query: str) -> tuple[str, ParsedFunctions]:
2626
functions = query.split(self.function_delimiter)
2727
result_query = self.prepare_query(functions[0])
2828
for func in functions[1:]:
29-
split_func = func.strip().split(" ")
29+
func = func.strip()
30+
split_func = func.split(" ")
3031
func_name, func_body = split_func[0], " ".join(split_func[1:])
3132
try:
3233
func_parser = self.manager.get_hof_parser(func_name)

uncoder-core/app/translator/platforms/base/spl/parsers/spl.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
class SplQueryParser(PlatformQueryParser):
3131
log_source_pattern = r"^___source_type___\s*=\s*(?:\"(?P<d_q_value>[%a-zA-Z_*:0-9\-/]+)\"|(?P<value>[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?" # noqa: E501
32-
rule_name_pattern = r"`(?P<name>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s])*)`"
32+
rule_name_pattern = r"`(?P<name>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s])*)`" # noqa: RUF001
3333
log_source_key_types = ("index", "source", "sourcetype", "sourcecategory")
3434

3535
platform_functions: SplFunctions = None
@@ -56,7 +56,7 @@ def _parse_log_sources(self, query: str) -> tuple[dict[str, list[str]], str]:
5656
def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]], ParsedFunctions]:
5757
if re.match(self.rule_name_pattern, query):
5858
search = re.search(self.rule_name_pattern, query, flags=re.IGNORECASE)
59-
query = query[:search.start()] + query[search.end():]
59+
query = query[: search.start()] + query[search.end() :]
6060
query = query.strip()
6161
log_sources, query = self._parse_log_sources(query)
6262
query, functions = self.platform_functions.parse(query)
@@ -72,9 +72,13 @@ def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContain
7272

7373
query, log_sources, functions = self._parse_query(raw_query_container.query)
7474
query_tokens = self.get_query_tokens(query)
75-
field_tokens = self.get_field_tokens(query_tokens, functions.functions)
76-
source_mappings = self.get_source_mappings(field_tokens, log_sources)
75+
query_field_tokens, function_field_tokens, function_field_tokens_map = self.get_field_tokens(
76+
query_tokens, functions.functions
77+
)
78+
source_mappings = self.get_source_mappings(query_field_tokens + function_field_tokens, log_sources)
7779
meta_info = raw_query_container.meta_info
78-
meta_info.query_fields = field_tokens
80+
meta_info.query_fields = query_field_tokens
81+
meta_info.function_fields = function_field_tokens
82+
meta_info.function_fields_map = function_field_tokens_map
7983
meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
8084
return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info, functions=functions)

uncoder-core/app/translator/platforms/base/sql/parsers/sql.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]]]:
4343
def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContainer:
4444
query, log_sources = self._parse_query(raw_query_container.query)
4545
query_tokens = self.get_query_tokens(query)
46-
field_tokens = self.get_field_tokens(query_tokens)
47-
source_mappings = self.get_source_mappings(field_tokens, log_sources)
46+
query_field_tokens, _, _ = self.get_field_tokens(query_tokens)
47+
source_mappings = self.get_source_mappings(query_field_tokens, log_sources)
4848
meta_info = raw_query_container.meta_info
49-
meta_info.query_fields = field_tokens
49+
meta_info.query_fields = query_field_tokens
5050
meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
5151
return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info)

0 commit comments

Comments (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy