Skip to content

Commit e2ad267

Browse files
authored
Merge pull request #66 from UncoderIO/lucene-wildcards-and-regex-support
lucene wildcards and regex processing
2 parents 600523a + 3c83574 commit e2ad267

32 files changed

+734
-467
lines changed

translator/app/translator/const.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
from os.path import abspath, dirname
22
from typing import Union
33

4-
from app.translator.core.str_value_processing import StrValue
4+
from app.translator.core.str_value_manager import StrValue
55

66
APP_PATH = dirname(abspath(__file__))
77

translator/app/translator/core/custom_types/values.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@ class ValueType(CustomEnum):
99
back_quotes_value = "b_q_value"
1010
no_quotes_value = "no_q_value"
1111
bool_value = "bool_value"
12-
regular_expression_value = "re_value"
12+
regex_value = "re_value"
1313
greater_than_or_equal = "gte_value"
1414
less_than_or_equal = "lte_value"
15+
multi_value = "multi_value"
16+
ip = "ip"

translator/app/translator/core/models/field.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
from app.translator.core.custom_types.tokens import OperatorType
44
from app.translator.core.mapping import DEFAULT_MAPPING_NAME, SourceMapping
55
from app.translator.core.models.identifier import Identifier
6-
from app.translator.core.str_value_processing import StrValue
6+
from app.translator.core.str_value_manager import StrValue
77

88

99
class Field:
@@ -59,7 +59,7 @@ class Keyword:
5959
def __init__(self, value: Union[str, list[str]]):
6060
self.operator: Identifier = Identifier(token_type=OperatorType.KEYWORD)
6161
self.name = "keyword"
62-
self.values: [str] = []
62+
self.values = []
6363
self.__add_value(value=value)
6464

6565
@property

translator/app/translator/core/render.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333
from app.translator.core.models.identifier import Identifier
3434
from app.translator.core.models.parser_output import MetaInfoContainer
3535
from app.translator.core.models.platform_details import PlatformDetails
36-
from app.translator.core.str_value_processing import StrValueManager
36+
from app.translator.core.str_value_manager import StrValueManager
3737

3838

3939
class BaseQueryFieldValue(ABC):
translator/app/translator/core/str_value_manager.py

Lines changed: 229 additions & 0 deletions
Original file line number | Diff line number | Diff line change
1+
"""
2+
Uncoder IO Commercial Edition License
3+
-----------------------------------------------------------------
4+
Copyright (c) 2023 SOC Prime, Inc.
5+
6+
This file is part of the Uncoder IO Commercial Edition ("CE") and is
7+
licensed under the Uncoder IO Non-Commercial License (the "License");
8+
you may not use this file except in compliance with the License.
9+
You may obtain a copy of the License at
10+
11+
https://github.com/UncoderIO/UncoderIO/blob/main/LICENSE
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
-----------------------------------------------------------------
17+
"""
18+
19+
from typing import ClassVar, Optional, TypeVar, Union
20+
21+
from app.translator.core.custom_types.values import ValueType
22+
from app.translator.core.escape_manager import EscapeManager
23+
24+
25+
class BaseSpecSymbol:
    """Common base of the marker classes that stand for a special (non-literal) symbol in a split value."""


# Type variable bounded by BaseSpecSymbol; used in the annotations of split values.
SpecSymbolType = TypeVar("SpecSymbolType", bound=BaseSpecSymbol)


class SingleSymbolWildCard(BaseSpecSymbol):
    """Marker for a single-symbol wildcard; mapped to ``?`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class UnboundLenWildCard(BaseSpecSymbol):
    """Marker for an unbounded-length wildcard; mapped to ``*`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReEndOfStrSymbol(BaseSpecSymbol):
    """Marker for the regex end-of-string symbol; mapped to ``$`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReWordSymbol(BaseSpecSymbol):
    r"""Marker for the regex word symbol; mapped to ``\w`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReDigitalSymbol(BaseSpecSymbol):
    r"""Marker for the regex digit symbol; mapped to ``\d`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReAnySymbol(BaseSpecSymbol):
    """Marker for the regex any-symbol; mapped to ``.`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReWhiteSpaceSymbol(BaseSpecSymbol):
    r"""Marker for the regex whitespace symbol; mapped to ``\s`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReOneOrMoreQuantifier(BaseSpecSymbol):
    """Marker for the regex one-or-more quantifier; mapped to ``+`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReZeroOrMoreQuantifier(BaseSpecSymbol):
    """Marker for the regex zero-or-more quantifier; mapped to ``*`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReZeroOrOneQuantifier(BaseSpecSymbol):
    """Marker for the regex zero-or-one quantifier; mapped to ``?`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReLeftParenthesis(BaseSpecSymbol):
    """Marker for a regex left parenthesis; mapped to ``(`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReRightParenthesis(BaseSpecSymbol):
    """Marker for a regex right parenthesis; mapped to ``)`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReLeftSquareBracket(BaseSpecSymbol):
    """Marker for a regex left square bracket; mapped to ``[`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReRightSquareBracket(BaseSpecSymbol):
    """Marker for a regex right square bracket; mapped to ``]`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReLeftCurlyBracket(BaseSpecSymbol):
    """Marker for a regex left curly bracket; mapped to ``{`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReRightCurlyBracket(BaseSpecSymbol):
    """Marker for a regex right curly bracket; mapped to ``}`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReOrOperator(BaseSpecSymbol):
    """Marker for the regex OR operator; mapped to ``|`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReCaretSymbol(BaseSpecSymbol):
    """Marker for the regex caret symbol; mapped to ``^`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReCommaSymbol(BaseSpecSymbol):
    """Marker for a comma treated as a regex special symbol; mapped to ``,`` in CONTAINER_SPEC_SYMBOLS_MAP."""


class ReHyphenSymbol(BaseSpecSymbol):
    """Marker for a hyphen treated as a regex special symbol; mapped to ``-`` in CONTAINER_SPEC_SYMBOLS_MAP."""
110+
111+
112+
class StrValue(str):
    """A ``str`` that also carries its content split into literal chunks and special-symbol markers.

    The instance itself is the plain ``value`` text; the split representation
    is kept in the ``split_value`` attribute.
    """

    def __new__(cls, value: str, split_value: Optional[list[Union[str, SpecSymbolType]]] = None):  # noqa: ARG003
        # ``str`` is immutable, so the text must be supplied when the object is allocated.
        # ``split_value`` is accepted only so that the signature matches ``__init__``.
        instance = super().__new__(cls, value)
        return instance

    def __init__(
        self,
        value: str,  # noqa: ARG002
        split_value: Optional[list[Union[str, SpecSymbolType]]] = None,
    ) -> None:
        # A missing or empty split is replaced with a fresh empty list.
        self.split_value = split_value if split_value else []

    @property
    def has_spec_symbols(self) -> bool:
        """Return True if at least one element of ``split_value`` is a ``BaseSpecSymbol`` instance."""
        for element in self.split_value:
            if isinstance(element, BaseSpecSymbol):
                return True
        return False
126+
127+
128+
# Text emitted for each special-symbol marker class when a container is rendered
# back into a string (used as StrValueManager.container_spec_symbols_map).
CONTAINER_SPEC_SYMBOLS_MAP: dict[type[BaseSpecSymbol], str] = {
    # wildcards
    SingleSymbolWildCard: "?",
    UnboundLenWildCard: "*",
    # regex symbol classes
    ReAnySymbol: ".",
    ReWordSymbol: r"\w",
    ReDigitalSymbol: r"\d",
    ReWhiteSpaceSymbol: r"\s",
    # regex quantifiers
    ReZeroOrMoreQuantifier: "*",
    ReOneOrMoreQuantifier: "+",
    ReZeroOrOneQuantifier: "?",
    # regex brackets
    ReLeftSquareBracket: "[",
    ReRightSquareBracket: "]",
    ReLeftParenthesis: "(",
    ReRightParenthesis: ")",
    ReLeftCurlyBracket: "{",
    ReRightCurlyBracket: "}",
    # remaining regex symbols
    ReOrOperator: "|",
    ReCaretSymbol: "^",
    ReEndOfStrSymbol: "$",
    ReCommaSymbol: ",",
    ReHyphenSymbol: "-",
}
150+
151+
152+
class StrValueManager:
    """Builds ``StrValue`` containers from raw strings and renders containers back into strings.

    At this level every symbol map except ``container_spec_symbols_map`` is empty and
    ``escape_manager`` is ``None``; presumably subclasses provide real values -- TODO confirm.
    """

    # Object whose ``escape(value, value_type)`` is applied to literal chunks when rendering.
    escape_manager: EscapeManager = None
    # Not read by any method of this class.
    str_spec_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = {}
    # Characters that turn into a special symbol only when preceded by a backslash.
    re_str_alpha_num_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = {}
    # Characters that are a special symbol when NOT preceded by a backslash.
    re_str_spec_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = {}
    # Marker class -> text emitted for it by ``from_container_to_str``.
    container_spec_symbols_map: ClassVar[dict[type[BaseSpecSymbol], str]] = CONTAINER_SPEC_SYMBOLS_MAP

    @staticmethod
    def from_str_to_container(value: str) -> StrValue:
        """Wrap a plain string into a container whose split consists of that single string."""
        whole_value_split = [value]
        return StrValue(value=value, split_value=whole_value_split)

    def from_re_str_to_container(self, value: str) -> StrValue:
        """Parse a regex string into a container of literal chunks and special-symbol markers.

        A backslash escapes the next character: ``\\\\`` yields a literal backslash, a character
        found in ``re_str_alpha_num_symbols_map`` yields its marker, anything else is kept literally.
        An unescaped character found in ``re_str_spec_symbols_map`` yields its marker, except for
        a comma outside curly brackets and a hyphen that is outside square brackets or directly
        follows the opening square bracket -- those stay literal.
        """
        escaped_symbols = self.re_str_alpha_num_symbols_map
        spec_symbols = self.re_str_spec_symbols_map

        tokens = []
        previous = None
        in_curly = False
        in_square = False

        for symbol in value:
            if previous == "\\":
                if symbol == "\\":
                    # Escaped backslash: keep one literal backslash and forget it,
                    # so that it does not escape the following character.
                    tokens.append(symbol)
                    previous = None
                    continue
                tokens.append(escaped_symbols[symbol]() if symbol in escaped_symbols else symbol)
            elif symbol in spec_symbols:
                if symbol in ("{", "}"):
                    in_curly = symbol == "{"
                elif symbol in ("[", "]"):
                    in_square = symbol == "["
                elif (symbol == "," and not in_curly) or (
                    symbol == "-" and (not in_square or isinstance(tokens[-1], ReLeftSquareBracket))
                ):
                    # Here the comma / hyphen carries no regex meaning, keep it as text.
                    tokens.append(symbol)
                    continue
                tokens.append(spec_symbols[symbol]())
            elif symbol != "\\":
                tokens.append(symbol)
            # A lone backslash is not stored; it only marks the next character as escaped.

            previous = symbol

        return StrValue(value, self._concat(tokens))

    def from_container_to_str(self, container: StrValue, value_type: str = ValueType.value) -> str:
        """Render a container into a string.

        Literal chunks go through ``escape_manager.escape``; markers are replaced with their text
        from ``container_spec_symbols_map``. Markers without a (non-empty) mapping are omitted.
        """
        rendered = []
        for element in container.split_value:
            if isinstance(element, str):
                rendered.append(self.escape_manager.escape(element, value_type))
                continue
            if not isinstance(element, BaseSpecSymbol):
                continue
            pattern = self.container_spec_symbols_map.get(type(element))
            if pattern:
                rendered.append(pattern)

        return "".join(rendered)

    @staticmethod
    def _concat(split: list[Union[str, SpecSymbolType]]) -> list[Union[str, SpecSymbolType]]:
        """Merge each run of adjacent strings into one string, keeping markers as separators."""
        merged = []
        literal = ""
        for item in split:
            if isinstance(item, str):
                literal += item
                continue
            if not isinstance(item, BaseSpecSymbol):
                continue
            if literal:
                # Flush the text collected before this marker.
                merged.append(literal)
                literal = ""
            merged.append(item)

        if literal:
            merged.append(literal)

        return merged

translator/app/translator/core/str_value_processing.py

Lines changed: 0 additions & 90 deletions
This file was deleted.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy