Skip to content

Commit dcf1125

Browse files
committed
parse regex
1 parent 2678646 commit dcf1125

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

uncoder-core/app/translator/platforms/base/sql/tokenizer.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
from app.translator.core.custom_types.tokens import OperatorType
2323
from app.translator.core.custom_types.values import ValueType
2424
from app.translator.core.models.query_tokens.field_value import FieldValue
25+
from app.translator.core.models.query_tokens.function_value import FunctionValue
2526
from app.translator.core.models.query_tokens.identifier import Identifier
27+
from app.translator.core.models.query_tokens.keyword import Keyword
2628
from app.translator.core.tokenizer import QueryTokenizer
2729
from app.translator.platforms.base.sql.str_value_manager import sql_str_value_manager
2830
from app.translator.tools.utils import get_match_group
@@ -49,6 +51,7 @@ class SqlTokenizer(QueryTokenizer):
4951
)
5052
_value_pattern = rf"{num_value_pattern}|{bool_value_pattern}|{single_quotes_value_pattern}"
5153
multi_value_pattern = rf"""\((?P<{ValueType.multi_value}>\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*')*)\)""" # noqa: E501
54+
re_field_value_pattern = rf"""regexp_like\({field_pattern},\s*'(?P<{ValueType.regex_value}>(?:[:a-zA-Z\*\?0-9=+%#№;\-_,"\.$&^@!\{{\}}\[\]\s?<>|]|\\\'|\\)+)'\)""" # noqa: E501
5255

5356
wildcard_symbol = "%"
5457

@@ -77,6 +80,22 @@ def create_field_value(field_name: str, operator: Identifier, value: Union[str,
7780
field_name = field_name.strip('"')
7881
return FieldValue(source_name=field_name, operator=operator, value=value)
7982

83+
def _search_re_field_value(self, query: str) -> Optional[tuple[FieldValue, str]]:
    """Try to parse a leading ``regexp_like(<field>, '<pattern>')`` expression.

    On success, return the parsed FieldValue token (REGEX operator) together
    with the unconsumed remainder of the query; return None when the query
    does not start with a regexp_like() call.
    """
    match = re.match(self.re_field_value_pattern, query, re.IGNORECASE)
    if match is None:
        return None
    groups = match.groupdict()
    # Convert the raw regex source into the platform's value container.
    regex_value = self.str_value_manager.from_re_str_to_container(groups[ValueType.regex_value])
    token = self.create_field_value(
        groups["field_name"],
        Identifier(token_type=OperatorType.REGEX),
        regex_value,
    )
    return token, query[match.end():]
90+
8091
def tokenize(self, query: str) -> list:
    """Tokenize a SQL query after dropping any ``ESCAPE '<char>'`` clause.

    The base tokenizer does not understand the LIKE ... ESCAPE syntax, so
    the clause is removed up front before delegating.
    """
    cleaned_query = re.sub(r"\s*ESCAPE\s*'.'", "", query)
    return super().tokenize(cleaned_query)
94+
95+
def _get_next_token(
    self, query: str
) -> tuple[Union[FieldValue, FunctionValue, Keyword, Identifier, list[Union[FieldValue, Identifier]]], str]:
    """Return the next parsed token and the remaining unparsed query text.

    Tries the SQL-specific ``regexp_like()`` field/value pattern first; when
    it does not match, defers to the base tokenizer's generic matching.
    """
    # A single strip("\n ") removes any mix of leading/trailing newlines and
    # spaces in one pass. The previous strip("\n").strip(" ").strip("\n")
    # chain left residual whitespace for inputs like " \n query", which would
    # defeat the start-anchored re.match in _search_re_field_value.
    query = query.strip("\n ")
    if search_result := self._search_re_field_value(query):
        return search_result
    return super()._get_next_token(query)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy