@@ -22,7 +22,9 @@
 from app.translator.core.custom_types.tokens import OperatorType
 from app.translator.core.custom_types.values import ValueType
 from app.translator.core.models.query_tokens.field_value import FieldValue
+from app.translator.core.models.query_tokens.function_value import FunctionValue
 from app.translator.core.models.query_tokens.identifier import Identifier
+from app.translator.core.models.query_tokens.keyword import Keyword
 from app.translator.core.tokenizer import QueryTokenizer
 from app.translator.platforms.base.sql.str_value_manager import sql_str_value_manager
 from app.translator.tools.utils import get_match_group
@@ -49,6 +51,7 @@ class SqlTokenizer(QueryTokenizer):
     )
     _value_pattern = rf"{num_value_pattern}|{bool_value_pattern}|{single_quotes_value_pattern}"
     multi_value_pattern = rf"""\((?P<{ValueType.multi_value}>\d+(?:,\s*\d+)*|'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*'(?:,\s*'(?:[:a-zA-Z\*0-9=+%#\-\/\\,_".$&^@!\(\)\{{\}}\s]|'')*')*)\)"""  # noqa: E501
+    re_field_value_pattern = rf"""regexp_like\({field_pattern},\s*'(?P<{ValueType.regex_value}>(?:[:a-zA-Z\*\?0-9=+%#№;\-_,"\.$&^@!\{{\}}\[\]\s?<>|]|\\\'|\\)+)'\)"""  # noqa: E501
 
 
     wildcard_symbol = "%"
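
# --- Example (not part of the diff) ---------------------------------------
# A minimal, hedged sketch of what re_field_value_pattern is meant to match.
# The character class is simplified and field_pattern below is a hypothetical
# stand-in for the tokenizer's real attribute; only the "field_name" group
# and the overall regexp_like(...) shape mirror the pattern above.
import re

field_pattern = r'(?P<field_name>[a-zA-Z0-9_."\-]+)'  # assumption, simplified
re_field_value_pattern = rf"regexp_like\({field_pattern},\s*'(?P<re_value>(?:[^']|\\')+)'\)"

query = "regexp_like(user_name, 'adm.*') AND status = 'ok'"
if match := re.match(re_field_value_pattern, query, re.IGNORECASE):
    print(match.group("field_name"))  # user_name
    print(match.group("re_value"))    # adm.*  -- the raw regex string
    print(query[match.end() :])       # " AND status = 'ok'" -- unconsumed tail
# ---------------------------------------------------------------------------
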
@@ -77,6 +80,22 @@ def create_field_value(field_name: str, operator: Identifier, value: Union[str,
         field_name = field_name.strip('"')
         return FieldValue(source_name=field_name, operator=operator, value=value)
 
+    def _search_re_field_value(self, query: str) -> Optional[tuple[FieldValue, str]]:
+        if match := re.match(self.re_field_value_pattern, query, re.IGNORECASE):
+            group_dict = match.groupdict()
+            field_name = group_dict["field_name"]
+            value = self.str_value_manager.from_re_str_to_container(group_dict[ValueType.regex_value])
+            operator = Identifier(token_type=OperatorType.REGEX)
+            return self.create_field_value(field_name, operator, value), query[match.end() :]
+
     def tokenize(self, query: str) -> list:
         query = re.sub(r"\s*ESCAPE\s*'.'", "", query)  # remove `ESCAPE 'escape_char'` in LIKE expr
         return super().tokenize(query)
+
+    def _get_next_token(
+        self, query: str
+    ) -> tuple[Union[FieldValue, FunctionValue, Keyword, Identifier, list[Union[FieldValue, Identifier]]], str]:
+        query = query.strip("\n").strip(" ").strip("\n")
+        if search_result := self._search_re_field_value(query):
+            return search_result
+        return super()._get_next_token(query)
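
# --- Example (not part of the diff) ---------------------------------------
# Hedged usage sketch of the new dispatch path. _search_re_field_value
# returns None when re.match fails, so the walrus check in _get_next_token
# falls through to super()._get_next_token() for ordinary comparisons.
# The module path and the no-argument constructor are assumptions inferred
# from the str_value_manager import above and from how platform tokenizers
# are used elsewhere in this codebase.
from app.translator.platforms.base.sql.tokenizer import SqlTokenizer

tokenizer = SqlTokenizer()

# The regexp_like(...) call is consumed as one FieldValue carrying a REGEX
# operator; the remainder ("AND severity = 'high'") is tokenized by the
# base class machinery as usual.
tokens = tokenizer.tokenize("regexp_like(proc_name, 'powershell.*') AND severity = 'high'")
# ---------------------------------------------------------------------------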