From b8b64d4048a09cf542fe1a515c035a023ff6861e Mon Sep 17 00:00:00 2001 From: Gesyk Nazar <77268518+nazargesyk@users.noreply.github.com> Date: Mon, 16 Sep 2024 15:57:26 +0300 Subject: [PATCH 1/3] gis-8556 elastic-lucene-query tokenizer fixes --- .../app/translator/platforms/base/lucene/tokenizer.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py b/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py index b56f5bee..b46a4ddc 100644 --- a/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py +++ b/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py @@ -38,6 +38,7 @@ class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin): ":>": OperatorType.GT, ":<": OperatorType.LT, ":": OperatorType.EQ, + "==": OperatorType.EQ, } multi_value_operators_map: ClassVar[dict[str, str]] = {":": OperatorType.EQ} @@ -61,7 +62,7 @@ class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin): multi_value_pattern = rf"""\((?P<{ValueType.multi_value}>[:a-zA-Z\"\*0-9=+%#№;\-_\/\\'\,.$&^@!\(\[\]\s|]+)\)""" multi_value_check_pattern = r"___field___\s*___operator___\s*\(" - multi_value_delimiter_pattern = r"\s+OR\s+" + multi_value_delimiter_pattern = r"\s+OR|or\s+" escape_manager = lucene_escape_manager @@ -77,7 +78,9 @@ def create_field_value(field_name: str, operator: Identifier, value: Union[str, @staticmethod def clean_multi_value(value: str) -> str: - return value.strip('"') if value.startswith('"') and value.endswith('"') else value + value = value.strip('"') if value.startswith('"') and value.endswith('"') else value + value = value.replace("\n", "").replace(" ", "") + return value.strip() def get_operator_and_value( # noqa: PLR0911 self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None From ff22a2196fffd2ce17c1dd83e6ff977a665cc5a0 Mon Sep 17 00:00:00 2001 From: Gesyk Nazar <77268518+nazargesyk@users.noreply.github.com> Date: Tue, 8 Oct 2024 16:03:59 +0300 Subject: [PATCH 2/3] gis-8556 fix lucene multi_value_delimiter_pattern --- uncoder-core/app/translator/platforms/base/lucene/tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py b/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py index b46a4ddc..b86cf8f4 100644 --- a/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py +++ b/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py @@ -62,7 +62,7 @@ class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin): multi_value_pattern = rf"""\((?P<{ValueType.multi_value}>[:a-zA-Z\"\*0-9=+%#№;\-_\/\\'\,.$&^@!\(\[\]\s|]+)\)""" multi_value_check_pattern = r"___field___\s*___operator___\s*\(" - multi_value_delimiter_pattern = r"\s+OR|or\s+" + multi_value_delimiter_pattern = r"\s+(?:OR|or)\s+" escape_manager = lucene_escape_manager From bdf940f4fb6328382d1948564b4944a8acb5d203 Mon Sep 17 00:00:00 2001 From: Gesyk Nazar <77268518+nazargesyk@users.noreply.github.com> Date: Tue, 8 Oct 2024 16:47:40 +0300 Subject: [PATCH 3/3] gis-8556 fix lucene clean_multi_value --- uncoder-core/app/translator/platforms/base/lucene/tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py b/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py index b86cf8f4..8be19ffe 100644 --- a/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py +++ b/uncoder-core/app/translator/platforms/base/lucene/tokenizer.py @@ -78,8 +78,8 @@ def create_field_value(field_name: str, operator: Identifier, value: Union[str, @staticmethod def clean_multi_value(value: str) -> str: - value = value.strip('"') if value.startswith('"') and value.endswith('"') else value value = value.replace("\n", "").replace(" ", "") + value = value.strip('"') if value.startswith('"') and value.endswith('"') else value return value.strip() def get_operator_and_value( # noqa: PLR0911 pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy