From 506a186e326352a42f87d2af2581abe93fb125da Mon Sep 17 00:00:00 2001 From: Simon Van Baelen Date: Wed, 16 Jul 2025 13:16:02 +0200 Subject: [PATCH 1/7] Added support for searching large amount of indices by moving the indices from the request url to the body of the request when size is larger than 4096 bytes. --- .../stac_fastapi/opensearch/database_logic.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index e4c88d85..d677adf3 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -63,7 +63,7 @@ from stac_fastapi.types.stac import Collection, Item logger = logging.getLogger(__name__) - +ES_MAX_URL_LENGTH = 4096 async def create_index_templates() -> None: """ @@ -546,6 +546,17 @@ async def execute_search( index_param = indices(collection_ids) + if len(index_param) > ES_MAX_URL_LENGTH-300: + index_param = ITEM_INDICES + index_filter = {"terms": {"collection": collection_ids}} + if not "bool" in search_body["query"]: + search_body["query"]["bool"] = {} + if not "filter" in search_body["query"]["bool"]: + search_body["query"]["bool"]["filter"] = [index_filter] + filters = search_body["query"]["bool"]["filter"] + if not index_filter in filters: + filters.append(index_filter) + max_result_window = MAX_LIMIT size_limit = min(limit + 1, max_result_window) From bd84128c90358e55bbbc50ec41599492b4d88cce Mon Sep 17 00:00:00 2001 From: Simon Van Baelen Date: Wed, 16 Jul 2025 13:44:40 +0200 Subject: [PATCH 2/7] not x in -> x not in --- .../opensearch/stac_fastapi/opensearch/database_logic.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index d677adf3..d79b2d63 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -65,6 +65,7 @@ logger = logging.getLogger(__name__) ES_MAX_URL_LENGTH = 4096 + async def create_index_templates() -> None: """ Create index templates for the Collection and Item indices. @@ -546,15 +547,15 @@ async def execute_search( index_param = indices(collection_ids) - if len(index_param) > ES_MAX_URL_LENGTH-300: + if len(index_param) > ES_MAX_URL_LENGTH - 300: index_param = ITEM_INDICES index_filter = {"terms": {"collection": collection_ids}} - if not "bool" in search_body["query"]: + if "bool" not in search_body["query"]: search_body["query"]["bool"] = {} - if not "filter" in search_body["query"]["bool"]: + if "filter" not in search_body["query"]["bool"]: search_body["query"]["bool"]["filter"] = [index_filter] filters = search_body["query"]["bool"]["filter"] - if not index_filter in filters: + if index_filter not in filters: filters.append(index_filter) max_result_window = MAX_LIMIT From ab4f56e64f55519fff2abd7890ddd94aade8a0ba Mon Sep 17 00:00:00 2001 From: Stijn Caerts Date: Thu, 17 Jul 2025 13:40:14 +0200 Subject: [PATCH 3/7] update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 421e8315..780386b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Added the ability to set timeout for Opensearch and Elasticsearch clients by setting the environmental variable `ES_TIMEOUT` [#408](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/408) +### Changed + +- Updated collection to index logic to support searching a large amount of indices [#412](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/412) + ## [v6.0.0] - 2025-06-22 ### Added From 06999c9f1ec4251817a77f08dca330d0b02ce5d7 Mon Sep 17 00:00:00 2001 From: Simon Van Baelen Date: Fri, 18 Jul 2025 09:26:21 +0200 Subject: [PATCH 4/7] Create add_collections_to_body in sfeos_helpers and added search support for large amount of queries to ElasticSearch database logic. --- .../elasticsearch/database_logic.py | 7 +++++ .../stac_fastapi/opensearch/database_logic.py | 23 ++++++--------- .../sfeos_helpers/database/query.py | 29 +++++++++++++++++++ 3 files changed, 45 insertions(+), 14 deletions(-) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 195950f3..ed3e3080 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -27,6 +27,8 @@ PartialItem, PatchOperation, ) + +from stac_fastapi.opensearch.stac_fastapi.opensearch.database_logic import ES_MAX_URL_LENGTH from stac_fastapi.sfeos_helpers import filter from stac_fastapi.sfeos_helpers.database import ( apply_free_text_filter_shared, @@ -60,6 +62,8 @@ from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item +from stac_fastapi.sfeos_helpers.stac_fastapi.sfeos_helpers.database.query import add_collections_to_body + logger = logging.getLogger(__name__) @@ -520,6 +524,9 @@ async def execute_search( query = search.query.to_dict() if search.query else None index_param = indices(collection_ids) + if len(index_param) > ES_MAX_URL_LENGTH - 300: + index_param = ITEM_INDICES + query = add_collections_to_body(collection_ids, query) max_result_window = MAX_LIMIT diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index d79b2d63..35f70fe3 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -62,8 +62,10 @@ from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item +from stac_fastapi.sfeos_helpers.stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ + ES_MAX_URL_LENGTH + logger = logging.getLogger(__name__) -ES_MAX_URL_LENGTH = 4096 async def create_index_templates() -> None: @@ -533,6 +535,12 @@ async def execute_search( """ search_body: Dict[str, Any] = {} query = search.query.to_dict() if search.query else None + + index_param = indices(collection_ids) + if len(index_param) > ES_MAX_URL_LENGTH - 300: + index_param = ITEM_INDICES + query = add_collections_to_body(collection_ids, query) + if query: search_body["query"] = query @@ -545,19 +553,6 @@ async def execute_search( search_body["sort"] = sort if sort else DEFAULT_SORT - index_param = indices(collection_ids) - - if len(index_param) > ES_MAX_URL_LENGTH - 300: - index_param = ITEM_INDICES - index_filter = {"terms": {"collection": collection_ids}} - if "bool" not in search_body["query"]: - search_body["query"]["bool"] = {} - if "filter" not in search_body["query"]["bool"]: - search_body["query"]["bool"]["filter"] = [index_filter] - filters = search_body["query"]["bool"]["filter"] - if index_filter not in filters: - filters.append(index_filter) - max_result_window = MAX_LIMIT size_limit = min(limit + 1, max_result_window) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py index dacbb590..81409b65 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py @@ -7,6 +7,7 @@ from stac_fastapi.sfeos_helpers.mappings import Geometry +ES_MAX_URL_LENGTH = 4096 def apply_free_text_filter_shared( search: Any, free_text_queries: Optional[List[str]] @@ -83,3 +84,31 @@ def populate_sort_shared(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: return {s.field: {"order": s.direction} for s in sortby} else: return None + + +def add_collections_to_body(collection_ids: List[str], query: Optional[Dict[str, Any]]) -> Dict[str, Any]: + """Adds a list of collection ids to the body of a query. + + Args: + collection_ids (List[str]): A list of collections ids. + query (Optional[Dict[str, Any]]): The query to add collections to. If none, create a query that filters + the collection ids. + + Returns: + Dict[str, Any]: A query that contains a filter on the given collection ids. + + Notes: + This function is needed in the execute_search function when the size of the URL path will exceed the maximum of ES. + """ + index_filter = {"terms": {"collection": collection_ids}} + if query is None: + query = {"query": {}} + if "bool" not in query: + query["bool"] = {} + if "filter" not in query["bool"]: + query["bool"]["filter"] = [] + + filters = query["bool"]["filter"] + if index_filter not in filters: + filters.append(index_filter) + return query \ No newline at end of file From be48bd89e7ed9eba70d00156ee9c9e52cf8f0302 Mon Sep 17 00:00:00 2001 From: Simon Van Baelen Date: Fri, 18 Jul 2025 09:33:48 +0200 Subject: [PATCH 5/7] Fixed import. --- .../elasticsearch/stac_fastapi/elasticsearch/database_logic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index ed3e3080..98178b50 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -62,7 +62,8 @@ from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item -from stac_fastapi.sfeos_helpers.stac_fastapi.sfeos_helpers.database.query import add_collections_to_body +from stac_fastapi.sfeos_helpers.stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ + ES_MAX_URL_LENGTH logger = logging.getLogger(__name__) From 51a7a76c80cf48b934938edd0c07eac1e6823e22 Mon Sep 17 00:00:00 2001 From: Simon Van Baelen Date: Fri, 18 Jul 2025 09:36:50 +0200 Subject: [PATCH 6/7] Final import fix. Tests are passing again. --- .../elasticsearch/stac_fastapi/elasticsearch/database_logic.py | 3 +-- .../opensearch/stac_fastapi/opensearch/database_logic.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 98178b50..d4a8c8c4 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -28,7 +28,6 @@ PatchOperation, ) -from stac_fastapi.opensearch.stac_fastapi.opensearch.database_logic import ES_MAX_URL_LENGTH from stac_fastapi.sfeos_helpers import filter from stac_fastapi.sfeos_helpers.database import ( apply_free_text_filter_shared, @@ -62,7 +61,7 @@ from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item -from stac_fastapi.sfeos_helpers.stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ +from stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ ES_MAX_URL_LENGTH logger = logging.getLogger(__name__) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 35f70fe3..f3c925d8 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -62,7 +62,7 @@ from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item -from stac_fastapi.sfeos_helpers.stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ +from stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ ES_MAX_URL_LENGTH logger = logging.getLogger(__name__) From 552b483fa40ec72801403ba72a82d0f1b15cb560 Mon Sep 17 00:00:00 2001 From: Simon Van Baelen Date: Fri, 18 Jul 2025 09:51:29 +0200 Subject: [PATCH 7/7] Precommit. --- .../stac_fastapi/elasticsearch/database_logic.py | 8 ++++---- .../opensearch/stac_fastapi/opensearch/database_logic.py | 7 ++++--- .../stac_fastapi/sfeos_helpers/database/query.py | 9 ++++++--- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index d4a8c8c4..16a8a83d 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -27,7 +27,6 @@ PartialItem, PatchOperation, ) - from stac_fastapi.sfeos_helpers import filter from stac_fastapi.sfeos_helpers.database import ( apply_free_text_filter_shared, @@ -44,6 +43,10 @@ return_date, validate_refresh, ) +from stac_fastapi.sfeos_helpers.database.query import ( + ES_MAX_URL_LENGTH, + add_collections_to_body, +) from stac_fastapi.sfeos_helpers.database.utils import ( merge_to_operations, operations_to_script, @@ -61,9 +64,6 @@ from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item -from stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ - ES_MAX_URL_LENGTH - logger = logging.getLogger(__name__) diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index f3c925d8..c323b307 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -42,6 +42,10 @@ return_date, validate_refresh, ) +from stac_fastapi.sfeos_helpers.database.query import ( + ES_MAX_URL_LENGTH, + add_collections_to_body, +) from stac_fastapi.sfeos_helpers.database.utils import ( merge_to_operations, operations_to_script, @@ -62,9 +66,6 @@ from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item -from stac_fastapi.sfeos_helpers.database.query import add_collections_to_body, \ - ES_MAX_URL_LENGTH - logger = logging.getLogger(__name__) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py index 81409b65..97df5703 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py @@ -9,6 +9,7 @@ ES_MAX_URL_LENGTH = 4096 + def apply_free_text_filter_shared( search: Any, free_text_queries: Optional[List[str]] ) -> Any: @@ -86,8 +87,10 @@ def populate_sort_shared(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: return None -def add_collections_to_body(collection_ids: List[str], query: Optional[Dict[str, Any]]) -> Dict[str, Any]: - """Adds a list of collection ids to the body of a query. +def add_collections_to_body( + collection_ids: List[str], query: Optional[Dict[str, Any]] +) -> Dict[str, Any]: + """Add a list of collection ids to the body of a query. Args: collection_ids (List[str]): A list of collections ids. @@ -111,4 +114,4 @@ def add_collections_to_body(collection_ids: List[str], query: Optional[Dict[str, filters = query["bool"]["filter"] if index_filter not in filters: filters.append(index_filter) - return query \ No newline at end of file + return query pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy