From b057c516f2eeeba76094d0de7ec0857136eb2f81 Mon Sep 17 00:00:00 2001 From: Jonathan Healy Date: Sun, 22 Jun 2025 14:59:59 +0800 Subject: [PATCH 1/5] Update README.md, stac-fastapi badge to 6.0.0 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 11619f86..8ab49c4e 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ [![GitHub forks](https://img.shields.io/github/forks/stac-utils/stac-fastapi-elasticsearch-opensearch.svg?color=blue)](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/network/members) [![PyPI version](https://img.shields.io/pypi/v/stac-fastapi-elasticsearch.svg?color=blue)](https://pypi.org/project/stac-fastapi-elasticsearch/) [![STAC](https://img.shields.io/badge/STAC-1.1.0-blue.svg)](https://github.com/radiantearth/stac-spec/tree/v1.1.0) - [![stac-fastapi](https://img.shields.io/badge/stac--fastapi-5.2.0-blue.svg)](https://github.com/stac-utils/stac-fastapi) + [![stac-fastapi](https://img.shields.io/badge/stac--fastapi-6.0.0-blue.svg)](https://github.com/stac-utils/stac-fastapi) ## Sponsors & Supporters From a0b77cbecd7be115d94a4bdd38c3a7eb5dad26b8 Mon Sep 17 00:00:00 2001 From: z-mrozu <43796046+z-mrozu@users.noreply.github.com> Date: Wed, 2 Jul 2025 08:24:11 +0200 Subject: [PATCH 2/5] Timeout setting for Opensearch and Elasticsearch (#408) **Description:** Added timeout setting in Opensearch & Elasticsearch config which should only be relevant if user sets "ES_TIMEOUT" **PR Checklist:** - [x] Code is formatted and linted (run `pre-commit run --all-files`) - [x] Tests pass (run `make test`) - [x] Documentation has been updated to reflect changes, if applicable - [x] Changes are added to the changelog --- CHANGELOG.md | 4 ++++ README.md | 17 +++++++++-------- .../stac_fastapi/elasticsearch/config.py | 4 ++++ .../stac_fastapi/opensearch/config.py | 4 ++++ 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 979094a4..421e8315 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Added + +- Added the ability to set timeout for Opensearch and Elasticsearch clients by setting the environmental variable `ES_TIMEOUT` [#408](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/408) + ## [v6.0.0] - 2025-06-22 ### Added diff --git a/README.md b/README.md index 8ab49c4e..4d2cb807 100644 --- a/README.md +++ b/README.md @@ -207,26 +207,27 @@ You can customize additional settings in your `.env` file: | `ES_PORT` | Port for Elasticsearch/OpenSearch. | `9200` (ES) / `9202` (OS)| Optional | | `ES_USE_SSL` | Use SSL for connecting to Elasticsearch/OpenSearch. | `false` | Optional | | `ES_VERIFY_CERTS` | Verify SSL certificates when connecting. | `false` | Optional | +| `ES_TIMEOUT` | Client timeout for Elasticsearch/OpenSearch. | DB client default | Optional | | `STAC_FASTAPI_TITLE` | Title of the API in the documentation. | `stac-fastapi-` | Optional | | `STAC_FASTAPI_DESCRIPTION` | Description of the API in the documentation. | N/A | Optional | | `STAC_FASTAPI_VERSION` | API version. | `2.1` | Optional | -| `STAC_FASTAPI_LANDING_PAGE_ID` | Landing page ID | `stac-fastapi` | Optional | +| `STAC_FASTAPI_LANDING_PAGE_ID` | Landing page ID | `stac-fastapi` | Optional | | `APP_HOST` | Server bind address. | `0.0.0.0` | Optional | | `APP_PORT` | Server port. | `8080` | Optional | | `ENVIRONMENT` | Runtime environment. | `local` | Optional | | `WEB_CONCURRENCY` | Number of worker processes. | `10` | Optional | | `RELOAD` | Enable auto-reload for development. | `true` | Optional | | `STAC_FASTAPI_RATE_LIMIT` | API rate limit per client. | `200/minute` | Optional | -| `BACKEND` | Tests-related variable | `elasticsearch` or `opensearch` based on the backend | Optional | -| `ELASTICSEARCH_VERSION` | Version of Elasticsearch to use. 
| `8.11.0` | Optional | | -| `OPENSEARCH_VERSION` | OpenSearch version | `2.11.1` | Optional -| `ENABLE_DIRECT_RESPONSE` | Enable direct response for maximum performance (disables all FastAPI dependencies, including authentication, custom status codes, and validation) | `false` | Optional -| `RAISE_ON_BULK_ERROR` | Controls whether bulk insert operations raise exceptions on errors. If set to `true`, the operation will stop and raise an exception when an error occurs. If set to `false`, errors will be logged, and the operation will continue. **Note:** STAC Item and ItemCollection validation errors will always raise, regardless of this flag. | `false` Optional | -| `DATABASE_REFRESH` | Controls whether database operations refresh the index immediately after changes. If set to `true`, changes will be immediately searchable. If set to `false`, changes may not be immediately visible but can improve performance for bulk operations. If set to `wait_for`, changes will wait for the next refresh cycle to become visible. | `false` | Optional | +| `BACKEND` | Tests-related variable | `elasticsearch` or `opensearch` based on the backend | Optional | +| `ELASTICSEARCH_VERSION` | Version of Elasticsearch to use. | `8.11.0` | Optional | +| `OPENSEARCH_VERSION` | OpenSearch version | `2.11.1` | Optional | +| `ENABLE_DIRECT_RESPONSE` | Enable direct response for maximum performance (disables all FastAPI dependencies, including authentication, custom status codes, and validation) | `false` | Optional | +| `RAISE_ON_BULK_ERROR` | Controls whether bulk insert operations raise exceptions on errors. If set to `true`, the operation will stop and raise an exception when an error occurs. If set to `false`, errors will be logged, and the operation will continue. **Note:** STAC Item and ItemCollection validation errors will always raise, regardless of this flag. | `false` | Optional | +| `DATABASE_REFRESH` | Controls whether database operations refresh the index immediately after changes. 
If set to `true`, changes will be immediately searchable. If set to `false`, changes may not be immediately visible but can improve performance for bulk operations. If set to `wait_for`, changes will wait for the next refresh cycle to become visible. | `false` | Optional | | `ENABLE_TRANSACTIONS_EXTENSIONS` | Enables or disables the Transactions and Bulk Transactions API extensions. If set to `false`, the POST `/collections` route and related transaction endpoints (including bulk transaction operations) will be unavailable in the API. This is useful for deployments where mutating the catalog via the API should be prevented. | `true` | Optional | > [!NOTE] -> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, and `ES_VERIFY_CERTS` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch. +> The variables `ES_HOST`, `ES_PORT`, `ES_USE_SSL`, `ES_VERIFY_CERTS` and `ES_TIMEOUT` apply to both Elasticsearch and OpenSearch backends, so there is no need to rename the key names to `OS_` even if you're using OpenSearch. 
## Interacting with the API diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py index 49495854..c5b6e3dd 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py @@ -56,6 +56,10 @@ def _es_config() -> Dict[str, Any]: if (u := os.getenv("ES_USER")) and (p := os.getenv("ES_PASS")): config["http_auth"] = (u, p) + # Include timeout setting if set + if request_timeout := os.getenv("ES_TIMEOUT"): + config["request_timeout"] = request_timeout + # Explicitly exclude SSL settings when not using SSL if not use_ssl: return config diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py index 3fe4d71b..08e9a42a 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py @@ -53,6 +53,10 @@ def _es_config() -> Dict[str, Any]: config["headers"] = headers + # Include timeout setting if set + if timeout := os.getenv("ES_TIMEOUT"): + config["timeout"] = timeout + # Explicitly exclude SSL settings when not using SSL if not use_ssl: return config From 90fd9caa6dbb3fd549bbb119cdec7bdd5781e90e Mon Sep 17 00:00:00 2001 From: simonvb00 <159138195+simonvb00@users.noreply.github.com> Date: Sat, 19 Jul 2025 00:13:05 +0200 Subject: [PATCH 3/5] Added support for searching large amount of indices (#412) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Description:** When searching the catalog with the `/search`-endpoint, a `GET //_search` request is done with all indices listed in the URL path. 
However, when doing such a search on a large number of indices, it is possible that the size of the endpoint exceeds Elasticsearch’s maximum allowed HTTP line length (4096 bytes), resulting in the following error: `{"code":"RequestError","description":"RequestError(400, 'too_long_http_line_exception', 'An HTTP line is larger than 4096 bytes.')"}` The solution in this commit moves the collection filter from the endpoint to the body of the request once the index list in the URL grows past a length threshold (`ES_MAX_URL_LENGTH - 300` characters). The index list in the endpoint is then replaced by `ITEM_INDICES`. Since the query body still filters on the requested collections, this change preserves the behavior while avoiding the URL length limitation. **PR Checklist:** - [x] Code is formatted and linted (run `pre-commit run --all-files`) - [x] Tests pass (run `make test`) - [ ] Documentation has been updated to reflect changes, if applicable - [x] Changes are added to the changelog --------- Co-authored-by: Stijn Caerts --- CHANGELOG.md | 4 +++ .../elasticsearch/database_logic.py | 7 ++++ .../stac_fastapi/opensearch/database_logic.py | 12 +++++-- .../sfeos_helpers/database/query.py | 32 +++++++++++++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 421e8315..780386b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
- Added the ability to set timeout for Opensearch and Elasticsearch clients by setting the environmental variable `ES_TIMEOUT` [#408](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/408) +### Changed + +- Updated collection to index logic to support searching a large amount of indices [#412](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/412) + ## [v6.0.0] - 2025-06-22 ### Added diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 195950f3..16a8a83d 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -43,6 +43,10 @@ return_date, validate_refresh, ) +from stac_fastapi.sfeos_helpers.database.query import ( + ES_MAX_URL_LENGTH, + add_collections_to_body, +) from stac_fastapi.sfeos_helpers.database.utils import ( merge_to_operations, operations_to_script, @@ -520,6 +524,9 @@ async def execute_search( query = search.query.to_dict() if search.query else None index_param = indices(collection_ids) + if len(index_param) > ES_MAX_URL_LENGTH - 300: + index_param = ITEM_INDICES + query = add_collections_to_body(collection_ids, query) max_result_window = MAX_LIMIT diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index e4c88d85..c323b307 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -42,6 +42,10 @@ return_date, validate_refresh, ) +from stac_fastapi.sfeos_helpers.database.query import ( + ES_MAX_URL_LENGTH, + add_collections_to_body, +) from stac_fastapi.sfeos_helpers.database.utils import ( merge_to_operations, operations_to_script, @@ -532,6 +536,12 @@ async def execute_search( """ search_body: 
Dict[str, Any] = {} query = search.query.to_dict() if search.query else None + + index_param = indices(collection_ids) + if len(index_param) > ES_MAX_URL_LENGTH - 300: + index_param = ITEM_INDICES + query = add_collections_to_body(collection_ids, query) + if query: search_body["query"] = query @@ -544,8 +554,6 @@ async def execute_search( search_body["sort"] = sort if sort else DEFAULT_SORT - index_param = indices(collection_ids) - max_result_window = MAX_LIMIT size_limit = min(limit + 1, max_result_window) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py index dacbb590..97df5703 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py @@ -7,6 +7,8 @@ from stac_fastapi.sfeos_helpers.mappings import Geometry +ES_MAX_URL_LENGTH = 4096 + def apply_free_text_filter_shared( search: Any, free_text_queries: Optional[List[str]] @@ -83,3 +85,33 @@ def populate_sort_shared(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: return {s.field: {"order": s.direction} for s in sortby} else: return None + + +def add_collections_to_body( + collection_ids: List[str], query: Optional[Dict[str, Any]] +) -> Dict[str, Any]: + """Add a list of collection ids to the body of a query. + + Args: + collection_ids (List[str]): A list of collections ids. + query (Optional[Dict[str, Any]]): The query to add collections to. If none, create a query that filters + the collection ids. + + Returns: + Dict[str, Any]: A query that contains a filter on the given collection ids. + + Notes: + This function is needed in the execute_search function when the size of the URL path will exceed the maximum of ES. 
+ """ + index_filter = {"terms": {"collection": collection_ids}} + if query is None: + query = {"query": {}} + if "bool" not in query: + query["bool"] = {} + if "filter" not in query["bool"]: + query["bool"]["filter"] = [] + + filters = query["bool"]["filter"] + if index_filter not in filters: + filters.append(index_filter) + return query From 970f65e88c410c4ce01390d78be33cd29dd1f9ec Mon Sep 17 00:00:00 2001 From: Bennett Brixen Date: Sun, 20 Jul 2025 15:00:44 -0600 Subject: [PATCH 4/5] update ES env variable documentation (#410) fixes incorrect or missing documentation for ES_USE_SSL, ES_VERIFY_CERTS, and ES_API_KEY **Description:** - README.md documentation has incorrect default values for ES_USE_SSL and ES_VERIFY_CERTS - The default values that are used can be found at: `./stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py` and `./stac_fastapi/opensearch/stac_fastapi/opensearch/config.py` - Adds documentation for ES_API_KEY **PR Checklist:** - [X] Code is formatted and linted (run `pre-commit run --all-files`) - [X] Tests pass (run `make test`) - [X] Documentation has been updated to reflect changes, if applicable - [X] Changes are added to the changelog --------- Co-authored-by: Bennett Patrick Brixen Co-authored-by: Jonathan Healy --- CHANGELOG.md | 2 ++ README.md | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 780386b1..c6951ef4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### Changed - Updated collection to index logic to support searching a large amount of indices [#412](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/412) +- Updated documentation to reflect use of ES environment variables [#410](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/410) +- Updated documentation to reflect `APP_PORT` in [stac-fastapi-core ApiSettings](https://github.com/stac-utils/stac-fastapi/blob/fa42985255fad0bab7dbe3aadbf1f74cb1635f3a/stac_fastapi/types/stac_fastapi/types/config.py#L30) [#410](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/410) ## [v6.0.0] - 2025-06-22 diff --git a/README.md b/README.md index 4d2cb807..9e5a4674 100644 --- a/README.md +++ b/README.md @@ -205,15 +205,16 @@ You can customize additional settings in your `.env` file: |------------------------------|--------------------------------------------------------------------------------------|--------------------------|---------------------------------------------------------------------------------------------| | `ES_HOST` | Hostname for external Elasticsearch/OpenSearch. | `localhost` | Optional | | `ES_PORT` | Port for Elasticsearch/OpenSearch. | `9200` (ES) / `9202` (OS)| Optional | -| `ES_USE_SSL` | Use SSL for connecting to Elasticsearch/OpenSearch. | `false` | Optional | -| `ES_VERIFY_CERTS` | Verify SSL certificates when connecting. | `false` | Optional | +| `ES_USE_SSL` | Use SSL for connecting to Elasticsearch/OpenSearch. | `true` | Optional | +| `ES_VERIFY_CERTS` | Verify SSL certificates when connecting. | `true` | Optional | +| `ES_API_KEY` | API Key for external Elasticsearch/OpenSearch. | N/A | Optional | | `ES_TIMEOUT` | Client timeout for Elasticsearch/OpenSearch. | DB client default | Optional | | `STAC_FASTAPI_TITLE` | Title of the API in the documentation. | `stac-fastapi-` | Optional | | `STAC_FASTAPI_DESCRIPTION` | Description of the API in the documentation. 
| N/A | Optional | | `STAC_FASTAPI_VERSION` | API version. | `2.1` | Optional | | `STAC_FASTAPI_LANDING_PAGE_ID` | Landing page ID | `stac-fastapi` | Optional | | `APP_HOST` | Server bind address. | `0.0.0.0` | Optional | -| `APP_PORT` | Server port. | `8080` | Optional | +| `APP_PORT` | Server port. | `8000` | Optional | | `ENVIRONMENT` | Runtime environment. | `local` | Optional | | `WEB_CONCURRENCY` | Number of worker processes. | `10` | Optional | | `RELOAD` | Enable auto-reload for development. | `true` | Optional | From e34e923ad61e542607be14ab5e52ac8915139819 Mon Sep 17 00:00:00 2001 From: GrzegorzPustulka <94792342+GrzegorzPustulka@users.noreply.github.com> Date: Mon, 21 Jul 2025 13:01:48 +0200 Subject: [PATCH 5/5] Enable collection filtering support for stac-auth-proxy (#411) Added `collection-search#filter` conformance class to CollectionSearchExtension to enable compatibility with stac-auth-proxy collection filtering **PR Checklist:** - [ ] Code is formatted and linted (run `pre-commit run --all-files`) - [ ] Tests pass (run `make test`) - [ ] Documentation has been updated to reflect changes, if applicable - [ ] Changes are added to the changelog --------- Co-authored-by: Grzegorz Pustulka Co-authored-by: Jonathan Healy Co-authored-by: Bennett Brixen Co-authored-by: Bennett Patrick Brixen --- CHANGELOG.md | 1 + .../elasticsearch/stac_fastapi/elasticsearch/app.py | 10 ++++++++++ stac_fastapi/opensearch/stac_fastapi/opensearch/app.py | 10 ++++++++++ 3 files changed, 21 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6951ef4..624b0043 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### Added - Added the ability to set timeout for Opensearch and Elasticsearch clients by setting the environmental variable `ES_TIMEOUT` [#408](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/408) +- Added `collection-search#filter` conformance class to CollectionSearchExtension to enable compatibility with stac-auth-proxy collection filtering [#411](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/411) ### Changed diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py index 7e145072..c348822f 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py @@ -31,6 +31,7 @@ ) from stac_fastapi.extensions.core import ( AggregationExtension, + CollectionSearchExtension, FilterExtension, FreeTextExtension, SortExtension, @@ -60,6 +61,14 @@ FilterConformanceClasses.ADVANCED_COMPARISON_OPERATORS ) +# Adding collection search extension for compatibility with stac-auth-proxy +# (https://github.com/developmentseed/stac-auth-proxy) +# The extension is not fully implemented yet but is required for collection filtering support +collection_search_extension = CollectionSearchExtension() +collection_search_extension.conformance_classes.append( + "https://api.stacspec.org/v1.0.0-rc.1/collection-search#filter" +) + aggregation_extension = AggregationExtension( client=EsAsyncBaseAggregationClient( database=database_logic, session=session, settings=settings @@ -75,6 +84,7 @@ TokenPaginationExtension(), filter_extension, FreeTextExtension(), + collection_search_extension, ] if TRANSACTIONS_EXTENSIONS: diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py index c047014a..b31281fa 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py @@ 
-25,6 +25,7 @@ from stac_fastapi.core.utilities import get_bool_env from stac_fastapi.extensions.core import ( AggregationExtension, + CollectionSearchExtension, FilterExtension, FreeTextExtension, SortExtension, @@ -60,6 +61,14 @@ FilterConformanceClasses.ADVANCED_COMPARISON_OPERATORS ) +# Adding collection search extension for compatibility with stac-auth-proxy +# (https://github.com/developmentseed/stac-auth-proxy) +# The extension is not fully implemented yet but is required for collection filtering support +collection_search_extension = CollectionSearchExtension() +collection_search_extension.conformance_classes.append( + "https://api.stacspec.org/v1.0.0-rc.1/collection-search#filter" +) + aggregation_extension = AggregationExtension( client=EsAsyncBaseAggregationClient( database=database_logic, session=session, settings=settings @@ -75,6 +84,7 @@ TokenPaginationExtension(), filter_extension, FreeTextExtension(), + collection_search_extension, ] pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy