diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 864b52e3..abf6ebfa 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -96,6 +96,10 @@ jobs: run: | pip install ./stac_fastapi/core + - name: Install helpers library stac-fastapi + run: | + pip install ./stac_fastapi/sfeos_helpers + - name: Install elasticsearch stac-fastapi run: | pip install ./stac_fastapi/elasticsearch[dev,server] diff --git a/.github/workflows/deploy_mkdocs.yml b/.github/workflows/deploy_mkdocs.yml index 3606d654..80b13104 100644 --- a/.github/workflows/deploy_mkdocs.yml +++ b/.github/workflows/deploy_mkdocs.yml @@ -30,6 +30,7 @@ jobs: python -m pip install --upgrade pip python -m pip install \ stac_fastapi/core \ + stac_fastapi/sfeos_helpers \ stac_fastapi/elasticsearch[docs] \ stac_fastapi/opensearch \ diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index eb84e7fc..8ed81ce6 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -35,6 +35,18 @@ jobs: # Publish to PyPI twine upload dist/* + - name: Build and publish sfeos_helpers + working-directory: stac_fastapi/sfeos_helpers + env: + TWINE_USERNAME: "__token__" + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + run: | + # Build package + python setup.py sdist bdist_wheel + + # Publish to PyPI + twine upload dist/* + - name: Build and publish stac-fastapi-elasticsearch working-directory: stac_fastapi/elasticsearch env: diff --git a/CHANGELOG.md b/CHANGELOG.md index 4cc84550..7388cc78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,15 +5,33 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). + ## [Unreleased] +## [v5.0.0a0] - 2025-05-29 + ### Added +- Created new `sfeos_helpers` package to improve code organization and maintainability [#376](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/376) +- Added introduction section - What is stac-fastapi-elasticsearch-opensearch? - to README [#384](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/384) + ### Changed +- Refactored utility functions into dedicated modules within `sfeos_helpers` [#376](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/376): + - Created `database` package with specialized modules for index, document, and utility operations + - Created `aggregation` package for Elasticsearch/OpenSearch-specific aggregation functionality + - Moved shared logic from core module to helper functions for better code reuse + - Separated utility functions from constant mappings for clearer code organization +- Updated documentation to reflect recent code refactoring [#376](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/376) +- Improved README documentation with consistent formatting and enhanced sections [#381](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/381): + - Added sfeos logo and banner + - Added a comprehensive Quick Start guide + - Reorganized sections for better navigation + - Reformatted content with bullet points for improved readability + - Added more detailed examples for API interaction + ### Fixed - ## [v4.2.0] - 2025-05-15 ### Added @@ -388,7 +406,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
- Use genexp in execute_search and get_all_collections to return results. - Added db_to_stac serializer to item_collection method in core.py. -[Unreleased]: https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/compare/v4.2.0...main +[Unreleased]: https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/compare/v5.0.0a0...main +[v5.0.0a0]: https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/compare/v4.2.0...v5.0.0a0 [v4.2.0]: https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/compare/v4.1.0...v4.2.0 [v4.1.0]: https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/compare/v4.0.0...v4.1.0 [v4.0.0]: https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/compare/v3.2.5...v4.0.0 diff --git a/Makefile b/Makefile index 3440b7a2..5896e734 100644 --- a/Makefile +++ b/Makefile @@ -95,7 +95,8 @@ pybase-install: pip install -e ./stac_fastapi/api[dev] && \ pip install -e ./stac_fastapi/types[dev] && \ pip install -e ./stac_fastapi/extensions[dev] && \ - pip install -e ./stac_fastapi/core + pip install -e ./stac_fastapi/core && \ + pip install -e ./stac_fastapi/sfeos_helpers .PHONY: install-es install-es: pybase-install diff --git a/README.md b/README.md index 2604b467..11619f86 100644 --- a/README.md +++ b/README.md @@ -1,99 +1,205 @@ -# stac-fastapi-elasticsearch-opensearch (sfeos) +# stac-fastapi-elasticsearch-opensearch +

- -

Elasticsearch and Opensearch backends for the stac-fastapi project.

-

Featuring stac-fastapi.core for simplifying the creation and maintenance of custom STAC api backends.

+

- - [![PyPI version](https://badge.fury.io/py/stac-fastapi-elasticsearch.svg)](https://badge.fury.io/py/stac-fastapi-elasticsearch) [![PyPI version](https://badge.fury.io/py/stac-fastapi-opensearch.svg)](https://badge.fury.io/py/stac-fastapi-opensearch) - [![Join the chat at https://gitter.im/stac-fastapi-elasticsearch/community](https://badges.gitter.im/stac-fastapi-elasticsearch/community.svg)](https://gitter.im/stac-fastapi-elasticsearch/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +**Jump to:** [Project Introduction](#project-introduction---what-is-sfeos) | [Quick Start](#quick-start) | [Table of Contents](#table-of-contents) + [![Downloads](https://static.pepy.tech/badge/stac-fastapi-core?color=blue)](https://pepy.tech/project/stac-fastapi-core) + [![GitHub contributors](https://img.shields.io/github/contributors/stac-utils/stac-fastapi-elasticsearch-opensearch?color=blue)](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/graphs/contributors) + [![GitHub stars](https://img.shields.io/github/stars/stac-utils/stac-fastapi-elasticsearch-opensearch.svg?color=blue)](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/stargazers) + [![GitHub forks](https://img.shields.io/github/forks/stac-utils/stac-fastapi-elasticsearch-opensearch.svg?color=blue)](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/network/members) + [![PyPI version](https://img.shields.io/pypi/v/stac-fastapi-elasticsearch.svg?color=blue)](https://pypi.org/project/stac-fastapi-elasticsearch/) + [![STAC](https://img.shields.io/badge/STAC-1.1.0-blue.svg)](https://github.com/radiantearth/stac-spec/tree/v1.1.0) + [![stac-fastapi](https://img.shields.io/badge/stac--fastapi-5.2.0-blue.svg)](https://github.com/stac-utils/stac-fastapi) ---- +## Sponsors & Supporters -**Online Documentation**: [https://stac-utils.github.io/stac-fastapi-elasticsearch-opensearch](https://stac-utils.github.io/stac-fastapi-elasticsearch-opensearch/) +The following organizations have contributed time and/or funding to support the development of this project: -**Source Code**: [https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch) +

+ Healy Hyperspatial + Atomic Maps + VITO Remote Sensing +

+## Project Introduction - What is SFEOS? ---- +SFEOS (stac-fastapi-elasticsearch-opensearch) is a high-performance, scalable API implementation for serving SpatioTemporal Asset Catalog (STAC) data - an enhanced GeoJSON format designed specifically for geospatial assets like satellite imagery, aerial photography, and other Earth observation data. This project enables organizations to: -### Notes: - -- Our Api core library can be used to create custom backends. See [stac-fastapi-mongo](https://github.com/Healy-Hyperspatial/stac-fastapi-mongo) for a working example. -- Reach out on our [Gitter](https://app.gitter.im/#/room/#stac-fastapi-elasticsearch_community:gitter.im) channel or feel free to add to our [Discussions](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/discussions) page here on github. -- There is [Postman](https://documenter.getpostman.com/view/12888943/2s8ZDSdRHA) documentation here for examples on how to run some of the API routes locally - after starting the elasticsearch backend via the compose.yml file. -- The `/examples` folder shows an example of running stac-fastapi-elasticsearch from PyPI in docker without needing any code from the repository. There is also a Postman collection here that you can load into Postman for testing the API routes. +- **Efficiently catalog and search geospatial data** such as satellite imagery, aerial photography, DEMs, and other geospatial assets using Elasticsearch or OpenSearch as the database backend +- **Implement standardized STAC APIs** that support complex spatial, temporal, and property-based queries across large collections of geospatial data +- **Scale to millions of geospatial assets** with fast search performance through optimized spatial indexing and query capabilities +- **Support OGC-compliant filtering** including spatial operations (intersects, contains, etc.) and temporal queries +- **Perform geospatial aggregations** to analyze data distribution across space and time +This implementation builds on the STAC-FastAPI framework, providing a production-ready solution specifically optimized for Elasticsearch and OpenSearch databases. It's ideal for organizations managing large geospatial data catalogs who need efficient discovery and access capabilities through standardized APIs. -### Performance Note -The `enable_direct_response` option is provided by the stac-fastapi core library (introduced in stac-fastapi 5.2.0) and is available in this project starting from v4.0.0. -**You can now control this setting via the `ENABLE_DIRECT_RESPONSE` environment variable.** +## Common Deployment Patterns -When enabled (`ENABLE_DIRECT_RESPONSE=true`), endpoints return Starlette Response objects directly, bypassing FastAPI's default serialization for improved performance. **However, all FastAPI dependencies (including authentication, custom status codes, and validation) are disabled for all routes.** +stac-fastapi-elasticsearch-opensearch can be deployed in several ways depending on your needs: -This mode is best suited for public or read-only APIs where authentication and custom logic are not required. Default is `false` for safety. 
+- **Containerized Application**: Run as a Docker container with connections to Elasticsearch/OpenSearch databases +- **Serverless Function**: Deploy as AWS Lambda or similar serverless function with API Gateway +- **Traditional Server**: Run on virtual machines or bare metal servers in your infrastructure +- **Kubernetes**: Deploy as part of a larger microservices architecture with container orchestration -See: [issue #347](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/issues/347) +The implementation is flexible and can scale from small local deployments to large production environments serving millions of geospatial assets. +## Technologies -### To install from PyPI: +This project is built on the following technologies: STAC, stac-fastapi, FastAPI, Elasticsearch, Python, OpenSearch -```bash -# For versions 4.0.0a1 and newer (PEP 625 compliant naming): -pip install stac-fastapi-elasticsearch # Elasticsearch backend -pip install stac-fastapi-opensearch # Opensearch backend -pip install stac-fastapi-core # Core library +

+ STAC + Python + FastAPI + Elasticsearch + OpenSearch +

-# For versions 4.0.0a0 and older:
-pip install stac-fastapi.elasticsearch # Elasticsearch backend
-pip install stac-fastapi.opensearch # Opensearch backend
-pip install stac-fastapi.core # Core library
-```
+## Table of Contents
+
+- [Documentation & Resources](#documentation--resources)
+- [Package Structure](#package-structure)
+- [Examples](#examples)
+- [Performance](#performance)
+- [Quick Start](#quick-start)
+  - [Installation](#installation)
+  - [Running Locally](#running-locally)
+- [Configuration Reference](#configuration-reference)
+- [Interacting with the API](#interacting-with-the-api)
+- [Configure the API](#configure-the-api)
+- [Collection Pagination](#collection-pagination)
+- [Ingesting Sample Data CLI Tool](#ingesting-sample-data-cli-tool)
+- [Elasticsearch Mappings](#elasticsearch-mappings)
+- [Managing Elasticsearch Indices](#managing-elasticsearch-indices)
+  - [Snapshots](#snapshots)
+  - [Reindexing](#reindexing)
+- [Auth](#auth)
+- [Aggregation](#aggregation)
+- [Rate Limiting](#rate-limiting)
+
+## Documentation & Resources
+
+- **Online Documentation**: [https://stac-utils.github.io/stac-fastapi-elasticsearch-opensearch](https://stac-utils.github.io/stac-fastapi-elasticsearch-opensearch/)
+- **Source Code**: [https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch)
+- **API Examples**: [Postman Documentation](https://documenter.getpostman.com/view/12888943/2s8ZDSdRHA) - Examples of how to use the API endpoints
+- **Community**:
+  - [Gitter Chat](https://app.gitter.im/#/room/#stac-fastapi-elasticsearch_community:gitter.im) - For real-time discussions
+  - [GitHub Discussions](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/discussions) - For longer-form questions and answers
+
+## Package Structure
+
+This project is organized into several packages, each with a specific purpose:
+
+- **stac_fastapi_core**: Core functionality that's database-agnostic, including API models, extensions, and shared utilities. This package provides the foundation for building STAC API implementations with any database backend. See [stac-fastapi-mongo](https://github.com/Healy-Hyperspatial/stac-fastapi-mongo) for a working example.
+
+- **sfeos_helpers**: Shared helper functions and utilities used by both the Elasticsearch and OpenSearch backends. This package includes:
+  - `database`: Specialized modules for index, document, and database utility operations
+  - `aggregation`: Elasticsearch/OpenSearch-specific aggregation functionality
+  - Shared logic and utilities that improve code reuse between backends
+
+- **stac_fastapi_elasticsearch**: Complete implementation of the STAC API using Elasticsearch as the backend database. This package depends on both `stac_fastapi_core` and `sfeos_helpers`.
+
+- **stac_fastapi_opensearch**: Complete implementation of the STAC API using OpenSearch as the backend database. This package depends on both `stac_fastapi_core` and `sfeos_helpers`.
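+
+The packages are designed to be used together, with `sfeos_helpers` sitting between the core library and the two backend implementations. A minimal sketch of how the pieces relate in code (the import paths below are assumptions based on the package layout described above):
+
+```python
+# Database-agnostic building blocks shared by every backend
+from stac_fastapi.core.core import CoreClient
+
+# Shared Elasticsearch/OpenSearch helpers introduced by this refactor;
+# subpackage names are taken from the list above and may differ slightly
+from stac_fastapi.sfeos_helpers import aggregation, database
+```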
+ +## Examples + +The `/examples` directory contains several useful examples and reference implementations: + +- **pip_docker**: Examples of running stac-fastapi-elasticsearch from PyPI in Docker without needing any code from the repository +- **auth**: Authentication examples including: + - Basic authentication + - OAuth2 with Keycloak + - Route dependencies configuration +- **rate_limit**: Example of implementing rate limiting for API requests +- **postman_collections**: Postman collection files you can import for testing API endpoints + +These examples provide practical reference implementations for various deployment scenarios and features. + +## Performance + +### Direct Response Mode + +- The `enable_direct_response` option is provided by the stac-fastapi core library (introduced in stac-fastapi 5.2.0) and is available in this project starting from v4.0.0. +- **Control via environment variable**: Set `ENABLE_DIRECT_RESPONSE=true` to enable this feature. +- **How it works**: When enabled, endpoints return Starlette Response objects directly, bypassing FastAPI's default serialization for improved performance. +- **Important limitation**: All FastAPI dependencies (including authentication, custom status codes, and validation) are disabled for all routes when this mode is enabled. +- **Best use case**: This mode is best suited for public or read-only APIs where authentication and custom logic are not required. +- **Default setting**: `false` for safety. +- **More information**: See [issue #347](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/issues/347) for background and implementation details. + +## Quick Start + +This section helps you get up and running with stac-fastapi-elasticsearch-opensearch quickly. + +### Installation + +- **For versions 4.0.0a1 and newer** (PEP 625 compliant naming): + ```bash + pip install stac-fastapi-elasticsearch # Elasticsearch backend + pip install stac-fastapi-opensearch # Opensearch backend + pip install stac-fastapi-core # Core library + ``` + +- **For versions 4.0.0a0 and older**: + ```bash + pip install stac-fastapi.elasticsearch # Elasticsearch backend + pip install stac-fastapi.opensearch # Opensearch backend + pip install stac-fastapi.core # Core library + ``` > **Important Note:** Starting with version 4.0.0a1, package names have changed from using periods (e.g., `stac-fastapi.core`) to using hyphens (e.g., `stac-fastapi-core`) to comply with PEP 625. The internal package structure uses underscores, but users should install with hyphens as shown above. Please update your requirements files accordingly. -### To install and run via pre-built Docker Images +### Running Locally -We provide ready-to-use Docker images through GitHub Container Registry ([ElasticSearch](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pkgs/container/stac-fastapi-es) and [OpenSearch](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pkgs/container/stac-fastapi-os) backends). 
You can easily pull and run these images: +There are two main ways to run the API locally: -```shell -# For Elasticsearch backend -docker pull ghcr.io/stac-utils/stac-fastapi-es:latest +#### Using Pre-built Docker Images -# For OpenSearch backend -docker pull ghcr.io/stac-utils/stac-fastapi-os:latest -``` +- We provide ready-to-use Docker images through GitHub Container Registry: + - [ElasticSearch backend](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pkgs/container/stac-fastapi-es) + - [OpenSearch backend](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pkgs/container/stac-fastapi-os) -## Run Elasticsearch API backend on localhost:8080 +- **Pull and run the images**: + ```shell + # For Elasticsearch backend + docker pull ghcr.io/stac-utils/stac-fastapi-es:latest -You need to ensure [**Docker Compose**](https://docs.docker.com/compose/install/) or [**Podman Compose**](https://podman-desktop.io/docs/compose) installed and running on your machine. In the following command instead of `docker compose` you can use `podman-compose` as well. + # For OpenSearch backend + docker pull ghcr.io/stac-utils/stac-fastapi-os:latest + ``` -```shell -docker compose up elasticsearch app-elasticsearch -``` +#### Using Docker Compose -By default, Docker Compose uses Elasticsearch 8.x and OpenSearch 2.11.1. -If you wish to use a different version, put the following in a -file named `.env` in the same directory you run Docker Compose from: +- **Prerequisites**: Ensure [Docker Compose](https://docs.docker.com/compose/install/) or [Podman Compose](https://podman-desktop.io/docs/compose) is installed on your machine. -```shell -ELASTICSEARCH_VERSION=8.11.0 -OPENSEARCH_VERSION=2.11.1 -ENABLE_DIRECT_RESPONSE=false -``` -The most recent Elasticsearch 7.x versions should also work. See the [opensearch-py docs](https://github.com/opensearch-project/opensearch-py/blob/main/COMPATIBILITY.md) for compatibility information. +- **Start the API**: + ```shell + docker compose up elasticsearch app-elasticsearch + ``` -#### **Configuration reference keys:** +- **Configuration**: By default, Docker Compose uses Elasticsearch 8.x and OpenSearch 2.11.1. To use different versions, create a `.env` file: + ```shell + ELASTICSEARCH_VERSION=8.11.0 + OPENSEARCH_VERSION=2.11.1 + ENABLE_DIRECT_RESPONSE=false + ``` + +- **Compatibility**: The most recent Elasticsearch 7.x versions should also work. See the [opensearch-py docs](https://github.com/opensearch-project/opensearch-py/blob/main/COMPATIBILITY.md) for compatibility information. + + + +## Configuration Reference You can customize additional settings in your `.env` file: -###### Key variables to configure: | Variable | Description | Default | Required | |------------------------------|--------------------------------------------------------------------------------------|--------------------------|---------------------------------------------------------------------------------------------| @@ -101,7 +207,7 @@ You can customize additional settings in your `.env` file: | `ES_PORT` | Port for Elasticsearch/OpenSearch. | `9200` (ES) / `9202` (OS)| Optional | | `ES_USE_SSL` | Use SSL for connecting to Elasticsearch/OpenSearch. | `false` | Optional | | `ES_VERIFY_CERTS` | Verify SSL certificates when connecting. | `false` | Optional | -| `STAC_FASTAPI_TITLE` | Title of the API in the documentation. | `stac-fastapi-elasticsearch` or `stac-fastapi-opensearch` | Optional | +| `STAC_FASTAPI_TITLE` | Title of the API in the documentation. 
| `stac-fastapi-<backend>` | Optional |
| `STAC_FASTAPI_DESCRIPTION` | Description of the API in the documentation. | N/A | Optional |
| `STAC_FASTAPI_VERSION` | API version. | `2.1` | Optional |
| `STAC_FASTAPI_LANDING_PAGE_ID` | Landing page ID | `stac-fastapi` | Optional |
@@ -124,223 +230,310 @@ You can customize additional settings in your `.env` file:

## Interacting with the API

-To create a new Collection:
-
-```shell
-curl -X "POST" "http://localhost:8080/collections" \
-     -H 'Content-Type: application/json; charset=utf-8' \
-     -d $'{
-  "id": "my_collection"
-}'
-```
-
-Note: this "Collections Transaction" behavior is not part of the STAC API, but may be soon.
+- **Creating a Collection**:
+  ```shell
+  curl -X "POST" "http://localhost:8080/collections" \
+       -H 'Content-Type: application/json; charset=utf-8' \
+       -d $'{
+    "id": "my_collection"
+  }'
+  ```
+
+- **Adding an Item to a Collection**:
+  ```shell
+  curl -X "POST" "http://localhost:8080/collections/my_collection/items" \
+       -H 'Content-Type: application/json; charset=utf-8' \
+       -d @item.json
+  ```
+
+- **Searching for Items**:
+  ```shell
+  curl -X "GET" "http://localhost:8080/search" \
+       -H 'Content-Type: application/json; charset=utf-8' \
+       -d $'{
+    "collections": ["my_collection"],
+    "limit": 10
+  }'
+  ```
+
+- **Filtering by Bbox**:
+  ```shell
+  curl -X "GET" "http://localhost:8080/search" \
+       -H 'Content-Type: application/json; charset=utf-8' \
+       -d $'{
+    "collections": ["my_collection"],
+    "bbox": [-180, -90, 180, 90]
+  }'
+  ```
+
+- **Filtering by Datetime**:
+  ```shell
+  curl -X "GET" "http://localhost:8080/search" \
+       -H 'Content-Type: application/json; charset=utf-8' \
+       -d $'{
+    "collections": ["my_collection"],
+    "datetime": "2020-01-01T00:00:00Z/2020-12-31T23:59:59Z"
+  }'
+  ```

## Configure the API

-By default the API title and description are set to `stac-fastapi-<backend>`. Change the API title and description from the default by setting the `STAC_FASTAPI_TITLE` and `STAC_FASTAPI_DESCRIPTION` environment variables, respectively.
+- **API Title and Description**: By default set to `stac-fastapi-<backend>`. Customize these by setting:
+  - `STAC_FASTAPI_TITLE`: Changes the API title in the documentation
+  - `STAC_FASTAPI_DESCRIPTION`: Changes the API description in the documentation

-By default the API will read from and write to the `collections` and `items_<collection id>` indices. To change the API collections index and the items index prefix, change the `STAC_COLLECTIONS_INDEX` and `STAC_ITEMS_INDEX_PREFIX` environment variables.
+- **Database Indices**: By default, the API reads from and writes to:
+  - `collections` index for collections
+  - `items_<collection id>` indices for items
+  - Customize with `STAC_COLLECTIONS_INDEX` and `STAC_ITEMS_INDEX_PREFIX` environment variables

-The application root path is left as the base url by default. If deploying to AWS Lambda with a Gateway API, you will need to define the app root path to be the same as the Gateway API stage name where you will deploy the API. The app root path can be defined with the `STAC_FASTAPI_ROOT_PATH` environment variable (`/v1`, for example)
+- **Root Path Configuration**: The application root path is the base URL by default.
+  - For AWS Lambda with Gateway API: Set `STAC_FASTAPI_ROOT_PATH` to match the Gateway API stage name (e.g., `/v1`)

-## Collection pagination
-The collections route handles optional `limit` and `token` parameters. The `links` field that is
-returned from the `/collections` route contains a `next` link with the token that can be used to
-get the next page of results.
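+
+As a minimal sketch of these settings in practice, they can be set before the server starts (the `stac_fastapi.elasticsearch.app:app` module path is assumed from the package layout; substitute the OpenSearch equivalent for that backend):
+
+```python
+import os
+
+import uvicorn
+
+# Configure API metadata and the root path before the application is imported
+os.environ["STAC_FASTAPI_TITLE"] = "my-stac-api"
+os.environ["STAC_FASTAPI_DESCRIPTION"] = "STAC API for my organization's imagery catalog"
+os.environ["STAC_FASTAPI_ROOT_PATH"] = "/v1"  # e.g., an API Gateway stage name
+
+uvicorn.run("stac_fastapi.elasticsearch.app:app", host="0.0.0.0", port=8080)
+```
+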
+## Collection Pagination -```shell -curl -X "GET" "http://localhost:8080/collections?limit=1&token=example_token" -``` +- **Overview**: The collections route supports pagination through optional query parameters. +- **Parameters**: + - `limit`: Controls the number of collections returned per page + - `token`: Used to retrieve subsequent pages of results +- **Response Structure**: The `links` field in the response contains a `next` link with the token for the next page of results. +- **Example Usage**: + ```shell + curl -X "GET" "http://localhost:8080/collections?limit=1&token=example_token" + ``` ## Ingesting Sample Data CLI Tool -```shell -Usage: data_loader.py [OPTIONS] +- **Overview**: The `data_loader.py` script provides a convenient way to load STAC items into the database. - Load STAC items into the database. +- **Usage**: + ```shell + python3 data_loader.py --base-url http://localhost:8080 + ``` -Options: +- **Options**: + ``` --base-url TEXT Base URL of the STAC API [required] --collection-id TEXT ID of the collection to which items are added --use-bulk Use bulk insert method for items --data-dir PATH Directory containing collection.json and feature collection file --help Show this message and exit. -``` - -```shell -python3 data_loader.py --base-url http://localhost:8080 -``` - + ``` + +- **Example Workflows**: + - **Loading Sample Data**: + ```shell + python3 data_loader.py --base-url http://localhost:8080 + ``` + - **Loading Data to a Specific Collection**: + ```shell + python3 data_loader.py --base-url http://localhost:8080 --collection-id my-collection + ``` + - **Using Bulk Insert for Performance**: + ```shell + python3 data_loader.py --base-url http://localhost:8080 --use-bulk + ``` ## Elasticsearch Mappings -Mappings apply to search index, not source. The mappings are stored in index templates on application startup. -These templates will be used implicitly when creating new Collection and Item indices. - +- **Overview**: Mappings apply to search index, not source data. They define how documents and their fields are stored and indexed. +- **Implementation**: + - Mappings are stored in index templates that are created on application startup + - These templates are automatically applied when creating new Collection and Item indices + - The `sfeos_helpers` package contains shared mapping definitions used by both Elasticsearch and OpenSearch backends +- **Customization**: Custom mappings can be defined by extending the base mapping templates. ## Managing Elasticsearch Indices -### Snapshots - -This section covers how to create a snapshot repository and then create and restore snapshots with this. - -Create a snapshot repository. This puts the files in the `elasticsearch/snapshots` in this git repo clone, as -the elasticsearch.yml and compose files create a mapping from that directory to -`/usr/share/elasticsearch/snapshots` within the Elasticsearch container and grant permissions on using it. - -```shell -curl -X "PUT" "http://localhost:9200/_snapshot/my_fs_backup" \ - -H 'Content-Type: application/json; charset=utf-8' \ - -d $'{ - "type": "fs", - "settings": { - "location": "/usr/share/elasticsearch/snapshots/my_fs_backup" - } -}' -``` - -The next step is to create a snapshot of one or more indices into this snapshot repository. This command creates -a snapshot named `my_snapshot_2` and waits for the action to be completed before returning. This can also be done -asynchronously, and queried for status. 
The `indices` parameter determines which indices are snapshotted, and -can include wildcards. - -```shell -curl -X "PUT" "http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2?wait_for_completion=true" \ - -H 'Content-Type: application/json; charset=utf-8' \ - -d $'{ - "metadata": { - "taken_because": "dump of all items", - "taken_by": "pvarner" - }, - "include_global_state": false, - "ignore_unavailable": false, - "indices": "items_my-collection" -}' -``` - -To see the status of this snapshot: - -```shell -curl http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2 -``` - -To see all the snapshots: - -```shell -curl http://localhost:9200/_snapshot/my_fs_backup/_all -``` - -To restore a snapshot, run something similar to the following. This specific command will restore any indices that -match `items_*` and rename them so that the new index name will be suffixed with `-copy`. - -```shell -curl -X "POST" "http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2/_restore?wait_for_completion=true" \ - -H 'Content-Type: application/json; charset=utf-8' \ - -d $'{ - "include_aliases": false, - "include_global_state": false, - "ignore_unavailable": true, - "rename_replacement": "items_$1-copy", - "indices": "items_*", - "rename_pattern": "items_(.+)" -}' -``` - -Now the item documents have been restored in to the new index (e.g., `my-collection-copy`), but the value of the -`collection` field in those documents is still the original value of `my-collection`. To update these to match the -new collection name, run the following Elasticsearch Update By Query command, substituting the old collection name -into the term filter and the new collection name into the script parameter: - -```shell -curl -X "POST" "http://localhost:9200/items_my-collection-copy/_update_by_query" \ - -H 'Content-Type: application/json; charset=utf-8' \ - -d $'{ - "query": { - "match_all": {} -}, - "script": { - "lang": "painless", - "params": { - "collection": "my-collection-copy" - }, - "source": "ctx._source.collection = params.collection" - } -}' -``` - -Then, create a new collection through the api with the new name for each of the restored indices: - -```shell -curl -X "POST" "http://localhost:8080/collections" \ - -H 'Content-Type: application/json' \ - -d $'{ - "id": "my-collection-copy" -}' -``` -Voila! You have a copy of the collection now that has a resource URI (`/collections/my-collection-copy`) and can be -correctly queried by collection name. +### Snapshots -### Reindexing -This section covers how to reindex documents stored in Elasticsearch/OpenSearch. -A reindex operation might be useful to apply changes to documents or to correct dynamically generated mappings. - -The index templates will make sure that manually created indices will also have the correct mappings and settings. - -In this example, we will make a copy of an existing Item index `items_my-collection-lower_my-collection-hex-000001` but change the Item identifier to be lowercase. - -```shell -curl -X "POST" "http://localhost:9200/_reindex" \ - -H 'Content-Type: application/json' \ - -d $'{ - "source": { - "index": "items_my-collection-lower_my-collection-hex-000001" - }, - "dest": { - "index": "items_my-collection-lower_my-collection-hex-000002" +- **Overview**: Snapshots provide a way to backup and restore your indices. 
+ +- **Creating a Snapshot Repository**: + ```shell + curl -X "PUT" "http://localhost:9200/_snapshot/my_fs_backup" \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "type": "fs", + "settings": { + "location": "/usr/share/elasticsearch/snapshots/my_fs_backup" + } + }' + ``` + - This creates a snapshot repository that stores files in the elasticsearch/snapshots directory in this git repo clone + - The elasticsearch.yml and compose files create a mapping from that directory to /usr/share/elasticsearch/snapshots within the Elasticsearch container and grant permissions for using it + +- **Creating a Snapshot**: + ```shell + curl -X "PUT" "http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2?wait_for_completion=true" \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "metadata": { + "taken_because": "dump of all items", + "taken_by": "pvarner" }, + "include_global_state": false, + "ignore_unavailable": false, + "indices": "items_my-collection" + }' + ``` + - This creates a snapshot named my_snapshot_2 and waits for the action to be completed before returning + - This can also be done asynchronously by omitting the wait_for_completion parameter, and queried for status later + - The indices parameter determines which indices are snapshotted, and can include wildcards + +- **Viewing Snapshots**: + ```shell + # View a specific snapshot + curl http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2 + + # View all snapshots + curl http://localhost:9200/_snapshot/my_fs_backup/_all + ``` + - These commands allow you to check the status and details of your snapshots + +- **Restoring a Snapshot**: + ```shell + curl -X "POST" "http://localhost:9200/_snapshot/my_fs_backup/my_snapshot_2/_restore?wait_for_completion=true" \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "include_aliases": false, + "include_global_state": false, + "ignore_unavailable": true, + "rename_replacement": "items_$1-copy", + "indices": "items_*", + "rename_pattern": "items_(.+)" + }' + ``` + - This specific command will restore any indices that match items_* and rename them so that the new index name will be suffixed with -copy + - The rename_pattern and rename_replacement parameters allow you to restore indices under new names + +- **Updating Collection References**: + ```shell + curl -X "POST" "http://localhost:9200/items_my-collection-copy/_update_by_query" \ + -H 'Content-Type: application/json; charset=utf-8' \ + -d $'{ + "query": { + "match_all": {} + }, "script": { - "source": "ctx._source.id = ctx._source.id.toLowerCase()", - "lang": "painless" + "lang": "painless", + "params": { + "collection": "my-collection-copy" + }, + "source": "ctx._source.collection = params.collection" } }' -``` - -If we are happy with the data in the newly created index, we can move the alias `items_my-collection` to the new index `items_my-collection-lower_my-collection-hex-000002`. 
-```shell -curl -X "POST" "http://localhost:9200/_aliases" \ - -h 'Content-Type: application/json' \ - -d $'{ - "actions": [ - { - "remove": { - "index": "*", - "alias": "items_my-collection" - } - }, - { - "add": { - "index": "items_my-collection-lower_my-collection-hex-000002", - "alias": "items_my-collection" - } - } - ] + ``` + - After restoring, the item documents have been restored in the new index (e.g., my-collection-copy), but the value of the collection field in those documents is still the original value of my-collection + - This command updates these values to match the new collection name using Elasticsearch's Update By Query feature + +- **Creating a New Collection**: + ```shell + curl -X "POST" "http://localhost:8080/collections" \ + -H 'Content-Type: application/json' \ + -d $'{ + "id": "my-collection-copy" }' -``` + ``` + - The final step is to create a new collection through the API with the new name for each of the restored indices + - This gives you a copy of the collection that has a resource URI (/collections/my-collection-copy) and can be correctly queried by collection name -The modified Items with lowercase identifiers will now be visible to users accessing `my-collection` in the STAC API. +### Reindexing +- **Overview**: Reindexing allows you to copy documents from one index to another, optionally transforming them in the process. + +- **Use Cases**: + - Apply changes to documents + - Correct dynamically generated mappings + - Transform data (e.g., lowercase identifiers) + - The index templates will make sure that manually created indices will also have the correct mappings and settings + +- **Example: Reindexing with Transformation**: + ```shell + curl -X "POST" "http://localhost:9200/_reindex" \ + -H 'Content-Type: application/json' \ + -d $'{ + "source": { + "index": "items_my-collection-lower_my-collection-hex-000001" + }, + "dest": { + "index": "items_my-collection-lower_my-collection-hex-000002" + }, + "script": { + "source": "ctx._source.id = ctx._source.id.toLowerCase()", + "lang": "painless" + } + }' + ``` + - In this example, we make a copy of an existing Item index but change the Item identifier to be lowercase + - The script parameter allows you to transform documents during the reindexing process + +- **Updating Aliases**: + ```shell + curl -X "POST" "http://localhost:9200/_aliases" \ + -H 'Content-Type: application/json' \ + -d $'{ + "actions": [ + { + "remove": { + "index": "*", + "alias": "items_my-collection" + } + }, + { + "add": { + "index": "items_my-collection-lower_my-collection-hex-000002", + "alias": "items_my-collection" + } + } + ] + }' + ``` + - If you are happy with the data in the newly created index, you can move the alias items_my-collection to the new index + - This makes the modified Items with lowercase identifiers visible to users accessing my-collection in the STAC API + - Using aliases allows you to switch between different index versions without changing the API endpoint ## Auth -Authentication is an optional feature that can be enabled through `Route Dependencies` examples can be found and a more detailed explanation in [examples/auth](examples/auth). +- **Overview**: Authentication is an optional feature that can be enabled through Route Dependencies. +- **Implementation Options**: + - Basic authentication + - OAuth2 with Keycloak + - Custom route dependencies +- **Configuration**: Authentication can be configured using the `STAC_FASTAPI_ROUTE_DEPENDENCIES` environment variable. 
+- **Examples and Documentation**: Detailed examples and implementation guides can be found in the [examples/auth](examples/auth) directory.

## Aggregation

-Aggregation of points and geometries, as well as frequency distribution aggregation of any other property including dates is supported in stac-fastapi-elasticsearch-opensearch. Aggregations can be defined at the root Catalog level (`/aggregations`) and at the Collection level (`/collections/{collection_id}/aggregations`). Details for supported aggregations can be found in [the aggregation docs](./docs/src/aggregation.md)
+- **Supported Aggregations**:
+  - Spatial aggregations of points and geometries
+  - Frequency distribution aggregation of any property including dates
+  - Temporal distribution of datetime values
+
+- **Endpoint Locations**:
+  - Root Catalog level: `/aggregations`
+  - Collection level: `/collections/{collection_id}/aggregations`
+
+- **Implementation Details**: The `sfeos_helpers.aggregation` package provides specialized functionality for both Elasticsearch and OpenSearch backends.
+
+- **Documentation**: Detailed information about supported aggregations can be found in [the aggregation docs](./docs/src/aggregation.md).
+

## Rate Limiting

-Rate limiting is an optional security feature that controls API request frequency on a remote address basis. It's enabled by setting the `STAC_FASTAPI_RATE_LIMIT` environment variable, e.g., `500/minute`. This limits each client to 500 requests per minute, helping prevent abuse and maintain API stability. Implementation examples are available in the [examples/rate_limit](examples/rate_limit) directory.
\ No newline at end of file
+- **Overview**: Rate limiting is an optional security feature that controls API request frequency on a remote address basis.
+
+- **Configuration**: Enabled by setting the `STAC_FASTAPI_RATE_LIMIT` environment variable:
+  ```
+  STAC_FASTAPI_RATE_LIMIT=500/minute
+  ```
+
+- **Functionality**:
+  - Limits each client to a specified number of requests per time period (e.g., 500 requests per minute)
+  - Helps prevent API abuse and maintains system stability
+  - Ensures fair resource allocation among all clients
+
+- **Examples**: Implementation examples are available in the [examples/rate_limit](examples/rate_limit) directory.
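+
+For a quick client-side check that the limit is enforced, a minimal sketch using the `requests` library (the URL and limit values are illustrative):
+
+```python
+import requests
+
+# With STAC_FASTAPI_RATE_LIMIT=500/minute, the 501st request within a minute
+# from the same address should come back with HTTP 429 (Too Many Requests)
+for i in range(501):
+    response = requests.get("http://localhost:8080/collections")
+    if response.status_code == 429:
+        print(f"Rate limited after {i + 1} requests")
+        break
+```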
+ diff --git a/assets/STAC-01.png b/assets/STAC-01.png new file mode 100644 index 00000000..99deaefc Binary files /dev/null and b/assets/STAC-01.png differ diff --git a/assets/VITO.png b/assets/VITO.png new file mode 100644 index 00000000..c226b031 Binary files /dev/null and b/assets/VITO.png differ diff --git a/assets/am-logo-black.png b/assets/am-logo-black.png new file mode 100644 index 00000000..8b53392f Binary files /dev/null and b/assets/am-logo-black.png differ diff --git a/assets/elasticsearch.png b/assets/elasticsearch.png new file mode 100644 index 00000000..781567f8 Binary files /dev/null and b/assets/elasticsearch.png differ diff --git a/assets/fastapi.svg b/assets/fastapi.svg new file mode 100644 index 00000000..c307de80 --- /dev/null +++ b/assets/fastapi.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/assets/hh-logo-blue.png b/assets/hh-logo-blue.png new file mode 100644 index 00000000..850408ba Binary files /dev/null and b/assets/hh-logo-blue.png differ diff --git a/assets/opensearch.svg b/assets/opensearch.svg new file mode 100644 index 00000000..6ece2f4e --- /dev/null +++ b/assets/opensearch.svg @@ -0,0 +1,10 @@ + + +OpenSearch S logo +Search engine software fork of Elasticsearch + + + + + + diff --git a/assets/python.png b/assets/python.png new file mode 100644 index 00000000..f9674d82 Binary files /dev/null and b/assets/python.png differ diff --git a/assets/sfeos.png b/assets/sfeos.png new file mode 100644 index 00000000..a816f16c Binary files /dev/null and b/assets/sfeos.png differ diff --git a/compose.yml b/compose.yml index 125f6539..240934d6 100644 --- a/compose.yml +++ b/compose.yml @@ -9,7 +9,7 @@ services: environment: - STAC_FASTAPI_TITLE=stac-fastapi-elasticsearch - STAC_FASTAPI_DESCRIPTION=A STAC FastAPI with an Elasticsearch backend - - STAC_FASTAPI_VERSION=4.2.0 + - STAC_FASTAPI_VERSION=5.0.0a0 - STAC_FASTAPI_LANDING_PAGE_ID=stac-fastapi-elasticsearch - APP_HOST=0.0.0.0 - APP_PORT=8080 @@ -42,7 +42,7 @@ services: environment: - STAC_FASTAPI_TITLE=stac-fastapi-opensearch - STAC_FASTAPI_DESCRIPTION=A STAC FastAPI with an Opensearch backend - - STAC_FASTAPI_VERSION=4.2.0 + - STAC_FASTAPI_VERSION=5.0.0a0 - STAC_FASTAPI_LANDING_PAGE_ID=stac-fastapi-opensearch - APP_HOST=0.0.0.0 - APP_PORT=8082 diff --git a/dockerfiles/Dockerfile.ci.es b/dockerfiles/Dockerfile.ci.es index a6fb6a53..5bd3853b 100644 --- a/dockerfiles/Dockerfile.ci.es +++ b/dockerfiles/Dockerfile.ci.es @@ -12,6 +12,7 @@ RUN apt-get update && \ COPY . /app/ RUN pip3 install --no-cache-dir -e ./stac_fastapi/core && \ + pip3 install --no-cache-dir -e ./stac_fastapi/sfeos_helpers && \ pip3 install --no-cache-dir ./stac_fastapi/elasticsearch[server] USER root diff --git a/dockerfiles/Dockerfile.ci.os b/dockerfiles/Dockerfile.ci.os index a046a3b6..e359f1a8 100644 --- a/dockerfiles/Dockerfile.ci.os +++ b/dockerfiles/Dockerfile.ci.os @@ -12,6 +12,7 @@ RUN apt-get update && \ COPY . /app/ RUN pip3 install --no-cache-dir -e ./stac_fastapi/core && \ + pip3 install --no-cache-dir -e ./stac_fastapi/sfeos_helpers && \ pip3 install --no-cache-dir ./stac_fastapi/opensearch[server] USER root diff --git a/dockerfiles/Dockerfile.deploy.es b/dockerfiles/Dockerfile.deploy.es index 2eab7b9d..2a6fc4fc 100644 --- a/dockerfiles/Dockerfile.deploy.es +++ b/dockerfiles/Dockerfile.deploy.es @@ -13,6 +13,7 @@ WORKDIR /app COPY . 
/app RUN pip install --no-cache-dir -e ./stac_fastapi/core +RUN pip install --no-cache-dir -e ./stac_fastapi/sfeos_helpers RUN pip install --no-cache-dir ./stac_fastapi/elasticsearch[server] EXPOSE 8080 diff --git a/dockerfiles/Dockerfile.deploy.os b/dockerfiles/Dockerfile.deploy.os index 035b181e..8a532f0c 100644 --- a/dockerfiles/Dockerfile.deploy.os +++ b/dockerfiles/Dockerfile.deploy.os @@ -13,6 +13,7 @@ WORKDIR /app COPY . /app RUN pip install --no-cache-dir -e ./stac_fastapi/core +RUN pip install --no-cache-dir -e ./stac_fastapi/sfeos_helpers RUN pip install --no-cache-dir ./stac_fastapi/opensearch[server] EXPOSE 8080 diff --git a/dockerfiles/Dockerfile.dev.es b/dockerfiles/Dockerfile.dev.es index 009f9681..7a01aca8 100644 --- a/dockerfiles/Dockerfile.dev.es +++ b/dockerfiles/Dockerfile.dev.es @@ -16,4 +16,5 @@ WORKDIR /app COPY . /app RUN pip install --no-cache-dir -e ./stac_fastapi/core +RUN pip install --no-cache-dir -e ./stac_fastapi/sfeos_helpers RUN pip install --no-cache-dir -e ./stac_fastapi/elasticsearch[dev,server] diff --git a/dockerfiles/Dockerfile.dev.os b/dockerfiles/Dockerfile.dev.os index d9dc8b0a..28012dfb 100644 --- a/dockerfiles/Dockerfile.dev.os +++ b/dockerfiles/Dockerfile.dev.os @@ -16,4 +16,5 @@ WORKDIR /app COPY . /app RUN pip install --no-cache-dir -e ./stac_fastapi/core +RUN pip install --no-cache-dir -e ./stac_fastapi/sfeos_helpers RUN pip install --no-cache-dir -e ./stac_fastapi/opensearch[dev,server] diff --git a/dockerfiles/Dockerfile.docs b/dockerfiles/Dockerfile.docs index f1fe63b8..aa080c7c 100644 --- a/dockerfiles/Dockerfile.docs +++ b/dockerfiles/Dockerfile.docs @@ -12,6 +12,7 @@ WORKDIR /opt/src RUN python -m pip install \ stac_fastapi/core \ + stac_fastapi/sfeos_helpers \ stac_fastapi/elasticsearch \ stac_fastapi/opensearch diff --git a/examples/auth/compose.basic_auth.yml b/examples/auth/compose.basic_auth.yml index e603f130..eb7b2d75 100644 --- a/examples/auth/compose.basic_auth.yml +++ b/examples/auth/compose.basic_auth.yml @@ -9,7 +9,7 @@ services: environment: - STAC_FASTAPI_TITLE=stac-fastapi-elasticsearch - STAC_FASTAPI_DESCRIPTION=A STAC FastAPI with an Elasticsearch backend - - STAC_FASTAPI_VERSION=4.2.0 + - STAC_FASTAPI_VERSION=5.0.0a0 - STAC_FASTAPI_LANDING_PAGE_ID=stac-fastapi-elasticsearch - APP_HOST=0.0.0.0 - APP_PORT=8080 @@ -43,7 +43,7 @@ services: environment: - STAC_FASTAPI_TITLE=stac-fastapi-opensearch - STAC_FASTAPI_DESCRIPTION=A STAC FastAPI with an Opensearch backend - - STAC_FASTAPI_VERSION=4.2.0 + - STAC_FASTAPI_VERSION=5.0.0a0 - STAC_FASTAPI_LANDING_PAGE_ID=stac-fastapi-opensearch - APP_HOST=0.0.0.0 - APP_PORT=8082 diff --git a/examples/auth/compose.oauth2.yml b/examples/auth/compose.oauth2.yml index 3a2f1982..f739e03a 100644 --- a/examples/auth/compose.oauth2.yml +++ b/examples/auth/compose.oauth2.yml @@ -9,7 +9,7 @@ services: environment: - STAC_FASTAPI_TITLE=stac-fastapi-elasticsearch - STAC_FASTAPI_DESCRIPTION=A STAC FastAPI with an Elasticsearch backend - - STAC_FASTAPI_VERSION=4.2.0 + - STAC_FASTAPI_VERSION=5.0.0a0 - STAC_FASTAPI_LANDING_PAGE_ID=stac-fastapi-elasticsearch - APP_HOST=0.0.0.0 - APP_PORT=8080 @@ -44,7 +44,7 @@ services: environment: - STAC_FASTAPI_TITLE=stac-fastapi-opensearch - STAC_FASTAPI_DESCRIPTION=A STAC FastAPI with an Opensearch backend - - STAC_FASTAPI_VERSION=4.2.0 + - STAC_FASTAPI_VERSION=5.0.0a0 - STAC_FASTAPI_LANDING_PAGE_ID=stac-fastapi-opensearch - APP_HOST=0.0.0.0 - APP_PORT=8082 diff --git a/examples/auth/compose.route_dependencies.yml b/examples/auth/compose.route_dependencies.yml 
index 967f9be6..3a11b1ad 100644 --- a/examples/auth/compose.route_dependencies.yml +++ b/examples/auth/compose.route_dependencies.yml @@ -9,7 +9,7 @@ services: environment: - STAC_FASTAPI_TITLE=stac-fastapi-elasticsearch - STAC_FASTAPI_DESCRIPTION=A STAC FastAPI with an Elasticsearch backend - - STAC_FASTAPI_VERSION=4.2.0 + - STAC_FASTAPI_VERSION=5.0.0a0 - STAC_FASTAPI_LANDING_PAGE_ID=stac-fastapi-elasticsearch - APP_HOST=0.0.0.0 - APP_PORT=8080 @@ -43,7 +43,7 @@ services: environment: - STAC_FASTAPI_TITLE=stac-fastapi-opensearch - STAC_FASTAPI_DESCRIPTION=A STAC FastAPI with an Opensearch backend - - STAC_FASTAPI_VERSION=4.2.0 + - STAC_FASTAPI_VERSION=5.0.0a0 - STAC_FASTAPI_LANDING_PAGE_ID=stac-fastapi-opensearch - APP_HOST=0.0.0.0 - APP_PORT=8082 diff --git a/examples/rate_limit/compose.rate_limit.yml b/examples/rate_limit/compose.rate_limit.yml index d1631f7b..448c7760 100644 --- a/examples/rate_limit/compose.rate_limit.yml +++ b/examples/rate_limit/compose.rate_limit.yml @@ -9,7 +9,7 @@ services: environment: - STAC_FASTAPI_TITLE=stac-fastapi-elasticsearch - STAC_FASTAPI_DESCRIPTION=A STAC FastAPI with an Elasticsearch backend - - STAC_FASTAPI_VERSION=4.2.0 + - STAC_FASTAPI_VERSION=5.0.0a0 - STAC_FASTAPI_LANDING_PAGE_ID=stac-fastapi-elasticsearch - APP_HOST=0.0.0.0 - APP_PORT=8080 @@ -43,7 +43,7 @@ services: environment: - STAC_FASTAPI_TITLE=stac-fastapi-opensearch - STAC_FASTAPI_DESCRIPTION=A STAC FastAPI with an Opensearch backend - - STAC_FASTAPI_VERSION=4.2.0 + - STAC_FASTAPI_VERSION=5.0.0a0 - APP_HOST=0.0.0.0 - APP_PORT=8082 - RELOAD=true diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 05212f5b..866b429a 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -1,11 +1,10 @@ """Core client.""" import logging -from collections import deque from datetime import datetime as datetime_type from datetime import timezone from enum import Enum -from typing import Any, Dict, List, Literal, Optional, Set, Type, Union +from typing import List, Optional, Set, Type, Union from urllib.parse import unquote_plus, urljoin import attr @@ -22,11 +21,11 @@ from stac_fastapi.core.base_database_logic import BaseDatabaseLogic from stac_fastapi.core.base_settings import ApiBaseSettings +from stac_fastapi.core.datetime_utils import format_datetime_range from stac_fastapi.core.models.links import PagingLinks from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer from stac_fastapi.core.session import Session from stac_fastapi.core.utilities import filter_fields -from stac_fastapi.extensions.core.filter.client import AsyncBaseFiltersClient from stac_fastapi.extensions.third_party.bulk_transactions import ( BaseBulkTransactionsClient, BulkTransactionMethod, @@ -37,7 +36,6 @@ from stac_fastapi.types.core import AsyncBaseCoreClient, AsyncBaseTransactionsClient from stac_fastapi.types.extension import ApiExtension from stac_fastapi.types.requests import get_base_url -from stac_fastapi.types.rfc3339 import DateTimeType, rfc3339_str_to_datetime from stac_fastapi.types.search import BaseSearchPostRequest logger = logging.getLogger(__name__) @@ -318,9 +316,8 @@ async def item_collection( ) if datetime: - datetime_search = self._return_date(datetime) search = self.database.apply_datetime_filter( - search=search, datetime_search=datetime_search + search=search, interval=datetime ) if bbox: @@ -374,87 +371,6 @@ async def get_item( ) return self.item_serializer.db_to_stac(item, 
base_url) - @staticmethod - def _return_date( - interval: Optional[Union[DateTimeType, str]] - ) -> Dict[str, Optional[str]]: - """ - Convert a date interval. - - (which may be a datetime, a tuple of one or two datetimes a string - representing a datetime or range, or None) into a dictionary for filtering - search results with Elasticsearch. - - This function ensures the output dictionary contains 'gte' and 'lte' keys, - even if they are set to None, to prevent KeyError in the consuming logic. - - Args: - interval (Optional[Union[DateTimeType, str]]): The date interval, which might be a single datetime, - a tuple with one or two datetimes, a string, or None. - - Returns: - dict: A dictionary representing the date interval for use in filtering search results, - always containing 'gte' and 'lte' keys. - """ - result: Dict[str, Optional[str]] = {"gte": None, "lte": None} - - if interval is None: - return result - - if isinstance(interval, str): - if "/" in interval: - parts = interval.split("/") - result["gte"] = parts[0] if parts[0] != ".." else None - result["lte"] = ( - parts[1] if len(parts) > 1 and parts[1] != ".." else None - ) - else: - converted_time = interval if interval != ".." else None - result["gte"] = result["lte"] = converted_time - return result - - if isinstance(interval, datetime_type): - datetime_iso = interval.isoformat() - result["gte"] = result["lte"] = datetime_iso - elif isinstance(interval, tuple): - start, end = interval - # Ensure datetimes are converted to UTC and formatted with 'Z' - if start: - result["gte"] = start.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" - if end: - result["lte"] = end.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" - - return result - - def _format_datetime_range(self, date_str: str) -> str: - """ - Convert a datetime range string into a normalized UTC string for API requests using rfc3339_str_to_datetime. - - Args: - date_str (str): A string containing two datetime values separated by a '/'. - - Returns: - str: A string formatted as 'YYYY-MM-DDTHH:MM:SSZ/YYYY-MM-DDTHH:MM:SSZ', with '..' used if any element is None. - """ - - def normalize(dt): - dt = dt.strip() - if not dt or dt == "..": - return ".." - dt_obj = rfc3339_str_to_datetime(dt) - dt_utc = dt_obj.astimezone(timezone.utc) - return dt_utc.strftime("%Y-%m-%dT%H:%M:%SZ") - - if not isinstance(date_str, str): - return "../.." - if "/" not in date_str: - return f"{normalize(date_str)}/{normalize(date_str)}" - try: - start, end = date_str.split("/", 1) - except Exception: - return "../.." - return f"{normalize(start)}/{normalize(end)}" - async def get_search( self, request: Request, @@ -506,7 +422,7 @@ async def get_search( } if datetime: - base_args["datetime"] = self._format_datetime_range(date_str=datetime) + base_args["datetime"] = format_datetime_range(date_str=datetime) if intersects: base_args["intersects"] = orjson.loads(unquote_plus(intersects)) @@ -576,9 +492,8 @@ async def post_search( ) if search_request.datetime: - datetime_search = self._return_date(search_request.datetime) search = self.database.apply_datetime_filter( - search=search, datetime_search=datetime_search + search=search, interval=search_request.datetime ) if search_request.bbox: @@ -947,159 +862,3 @@ def bulk_item_insert( logger.info(f"Bulk sync operation succeeded with {success} actions.") return f"Successfully added/updated {success} Items. {attempted - success} errors occurred." 
- - -_DEFAULT_QUERYABLES: Dict[str, Dict[str, Any]] = { - "id": { - "description": "ID", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/properties/id", - }, - "collection": { - "description": "Collection", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/then/properties/collection", - }, - "geometry": { - "description": "Geometry", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/1/oneOf/0/properties/geometry", - }, - "datetime": { - "description": "Acquisition Timestamp", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/datetime", - }, - "created": { - "description": "Creation Timestamp", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/created", - }, - "updated": { - "description": "Creation Timestamp", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/updated", - }, - "cloud_cover": { - "description": "Cloud Cover", - "$ref": "https://stac-extensions.github.io/eo/v1.0.0/schema.json#/definitions/fields/properties/eo:cloud_cover", - }, - "cloud_shadow_percentage": { - "title": "Cloud Shadow Percentage", - "description": "Cloud Shadow Percentage", - "type": "number", - "minimum": 0, - "maximum": 100, - }, - "nodata_pixel_percentage": { - "title": "No Data Pixel Percentage", - "description": "No Data Pixel Percentage", - "type": "number", - "minimum": 0, - "maximum": 100, - }, -} - -_ES_MAPPING_TYPE_TO_JSON: Dict[ - str, Literal["string", "number", "boolean", "object", "array", "null"] -] = { - "date": "string", - "date_nanos": "string", - "keyword": "string", - "match_only_text": "string", - "text": "string", - "wildcard": "string", - "byte": "number", - "double": "number", - "float": "number", - "half_float": "number", - "long": "number", - "scaled_float": "number", - "short": "number", - "token_count": "number", - "unsigned_long": "number", - "geo_point": "object", - "geo_shape": "object", - "nested": "array", -} - - -@attr.s -class EsAsyncBaseFiltersClient(AsyncBaseFiltersClient): - """Defines a pattern for implementing the STAC filter extension.""" - - database: BaseDatabaseLogic = attr.ib() - - async def get_queryables( - self, collection_id: Optional[str] = None, **kwargs - ) -> Dict[str, Any]: - """Get the queryables available for the given collection_id. - - If collection_id is None, returns the intersection of all - queryables over all collections. - - This base implementation returns a blank queryable schema. This is not allowed - under OGC CQL but it is allowed by the STAC API Filter Extension - - https://github.com/radiantearth/stac-api-spec/tree/master/fragments/filter#queryables - - Args: - collection_id (str, optional): The id of the collection to get queryables for. - **kwargs: additional keyword arguments - - Returns: - Dict[str, Any]: A dictionary containing the queryables for the given collection. 
- """ - queryables: Dict[str, Any] = { - "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://stac-api.example.com/queryables", - "type": "object", - "title": "Queryables for STAC API", - "description": "Queryable names for the STAC API Item Search filter.", - "properties": _DEFAULT_QUERYABLES, - "additionalProperties": True, - } - if not collection_id: - return queryables - - properties: Dict[str, Any] = queryables["properties"] - queryables.update( - { - "properties": properties, - "additionalProperties": False, - } - ) - - mapping_data = await self.database.get_items_mapping(collection_id) - mapping_properties = next(iter(mapping_data.values()))["mappings"]["properties"] - stack = deque(mapping_properties.items()) - - while stack: - field_name, field_def = stack.popleft() - - # Iterate over nested fields - field_properties = field_def.get("properties") - if field_properties: - # Fields in Item Properties should be exposed with their un-prefixed names, - # and not require expressions to prefix them with properties, - # e.g., eo:cloud_cover instead of properties.eo:cloud_cover. - if field_name == "properties": - stack.extend(field_properties.items()) - else: - stack.extend( - (f"{field_name}.{k}", v) for k, v in field_properties.items() - ) - - # Skip non-indexed or disabled fields - field_type = field_def.get("type") - if not field_type or not field_def.get("enabled", True): - continue - - # Generate field properties - field_result = _DEFAULT_QUERYABLES.get(field_name, {}) - properties[field_name] = field_result - - field_name_human = field_name.replace("_", " ").title() - field_result.setdefault("title", field_name_human) - - field_type_json = _ES_MAPPING_TYPE_TO_JSON.get(field_type, field_type) - field_result.setdefault("type", field_type_json) - - if field_type in {"date", "date_nanos"}: - field_result.setdefault("format", "date-time") - - return queryables diff --git a/stac_fastapi/core/stac_fastapi/core/datetime_utils.py b/stac_fastapi/core/stac_fastapi/core/datetime_utils.py index 3d6dd663..f9dbacf5 100644 --- a/stac_fastapi/core/stac_fastapi/core/datetime_utils.py +++ b/stac_fastapi/core/stac_fastapi/core/datetime_utils.py @@ -1,6 +1,38 @@ -"""A few datetime methods.""" +"""Utility functions to handle datetime parsing.""" from datetime import datetime, timezone +from stac_fastapi.types.rfc3339 import rfc3339_str_to_datetime + + +def format_datetime_range(date_str: str) -> str: + """ + Convert a datetime range string into a normalized UTC string for API requests using rfc3339_str_to_datetime. + + Args: + date_str (str): A string containing two datetime values separated by a '/'. + + Returns: + str: A string formatted as 'YYYY-MM-DDTHH:MM:SSZ/YYYY-MM-DDTHH:MM:SSZ', with '..' used if any element is None. + """ + + def normalize(dt): + dt = dt.strip() + if not dt or dt == "..": + return ".." + dt_obj = rfc3339_str_to_datetime(dt) + dt_utc = dt_obj.astimezone(timezone.utc) + return dt_utc.strftime("%Y-%m-%dT%H:%M:%SZ") + + if not isinstance(date_str, str): + return "../.." + if "/" not in date_str: + return f"{normalize(date_str)}/{normalize(date_str)}" + try: + start, end = date_str.split("/", 1) + except Exception: + return "../.." 
+ return f"{normalize(start)}/{normalize(end)}" + # Borrowed from pystac - https://github.com/stac-utils/pystac/blob/f5e4cf4a29b62e9ef675d4a4dac7977b09f53c8f/pystac/utils.py#L370-L394 def datetime_to_str(dt: datetime, timespec: str = "auto") -> str: diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/aggregation.py b/stac_fastapi/core/stac_fastapi/core/extensions/aggregation.py index d41d763c..cdce486f 100644 --- a/stac_fastapi/core/stac_fastapi/core/extensions/aggregation.py +++ b/stac_fastapi/core/stac_fastapi/core/extensions/aggregation.py @@ -1,36 +1,19 @@ """Request model for the Aggregation extension.""" -from datetime import datetime -from datetime import datetime as datetime_type -from typing import Dict, List, Literal, Optional, Union -from urllib.parse import unquote_plus, urljoin +from typing import Literal, Optional import attr -import orjson -from fastapi import HTTPException, Path, Request -from pygeofilter.backends.cql2_json import to_cql2 -from pygeofilter.parsers.cql2_text import parse as parse_cql2_text -from stac_pydantic.shared import BBox +from fastapi import Path from typing_extensions import Annotated -from stac_fastapi.core.base_database_logic import BaseDatabaseLogic -from stac_fastapi.core.base_settings import ApiBaseSettings -from stac_fastapi.core.datetime_utils import datetime_to_str -from stac_fastapi.core.session import Session -from stac_fastapi.extensions.core.aggregation.client import AsyncBaseAggregationClient from stac_fastapi.extensions.core.aggregation.request import ( AggregationExtensionGetRequest, AggregationExtensionPostRequest, ) -from stac_fastapi.extensions.core.aggregation.types import ( - Aggregation, - AggregationCollection, -) from stac_fastapi.extensions.core.filter.request import ( FilterExtensionGetRequest, FilterExtensionPostRequest, ) -from stac_fastapi.types.rfc3339 import DateTimeType FilterLang = Literal["cql-json", "cql2-json", "cql2-text"] @@ -64,514 +47,3 @@ class EsAggregationExtensionPostRequest( geometry_geohash_grid_frequency_precision: Optional[int] = None geometry_geotile_grid_frequency_precision: Optional[int] = None datetime_frequency_interval: Optional[str] = None - - -@attr.s -class EsAsyncAggregationClient(AsyncBaseAggregationClient): - """Defines a pattern for implementing the STAC aggregation extension.""" - - database: BaseDatabaseLogic = attr.ib() - settings: ApiBaseSettings = attr.ib() - session: Session = attr.ib(default=attr.Factory(Session.create_from_env)) - - DEFAULT_AGGREGATIONS = [ - {"name": "total_count", "data_type": "integer"}, - {"name": "datetime_max", "data_type": "datetime"}, - {"name": "datetime_min", "data_type": "datetime"}, - { - "name": "datetime_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "datetime", - }, - { - "name": "collection_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "string", - }, - { - "name": "geometry_geohash_grid_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "string", - }, - { - "name": "geometry_geotile_grid_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "string", - }, - ] - - GEO_POINT_AGGREGATIONS = [ - { - "name": "grid_code_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "string", - }, - { - "name": "centroid_geohash_grid_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "string", - }, - { - "name": 
"centroid_geohex_grid_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "string", - }, - { - "name": "centroid_geotile_grid_frequency", - "data_type": "frequency_distribution", - "frequency_distribution_data_type": "string", - }, - ] - - MAX_GEOHASH_PRECISION = 12 - MAX_GEOHEX_PRECISION = 15 - MAX_GEOTILE_PRECISION = 29 - SUPPORTED_DATETIME_INTERVAL = {"day", "month", "year"} - DEFAULT_DATETIME_INTERVAL = "month" - - async def get_aggregations(self, collection_id: Optional[str] = None, **kwargs): - """Get the available aggregations for a catalog or collection defined in the STAC JSON. If no aggregations, default aggregations are used.""" - request: Request = kwargs["request"] - base_url = str(request.base_url) - links = [{"rel": "root", "type": "application/json", "href": base_url}] - - if collection_id is not None: - collection_endpoint = urljoin(base_url, f"collections/{collection_id}") - links.extend( - [ - { - "rel": "collection", - "type": "application/json", - "href": collection_endpoint, - }, - { - "rel": "self", - "type": "application/json", - "href": urljoin(collection_endpoint + "/", "aggregations"), - }, - ] - ) - if await self.database.check_collection_exists(collection_id) is None: - collection = await self.database.find_collection(collection_id) - aggregations = collection.get( - "aggregations", self.DEFAULT_AGGREGATIONS.copy() - ) - else: - raise IndexError(f"Collection {collection_id} does not exist") - else: - links.append( - { - "rel": "self", - "type": "application/json", - "href": urljoin(base_url, "aggregations"), - } - ) - - aggregations = self.DEFAULT_AGGREGATIONS - return AggregationCollection( - type="AggregationCollection", aggregations=aggregations, links=links - ) - - def extract_precision( - self, precision: Union[int, None], min_value: int, max_value: int - ) -> Optional[int]: - """Ensure that the aggregation precision value is withing the a valid range, otherwise return the minumium value.""" - if precision is not None: - if precision < min_value or precision > max_value: - raise HTTPException( - status_code=400, - detail=f"Invalid precision. Must be a number between {min_value} and {max_value} inclusive", - ) - return precision - else: - return min_value - - def extract_date_histogram_interval(self, value: Optional[str]) -> str: - """ - Ensure that the interval for the date histogram is valid. If no value is provided, the default will be returned. - - Args: - value: value entered by the user - - Returns: - string containing the date histogram interval to use. - - Raises: - HTTPException: if the supplied value is not in the supported intervals - """ - if value is not None: - if value not in self.SUPPORTED_DATETIME_INTERVAL: - raise HTTPException( - status_code=400, - detail=f"Invalid datetime interval. Must be one of {self.SUPPORTED_DATETIME_INTERVAL}", - ) - else: - return value - else: - return self.DEFAULT_DATETIME_INTERVAL - - @staticmethod - def _return_date( - interval: Optional[Union[DateTimeType, str]] - ) -> Dict[str, Optional[str]]: - """ - Convert a date interval. - - (which may be a datetime, a tuple of one or two datetimes a string - representing a datetime or range, or None) into a dictionary for filtering - search results with Elasticsearch. - - This function ensures the output dictionary contains 'gte' and 'lte' keys, - even if they are set to None, to prevent KeyError in the consuming logic. 
- - Args: - interval (Optional[Union[DateTimeType, str]]): The date interval, which might be a single datetime, - a tuple with one or two datetimes, a string, or None. - - Returns: - dict: A dictionary representing the date interval for use in filtering search results, - always containing 'gte' and 'lte' keys. - """ - result: Dict[str, Optional[str]] = {"gte": None, "lte": None} - - if interval is None: - return result - - if isinstance(interval, str): - if "/" in interval: - parts = interval.split("/") - result["gte"] = parts[0] if parts[0] != ".." else None - result["lte"] = ( - parts[1] if len(parts) > 1 and parts[1] != ".." else None - ) - else: - converted_time = interval if interval != ".." else None - result["gte"] = result["lte"] = converted_time - return result - - if isinstance(interval, datetime_type): - datetime_iso = interval.isoformat() - result["gte"] = result["lte"] = datetime_iso - elif isinstance(interval, tuple): - start, end = interval - # Ensure datetimes are converted to UTC and formatted with 'Z' - if start: - result["gte"] = start.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" - if end: - result["lte"] = end.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" - - return result - - def frequency_agg(self, es_aggs, name, data_type): - """Format an aggregation for a frequency distribution aggregation.""" - buckets = [] - for bucket in es_aggs.get(name, {}).get("buckets", []): - bucket_data = { - "key": bucket.get("key_as_string") or bucket.get("key"), - "data_type": data_type, - "frequency": bucket.get("doc_count"), - "to": bucket.get("to"), - "from": bucket.get("from"), - } - buckets.append(bucket_data) - return Aggregation( - name=name, - data_type="frequency_distribution", - overflow=es_aggs.get(name, {}).get("sum_other_doc_count", 0), - buckets=buckets, - ) - - def metric_agg(self, es_aggs, name, data_type): - """Format an aggregation for a metric aggregation.""" - value = es_aggs.get(name, {}).get("value_as_string") or es_aggs.get( - name, {} - ).get("value") - # ES 7.x does not return datetimes with a 'value_as_string' field - if "datetime" in name and isinstance(value, float): - value = datetime_to_str(datetime.fromtimestamp(value / 1e3)) - return Aggregation( - name=name, - data_type=data_type, - value=value, - ) - - def get_filter(self, filter, filter_lang): - """Format the filter parameter in cql2-json or cql2-text.""" - if filter_lang == "cql2-text": - return orjson.loads(to_cql2(parse_cql2_text(filter))) - elif filter_lang == "cql2-json": - if isinstance(filter, str): - return orjson.loads(unquote_plus(filter)) - else: - return filter - else: - raise HTTPException( - status_code=400, - detail=f"Unknown filter-lang: {filter_lang}. Only cql2-json or cql2-text are supported.", - ) - - def _format_datetime_range(self, date_tuple: DateTimeType) -> str: - """ - Convert a tuple of datetime objects or None into a formatted string for API requests. - - Args: - date_tuple (tuple): A tuple containing two elements, each can be a datetime object or None. - - Returns: - str: A string formatted as 'YYYY-MM-DDTHH:MM:SS.sssZ/YYYY-MM-DDTHH:MM:SS.sssZ', with '..' used if any element is None. - """ - - def format_datetime(dt): - """Format a single datetime object to the ISO8601 extended format with 'Z'.""" - return dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" if dt else ".." 
- - start, end = date_tuple - return f"{format_datetime(start)}/{format_datetime(end)}" - - async def aggregate( - self, - aggregate_request: Optional[EsAggregationExtensionPostRequest] = None, - collection_id: Optional[ - Annotated[str, Path(description="Collection ID")] - ] = None, - collections: Optional[List[str]] = [], - datetime: Optional[DateTimeType] = None, - intersects: Optional[str] = None, - filter_lang: Optional[str] = None, - filter_expr: Optional[str] = None, - aggregations: Optional[str] = None, - ids: Optional[List[str]] = None, - bbox: Optional[BBox] = None, - centroid_geohash_grid_frequency_precision: Optional[int] = None, - centroid_geohex_grid_frequency_precision: Optional[int] = None, - centroid_geotile_grid_frequency_precision: Optional[int] = None, - geometry_geohash_grid_frequency_precision: Optional[int] = None, - geometry_geotile_grid_frequency_precision: Optional[int] = None, - datetime_frequency_interval: Optional[str] = None, - **kwargs, - ) -> Union[Dict, Exception]: - """Get aggregations from the database.""" - request: Request = kwargs["request"] - base_url = str(request.base_url) - path = request.url.path - search = self.database.make_search() - - if aggregate_request is None: - - base_args = { - "collections": collections, - "ids": ids, - "bbox": bbox, - "aggregations": aggregations, - "centroid_geohash_grid_frequency_precision": centroid_geohash_grid_frequency_precision, - "centroid_geohex_grid_frequency_precision": centroid_geohex_grid_frequency_precision, - "centroid_geotile_grid_frequency_precision": centroid_geotile_grid_frequency_precision, - "geometry_geohash_grid_frequency_precision": geometry_geohash_grid_frequency_precision, - "geometry_geotile_grid_frequency_precision": geometry_geotile_grid_frequency_precision, - "datetime_frequency_interval": datetime_frequency_interval, - } - - if collection_id: - collections = [str(collection_id)] - - if intersects: - base_args["intersects"] = orjson.loads(unquote_plus(intersects)) - - if datetime: - base_args["datetime"] = self._format_datetime_range(datetime) - - if filter_expr: - base_args["filter"] = self.get_filter(filter_expr, filter_lang) - aggregate_request = EsAggregationExtensionPostRequest(**base_args) - else: - # Workaround for optional path param in POST requests - if "collections" in path: - collection_id = path.split("/")[2] - - filter_lang = "cql2-json" - if aggregate_request.filter_expr: - aggregate_request.filter_expr = self.get_filter( - aggregate_request.filter_expr, filter_lang - ) - - if collection_id: - if aggregate_request.collections: - raise HTTPException( - status_code=400, - detail="Cannot query multiple collections when executing '/collections//aggregate'. Use '/aggregate' and the collections field instead", - ) - else: - aggregate_request.collections = [collection_id] - - if ( - aggregate_request.aggregations is None - or aggregate_request.aggregations == [] - ): - raise HTTPException( - status_code=400, - detail="No 'aggregations' found. 
Use '/aggregations' to return available aggregations", - ) - - if aggregate_request.ids: - search = self.database.apply_ids_filter( - search=search, item_ids=aggregate_request.ids - ) - - if aggregate_request.datetime: - datetime_search = self._return_date(aggregate_request.datetime) - search = self.database.apply_datetime_filter( - search=search, datetime_search=datetime_search - ) - - if aggregate_request.bbox: - bbox = aggregate_request.bbox - if len(bbox) == 6: - bbox = [bbox[0], bbox[1], bbox[3], bbox[4]] - - search = self.database.apply_bbox_filter(search=search, bbox=bbox) - - if aggregate_request.intersects: - search = self.database.apply_intersects_filter( - search=search, intersects=aggregate_request.intersects - ) - - if aggregate_request.collections: - search = self.database.apply_collections_filter( - search=search, collection_ids=aggregate_request.collections - ) - # validate that aggregations are supported for all collections - for collection_id in aggregate_request.collections: - aggs = await self.get_aggregations( - collection_id=collection_id, request=request - ) - supported_aggregations = ( - aggs["aggregations"] + self.DEFAULT_AGGREGATIONS - ) - - for agg_name in aggregate_request.aggregations: - if agg_name not in set([x["name"] for x in supported_aggregations]): - raise HTTPException( - status_code=400, - detail=f"Aggregation {agg_name} not supported by collection {collection_id}", - ) - else: - # Validate that the aggregations requested are supported by the catalog - aggs = await self.get_aggregations(request=request) - supported_aggregations = aggs["aggregations"] - for agg_name in aggregate_request.aggregations: - if agg_name not in [x["name"] for x in supported_aggregations]: - raise HTTPException( - status_code=400, - detail=f"Aggregation {agg_name} not supported at catalog level", - ) - - if aggregate_request.filter_expr: - try: - search = await self.database.apply_cql2_filter( - search, aggregate_request.filter_expr - ) - except Exception as e: - raise HTTPException( - status_code=400, detail=f"Error with cql2 filter: {e}" - ) - - centroid_geohash_grid_precision = self.extract_precision( - aggregate_request.centroid_geohash_grid_frequency_precision, - 1, - self.MAX_GEOHASH_PRECISION, - ) - - centroid_geohex_grid_precision = self.extract_precision( - aggregate_request.centroid_geohex_grid_frequency_precision, - 0, - self.MAX_GEOHEX_PRECISION, - ) - - centroid_geotile_grid_precision = self.extract_precision( - aggregate_request.centroid_geotile_grid_frequency_precision, - 0, - self.MAX_GEOTILE_PRECISION, - ) - - geometry_geohash_grid_precision = self.extract_precision( - aggregate_request.geometry_geohash_grid_frequency_precision, - 1, - self.MAX_GEOHASH_PRECISION, - ) - - geometry_geotile_grid_precision = self.extract_precision( - aggregate_request.geometry_geotile_grid_frequency_precision, - 0, - self.MAX_GEOTILE_PRECISION, - ) - - datetime_frequency_interval = self.extract_date_histogram_interval( - aggregate_request.datetime_frequency_interval, - ) - - try: - db_response = await self.database.aggregate( - collections, - aggregate_request.aggregations, - search, - centroid_geohash_grid_precision, - centroid_geohex_grid_precision, - centroid_geotile_grid_precision, - geometry_geohash_grid_precision, - geometry_geotile_grid_precision, - datetime_frequency_interval, - ) - except Exception as error: - if not isinstance(error, IndexError): - raise error - aggs = [] - if db_response: - result_aggs = db_response.get("aggregations", {}) - for agg in { - 
frozenset(item.items()): item
-                for item in supported_aggregations + self.GEO_POINT_AGGREGATIONS
-            }.values():
-                if agg["name"] in aggregate_request.aggregations:
-                    if agg["name"].endswith("_frequency"):
-                        aggs.append(
-                            self.frequency_agg(
-                                result_aggs, agg["name"], agg["data_type"]
-                            )
-                        )
-                    else:
-                        aggs.append(
-                            self.metric_agg(result_aggs, agg["name"], agg["data_type"])
-                        )
-        links = [
-            {"rel": "root", "type": "application/json", "href": base_url},
-        ]
-
-        if collection_id:
-            collection_endpoint = urljoin(base_url, f"collections/{collection_id}")
-            links.extend(
-                [
-                    {
-                        "rel": "collection",
-                        "type": "application/json",
-                        "href": collection_endpoint,
-                    },
-                    {
-                        "rel": "self",
-                        "type": "application/json",
-                        "href": urljoin(collection_endpoint, "aggregate"),
-                    },
-                ]
-            )
-        else:
-            links.append(
-                {
-                    "rel": "self",
-                    "type": "application/json",
-                    "href": urljoin(base_url, "aggregate"),
-                }
-            )
-        results = AggregationCollection(
-            type="AggregationCollection", aggregations=aggs, links=links
-        )
-
-        return results
diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/filter.py b/stac_fastapi/core/stac_fastapi/core/extensions/filter.py
index 078e7fbf..08200030 100644
--- a/stac_fastapi/core/stac_fastapi/core/extensions/filter.py
+++ b/stac_fastapi/core/stac_fastapi/core/extensions/filter.py
@@ -1,4 +1,4 @@
-"""Filter extension logic for es conversion."""
+"""Filter extension logic for conversion."""
 
 # """
 # Implements Filter Extension.
@@ -13,47 +13,55 @@
 # defines spatial operators (S_INTERSECTS, S_CONTAINS, S_WITHIN, S_DISJOINT).
 # """
 
-import re
 from enum import Enum
 from typing import Any, Dict
 
-_cql2_like_patterns = re.compile(r"\\.|[%_]|\\$")
-_valid_like_substitutions = {
-    "\\\\": "\\",
-    "\\%": "%",
-    "\\_": "_",
-    "%": "*",
-    "_": "?",
+DEFAULT_QUERYABLES: Dict[str, Dict[str, Any]] = {
+    "id": {
+        "description": "ID",
+        "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/properties/id",
+    },
+    "collection": {
+        "description": "Collection",
+        "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/then/properties/collection",
+    },
+    "geometry": {
+        "description": "Geometry",
+        "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/1/oneOf/0/properties/geometry",
+    },
+    "datetime": {
+        "description": "Acquisition Timestamp",
+        "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/datetime",
+    },
+    "created": {
+        "description": "Creation Timestamp",
+        "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/created",
+    },
+    "updated": {
+        "description": "Update Timestamp",
+        "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/updated",
+    },
+    "cloud_cover": {
+        "description": "Cloud Cover",
+        "$ref": "https://stac-extensions.github.io/eo/v1.0.0/schema.json#/definitions/fields/properties/eo:cloud_cover",
+    },
+    "cloud_shadow_percentage": {
+        "title": "Cloud Shadow Percentage",
+        "description": "Cloud Shadow Percentage",
+        "type": "number",
+        "minimum": 0,
+        "maximum": 100,
+    },
+    "nodata_pixel_percentage": {
+        "title": "No Data Pixel Percentage",
+        "description": "No Data Pixel Percentage",
+        "type": "number",
+        "minimum": 0,
+        "maximum": 100,
+    },
 }
-
-
-def _replace_like_patterns(match: re.Match) -> str:
-    pattern = match.group()
-    try:
-        return _valid_like_substitutions[pattern]
-    except KeyError:
-        raise
ValueError(f"'{pattern}' is not a valid escape sequence") - - -def cql2_like_to_es(string: str) -> str: - """ - Convert CQL2 "LIKE" characters to Elasticsearch "wildcard" characters. - - Args: - string (str): The string containing CQL2 wildcard characters. - - Returns: - str: The converted string with Elasticsearch compatible wildcards. - - Raises: - ValueError: If an invalid escape sequence is encountered. - """ - return _cql2_like_patterns.sub( - repl=_replace_like_patterns, - string=string, - ) - - class LogicalOp(str, Enum): """Enumeration for logical operators used in constructing Elasticsearch queries.""" @@ -89,124 +97,3 @@ class SpatialOp(str, Enum): S_CONTAINS = "s_contains" S_WITHIN = "s_within" S_DISJOINT = "s_disjoint" - - -def to_es_field(queryables_mapping: Dict[str, Any], field: str) -> str: - """ - Map a given field to its corresponding Elasticsearch field according to a predefined mapping. - - Args: - field (str): The field name from a user query or filter. - - Returns: - str: The mapped field name suitable for Elasticsearch queries. - """ - return queryables_mapping.get(field, field) - - -def to_es(queryables_mapping: Dict[str, Any], query: Dict[str, Any]) -> Dict[str, Any]: - """ - Transform a simplified CQL2 query structure to an Elasticsearch compatible query DSL. - - Args: - query (Dict[str, Any]): The query dictionary containing 'op' and 'args'. - - Returns: - Dict[str, Any]: The corresponding Elasticsearch query in the form of a dictionary. - """ - if query["op"] in [LogicalOp.AND, LogicalOp.OR, LogicalOp.NOT]: - bool_type = { - LogicalOp.AND: "must", - LogicalOp.OR: "should", - LogicalOp.NOT: "must_not", - }[query["op"]] - return { - "bool": { - bool_type: [ - to_es(queryables_mapping, sub_query) for sub_query in query["args"] - ] - } - } - - elif query["op"] in [ - ComparisonOp.EQ, - ComparisonOp.NEQ, - ComparisonOp.LT, - ComparisonOp.LTE, - ComparisonOp.GT, - ComparisonOp.GTE, - ]: - range_op = { - ComparisonOp.LT: "lt", - ComparisonOp.LTE: "lte", - ComparisonOp.GT: "gt", - ComparisonOp.GTE: "gte", - } - - field = to_es_field(queryables_mapping, query["args"][0]["property"]) - value = query["args"][1] - if isinstance(value, dict) and "timestamp" in value: - value = value["timestamp"] - if query["op"] == ComparisonOp.EQ: - return {"range": {field: {"gte": value, "lte": value}}} - elif query["op"] == ComparisonOp.NEQ: - return { - "bool": { - "must_not": [{"range": {field: {"gte": value, "lte": value}}}] - } - } - else: - return {"range": {field: {range_op[query["op"]]: value}}} - else: - if query["op"] == ComparisonOp.EQ: - return {"term": {field: value}} - elif query["op"] == ComparisonOp.NEQ: - return {"bool": {"must_not": [{"term": {field: value}}]}} - else: - return {"range": {field: {range_op[query["op"]]: value}}} - - elif query["op"] == ComparisonOp.IS_NULL: - field = to_es_field(queryables_mapping, query["args"][0]["property"]) - return {"bool": {"must_not": {"exists": {"field": field}}}} - - elif query["op"] == AdvancedComparisonOp.BETWEEN: - field = to_es_field(queryables_mapping, query["args"][0]["property"]) - gte, lte = query["args"][1], query["args"][2] - if isinstance(gte, dict) and "timestamp" in gte: - gte = gte["timestamp"] - if isinstance(lte, dict) and "timestamp" in lte: - lte = lte["timestamp"] - return {"range": {field: {"gte": gte, "lte": lte}}} - - elif query["op"] == AdvancedComparisonOp.IN: - field = to_es_field(queryables_mapping, query["args"][0]["property"]) - values = query["args"][1] - if not isinstance(values, list): - raise 
ValueError(f"Arg {values} is not a list") - return {"terms": {field: values}} - - elif query["op"] == AdvancedComparisonOp.LIKE: - field = to_es_field(queryables_mapping, query["args"][0]["property"]) - pattern = cql2_like_to_es(query["args"][1]) - return {"wildcard": {field: {"value": pattern, "case_insensitive": True}}} - - elif query["op"] in [ - SpatialOp.S_INTERSECTS, - SpatialOp.S_CONTAINS, - SpatialOp.S_WITHIN, - SpatialOp.S_DISJOINT, - ]: - field = to_es_field(queryables_mapping, query["args"][0]["property"]) - geometry = query["args"][1] - - relation_mapping = { - SpatialOp.S_INTERSECTS: "intersects", - SpatialOp.S_CONTAINS: "contains", - SpatialOp.S_WITHIN: "within", - SpatialOp.S_DISJOINT: "disjoint", - } - - relation = relation_mapping[query["op"]] - return {"geo_shape": {field: {"shape": geometry, "relation": relation}}} - - return {} diff --git a/stac_fastapi/core/stac_fastapi/core/utilities.py b/stac_fastapi/core/stac_fastapi/core/utilities.py index d4a35109..be197f71 100644 --- a/stac_fastapi/core/stac_fastapi/core/utilities.py +++ b/stac_fastapi/core/stac_fastapi/core/utilities.py @@ -12,46 +12,6 @@ MAX_LIMIT = 10000 -def validate_refresh(value: Union[str, bool]) -> str: - """ - Validate the `refresh` parameter value. - - Args: - value (Union[str, bool]): The `refresh` parameter value, which can be a string or a boolean. - - Returns: - str: The validated value of the `refresh` parameter, which can be "true", "false", or "wait_for". - """ - logger = logging.getLogger(__name__) - - # Handle boolean-like values using get_bool_env - if isinstance(value, bool) or value in { - "true", - "false", - "1", - "0", - "yes", - "no", - "y", - "n", - }: - is_true = get_bool_env("DATABASE_REFRESH", default=value) - return "true" if is_true else "false" - - # Normalize to lowercase for case-insensitivity - value = value.lower() - - # Handle "wait_for" explicitly - if value == "wait_for": - return "wait_for" - - # Log a warning for invalid values and default to "false" - logger.warning( - f"Invalid value for `refresh`: '{value}'. Expected 'true', 'false', or 'wait_for'. Defaulting to 'false'." - ) - return "false" - - def get_bool_env(name: str, default: Union[bool, str] = False) -> bool: """ Retrieve a boolean value from an environment variable. 
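With the hunks above, the CQL2 `LIKE` translation and the `refresh` validation leave `core`; later hunks in this diff import them from `stac_fastapi.sfeos_helpers.filter` and `stac_fastapi.sfeos_helpers.database`. A short sketch of the contracts the move is expected to preserve (behavior mirrors the removed implementations shown above; the `cql2_like_to_es` import path is assumed from this PR's module layout):

```python
# Sketch only, not part of the diff.
from stac_fastapi.sfeos_helpers.database import validate_refresh
from stac_fastapi.sfeos_helpers.filter import cql2_like_to_es  # assumed export

# CQL2 LIKE wildcards map onto Elasticsearch wildcard syntax:
# '%' -> '*' and '_' -> '?', while escaped '\%' and '\_' stay literal.
assert cql2_like_to_es("sentinel-%") == "sentinel-*"
assert cql2_like_to_es("landsat_8") == "landsat?8"
assert cql2_like_to_es(r"100\% cloud") == "100% cloud"

# `refresh` normalization: "wait_for" passes through unchanged, while an
# unrecognized value logs a warning and falls back to "false".
assert validate_refresh("wait_for") == "wait_for"
assert validate_refresh("bogus") == "false"
```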
diff --git a/stac_fastapi/core/stac_fastapi/core/version.py b/stac_fastapi/core/stac_fastapi/core/version.py index 1cd0ed04..e152cdff 100644 --- a/stac_fastapi/core/stac_fastapi/core/version.py +++ b/stac_fastapi/core/stac_fastapi/core/version.py @@ -1,2 +1,2 @@ """library version.""" -__version__ = "4.2.0" +__version__ = "5.0.0a0" diff --git a/stac_fastapi/elasticsearch/setup.py b/stac_fastapi/elasticsearch/setup.py index 06b8e880..34a139d6 100644 --- a/stac_fastapi/elasticsearch/setup.py +++ b/stac_fastapi/elasticsearch/setup.py @@ -6,7 +6,8 @@ desc = f.read() install_requires = [ - "stac-fastapi-core==4.2.0", + "stac-fastapi-core==5.0.0a0", + "sfeos-helpers==5.0.0a0", "elasticsearch[async]~=8.18.0", "uvicorn~=0.23.0", "starlette>=0.35.0,<0.36.0", diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py index 35027a63..e9a420d3 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py @@ -11,14 +11,12 @@ from stac_fastapi.core.core import ( BulkTransactionsClient, CoreClient, - EsAsyncBaseFiltersClient, TransactionsClient, ) from stac_fastapi.core.extensions import QueryExtension from stac_fastapi.core.extensions.aggregation import ( EsAggregationExtensionGetRequest, EsAggregationExtensionPostRequest, - EsAsyncAggregationClient, ) from stac_fastapi.core.extensions.fields import FieldsExtension from stac_fastapi.core.rate_limit import setup_rate_limit @@ -40,6 +38,8 @@ TransactionExtension, ) from stac_fastapi.extensions.third_party import BulkTransactionExtension +from stac_fastapi.sfeos_helpers.aggregation import EsAsyncBaseAggregationClient +from stac_fastapi.sfeos_helpers.filter import EsAsyncBaseFiltersClient logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -60,7 +60,7 @@ ) aggregation_extension = AggregationExtension( - client=EsAsyncAggregationClient( + client=EsAsyncBaseAggregationClient( database=database_logic, session=session, settings=settings ) ) @@ -106,7 +106,7 @@ api = StacApi( title=os.getenv("STAC_FASTAPI_TITLE", "stac-fastapi-elasticsearch"), description=os.getenv("STAC_FASTAPI_DESCRIPTION", "stac-fastapi-elasticsearch"), - api_version=os.getenv("STAC_FASTAPI_VERSION", "4.2.0"), + api_version=os.getenv("STAC_FASTAPI_VERSION", "5.0.0a0"), settings=settings, extensions=extensions, client=CoreClient( diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py index accbe8cc..d371c6a5 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py @@ -10,7 +10,8 @@ from elasticsearch import Elasticsearch # type: ignore[attr-defined] from stac_fastapi.core.base_settings import ApiBaseSettings -from stac_fastapi.core.utilities import get_bool_env, validate_refresh +from stac_fastapi.core.utilities import get_bool_env +from stac_fastapi.sfeos_helpers.database import validate_refresh from stac_fastapi.types.config import ApiSettings diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 958ee597..d529ce01 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -5,7 +5,7 @@ import logging from base64 import 
urlsafe_b64decode, urlsafe_b64encode from copy import deepcopy -from typing import Any, Dict, Iterable, List, Optional, Tuple, Type +from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union import attr import elasticsearch.helpers as helpers @@ -14,29 +14,38 @@ from starlette.requests import Request from stac_fastapi.core.base_database_logic import BaseDatabaseLogic -from stac_fastapi.core.database_logic import ( - COLLECTIONS_INDEX, - DEFAULT_SORT, - ES_COLLECTIONS_MAPPINGS, - ES_ITEMS_MAPPINGS, - ES_ITEMS_SETTINGS, - ITEM_INDICES, - ITEMS_INDEX_PREFIX, - Geometry, +from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer +from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon +from stac_fastapi.elasticsearch.config import AsyncElasticsearchSettings +from stac_fastapi.elasticsearch.config import ( + ElasticsearchSettings as SyncElasticsearchSettings, +) +from stac_fastapi.sfeos_helpers import filter +from stac_fastapi.sfeos_helpers.database import ( + apply_free_text_filter_shared, + apply_intersects_filter_shared, + create_index_templates_shared, + delete_item_index_shared, + get_queryables_mapping_shared, index_alias_by_collection_id, index_by_collection_id, indices, mk_actions, mk_item_id, + populate_sort_shared, + return_date, + validate_refresh, ) -from stac_fastapi.core.extensions import filter -from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer -from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon, validate_refresh -from stac_fastapi.elasticsearch.config import AsyncElasticsearchSettings -from stac_fastapi.elasticsearch.config import ( - ElasticsearchSettings as SyncElasticsearchSettings, +from stac_fastapi.sfeos_helpers.mappings import ( + AGGREGATION_MAPPING, + COLLECTIONS_INDEX, + DEFAULT_SORT, + ITEM_INDICES, + ITEMS_INDEX_PREFIX, + Geometry, ) from stac_fastapi.types.errors import ConflictError, NotFoundError +from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item logger = logging.getLogger(__name__) @@ -50,22 +59,7 @@ async def create_index_templates() -> None: None """ - client = AsyncElasticsearchSettings().create_client - await client.indices.put_index_template( - name=f"template_{COLLECTIONS_INDEX}", - body={ - "index_patterns": [f"{COLLECTIONS_INDEX}*"], - "template": {"mappings": ES_COLLECTIONS_MAPPINGS}, - }, - ) - await client.indices.put_index_template( - name=f"template_{ITEMS_INDEX_PREFIX}", - body={ - "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"], - "template": {"settings": ES_ITEMS_SETTINGS, "mappings": ES_ITEMS_MAPPINGS}, - }, - ) - await client.close() + await create_index_templates_shared(settings=AsyncElasticsearchSettings()) async def create_collection_index() -> None: @@ -110,18 +104,13 @@ async def delete_item_index(collection_id: str): Args: collection_id (str): The ID of the collection whose items index will be deleted. - """ - client = AsyncElasticsearchSettings().create_client - name = index_alias_by_collection_id(collection_id) - resolved = await client.indices.resolve_index(name=name) - if "aliases" in resolved and resolved["aliases"]: - [alias] = resolved["aliases"] - await client.indices.delete_alias(index=alias["indices"], name=alias["name"]) - await client.indices.delete(index=alias["indices"]) - else: - await client.indices.delete(index=name) - await client.close() + Notes: + This function delegates to the shared implementation in delete_item_index_shared. 
+ """ + await delete_item_index_shared( + settings=AsyncElasticsearchSettings(), collection_id=collection_id + ) @attr.s @@ -150,76 +139,7 @@ def __attrs_post_init__(self): extensions: List[str] = attr.ib(default=attr.Factory(list)) - aggregation_mapping: Dict[str, Dict[str, Any]] = { - "total_count": {"value_count": {"field": "id"}}, - "collection_frequency": {"terms": {"field": "collection", "size": 100}}, - "platform_frequency": {"terms": {"field": "properties.platform", "size": 100}}, - "cloud_cover_frequency": { - "range": { - "field": "properties.eo:cloud_cover", - "ranges": [ - {"to": 5}, - {"from": 5, "to": 15}, - {"from": 15, "to": 40}, - {"from": 40}, - ], - } - }, - "datetime_frequency": { - "date_histogram": { - "field": "properties.datetime", - "calendar_interval": "month", - } - }, - "datetime_min": {"min": {"field": "properties.datetime"}}, - "datetime_max": {"max": {"field": "properties.datetime"}}, - "grid_code_frequency": { - "terms": { - "field": "properties.grid:code", - "missing": "none", - "size": 10000, - } - }, - "sun_elevation_frequency": { - "histogram": {"field": "properties.view:sun_elevation", "interval": 5} - }, - "sun_azimuth_frequency": { - "histogram": {"field": "properties.view:sun_azimuth", "interval": 5} - }, - "off_nadir_frequency": { - "histogram": {"field": "properties.view:off_nadir", "interval": 5} - }, - "centroid_geohash_grid_frequency": { - "geohash_grid": { - "field": "properties.proj:centroid", - "precision": 1, - } - }, - "centroid_geohex_grid_frequency": { - "geohex_grid": { - "field": "properties.proj:centroid", - "precision": 0, - } - }, - "centroid_geotile_grid_frequency": { - "geotile_grid": { - "field": "properties.proj:centroid", - "precision": 0, - } - }, - "geometry_geohash_grid_frequency": { - "geohash_grid": { - "field": "geometry", - "precision": 1, - } - }, - "geometry_geotile_grid_frequency": { - "geotile_grid": { - "field": "geometry", - "precision": 0, - } - }, - } + aggregation_mapping: Dict[str, Dict[str, Any]] = AGGREGATION_MAPPING """CORE LOGIC""" @@ -300,23 +220,12 @@ async def get_queryables_mapping(self, collection_id: str = "*") -> dict: Returns: dict: A dictionary containing the Queryables mappings. """ - queryables_mapping = {} - mappings = await self.client.indices.get_mapping( index=f"{ITEMS_INDEX_PREFIX}{collection_id}", ) - - for mapping in mappings.values(): - fields = mapping["mappings"].get("properties", {}) - properties = fields.pop("properties", {}).get("properties", {}).keys() - - for field_key in fields: - queryables_mapping[field_key] = field_key - - for property_key in properties: - queryables_mapping[property_key] = f"properties.{property_key}" - - return queryables_mapping + return await get_queryables_mapping_shared( + collection_id=collection_id, mappings=mappings + ) @staticmethod def make_search(): @@ -334,17 +243,20 @@ def apply_collections_filter(search: Search, collection_ids: List[str]): return search.filter("terms", collection=collection_ids) @staticmethod - def apply_datetime_filter(search: Search, datetime_search: dict): + def apply_datetime_filter( + search: Search, interval: Optional[Union[DateTimeType, str]] + ): """Apply a filter to search on datetime, start_datetime, and end_datetime fields. Args: search (Search): The search object to filter. - datetime_search (dict): The datetime filter criteria. + interval: Optional[Union[DateTimeType, str]] Returns: Search: The filtered search object. 
""" should = [] + datetime_search = return_date(interval) # If the request is a single datetime return # items with datetimes equal to the requested datetime OR @@ -497,21 +409,8 @@ def apply_intersects_filter( Notes: A geo_shape filter is added to the search object, set to intersect with the specified geometry. """ - return search.filter( - Q( - { - "geo_shape": { - "geometry": { - "shape": { - "type": intersects.type.lower(), - "coordinates": intersects.coordinates, - }, - "relation": "intersects", - } - } - } - ) - ) + filter = apply_intersects_filter_shared(intersects=intersects) + return search.filter(Q(filter)) @staticmethod def apply_stacql_filter(search: Search, op: str, field: str, value: float): @@ -537,14 +436,21 @@ def apply_stacql_filter(search: Search, op: str, field: str, value: float): @staticmethod def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str]]): - """Database logic to perform query for search endpoint.""" - if free_text_queries is not None: - free_text_query_string = '" OR properties.\\*:"'.join(free_text_queries) - search = search.query( - "query_string", query=f'properties.\\*:"{free_text_query_string}"' - ) + """Create a free text query for Elasticsearch queries. - return search + This method delegates to the shared implementation in apply_free_text_filter_shared. + + Args: + search (Search): The search object to apply the query to. + free_text_queries (Optional[List[str]]): A list of text strings to search for in the properties. + + Returns: + Search: The search object with the free text query applied, or the original search + object if no free_text_queries were provided. + """ + return apply_free_text_filter_shared( + search=search, free_text_queries=free_text_queries + ) async def apply_cql2_filter( self, search: Search, _filter: Optional[Dict[str, Any]] @@ -575,11 +481,18 @@ async def apply_cql2_filter( @staticmethod def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: - """Database logic to sort search instance.""" - if sortby: - return {s.field: {"order": s.direction} for s in sortby} - else: - return None + """Create a sort configuration for Elasticsearch queries. + + This method delegates to the shared implementation in populate_sort_shared. + + Args: + sortby (List): A list of sort specifications, each containing a field and direction. + + Returns: + Optional[Dict[str, Dict[str, str]]]: A dictionary mapping field names to sort direction + configurations, or None if no sort was specified. 
+ """ + return populate_sort_shared(sortby=sortby) async def execute_search( self, diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py index 1cd0ed04..e152cdff 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py @@ -1,2 +1,2 @@ """library version.""" -__version__ = "4.2.0" +__version__ = "5.0.0a0" diff --git a/stac_fastapi/opensearch/setup.py b/stac_fastapi/opensearch/setup.py index 7fe18f87..68586236 100644 --- a/stac_fastapi/opensearch/setup.py +++ b/stac_fastapi/opensearch/setup.py @@ -6,7 +6,8 @@ desc = f.read() install_requires = [ - "stac-fastapi-core==4.2.0", + "stac-fastapi-core==5.0.0a0", + "sfeos-helpers==5.0.0a0", "opensearch-py~=2.8.0", "opensearch-py[async]~=2.8.0", "uvicorn~=0.23.0", diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py index 5273e598..bd2ec073 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py @@ -11,14 +11,12 @@ from stac_fastapi.core.core import ( BulkTransactionsClient, CoreClient, - EsAsyncBaseFiltersClient, TransactionsClient, ) from stac_fastapi.core.extensions import QueryExtension from stac_fastapi.core.extensions.aggregation import ( EsAggregationExtensionGetRequest, EsAggregationExtensionPostRequest, - EsAsyncAggregationClient, ) from stac_fastapi.core.extensions.fields import FieldsExtension from stac_fastapi.core.rate_limit import setup_rate_limit @@ -40,6 +38,8 @@ create_collection_index, create_index_templates, ) +from stac_fastapi.sfeos_helpers.aggregation import EsAsyncBaseAggregationClient +from stac_fastapi.sfeos_helpers.filter import EsAsyncBaseFiltersClient logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -60,7 +60,7 @@ ) aggregation_extension = AggregationExtension( - client=EsAsyncAggregationClient( + client=EsAsyncBaseAggregationClient( database=database_logic, session=session, settings=settings ) ) @@ -107,7 +107,7 @@ api = StacApi( title=os.getenv("STAC_FASTAPI_TITLE", "stac-fastapi-opensearch"), description=os.getenv("STAC_FASTAPI_DESCRIPTION", "stac-fastapi-opensearch"), - api_version=os.getenv("STAC_FASTAPI_VERSION", "4.2.0"), + api_version=os.getenv("STAC_FASTAPI_VERSION", "5.0.0a0"), settings=settings, extensions=extensions, client=CoreClient( diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py index 3a53ffdf..d3811376 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py @@ -8,7 +8,8 @@ from opensearchpy import AsyncOpenSearch, OpenSearch from stac_fastapi.core.base_settings import ApiBaseSettings -from stac_fastapi.core.utilities import get_bool_env, validate_refresh +from stac_fastapi.core.utilities import get_bool_env +from stac_fastapi.sfeos_helpers.database import validate_refresh from stac_fastapi.types.config import ApiSettings diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 71ab9275..f93311f9 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -5,7 +5,7 @@ import logging from base64 import 
urlsafe_b64decode, urlsafe_b64encode from copy import deepcopy -from typing import Any, Dict, Iterable, List, Optional, Tuple, Type +from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union import attr from opensearchpy import exceptions, helpers @@ -14,7 +14,30 @@ from starlette.requests import Request from stac_fastapi.core.base_database_logic import BaseDatabaseLogic -from stac_fastapi.core.database_logic import ( +from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer +from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon +from stac_fastapi.opensearch.config import ( + AsyncOpensearchSettings as AsyncSearchSettings, +) +from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings +from stac_fastapi.sfeos_helpers import filter +from stac_fastapi.sfeos_helpers.database import ( + apply_free_text_filter_shared, + apply_intersects_filter_shared, + create_index_templates_shared, + delete_item_index_shared, + get_queryables_mapping_shared, + index_alias_by_collection_id, + index_by_collection_id, + indices, + mk_actions, + mk_item_id, + populate_sort_shared, + return_date, + validate_refresh, +) +from stac_fastapi.sfeos_helpers.mappings import ( + AGGREGATION_MAPPING, COLLECTIONS_INDEX, DEFAULT_SORT, ES_COLLECTIONS_MAPPINGS, @@ -23,20 +46,9 @@ ITEM_INDICES, ITEMS_INDEX_PREFIX, Geometry, - index_alias_by_collection_id, - index_by_collection_id, - indices, - mk_actions, - mk_item_id, ) -from stac_fastapi.core.extensions import filter -from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer -from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon, validate_refresh -from stac_fastapi.opensearch.config import ( - AsyncOpensearchSettings as AsyncSearchSettings, -) -from stac_fastapi.opensearch.config import OpensearchSettings as SyncSearchSettings from stac_fastapi.types.errors import ConflictError, NotFoundError +from stac_fastapi.types.rfc3339 import DateTimeType from stac_fastapi.types.stac import Collection, Item logger = logging.getLogger(__name__) @@ -50,23 +62,7 @@ async def create_index_templates() -> None: None """ - client = AsyncSearchSettings().create_client - await client.indices.put_template( - name=f"template_{COLLECTIONS_INDEX}", - body={ - "index_patterns": [f"{COLLECTIONS_INDEX}*"], - "mappings": ES_COLLECTIONS_MAPPINGS, - }, - ) - await client.indices.put_template( - name=f"template_{ITEMS_INDEX_PREFIX}", - body={ - "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"], - "settings": ES_ITEMS_SETTINGS, - "mappings": ES_ITEMS_MAPPINGS, - }, - ) - await client.close() + await create_index_templates_shared(settings=AsyncSearchSettings()) async def create_collection_index() -> None: @@ -125,18 +121,13 @@ async def delete_item_index(collection_id: str) -> None: Args: collection_id (str): The ID of the collection whose items index will be deleted. - """ - client = AsyncSearchSettings().create_client - name = index_alias_by_collection_id(collection_id) - resolved = await client.indices.resolve_index(name=name) - if "aliases" in resolved and resolved["aliases"]: - [alias] = resolved["aliases"] - await client.indices.delete_alias(index=alias["indices"], name=alias["name"]) - await client.indices.delete(index=alias["indices"]) - else: - await client.indices.delete(index=name) - await client.close() + Notes: + This function delegates to the shared implementation in delete_item_index_shared. 
+ """ + await delete_item_index_shared( + settings=AsyncSearchSettings(), collection_id=collection_id + ) @attr.s @@ -161,76 +152,7 @@ def __attrs_post_init__(self): extensions: List[str] = attr.ib(default=attr.Factory(list)) - aggregation_mapping: Dict[str, Dict[str, Any]] = { - "total_count": {"value_count": {"field": "id"}}, - "collection_frequency": {"terms": {"field": "collection", "size": 100}}, - "platform_frequency": {"terms": {"field": "properties.platform", "size": 100}}, - "cloud_cover_frequency": { - "range": { - "field": "properties.eo:cloud_cover", - "ranges": [ - {"to": 5}, - {"from": 5, "to": 15}, - {"from": 15, "to": 40}, - {"from": 40}, - ], - } - }, - "datetime_frequency": { - "date_histogram": { - "field": "properties.datetime", - "calendar_interval": "month", - } - }, - "datetime_min": {"min": {"field": "properties.datetime"}}, - "datetime_max": {"max": {"field": "properties.datetime"}}, - "grid_code_frequency": { - "terms": { - "field": "properties.grid:code", - "missing": "none", - "size": 10000, - } - }, - "sun_elevation_frequency": { - "histogram": {"field": "properties.view:sun_elevation", "interval": 5} - }, - "sun_azimuth_frequency": { - "histogram": {"field": "properties.view:sun_azimuth", "interval": 5} - }, - "off_nadir_frequency": { - "histogram": {"field": "properties.view:off_nadir", "interval": 5} - }, - "centroid_geohash_grid_frequency": { - "geohash_grid": { - "field": "properties.proj:centroid", - "precision": 1, - } - }, - "centroid_geohex_grid_frequency": { - "geohex_grid": { - "field": "properties.proj:centroid", - "precision": 0, - } - }, - "centroid_geotile_grid_frequency": { - "geotile_grid": { - "field": "properties.proj:centroid", - "precision": 0, - } - }, - "geometry_geohash_grid_frequency": { - "geohash_grid": { - "field": "geometry", - "precision": 1, - } - }, - "geometry_geotile_grid_frequency": { - "geotile_grid": { - "field": "geometry", - "precision": 0, - } - }, - } + aggregation_mapping: Dict[str, Dict[str, Any]] = AGGREGATION_MAPPING """CORE LOGIC""" @@ -317,23 +239,12 @@ async def get_queryables_mapping(self, collection_id: str = "*") -> dict: Returns: dict: A dictionary containing the Queryables mappings. """ - queryables_mapping = {} - mappings = await self.client.indices.get_mapping( index=f"{ITEMS_INDEX_PREFIX}{collection_id}", ) - - for mapping in mappings.values(): - fields = mapping["mappings"].get("properties", {}) - properties = fields.pop("properties", {}).get("properties", {}).keys() - - for field_key in fields: - queryables_mapping[field_key] = field_key - - for property_key in properties: - queryables_mapping[property_key] = f"properties.{property_key}" - - return queryables_mapping + return await get_queryables_mapping_shared( + collection_id=collection_id, mappings=mappings + ) @staticmethod def make_search(): @@ -352,27 +263,37 @@ def apply_collections_filter(search: Search, collection_ids: List[str]): @staticmethod def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str]]): - """Database logic to perform query for search endpoint.""" - if free_text_queries is not None: - free_text_query_string = '" OR properties.\\*:"'.join(free_text_queries) - search = search.query( - "query_string", query=f'properties.\\*:"{free_text_query_string}"' - ) + """Create a free text query for OpenSearch queries. - return search + This method delegates to the shared implementation in apply_free_text_filter_shared. + + Args: + search (Search): The search object to apply the query to. 
+ free_text_queries (Optional[List[str]]): A list of text strings to search for in the properties. + + Returns: + Search: The search object with the free text query applied, or the original search + object if no free_text_queries were provided. + """ + return apply_free_text_filter_shared( + search=search, free_text_queries=free_text_queries + ) @staticmethod - def apply_datetime_filter(search: Search, datetime_search): + def apply_datetime_filter( + search: Search, interval: Optional[Union[DateTimeType, str]] + ): """Apply a filter to search based on datetime field, start_datetime, and end_datetime fields. Args: search (Search): The search object to filter. - datetime_search (dict): The datetime filter criteria. + interval: Optional[Union[DateTimeType, str]] Returns: Search: The filtered search object. """ should = [] + datetime_search = return_date(interval) # If the request is a single datetime return # items with datetimes equal to the requested datetime OR @@ -525,21 +446,8 @@ def apply_intersects_filter( Notes: A geo_shape filter is added to the search object, set to intersect with the specified geometry. """ - return search.filter( - Q( - { - "geo_shape": { - "geometry": { - "shape": { - "type": intersects.type.lower(), - "coordinates": intersects.coordinates, - }, - "relation": "intersects", - } - } - } - ) - ) + filter = apply_intersects_filter_shared(intersects=intersects) + return search.filter(Q(filter)) @staticmethod def apply_stacql_filter(search: Search, op: str, field: str, value: float): @@ -592,11 +500,18 @@ async def apply_cql2_filter( @staticmethod def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: - """Database logic to sort search instance.""" - if sortby: - return {s.field: {"order": s.direction} for s in sortby} - else: - return None + """Create a sort configuration for OpenSearch queries. + + This method delegates to the shared implementation in populate_sort_shared. + + Args: + sortby (List): A list of sort specifications, each containing a field and direction. + + Returns: + Optional[Dict[str, Dict[str, str]]]: A dictionary mapping field names to sort direction + configurations, or None if no sort was specified. 
+ """ + return populate_sort_shared(sortby=sortby) async def execute_search( self, diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py index 1cd0ed04..e152cdff 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py @@ -1,2 +1,2 @@ """library version.""" -__version__ = "4.2.0" +__version__ = "5.0.0a0" diff --git a/stac_fastapi/sfeos_helpers/README.md b/stac_fastapi/sfeos_helpers/README.md new file mode 120000 index 00000000..fe840054 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/stac_fastapi/sfeos_helpers/setup.cfg b/stac_fastapi/sfeos_helpers/setup.cfg new file mode 100644 index 00000000..a3210acb --- /dev/null +++ b/stac_fastapi/sfeos_helpers/setup.cfg @@ -0,0 +1,2 @@ +[metadata] +version = attr: stac_fastapi.sfeos_helpers.version.__version__ diff --git a/stac_fastapi/sfeos_helpers/setup.py b/stac_fastapi/sfeos_helpers/setup.py new file mode 100644 index 00000000..7228a41c --- /dev/null +++ b/stac_fastapi/sfeos_helpers/setup.py @@ -0,0 +1,34 @@ +"""stac_fastapi: helpers elasticsearch/ opensearch module.""" + +from setuptools import find_namespace_packages, setup + +with open("README.md") as f: + desc = f.read() + +install_requires = [ + "stac-fastapi.core==5.0.0a0", +] + +setup( + name="sfeos_helpers", + description="Helper library for the Elasticsearch and Opensearch stac-fastapi backends.", + long_description=desc, + long_description_content_type="text/markdown", + python_requires=">=3.9", + classifiers=[ + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "License :: OSI Approved :: MIT License", + ], + url="https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch", + license="MIT", + packages=find_namespace_packages(), + zip_safe=False, + install_requires=install_requires, +) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/aggregation/README.md b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/aggregation/README.md new file mode 100644 index 00000000..253855b4 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/aggregation/README.md @@ -0,0 +1,57 @@ +# STAC FastAPI Aggregation Package + +This package contains shared aggregation functionality used by both the Elasticsearch and OpenSearch implementations of STAC FastAPI. It helps reduce code duplication and ensures consistent behavior between the two implementations. 
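Both backends consume the package the same way. A condensed sketch of the wiring, adapted from the `app.py` hunks earlier in this diff (`database_logic`, `session`, and `settings` are stand-ins for the backend-specific objects each app module already constructs, and the `AggregationExtension` import is the usual stac-fastapi path):

```python
# Condensed sketch, not part of the diff: how each backend app wires the
# shared aggregation client. `database_logic`, `session`, and `settings`
# are placeholders for objects defined earlier in each app module.
from stac_fastapi.extensions.core import AggregationExtension
from stac_fastapi.sfeos_helpers.aggregation import EsAsyncBaseAggregationClient

aggregation_extension = AggregationExtension(
    client=EsAsyncBaseAggregationClient(
        database=database_logic, session=session, settings=settings
    )
)
```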
+ +## Package Structure + +The aggregation package is organized into three main modules: + +- **client.py**: Contains the base aggregation client implementation + - `EsAsyncBaseAggregationClient`: The main class that implements the STAC aggregation extension for Elasticsearch/OpenSearch + - Methods for handling aggregation requests, validating parameters, and formatting responses + +- **format.py**: Contains functions for formatting aggregation responses + - `frequency_agg`: Formats frequency distribution aggregation responses + - `metric_agg`: Formats metric aggregation responses + +- **__init__.py**: Package initialization and exports + - Exports the main classes and functions for use by other modules + +## Features + +The aggregation package provides the following features: + +- Support for various aggregation types: + - Datetime frequency + - Collection frequency + - Property frequency + - Geospatial grid aggregations (geohash, geohex, geotile) + - Metric aggregations (min, max, etc.) + +- Parameter validation: + - Precision validation for geospatial aggregations + - Interval validation for datetime aggregations + +- Response formatting: + - Consistent response structure + - Proper typing and documentation + +## Usage + +The aggregation package is used by the Elasticsearch and OpenSearch implementations to provide aggregation functionality for STAC API. The main entry point is the `EsAsyncBaseAggregationClient` class, which is instantiated in the respective app.py files. + +Example: +```python +from stac_fastapi.sfeos_helpers.aggregation import EsAsyncBaseAggregationClient + +# Create an instance of the aggregation client +aggregation_client = EsAsyncBaseAggregationClient(database) + +# Register the aggregation extension with the API +api = StacApi( + ..., + extensions=[ + ..., + AggregationExtension(client=aggregation_client), + ], +) \ No newline at end of file diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/aggregation/__init__.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/aggregation/__init__.py new file mode 100644 index 00000000..2beeff67 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/aggregation/__init__.py @@ -0,0 +1,31 @@ +"""Shared aggregation extension methods for stac-fastapi elasticsearch and opensearch backends. + +This module provides shared functionality for implementing the STAC API Aggregation Extension +with Elasticsearch and OpenSearch. It includes: + +1. Functions for formatting aggregation responses +2. Helper functions for handling aggregation parameters +3. Base implementation of the AsyncBaseAggregationClient for Elasticsearch/OpenSearch + +The aggregation package is organized as follows: +- client.py: Aggregation client implementation +- format.py: Response formatting functions + +When adding new functionality to this package, consider: +1. Will this code be used by both Elasticsearch and OpenSearch implementations? +2. Is the functionality stable and unlikely to diverge between implementations? +3. Is the function well-documented with clear input/output contracts? 
+ +Function Naming Conventions: +- Function names should be descriptive and indicate their purpose +- Parameter names should be consistent across similar functions +""" + +from .client import EsAsyncBaseAggregationClient +from .format import frequency_agg, metric_agg + +__all__ = [ + "EsAsyncBaseAggregationClient", + "frequency_agg", + "metric_agg", +] diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/aggregation/client.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/aggregation/client.py new file mode 100644 index 00000000..1f335245 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/aggregation/client.py @@ -0,0 +1,469 @@ +"""Client implementation for the STAC API Aggregation Extension.""" + +from pathlib import Path +from typing import Annotated, Any, Dict, List, Optional, Union +from urllib.parse import unquote_plus, urljoin + +import attr +import orjson +from fastapi import HTTPException, Request +from pygeofilter.backends.cql2_json import to_cql2 +from pygeofilter.parsers.cql2_text import parse as parse_cql2_text +from stac_pydantic.shared import BBox + +from stac_fastapi.core.base_database_logic import BaseDatabaseLogic +from stac_fastapi.core.base_settings import ApiBaseSettings +from stac_fastapi.core.datetime_utils import format_datetime_range +from stac_fastapi.core.extensions.aggregation import EsAggregationExtensionPostRequest +from stac_fastapi.core.session import Session +from stac_fastapi.extensions.core.aggregation.client import AsyncBaseAggregationClient +from stac_fastapi.extensions.core.aggregation.types import ( + Aggregation, + AggregationCollection, +) +from stac_fastapi.types.rfc3339 import DateTimeType + +from .format import frequency_agg, metric_agg + + +@attr.s +class EsAsyncBaseAggregationClient(AsyncBaseAggregationClient): + """Defines a pattern for implementing the STAC aggregation extension with Elasticsearch/OpenSearch.""" + + database: BaseDatabaseLogic = attr.ib() + settings: ApiBaseSettings = attr.ib() + session: Session = attr.ib(default=attr.Factory(Session.create_from_env)) + + # Default aggregations to use if none are specified + DEFAULT_AGGREGATIONS = [ + {"name": "total_count", "data_type": "integer"}, + {"name": "datetime_max", "data_type": "datetime"}, + {"name": "datetime_min", "data_type": "datetime"}, + { + "name": "datetime_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "datetime", + }, + { + "name": "collection_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "string", + }, + { + "name": "geometry_geohash_grid_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "string", + }, + { + "name": "geometry_geotile_grid_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "string", + }, + ] + + # Geo point aggregations + GEO_POINT_AGGREGATIONS = [ + { + "name": "grid_code_frequency", + "data_type": "frequency_distribution", + "frequency_distribution_data_type": "string", + }, + ] + + # Supported datetime intervals + SUPPORTED_DATETIME_INTERVAL = [ + "year", + "quarter", + "month", + "week", + "day", + "hour", + "minute", + "second", + ] + + # Default datetime interval + DEFAULT_DATETIME_INTERVAL = "month" + + # Maximum precision values + MAX_GEOHASH_PRECISION = 12 + MAX_GEOHEX_PRECISION = 15 + MAX_GEOTILE_PRECISION = 29 + + async def get_aggregations( + self, collection_id: Optional[str] = None, **kwargs + ) -> Dict[str, Any]: + """Get the 
available aggregations for a catalog or collection defined in the STAC JSON. + + If no aggregations are defined, default aggregations are used. + + Args: + collection_id: Optional collection ID to get aggregations for + **kwargs: Additional keyword arguments + + Returns: + Dict[str, Any]: A dictionary containing the available aggregations + """ + request: Request = kwargs.get("request") + base_url = str(request.base_url) if request else "" + links = [{"rel": "root", "type": "application/json", "href": base_url}] + + if collection_id is not None: + collection_endpoint = urljoin(base_url, f"collections/{collection_id}") + links.extend( + [ + { + "rel": "collection", + "type": "application/json", + "href": collection_endpoint, + }, + { + "rel": "self", + "type": "application/json", + "href": urljoin(collection_endpoint + "/", "aggregations"), + }, + ] + ) + if await self.database.check_collection_exists(collection_id) is None: + collection = await self.database.find_collection(collection_id) + aggregations = collection.get( + "aggregations", self.DEFAULT_AGGREGATIONS.copy() + ) + else: + raise IndexError(f"Collection {collection_id} does not exist") + else: + links.append( + { + "rel": "self", + "type": "application/json", + "href": urljoin(base_url, "aggregations"), + } + ) + aggregations = self.DEFAULT_AGGREGATIONS.copy() + + return { + "type": "AggregationCollection", + "aggregations": aggregations, + "links": links, + } + + def extract_precision( + self, precision: Union[int, None], min_value: int, max_value: int + ) -> int: + """Ensure that the aggregation precision value is within a valid range. + + Args: + precision: The precision value to validate + min_value: The minimum allowed precision value + max_value: The maximum allowed precision value + + Returns: + int: A validated precision value + + Raises: + HTTPException: If the precision is outside the valid range + """ + if precision is None: + return min_value + if precision < min_value or precision > max_value: + raise HTTPException( + status_code=400, + detail=f"Invalid precision value. Must be between {min_value} and {max_value}", + ) + return precision + + def extract_date_histogram_interval(self, value: Optional[str]) -> str: + """Ensure that the interval for the date histogram is valid. + + If no value is provided, the default will be returned. + + Args: + value: The interval value to validate + + Returns: + str: A validated date histogram interval + + Raises: + HTTPException: If the supplied value is not in the supported intervals + """ + if value is not None: + if value not in self.SUPPORTED_DATETIME_INTERVAL: + raise HTTPException( + status_code=400, + detail=f"Invalid datetime interval. Must be one of {self.SUPPORTED_DATETIME_INTERVAL}", + ) + else: + return value + else: + return self.DEFAULT_DATETIME_INTERVAL + + def get_filter(self, filter, filter_lang): + """Format the filter parameter in cql2-json or cql2-text. + + Args: + filter: The filter expression + filter_lang: The filter language (cql2-json or cql2-text) + + Returns: + dict: A formatted filter expression + + Raises: + HTTPException: If the filter language is not supported + """ + if filter_lang == "cql2-text": + return orjson.loads(to_cql2(parse_cql2_text(filter))) + elif filter_lang == "cql2-json": + if isinstance(filter, str): + return orjson.loads(unquote_plus(filter)) + else: + return filter + else: + raise HTTPException( + status_code=400, + detail=f"Unknown filter-lang: {filter_lang}. 
Only cql2-json or cql2-text are supported.", + ) + + async def aggregate( + self, + aggregate_request: Optional[EsAggregationExtensionPostRequest] = None, + collection_id: Optional[ + Annotated[str, Path(description="Collection ID")] + ] = None, + collections: Optional[List[str]] = [], + datetime: Optional[DateTimeType] = None, + intersects: Optional[str] = None, + filter_lang: Optional[str] = None, + filter_expr: Optional[str] = None, + aggregations: Optional[str] = None, + ids: Optional[List[str]] = None, + bbox: Optional[BBox] = None, + centroid_geohash_grid_frequency_precision: Optional[int] = None, + centroid_geohex_grid_frequency_precision: Optional[int] = None, + centroid_geotile_grid_frequency_precision: Optional[int] = None, + geometry_geohash_grid_frequency_precision: Optional[int] = None, + geometry_geotile_grid_frequency_precision: Optional[int] = None, + datetime_frequency_interval: Optional[str] = None, + **kwargs, + ) -> Union[Dict, Exception]: + """Get aggregations from the database.""" + request: Request = kwargs["request"] + base_url = str(request.base_url) + path = request.url.path + search = self.database.make_search() + + if aggregate_request is None: + + base_args = { + "collections": collections, + "ids": ids, + "bbox": bbox, + "aggregations": aggregations, + "centroid_geohash_grid_frequency_precision": centroid_geohash_grid_frequency_precision, + "centroid_geohex_grid_frequency_precision": centroid_geohex_grid_frequency_precision, + "centroid_geotile_grid_frequency_precision": centroid_geotile_grid_frequency_precision, + "geometry_geohash_grid_frequency_precision": geometry_geohash_grid_frequency_precision, + "geometry_geotile_grid_frequency_precision": geometry_geotile_grid_frequency_precision, + "datetime_frequency_interval": datetime_frequency_interval, + } + + if collection_id: + collections = [str(collection_id)] + + if intersects: + base_args["intersects"] = orjson.loads(unquote_plus(intersects)) + + if datetime: + base_args["datetime"] = format_datetime_range(datetime) + + if filter_expr: + base_args["filter"] = self.get_filter(filter_expr, filter_lang) + aggregate_request = EsAggregationExtensionPostRequest(**base_args) + else: + # Workaround for optional path param in POST requests + if "collections" in path: + collection_id = path.split("/")[2] + + filter_lang = "cql2-json" + if aggregate_request.filter_expr: + aggregate_request.filter_expr = self.get_filter( + aggregate_request.filter_expr, filter_lang + ) + + if collection_id: + if aggregate_request.collections: + raise HTTPException( + status_code=400, + detail="Cannot query multiple collections when executing '/collections//aggregate'. Use '/aggregate' and the collections field instead", + ) + else: + aggregate_request.collections = [collection_id] + + if ( + aggregate_request.aggregations is None + or aggregate_request.aggregations == [] + ): + raise HTTPException( + status_code=400, + detail="No 'aggregations' found. 
Use '/aggregations' to return available aggregations", + ) + + if aggregate_request.ids: + search = self.database.apply_ids_filter( + search=search, item_ids=aggregate_request.ids + ) + + if aggregate_request.datetime: + search = self.database.apply_datetime_filter( + search=search, interval=aggregate_request.datetime + ) + + if aggregate_request.bbox: + bbox = aggregate_request.bbox + if len(bbox) == 6: + bbox = [bbox[0], bbox[1], bbox[3], bbox[4]] + + search = self.database.apply_bbox_filter(search=search, bbox=bbox) + + if aggregate_request.intersects: + search = self.database.apply_intersects_filter( + search=search, intersects=aggregate_request.intersects + ) + + if aggregate_request.collections: + search = self.database.apply_collections_filter( + search=search, collection_ids=aggregate_request.collections + ) + # validate that aggregations are supported for all collections + for collection_id in aggregate_request.collections: + aggregation_info = await self.get_aggregations( + collection_id=collection_id, request=request + ) + supported_aggregations = ( + aggregation_info["aggregations"] + self.DEFAULT_AGGREGATIONS + ) + + for agg_name in aggregate_request.aggregations: + if agg_name not in set([x["name"] for x in supported_aggregations]): + raise HTTPException( + status_code=400, + detail=f"Aggregation {agg_name} not supported by collection {collection_id}", + ) + else: + # Validate that the aggregations requested are supported by the catalog + aggregation_info = await self.get_aggregations(request=request) + supported_aggregations = aggregation_info["aggregations"] + for agg_name in aggregate_request.aggregations: + if agg_name not in [x["name"] for x in supported_aggregations]: + raise HTTPException( + status_code=400, + detail=f"Aggregation {agg_name} not supported at catalog level", + ) + + if aggregate_request.filter_expr: + try: + search = await self.database.apply_cql2_filter( + search, aggregate_request.filter_expr + ) + except Exception as e: + raise HTTPException( + status_code=400, detail=f"Error with cql2 filter: {e}" + ) + + centroid_geohash_grid_precision = self.extract_precision( + aggregate_request.centroid_geohash_grid_frequency_precision, + 1, + self.MAX_GEOHASH_PRECISION, + ) + + centroid_geohex_grid_precision = self.extract_precision( + aggregate_request.centroid_geohex_grid_frequency_precision, + 0, + self.MAX_GEOHEX_PRECISION, + ) + + centroid_geotile_grid_precision = self.extract_precision( + aggregate_request.centroid_geotile_grid_frequency_precision, + 0, + self.MAX_GEOTILE_PRECISION, + ) + + geometry_geohash_grid_precision = self.extract_precision( + aggregate_request.geometry_geohash_grid_frequency_precision, + 1, + self.MAX_GEOHASH_PRECISION, + ) + + geometry_geotile_grid_precision = self.extract_precision( + aggregate_request.geometry_geotile_grid_frequency_precision, + 0, + self.MAX_GEOTILE_PRECISION, + ) + + datetime_frequency_interval = self.extract_date_histogram_interval( + aggregate_request.datetime_frequency_interval, + ) + + try: + db_response = await self.database.aggregate( + collections, + aggregate_request.aggregations, + search, + centroid_geohash_grid_precision, + centroid_geohex_grid_precision, + centroid_geotile_grid_precision, + geometry_geohash_grid_precision, + geometry_geotile_grid_precision, + datetime_frequency_interval, + ) + except Exception as error: + if not isinstance(error, IndexError): + raise error + aggs: List[Aggregation] = [] + if db_response: + result_aggs = db_response.get("aggregations", {}) + for agg in { + 
frozenset(item.items()): item + for item in supported_aggregations + self.GEO_POINT_AGGREGATIONS + }.values(): + if agg["name"] in aggregate_request.aggregations: + if agg["name"].endswith("_frequency"): + aggs.append( + frequency_agg(result_aggs, agg["name"], agg["data_type"]) + ) + else: + aggs.append( + metric_agg(result_aggs, agg["name"], agg["data_type"]) + ) + links = [ + {"rel": "root", "type": "application/json", "href": base_url}, + ] + + if collection_id: + collection_endpoint = urljoin(base_url, f"collections/{collection_id}") + links.extend( + [ + { + "rel": "collection", + "type": "application/json", + "href": collection_endpoint, + }, + { + "rel": "self", + "type": "application/json", + "href": urljoin(collection_endpoint, "aggregate"), + }, + ] + ) + else: + links.append( + { + "rel": "self", + "type": "application/json", + "href": urljoin(base_url, "aggregate"), + } + ) + results = AggregationCollection( + type="AggregationCollection", aggregations=aggs, links=links + ) + + return results diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/aggregation/format.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/aggregation/format.py new file mode 100644 index 00000000..9553ede4 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/aggregation/format.py @@ -0,0 +1,60 @@ +"""Formatting functions for aggregation responses.""" + +from datetime import datetime +from typing import Any, Dict + +from stac_fastapi.core.datetime_utils import datetime_to_str +from stac_fastapi.extensions.core.aggregation.types import Aggregation + + +def frequency_agg(es_aggs: Dict[str, Any], name: str, data_type: str) -> Aggregation: + """Format an aggregation for a frequency distribution aggregation. + + Args: + es_aggs: The Elasticsearch/OpenSearch aggregation response + name: The name of the aggregation + data_type: The data type of the aggregation + + Returns: + Aggregation: A formatted aggregation response + """ + buckets = [] + for bucket in es_aggs.get(name, {}).get("buckets", []): + bucket_data = { + "key": bucket.get("key_as_string") or bucket.get("key"), + "data_type": data_type, + "frequency": bucket.get("doc_count"), + "to": bucket.get("to"), + "from": bucket.get("from"), + } + buckets.append(bucket_data) + return Aggregation( + name=name, + data_type="frequency_distribution", + overflow=es_aggs.get(name, {}).get("sum_other_doc_count", 0), + buckets=buckets, + ) + + +def metric_agg(es_aggs: Dict[str, Any], name: str, data_type: str) -> Aggregation: + """Format an aggregation for a metric aggregation. 
+ + Args: + es_aggs: The Elasticsearch/OpenSearch aggregation response + name: The name of the aggregation + data_type: The data type of the aggregation + + Returns: + Aggregation: A formatted aggregation response + """ + value = es_aggs.get(name, {}).get("value_as_string") or es_aggs.get(name, {}).get( + "value" + ) + # ES 7.x does not return datetimes with a 'value_as_string' field + if "datetime" in name and isinstance(value, float): + value = datetime_to_str(datetime.fromtimestamp(value / 1e3)) + return Aggregation( + name=name, + data_type=data_type, + value=value, + ) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/README.md b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/README.md new file mode 100644 index 00000000..5f4a6ada --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/README.md @@ -0,0 +1,61 @@ +# STAC FastAPI Database Package + +This package contains shared database operations used by both the Elasticsearch and OpenSearch +implementations of STAC FastAPI. It helps reduce code duplication and ensures consistent behavior +between the two implementations. + +## Package Structure + +The database package is organized into five main modules: + +- **index.py**: Contains functions for managing indices + - [create_index_templates_shared](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database_logic_helpers.py:15:0-48:33): Creates index templates for Collections and Items + - [delete_item_index_shared](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database_logic_helpers.py:128:0-153:30): Deletes an item index for a collection + - [index_by_collection_id](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/utilities.py:86:0-100:5): Translates a collection ID into an index name + - [index_alias_by_collection_id](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/utilities.py:103:0-115:5): Translates a collection ID into an index alias + - [indices](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/utilities.py:118:0-132:5): Gets a comma-separated string of index names + +- **query.py**: Contains functions for building and manipulating queries + - [apply_free_text_filter_shared](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database_logic_helpers.py:51:0-74:16): Applies a free text filter to a search + - [apply_intersects_filter_shared](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database_logic_helpers.py:77:0-104:5): Creates a geo_shape filter for intersecting geometry + - [populate_sort_shared](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database_logic_helpers.py:107:0-125:16): Creates a sort configuration for queries + +- **mapping.py**: Contains functions for working with mappings + - [get_queryables_mapping_shared](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database_logic_helpers.py:156:0-185:27): 
Retrieves mapping of Queryables for search + +- **document.py**: Contains functions for working with documents + - [mk_item_id](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/utilities.py:140:0-150:5): Creates a document ID for an Item + - [mk_actions](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/utilities.py:153:0-175:5): Creates bulk actions for indexing items + +- **utils.py**: Contains utility functions for database operations + - [validate_refresh](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/utilities.py:41:0-78:5): Validates the refresh parameter value + +## Usage + +Import the necessary components from the database package: + +```python +from stac_fastapi.sfeos_helpers.database import ( + # Index operations + create_index_templates_shared, + delete_item_index_shared, + index_alias_by_collection_id, + index_by_collection_id, + indices, + + # Query operations + apply_free_text_filter_shared, + apply_intersects_filter_shared, + populate_sort_shared, + + # Mapping operations + get_queryables_mapping_shared, + + # Document operations + mk_item_id, + mk_actions, + + # Utility functions + validate_refresh, +) +``` diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py new file mode 100644 index 00000000..31bf28d8 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/__init__.py @@ -0,0 +1,71 @@ +"""Shared database operations for stac-fastapi elasticsearch and opensearch backends. + +This module provides shared database functionality used by both the Elasticsearch and OpenSearch +implementations of STAC FastAPI. It includes: + +1. Index management functions for creating and deleting indices +2. Query building functions for constructing search queries +3. Mapping functions for working with Elasticsearch/OpenSearch mappings +4. Document operations for working with documents +5. Utility functions for database operations +6. Datetime utilities for query formatting + +The database package is organized as follows: +- index.py: Index management functions +- query.py: Query building functions +- mapping.py: Mapping functions +- document.py: Document operations +- utils.py: Utility functions +- datetime.py: Datetime utilities for query formatting + +When adding new functionality to this package, consider: +1. Will this code be used by both Elasticsearch and OpenSearch implementations? +2. Is the functionality stable and unlikely to diverge between implementations? +3. Is the function well-documented with clear input/output contracts? 
+ +Function Naming Conventions: +- All shared functions should end with `_shared` to clearly indicate they're meant to be used by both implementations +- Function names should be descriptive and indicate their purpose +- Parameter names should be consistent across similar functions +""" + +# Re-export all functions for backward compatibility +from .datetime import return_date +from .document import mk_actions, mk_item_id +from .index import ( + create_index_templates_shared, + delete_item_index_shared, + index_alias_by_collection_id, + index_by_collection_id, + indices, +) +from .mapping import get_queryables_mapping_shared +from .query import ( + apply_free_text_filter_shared, + apply_intersects_filter_shared, + populate_sort_shared, +) +from .utils import get_bool_env, validate_refresh + +__all__ = [ + # Index operations + "create_index_templates_shared", + "delete_item_index_shared", + "index_alias_by_collection_id", + "index_by_collection_id", + "indices", + # Query operations + "apply_free_text_filter_shared", + "apply_intersects_filter_shared", + "populate_sort_shared", + # Mapping operations + "get_queryables_mapping_shared", + # Document operations + "mk_item_id", + "mk_actions", + # Utility functions + "validate_refresh", + "get_bool_env", + # Datetime utilities + "return_date", +] diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/datetime.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/datetime.py new file mode 100644 index 00000000..352ed4b5 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/datetime.py @@ -0,0 +1,60 @@ +"""Elasticsearch/OpenSearch-specific datetime utilities. + +This module provides datetime utility functions specifically designed for +Elasticsearch and OpenSearch query formatting. +""" + +from datetime import datetime as datetime_type +from typing import Dict, Optional, Union + +from stac_fastapi.types.rfc3339 import DateTimeType + + +def return_date( + interval: Optional[Union[DateTimeType, str]] +) -> Dict[str, Optional[str]]: + """ + Convert a date interval to an Elasticsearch/OpenSearch query format. + + This function converts a date interval (which may be a datetime, a tuple of one or two datetimes, + a string representing a datetime or range, or None) into a dictionary for filtering + search results with Elasticsearch/OpenSearch. + + This function ensures the output dictionary contains 'gte' and 'lte' keys, + even if they are set to None, to prevent KeyError in the consuming logic. + + Args: + interval (Optional[Union[DateTimeType, str]]): The date interval, which might be a single datetime, + a tuple with one or two datetimes, a string, or None. + + Returns: + dict: A dictionary representing the date interval for use in filtering search results, + always containing 'gte' and 'lte' keys. + """ + result: Dict[str, Optional[str]] = {"gte": None, "lte": None} + + if interval is None: + return result + + if isinstance(interval, str): + if "/" in interval: + parts = interval.split("/") + result["gte"] = parts[0] if parts[0] != ".." else None + result["lte"] = parts[1] if len(parts) > 1 and parts[1] != ".." else None + else: + converted_time = interval if interval != ".." 
else None + result["gte"] = result["lte"] = converted_time + return result + + if isinstance(interval, datetime_type): + datetime_iso = interval.isoformat() + result["gte"] = result["lte"] = datetime_iso + elif isinstance(interval, tuple): + start, end = interval + # Ensure datetimes are converted to UTC and formatted with 'Z' + if start: + result["gte"] = start.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" + if end: + result["lte"] = end.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" + + return result diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/document.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/document.py new file mode 100644 index 00000000..0ba0e025 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/document.py @@ -0,0 +1,48 @@ +"""Document operations for Elasticsearch/OpenSearch. + +This module provides functions for working with documents in Elasticsearch/OpenSearch, +including document ID generation and bulk action creation. +""" + +from typing import Any, Dict, List + +from stac_fastapi.sfeos_helpers.database.index import index_alias_by_collection_id +from stac_fastapi.types.stac import Item + + +def mk_item_id(item_id: str, collection_id: str) -> str: + """Create the document id for an Item in Elasticsearch. + + Args: + item_id (str): The id of the Item. + collection_id (str): The id of the Collection that the Item belongs to. + + Returns: + str: The document id for the Item, combining the Item id and the Collection id, separated by a `|` character. + """ + return f"{item_id}|{collection_id}" + + +def mk_actions(collection_id: str, processed_items: List[Item]) -> List[Dict[str, Any]]: + """Create Elasticsearch bulk actions for a list of processed items. + + Args: + collection_id (str): The identifier for the collection the items belong to. + processed_items (List[Item]): The list of processed items to be bulk indexed. + + Returns: + List[Dict[str, Union[str, Dict]]]: The list of bulk actions to be executed, + each action being a dictionary with the following keys: + - `_index`: the index to store the document in. + - `_id`: the document's identifier. + - `_source`: the source of the document. + """ + index_alias = index_alias_by_collection_id(collection_id) + return [ + { + "_index": index_alias, + "_id": mk_item_id(item["id"], item["collection"]), + "_source": item, + } + for item in processed_items + ] diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/index.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/index.py new file mode 100644 index 00000000..3305f50f --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/index.py @@ -0,0 +1,130 @@ +"""Index management functions for Elasticsearch/OpenSearch. + +This module provides functions for creating and managing indices in Elasticsearch/OpenSearch. +""" + +from functools import lru_cache +from typing import Any, List, Optional + +from stac_fastapi.sfeos_helpers.mappings import ( + _ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE, + COLLECTIONS_INDEX, + ES_COLLECTIONS_MAPPINGS, + ES_ITEMS_MAPPINGS, + ES_ITEMS_SETTINGS, + ITEM_INDICES, + ITEMS_INDEX_PREFIX, +) + + +@lru_cache(256) +def index_by_collection_id(collection_id: str) -> str: + """ + Translate a collection id into an Elasticsearch index name. + + Args: + collection_id (str): The collection id to translate into an index name. + + Returns: + str: The index name derived from the collection id. 
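+
+    Example (illustrative; assumes the default "items_" prefix and a collection id
+    containing no unsupported characters):
+        >>> index_by_collection_id("My-Collection")
+        'items_my-collection_4d792d436f6c6c656374696f6e'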
+ """ + cleaned = collection_id.translate(_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE) + return ( + f"{ITEMS_INDEX_PREFIX}{cleaned.lower()}_{collection_id.encode('utf-8').hex()}" + ) + + +@lru_cache(256) +def index_alias_by_collection_id(collection_id: str) -> str: + """ + Translate a collection id into an Elasticsearch index alias. + + Args: + collection_id (str): The collection id to translate into an index alias. + + Returns: + str: The index alias derived from the collection id. + """ + cleaned = collection_id.translate(_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE) + return f"{ITEMS_INDEX_PREFIX}{cleaned}" + + +def indices(collection_ids: Optional[List[str]]) -> str: + """ + Get a comma-separated string of index names for a given list of collection ids. + + Args: + collection_ids: A list of collection ids. + + Returns: + A string of comma-separated index names. If `collection_ids` is empty, returns the default indices. + """ + return ( + ",".join(map(index_alias_by_collection_id, collection_ids)) + if collection_ids + else ITEM_INDICES + ) + + +async def create_index_templates_shared(settings: Any) -> None: + """Create index templates for Elasticsearch/OpenSearch Collection and Item indices. + + Args: + settings (Any): The settings object containing the client configuration. + Must have a create_client attribute that returns an Elasticsearch/OpenSearch client. + + Returns: + None: This function doesn't return any value but creates index templates in the database. + + Notes: + This function creates two index templates: + 1. A template for the Collections index with the appropriate mappings + 2. A template for the Items indices with both settings and mappings + + These templates ensure that any new indices created with matching patterns + will automatically have the correct structure. + """ + client = settings.create_client + await client.indices.put_index_template( + name=f"template_{COLLECTIONS_INDEX}", + body={ + "index_patterns": [f"{COLLECTIONS_INDEX}*"], + "template": {"mappings": ES_COLLECTIONS_MAPPINGS}, + }, + ) + await client.indices.put_index_template( + name=f"template_{ITEMS_INDEX_PREFIX}", + body={ + "index_patterns": [f"{ITEMS_INDEX_PREFIX}*"], + "template": {"settings": ES_ITEMS_SETTINGS, "mappings": ES_ITEMS_MAPPINGS}, + }, + ) + await client.close() + + +async def delete_item_index_shared(settings: Any, collection_id: str) -> None: + """Delete the index for items in a collection. + + Args: + settings (Any): The settings object containing the client configuration. + Must have a create_client attribute that returns an Elasticsearch/OpenSearch client. + collection_id (str): The ID of the collection whose items index will be deleted. + + Returns: + None: This function doesn't return any value but deletes an item index in the database. + + Notes: + This function deletes an item index and its alias. It first resolves the alias to find + the actual index name, then deletes both the alias and the index. 
+ """ + client = settings.create_client + + name = index_alias_by_collection_id(collection_id) + resolved = await client.indices.resolve_index(name=name) + if "aliases" in resolved and resolved["aliases"]: + [alias] = resolved["aliases"] + await client.indices.delete_alias(index=alias["indices"], name=alias["name"]) + await client.indices.delete(index=alias["indices"]) + else: + await client.indices.delete(index=name) + await client.close() diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/mapping.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/mapping.py new file mode 100644 index 00000000..8f664651 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/mapping.py @@ -0,0 +1,38 @@ +"""Mapping functions for Elasticsearch/OpenSearch. + +This module provides functions for working with Elasticsearch/OpenSearch mappings. +""" + +from typing import Any, Dict + + +async def get_queryables_mapping_shared( + mappings: Dict[str, Dict[str, Any]], collection_id: str = "*" +) -> Dict[str, str]: + """Retrieve mapping of Queryables for search. + + Args: + mappings (Dict[str, Dict[str, Any]]): The mapping information returned from + Elasticsearch/OpenSearch client's indices.get_mapping() method. + Expected structure is {index_name: {"mappings": {...}}}. + collection_id (str, optional): The id of the Collection the Queryables + belongs to. Defaults to "*". + + Returns: + Dict[str, str]: A dictionary containing the Queryables mappings, where keys are + field names and values are the corresponding paths in the Elasticsearch/OpenSearch + document structure. + """ + queryables_mapping = {} + + for mapping in mappings.values(): + fields = mapping["mappings"].get("properties", {}) + properties = fields.pop("properties", {}).get("properties", {}).keys() + + for field_key in fields: + queryables_mapping[field_key] = field_key + + for property_key in properties: + queryables_mapping[property_key] = f"properties.{property_key}" + + return queryables_mapping diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py new file mode 100644 index 00000000..dacbb590 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/query.py @@ -0,0 +1,85 @@ +"""Query building functions for Elasticsearch/OpenSearch. + +This module provides functions for building and manipulating Elasticsearch/OpenSearch queries. +""" + +from typing import Any, Dict, List, Optional + +from stac_fastapi.sfeos_helpers.mappings import Geometry + + +def apply_free_text_filter_shared( + search: Any, free_text_queries: Optional[List[str]] +) -> Any: + """Create a free text query for Elasticsearch/OpenSearch. + + Args: + search (Any): The search object to apply the query to. + free_text_queries (Optional[List[str]]): A list of text strings to search for in the properties. + + Returns: + Any: The search object with the free text query applied, or the original search + object if no free_text_queries were provided. + + Notes: + This function creates a query_string query that searches for the specified text strings + in all properties of the documents. The query strings are joined with OR operators. 
+ """ + if free_text_queries is not None: + free_text_query_string = '" OR properties.\\*:"'.join(free_text_queries) + search = search.query( + "query_string", query=f'properties.\\*:"{free_text_query_string}"' + ) + + return search + + +def apply_intersects_filter_shared( + intersects: Geometry, +) -> Dict[str, Dict]: + """Create a geo_shape filter for intersecting geometry. + + Args: + intersects (Geometry): The intersecting geometry, represented as a GeoJSON-like object. + + Returns: + Dict[str, Dict]: A dictionary containing the geo_shape filter configuration + that can be used with Elasticsearch/OpenSearch Q objects. + + Notes: + This function creates a geo_shape filter configuration to find documents that intersect + with the specified geometry. The returned dictionary should be wrapped in a Q object + when applied to a search. + """ + return { + "geo_shape": { + "geometry": { + "shape": { + "type": intersects.type.lower(), + "coordinates": intersects.coordinates, + }, + "relation": "intersects", + } + } + } + + +def populate_sort_shared(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: + """Create a sort configuration for Elasticsearch/OpenSearch queries. + + Args: + sortby (List): A list of sort specifications, each containing a field and direction. + + Returns: + Optional[Dict[str, Dict[str, str]]]: A dictionary mapping field names to sort direction + configurations, or None if no sort was specified. + + Notes: + This function transforms a list of sort specifications into the format required by + Elasticsearch/OpenSearch for sorting query results. The returned dictionary can be + directly used in search requests. + """ + if sortby: + return {s.field: {"order": s.direction} for s in sortby} + else: + return None diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/utils.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/utils.py new file mode 100644 index 00000000..0c6b4c45 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/database/utils.py @@ -0,0 +1,50 @@ +"""Utility functions for database operations in Elasticsearch/OpenSearch. + +This module provides utility functions for working with database operations +in Elasticsearch/OpenSearch, such as parameter validation. +""" + +import logging +from typing import Union + +from stac_fastapi.core.utilities import get_bool_env + + +def validate_refresh(value: Union[str, bool]) -> str: + """ + Validate the `refresh` parameter value. + + Args: + value (Union[str, bool]): The `refresh` parameter value, which can be a string or a boolean. + + Returns: + str: The validated value of the `refresh` parameter, which can be "true", "false", or "wait_for". + """ + logger = logging.getLogger(__name__) + + # Handle boolean-like values using get_bool_env + if isinstance(value, bool) or value in { + "true", + "false", + "1", + "0", + "yes", + "no", + "y", + "n", + }: + is_true = get_bool_env("DATABASE_REFRESH", default=value) + return "true" if is_true else "false" + + # Normalize to lowercase for case-insensitivity + value = value.lower() + + # Handle "wait_for" explicitly + if value == "wait_for": + return "wait_for" + + # Log a warning for invalid values and default to "false" + logger.warning( + f"Invalid value for `refresh`: '{value}'. Expected 'true', 'false', or 'wait_for'. Defaulting to 'false'." 
+ ) + return "false" diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md new file mode 100644 index 00000000..d3b09167 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/README.md @@ -0,0 +1,27 @@ +# STAC FastAPI Filter Package + +This package contains shared filter extension functionality used by both the Elasticsearch and OpenSearch +implementations of STAC FastAPI. It helps reduce code duplication and ensures consistent behavior +between the two implementations. + +## Package Structure + +The filter package is organized into three main modules: + +- **cql2.py**: Contains functions for converting CQL2 patterns to Elasticsearch/OpenSearch compatible formats + - [cql2_like_to_es](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:59:0-75:5): Converts CQL2 "LIKE" characters to Elasticsearch "wildcard" characters + - [_replace_like_patterns](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:51:0-56:71): Helper function for pattern replacement + +- **transform.py**: Contains functions for transforming CQL2 queries to Elasticsearch/OpenSearch query DSL + - [to_es_field](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:83:0-93:47): Maps field names using queryables mapping + - [to_es](cci:1://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:96:0-201:13): Transforms CQL2 query structures to Elasticsearch/OpenSearch query DSL + +- **client.py**: Contains the base filter client implementation + - [EsAsyncBaseFiltersClient](cci:2://file:///home/computer/Code/stac-fastapi-elasticsearch-opensearch/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter.py:209:0-293:25): Base class for implementing the STAC filter extension + +## Usage + +Import the necessary components from the filter package: + +```python +from stac_fastapi.sfeos_helpers.filter import cql2_like_to_es, to_es, EsAsyncBaseFiltersClient \ No newline at end of file diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/__init__.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/__init__.py new file mode 100644 index 00000000..02b5db92 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/__init__.py @@ -0,0 +1,44 @@ +"""Shared filter extension methods for stac-fastapi elasticsearch and opensearch backends. + +This module provides shared functionality for implementing the STAC API Filter Extension +with Elasticsearch and OpenSearch. It includes: + +1. Functions for converting CQL2 queries to Elasticsearch/OpenSearch query DSL +2. Helper functions for field mapping and query transformation +3. Base implementation of the AsyncBaseFiltersClient for Elasticsearch/OpenSearch + +The filter package is organized as follows: +- cql2.py: CQL2 pattern conversion helpers +- transform.py: Query transformation functions +- client.py: Filter client implementation + +When adding new functionality to this package, consider: +1. Will this code be used by both Elasticsearch and OpenSearch implementations? +2. Is the functionality stable and unlikely to diverge between implementations? +3. 
Is the function well-documented with clear input/output contracts? + +Function Naming Conventions: +- Function names should be descriptive and indicate their purpose +- Parameter names should be consistent across similar functions +""" + +from .client import EsAsyncBaseFiltersClient + +# Re-export the main functions and classes for backward compatibility +from .cql2 import ( + _replace_like_patterns, + cql2_like_patterns, + cql2_like_to_es, + valid_like_substitutions, +) +from .transform import to_es, to_es_field + +__all__ = [ + "cql2_like_patterns", + "valid_like_substitutions", + "cql2_like_to_es", + "_replace_like_patterns", + "to_es_field", + "to_es", + "EsAsyncBaseFiltersClient", +] diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py new file mode 100644 index 00000000..4b2a1a71 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/client.py @@ -0,0 +1,98 @@ +"""Filter client implementation for Elasticsearch/OpenSearch.""" + +from collections import deque +from typing import Any, Dict, Optional + +import attr + +from stac_fastapi.core.base_database_logic import BaseDatabaseLogic +from stac_fastapi.core.extensions.filter import DEFAULT_QUERYABLES +from stac_fastapi.extensions.core.filter.client import AsyncBaseFiltersClient +from stac_fastapi.sfeos_helpers.mappings import ES_MAPPING_TYPE_TO_JSON + + +@attr.s +class EsAsyncBaseFiltersClient(AsyncBaseFiltersClient): + """Defines a pattern for implementing the STAC filter extension.""" + + database: BaseDatabaseLogic = attr.ib() + + async def get_queryables( + self, collection_id: Optional[str] = None, **kwargs + ) -> Dict[str, Any]: + """Get the queryables available for the given collection_id. + + If collection_id is None, returns the intersection of all + queryables over all collections. + + This base implementation returns a blank queryable schema. This is not allowed + under OGC CQL but it is allowed by the STAC API Filter Extension + + https://github.com/radiantearth/stac-api-spec/tree/master/fragments/filter#queryables + + Args: + collection_id (str, optional): The id of the collection to get queryables for. + **kwargs: additional keyword arguments + + Returns: + Dict[str, Any]: A dictionary containing the queryables for the given collection. + """ + queryables: Dict[str, Any] = { + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "https://stac-api.example.com/queryables", + "type": "object", + "title": "Queryables for STAC API", + "description": "Queryable names for the STAC API Item Search filter.", + "properties": DEFAULT_QUERYABLES, + "additionalProperties": True, + } + if not collection_id: + return queryables + + properties: Dict[str, Any] = queryables["properties"] + queryables.update( + { + "properties": properties, + "additionalProperties": False, + } + ) + + mapping_data = await self.database.get_items_mapping(collection_id) + mapping_properties = next(iter(mapping_data.values()))["mappings"]["properties"] + stack = deque(mapping_properties.items()) + + while stack: + field_name, field_def = stack.popleft() + + # Iterate over nested fields + field_properties = field_def.get("properties") + if field_properties: + # Fields in Item Properties should be exposed with their un-prefixed names, + # and not require expressions to prefix them with properties, + # e.g., eo:cloud_cover instead of properties.eo:cloud_cover. 
+ if field_name == "properties": + stack.extend(field_properties.items()) + else: + stack.extend( + (f"{field_name}.{k}", v) for k, v in field_properties.items() + ) + + # Skip non-indexed or disabled fields + field_type = field_def.get("type") + if not field_type or not field_def.get("enabled", True): + continue + + # Generate field properties + field_result = DEFAULT_QUERYABLES.get(field_name, {}) + properties[field_name] = field_result + + field_name_human = field_name.replace("_", " ").title() + field_result.setdefault("title", field_name_human) + + field_type_json = ES_MAPPING_TYPE_TO_JSON.get(field_type, field_type) + field_result.setdefault("type", field_type_json) + + if field_type in {"date", "date_nanos"}: + field_result.setdefault("format", "date-time") + + return queryables diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/cql2.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/cql2.py new file mode 100644 index 00000000..bd248c90 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/cql2.py @@ -0,0 +1,39 @@ +"""CQL2 pattern conversion helpers for Elasticsearch/OpenSearch.""" + +import re + +cql2_like_patterns = re.compile(r"\\.|[%_]|\\$") +valid_like_substitutions = { + "\\\\": "\\", + "\\%": "%", + "\\_": "_", + "%": "*", + "_": "?", +} + + +def _replace_like_patterns(match: re.Match) -> str: + pattern = match.group() + try: + return valid_like_substitutions[pattern] + except KeyError: + raise ValueError(f"'{pattern}' is not a valid escape sequence") + + +def cql2_like_to_es(string: str) -> str: + """ + Convert CQL2 "LIKE" characters to Elasticsearch "wildcard" characters. + + Args: + string (str): The string containing CQL2 wildcard characters. + + Returns: + str: The converted string with Elasticsearch compatible wildcards. + + Raises: + ValueError: If an invalid escape sequence is encountered. + """ + return cql2_like_patterns.sub( + repl=_replace_like_patterns, + string=string, + ) diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/transform.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/transform.py new file mode 100644 index 00000000..c78b19e5 --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/filter/transform.py @@ -0,0 +1,133 @@ +"""Query transformation functions for Elasticsearch/OpenSearch.""" + +from typing import Any, Dict + +from stac_fastapi.core.extensions.filter import ( + AdvancedComparisonOp, + ComparisonOp, + LogicalOp, + SpatialOp, +) + +from .cql2 import cql2_like_to_es + + +def to_es_field(queryables_mapping: Dict[str, Any], field: str) -> str: + """ + Map a given field to its corresponding Elasticsearch field according to a predefined mapping. + + Args: + field (str): The field name from a user query or filter. + + Returns: + str: The mapped field name suitable for Elasticsearch queries. + """ + return queryables_mapping.get(field, field) + + +def to_es(queryables_mapping: Dict[str, Any], query: Dict[str, Any]) -> Dict[str, Any]: + """ + Transform a simplified CQL2 query structure to an Elasticsearch compatible query DSL. + + Args: + query (Dict[str, Any]): The query dictionary containing 'op' and 'args'. + + Returns: + Dict[str, Any]: The corresponding Elasticsearch query in the form of a dictionary. 
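+
+    Example (illustrative):
+        >>> to_es({}, {"op": "=", "args": [{"property": "collection"}, "landsat"]})
+        {'term': {'collection': 'landsat'}}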
+ """ + if query["op"] in [LogicalOp.AND, LogicalOp.OR, LogicalOp.NOT]: + bool_type = { + LogicalOp.AND: "must", + LogicalOp.OR: "should", + LogicalOp.NOT: "must_not", + }[query["op"]] + return { + "bool": { + bool_type: [ + to_es(queryables_mapping, sub_query) for sub_query in query["args"] + ] + } + } + + elif query["op"] in [ + ComparisonOp.EQ, + ComparisonOp.NEQ, + ComparisonOp.LT, + ComparisonOp.LTE, + ComparisonOp.GT, + ComparisonOp.GTE, + ]: + range_op = { + ComparisonOp.LT: "lt", + ComparisonOp.LTE: "lte", + ComparisonOp.GT: "gt", + ComparisonOp.GTE: "gte", + } + + field = to_es_field(queryables_mapping, query["args"][0]["property"]) + value = query["args"][1] + if isinstance(value, dict) and "timestamp" in value: + value = value["timestamp"] + if query["op"] == ComparisonOp.EQ: + return {"range": {field: {"gte": value, "lte": value}}} + elif query["op"] == ComparisonOp.NEQ: + return { + "bool": { + "must_not": [{"range": {field: {"gte": value, "lte": value}}}] + } + } + else: + return {"range": {field: {range_op[query["op"]]: value}}} + else: + if query["op"] == ComparisonOp.EQ: + return {"term": {field: value}} + elif query["op"] == ComparisonOp.NEQ: + return {"bool": {"must_not": [{"term": {field: value}}]}} + else: + return {"range": {field: {range_op[query["op"]]: value}}} + + elif query["op"] == ComparisonOp.IS_NULL: + field = to_es_field(queryables_mapping, query["args"][0]["property"]) + return {"bool": {"must_not": {"exists": {"field": field}}}} + + elif query["op"] == AdvancedComparisonOp.BETWEEN: + field = to_es_field(queryables_mapping, query["args"][0]["property"]) + gte, lte = query["args"][1], query["args"][2] + if isinstance(gte, dict) and "timestamp" in gte: + gte = gte["timestamp"] + if isinstance(lte, dict) and "timestamp" in lte: + lte = lte["timestamp"] + return {"range": {field: {"gte": gte, "lte": lte}}} + + elif query["op"] == AdvancedComparisonOp.IN: + field = to_es_field(queryables_mapping, query["args"][0]["property"]) + values = query["args"][1] + if not isinstance(values, list): + raise ValueError(f"Arg {values} is not a list") + return {"terms": {field: values}} + + elif query["op"] == AdvancedComparisonOp.LIKE: + field = to_es_field(queryables_mapping, query["args"][0]["property"]) + pattern = cql2_like_to_es(query["args"][1]) + return {"wildcard": {field: {"value": pattern, "case_insensitive": True}}} + + elif query["op"] in [ + SpatialOp.S_INTERSECTS, + SpatialOp.S_CONTAINS, + SpatialOp.S_WITHIN, + SpatialOp.S_DISJOINT, + ]: + field = to_es_field(queryables_mapping, query["args"][0]["property"]) + geometry = query["args"][1] + + relation_mapping = { + SpatialOp.S_INTERSECTS: "intersects", + SpatialOp.S_CONTAINS: "contains", + SpatialOp.S_WITHIN: "within", + SpatialOp.S_DISJOINT: "disjoint", + } + + relation = relation_mapping[query["op"]] + return {"geo_shape": {field: {"shape": geometry, "relation": relation}}} + + return {} diff --git a/stac_fastapi/core/stac_fastapi/core/database_logic.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py similarity index 50% rename from stac_fastapi/core/stac_fastapi/core/database_logic.py rename to stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py index 85ebcf21..476d656a 100644 --- a/stac_fastapi/core/stac_fastapi/core/database_logic.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py @@ -1,10 +1,32 @@ -"""Database logic core.""" +"""Shared mappings for stac-fastapi elasticsearch and opensearch backends. 
-import os -from functools import lru_cache -from typing import Any, Dict, List, Optional, Protocol +This module contains shared constants, mappings, and type definitions used by both +the Elasticsearch and OpenSearch implementations of STAC FastAPI. It includes: + +1. Index name constants and character translation tables +2. Mapping definitions for Collections and Items +3. Aggregation mappings for search queries +4. Type conversion mappings between Elasticsearch/OpenSearch and JSON Schema types -from stac_fastapi.types.stac import Item +The sfeos_helpers package is organized as follows: +- database_logic_helpers.py: Shared database operations +- filter.py: Shared filter extension implementation +- mappings.py: Shared constants and mapping definitions (this file) +- utilities.py: Shared utility functions + +When adding new functionality to this package, consider: +1. Will this code be used by both Elasticsearch and OpenSearch implementations? +2. Is the functionality stable and unlikely to diverge between implementations? +3. Is the function well-documented with clear input/output contracts? + +Function Naming Conventions: +- All shared functions should end with `_shared` to clearly indicate they're meant to be used by both implementations +- Function names should be descriptive and indicate their purpose +- Parameter names should be consistent across similar functions +""" + +import os +from typing import Any, Dict, Literal, Protocol # stac_pydantic classes extend _GeometryBase, which doesn't have a type field, @@ -144,89 +166,97 @@ class Geometry(Protocol): # noqa }, } - -@lru_cache(256) -def index_by_collection_id(collection_id: str) -> str: - """ - Translate a collection id into an Elasticsearch index name. - - Args: - collection_id (str): The collection id to translate into an index name. - - Returns: - str: The index name derived from the collection id. - """ - cleaned = collection_id.translate(_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE) - return ( - f"{ITEMS_INDEX_PREFIX}{cleaned.lower()}_{collection_id.encode('utf-8').hex()}" - ) - - -@lru_cache(256) -def index_alias_by_collection_id(collection_id: str) -> str: - """ - Translate a collection id into an Elasticsearch index alias. - - Args: - collection_id (str): The collection id to translate into an index alias. - - Returns: - str: The index alias derived from the collection id. - """ - cleaned = collection_id.translate(_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE) - return f"{ITEMS_INDEX_PREFIX}{cleaned}" - - -def indices(collection_ids: Optional[List[str]]) -> str: - """ - Get a comma-separated string of index names for a given list of collection ids. - - Args: - collection_ids: A list of collection ids. - - Returns: - A string of comma-separated index names. If `collection_ids` is empty, returns the default indices. - """ - return ( - ",".join(map(index_alias_by_collection_id, collection_ids)) - if collection_ids - else ITEM_INDICES - ) - - -def mk_item_id(item_id: str, collection_id: str) -> str: - """Create the document id for an Item in Elasticsearch. - - Args: - item_id (str): The id of the Item. - collection_id (str): The id of the Collection that the Item belongs to. - - Returns: - str: The document id for the Item, combining the Item id and the Collection id, separated by a `|` character. - """ - return f"{item_id}|{collection_id}" - - -def mk_actions(collection_id: str, processed_items: List[Item]) -> List[Dict[str, Any]]: - """Create Elasticsearch bulk actions for a list of processed items. 
- - Args: - collection_id (str): The identifier for the collection the items belong to. - processed_items (List[Item]): The list of processed items to be bulk indexed. - - Returns: - List[Dict[str, Union[str, Dict]]]: The list of bulk actions to be executed, - each action being a dictionary with the following keys: - - `_index`: the index to store the document in. - - `_id`: the document's identifier. - - `_source`: the source of the document. - """ - index_alias = index_alias_by_collection_id(collection_id) - return [ - { - "_index": index_alias, - "_id": mk_item_id(item["id"], item["collection"]), - "_source": item, +# Shared aggregation mapping for both Elasticsearch and OpenSearch +AGGREGATION_MAPPING: Dict[str, Dict[str, Any]] = { + "total_count": {"value_count": {"field": "id"}}, + "collection_frequency": {"terms": {"field": "collection", "size": 100}}, + "platform_frequency": {"terms": {"field": "properties.platform", "size": 100}}, + "cloud_cover_frequency": { + "range": { + "field": "properties.eo:cloud_cover", + "ranges": [ + {"to": 5}, + {"from": 5, "to": 15}, + {"from": 15, "to": 40}, + {"from": 40}, + ], + } + }, + "datetime_frequency": { + "date_histogram": { + "field": "properties.datetime", + "calendar_interval": "month", + } + }, + "datetime_min": {"min": {"field": "properties.datetime"}}, + "datetime_max": {"max": {"field": "properties.datetime"}}, + "grid_code_frequency": { + "terms": { + "field": "properties.grid:code", + "missing": "none", + "size": 10000, + } + }, + "sun_elevation_frequency": { + "histogram": {"field": "properties.view:sun_elevation", "interval": 5} + }, + "sun_azimuth_frequency": { + "histogram": {"field": "properties.view:sun_azimuth", "interval": 5} + }, + "off_nadir_frequency": { + "histogram": {"field": "properties.view:off_nadir", "interval": 5} + }, + "centroid_geohash_grid_frequency": { + "geohash_grid": { + "field": "properties.proj:centroid", + "precision": 1, + } + }, + "centroid_geohex_grid_frequency": { + "geohex_grid": { + "field": "properties.proj:centroid", + "precision": 0, + } + }, + "centroid_geotile_grid_frequency": { + "geotile_grid": { + "field": "properties.proj:centroid", + "precision": 0, + } + }, + "geometry_geohash_grid_frequency": { + "geohash_grid": { + "field": "geometry", + "precision": 1, + } + }, + "geometry_geotile_grid_frequency": { + "geotile_grid": { + "field": "geometry", + "precision": 0, } - for item in processed_items - ] + }, +} + +ES_MAPPING_TYPE_TO_JSON: Dict[ + str, Literal["string", "number", "boolean", "object", "array", "null"] +] = { + "date": "string", + "date_nanos": "string", + "keyword": "string", + "match_only_text": "string", + "text": "string", + "wildcard": "string", + "byte": "number", + "double": "number", + "float": "number", + "half_float": "number", + "long": "number", + "scaled_float": "number", + "short": "number", + "token_count": "number", + "unsigned_long": "number", + "geo_point": "object", + "geo_shape": "object", + "nested": "array", +} diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/version.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/version.py new file mode 100644 index 00000000..e152cdff --- /dev/null +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/version.py @@ -0,0 +1,2 @@ +"""library version.""" +__version__ = "5.0.0a0" diff --git a/stac_fastapi/tests/conftest.py b/stac_fastapi/tests/conftest.py index 066b014d..afb9ac9b 100644 --- a/stac_fastapi/tests/conftest.py +++ b/stac_fastapi/tests/conftest.py @@ -23,11 +23,11 @@ from 
stac_fastapi.core.extensions.aggregation import (
     EsAggregationExtensionGetRequest,
     EsAggregationExtensionPostRequest,
-    EsAsyncAggregationClient,
 )
 from stac_fastapi.core.rate_limit import setup_rate_limit
 from stac_fastapi.core.route_dependencies import get_route_dependencies
 from stac_fastapi.core.utilities import get_bool_env
+from stac_fastapi.sfeos_helpers.aggregation import EsAsyncBaseAggregationClient

 if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch":
     from stac_fastapi.opensearch.config import AsyncOpensearchSettings as AsyncSettings
@@ -199,7 +199,7 @@ async def app():
     settings = AsyncSettings()

     aggregation_extension = AggregationExtension(
-        client=EsAsyncAggregationClient(
+        client=EsAsyncBaseAggregationClient(
             database=database, session=None, settings=settings
         )
     )
@@ -255,7 +255,7 @@ async def app_rate_limit():
     settings = AsyncSettings()

     aggregation_extension = AggregationExtension(
-        client=EsAsyncAggregationClient(
+        client=EsAsyncBaseAggregationClient(
             database=database, session=None, settings=settings
         )
     )
@@ -349,7 +349,7 @@ async def app_basic_auth():
     settings = AsyncSettings()

     aggregation_extension = AggregationExtension(
-        client=EsAsyncAggregationClient(
+        client=EsAsyncBaseAggregationClient(
             database=database, session=None, settings=settings
         )
     )
@@ -488,7 +488,7 @@ def build_test_app():
     )
     settings = AsyncSettings()
     aggregation_extension = AggregationExtension(
-        client=EsAsyncAggregationClient(
+        client=EsAsyncBaseAggregationClient(
             database=database, session=None, settings=settings
         )
     )
diff --git a/stac_fastapi/tests/database/test_database.py b/stac_fastapi/tests/database/test_database.py
index a5a01e60..86611235 100644
--- a/stac_fastapi/tests/database/test_database.py
+++ b/stac_fastapi/tests/database/test_database.py
@@ -1,25 +1,16 @@
-import os
 import uuid

 import pytest
 from stac_pydantic import api

-from ..conftest import MockRequest, database
+from stac_fastapi.sfeos_helpers.database import index_alias_by_collection_id
+from stac_fastapi.sfeos_helpers.mappings import (
+    COLLECTIONS_INDEX,
+    ES_COLLECTIONS_MAPPINGS,
+    ES_ITEMS_MAPPINGS,
+)

-if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch":
-    from stac_fastapi.opensearch.database_logic import (
-        COLLECTIONS_INDEX,
-        ES_COLLECTIONS_MAPPINGS,
-        ES_ITEMS_MAPPINGS,
-        index_alias_by_collection_id,
-    )
-else:
-    from stac_fastapi.elasticsearch.database_logic import (
-        COLLECTIONS_INDEX,
-        ES_COLLECTIONS_MAPPINGS,
-        ES_ITEMS_MAPPINGS,
-        index_alias_by_collection_id,
-    )
+from ..conftest import MockRequest, database


 @pytest.mark.asyncio
diff --git a/stac_fastapi/tests/extensions/test_cql2_like_to_es.py b/stac_fastapi/tests/extensions/test_cql2_like_to_es.py
index 96d51272..2125eeed 100644
--- a/stac_fastapi/tests/extensions/test_cql2_like_to_es.py
+++ b/stac_fastapi/tests/extensions/test_cql2_like_to_es.py
@@ -1,6 +1,6 @@
 import pytest

-from stac_fastapi.core.extensions.filter import cql2_like_to_es
+from stac_fastapi.sfeos_helpers.filter import cql2_like_to_es


 @pytest.mark.parametrize(
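
Editor's note: to make the relocation concrete, here is a minimal sketch (not part of the diff above) of how the moved filter helpers could be exercised. It assumes the to_es(queryables_mapping, query) and cql2_like_to_es(pattern) signatures shown in the filter.py hunk and the import path used by test_cql2_like_to_es.py; the queryables mapping itself is a hypothetical example.

# Hedged sketch, not part of the PR: exercising the relocated filter helpers.
from stac_fastapi.sfeos_helpers.filter import cql2_like_to_es, to_es

# Hypothetical queryables mapping from queryable names to index fields.
queryables_mapping = {
    "cloud_cover": "properties.eo:cloud_cover",
    "collection": "collection",
}

# CQL2 JSON for: cloud_cover < 10 AND collection = "sentinel-2"
query = {
    "op": "and",
    "args": [
        {"op": "<", "args": [{"property": "cloud_cover"}, 10]},
        {"op": "=", "args": [{"property": "collection"}, "sentinel-2"]},
    ],
}

es_query = to_es(queryables_mapping, query)
# Per the branches in the filter.py hunk, this should produce roughly:
# {"bool": {"must": [
#     {"range": {"properties.eo:cloud_cover": {"lt": 10}}},
#     {"term": {"collection": "sentinel-2"}},
# ]}}

# LIKE patterns are rewritten to wildcard syntax ('%' -> '*', '_' -> '?')
# before being wrapped in a case-insensitive wildcard query.
assert cql2_like_to_es("sentinel-2%") == "sentinel-2*"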
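
A similar sketch for the shared constants now living in mappings.py. The search body below is illustrative only; the constant names and the "string" fallback follow the AGGREGATION_MAPPING and ES_MAPPING_TYPE_TO_JSON definitions added in the rename hunk.

# Hedged sketch, not part of the PR: consuming the shared mappings module.
from stac_fastapi.sfeos_helpers.mappings import (
    AGGREGATION_MAPPING,
    ES_MAPPING_TYPE_TO_JSON,
)

# Select a subset of the shared aggregations for a search request body;
# "size": 0 skips document hits because only the buckets are needed.
requested = ["total_count", "collection_frequency", "datetime_max"]
search_body = {
    "size": 0,
    "aggs": {name: AGGREGATION_MAPPING[name] for name in requested},
}

# Translate an Elasticsearch/OpenSearch mapping type into the JSON Schema
# type used when assembling a queryables response, falling back to "string".
json_type = ES_MAPPING_TYPE_TO_JSON.get("half_float", "string")  # "number"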