diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index a966248b..864b52e3 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -65,7 +65,7 @@ jobs: strategy: matrix: - python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13"] backend: [ "elasticsearch7", "elasticsearch8", "opensearch"] name: Python ${{ matrix.python-version }} testing with ${{ matrix.backend }} diff --git a/.github/workflows/deploy_mkdocs.yml b/.github/workflows/deploy_mkdocs.yml index 833c1021..3606d654 100644 --- a/.github/workflows/deploy_mkdocs.yml +++ b/.github/workflows/deploy_mkdocs.yml @@ -20,10 +20,10 @@ jobs: - name: Checkout main uses: actions/checkout@v4 - - name: Set up Python 3.8 + - name: Set up Python 3.9 uses: actions/setup-python@v5 with: - python-version: 3.8 + python-version: 3.9 - name: Install dependencies run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d58271e..ef0188bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,27 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed +### Fixed + +## [v4.0.0a0] - 2025-04-17 + +### Added +- Added support for dynamically-generated queryables based on Elasticsearch/OpenSearch mappings, with extensible metadata augmentation [#351](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/351) +- Included default queryables configuration for seamless integration. 
[#351](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/351) + +### Changed +- Refactored database logic to reduce duplication [#351](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/351) +- Replaced `fastapi-slim` with `fastapi` dependency [#351](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/351) +- Changed minimum Python version to 3.9 [#354](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/354) +- Updated stac-fastapi api, types, and extensions libraries to 5.1.1 from 3.0.0 and made various associated changes [#354](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/354) +- Changed Makefile commands from 'docker-compose' to 'docker compose' [#354](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/354) + +### Fixed +- Improved performance of `mk_actions` and `filter-links` methods [#351](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/351) +- Fixed inheritance relating to BaseDatabaseSettings and ApiBaseSettings [#355](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/355) +- Fixed the return types of the delete_item and delete_collection methods [#355](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/355) +- Fixed inheritance relating to DatabaseLogic and BaseDatabaseLogic, and ApiBaseSettings [#355](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/355) + ## [v3.2.5] - 2025-04-07 ### Added @@ -307,7 +328,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Use genexp in execute_search and get_all_collections to return results. - Added db_to_stac serializer to item_collection method in core.py. 
-[Unreleased]: https://github.com/stac-utils/stac-fastapi-elasticsearch/tree/v3.2.5...main +[Unreleased]: https://github.com/stac-utils/stac-fastapi-elasticsearch/tree/v4.0.0a0...main +[v4.0.0a0]: https://github.com/stac-utils/stac-fastapi-elasticsearch/tree/v3.2.5...v4.0.0a0 [v3.2.5]: https://github.com/stac-utils/stac-fastapi-elasticsearch/tree/v3.2.4...v3.2.5 [v3.2.4]: https://github.com/stac-utils/stac-fastapi-elasticsearch/tree/v3.2.3...v3.2.4 [v3.2.3]: https://github.com/stac-utils/stac-fastapi-elasticsearch/tree/v3.2.2...v3.2.3 diff --git a/Makefile b/Makefile index 9a3f23ce..e965d785 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ OS_APP_PORT ?= 8082 OS_HOST ?= docker.for.mac.localhost OS_PORT ?= 9202 -run_es = docker-compose \ +run_es = docker compose \ run \ -p ${EXTERNAL_APP_PORT}:${ES_APP_PORT} \ -e PY_IGNORE_IMPORTMISMATCH=1 \ @@ -18,7 +18,7 @@ run_es = docker-compose \ -e APP_PORT=${ES_APP_PORT} \ app-elasticsearch -run_os = docker-compose \ +run_os = docker compose \ run \ -p ${EXTERNAL_APP_PORT}:${OS_APP_PORT} \ -e PY_IGNORE_IMPORTMISMATCH=1 \ @@ -45,7 +45,7 @@ run-deploy-locally: .PHONY: image-dev image-dev: - docker-compose build + docker compose build .PHONY: docker-run-es docker-run-es: image-dev @@ -66,28 +66,28 @@ docker-shell-os: .PHONY: test-elasticsearch test-elasticsearch: -$(run_es) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh elasticsearch:9200 && cd stac_fastapi/tests/ && pytest' - docker-compose down + docker compose down .PHONY: test-opensearch test-opensearch: -$(run_os) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh opensearch:9202 && cd stac_fastapi/tests/ && pytest' - docker-compose down + docker compose down .PHONY: test test: -$(run_es) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh elasticsearch:9200 && cd stac_fastapi/tests/ && pytest' - docker-compose down + docker compose down -$(run_os) /bin/bash -c 'export && ./scripts/wait-for-it-es.sh opensearch:9202 && cd stac_fastapi/tests/ && pytest' - 
docker-compose down + docker compose down .PHONY: run-database-es run-database-es: - docker-compose run --rm elasticsearch + docker compose run --rm elasticsearch .PHONY: run-database-os run-database-os: - docker-compose run --rm opensearch + docker compose run --rm opensearch .PHONY: pybase-install pybase-install: @@ -107,10 +107,10 @@ install-os: pybase-install .PHONY: docs-image docs-image: - docker-compose -f docker-compose.docs.yml \ + docker compose -f docker-compose.docs.yml \ build .PHONY: docs docs: docs-image - docker-compose -f docker-compose.docs.yml \ + docker compose -f docker-compose.docs.yml \ run docs \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index da4633b9..8ec0701b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.9' - services: app-elasticsearch: container_name: stac-fastapi-es diff --git a/stac_fastapi/core/setup.py b/stac_fastapi/core/setup.py index 01191c1b..43b3e911 100644 --- a/stac_fastapi/core/setup.py +++ b/stac_fastapi/core/setup.py @@ -6,13 +6,13 @@ desc = f.read() install_requires = [ - "fastapi-slim", + "fastapi", "attrs>=23.2.0", "pydantic", - "stac_pydantic>=3", - "stac-fastapi.types==3.0.0", - "stac-fastapi.api==3.0.0", - "stac-fastapi.extensions==3.0.0", + "stac_pydantic==3.1.*", + "stac-fastapi.api==5.1.1", + "stac-fastapi.extensions==5.1.1", + "stac-fastapi.types==5.1.1", "orjson", "overrides", "geojson-pydantic", diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 56afcbc8..16197da3 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -1,10 +1,11 @@ """Core client.""" import logging +from collections import deque from datetime import datetime as datetime_type from datetime import timezone from enum import Enum -from typing import Any, Dict, List, Optional, Set, Type, Union +from typing import Any, Dict, List, Literal, Optional, Set, Type, 
Union from urllib.parse import unquote_plus, urljoin import attr @@ -36,13 +37,11 @@ from stac_fastapi.types.core import AsyncBaseCoreClient, AsyncBaseTransactionsClient from stac_fastapi.types.extension import ApiExtension from stac_fastapi.types.requests import get_base_url -from stac_fastapi.types.rfc3339 import DateTimeType +from stac_fastapi.types.rfc3339 import DateTimeType, rfc3339_str_to_datetime from stac_fastapi.types.search import BaseSearchPostRequest logger = logging.getLogger(__name__) -NumType = Union[float, int] - @attr.s class CoreClient(AsyncBaseCoreClient): @@ -278,7 +277,7 @@ async def item_collection( self, collection_id: str, bbox: Optional[BBox] = None, - datetime: Optional[DateTimeType] = None, + datetime: Optional[str] = None, limit: Optional[int] = 10, token: Optional[str] = None, **kwargs, @@ -288,7 +287,7 @@ async def item_collection( Args: collection_id (str): The identifier of the collection to read items from. bbox (Optional[BBox]): The bounding box to filter items by. - datetime (Optional[DateTimeType]): The datetime range to filter items by. + datetime (Optional[str]): The datetime range to filter items by. limit (int): The maximum number of items to return. The default value is 10. token (str): A token used for pagination. request (Request): The incoming request. @@ -427,23 +426,34 @@ def _return_date( return result - def _format_datetime_range(self, date_tuple: DateTimeType) -> str: + def _format_datetime_range(self, date_str: str) -> str: """ - Convert a tuple of datetime objects or None into a formatted string for API requests. + Convert a datetime range string into a normalized UTC string for API requests using rfc3339_str_to_datetime. Args: - date_tuple (tuple): A tuple containing two elements, each can be a datetime object or None. + date_str (str): A string containing two datetime values separated by a '/'. Returns: - str: A string formatted as 'YYYY-MM-DDTHH:MM:SS.sssZ/YYYY-MM-DDTHH:MM:SS.sssZ', with '..' 
used if any element is None. + str: A string formatted as 'YYYY-MM-DDTHH:MM:SSZ/YYYY-MM-DDTHH:MM:SSZ', with '..' used for any bound that is empty or already '..'. """ - def format_datetime(dt): - """Format a single datetime object to the ISO8601 extended format with 'Z'.""" - return dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" if dt else ".." - - start, end = date_tuple - return f"{format_datetime(start)}/{format_datetime(end)}" + def normalize(dt): + dt = dt.strip() + if not dt or dt == "..": + return ".." + dt_obj = rfc3339_str_to_datetime(dt) + dt_utc = dt_obj.astimezone(timezone.utc) + return dt_utc.strftime("%Y-%m-%dT%H:%M:%SZ") + + if not isinstance(date_str, str): + return "../.." + if "/" not in date_str: + return f"{normalize(date_str)}/{normalize(date_str)}" + try: + start, end = date_str.split("/", 1) + except Exception: + return "../.." + return f"{normalize(start)}/{normalize(end)}" async def get_search( self, @@ -451,7 +461,7 @@ async def get_search( collections: Optional[List[str]] = None, ids: Optional[List[str]] = None, bbox: Optional[BBox] = None, - datetime: Optional[DateTimeType] = None, + datetime: Optional[str] = None, limit: Optional[int] = 10, query: Optional[str] = None, token: Optional[str] = None, @@ -459,7 +469,7 @@ async def get_search( sortby: Optional[str] = None, q: Optional[List[str]] = None, intersects: Optional[str] = None, - filter: Optional[str] = None, + filter_expr: Optional[str] = None, filter_lang: Optional[str] = None, **kwargs, ) -> stac_types.ItemCollection: @@ -469,7 +479,7 @@ async def get_search( collections (Optional[List[str]]): List of collection IDs to search in. ids (Optional[List[str]]): List of item IDs to search for. bbox (Optional[BBox]): Bounding box to search in. - datetime (Optional[DateTimeType]): Filter items based on the datetime field. + datetime (Optional[str]): Filter items based on the datetime field. limit (Optional[int]): Maximum number of results to return. query (Optional[str]): Query string to filter the results. 
token (Optional[str]): Access token to use when searching the catalog. @@ -496,7 +506,7 @@ async def get_search( } if datetime: - base_args["datetime"] = self._format_datetime_range(datetime) + base_args["datetime"] = self._format_datetime_range(date_str=datetime) if intersects: base_args["intersects"] = orjson.loads(unquote_plus(intersects)) @@ -507,12 +517,12 @@ async def get_search( for sort in sortby ] - if filter: - base_args["filter-lang"] = "cql2-json" + if filter_expr: + base_args["filter_lang"] = "cql2-json" base_args["filter"] = orjson.loads( - unquote_plus(filter) + unquote_plus(filter_expr) if filter_lang == "cql2-json" - else to_cql2(parse_cql2_text(filter)) + else to_cql2(parse_cql2_text(filter_expr)) ) if fields: @@ -594,8 +604,8 @@ async def post_search( ) # only cql2_json is supported here - if hasattr(search_request, "filter"): - cql2_filter = getattr(search_request, "filter", None) + if hasattr(search_request, "filter_expr"): + cql2_filter = getattr(search_request, "filter_expr", None) try: search = self.database.apply_cql2_filter(search, cql2_filter) except Exception as e: @@ -735,9 +745,7 @@ async def update_item( return ItemSerializer.db_to_stac(item, base_url) @overrides - async def delete_item( - self, item_id: str, collection_id: str, **kwargs - ) -> Optional[stac_types.Item]: + async def delete_item(self, item_id: str, collection_id: str, **kwargs) -> None: """Delete an item from a collection. Args: @@ -745,7 +753,7 @@ async def delete_item( collection_id (str): The identifier of the collection that contains the item. Returns: - Optional[stac_types.Item]: The deleted item, or `None` if the item was successfully deleted. 
+ None: Returns 204 No Content on successful deletion """ await self.database.delete_item(item_id=item_id, collection_id=collection_id) return None @@ -815,23 +823,20 @@ async def update_collection( ) @overrides - async def delete_collection( - self, collection_id: str, **kwargs - ) -> Optional[stac_types.Collection]: + async def delete_collection(self, collection_id: str, **kwargs) -> None: """ Delete a collection. This method deletes an existing collection in the database. Args: - collection_id (str): The identifier of the collection that contains the item. - kwargs: Additional keyword arguments. + collection_id (str): The identifier of the collection to delete Returns: - None. + None: Returns 204 No Content on successful deletion Raises: - NotFoundError: If the collection doesn't exist. + NotFoundError: If the collection doesn't exist """ await self.database.delete_collection(collection_id=collection_id) return None @@ -907,11 +912,81 @@ def bulk_item_insert( return f"Successfully added {len(processed_items)} Items." 
+_DEFAULT_QUERYABLES: Dict[str, Dict[str, Any]] = { + "id": { + "description": "ID", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/properties/id", + }, + "collection": { + "description": "Collection", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/then/properties/collection", + }, + "geometry": { + "description": "Geometry", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/1/oneOf/0/properties/geometry", + }, + "datetime": { + "description": "Acquisition Timestamp", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/datetime", + }, + "created": { + "description": "Creation Timestamp", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/created", + }, + "updated": { + "description": "Creation Timestamp", + "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/updated", + }, + "cloud_cover": { + "description": "Cloud Cover", + "$ref": "https://stac-extensions.github.io/eo/v1.0.0/schema.json#/definitions/fields/properties/eo:cloud_cover", + }, + "cloud_shadow_percentage": { + "title": "Cloud Shadow Percentage", + "description": "Cloud Shadow Percentage", + "type": "number", + "minimum": 0, + "maximum": 100, + }, + "nodata_pixel_percentage": { + "title": "No Data Pixel Percentage", + "description": "No Data Pixel Percentage", + "type": "number", + "minimum": 0, + "maximum": 100, + }, +} + +_ES_MAPPING_TYPE_TO_JSON: Dict[ + str, Literal["string", "number", "boolean", "object", "array", "null"] +] = { + "date": "string", + "date_nanos": "string", + "keyword": "string", + "match_only_text": "string", + "text": "string", + "wildcard": "string", + "byte": "number", + "double": "number", + "float": "number", + "half_float": "number", + "long": "number", + "scaled_float": "number", + 
"short": "number", + "token_count": "number", + "unsigned_long": "number", + "geo_point": "object", + "geo_shape": "object", + "nested": "array", +} + + @attr.s class EsAsyncBaseFiltersClient(AsyncBaseFiltersClient): """Defines a pattern for implementing the STAC filter extension.""" - # todo: use the ES _mapping endpoint to dynamically find what fields exist + database: BaseDatabaseLogic = attr.ib() + async def get_queryables( self, collection_id: Optional[str] = None, **kwargs ) -> Dict[str, Any]: @@ -932,55 +1007,62 @@ async def get_queryables( Returns: Dict[str, Any]: A dictionary containing the queryables for the given collection. """ - return { + queryables: Dict[str, Any] = { "$schema": "https://json-schema.org/draft/2019-09/schema", "$id": "https://stac-api.example.com/queryables", "type": "object", - "title": "Queryables for Example STAC API", - "description": "Queryable names for the example STAC API Item Search filter.", - "properties": { - "id": { - "description": "ID", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/properties/id", - }, - "collection": { - "description": "Collection", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/then/properties/collection", - }, - "geometry": { - "description": "Geometry", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/1/oneOf/0/properties/geometry", - }, - "datetime": { - "description": "Acquisition Timestamp", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/datetime", - }, - "created": { - "description": "Creation Timestamp", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/created", - }, - "updated": { - "description": "Creation Timestamp", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/updated", - }, - 
"cloud_cover": { - "description": "Cloud Cover", - "$ref": "https://stac-extensions.github.io/eo/v1.0.0/schema.json#/definitions/fields/properties/eo:cloud_cover", - }, - "cloud_shadow_percentage": { - "description": "Cloud Shadow Percentage", - "title": "Cloud Shadow Percentage", - "type": "number", - "minimum": 0, - "maximum": 100, - }, - "nodata_pixel_percentage": { - "description": "No Data Pixel Percentage", - "title": "No Data Pixel Percentage", - "type": "number", - "minimum": 0, - "maximum": 100, - }, - }, + "title": "Queryables for STAC API", + "description": "Queryable names for the STAC API Item Search filter.", + "properties": _DEFAULT_QUERYABLES, "additionalProperties": True, } + if not collection_id: + return queryables + + properties: Dict[str, Any] = queryables["properties"] + queryables.update( + { + "properties": properties, + "additionalProperties": False, + } + ) + + mapping_data = await self.database.get_items_mapping(collection_id) + mapping_properties = next(iter(mapping_data.values()))["mappings"]["properties"] + stack = deque(mapping_properties.items()) + + while stack: + field_name, field_def = stack.popleft() + + # Iterate over nested fields + field_properties = field_def.get("properties") + if field_properties: + # Fields in Item Properties should be exposed with their un-prefixed names, + # and not require expressions to prefix them with properties, + # e.g., eo:cloud_cover instead of properties.eo:cloud_cover. 
+ if field_name == "properties": + stack.extend(field_properties.items()) + else: + stack.extend( + (f"{field_name}.{k}", v) for k, v in field_properties.items() + ) + + # Skip non-indexed or disabled fields + field_type = field_def.get("type") + if not field_type or not field_def.get("enabled", True): + continue + + # Generate field properties + field_result = _DEFAULT_QUERYABLES.get(field_name, {}) + properties[field_name] = field_result + + field_name_human = field_name.replace("_", " ").title() + field_result.setdefault("title", field_name_human) + + field_type_json = _ES_MAPPING_TYPE_TO_JSON.get(field_type, field_type) + field_result.setdefault("type", field_type_json) + + if field_type in {"date", "date_nanos"}: + field_result.setdefault("format", "date-time") + + return queryables diff --git a/stac_fastapi/core/stac_fastapi/core/database_logic.py b/stac_fastapi/core/stac_fastapi/core/database_logic.py new file mode 100644 index 00000000..7ddd8af7 --- /dev/null +++ b/stac_fastapi/core/stac_fastapi/core/database_logic.py @@ -0,0 +1,226 @@ +"""Database logic core.""" + +import os +from functools import lru_cache +from typing import Any, Dict, List, Optional, Protocol + +from stac_fastapi.types.stac import Item + + +# stac_pydantic classes extend _GeometryBase, which doesn't have a type field, +# So create our own Protocol for typing +# Union[ Point, MultiPoint, LineString, MultiLineString, Polygon, MultiPolygon, GeometryCollection] +class Geometry(Protocol): # noqa + type: str + coordinates: Any + + +COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") +ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") + +ES_INDEX_NAME_UNSUPPORTED_CHARS = { + "\\", + "/", + "*", + "?", + '"', + "<", + ">", + "|", + " ", + ",", + "#", + ":", +} + +_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE = str.maketrans( + "", "", "".join(ES_INDEX_NAME_UNSUPPORTED_CHARS) +) + +ITEM_INDICES = f"{ITEMS_INDEX_PREFIX}*,-*kibana*,-{COLLECTIONS_INDEX}*" + 
+DEFAULT_SORT = { + "properties.datetime": {"order": "desc"}, + "id": {"order": "desc"}, + "collection": {"order": "desc"}, +} + +ES_ITEMS_SETTINGS = { + "index": { + "sort.field": list(DEFAULT_SORT.keys()), + "sort.order": [v["order"] for v in DEFAULT_SORT.values()], + } +} + +ES_MAPPINGS_DYNAMIC_TEMPLATES = [ + # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md + { + "descriptions": { + "match_mapping_type": "string", + "match": "description", + "mapping": {"type": "text"}, + } + }, + { + "titles": { + "match_mapping_type": "string", + "match": "title", + "mapping": {"type": "text"}, + } + }, + # Projection Extension https://github.com/stac-extensions/projection + {"proj_epsg": {"match": "proj:epsg", "mapping": {"type": "integer"}}}, + { + "proj_projjson": { + "match": "proj:projjson", + "mapping": {"type": "object", "enabled": False}, + } + }, + { + "proj_centroid": { + "match": "proj:centroid", + "mapping": {"type": "geo_point"}, + } + }, + { + "proj_geometry": { + "match": "proj:geometry", + "mapping": {"type": "object", "enabled": False}, + } + }, + { + "no_index_href": { + "match": "href", + "mapping": {"type": "text", "index": False}, + } + }, + # Default all other strings not otherwise specified to keyword + {"strings": {"match_mapping_type": "string", "mapping": {"type": "keyword"}}}, + {"numerics": {"match_mapping_type": "long", "mapping": {"type": "float"}}}, +] + +ES_ITEMS_MAPPINGS = { + "numeric_detection": False, + "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, + "properties": { + "id": {"type": "keyword"}, + "collection": {"type": "keyword"}, + "geometry": {"type": "geo_shape"}, + "assets": {"type": "object", "enabled": False}, + "links": {"type": "object", "enabled": False}, + "properties": { + "type": "object", + "properties": { + # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md + "datetime": {"type": "date"}, + "start_datetime": {"type": "date"}, + 
"end_datetime": {"type": "date"}, + "created": {"type": "date"}, + "updated": {"type": "date"}, + # Satellite Extension https://github.com/stac-extensions/sat + "sat:absolute_orbit": {"type": "integer"}, + "sat:relative_orbit": {"type": "integer"}, + }, + }, + }, +} + +ES_COLLECTIONS_MAPPINGS = { + "numeric_detection": False, + "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, + "properties": { + "id": {"type": "keyword"}, + "extent.spatial.bbox": {"type": "long"}, + "extent.temporal.interval": {"type": "date"}, + "providers": {"type": "object", "enabled": False}, + "links": {"type": "object", "enabled": False}, + "item_assets": {"type": "object", "enabled": False}, + }, +} + + +@lru_cache(256) +def index_by_collection_id(collection_id: str) -> str: + """ + Translate a collection id into an Elasticsearch index name. + + Args: + collection_id (str): The collection id to translate into an index name. + + Returns: + str: The index name derived from the collection id. + """ + cleaned = collection_id.translate(_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE) + return ( + f"{ITEMS_INDEX_PREFIX}{cleaned.lower()}_{collection_id.encode('utf-8').hex()}" + ) + + +@lru_cache(256) +def index_alias_by_collection_id(collection_id: str) -> str: + """ + Translate a collection id into an Elasticsearch index alias. + + Args: + collection_id (str): The collection id to translate into an index alias. + + Returns: + str: The index alias derived from the collection id. + """ + cleaned = collection_id.translate(_ES_INDEX_NAME_UNSUPPORTED_CHARS_TABLE) + return f"{ITEMS_INDEX_PREFIX}{cleaned}" + + +def indices(collection_ids: Optional[List[str]]) -> str: + """ + Get a comma-separated string of index names for a given list of collection ids. + + Args: + collection_ids: A list of collection ids. + + Returns: + A string of comma-separated index names. If `collection_ids` is empty, returns the default indices. 
+ """ + return ( + ",".join(map(index_alias_by_collection_id, collection_ids)) + if collection_ids + else ITEM_INDICES + ) + + +def mk_item_id(item_id: str, collection_id: str) -> str: + """Create the document id for an Item in Elasticsearch. + + Args: + item_id (str): The id of the Item. + collection_id (str): The id of the Collection that the Item belongs to. + + Returns: + str: The document id for the Item, combining the Item id and the Collection id, separated by a `|` character. + """ + return f"{item_id}|{collection_id}" + + +def mk_actions(collection_id: str, processed_items: List[Item]) -> List[Dict[str, Any]]: + """Create Elasticsearch bulk actions for a list of processed items. + + Args: + collection_id (str): The identifier for the collection the items belong to. + processed_items (List[Item]): The list of processed items to be bulk indexed. + + Returns: + List[Dict[str, Union[str, Dict]]]: The list of bulk actions to be executed, + each action being a dictionary with the following keys: + - `_index`: the index to store the document in. + - `_id`: the document's identifier. + - `_source`: the source of the document. 
+ """ + index_alias = index_alias_by_collection_id(collection_id) + return [ + { + "_index": index_alias, + "_id": mk_item_id(item["id"], item["collection"]), + "_source": item, + } + for item in processed_items + ] diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/aggregation.py b/stac_fastapi/core/stac_fastapi/core/extensions/aggregation.py index 2cf880c9..43bd543c 100644 --- a/stac_fastapi/core/stac_fastapi/core/extensions/aggregation.py +++ b/stac_fastapi/core/stac_fastapi/core/extensions/aggregation.py @@ -338,7 +338,7 @@ async def aggregate( datetime: Optional[DateTimeType] = None, intersects: Optional[str] = None, filter_lang: Optional[str] = None, - filter: Optional[str] = None, + filter_expr: Optional[str] = None, aggregations: Optional[str] = None, ids: Optional[List[str]] = None, bbox: Optional[BBox] = None, @@ -380,8 +380,8 @@ async def aggregate( if datetime: base_args["datetime"] = self._format_datetime_range(datetime) - if filter: - base_args["filter"] = self.get_filter(filter, filter_lang) + if filter_expr: + base_args["filter"] = self.get_filter(filter_expr, filter_lang) aggregate_request = EsAggregationExtensionPostRequest(**base_args) else: # Workaround for optional path param in POST requests @@ -389,9 +389,9 @@ async def aggregate( collection_id = path.split("/")[2] filter_lang = "cql2-json" - if aggregate_request.filter: - aggregate_request.filter = self.get_filter( - aggregate_request.filter, filter_lang + if aggregate_request.filter_expr: + aggregate_request.filter_expr = self.get_filter( + aggregate_request.filter_expr, filter_lang ) if collection_id: @@ -465,10 +465,10 @@ async def aggregate( detail=f"Aggregation {agg_name} not supported at catalog level", ) - if aggregate_request.filter: + if aggregate_request.filter_expr: try: search = self.database.apply_cql2_filter( - search, aggregate_request.filter + search, aggregate_request.filter_expr ) except Exception as e: raise HTTPException( diff --git 
a/stac_fastapi/core/stac_fastapi/core/extensions/query.py b/stac_fastapi/core/stac_fastapi/core/extensions/query.py index 97342c66..3084cbf8 100644 --- a/stac_fastapi/core/stac_fastapi/core/extensions/query.py +++ b/stac_fastapi/core/stac_fastapi/core/extensions/query.py @@ -8,7 +8,7 @@ from dataclasses import dataclass from enum import auto from types import DynamicClassAttribute -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Callable, Dict, Optional from pydantic import BaseModel, root_validator from stac_pydantic.utils import AutoValueEnum @@ -17,8 +17,6 @@ logger = logging.getLogger("uvicorn") logger.setLevel(logging.INFO) -# Be careful: https://github.com/samuelcolvin/pydantic/issues/1423#issuecomment-642797287 -NumType = Union[float, int] class Operator(str, AutoValueEnum): diff --git a/stac_fastapi/core/stac_fastapi/core/models/links.py b/stac_fastapi/core/stac_fastapi/core/models/links.py index 76f0ce5b..f72d4ed4 100644 --- a/stac_fastapi/core/stac_fastapi/core/models/links.py +++ b/stac_fastapi/core/stac_fastapi/core/models/links.py @@ -12,7 +12,7 @@ # These can be inferred from the item/collection, so they aren't included in the database # Instead they are dynamically generated when querying the database using the classes defined below -INFERRED_LINK_RELS = ["self", "item", "parent", "collection", "root"] +INFERRED_LINK_RELS = {"self", "item", "parent", "collection", "root"} def merge_params(url: str, newparams: Dict) -> str: diff --git a/stac_fastapi/core/stac_fastapi/core/version.py b/stac_fastapi/core/stac_fastapi/core/version.py index ca97d75a..3488c82b 100644 --- a/stac_fastapi/core/stac_fastapi/core/version.py +++ b/stac_fastapi/core/stac_fastapi/core/version.py @@ -1,2 +1,2 @@ """library version.""" -__version__ = "3.2.5" +__version__ = "4.0.0a0" diff --git a/stac_fastapi/elasticsearch/setup.py b/stac_fastapi/elasticsearch/setup.py index 7fb82dc7..3355dbe3 100644 --- a/stac_fastapi/elasticsearch/setup.py +++ 
b/stac_fastapi/elasticsearch/setup.py @@ -6,7 +6,7 @@ desc = f.read() install_requires = [ - "stac-fastapi.core==3.2.5", + "stac-fastapi.core==4.0.0a0", "elasticsearch[async]==8.11.0", "elasticsearch-dsl==8.11.0", "uvicorn", diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py index 5e6307e7..9510eaa6 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py @@ -39,13 +39,15 @@ settings = ElasticsearchSettings() session = Session.create_from_settings(settings) -filter_extension = FilterExtension(client=EsAsyncBaseFiltersClient()) +database_logic = DatabaseLogic() + +filter_extension = FilterExtension( + client=EsAsyncBaseFiltersClient(database=database_logic) +) filter_extension.conformance_classes.append( "http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators" ) -database_logic = DatabaseLogic() - aggregation_extension = AggregationExtension( client=EsAsyncAggregationClient( database=database_logic, session=session, settings=settings diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py index 0b1bcb5e..d14295f4 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/config.py @@ -7,6 +7,7 @@ import certifi from elasticsearch import AsyncElasticsearch, Elasticsearch # type: ignore +from stac_fastapi.core.base_settings import ApiBaseSettings from stac_fastapi.types.config import ApiSettings @@ -69,7 +70,7 @@ def _es_config() -> Dict[str, Any]: _forbidden_fields: Set[str] = {"type"} -class ElasticsearchSettings(ApiSettings): +class ElasticsearchSettings(ApiSettings, ApiBaseSettings): """API settings.""" # Fields which are defined by STAC but not included in the database model @@ -82,7 +83,7 @@ def create_client(self): 
return Elasticsearch(**_es_config()) -class AsyncElasticsearchSettings(ApiSettings): +class AsyncElasticsearchSettings(ApiSettings, ApiBaseSettings): """API settings.""" # Fields which are defined by STAC but not included in the database model diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index 0f272218..38d05e29 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -3,16 +3,31 @@ import asyncio import json import logging -import os from base64 import urlsafe_b64decode, urlsafe_b64encode from copy import deepcopy -from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple, Type import attr from elasticsearch_dsl import Q, Search from starlette.requests import Request from elasticsearch import exceptions, helpers # type: ignore +from stac_fastapi.core.base_database_logic import BaseDatabaseLogic +from stac_fastapi.core.database_logic import ( + COLLECTIONS_INDEX, + DEFAULT_SORT, + ES_COLLECTIONS_MAPPINGS, + ES_ITEMS_MAPPINGS, + ES_ITEMS_SETTINGS, + ITEM_INDICES, + ITEMS_INDEX_PREFIX, + Geometry, + index_alias_by_collection_id, + index_by_collection_id, + indices, + mk_actions, + mk_item_id, +) from stac_fastapi.core.extensions import filter from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon @@ -25,168 +40,6 @@ logger = logging.getLogger(__name__) -NumType = Union[float, int] - -COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") -ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") -ES_INDEX_NAME_UNSUPPORTED_CHARS = { - "\\", - "/", - "*", - "?", - '"', - "<", - ">", - "|", - " ", - ",", - "#", - ":", -} - -ITEM_INDICES = 
f"{ITEMS_INDEX_PREFIX}*,-*kibana*,-{COLLECTIONS_INDEX}*" - -DEFAULT_SORT = { - "properties.datetime": {"order": "desc"}, - "id": {"order": "desc"}, - "collection": {"order": "desc"}, -} - -ES_ITEMS_SETTINGS = { - "index": { - "sort.field": list(DEFAULT_SORT.keys()), - "sort.order": [v["order"] for v in DEFAULT_SORT.values()], - } -} - -ES_MAPPINGS_DYNAMIC_TEMPLATES = [ - # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md - { - "descriptions": { - "match_mapping_type": "string", - "match": "description", - "mapping": {"type": "text"}, - } - }, - { - "titles": { - "match_mapping_type": "string", - "match": "title", - "mapping": {"type": "text"}, - } - }, - # Projection Extension https://github.com/stac-extensions/projection - {"proj_epsg": {"match": "proj:epsg", "mapping": {"type": "integer"}}}, - { - "proj_projjson": { - "match": "proj:projjson", - "mapping": {"type": "object", "enabled": False}, - } - }, - { - "proj_centroid": { - "match": "proj:centroid", - "mapping": {"type": "geo_point"}, - } - }, - { - "proj_geometry": { - "match": "proj:geometry", - "mapping": {"type": "object", "enabled": False}, - } - }, - { - "no_index_href": { - "match": "href", - "mapping": {"type": "text", "index": False}, - } - }, - # Default all other strings not otherwise specified to keyword - {"strings": {"match_mapping_type": "string", "mapping": {"type": "keyword"}}}, - {"numerics": {"match_mapping_type": "long", "mapping": {"type": "float"}}}, -] - -ES_ITEMS_MAPPINGS = { - "numeric_detection": False, - "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, - "properties": { - "id": {"type": "keyword"}, - "collection": {"type": "keyword"}, - "geometry": {"type": "geo_shape"}, - "assets": {"type": "object", "enabled": False}, - "links": {"type": "object", "enabled": False}, - "properties": { - "type": "object", - "properties": { - # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md - "datetime": {"type": 
"date"}, - "start_datetime": {"type": "date"}, - "end_datetime": {"type": "date"}, - "created": {"type": "date"}, - "updated": {"type": "date"}, - # Satellite Extension https://github.com/stac-extensions/sat - "sat:absolute_orbit": {"type": "integer"}, - "sat:relative_orbit": {"type": "integer"}, - }, - }, - }, -} - -ES_COLLECTIONS_MAPPINGS = { - "numeric_detection": False, - "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, - "properties": { - "id": {"type": "keyword"}, - "extent.spatial.bbox": {"type": "long"}, - "extent.temporal.interval": {"type": "date"}, - "providers": {"type": "object", "enabled": False}, - "links": {"type": "object", "enabled": False}, - "item_assets": {"type": "object", "enabled": False}, - }, -} - - -def index_by_collection_id(collection_id: str) -> str: - """ - Translate a collection id into an Elasticsearch index name. - - Args: - collection_id (str): The collection id to translate into an index name. - - Returns: - str: The index name derived from the collection id. - """ - return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}_{collection_id.encode('utf-8').hex()}" - - -def index_alias_by_collection_id(collection_id: str) -> str: - """ - Translate a collection id into an Elasticsearch index alias. - - Args: - collection_id (str): The collection id to translate into an index alias. - - Returns: - str: The index alias derived from the collection id. - """ - return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" - - -def indices(collection_ids: Optional[List[str]]) -> str: - """ - Get a comma-separated string of index names for a given list of collection ids. - - Args: - collection_ids: A list of collection ids. - - Returns: - A string of comma-separated index names. If `collection_ids` is None, returns the default indices. 
- """ - if collection_ids is None or collection_ids == []: - return ITEM_INDICES - else: - return ",".join([index_alias_by_collection_id(c) for c in collection_ids]) - async def create_index_templates() -> None: """ @@ -271,53 +124,8 @@ async def delete_item_index(collection_id: str): await client.close() -def mk_item_id(item_id: str, collection_id: str): - """Create the document id for an Item in Elasticsearch. - - Args: - item_id (str): The id of the Item. - collection_id (str): The id of the Collection that the Item belongs to. - - Returns: - str: The document id for the Item, combining the Item id and the Collection id, separated by a `|` character. - """ - return f"{item_id}|{collection_id}" - - -def mk_actions(collection_id: str, processed_items: List[Item]): - """Create Elasticsearch bulk actions for a list of processed items. - - Args: - collection_id (str): The identifier for the collection the items belong to. - processed_items (List[Item]): The list of processed items to be bulk indexed. - - Returns: - List[Dict[str, Union[str, Dict]]]: The list of bulk actions to be executed, - each action being a dictionary with the following keys: - - `_index`: the index to store the document in. - - `_id`: the document's identifier. - - `_source`: the source of the document. 
- """ - return [ - { - "_index": index_alias_by_collection_id(collection_id), - "_id": mk_item_id(item["id"], item["collection"]), - "_source": item, - } - for item in processed_items - ] - - -# stac_pydantic classes extend _GeometryBase, which doesn't have a type field, -# So create our own Protocol for typing -# Union[ Point, MultiPoint, LineString, MultiLineString, Polygon, MultiPolygon, GeometryCollection] -class Geometry(Protocol): # noqa - type: str - coordinates: Any - - @attr.s -class DatabaseLogic: +class DatabaseLogic(BaseDatabaseLogic): """Database logic.""" client = AsyncElasticsearchSettings().create_client @@ -466,7 +274,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: ) except exceptions.NotFoundError: raise NotFoundError( - f"Item {item_id} does not exist in Collection {collection_id}" + f"Item {item_id} does not exist inside Collection {collection_id}" ) return item["_source"] @@ -918,6 +726,24 @@ async def delete_item( f"Item {item_id} in collection {collection_id} not found" ) + async def get_items_mapping(self, collection_id: str) -> Dict[str, Any]: + """Get the mapping for the specified collection's items index. + + Args: + collection_id (str): The ID of the collection to get items mapping for. + + Returns: + Dict[str, Any]: The mapping information. + """ + index_name = index_alias_by_collection_id(collection_id) + try: + mapping = await self.client.indices.get_mapping( + index=index_name, allow_no_indices=False + ) + return mapping.body + except exceptions.NotFoundError: + raise NotFoundError(f"Mapping for index {index_name} not found") + async def create_collection(self, collection: Collection, refresh: bool = False): """Create a single collection in the database. 
@@ -1001,7 +827,7 @@ async def update_collection( "source": {"index": f"{ITEMS_INDEX_PREFIX}{collection_id}"}, "script": { "lang": "painless", - "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""", + "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""", # noqa: E702 }, }, wait_for_completion=True, diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py index ca97d75a..3488c82b 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/version.py @@ -1,2 +1,2 @@ """library version.""" -__version__ = "3.2.5" +__version__ = "4.0.0a0" diff --git a/stac_fastapi/opensearch/setup.py b/stac_fastapi/opensearch/setup.py index 0befa10e..8cae5dce 100644 --- a/stac_fastapi/opensearch/setup.py +++ b/stac_fastapi/opensearch/setup.py @@ -6,7 +6,7 @@ desc = f.read() install_requires = [ - "stac-fastapi.core==3.2.5", + "stac-fastapi.core==4.0.0a0", "opensearch-py==2.4.2", "opensearch-py[async]==2.4.2", "uvicorn", diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py index 8be0eafd..90038302 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py @@ -39,13 +39,15 @@ settings = OpensearchSettings() session = Session.create_from_settings(settings) -filter_extension = FilterExtension(client=EsAsyncBaseFiltersClient()) +database_logic = DatabaseLogic() + +filter_extension = FilterExtension( + client=EsAsyncBaseFiltersClient(database=database_logic) +) filter_extension.conformance_classes.append( "http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators" ) -database_logic = DatabaseLogic() - 
aggregation_extension = AggregationExtension( client=EsAsyncAggregationClient( database=database_logic, session=session, settings=settings diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py index 01551d94..6de2ab91 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/config.py @@ -6,6 +6,7 @@ import certifi from opensearchpy import AsyncOpenSearch, OpenSearch +from stac_fastapi.core.base_settings import ApiBaseSettings from stac_fastapi.types.config import ApiSettings @@ -67,7 +68,7 @@ def _es_config() -> Dict[str, Any]: _forbidden_fields: Set[str] = {"type"} -class OpensearchSettings(ApiSettings): +class OpensearchSettings(ApiSettings, ApiBaseSettings): """API settings.""" # Fields which are defined by STAC but not included in the database model @@ -80,7 +81,7 @@ def create_client(self): return OpenSearch(**_es_config()) -class AsyncOpensearchSettings(ApiSettings): +class AsyncOpensearchSettings(ApiSettings, ApiBaseSettings): """API settings.""" # Fields which are defined by STAC but not included in the database model diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 498c9c01..22e6ffe0 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -3,10 +3,9 @@ import asyncio import json import logging -import os from base64 import urlsafe_b64decode, urlsafe_b64encode from copy import deepcopy -from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple, Type import attr from opensearchpy import exceptions, helpers @@ -16,6 +15,22 @@ from starlette.requests import Request from stac_fastapi.core import serializers +from 
stac_fastapi.core.base_database_logic import BaseDatabaseLogic +from stac_fastapi.core.database_logic import ( + COLLECTIONS_INDEX, + DEFAULT_SORT, + ES_COLLECTIONS_MAPPINGS, + ES_ITEMS_MAPPINGS, + ES_ITEMS_SETTINGS, + ITEM_INDICES, + ITEMS_INDEX_PREFIX, + Geometry, + index_alias_by_collection_id, + index_by_collection_id, + indices, + mk_actions, + mk_item_id, +) from stac_fastapi.core.extensions import filter from stac_fastapi.core.utilities import MAX_LIMIT, bbox2polygon from stac_fastapi.opensearch.config import ( @@ -27,168 +42,6 @@ logger = logging.getLogger(__name__) -NumType = Union[float, int] - -COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") -ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") -ES_INDEX_NAME_UNSUPPORTED_CHARS = { - "\\", - "/", - "*", - "?", - '"', - "<", - ">", - "|", - " ", - ",", - "#", - ":", -} - -ITEM_INDICES = f"{ITEMS_INDEX_PREFIX}*,-*kibana*,-{COLLECTIONS_INDEX}*" - -DEFAULT_SORT = { - "properties.datetime": {"order": "desc"}, - "id": {"order": "desc"}, - "collection": {"order": "desc"}, -} - -ES_ITEMS_SETTINGS = { - "index": { - "sort.field": list(DEFAULT_SORT.keys()), - "sort.order": [v["order"] for v in DEFAULT_SORT.values()], - } -} - -ES_MAPPINGS_DYNAMIC_TEMPLATES = [ - # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md - { - "descriptions": { - "match_mapping_type": "string", - "match": "description", - "mapping": {"type": "text"}, - } - }, - { - "titles": { - "match_mapping_type": "string", - "match": "title", - "mapping": {"type": "text"}, - } - }, - # Projection Extension https://github.com/stac-extensions/projection - {"proj_epsg": {"match": "proj:epsg", "mapping": {"type": "integer"}}}, - { - "proj_projjson": { - "match": "proj:projjson", - "mapping": {"type": "object", "enabled": False}, - } - }, - { - "proj_centroid": { - "match": "proj:centroid", - "mapping": {"type": "geo_point"}, - } - }, - { - "proj_geometry": { - "match": 
"proj:geometry", - "mapping": {"type": "object", "enabled": False}, - } - }, - { - "no_index_href": { - "match": "href", - "mapping": {"type": "text", "index": False}, - } - }, - # Default all other strings not otherwise specified to keyword - {"strings": {"match_mapping_type": "string", "mapping": {"type": "keyword"}}}, - {"numerics": {"match_mapping_type": "long", "mapping": {"type": "float"}}}, -] - -ES_ITEMS_MAPPINGS = { - "numeric_detection": False, - "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, - "properties": { - "id": {"type": "keyword"}, - "collection": {"type": "keyword"}, - "geometry": {"type": "geo_shape"}, - "assets": {"type": "object", "enabled": False}, - "links": {"type": "object", "enabled": False}, - "properties": { - "type": "object", - "properties": { - # Common https://github.com/radiantearth/stac-spec/blob/master/item-spec/common-metadata.md - "datetime": {"type": "date"}, - "start_datetime": {"type": "date"}, - "end_datetime": {"type": "date"}, - "created": {"type": "date"}, - "updated": {"type": "date"}, - # Satellite Extension https://github.com/stac-extensions/sat - "sat:absolute_orbit": {"type": "integer"}, - "sat:relative_orbit": {"type": "integer"}, - }, - }, - }, -} - -ES_COLLECTIONS_MAPPINGS = { - "numeric_detection": False, - "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, - "properties": { - "id": {"type": "keyword"}, - "extent.spatial.bbox": {"type": "long"}, - "extent.temporal.interval": {"type": "date"}, - "providers": {"type": "object", "enabled": False}, - "links": {"type": "object", "enabled": False}, - "item_assets": {"type": "object", "enabled": False}, - }, -} - - -def index_by_collection_id(collection_id: str) -> str: - """ - Translate a collection id into an Elasticsearch index name. - - Args: - collection_id (str): The collection id to translate into an index name. - - Returns: - str: The index name derived from the collection id. 
- """ - return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}_{collection_id.encode('utf-8').hex()}" - - -def index_alias_by_collection_id(collection_id: str) -> str: - """ - Translate a collection id into an Elasticsearch index alias. - - Args: - collection_id (str): The collection id to translate into an index alias. - - Returns: - str: The index alias derived from the collection id. - """ - return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}" - - -def indices(collection_ids: Optional[List[str]]) -> str: - """ - Get a comma-separated string of index names for a given list of collection ids. - - Args: - collection_ids: A list of collection ids. - - Returns: - A string of comma-separated index names. If `collection_ids` is None, returns the default indices. - """ - if collection_ids is None or collection_ids == []: - return ITEM_INDICES - else: - return ",".join([index_alias_by_collection_id(c) for c in collection_ids]) - async def create_index_templates() -> None: """ @@ -292,53 +145,8 @@ async def delete_item_index(collection_id: str): await client.close() -def mk_item_id(item_id: str, collection_id: str): - """Create the document id for an Item in Elasticsearch. - - Args: - item_id (str): The id of the Item. - collection_id (str): The id of the Collection that the Item belongs to. - - Returns: - str: The document id for the Item, combining the Item id and the Collection id, separated by a `|` character. - """ - return f"{item_id}|{collection_id}" - - -def mk_actions(collection_id: str, processed_items: List[Item]): - """Create Elasticsearch bulk actions for a list of processed items. - - Args: - collection_id (str): The identifier for the collection the items belong to. - processed_items (List[Item]): The list of processed items to be bulk indexed. 
- - Returns: - List[Dict[str, Union[str, Dict]]]: The list of bulk actions to be executed, - each action being a dictionary with the following keys: - - `_index`: the index to store the document in. - - `_id`: the document's identifier. - - `_source`: the source of the document. - """ - return [ - { - "_index": index_alias_by_collection_id(collection_id), - "_id": mk_item_id(item["id"], item["collection"]), - "_source": item, - } - for item in processed_items - ] - - -# stac_pydantic classes extend _GeometryBase, which doesn't have a type field, -# So create our own Protocol for typing -# Union[ Point, MultiPoint, LineString, MultiLineString, Polygon, MultiPolygon, GeometryCollection] -class Geometry(Protocol): # noqa - type: str - coordinates: Any - - @attr.s -class DatabaseLogic: +class DatabaseLogic(BaseDatabaseLogic): """Database logic.""" client = AsyncSearchSettings().create_client @@ -495,7 +303,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict: ) except exceptions.NotFoundError: raise NotFoundError( - f"Item {item_id} does not exist in Collection {collection_id}" + f"Item {item_id} does not exist inside Collection {collection_id}" ) return item["_source"] @@ -950,6 +758,24 @@ async def delete_item( f"Item {item_id} in collection {collection_id} not found" ) + async def get_items_mapping(self, collection_id: str) -> Dict[str, Any]: + """Get the mapping for the specified collection's items index. + + Args: + collection_id (str): The ID of the collection to get items mapping for. + + Returns: + Dict[str, Any]: The mapping information. 
+ """ + index_name = index_alias_by_collection_id(collection_id) + try: + mapping = await self.client.indices.get_mapping( + index=index_name, params={"allow_no_indices": "false"} + ) + return mapping + except exceptions.NotFoundError: + raise NotFoundError(f"Mapping for index {index_name} not found") + async def create_collection(self, collection: Collection, refresh: bool = False): """Create a single collection in the database. @@ -1033,7 +859,7 @@ async def update_collection( "source": {"index": f"{ITEMS_INDEX_PREFIX}{collection_id}"}, "script": { "lang": "painless", - "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""", + "source": f"""ctx._id = ctx._id.replace('{collection_id}', '{collection["id"]}'); ctx._source.collection = '{collection["id"]}' ;""", # noqa: E702 }, }, wait_for_completion=True, diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py index ca97d75a..3488c82b 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/version.py @@ -1,2 +1,2 @@ """library version.""" -__version__ = "3.2.5" +__version__ = "4.0.0a0" diff --git a/stac_fastapi/tests/rate_limit/test_rate_limit.py b/stac_fastapi/tests/rate_limit/test_rate_limit.py index fd6b5bce..4a7a7da5 100644 --- a/stac_fastapi/tests/rate_limit/test_rate_limit.py +++ b/stac_fastapi/tests/rate_limit/test_rate_limit.py @@ -18,7 +18,7 @@ async def test_rate_limit(app_client_rate_limit: AsyncClient, ctx): except RateLimitExceeded: status_code = 429 - logger.info(f"Request {i+1}: Status code {status_code}") + logger.info(f"Request {i + 1}: Status code {status_code}") assert ( status_code == expected_status_code ), f"Expected status code {expected_status_code}, but got {status_code}" @@ -32,7 +32,7 @@ async def test_rate_limit_no_limit(app_client: AsyncClient, ctx): response = await 
app_client.get("/collections") status_code = response.status_code - logger.info(f"Request {i+1}: Status code {status_code}") + logger.info(f"Request {i + 1}: Status code {status_code}") assert ( status_code == expected_status_code ), f"Expected status code {expected_status_code}, but got {status_code}" diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index 904adbbf..5313b1fa 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -2,7 +2,7 @@ import os import uuid from copy import deepcopy -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta from random import randint from urllib.parse import parse_qs, urlparse, urlsplit @@ -478,13 +478,10 @@ async def test_item_search_temporal_window_timezone_get( app_client, ctx, load_test_data ): """Test GET search with spatio-temporal query ending with Zulu and pagination(core)""" - tzinfo = timezone(timedelta(hours=1)) test_item = load_test_data("test_item.json") item_date = rfc3339_str_to_datetime(test_item["properties"]["datetime"]) item_date_before = item_date - timedelta(seconds=1) - item_date_before = item_date_before.replace(tzinfo=tzinfo) item_date_after = item_date + timedelta(seconds=1) - item_date_after = item_date_after.replace(tzinfo=tzinfo) params = { "collections": test_item["collection"],
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: