diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index 56afcbc8..7a38ee6b 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -377,7 +377,7 @@ async def get_item( @staticmethod def _return_date( - interval: Optional[Union[DateTimeType, str]] + interval: Optional[Union[DateTimeType, str]], ) -> Dict[str, Optional[str]]: """ Convert a date interval. @@ -911,6 +911,8 @@ def bulk_item_insert( class EsAsyncBaseFiltersClient(AsyncBaseFiltersClient): """Defines a pattern for implementing the STAC filter extension.""" + database: BaseDatabaseLogic = attr.ib() + # todo: use the ES _mapping endpoint to dynamically find what fields exist async def get_queryables( self, collection_id: Optional[str] = None, **kwargs @@ -932,55 +934,4 @@ async def get_queryables( Returns: Dict[str, Any]: A dictionary containing the queryables for the given collection. """ - return { - "$schema": "https://json-schema.org/draft/2019-09/schema", - "$id": "https://stac-api.example.com/queryables", - "type": "object", - "title": "Queryables for Example STAC API", - "description": "Queryable names for the example STAC API Item Search filter.", - "properties": { - "id": { - "description": "ID", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/properties/id", - }, - "collection": { - "description": "Collection", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/2/then/properties/collection", - }, - "geometry": { - "description": "Geometry", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/item.json#/definitions/core/allOf/1/oneOf/0/properties/geometry", - }, - "datetime": { - "description": "Acquisition Timestamp", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/datetime", - }, - "created": { - "description": "Creation Timestamp", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/created", - }, - "updated": { - "description": "Creation Timestamp", - "$ref": "https://schemas.stacspec.org/v1.0.0/item-spec/json-schema/datetime.json#/properties/updated", - }, - "cloud_cover": { - "description": "Cloud Cover", - "$ref": "https://stac-extensions.github.io/eo/v1.0.0/schema.json#/definitions/fields/properties/eo:cloud_cover", - }, - "cloud_shadow_percentage": { - "description": "Cloud Shadow Percentage", - "title": "Cloud Shadow Percentage", - "type": "number", - "minimum": 0, - "maximum": 100, - }, - "nodata_pixel_percentage": { - "description": "No Data Pixel Percentage", - "title": "No Data Pixel Percentage", - "type": "number", - "minimum": 0, - "maximum": 100, - }, - }, - "additionalProperties": True, - } + return self.database.get_queryables(collection_id=collection_id) diff --git a/stac_fastapi/core/stac_fastapi/core/extensions/filter.py b/stac_fastapi/core/stac_fastapi/core/extensions/filter.py index 251614e1..da9fee91 100644 --- a/stac_fastapi/core/stac_fastapi/core/extensions/filter.py +++ b/stac_fastapi/core/stac_fastapi/core/extensions/filter.py @@ -101,7 +101,7 @@ class SpatialIntersectsOp(str, Enum): } -def to_es_field(field: str) -> str: +def to_es_field(queryables_mapping: Dict[str, Any], field: str) -> str: """ Map a given field to its corresponding Elasticsearch field according to a predefined mapping. @@ -114,7 +114,7 @@ def to_es_field(field: str) -> str: return queryables_mapping.get(field, field) -def to_es(query: Dict[str, Any]) -> Dict[str, Any]: +def to_es(queryables_mapping: Dict[str, Any], query: Dict[str, Any]) -> Dict[str, Any]: """ Transform a simplified CQL2 query structure to an Elasticsearch compatible query DSL. @@ -130,7 +130,13 @@ def to_es(query: Dict[str, Any]) -> Dict[str, Any]: LogicalOp.OR: "should", LogicalOp.NOT: "must_not", }[query["op"]] - return {"bool": {bool_type: [to_es(sub_query) for sub_query in query["args"]]}} + return { + "bool": { + bool_type: [ + to_es(queryables_mapping, sub_query) for sub_query in query["args"] + ] + } + } elif query["op"] in [ ComparisonOp.EQ, @@ -147,7 +153,7 @@ def to_es(query: Dict[str, Any]) -> Dict[str, Any]: ComparisonOp.GTE: "gte", } - field = to_es_field(query["args"][0]["property"]) + field = to_es_field(queryables_mapping, query["args"][0]["property"]) value = query["args"][1] if isinstance(value, dict) and "timestamp" in value: value = value["timestamp"] @@ -170,11 +176,11 @@ def to_es(query: Dict[str, Any]) -> Dict[str, Any]: return {"range": {field: {range_op[query["op"]]: value}}} elif query["op"] == ComparisonOp.IS_NULL: - field = to_es_field(query["args"][0]["property"]) + field = to_es_field(queryables_mapping, query["args"][0]["property"]) return {"bool": {"must_not": {"exists": {"field": field}}}} elif query["op"] == AdvancedComparisonOp.BETWEEN: - field = to_es_field(query["args"][0]["property"]) + field = to_es_field(queryables_mapping, query["args"][0]["property"]) gte, lte = query["args"][1], query["args"][2] if isinstance(gte, dict) and "timestamp" in gte: gte = gte["timestamp"] @@ -183,19 +189,19 @@ def to_es(query: Dict[str, Any]) -> Dict[str, Any]: return {"range": {field: {"gte": gte, "lte": lte}}} elif query["op"] == AdvancedComparisonOp.IN: - field = to_es_field(query["args"][0]["property"]) + field = to_es_field(queryables_mapping, query["args"][0]["property"]) values = query["args"][1] if not isinstance(values, list): raise ValueError(f"Arg {values} is not a list") return {"terms": {field: values}} elif query["op"] == AdvancedComparisonOp.LIKE: - field = to_es_field(query["args"][0]["property"]) + field = to_es_field(queryables_mapping, query["args"][0]["property"]) pattern = cql2_like_to_es(query["args"][1]) return {"wildcard": {field: {"value": pattern, "case_insensitive": True}}} elif query["op"] == SpatialIntersectsOp.S_INTERSECTS: - field = to_es_field(query["args"][0]["property"]) + field = to_es_field(queryables_mapping, query["args"][0]["property"]) geometry = query["args"][1] return {"geo_shape": {field: {"shape": geometry, "relation": "intersects"}}} diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py index 5e6307e7..9510eaa6 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py @@ -39,13 +39,15 @@ settings = ElasticsearchSettings() session = Session.create_from_settings(settings) -filter_extension = FilterExtension(client=EsAsyncBaseFiltersClient()) +database_logic = DatabaseLogic() + +filter_extension = FilterExtension( + client=EsAsyncBaseFiltersClient(database=database_logic) +) filter_extension.conformance_classes.append( "http://www.opengis.net/spec/cql2/1.0/conf/advanced-comparison-operators" ) -database_logic = DatabaseLogic() - aggregation_extension = AggregationExtension( client=EsAsyncAggregationClient( database=database_logic, session=session, settings=settings diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index c404b5e5..97bc45c6 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -9,6 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple, Type, Union import attr +import requests from elasticsearch_dsl import Q, Search from starlette.requests import Request @@ -29,6 +30,7 @@ COLLECTIONS_INDEX = os.getenv("STAC_COLLECTIONS_INDEX", "collections") ITEMS_INDEX_PREFIX = os.getenv("STAC_ITEMS_INDEX_PREFIX", "items_") +QUERYABLES_INDEX = os.getenv("STAC_QUERYABLES_INDEX", "queryables") ES_INDEX_NAME_UNSUPPORTED_CHARS = { "\\", "/", @@ -240,6 +242,23 @@ async def create_item_index(collection_id: str): await client.close() +async def create_queryables_index() -> None: + """ + Create the index for Qyeryables. + + Returns: + None + + """ + client = AsyncElasticsearchSettings().create_client + + await client.options(ignore_status=400).indices.create( + index=f"{QUERYABLES_INDEX}-000001", + aliases={QUERYABLES_INDEX: {}}, + ) + await client.close() + + async def delete_item_index(collection_id: str): """Delete the index for items in a collection. @@ -391,6 +410,95 @@ class DatabaseLogic: """CORE LOGIC""" + async def get_queryables_mapping(self, collection_id: str = "*") -> dict: + """Retrieve mapping of Queryables for search. + + Args: + collection_id (str, optional): The id of the Collection the Queryables + belongs to. Defaults to "*". + + Returns: + dict: A dictionary containing the Queryables mappings. + """ + queryables_mapping = {} + + mappings = await self.client.indices.get_mapping( + index=f"{ITEMS_INDEX_PREFIX}{collection_id}", + ) + + for mapping in mappings.values(): + fields = mapping["mappings"]["properties"] + properties = fields.pop("properties") + + for field_key in fields: + queryables_mapping[field_key] = field_key + + for property_key in properties["properties"]: + queryables_mapping[property_key] = f"properties.{property_key}" + + return queryables_mapping + + async def get_extensions(self, collection_id: str = "*") -> set: + """Get list of STAC Extensions for a collection. + + Args: + collection_id (str, optional): The id of the Collection the STAC + Extensions belongs to. Defaults to "*". + + Returns: + set: set of STAC Extensions + """ + response = await self.client.search( + index=f"{ITEMS_INDEX_PREFIX}{collection_id}", + aggs={ + "stac_extensions": {"terms": {"field": "stac_extensions"}}, + "size": 10000, + }, + ) + + return { + stac_extension["key"] + for stac_extension in response["aggregations"]["stac_extensions"]["buckets"] + } + + async def get_queryables(self, collection_id: str = "*") -> dict: + """Retrieve Queryables from elasticsearch mappings. + + Args: + collection_id (str, optional): The id of the Collection the Queryables + belongs to. Defaults to "*". + + Returns: + dict: A dictionary containing the Queryables. + """ + if collection_id != "*": + response = await self.client.get( + index=f"{ITEMS_INDEX_PREFIX}{collection_id}", + id=collection_id, + ) + else: + queryables: Dict[str, Any] = {} + search_after: List[str] = [] + + while True: + + response = self.client.search( + index=f"{ITEMS_INDEX_PREFIX}{collection_id}", + size=10000, + search_after=search_after, + ) + + if hits := response["hits"]["hits"]: + for hit in hits: + queryables |= hit + + search_after = hit["sort"] + + if not search_after: + break + + return queryables + async def get_all_collections( self, token: Optional[str], limit: int, request: Request ) -> Tuple[List[Dict[str, Any]], Optional[str]]: @@ -594,8 +702,7 @@ def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str] return search - @staticmethod - def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]): + def apply_cql2_filter(self, search: Search, _filter: Optional[Dict[str, Any]]): """ Apply a CQL2 filter to an Elasticsearch Search object. @@ -615,7 +722,7 @@ def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]): otherwise the original Search object. """ if _filter is not None: - es_query = filter.to_es(_filter) + es_query = filter.to_es(self.get_queryables_mapping(), _filter) search = search.query(es_query) return search @@ -854,6 +961,149 @@ def sync_prep_create_item( return self.item_serializer.stac_to_db(item, base_url) + async def create_queryables(self, collection_id: str): + """Database logic for creating an initial queryables record. + + Args: + collection_id (str): The id of the Collection that the Queryables belongs to. + """ + base_queryables = { + "$schema": "https://json-schema.org/draft/2019-09/schema", + "$id": "https://stac-api.example.com/queryables", + "type": "object", + "title": f"Queryables for {collection_id} STAC API", + "description": f"Queryable names for the {collection_id} in STAC API Item Search filter.", + "properties": {}, + "additionalProperties": True, + } + + await self.client.update( + index=QUERYABLES_INDEX, + id=collection_id, + document=base_queryables, + ) + + async def add_queryables(self, item: Item, stac_extensions: set) -> None: + """Database logic for adding queryables. + + Args: + collection_id (str): The id of the Collection that the Queryables belongs to. + stac_extensions (list): List of the previous stac extensions for the collection. + """ + queryables = await self.get_queryables(collection_id=item["collection"]) + + # Add fields of any new extensions. + if ( + new_extensions := await self.get_extensions(item["collection"]) + - stac_extensions + ): + + for new_extension in new_extensions: + stac_extension_response = requests.get(new_extension, timeout=5) + + # Get field definitions + stac_extension_fields = stac_extension_response.json()["definitions"][ + "fields" + ]["properties"] + + for ( + stac_extension_field_key, + stac_extension_field_value, + ) in stac_extension_fields.values(): + + queryables["properties"][ + stac_extension_field_key + ] = stac_extension_field_value + + # if there are any new properties that are not in extensions add them through mappings. + # remove non queryable fields + del item["assets"] + del item["bbox"] + del item["links"] + del item["stac_extensions"] + del item["stac_version"] + del item["type"] + item_properties = item.pop("properties").keys() + item_fields = item.keys() + item_properties + + if new_fields := set(item_fields) - set(queryables["properties"].keys()): + + mappings = await self.client.indices.get_mapping( + index=f"{ITEMS_INDEX_PREFIX}{item['collection']}", + ) + + fields = mappings[0]["mappings"]["properties"] + fields |= fields.pop("properties") + + for new_field in new_fields: + queryables["properties"][new_field] = { + "type": fields[new_field]["type"] + } + + await self.client.update( + index=QUERYABLES_INDEX, + id=item["collection"], + doc=queryables, + ) + + async def remove_queryables(self, item: Item) -> None: + """Database logic for adding queryables. + + Args: + collection_id (str): The id of the Collection that the Queryables belongs to. + stac_extensions (list): List of the previous stac extensions for the collection. + """ + queryables = await self.get_queryables(collection_id=item["collection"]) + + # Remove any fields of any unused extensions. + if removed_extensions := set( + item["stac_extensions"] + ) - await self.get_extensions(item["collection"]): + + for removed_extension in removed_extensions: + stac_extension_response = requests.get(removed_extension, timeout=5) + + # Get field definitions + stac_extension_fields = stac_extension_response.json()["definitions"][ + "fields" + ]["properties"] + + for stac_extension_field_key in stac_extension_fields: + del queryables["properties"][stac_extension_field_key] + + # if there are any properties no longer in any items remove them from queryables. + # remove non queryable fields + del item["assets"] + del item["bbox"] + del item["links"] + del item["stac_extensions"] + del item["stac_version"] + del item["type"] + item_properties = item.pop("properties").keys() + + aggs = {} + for item_field in item_properties: + aggs[item_field] = {"terms": {"field": f"properties.{item_field}"}} + + for item_field in item.keys(): + aggs[item_field] = {"terms": {"field": item_field}} + + es_resp = await self.client.search( + index=QUERYABLES_INDEX, + aggs=aggs, + id=item["collection"], + ) + + for aggregation in es_resp["aggregations"]: + if not aggregation["buckets"]: + del queryables["properties"][aggregation["key"]] + + await self.client.update( + index=QUERYABLES_INDEX, + id=item["collection"], + doc=queryables, + ) + async def create_item(self, item: Item, refresh: bool = False): """Database logic for creating one item. @@ -868,6 +1118,8 @@ async def create_item(self, item: Item, refresh: bool = False): None """ # todo: check if collection exists, but cache + stac_extensions = await self.get_extensions(item["collection"]) + item_id = item["id"] collection_id = item["collection"] es_resp = await self.client.index( @@ -882,6 +1134,11 @@ async def create_item(self, item: Item, refresh: bool = False): f"Item {item_id} in collection {collection_id} already exists" ) + await self.add_queryables( + item=item, + stac_extensions=stac_extensions, + ) + async def delete_item( self, item_id: str, collection_id: str, refresh: bool = False ): @@ -896,11 +1153,18 @@ async def delete_item( NotFoundError: If the Item does not exist in the database. """ try: + item = await self.client.get( + index=index_by_collection_id(collection_id), + id=mk_item_id(item_id, collection_id), + ) + await self.client.delete( index=index_by_collection_id(collection_id), id=mk_item_id(item_id, collection_id), refresh=refresh, ) + + await self.remove_queryables(item=item) except exceptions.NotFoundError: raise NotFoundError( f"Item {item_id} in collection {collection_id} not found" @@ -933,6 +1197,8 @@ async def create_collection(self, collection: Collection, refresh: bool = False) await create_item_index(collection_id) + await self.create_queryables(collection_id) + async def find_collection(self, collection_id: str) -> Collection: """Find and return a collection from the database. diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/t.json b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/t.json new file mode 100644 index 00000000..6da9f4c1 --- /dev/null +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/t.json @@ -0,0 +1,39 @@ +{ + "collections": [ + "PSScene" + ], + "datetime": "2023-01-12T00:00:00.000Z/2023-02-12T23:59:59.999Z", + "limit": 100, + "filter": { + "op": "and", + "args": [ + { + "op": ">", + "args": [ + { + "property": "properties.datetime" + }, + "2023-01-12T00:00:00.000Z" + ] + }, + { + "op": "<", + "args": [ + { + "property": "properties.datetime" + }, + "2023-02-12T23:59:59.999Z" + ] + }, + { + "op": "<=", + "args": [ + { + "property": "properties.cloud_percent" + }, + 18 + ] + } + ] + } +} \ No newline at end of file diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 63a42427..73d06e57 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -621,8 +621,11 @@ def apply_stacql_filter(search: Search, op: str, field: str, value: float): return search - @staticmethod - def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]): + def apply_cql2_filter( + self, + search: Search, + _filter: Optional[Dict[str, Any]], + ): """ Apply a CQL2 filter to an Opensearch Search object. @@ -642,11 +645,39 @@ def apply_cql2_filter(search: Search, _filter: Optional[Dict[str, Any]]): otherwise the original Search object. """ if _filter is not None: - es_query = filter.to_es(_filter) + es_query = filter.to_es(self.get_queryables_mapping(), _filter) search = search.filter(es_query) return search + async def get_queryables_mapping(self, collection_id: str = "*") -> dict: + """Retrieve mapping of Queryables for search. + + Args: + collection_id (str, optional): The id of the Collection the Queryables + belongs to. Defaults to "*". + + Returns: + dict: A dictionary containing the Queryables mappings. + """ + queryables_mapping = {} + + mappings = await self.client.indices.get_mapping( + index=f"{ITEMS_INDEX_PREFIX}{collection_id}", + ) + + for mapping in mappings.values(): + fields = mapping["mappings"]["properties"] + properties = fields.pop("properties") + + for field_key in fields: + queryables_mapping[field_key] = field_key + + for property_key in properties["properties"]: + queryables_mapping[property_key] = f"properties.{property_key}" + + return queryables_mapping + @staticmethod def populate_sort(sortby: List) -> Optional[Dict[str, Dict[str, str]]]: """Database logic to sort search instance."""
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: