diff --git a/CHANGELOG.md b/CHANGELOG.md index e572c99d..cafe4bcb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed +- Improved datetime query handling to only check start and end datetime values when datetime is None [#396](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/396) - Optimize data_loader.py script [#395](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/395) - Refactored test configuration to use shared app config pattern [#399](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/399) diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index a1ca6250..2d630769 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -245,121 +245,97 @@ def apply_collections_filter(search: Search, collection_ids: List[str]): @staticmethod def apply_datetime_filter( search: Search, interval: Optional[Union[DateTimeType, str]] - ): + ) -> Search: """Apply a filter to search on datetime, start_datetime, and end_datetime fields. Args: - search (Search): The search object to filter. - interval: Optional[Union[DateTimeType, str]] + search: The search object to filter. + interval: Optional datetime interval to filter by. Can be: + - A single datetime string (e.g., "2023-01-01T12:00:00") + - A datetime range string (e.g., "2023-01-01/2023-12-31") + - A datetime object + - A tuple of (start_datetime, end_datetime) Returns: - Search: The filtered search object. + The filtered search object. 
""" + if not interval: + return search + should = [] - datetime_search = return_date(interval) + try: + datetime_search = return_date(interval) + except (ValueError, TypeError) as e: + # Handle invalid interval formats if return_date fails + logger.error(f"Invalid interval format: {interval}, error: {e}") + return search - # If the request is a single datetime return - # items with datetimes equal to the requested datetime OR - # the requested datetime is between their start and end datetimes if "eq" in datetime_search: - should.extend( - [ - Q( - "bool", - filter=[ - Q( - "term", - properties__datetime=datetime_search["eq"], - ), - ], - ), - Q( - "bool", - filter=[ - Q( - "range", - properties__start_datetime={ - "lte": datetime_search["eq"], - }, - ), - Q( - "range", - properties__end_datetime={ - "gte": datetime_search["eq"], - }, - ), - ], - ), - ] - ) - - # If the request is a date range return - # items with datetimes within the requested date range OR - # their startdatetime ithin the requested date range OR - # their enddatetime ithin the requested date range OR - # the requested daterange within their start and end datetimes + # For exact matches, include: + # 1. Items with matching exact datetime + # 2. 
Items with datetime:null where the time falls within their range + should = [ + Q( + "bool", + filter=[ + Q("exists", field="properties.datetime"), + Q("term", **{"properties__datetime": datetime_search["eq"]}), + ], + ), + Q( + "bool", + must_not=[Q("exists", field="properties.datetime")], + filter=[ + Q("exists", field="properties.start_datetime"), + Q("exists", field="properties.end_datetime"), + Q( + "range", + properties__start_datetime={"lte": datetime_search["eq"]}, + ), + Q( + "range", + properties__end_datetime={"gte": datetime_search["eq"]}, + ), + ], + ), + ] else: - should.extend( - [ - Q( - "bool", - filter=[ - Q( - "range", - properties__datetime={ - "gte": datetime_search["gte"], - "lte": datetime_search["lte"], - }, - ), - ], - ), - Q( - "bool", - filter=[ - Q( - "range", - properties__start_datetime={ - "gte": datetime_search["gte"], - "lte": datetime_search["lte"], - }, - ), - ], - ), - Q( - "bool", - filter=[ - Q( - "range", - properties__end_datetime={ - "gte": datetime_search["gte"], - "lte": datetime_search["lte"], - }, - ), - ], - ), - Q( - "bool", - filter=[ - Q( - "range", - properties__start_datetime={ - "lte": datetime_search["gte"] - }, - ), - Q( - "range", - properties__end_datetime={ - "gte": datetime_search["lte"] - }, - ), - ], - ), - ] - ) - - search = search.query(Q("bool", filter=[Q("bool", should=should)])) - - return search + # For date ranges, include: + # 1. Items with datetime in the range + # 2. 
Items with datetime:null that overlap the search range + should = [ + Q( + "bool", + filter=[ + Q("exists", field="properties.datetime"), + Q( + "range", + properties__datetime={ + "gte": datetime_search["gte"], + "lte": datetime_search["lte"], + }, + ), + ], + ), + Q( + "bool", + must_not=[Q("exists", field="properties.datetime")], + filter=[ + Q("exists", field="properties.start_datetime"), + Q("exists", field="properties.end_datetime"), + Q( + "range", + properties__start_datetime={"lte": datetime_search["lte"]}, + ), + Q( + "range", + properties__end_datetime={"gte": datetime_search["gte"]}, + ), + ], + ), + ] + + return search.query(Q("bool", should=should, minimum_should_match=1)) @staticmethod def apply_bbox_filter(search: Search, bbox: List): diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 88c7fcdc..ba6fb9e8 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -282,121 +282,97 @@ def apply_free_text_filter(search: Search, free_text_queries: Optional[List[str] @staticmethod def apply_datetime_filter( search: Search, interval: Optional[Union[DateTimeType, str]] - ): - """Apply a filter to search based on datetime field, start_datetime, and end_datetime fields. + ) -> Search: + """Apply a filter to search on datetime, start_datetime, and end_datetime fields. Args: - search (Search): The search object to filter. - interval: Optional[Union[DateTimeType, str]] + search: The search object to filter. + interval: Optional datetime interval to filter by. Can be: + - A single datetime string (e.g., "2023-01-01T12:00:00") + - A datetime range string (e.g., "2023-01-01/2023-12-31") + - A datetime object + - A tuple of (start_datetime, end_datetime) Returns: - Search: The filtered search object. + The filtered search object. 
""" + if not interval: + return search + should = [] - datetime_search = return_date(interval) + try: + datetime_search = return_date(interval) + except (ValueError, TypeError) as e: + # Handle invalid interval formats if return_date fails + logger.error(f"Invalid interval format: {interval}, error: {e}") + return search - # If the request is a single datetime return - # items with datetimes equal to the requested datetime OR - # the requested datetime is between their start and end datetimes if "eq" in datetime_search: - should.extend( - [ - Q( - "bool", - filter=[ - Q( - "term", - properties__datetime=datetime_search["eq"], - ), - ], - ), - Q( - "bool", - filter=[ - Q( - "range", - properties__start_datetime={ - "lte": datetime_search["eq"], - }, - ), - Q( - "range", - properties__end_datetime={ - "gte": datetime_search["eq"], - }, - ), - ], - ), - ] - ) - - # If the request is a date range return - # items with datetimes within the requested date range OR - # their startdatetime ithin the requested date range OR - # their enddatetime ithin the requested date range OR - # the requested daterange within their start and end datetimes + # For exact matches, include: + # 1. Items with matching exact datetime + # 2. 
Items with datetime:null where the time falls within their range + should = [ + Q( + "bool", + filter=[ + Q("exists", field="properties.datetime"), + Q("term", **{"properties__datetime": datetime_search["eq"]}), + ], + ), + Q( + "bool", + must_not=[Q("exists", field="properties.datetime")], + filter=[ + Q("exists", field="properties.start_datetime"), + Q("exists", field="properties.end_datetime"), + Q( + "range", + properties__start_datetime={"lte": datetime_search["eq"]}, + ), + Q( + "range", + properties__end_datetime={"gte": datetime_search["eq"]}, + ), + ], + ), + ] else: - should.extend( - [ - Q( - "bool", - filter=[ - Q( - "range", - properties__datetime={ - "gte": datetime_search["gte"], - "lte": datetime_search["lte"], - }, - ), - ], - ), - Q( - "bool", - filter=[ - Q( - "range", - properties__start_datetime={ - "gte": datetime_search["gte"], - "lte": datetime_search["lte"], - }, - ), - ], - ), - Q( - "bool", - filter=[ - Q( - "range", - properties__end_datetime={ - "gte": datetime_search["gte"], - "lte": datetime_search["lte"], - }, - ), - ], - ), - Q( - "bool", - filter=[ - Q( - "range", - properties__start_datetime={ - "lte": datetime_search["gte"] - }, - ), - Q( - "range", - properties__end_datetime={ - "gte": datetime_search["lte"] - }, - ), - ], - ), - ] - ) - - search = search.query(Q("bool", filter=[Q("bool", should=should)])) - - return search + # For date ranges, include: + # 1. Items with datetime in the range + # 2. 
Items with datetime:null that overlap the search range + should = [ + Q( + "bool", + filter=[ + Q("exists", field="properties.datetime"), + Q( + "range", + properties__datetime={ + "gte": datetime_search["gte"], + "lte": datetime_search["lte"], + }, + ), + ], + ), + Q( + "bool", + must_not=[Q("exists", field="properties.datetime")], + filter=[ + Q("exists", field="properties.start_datetime"), + Q("exists", field="properties.end_datetime"), + Q( + "range", + properties__start_datetime={"lte": datetime_search["lte"]}, + ), + Q( + "range", + properties__end_datetime={"gte": datetime_search["gte"]}, + ), + ], + ), + ] + + return search.query(Q("bool", should=should, minimum_should_match=1)) @staticmethod def apply_bbox_filter(search: Search, bbox: List): diff --git a/stac_fastapi/tests/resources/test_item.py b/stac_fastapi/tests/resources/test_item.py index 6f344b19..0102bf9b 100644 --- a/stac_fastapi/tests/resources/test_item.py +++ b/stac_fastapi/tests/resources/test_item.py @@ -1,9 +1,11 @@ import json +import logging import os import uuid from copy import deepcopy from datetime import datetime, timedelta from random import randint +from typing import Dict from urllib.parse import parse_qs, urlparse, urlsplit import ciso8601 @@ -15,7 +17,9 @@ from stac_fastapi.core.datetime_utils import datetime_to_str, now_to_rfc3339_str from stac_fastapi.types.core import LandingPageMixin -from ..conftest import create_item, refresh_indices +from ..conftest import create_collection, create_item, refresh_indices + +logger = logging.getLogger(__name__) if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch": from stac_fastapi.opensearch.database_logic import DatabaseLogic @@ -398,8 +402,8 @@ async def test_item_search_temporal_intersecting_window_post(app_client, ctx): test_item = ctx.item item_date = rfc3339_str_to_datetime(test_item["properties"]["datetime"]) - item_date_before = item_date - timedelta(days=10) - item_date_after = item_date - timedelta(days=2) + 
item_date_before = item_date - timedelta(days=2) # Changed from 10 to 2 + item_date_after = item_date + timedelta(days=2) # Changed from -2 to +2 params = { "collections": [test_item["collection"]], @@ -940,36 +944,183 @@ async def test_search_datetime_validation_errors(app_client): assert resp.status_code == 400 -# this test should probably pass but doesn't - stac-pydantic -# https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/issues/247 - -# @pytest.mark.asyncio -# async def test_item_custom_links(app_client, ctx, txn_client): -# item = ctx.item -# item_id = "test-item-custom-links" -# item["id"] = item_id -# item["links"].append( -# { -# "href": "https://maps.example.com/wms", -# "rel": "wms", -# "type": "image/png", -# "title": "RGB composite visualized through a WMS", -# "wms:layers": ["rgb"], -# "wms:transparent": True, -# } -# ) -# await create_item(txn_client, item) - -# resp = await app_client.get("/search", params={"id": item_id}) -# assert resp.status_code == 200 -# resp_json = resp.json() -# links = resp_json["features"][0]["links"] -# for link in links: -# if link["rel"] == "wms": -# assert link["href"] == "https://maps.example.com/wms" -# assert link["type"] == "image/png" -# assert link["title"] == "RGB composite visualized through a WMS" -# assert link["wms:layers"] == ["rgb"] -# assert link["wms:transparent"] -# return True -# assert False, resp_json +@pytest.mark.asyncio +async def test_item_custom_links(app_client, ctx, txn_client): + item = ctx.item + item_id = "test-item-custom-links" + item["id"] = item_id + item["links"].append( + { + "href": "https://maps.example.com/wms", + "rel": "wms", + "type": "image/png", + "title": "RGB composite visualized through a WMS", + "wms:layers": ["rgb"], + "wms:transparent": True, + } + ) + await create_item(txn_client, item) + + resp = await app_client.get("/search", params={"id": item_id}) + assert resp.status_code == 200 + resp_json = resp.json() + links = resp_json["features"][0]["links"] 
+ for link in links: + if link["rel"] == "wms": + assert link["href"] == "https://maps.example.com/wms" + assert link["type"] == "image/png" + assert link["title"] == "RGB composite visualized through a WMS" + assert link["wms:layers"] == ["rgb"] + assert link["wms:transparent"] + return True + assert False, resp_json + + +async def _search_and_get_ids( + app_client, + endpoint: str = "/search", + method: str = "get", + params: Dict = None, + json: Dict = None, +) -> set: + """Helper to send search request and extract feature IDs.""" + if method == "get": + resp = await app_client.get(endpoint, params=params) + else: + resp = await app_client.post(endpoint, json=json) + + assert resp.status_code == 200, f"Search failed: {resp.text}" + data = resp.json() + return {f["id"] for f in data.get("features", [])} + + +@pytest.mark.asyncio +async def test_search_datetime_with_null_datetime( + app_client, txn_client, load_test_data +): + """Test datetime filtering when properties.datetime is null or set, ensuring start_datetime and end_datetime are set when datetime is null.""" + # Setup: Create test collection + test_collection = load_test_data("test_collection.json") + try: + await create_collection(txn_client, collection=test_collection) + except Exception as e: + logger.error(f"Failed to create collection: {e}") + pytest.fail(f"Collection creation failed: {e}") + + base_item = load_test_data("test_item.json") + collection_id = base_item["collection"] + + # Item 1: Null datetime, valid start/end datetimes + null_dt_item = deepcopy(base_item) + null_dt_item["id"] = "null-datetime-item" + null_dt_item["properties"]["datetime"] = None + null_dt_item["properties"]["start_datetime"] = "2020-01-01T00:00:00Z" + null_dt_item["properties"]["end_datetime"] = "2020-01-02T00:00:00Z" + + # Item 2: Valid datetime, no start/end datetimes + valid_dt_item = deepcopy(base_item) + valid_dt_item["id"] = "valid-datetime-item" + valid_dt_item["properties"]["datetime"] = "2020-01-01T11:00:00Z" 
+ valid_dt_item["properties"]["start_datetime"] = None + valid_dt_item["properties"]["end_datetime"] = None + + # Item 3: Valid datetime outside range, valid start/end datetimes + range_item = deepcopy(base_item) + range_item["id"] = "range-item" + range_item["properties"]["datetime"] = "2020-01-03T00:00:00Z" + range_item["properties"]["start_datetime"] = "2020-01-01T00:00:00Z" + range_item["properties"]["end_datetime"] = "2020-01-02T00:00:00Z" + + # Create valid items + items = [null_dt_item, valid_dt_item, range_item] + for item in items: + try: + await create_item(txn_client, item) + except Exception as e: + logger.error(f"Failed to create item {item['id']}: {e}") + pytest.fail(f"Item creation failed: {e}") + + # Refresh indices once + try: + await refresh_indices(txn_client) + except Exception as e: + logger.error(f"Failed to refresh indices: {e}") + pytest.fail(f"Index refresh failed: {e}") + + # Refresh indices once + try: + await refresh_indices(txn_client) + except Exception as e: + logger.error(f"Failed to refresh indices: {e}") + pytest.fail(f"Index refresh failed: {e}") + + # Test 1: Exact datetime matching valid-datetime-item and null-datetime-item + feature_ids = await _search_and_get_ids( + app_client, + params={ + "datetime": "2020-01-01T11:00:00Z", + "collections": [collection_id], + }, + ) + assert feature_ids == { + "valid-datetime-item", # Matches properties__datetime + "null-datetime-item", # Matches start_datetime <= datetime <= end_datetime + }, "Exact datetime search failed" + + # Test 2: Range including valid-datetime-item, null-datetime-item, and range-item + feature_ids = await _search_and_get_ids( + app_client, + params={ + "datetime": "2020-01-01T00:00:00Z/2020-01-03T00:00:00Z", + "collections": [collection_id], + }, + ) + assert feature_ids == { + "valid-datetime-item", # Matches properties__datetime in range + "null-datetime-item", # Matches start_datetime <= lte, end_datetime >= gte + "range-item", # Matches properties__datetime in 
range + }, "Range search failed" + + # Test 3: POST request for range matching null-datetime-item and valid-datetime-item + feature_ids = await _search_and_get_ids( + app_client, + method="post", + json={ + "datetime": "2020-01-01T00:00:00Z/2020-01-02T00:00:00Z", + "collections": [collection_id], + }, + ) + assert feature_ids == { + "null-datetime-item", # Matches start_datetime <= lte, end_datetime >= gte + "valid-datetime-item", # Matches properties__datetime in range + }, "POST range search failed" + + # Test 4: Exact datetime matching only range-item's datetime + feature_ids = await _search_and_get_ids( + app_client, + params={ + "datetime": "2020-01-03T00:00:00Z", + "collections": [collection_id], + }, + ) + assert feature_ids == { + "range-item", # Matches properties__datetime + }, "Exact datetime for range-item failed" + + # Test 5: Range matching null-datetime-item but not range-item's datetime + feature_ids = await _search_and_get_ids( + app_client, + params={ + "datetime": "2020-01-01T12:00:00Z/2020-01-02T12:00:00Z", + "collections": [collection_id], + }, + ) + assert feature_ids == { + "null-datetime-item", # Overlaps: search range [12:00-01-01 to 12:00-02-01] overlaps item range [00:00-01-01 to 00:00-02-01] + }, "Range search excluding range-item datetime failed" + + # Cleanup + try: + await txn_client.delete_collection(test_collection["id"]) + except Exception as e: + logger.warning(f"Failed to delete collection: {e}") pFad - Phonifier reborn
