diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 58a98598a5b..4775eedde32 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -31,6 +31,7 @@ if TYPE_CHECKING: from xarray.core.dataset import Dataset from xarray.core.types import NestedSequence + from xarray.namedarray._typing import _OuterIndexerKey, _VectorizedIndexerKey T_Name = Union[Hashable, None] @@ -268,11 +269,35 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): - __slots__ = () - def get_duck_array(self, dtype: np.typing.DTypeLike = None): key = indexing.BasicIndexer((slice(None),) * self.ndim) - return self[key] # type: ignore[index] + return self[key] # type: ignore [index] + + +class NewBackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): + __slots__ = ("indexing_support",) + + def get_duck_array(self, dtype: np.typing.DTypeLike = None): + key = (slice(None),) * self.ndim + return self[key] # type: ignore [index] + + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + raise NotImplementedError( + f"{self.__class__.__name__}._oindex_get method should be overridden" + ) + + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + raise NotImplementedError( + f"{self.__class__.__name__}._vindex_get method should be overridden" + ) + + @property + def oindex(self) -> indexing.IndexCallable: + return indexing.IndexCallable(self._oindex_get) + + @property + def vindex(self) -> indexing.IndexCallable: + return indexing.IndexCallable(self._vindex_get) class AbstractDataStore: diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 717ee48db3b..e827ecbae6d 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -44,16 +44,33 @@ from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.types import ReadBuffer + from xarray.namedarray._typing import ( + _BasicIndexerKey, 
+ _OuterIndexerKey, + _VectorizedIndexerKey, + ) class H5NetCDFArrayWrapper(BaseNetCDF4Array): + indexing_support = indexing.IndexingSupport.OUTER_1VECTOR + def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) return ds.variables[self.variable_name] - def __getitem__(self, key): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + return indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + return indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def __getitem__(self, key: _BasicIndexerKey) -> Any: + return indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) def _getitem(self, key): diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index a23d247b6c3..8130c264021 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -12,8 +12,8 @@ from xarray import coding from xarray.backends.common import ( BACKEND_ENTRYPOINTS, - BackendArray, BackendEntrypoint, + NewBackendArray, WritableCFDataStore, _normalize_path, datatree_from_dict_with_io_cleanup, @@ -48,6 +48,11 @@ from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.types import ReadBuffer + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) # This lookup table maps from dtype.byteorder to a readable endian # string used by netCDF4. 
@@ -56,7 +61,7 @@ NETCDF4_PYTHON_LOCK = combine_locks([NETCDFC_LOCK, HDF5_LOCK]) -class BaseNetCDF4Array(BackendArray): +class BaseNetCDF4Array(NewBackendArray): __slots__ = ("datastore", "dtype", "shape", "variable_name") def __init__(self, variable_name, datastore): @@ -88,7 +93,7 @@ def get_array(self, needs_lock=True): class NetCDF4ArrayWrapper(BaseNetCDF4Array): - __slots__ = () + indexing_support = indexing.IndexingSupport.OUTER def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) @@ -99,9 +104,19 @@ def get_array(self, needs_lock=True): variable.set_auto_chartostring(False) return variable - def __getitem__(self, key): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + def _oindex_get(self, key: _OuterIndexerKey): + return indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def _vindex_get(self, key: _VectorizedIndexerKey): + return indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def __getitem__(self, key: _BasicIndexerKey): + return indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) def _getitem(self, key): diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 74ddbc8443b..2ec260a3d11 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -8,8 +8,8 @@ from xarray.backends.common import ( BACKEND_ENTRYPOINTS, AbstractDataStore, - BackendArray, BackendEntrypoint, + NewBackendArray, robust_getitem, ) from xarray.backends.store import StoreBackendEntrypoint @@ -29,10 +29,17 @@ from xarray.core.dataset import Dataset from xarray.core.types import ReadBuffer + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) -class PydapArrayWrapper(BackendArray): - def __init__(self, array): +class PydapArrayWrapper(NewBackendArray): + indexing_support = 
indexing.IndexingSupport.BASIC + + def __init__(self, array) -> None: self.array = array @property @@ -43,9 +50,19 @@ def shape(self) -> tuple[int, ...]: def dtype(self): return self.array.dtype - def __getitem__(self, key): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + return indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + return indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def __getitem__(self, key: _BasicIndexerKey) -> Any: + return indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) def _getitem(self, key): diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 93d0e40a6e1..1793f619a85 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -10,8 +10,8 @@ from xarray.backends.common import ( BACKEND_ENTRYPOINTS, - BackendArray, BackendEntrypoint, + NewBackendArray, WritableCFDataStore, _normalize_path, ) @@ -37,6 +37,11 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset from xarray.core.types import ReadBuffer + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") @@ -54,7 +59,9 @@ def _decode_attrs(d): return {k: v if k == "_FillValue" else _decode_string(v) for (k, v) in d.items()} -class ScipyArrayWrapper(BackendArray): +class ScipyArrayWrapper(NewBackendArray): + indexing_support = indexing.IndexingSupport.OUTER_1VECTOR + def __init__(self, variable_name, datastore): self.datastore = datastore self.variable_name = variable_name @@ -66,15 +73,7 @@ def get_variable(self, needs_lock=True): ds = self.datastore._manager.acquire(needs_lock) 
return ds.variables[self.variable_name] - def _getitem(self, key): - with self.datastore.lock: - data = self.get_variable(needs_lock=False).data - return data[key] - - def __getitem__(self, key): - data = indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem - ) + def _finalize_result(self, data): # Copy data if the source file is mmapped. This makes things consistent # with the netCDF4 library by ensuring we can safely read arrays even # after closing associated files. @@ -87,7 +86,30 @@ def __getitem__(self, key): return np.array(data, dtype=self.dtype, copy=copy) - def __setitem__(self, key, value): + def _getitem(self, key): + with self.datastore.lock: + data = self.get_variable(needs_lock=False).data + return data[key] + + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + data = indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + return self._finalize_result(data) + + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + data = indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + return self._finalize_result(data) + + def __getitem__(self, key: _BasicIndexerKey) -> Any: + data = indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + return self._finalize_result(data) + + def __setitem__(self, key, value) -> None: with self.datastore.lock: data = self.get_variable(needs_lock=False) try: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index e83f5556369..e79cb488ac5 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -14,8 +14,8 @@ from xarray.backends.common import ( BACKEND_ENTRYPOINTS, AbstractWritableDataStore, - BackendArray, BackendEntrypoint, + NewBackendArray, _encode_variable_name, _normalize_path, datatree_from_dict_with_io_cleanup, @@ -42,6 +42,11 @@ from xarray.core.dataset import Dataset from xarray.core.datatree import 
DataTree from xarray.core.types import ReadBuffer, ZarrArray, ZarrGroup + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) def _get_mappers(*, storage_options, store, chunk_store): @@ -179,8 +184,8 @@ def encode_zarr_attr_value(value): return encoded -class ZarrArrayWrapper(BackendArray): - __slots__ = ("_array", "dtype", "shape") +class ZarrArrayWrapper(NewBackendArray): + indexing_support = indexing.IndexingSupport.VECTORIZED def __init__(self, zarr_array): # some callers attempt to evaluate an array if an `array` property exists on the object. @@ -203,25 +208,28 @@ def __init__(self, zarr_array): def get_array(self): return self._array - def _oindex(self, key): - return self._array.oindex[key] - - def _vindex(self, key): - return self._array.vindex[key] - - def _getitem(self, key): - return self._array[key] - - def __getitem__(self, key): - array = self._array - if isinstance(key, indexing.BasicIndexer): - method = self._getitem - elif isinstance(key, indexing.VectorizedIndexer): - method = self._vindex - elif isinstance(key, indexing.OuterIndexer): - method = self._oindex - return indexing.explicit_indexing_adapter( - key, array.shape, indexing.IndexingSupport.VECTORIZED, method + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + def raw_indexing_method(key): + return self._array.oindex[key] + + return indexing.outer_indexing_adapter( + key, self._array.shape, self.indexing_support, raw_indexing_method + ) + + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + def raw_indexing_method(key): + return self._array.vindex[key] + + return indexing.vectorized_indexing_adapter( + key, self._array.shape, self.indexing_support, raw_indexing_method + ) + + def __getitem__(self, key: _BasicIndexerKey) -> Any: + def raw_indexing_method(key): + return self._array[key] + + return indexing.basic_indexing_adapter( + key, self._array.shape, self.indexing_support, raw_indexing_method ) # if self.ndim == 0: 
diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 4ca6a3f0a46..37b9e548e0f 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -3,6 +3,7 @@ from __future__ import annotations from functools import partial +from typing import TYPE_CHECKING import numpy as np @@ -22,6 +23,13 @@ HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") +if TYPE_CHECKING: + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) + def create_vlen_dtype(element_type): if element_type not in (str, bytes): @@ -220,8 +228,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically stacked along the last dimension. - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer] + >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[(slice(None),)] array(b'abc', dtype='|S3') """ @@ -240,7 +247,7 @@ def __init__(self, array): @property def dtype(self): - return np.dtype("S" + str(self.array.shape[-1])) + return np.dtype(f"S{self.array.shape[-1]!s}") @property def shape(self) -> tuple[int, ...]: @@ -249,15 +256,15 @@ def shape(self) -> tuple[int, ...]: def __repr__(self): return f"{type(self).__name__}({self.array!r})" - def _vindex_get(self, key): + def _vindex_get(self, key: _VectorizedIndexerKey): return _numpy_char_to_bytes(self.array.vindex[key]) - def _oindex_get(self, key): + def _oindex_get(self, key: _OuterIndexerKey): return _numpy_char_to_bytes(self.array.oindex[key]) - def __getitem__(self, key): + def __getitem__(self, key: _BasicIndexerKey): # require slicing the last dimension completely - key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) - if key.tuple[-1] != slice(None): + indexer = indexing.expanded_indexer(key, self.array.ndim) + if indexer[-1] != slice(None): 
raise IndexError("too many indices") - return _numpy_char_to_bytes(self.array[key]) + return _numpy_char_to_bytes(self.array[indexer]) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 83112628dbb..ef794ec763a 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -99,8 +99,7 @@ class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): >>> NativeEndiannessArray(x).dtype dtype('int16') - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> NativeEndiannessArray(x)[indexer].dtype + >>> NativeEndiannessArray(x)[(slice(None),)].dtype dtype('int16') """ @@ -137,8 +136,7 @@ class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): >>> BoolTypeArray(x).dtype dtype('bool') - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> BoolTypeArray(x)[indexer].dtype + >>> BoolTypeArray(x)[(slice(None),)].dtype dtype('bool') """ diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 51fc4a00421..3fda88956d1 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -38,7 +38,17 @@ from xarray.core.indexes import Index from xarray.core.types import Self from xarray.core.variable import Variable - from xarray.namedarray._typing import _Shape, duckarray + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _Chunks, + _IndexerKey, + _IndexKey, + _IndexKeys, + _OuterIndexerKey, + _Shape, + _VectorizedIndexerKey, + duckarray, + ) from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -209,7 +219,7 @@ def map_index_queries( return merged -def expanded_indexer(key, ndim): +def expanded_indexer(key: _IndexerKey | _IndexKeys, ndim: int) -> _IndexKeys: """Given a key for indexing an ndarray, return an equivalent key which is a tuple with length equal to the number of dimensions. 
@@ -220,14 +230,14 @@ def expanded_indexer(key, ndim): if not isinstance(key, tuple): # numpy treats non-tuple keys equivalent to tuples of length 1 key = (key,) - new_key = [] + new_key: list[_IndexKey] = [] # handling Ellipsis right is a little tricky, see: # https://numpy.org/doc/stable/reference/arrays.indexing.html#advanced-indexing found_ellipsis = False for k in key: if k is Ellipsis: if not found_ellipsis: - new_key.extend((ndim + 1 - len(key)) * [slice(None)]) + new_key.extend([slice(None)] * (ndim + 1 - len(key))) found_ellipsis = True else: new_key.append(slice(None)) @@ -235,7 +245,7 @@ def expanded_indexer(key, ndim): new_key.append(k) if len(new_key) > ndim: raise IndexError("too many indices") - new_key.extend((ndim - len(new_key)) * [slice(None)]) + new_key.extend([slice(None)] * (ndim - len(new_key))) return tuple(new_key) @@ -298,7 +308,7 @@ def slice_slice(old_slice: slice, applied_slice: slice, size: int) -> slice: return slice(start, stop, step) -def _index_indexer_1d(old_indexer, applied_indexer, size: int): +def _index_indexer_1d(old_indexer: Any, applied_indexer: Any, size: int) -> Any: if isinstance(applied_indexer, slice) and applied_indexer == slice(None): # shortcut for the usual case return old_indexer @@ -327,13 +337,13 @@ class ExplicitIndexer: __slots__ = ("_key",) - def __init__(self, key: tuple[Any, ...]): + def __init__(self, key: _IndexerKey): if type(self) is ExplicitIndexer: raise TypeError("cannot instantiate base ExplicitIndexer objects") self._key = tuple(key) @property - def tuple(self) -> tuple[Any, ...]: + def tuple(self) -> _IndexerKey: return self._key def __repr__(self) -> str: @@ -387,11 +397,11 @@ class BasicIndexer(ExplicitIndexer): __slots__ = () - def __init__(self, key: tuple[int | np.integer | slice, ...]): + def __init__(self, key: _BasicIndexerKey): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key = [] + new_key: tuple[int | np.integer | slice, ...] 
= () for k in key: if isinstance(k, integer_types): k = int(k) @@ -401,9 +411,9 @@ def __init__(self, key: tuple[int | np.integer | slice, ...]): raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) - new_key.append(k) + new_key += (k,) - super().__init__(tuple(new_key)) + super().__init__(new_key) class OuterIndexer(ExplicitIndexer): @@ -419,14 +429,14 @@ class OuterIndexer(ExplicitIndexer): def __init__( self, - key: tuple[ - int | np.integer | slice | np.ndarray[Any, np.dtype[np.generic]], ... - ], + key: _OuterIndexerKey, ): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key = [] + new_key: tuple[ + int | np.integer | slice | np.ndarray[Any, np.dtype[np.generic]], ... + ] = () for k in key: if isinstance(k, integer_types): k = int(k) @@ -447,9 +457,9 @@ def __init__( raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) - new_key.append(k) + new_key += (k,) - super().__init__(tuple(new_key)) + super().__init__(new_key) class VectorizedIndexer(ExplicitIndexer): @@ -464,11 +474,11 @@ class VectorizedIndexer(ExplicitIndexer): __slots__ = () - def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...]): + def __init__(self, key: _VectorizedIndexerKey): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () ndim = None for k in key: if isinstance(k, slice): @@ -491,9 +501,9 @@ def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ... 
raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) - new_key.append(k) + new_key += (k,) - super().__init__(tuple(new_key)) + super().__init__(new_key) class ExplicitlyIndexed: @@ -517,26 +527,25 @@ def get_duck_array(self): class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed): __slots__ = () - def get_duck_array(self): - key = BasicIndexer((slice(None),) * self.ndim) - return self[key] + def get_duck_array(self) -> Any: + return self[(slice(None),) * self.ndim] - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _OuterIndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._oindex_set method should be overridden" ) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._vindex_set method should be overridden" ) @@ -574,12 +583,12 @@ def __array__( else: return np.asarray(self.get_duck_array(), dtype=dtype) - def get_duck_array(self): + def get_duck_array(self) -> Any: return self.array.get_duck_array() - def __getitem__(self, key: Any): - key = expanded_indexer(key, self.ndim) - indexer = self.indexer_cls(key) + def __getitem__(self, key) -> Any: + _key = expanded_indexer(key, self.ndim) + indexer = self.indexer_cls(_key) result = apply_indexer(self.array, indexer) @@ -591,6 +600,14 @@ def __getitem__(self, key: Any): return result +BackendArray_fallback_warning_message = ( + "The 
array `{0}` does not support indexing using the .vindex and .oindex properties. " + "The __getitem__ method is being used instead. This fallback behavior will be " + "removed in a future version. Please ensure that the backend array `{0}` implements " + "support for the .vindex and .oindex properties to avoid potential issues." +) + + class LazilyIndexedArray(ExplicitlyIndexedNDArrayMixin): """Wrap an array to make basic and outer indexing lazy.""" @@ -625,33 +642,31 @@ def __init__(self, array: Any, key: ExplicitIndexer | None = None): shape += (k.size,) self._shape = shape - def _updated_key(self, new_key: ExplicitIndexer) -> BasicIndexer | OuterIndexer: - iter_new_key = iter(expanded_indexer(new_key.tuple, self.ndim)) - full_key = [] + def _updated_key( + self, new_key: ExplicitIndexer | _IndexerKey + ) -> BasicIndexer | OuterIndexer: + _new_key_tuple = ( + new_key.tuple if isinstance(new_key, ExplicitIndexer) else new_key + ) + iter_new_key = iter(expanded_indexer(_new_key_tuple, self.ndim)) + full_key: tuple[int | np.integer, ...] 
= () for size, k in zip(self.array.shape, self.key.tuple, strict=True): if isinstance(k, integer_types): - full_key.append(k) + full_key += (k,) else: - full_key.append(_index_indexer_1d(k, next(iter_new_key), size)) - full_key_tuple = tuple(full_key) + full_key += (_index_indexer_1d(k, next(iter_new_key), size),) - if all(isinstance(k, integer_types + (slice,)) for k in full_key_tuple): - return BasicIndexer(full_key_tuple) - return OuterIndexer(full_key_tuple) + if all(isinstance(k, integer_types + (slice,)) for k in full_key): + return BasicIndexer(full_key) + return OuterIndexer(full_key) @property def shape(self) -> _Shape: return self._shape - def get_duck_array(self): - if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): - array = apply_indexer(self.array, self.key) - else: - # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray so use its __getitem__ - array = self.array[self.key] - - # self.array[self.key] is now a numpy array when + def get_duck_array(self) -> Any: + array = apply_indexer(self.array, self.key) + # array[self.key] is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) # so we need the explicit check for ExplicitlyIndexed @@ -659,34 +674,32 @@ def get_duck_array(self): array = array.get_duck_array() return _wrap_numpy_scalars(array) - def transpose(self, order): + def transpose(self, order) -> Any: return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _OuterIndexerKey) -> LazilyIndexedArray: return type(self)(self.array, self._updated_key(indexer)) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: array = LazilyVectorizedIndexedArray(self.array, self.key) return array.vindex[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - 
self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _BasicIndexerKey) -> LazilyIndexedArray: return type(self)(self.array, self._updated_key(indexer)) - def _vindex_set(self, key: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, key: _VectorizedIndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." ) - def _oindex_set(self, key: OuterIndexer, value: Any) -> None: - full_key = self._updated_key(key) - self.array.oindex[full_key] = value + def _oindex_set(self, key: _OuterIndexerKey, value: Any) -> None: + full_key = self._updated_key(OuterIndexer(key)) + self.array.oindex[full_key.tuple] = value - def __setitem__(self, key: BasicIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(key) - full_key = self._updated_key(key) - self.array[full_key] = value + def __setitem__(self, key: _BasicIndexerKey, value: Any) -> None: + full_key = self._updated_key(BasicIndexer(key)) + self.array[full_key.tuple] = value def __repr__(self) -> str: return f"{type(self).__name__}(array={self.array!r}, key={self.key!r})" @@ -719,14 +732,10 @@ def __init__(self, array: duckarray[Any, Any], key: ExplicitIndexer): def shape(self) -> _Shape: return np.broadcast(*self.key.tuple).shape - def get_duck_array(self): - if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): - array = apply_indexer(self.array, self.key) - else: - # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray so use its __getitem__ - array = self.array[self.key] - # self.array[self.key] is now a numpy array when + def get_duck_array(self) -> Any: + array = apply_indexer(self.array, self.key) + + # array is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) # so we need the explicit check for ExplicitlyIndexed @@ -734,28 
+743,29 @@ def get_duck_array(self): array = array.get_duck_array() return _wrap_numpy_scalars(array) - def _updated_key(self, new_key: ExplicitIndexer): + def _updated_key(self, new_key: ExplicitIndexer) -> VectorizedIndexer: return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer: OuterIndexer): - return type(self)(self.array, self._updated_key(indexer)) + def _oindex_get(self, indexer: _OuterIndexerKey) -> LazilyVectorizedIndexedArray: + return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) - def _vindex_get(self, indexer: VectorizedIndexer): - return type(self)(self.array, self._updated_key(indexer)) + def _vindex_get( + self, indexer: _VectorizedIndexerKey + ) -> LazilyVectorizedIndexedArray: + return type(self)(self.array, self._updated_key(VectorizedIndexer(indexer))) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _BasicIndexerKey) -> Any: # If the indexed array becomes a scalar, return LazilyIndexedArray - if all(isinstance(ind, integer_types) for ind in indexer.tuple): - key = BasicIndexer(tuple(k[indexer.tuple] for k in self.key.tuple)) + if all(isinstance(ind, integer_types) for ind in indexer): + key = BasicIndexer(tuple(k[indexer] for k in self.key.tuple)) return LazilyIndexedArray(self.array, key) - return type(self)(self.array, self._updated_key(indexer)) + return type(self)(self.array, self._updated_key(BasicIndexer(indexer))) - def transpose(self, order): + def transpose(self, order) -> LazilyVectorizedIndexedArray: key = VectorizedIndexer(tuple(k.transpose(order) for k in self.key.tuple)) return type(self)(self.array, key) - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. 
Load your data first by .load() or compute()." @@ -780,42 +790,39 @@ def __init__(self, array: duckarray[Any, Any]): self.array = as_indexable(array) self._copied = False - def _ensure_copied(self): + def _ensure_copied(self) -> None: if not self._copied: self.array = as_indexable(np.array(self.array)) self._copied = True - def get_duck_array(self): + def get_duck_array(self) -> Any: return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _OuterIndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _BasicIndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array[indexer])) - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: self._ensure_copied() self.array.vindex[indexer] = value - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: self._ensure_copied() self.array.oindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: self._ensure_copied() - self.array[indexer] = value - def __deepcopy__(self, memo): + def __deepcopy__(self, memo) -> CopyOnWriteArray: # CopyOnWriteArray is used to wrap backend array objects, which might # point to files on disk, so we can't rely on 
the default deepcopy # implementation. @@ -828,38 +835,41 @@ class MemoryCachedArray(ExplicitlyIndexedNDArrayMixin): def __init__(self, array): self.array = _wrap_numpy_scalars(as_indexable(array)) - def _ensure_cached(self): + def _ensure_cached(self) -> None: self.array = as_indexable(self.array.get_duck_array()) - def get_duck_array(self): + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: + return np.asarray(self.get_duck_array(), dtype=dtype) + + def get_duck_array(self) -> Any: self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _OuterIndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _BasicIndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array[indexer])) - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: self.array.vindex[indexer] = value - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: self.array.oindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: self.array[indexer] = value -def as_indexable(array): +def as_indexable(array: Any): """ This function always 
returns a ExplicitlyIndexed subclass, so that the vectorized indexing is always possible with the returned @@ -904,21 +914,23 @@ def _outer_to_vectorized_indexer( n_dim = len([k for k in key if not isinstance(k, integer_types)]) i_dim = 0 - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () for k, size in zip(key, shape, strict=True): if isinstance(k, integer_types): - new_key.append(np.array(k).reshape((1,) * n_dim)) + new_key += (np.array(k).reshape((1,) * n_dim),) else: # np.ndarray or slice if isinstance(k, slice): k = np.arange(*k.indices(size)) assert k.dtype.kind in {"i", "u"} new_shape = [(1,) * i_dim + (k.size,) + (1,) * (n_dim - i_dim - 1)] - new_key.append(k.reshape(*new_shape)) + new_key += (k.reshape(*new_shape),) i_dim += 1 - return VectorizedIndexer(tuple(new_key)) + return VectorizedIndexer(new_key) -def _outer_to_numpy_indexer(indexer: BasicIndexer | OuterIndexer, shape: _Shape): +def _outer_to_numpy_indexer( + indexer: BasicIndexer | OuterIndexer, shape: _Shape +) -> tuple[Any, ...]: """Convert an OuterIndexer into an indexer for NumPy. Parameters @@ -982,6 +994,118 @@ class IndexingSupport(enum.Enum): VECTORIZED = 3 +def _finish_indexing( + raw_indexing_method: Callable[..., Any], + *, + raw_key, + numpy_indices, +) -> Any: + result = raw_indexing_method(raw_key.tuple) + if numpy_indices.tuple: + result = apply_indexer(as_indexable(result), numpy_indices) + return result + + +def basic_indexing_adapter( + key: _BasicIndexerKey, + shape: _Shape, + indexing_support: IndexingSupport, + raw_indexing_method: Callable[..., Any], +) -> Any: + """Support explicit basic indexing by delegating to a raw indexing method. + + Outer and/or vectorized indexers are supported by indexing a second time + with a NumPy array. + + Parameters + ---------- + key : IndexerKey + Tuple indexer + shape : Tuple[int, ...] + Shape of the indexed array. 
+ indexing_support : IndexingSupport enum + Form of indexing supported by raw_indexing_method. + raw_indexing_method : callable + Function (like ndarray.__getitem__) that when called with indexing key + in the form of a tuple returns an indexed array. + + Returns + ------- + Indexing result, in the form of a duck numpy-array. + """ + raw_key, numpy_indices = _decompose_outer_indexer( + BasicIndexer(key), shape, indexing_support + ) + return _finish_indexing( + raw_indexing_method, raw_key=raw_key, numpy_indices=numpy_indices + ) + + +def outer_indexing_adapter( + key: _OuterIndexerKey, + shape: _Shape, + indexing_support: IndexingSupport, + raw_indexing_method: Callable[..., Any], +) -> Any: + """Support explicit outer indexing by delegating to a raw indexing method. + + Parameters + ---------- + key : IndexerKey + tuple indexer + shape : Tuple[int, ...] + Shape of the indexed array. + indexing_support : IndexingSupport enum + Form of indexing supported by raw_indexing_method. + raw_indexing_method : callable + Function (like ndarray.__getitem__) that when called with indexing key + in the form of a tuple returns an indexed array. + + Returns + ------- + Indexing result, in the form of a duck numpy-array. + """ + raw_key, numpy_indices = _decompose_outer_indexer( + OuterIndexer(key), shape, indexing_support + ) + return _finish_indexing( + raw_indexing_method, raw_key=raw_key, numpy_indices=numpy_indices + ) + + +def vectorized_indexing_adapter( + key: _VectorizedIndexerKey, + shape: _Shape, + indexing_support: IndexingSupport, + raw_indexing_method: Callable[..., Any], +) -> Any: + """Support explicit vectorized indexing by delegating to a raw indexing method. + + Parameters + ---------- + key : IndexerKey + Explicit indexing object. + shape : Tuple[int, ...] + Shape of the indexed array. + indexing_support : IndexingSupport enum + Form of indexing supported by raw_indexing_method. 
+ raw_indexing_method : callable + Function (like ndarray.__getitem__) that when called with indexing key + in the form of a tuple returns an indexed array. + + Returns + ------- + Indexing result, in the form of a duck numpy-array. + """ + raw_key, numpy_indices = _decompose_vectorized_indexer( + VectorizedIndexer(key), shape, indexing_support + ) + return _finish_indexing( + raw_indexing_method, raw_key=raw_key, numpy_indices=numpy_indices + ) + + +# TODO: deprecate and delete this method once it is no longer used externally def explicit_indexing_adapter( key: ExplicitIndexer, shape: _Shape, @@ -1009,35 +1133,60 @@ def explicit_indexing_adapter( ------- Indexing result, in the form of a duck numpy-array. """ + + # If the array is not an ExplicitlyIndexedNDArrayMixin, + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ + # emit_user_level_warning( + # BackendArray_fallback_warning_message.format(""), + # category=PendingDeprecationWarning, + # ) raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support) result = raw_indexing_method(raw_key.tuple) if numpy_indices.tuple: - # index the loaded np.ndarray indexable = NumpyIndexingAdapter(result) result = apply_indexer(indexable, numpy_indices) return result -def apply_indexer(indexable, indexer: ExplicitIndexer): +def apply_indexer( + indexable: ExplicitlyIndexedNDArrayMixin, indexer: ExplicitIndexer +) -> Any: """Apply an indexer to an indexable object.""" + if not hasattr(indexable, "vindex") and not hasattr(indexable, "oindex"): + # This path is used by Lazily*IndexedArray.get_duck_array() + # classname = type(indexable).__name__ + # If the array is not an ExplicitlyIndexedNDArrayMixin, + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. 
so use its __getitem__ + # emit_user_level_warning( + # BackendArray_fallback_warning_message.format(classname), + # category=PendingDeprecationWarning, + # ) + return indexable[indexer] + if isinstance(indexer, VectorizedIndexer): - return indexable.vindex[indexer] + return indexable.vindex[indexer.tuple] elif isinstance(indexer, OuterIndexer): - return indexable.oindex[indexer] + return indexable.oindex[indexer.tuple] + elif isinstance(indexer, BasicIndexer): + return indexable[indexer.tuple] else: - return indexable[indexer] + raise TypeError( + f"Received indexer of type {type(indexer)!r}. " + "Expected BasicIndexer, OuterIndexer, or VectorizedIndexer" + ) def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: """Set values in an indexable object using an indexer.""" if isinstance(indexer, VectorizedIndexer): - indexable.vindex[indexer] = value + indexable.vindex[indexer.tuple] = value elif isinstance(indexer, OuterIndexer): - indexable.oindex[indexer] = value + indexable.oindex[indexer.tuple] = value else: - indexable[indexer] = value + indexable[indexer.tuple] = value +# TODO: delete this method once explicit_indexing_adapter is no longer used externally def decompose_indexer( indexer: ExplicitIndexer, shape: _Shape, indexing_support: IndexingSupport ) -> tuple[ExplicitIndexer, ExplicitIndexer]: @@ -1109,10 +1258,10 @@ def _decompose_vectorized_indexer( >>> array = np.arange(36).reshape(6, 6) >>> backend_indexer = OuterIndexer((np.array([0, 1, 3]), np.array([2, 3]))) >>> # load subslice of the array - ... array = NumpyIndexingAdapter(array).oindex[backend_indexer] + ... array = NumpyIndexingAdapter(array).oindex[backend_indexer.tuple] >>> np_indexer = VectorizedIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # vectorized indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array).vindex[np_indexer] + ... 
NumpyIndexingAdapter(array).vindex[np_indexer.tuple] array([ 2, 21, 8]) """ assert isinstance(indexer, VectorizedIndexer) @@ -1120,8 +1269,10 @@ def _decompose_vectorized_indexer( if indexing_support is IndexingSupport.VECTORIZED: return indexer, BasicIndexer(()) - backend_indexer_elems = [] - np_indexer_elems = [] + backend_indexer_elems: tuple[ + int | np.integer | slice | np.ndarray[Any, np.dtype[np.unsignedinteger]], ... + ] = () + np_indexer_elems: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () # convert negative indices indexer_elems = [ np.where(k < 0, k + s, k) if isinstance(k, np.ndarray) else k @@ -1134,17 +1285,17 @@ def _decompose_vectorized_indexer( # (but make its step positive) in the backend, # and then use all of it (slice(None)) for the in-memory portion. bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer_elems.append(bk_slice) - np_indexer_elems.append(np_slice) + backend_indexer_elems += (bk_slice,) + np_indexer_elems += (np_slice,) else: # If it is a (multidimensional) np.ndarray, just pickup the used # keys without duplication and store them as a 1d-np.ndarray. oind, vind = np.unique(k, return_inverse=True) - backend_indexer_elems.append(oind) - np_indexer_elems.append(vind.reshape(*k.shape)) + backend_indexer_elems += (oind,) + np_indexer_elems += (vind.reshape(*k.shape),) - backend_indexer = OuterIndexer(tuple(backend_indexer_elems)) - np_indexer = VectorizedIndexer(tuple(np_indexer_elems)) + backend_indexer = OuterIndexer(backend_indexer_elems) + np_indexer = VectorizedIndexer(np_indexer_elems) if indexing_support is IndexingSupport.OUTER: return backend_indexer, np_indexer @@ -1191,42 +1342,42 @@ def _decompose_outer_indexer( >>> array = np.arange(36).reshape(6, 6) >>> backend_indexer = BasicIndexer((slice(0, 3), slice(2, 4))) >>> # load subslice of the array - ... array = NumpyIndexingAdapter(array)[backend_indexer] + ... 
array = NumpyIndexingAdapter(array)[backend_indexer.tuple] >>> np_indexer = OuterIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # outer indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array).oindex[np_indexer] + ... NumpyIndexingAdapter(array).oindex[np_indexer.tuple] array([[ 2, 3, 2], [14, 15, 14], [ 8, 9, 8]]) """ - backend_indexer: list[Any] = [] - np_indexer: list[Any] = [] + backend_indexer: tuple[Any, ...] = () + np_indexer: tuple[Any, ...] = () assert isinstance(indexer, OuterIndexer | BasicIndexer) if indexing_support == IndexingSupport.VECTORIZED: - for k, s in zip(indexer.tuple, shape, strict=False): + for k, s in zip(indexer.tuple, shape, strict=True): if isinstance(k, slice): # If it is a slice, then we will slice it as-is # (but make its step positive) in the backend, bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) else: - backend_indexer.append(k) + backend_indexer += (k,) if not is_scalar(k): - np_indexer.append(slice(None)) - return type(indexer)(tuple(backend_indexer)), BasicIndexer(tuple(np_indexer)) + np_indexer += (slice(None),) + return type(indexer)(backend_indexer), BasicIndexer(np_indexer) # make indexer positive - pos_indexer: list[np.ndarray | int | np.number] = [] - for k, s in zip(indexer.tuple, shape, strict=False): + pos_indexer: tuple[np.ndarray | int | np.number, ...] 
= () + for k, s in zip(indexer.tuple, shape, strict=True): if isinstance(k, np.ndarray): - pos_indexer.append(np.where(k < 0, k + s, k)) + pos_indexer += (np.where(k < 0, k + s, k),) elif isinstance(k, integer_types) and k < 0: - pos_indexer.append(k + s) + pos_indexer += (k + s,) else: - pos_indexer.append(k) + pos_indexer += (k,) indexer_elems = pos_indexer if indexing_support is IndexingSupport.OUTER_1VECTOR: @@ -1242,63 +1393,63 @@ def _decompose_outer_indexer( ] array_index = np.argmax(np.array(gains)) if len(gains) > 0 else None - for i, (k, s) in enumerate(zip(indexer_elems, shape, strict=False)): + for i, (k, s) in enumerate(zip(indexer_elems, shape, strict=True)): if isinstance(k, np.ndarray) and i != array_index: # np.ndarray key is converted to slice that covers the entire # entries of this key. - backend_indexer.append(slice(np.min(k), np.max(k) + 1)) - np_indexer.append(k - np.min(k)) + backend_indexer += (slice(np.min(k), np.max(k) + 1),) + np_indexer += (k - np.min(k),) elif isinstance(k, np.ndarray): # Remove duplicates and sort them in the increasing order pkey, ekey = np.unique(k, return_inverse=True) - backend_indexer.append(pkey) - np_indexer.append(ekey) + backend_indexer += (pkey,) + np_indexer += (ekey,) elif isinstance(k, integer_types): - backend_indexer.append(k) + backend_indexer += (k,) else: # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) - return (OuterIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) + return OuterIndexer(backend_indexer), OuterIndexer(np_indexer) if indexing_support == IndexingSupport.OUTER: - for k, s in zip(indexer_elems, shape, strict=False): + for k, s in zip(indexer_elems, shape, strict=True): if isinstance(k, slice): # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) - 
backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) elif isinstance(k, integer_types): - backend_indexer.append(k) + backend_indexer += (k,) elif isinstance(k, np.ndarray) and (np.diff(k) >= 0).all(): - backend_indexer.append(k) - np_indexer.append(slice(None)) + backend_indexer += (k,) + np_indexer += (slice(None),) else: # Remove duplicates and sort them in the increasing order oind, vind = np.unique(k, return_inverse=True) - backend_indexer.append(oind) - np_indexer.append(vind.reshape(*k.shape)) + backend_indexer += (oind,) + np_indexer += (vind.reshape(*k.shape),) - return (OuterIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) + return OuterIndexer(backend_indexer), OuterIndexer(np_indexer) # basic indexer assert indexing_support == IndexingSupport.BASIC - for k, s in zip(indexer_elems, shape, strict=False): + for k, s in zip(indexer_elems, shape, strict=True): if isinstance(k, np.ndarray): # np.ndarray key is converted to slice that covers the entire # entries of this key. 
- backend_indexer.append(slice(np.min(k), np.max(k) + 1)) - np_indexer.append(k - np.min(k)) + backend_indexer += (slice(np.min(k), np.max(k) + 1),) + np_indexer += (k - np.min(k),) elif isinstance(k, integer_types): - backend_indexer.append(k) + backend_indexer += (k,) else: # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) - return (BasicIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) + return BasicIndexer(backend_indexer), OuterIndexer(np_indexer) def _arrayize_vectorized_indexer( @@ -1312,15 +1463,15 @@ def _arrayize_vectorized_indexer( arrays = [v for v in indexer.tuple if isinstance(v, np.ndarray)] n_dim = arrays[0].ndim if len(arrays) > 0 else 0 i_dim = 0 - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () for v, size in zip(indexer.tuple, shape, strict=True): if isinstance(v, np.ndarray): - new_key.append(np.reshape(v, v.shape + (1,) * len(slices))) + new_key += (np.reshape(v, v.shape + (1,) * len(slices)),) else: # slice shape = (1,) * (n_dim + i_dim) + (-1,) + (1,) * (len(slices) - i_dim - 1) - new_key.append(np.arange(*v.indices(size)).reshape(shape)) + new_key += (np.arange(*v.indices(size)).reshape(shape),) i_dim += 1 - return VectorizedIndexer(tuple(new_key)) + return VectorizedIndexer(new_key) def _chunked_array_with_chunks_hint( @@ -1330,36 +1481,42 @@ def _chunked_array_with_chunks_hint( if len(chunks) < array.ndim: raise ValueError("not enough chunks in hint") - new_chunks = [] - for chunk, size in zip(chunks, array.shape, strict=False): - new_chunks.append(chunk if size > 1 else (1,)) - return chunkmanager.from_array(array, new_chunks) # type: ignore[arg-type] + + new_chunks: _Chunks = tuple( + chunk if size > 1 else 1 + for chunk, size in zip(chunks, array.shape, strict=False) + ) + + return chunkmanager.from_array(array, 
new_chunks) def _logical_any(args): return functools.reduce(operator.or_, args) -def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None): +def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None) -> Any: key = (k for k in key if not isinstance(k, slice)) chunks_hint = getattr(data, "chunks", None) - new_keys = [] + new_keys: tuple[Any, ...] = () for k in key: if isinstance(k, np.ndarray): if is_chunked_array(data): # type: ignore[arg-type] chunkmanager = get_chunked_array_type(data) - new_keys.append( - _chunked_array_with_chunks_hint(k, chunks_hint, chunkmanager) + # TODO: the chunks_hint is the chunks for the whole array, + # and has nothing to do with the axes indexed by `k` + # This is why we need to use `strict=False` :/ + new_keys += ( + _chunked_array_with_chunks_hint(k, chunks_hint, chunkmanager), ) elif isinstance(data, array_type("sparse")): import sparse - new_keys.append(sparse.COO.from_numpy(k)) + new_keys += (sparse.COO.from_numpy(k),) else: - new_keys.append(k) + new_keys += (k,) else: - new_keys.append(k) + new_keys += (k,) mask = _logical_any(k == -1 for k in new_keys) return mask @@ -1367,7 +1524,7 @@ def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None): def create_mask( indexer: ExplicitIndexer, shape: _Shape, data: duckarray[Any, Any] | None = None -): +) -> duckarray[bool, Any]: """Create a mask for indexing with a fill-value. 
Parameters @@ -1495,29 +1652,31 @@ def __init__(self, array): ) self.array = array - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) - def _oindex_get(self, indexer: OuterIndexer): - key = _outer_to_numpy_indexer(indexer, self.array.shape) + def _oindex_get(self, indexer: _OuterIndexerKey) -> np.ndarray: + key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] - def _vindex_get(self, indexer: VectorizedIndexer): - _assert_not_chunked_indexer(indexer.tuple) + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> np.ndarray: + _assert_not_chunked_indexer(indexer) array = NumpyVIndexAdapter(self.array) - return array[indexer.tuple] - - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + return array[indexer] + def __getitem__(self, indexer: _BasicIndexerKey) -> np.ndarray: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). 
- key = indexer.tuple + (Ellipsis,) + key = ( + indexer.tuple + if isinstance(indexer, ExplicitIndexer) + else indexer + ) + (Ellipsis,) return array[key] - def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: + def _safe_setitem(self, array, key: _BasicIndexerKey, value: Any) -> None: try: array[key] = value except ValueError as exc: @@ -1530,21 +1689,24 @@ def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: else: raise exc - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - key = _outer_to_numpy_indexer(indexer, self.array.shape) + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: + key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) self._safe_setitem(self.array, key, value) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: array = NumpyVIndexAdapter(self.array) - self._safe_setitem(array, indexer.tuple, value) + self._safe_setitem(array, indexer, value) - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). 
- key = indexer.tuple + (Ellipsis,) + key = ( + indexer.tuple + if isinstance(indexer, ExplicitIndexer) + else indexer + ) + (Ellipsis,) self._safe_setitem(array, key, value) @@ -1573,30 +1735,28 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _OuterIndexerKey) -> Any: # manual orthogonal indexing (implemented like DaskIndexingAdapter) - key = indexer.tuple value = self.array - for axis, subkey in reversed(list(enumerate(key))): + subkey: Any + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: raise TypeError("Vectorized indexing is not supported") - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) - return self.array[indexer.tuple] + def __getitem__(self, indexer: _BasicIndexerKey) -> Any: + return self.array[indexer] - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - self.array[indexer.tuple] = value + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: + self.array[indexer] = value - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: raise TypeError("Vectorized indexing is not supported") - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) - self.array[indexer.tuple] = value + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: + self.array[indexer] = value def transpose(self, order): xp = self.array.__array_namespace__() @@ -1636,34 +1796,34 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, indexer: OuterIndexer): - key = indexer.tuple + def _oindex_get(self, indexer: _OuterIndexerKey) -> 
Any: try: - return self.array[key] + return self.array[indexer] except NotImplementedError: # manual orthogonal indexing value = self.array - for axis, subkey in reversed(list(enumerate(key))): + subkey: Any + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey,)] return value - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: try: - return self.array.vindex[indexer.tuple] + return self.array.vindex[indexer] except IndexError as e: # TODO: upstream to dask - has_dask = any(is_duck_dask_array(i) for i in indexer.tuple) + has_dask = any(is_duck_dask_array(i) for i in indexer) # this only works for "small" 1d coordinate arrays with one chunk # it is intended for idxmin, idxmax, and allows indexing with # the nD array output of argmin, argmax if ( not has_dask - or len(indexer.tuple) > 1 + or len(indexer) > 1 or math.prod(self.array.numblocks) > 1 or self.array.ndim > 1 ): raise e - (idxr,) = indexer.tuple + (idxr,) = indexer if idxr.ndim == 0: return self.array[idxr.data] else: @@ -1678,26 +1838,24 @@ def _vindex_get(self, indexer: VectorizedIndexer): dtype=self.array.dtype, ) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) - return self.array[indexer.tuple] + def __getitem__(self, indexer: _BasicIndexerKey) -> Any: + return self.array[indexer] - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer.tuple) + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: + num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer) if num_non_slices > 1: raise NotImplementedError( "xarray can't set arrays with multiple array indices to dask yet." 
) - self.array[indexer.tuple] = value + self.array[indexer] = value - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: - self.array.vindex[indexer.tuple] = value + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: + self.array.vindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) - self.array[indexer.tuple] = value + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: + self.array[indexer] = value - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) @@ -1746,7 +1904,7 @@ def get_duck_array(self) -> np.ndarray: def shape(self) -> _Shape: return (len(self.array),) - def _convert_scalar(self, item): + def _convert_scalar(self, item) -> Any: if item is pd.NaT: # work around the impossibility of casting NaT with asarray # note: it probably would be better in general to return @@ -1768,13 +1926,14 @@ def _convert_scalar(self, item): # a NumPy array. 
return to_0d_array(item) - def _prepare_key(self, key: Any | tuple[Any, ...]) -> tuple[Any, ...]: - if isinstance(key, tuple) and len(key) == 1: + def _prepare_key(self, key: ExplicitIndexer | _IndexerKey) -> _IndexerKey: + _key = key.tuple if isinstance(key, ExplicitIndexer) else key + if len(_key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) - (key,) = key + (_key,) = _key - return key + return _key def _handle_result( self, result: Any @@ -1791,7 +1950,7 @@ def _handle_result( return self._convert_scalar(result) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _OuterIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1799,7 +1958,7 @@ def _oindex_get( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1810,7 +1969,7 @@ def _oindex_get( return self._handle_result(result) def _vindex_get( - self, indexer: VectorizedIndexer + self, indexer: _VectorizedIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1818,8 +1977,8 @@ def _vindex_get( | np.datetime64 | np.timedelta64 ): - _assert_not_chunked_indexer(indexer.tuple) - key = self._prepare_key(indexer.tuple) + _assert_not_chunked_indexer(indexer) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1830,7 +1989,7 @@ def _vindex_get( return self._handle_result(result) def __getitem__( - self, indexer: ExplicitIndexer + self, indexer: _BasicIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1838,7 +1997,7 @@ def __getitem__( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = 
NumpyIndexingAdapter(np.asarray(self)) @@ -1908,7 +2067,7 @@ def _convert_scalar(self, item): return super()._convert_scalar(item) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _OuterIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1922,7 +2081,7 @@ def _oindex_get( return result def _vindex_get( - self, indexer: VectorizedIndexer + self, indexer: _VectorizedIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1935,7 +2094,7 @@ def _vindex_get( result.level = self.level return result - def __getitem__(self, indexer: ExplicitIndexer): + def __getitem__(self, indexer: _BasicIndexerKey): result = super().__getitem__(indexer) if isinstance(result, type(self)): result.level = self.level @@ -1957,7 +2116,7 @@ def _get_array_subset(self) -> np.ndarray: if self.size > threshold: pos = threshold // 2 indices = np.concatenate([np.arange(0, pos), np.arange(-pos, 0)]) - subset = self[OuterIndexer((indices,))] + subset = self[(indices,)] else: subset = self diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 95e7d7adfc3..f99360ff078 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -95,6 +95,12 @@ def dtype(self) -> _DType_co: ... _IndexKey = Union[int, slice, EllipsisType] _IndexKeys = tuple[_IndexKey, ...] # tuple[Union[_IndexKey, None], ...] _IndexKeyLike = Union[_IndexKey, _IndexKeys] +_IndexerKey = tuple[Any, ...] +_BasicIndexerKey = tuple[int | np.integer | slice, ...] +_OuterIndexerKey = tuple[ + int | np.integer | slice | np.ndarray[Any, np.dtype[np.integer]], ... +] +_VectorizedIndexerKey = tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] 
_AttrsLike = Union[Mapping[Any, Any], None] diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 333434b30ea..ed3c5cdb5c0 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -237,16 +237,6 @@ def __getitem__(self, key): return key -class IndexerMaker: - def __init__(self, indexer_cls): - self._indexer_cls = indexer_cls - - def __getitem__(self, key): - if not isinstance(key, tuple): - key = (key,) - return self._indexer_cls(key) - - def source_ndarray(array): """Given an ndarray, return the base object which holds its memory, or the object itself. diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 420e30b8526..629d58e3ffd 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -39,6 +39,7 @@ open_mfdataset, save_mfdataset, ) +from xarray.backends.common import BackendArray as LegacyBackendArray from xarray.backends.common import robust_getitem from xarray.backends.h5netcdf_ import H5netcdfBackendEntrypoint from xarray.backends.netcdf3 import _nc3_dtype_coercions @@ -55,6 +56,7 @@ from xarray.coding.variables import SerializationWarning from xarray.conventions import encode_dataset_coordinates from xarray.core import indexing +from xarray.core.indexing import IndexingSupport from xarray.core.options import set_options from xarray.core.utils import module_available from xarray.namedarray.pycompat import array_type @@ -354,7 +356,147 @@ def test_dtype_coercion_error(self) -> None: ds.to_netcdf(path, format=format) -class DatasetIOBase: +class BackendIndexingTestsMixin: + def roundtrip(self, ds: Dataset, open_kwargs=None) -> Dataset: + raise NotImplementedError + + def test_orthogonal_indexing(self) -> None: + in_memory = create_test_data() + with self.roundtrip(in_memory) as on_disk: + indexers = {"dim1": [1, 2, 0], "dim2": [3, 2, 0, 3], "dim3": np.arange(5)} + expected = in_memory.isel(indexers) + actual = on_disk.isel(**indexers) + # make sure the array is not yet loaded 
into memory + assert not actual["var1"].variable._in_memory + assert_identical(expected, actual) + # do it twice, to make sure we're switched from orthogonal -> numpy + # when we cached the values + actual = on_disk.isel(**indexers) + assert_identical(expected, actual) + + def test_vectorized_indexing(self) -> None: + in_memory = create_test_data() + with self.roundtrip(in_memory) as on_disk: + indexers = { + "dim1": DataArray([0, 2, 0], dims="a"), + "dim2": DataArray([0, 2, 3], dims="a"), + } + expected = in_memory.isel(indexers) + actual = on_disk.isel(**indexers) + # make sure the array is not yet loaded into memory + assert not actual["var1"].variable._in_memory + assert_identical(expected, actual.load()) + # do it twice, to make sure we're switched from + # vectorized -> numpy when we cached the values + actual = on_disk.isel(**indexers) + assert_identical(expected, actual) + + def multiple_indexing(indexers): + # make sure a sequence of lazy indexings certainly works. + with self.roundtrip(in_memory) as on_disk: + actual = on_disk["var3"] + expected = in_memory["var3"] + for ind in indexers: + actual = actual.isel(ind) + expected = expected.isel(ind) + # make sure the array is not yet loaded into memory + assert not actual.variable._in_memory + assert_identical(expected, actual.load()) + + # two-staged vectorized-indexing + indexers2 = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": DataArray([[0, 4], [1, 3], [2, 2]], dims=["a", "b"]), + }, + {"a": DataArray([0, 1], dims=["c"]), "b": DataArray([0, 1], dims=["c"])}, + ] + multiple_indexing(indexers2) + + # vectorized-slice mixed + indexers3 = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": slice(None, 10), + } + ] + multiple_indexing(indexers3) + + # vectorized-integer mixed + indexers4 = [ + {"dim3": 0}, + {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, + {"a": slice(None, None, 2)}, + ] + multiple_indexing(indexers4) + + # 
vectorized-integer mixed + indexers5 = [ + {"dim3": 0}, + {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, + {"a": 1, "b": 0}, + ] + multiple_indexing(indexers5) + + def test_vectorized_indexing_negative_step(self) -> None: + # use dask explicitly when present + open_kwargs: dict[str, Any] | None + if has_dask: + open_kwargs = {"chunks": {}} + else: + open_kwargs = None + in_memory = create_test_data() + + def multiple_indexing(indexers): + # make sure a sequence of lazy indexings certainly works. + with self.roundtrip(in_memory, open_kwargs=open_kwargs) as on_disk: + actual = on_disk["var3"] + expected = in_memory["var3"] + for ind in indexers: + actual = actual.isel(ind) + expected = expected.isel(ind) + # make sure the array is not yet loaded into memory + assert not actual.variable._in_memory + assert_identical(expected, actual.load()) + + # with negative step slice. + indexers = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": slice(-1, 1, -1), + } + ] + multiple_indexing(indexers) + + # with negative step slice. + indexers = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": slice(-1, 1, -2), + } + ] + multiple_indexing(indexers) + + def test_outer_indexing_reversed(self) -> None: + # regression test for GH6560 + ds = xr.Dataset( + {"z": (("t", "p", "y", "x"), np.ones((1, 1, 31, 40)))}, + ) + + with self.roundtrip(ds) as on_disk: + subset = on_disk.isel(t=[0], p=0).z[:, ::10, ::10][:, ::-1, :] + assert subset.sizes == subset.load().sizes + + def test_isel_dataarray(self) -> None: + # Make sure isel works lazily. 
GH:issue:1688 + in_memory = create_test_data() + with self.roundtrip(in_memory) as on_disk: + expected = in_memory.isel(dim2=in_memory["dim2"] < 3) + actual = on_disk.isel(dim2=on_disk["dim2"] < 3) + assert_identical(expected, actual) + + +class DatasetIOBase(BackendIndexingTestsMixin): engine: T_NetcdfEngine | None = None file_format: T_NetcdfTypes | None = None @@ -706,141 +848,6 @@ def test_roundtrip_boolean_dtype(self) -> None: assert_identical(original, actual2) assert actual2["x"].dtype == "bool" - def test_orthogonal_indexing(self) -> None: - in_memory = create_test_data() - with self.roundtrip(in_memory) as on_disk: - indexers = {"dim1": [1, 2, 0], "dim2": [3, 2, 0, 3], "dim3": np.arange(5)} - expected = in_memory.isel(indexers) - actual = on_disk.isel(**indexers) - # make sure the array is not yet loaded into memory - assert not actual["var1"].variable._in_memory - assert_identical(expected, actual) - # do it twice, to make sure we're switched from orthogonal -> numpy - # when we cached the values - actual = on_disk.isel(**indexers) - assert_identical(expected, actual) - - def test_vectorized_indexing(self) -> None: - in_memory = create_test_data() - with self.roundtrip(in_memory) as on_disk: - indexers = { - "dim1": DataArray([0, 2, 0], dims="a"), - "dim2": DataArray([0, 2, 3], dims="a"), - } - expected = in_memory.isel(indexers) - actual = on_disk.isel(**indexers) - # make sure the array is not yet loaded into memory - assert not actual["var1"].variable._in_memory - assert_identical(expected, actual.load()) - # do it twice, to make sure we're switched from - # vectorized -> numpy when we cached the values - actual = on_disk.isel(**indexers) - assert_identical(expected, actual) - - def multiple_indexing(indexers): - # make sure a sequence of lazy indexings certainly works. 
- with self.roundtrip(in_memory) as on_disk: - actual = on_disk["var3"] - expected = in_memory["var3"] - for ind in indexers: - actual = actual.isel(ind) - expected = expected.isel(ind) - # make sure the array is not yet loaded into memory - assert not actual.variable._in_memory - assert_identical(expected, actual.load()) - - # two-staged vectorized-indexing - indexers2 = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": DataArray([[0, 4], [1, 3], [2, 2]], dims=["a", "b"]), - }, - {"a": DataArray([0, 1], dims=["c"]), "b": DataArray([0, 1], dims=["c"])}, - ] - multiple_indexing(indexers2) - - # vectorized-slice mixed - indexers3 = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": slice(None, 10), - } - ] - multiple_indexing(indexers3) - - # vectorized-integer mixed - indexers4 = [ - {"dim3": 0}, - {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, - {"a": slice(None, None, 2)}, - ] - multiple_indexing(indexers4) - - # vectorized-integer mixed - indexers5 = [ - {"dim3": 0}, - {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, - {"a": 1, "b": 0}, - ] - multiple_indexing(indexers5) - - def test_vectorized_indexing_negative_step(self) -> None: - # use dask explicitly when present - open_kwargs: dict[str, Any] | None - if has_dask: - open_kwargs = {"chunks": {}} - else: - open_kwargs = None - in_memory = create_test_data() - - def multiple_indexing(indexers): - # make sure a sequence of lazy indexings certainly works. - with self.roundtrip(in_memory, open_kwargs=open_kwargs) as on_disk: - actual = on_disk["var3"] - expected = in_memory["var3"] - for ind in indexers: - actual = actual.isel(ind) - expected = expected.isel(ind) - # make sure the array is not yet loaded into memory - assert not actual.variable._in_memory - assert_identical(expected, actual.load()) - - # with negative step slice. 
- indexers = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": slice(-1, 1, -1), - } - ] - multiple_indexing(indexers) - - # with negative step slice. - indexers = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": slice(-1, 1, -2), - } - ] - multiple_indexing(indexers) - - def test_outer_indexing_reversed(self) -> None: - # regression test for GH6560 - ds = xr.Dataset( - {"z": (("t", "p", "y", "x"), np.ones((1, 1, 31, 40)))}, - ) - - with self.roundtrip(ds) as on_disk: - subset = on_disk.isel(t=[0], p=0).z[:, ::10, ::10][:, ::-1, :] - assert subset.sizes == subset.load().sizes - - def test_isel_dataarray(self) -> None: - # Make sure isel works lazily. GH:issue:1688 - in_memory = create_test_data() - with self.roundtrip(in_memory) as on_disk: - expected = in_memory.isel(dim2=in_memory["dim2"] < 3) - actual = on_disk.isel(dim2=on_disk["dim2"] < 3) - assert_identical(expected, actual) - def validate_array_type(self, ds): # Make sure that only NumpyIndexingAdapter stores a bare np.ndarray. 
def find_and_validate_array(obj): @@ -6594,3 +6601,83 @@ def test_h5netcdf_storage_options() -> None: storage_options={"skip_instance_cache": False}, ) assert_identical(xr.concat([ds1, ds2], dim="time"), ds) + + +class LegacyBackendArrayWrapper(LegacyBackendArray): + def __init__(self, array: np.ndarray, indexing_support: IndexingSupport): + self.shape = array.shape + self.dtype = array.dtype + self.array = array + self.indexing_support = indexing_support + + def __getitem__(self, key: indexing.ExplicitIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def _getitem(self, key: tuple[Any, ...]) -> np.ndarray: + return self.array[key] + + +def indexing_tests(*, indexing_support: IndexingSupport): + def wrapper(cls): + class NewClass(cls): + cls.indexing_support = indexing_support + + def roundtrip(self, ds: Dataset, *, open_kwargs=None) -> Dataset: + ds = ds.copy(deep=True) + for name in list(ds.data_vars) + list( + set(ds.coords) - set(ds.xindexes) + ): + var = ds._variables[name] + ds._variables[name] = var.copy( + # These tests assume that indexing is lazy (checks ._in_memory), + # so wrapping by LazilyIndexedArray is required. 
+ data=indexing.LazilyIndexedArray( + LegacyBackendArrayWrapper(var.data, self.indexing_support) + ) + ) + return ds + + # def test_vectorized_indexing_negative_step(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_vectorized_indexing_negative_step() + + # def test_isel_dataarray(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_isel_dataarray() + + # def test_vectorized_indexing(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_vectorized_indexing() + + # def test_orthogonal_indexing(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_orthogonal_indexing() + + # def test_outer_indexing_reversed(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_outer_indexing_reversed() + + return NewClass + + return wrapper + + +@indexing_tests(indexing_support=IndexingSupport.BASIC) +class TestBasicIndexingLegacyBackend(BackendIndexingTestsMixin): + pass + + +@indexing_tests(indexing_support=IndexingSupport.OUTER_1VECTOR) +class TestOuter1VectorIndexingLegacyBackend(BackendIndexingTestsMixin): + pass + + +# @indexing_tests(indexing_support=IndexingSupport.OUTER) +# class TestOuterIndexingLegacyBackend(BackendIndexingTestsMixin): +# pass + +# @indexing_tests(indexing_support=IndexingSupport.VECTORIZED) +# class TestVectorizedIndexingLegacyBackend(BackendIndexingTestsMixin): +# pass diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index 17179a44a8a..af697b5c383 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -7,9 +7,7 @@ from xarray import Variable from xarray.coding import strings -from xarray.core import indexing from xarray.tests import ( - IndexerMaker, assert_array_equal, assert_identical, requires_dask, @@ -150,10 +148,9 @@ def test_StackedBytesArray() -> None: assert len(actual) == len(expected) assert_array_equal(expected, actual) - B 
= IndexerMaker(indexing.BasicIndexer) - assert_array_equal(expected[:1], actual[B[:1]]) + assert_array_equal(expected[:1], actual[(slice(1),)]) with pytest.raises(IndexError): - actual[B[:, :2]] + actual[slice(None), slice(2)] def test_StackedBytesArray_scalar() -> None: @@ -168,10 +165,8 @@ def test_StackedBytesArray_scalar() -> None: with pytest.raises(TypeError): len(actual) np.testing.assert_array_equal(expected, actual) - - B = IndexerMaker(indexing.BasicIndexer) with pytest.raises(IndexError): - actual[B[:2]] + actual[(slice(2),)] def test_StackedBytesArray_vectorized_indexing() -> None: @@ -179,9 +174,7 @@ def test_StackedBytesArray_vectorized_indexing() -> None: stacked = strings.StackedBytesArray(array) expected = np.array([[b"abc", b"def"], [b"def", b"abc"]]) - V = IndexerMaker(indexing.VectorizedIndexer) - indexer = V[np.array([[0, 1], [1, 0]])] - actual = stacked.vindex[indexer] + actual = stacked.vindex[(np.array([[0, 1], [1, 0]]),)] assert_array_equal(actual, expected) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index f3867bd67d2..fb3b71d5fa2 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -248,7 +248,7 @@ def get_array(self): return self.array def __getitem__(self, key): - return self.array[key.tuple] + return self.array[(key if isinstance(key, tuple) else key.tuple)] class AccessibleAsDuckArrayDataStore(backends.InMemoryDataStore): @@ -5215,7 +5215,8 @@ def test_lazy_load(self) -> None: ds.isel(time=10) ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) - def test_lazy_load_duck_array(self) -> None: + @pytest.mark.parametrize("decode_cf", [True, False]) + def test_lazy_load_duck_array(self, decode_cf) -> None: store = AccessibleAsDuckArrayDataStore() create_test_data().dump_to_store(store) @@ -5230,13 +5231,11 @@ def test_lazy_load_duck_array(self) -> None: ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) repr(ds) - # preserve the duck array type and don't cast to array - 
assert isinstance(ds["var1"].load().data, DuckArrayWrapper) - assert isinstance( - ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper - ) + # preserve the duck array type and don't cast to array + assert isinstance(ds["var1"].load().data, DuckArrayWrapper) + assert isinstance(ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper) - ds.close() + ds.close() def test_dropna(self) -> None: x = np.random.randn(4, 4) diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index d9784e6a62e..ae620adaac8 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -12,7 +12,6 @@ from xarray.core.indexes import PandasIndex, PandasMultiIndex from xarray.core.types import T_Xarray from xarray.tests import ( - IndexerMaker, ReturnItem, assert_array_equal, assert_identical, @@ -20,8 +19,6 @@ requires_dask, ) -B = IndexerMaker(indexing.BasicIndexer) - class TestIndexCallable: def test_getitem(self): @@ -425,7 +422,7 @@ def test_lazily_indexed_array_vindex_setitem(self) -> None: NotImplementedError, match=r"Lazy item assignment with the vectorized indexer is not yet", ): - lazy.vindex[indexer] = 0 + lazy.vindex[indexer.tuple] = 0 @pytest.mark.parametrize( "indexer_class, key, value", @@ -441,10 +438,10 @@ def test_lazily_indexed_array_setitem(self, indexer_class, key, value) -> None: if indexer_class is indexing.BasicIndexer: indexer = indexer_class(key) - lazy[indexer] = value + lazy[indexer.tuple] = value elif indexer_class is indexing.OuterIndexer: indexer = indexer_class(key) - lazy.oindex[indexer] = value + lazy.oindex[indexer.tuple] = value assert_array_equal(original[key], value) @@ -453,16 +450,16 @@ class TestCopyOnWriteArray: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - wrapped[B[:]] = 0 + wrapped[(slice(None),)] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.zeros(10)) def test_sub_array(self) -> None: original = 
np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - child = wrapped[B[:5]] + child = wrapped[(slice(5),)] assert isinstance(child, indexing.CopyOnWriteArray) - child[B[:]] = 0 + child[(slice(None),)] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.arange(10)) assert_array_equal(child, np.zeros(5)) @@ -470,7 +467,7 @@ def test_sub_array(self) -> None: def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.CopyOnWriteArray(np.array(["foo", "bar"])) - assert np.array(x[B[0]][B[()]]) == "foo" + assert np.array(x[(0,)][()]) == "foo" class TestMemoryCachedArray: @@ -483,7 +480,7 @@ def test_wrapper(self) -> None: def test_sub_array(self) -> None: original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) - child = wrapped[B[:5]] + child = wrapped[(slice(5),)] assert isinstance(child, indexing.MemoryCachedArray) assert_array_equal(child, np.arange(5)) assert isinstance(child.array, indexing.NumpyIndexingAdapter) @@ -492,13 +489,13 @@ def test_sub_array(self) -> None: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.MemoryCachedArray(original) - wrapped[B[:]] = 0 + wrapped[(slice(None),)] = 0 assert_array_equal(original, np.zeros(10)) def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.MemoryCachedArray(np.array(["foo", "bar"])) - assert np.array(x[B[0]][B[()]]) == "foo" + assert np.array(x[(0,)][()]) == "foo" def test_base_explicit_indexer() -> None: @@ -607,7 +604,7 @@ def test_arrayize_vectorized_indexer(self) -> None: vindex, self.data.shape ) np.testing.assert_array_equal( - self.data.vindex[vindex], self.data.vindex[vindex_array] + self.data.vindex[vindex.tuple], self.data.vindex[vindex_array.tuple] ) actual = indexing._arrayize_vectorized_indexer( @@ -636,7 +633,8 @@ def test_arrayize_vectorized_indexer(self) -> None: np.testing.assert_array_equal(b, np.arange(5)[:, np.newaxis]) -def get_indexers(shape, 
mode): +def get_indexers(shape: tuple[int, ...], mode) -> indexing.ExplicitIndexer: + indexer: tuple[Any, ...] if mode == "vectorized": indexed_shape = (3, 4) indexer = tuple(np.random.randint(0, s, size=indexed_shape) for s in shape) @@ -665,7 +663,7 @@ def get_indexers(shape, mode): return indexing.BasicIndexer(tuple(indexer)) elif mode == "basic1": # basic indexer - return indexing.BasicIndexer((3,)) + return indexing.BasicIndexer((2,) * len(shape)) elif mode == "basic2": # basic indexer indexer = [0, 2, 4] @@ -723,35 +721,35 @@ def test_decompose_indexers(shape, indexer_mode, indexing_support) -> None: # Dispatch to appropriate indexing method if indexer_mode.startswith("vectorized"): - expected = indexing_adapter.vindex[indexer] + expected = indexing_adapter.vindex[indexer.tuple] elif indexer_mode.startswith("outer"): - expected = indexing_adapter.oindex[indexer] + expected = indexing_adapter.oindex[indexer.tuple] else: - expected = indexing_adapter[indexer] # Basic indexing + expected = indexing_adapter[indexer.tuple] # Basic indexing if isinstance(backend_ind, indexing.VectorizedIndexer): - array = indexing_adapter.vindex[backend_ind] + array = indexing_adapter.vindex[backend_ind.tuple] elif isinstance(backend_ind, indexing.OuterIndexer): - array = indexing_adapter.oindex[backend_ind] + array = indexing_adapter.oindex[backend_ind.tuple] else: - array = indexing_adapter[backend_ind] + array = indexing_adapter[backend_ind.tuple] if len(np_ind.tuple) > 0: array_indexing_adapter = indexing.NumpyIndexingAdapter(array) if isinstance(np_ind, indexing.VectorizedIndexer): - array = array_indexing_adapter.vindex[np_ind] + array = array_indexing_adapter.vindex[np_ind.tuple] elif isinstance(np_ind, indexing.OuterIndexer): - array = array_indexing_adapter.oindex[np_ind] + array = array_indexing_adapter.oindex[np_ind.tuple] else: - array = array_indexing_adapter[np_ind] + array = array_indexing_adapter[np_ind.tuple] np.testing.assert_array_equal(expected, array) if not 
all(isinstance(k, indexing.integer_types) for k in np_ind.tuple): combined_ind = indexing._combine_indexers(backend_ind, shape, np_ind) assert isinstance(combined_ind, indexing.VectorizedIndexer) - array = indexing_adapter.vindex[combined_ind] + array = indexing_adapter.vindex[combined_ind.tuple] np.testing.assert_array_equal(expected, array) @@ -824,14 +822,14 @@ def test_create_mask_outer_indexer() -> None: def test_create_mask_vectorized_indexer() -> None: indexer = indexing.VectorizedIndexer((np.array([0, -1, 2]), np.array([0, 1, -1]))) expected = np.array([False, True, True]) - actual = indexing.create_mask(indexer, (5,)) + actual = indexing.create_mask(indexer, (5, 5)) np.testing.assert_array_equal(expected, actual) indexer = indexing.VectorizedIndexer( (np.array([0, -1, 2]), slice(None), np.array([0, 1, -1])) ) expected = np.array([[False, True, True]] * 2).T - actual = indexing.create_mask(indexer, (5, 2)) + actual = indexing.create_mask(indexer, (5, 2, 5)) np.testing.assert_array_equal(expected, actual) @@ -845,13 +843,14 @@ def test_create_mask_basic_indexer() -> None: np.testing.assert_array_equal(False, actual) +@requires_dask def test_create_mask_dask() -> None: - da = pytest.importorskip("dask.array") + import dask.array as da indexer = indexing.OuterIndexer((1, slice(2), np.array([0, -1, 2]))) expected = np.array(2 * [[False, True, False]]) actual = indexing.create_mask( - indexer, (5, 5, 5), da.empty((2, 3), chunks=((1, 1), (2, 1))) + indexer, (5, 5, 5), da.empty((2, 3, 3), chunks=((1, 1), (2, 1), (3,))) ) assert actual.chunks == ((1, 1), (2, 1)) np.testing.assert_array_equal(expected, actual) @@ -861,7 +860,7 @@ def test_create_mask_dask() -> None: ) expected = np.array([[False, True, True]] * 2).T actual = indexing.create_mask( - indexer_vec, (5, 2), da.empty((3, 2), chunks=((3,), (2,))) + indexer_vec, (3, 2), da.empty((3, 2, 3), chunks=((3,), (2,), (3,))) ) assert isinstance(actual, da.Array) np.testing.assert_array_equal(expected, actual) pFad - 
Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy