From 30299439cae3f1102f60b75620abeced430fcd89 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Tue, 9 Apr 2024 17:43:28 -0700 Subject: [PATCH 01/32] temporary enable CI triggers on feature branch --- .github/workflows/ci-additional.yaml | 2 ++ .github/workflows/ci.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index c0f978fb0d8..bc2eb8d2cac 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -3,6 +3,7 @@ on: push: branches: - "main" + - "backend-indexing" pull_request: branches: - "main" @@ -12,6 +13,7 @@ on: - '/*' # covers files such as `pyproject.toml` - 'properties/**' - 'xarray/**' + - "backend-indexing" workflow_dispatch: # allows you to trigger manually concurrency: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b9b15d867a7..ca9ef397962 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -3,6 +3,7 @@ on: push: branches: - "main" + - "backend-indexing" pull_request: branches: - "main" @@ -12,6 +13,7 @@ on: - '/*' # covers files such as `pyproject.toml` - 'properties/**' - 'xarray/**' + - "backend-indexing" workflow_dispatch: # allows you to trigger manually concurrency: From ddd4cdb59a5793b9a15a28b6b0475eed95739916 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Tue, 16 Apr 2024 18:53:22 -0700 Subject: [PATCH 02/32] add `.oindex` and `.vindex` to `BackendArray` (#8885) * add .oindex and .vindex to BackendArray * Add support for .oindex and .vindex in H5NetCDFArrayWrapper * Add support for .oindex and .vindex in NetCDF4ArrayWrapper, PydapArrayWrapper, NioArrayWrapper, and ZarrArrayWrapper * add deprecation warning * Fix deprecation warning message formatting * add tests * Update xarray/core/indexing.py Co-authored-by: Deepak Cherian * Update ZarrArrayWrapper class in xarray/backends/zarr.py Co-authored-by: Deepak Cherian --------- Co-authored-by: Deepak Cherian --- xarray/backends/common.py | 18 +++++++++++++ xarray/backends/h5netcdf_.py | 12 ++++++++- xarray/backends/netCDF4_.py | 12 ++++++++- xarray/backends/pydap_.py | 12 ++++++++- xarray/backends/scipy_.py | 33 ++++++++++++++++------- xarray/backends/zarr.py | 49 ++++++++++++++++++++++------------- xarray/core/indexing.py | 36 ++++++++++++++++++++----- xarray/tests/test_backends.py | 46 ++++++++++++++++++++++++++++++++ 8 files changed, 182 insertions(+), 36 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index f318b4dd42f..f8f073f86a1 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -210,6 +210,24 @@ def get_duck_array(self, dtype: np.typing.DTypeLike = None): key = indexing.BasicIndexer((slice(None),) * self.ndim) return self[key] # type: ignore [index] + def _oindex_get(self, key: indexing.OuterIndexer): + raise NotImplementedError( + f"{self.__class__.__name__}._oindex_get method should be overridden" + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + raise NotImplementedError( + f"{self.__class__.__name__}._vindex_get method should be overridden" + ) + + @property + def oindex(self) -> indexing.IndexCallable: + return indexing.IndexCallable(self._oindex_get) + + @property + def vindex(self) -> indexing.IndexCallable: + return indexing.IndexCallable(self._vindex_get) + class AbstractDataStore: __slots__ = () diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 71463193939..07973c3cbd9 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -48,7 +48,17 @@ def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) return ds.variables[self.variable_name] - def __getitem__(self, key): + def _oindex_get(self, key: indexing.OuterIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + + def __getitem__(self, key: indexing.BasicIndexer): return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem ) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index ae86c4ce384..33d636b59cf 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -97,7 +97,17 @@ def get_array(self, needs_lock=True): variable.set_auto_chartostring(False) return variable - def __getitem__(self, key): + def _oindex_get(self, key: indexing.OuterIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + ) + + def __getitem__(self, key: indexing.BasicIndexer): return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.OUTER, self._getitem ) diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 5a475a7c3be..2ce3a579b2d 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -43,7 +43,17 @@ def shape(self) -> tuple[int, ...]: def dtype(self): return self.array.dtype - def __getitem__(self, key): + def _oindex_get(self, key: indexing.OuterIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + ) + + def __getitem__(self, key: indexing.BasicIndexer): return indexing.explicit_indexing_adapter( key, self.shape, indexing.IndexingSupport.BASIC, self._getitem ) diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index f8c486e512c..cd2217c567f 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -67,15 +67,7 @@ def get_variable(self, needs_lock=True): ds = self.datastore._manager.acquire(needs_lock) return ds.variables[self.variable_name] - def _getitem(self, key): - with self.datastore.lock: - data = self.get_variable(needs_lock=False).data - return data[key] - - def __getitem__(self, key): - data = indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem - ) + def _finalize_result(self, data): # Copy data if the source file is mmapped. This makes things consistent # with the netCDF4 library by ensuring we can safely read arrays even # after closing associated files. @@ -88,6 +80,29 @@ def __getitem__(self, key): return np.array(data, dtype=self.dtype, copy=copy) + def _getitem(self, key): + with self.datastore.lock: + data = self.get_variable(needs_lock=False).data + return data[key] + + def _vindex_get(self, key: indexing.VectorizedIndexer): + data = indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + return self._finalize_result(data) + + def _oindex_get(self, key: indexing.OuterIndexer): + data = indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + return self._finalize_result(data) + + def __getitem__(self, key): + data = indexing.explicit_indexing_adapter( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + ) + return self._finalize_result(data) + def __setitem__(self, key, value): with self.datastore.lock: data = self.get_variable(needs_lock=False) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index e4a684e945d..4c2e8be0c16 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -85,25 +85,38 @@ def __init__(self, zarr_array): def get_array(self): return self._array - def _oindex(self, key): - return self._array.oindex[key] - - def _vindex(self, key): - return self._array.vindex[key] - - def _getitem(self, key): - return self._array[key] - - def __getitem__(self, key): - array = self._array - if isinstance(key, indexing.BasicIndexer): - method = self._getitem - elif isinstance(key, indexing.VectorizedIndexer): - method = self._vindex - elif isinstance(key, indexing.OuterIndexer): - method = self._oindex + def _oindex_get(self, key: indexing.OuterIndexer): + def raw_indexing_method(key): + return self._array.oindex[key] + + return indexing.explicit_indexing_adapter( + key, + self._array.shape, + indexing.IndexingSupport.VECTORIZED, + raw_indexing_method, + ) + + def _vindex_get(self, key: indexing.VectorizedIndexer): + + def raw_indexing_method(key): + return self._array.vindex[key] + + return indexing.explicit_indexing_adapter( + key, + self._array.shape, + indexing.IndexingSupport.VECTORIZED, + raw_indexing_method, + ) + + def __getitem__(self, key: indexing.BasicIndexer): + def raw_indexing_method(key): + return self._array[key] + return indexing.explicit_indexing_adapter( - key, array.shape, indexing.IndexingSupport.VECTORIZED, method + key, + self._array.shape, + indexing.IndexingSupport.VECTORIZED, + raw_indexing_method, ) # if self.ndim == 0: diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 0926da6fd80..7d6191883e1 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -3,6 +3,7 @@ import enum import functools import operator +import warnings from collections import Counter, defaultdict from collections.abc import Hashable, Iterable, Mapping from contextlib import suppress @@ -588,6 +589,14 @@ def __getitem__(self, key: Any): return result +BackendArray_fallback_warning_message = ( + "The array `{0}` does not support indexing using the .vindex and .oindex properties. " + "The __getitem__ method is being used instead. This fallback behavior will be " + "removed in a future version. Please ensure that the backend array `{1}` implements " + "support for the .vindex and .oindex properties to avoid potential issues." +) + + class LazilyIndexedArray(ExplicitlyIndexedNDArrayMixin): """Wrap an array to make basic and outer indexing lazy.""" @@ -639,11 +648,18 @@ def shape(self) -> _Shape: return tuple(shape) def get_duck_array(self): - if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): + try: array = apply_indexer(self.array, self.key) - else: + except NotImplementedError as _: # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray so use its __getitem__ + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ + warnings.warn( + BackendArray_fallback_warning_message.format( + self.array.__class__.__name__, self.array.__class__.__name__ + ), + category=DeprecationWarning, + stacklevel=2, + ) array = self.array[self.key] # self.array[self.key] is now a numpy array when @@ -715,12 +731,20 @@ def shape(self) -> _Shape: return np.broadcast(*self.key.tuple).shape def get_duck_array(self): - if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): + try: array = apply_indexer(self.array, self.key) - else: + except NotImplementedError as _: # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray so use its __getitem__ + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ + warnings.warn( + BackendArray_fallback_warning_message.format( + self.array.__class__.__name__, self.array.__class__.__name__ + ), + category=PendingDeprecationWarning, + stacklevel=2, + ) array = self.array[self.key] + # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0126b130e7c..d7471ecbaf9 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5787,3 +5787,49 @@ def test_zarr_region_chunk_partial_offset(tmp_path): # This write is unsafe, and should raise an error, but does not. # with pytest.raises(ValueError): # da.isel(x=slice(5, 25)).chunk(x=(10, 10)).to_zarr(store, region="auto") + + +def test_backend_array_deprecation_warning(capsys): + class CustomBackendArray(xr.backends.common.BackendArray): + def __init__(self): + array = self.get_array() + self.shape = array.shape + self.dtype = array.dtype + + def get_array(self): + return np.arange(10) + + def __getitem__(self, key): + return xr.core.indexing.explicit_indexing_adapter( + key, self.shape, xr.core.indexing.IndexingSupport.BASIC, self._getitem + ) + + def _getitem(self, key): + array = self.get_array() + return array[key] + + cba = CustomBackendArray() + indexer = xr.core.indexing.VectorizedIndexer(key=(np.array([0]),)) + + la = xr.core.indexing.LazilyIndexedArray(cba, indexer) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + la.vindex[indexer].get_duck_array() + + captured = capsys.readouterr() + assert len(w) == 1 + assert issubclass(w[-1].category, PendingDeprecationWarning) + assert ( + "The array `CustomBackendArray` does not support indexing using the .vindex and .oindex properties." + in str(w[-1].message) + ) + assert "The __getitem__ method is being used instead." in str(w[-1].message) + assert "This fallback behavior will be removed in a future version." in str( + w[-1].message + ) + assert ( + "Please ensure that the backend array `CustomBackendArray` implements support for the .vindex and .oindex properties to avoid potential issues." + in str(w[-1].message) + ) + assert captured.out == "" From 96ac4b7f2879268fe03e012114a96f3e680e44c6 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Fri, 3 May 2024 08:27:22 -0700 Subject: [PATCH 03/32] Enable explicit use of key tuples (instead of *Indexer objects) in indexing adapters and explicitly indexed arrays (#8870) * pass key tuple to indexing adapters and explicitly indexed arrays * update indexing in StackedBytesArray * Update indexing in StackedBytesArray * Add _IndexerKey type to _typing.py * Update indexing in StackedBytesArray * use tuple indexing in test_backend_array_deprecation_warning * Add support for CompatIndexedTuple in explicit indexing adapter This commit updates the `explicit_indexing_adapter` function to accept both `ExplicitIndexer` and the new `CompatIndexedTuple`. The `CompatIndexedTuple` is designed to facilitate the transition towards using raw tuples by carrying additional metadata about the indexing type (basic, vectorized, or outer). * remove unused code * type hint fixes * fix docstrings * fix tests * fix docstrings * Apply suggestions from code review Co-authored-by: Deepak Cherian * update docstrings and pass tuples directly * Some test cleanup * update docstring * use `BasicIndexer` instead of `CompatIndexedTuple` * support explicit indexing with tuples * fix mypy errors * remove unused IndexerMaker * Update LazilyIndexedArray._updated_key to support explicit indexing with tuples --------- Co-authored-by: Deepak Cherian Co-authored-by: Deepak Cherian --- xarray/coding/strings.py | 20 +- xarray/coding/variables.py | 6 +- xarray/core/indexing.py | 280 ++++++++++++++++------------ xarray/namedarray/_typing.py | 1 + xarray/tests/__init__.py | 10 - xarray/tests/test_backends.py | 2 +- xarray/tests/test_coding_strings.py | 15 +- xarray/tests/test_dataset.py | 32 ++-- xarray/tests/test_indexing.py | 45 +++-- 9 files changed, 212 insertions(+), 199 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index db95286f6aa..6df92c256b9 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -17,6 +17,7 @@ from xarray.core import indexing from xarray.core.utils import module_available from xarray.core.variable import Variable +from xarray.namedarray._typing import _IndexerKey from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array @@ -220,8 +221,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically stacked along the last dimension. - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer] + >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[(slice(None),)] array(b'abc', dtype='|S3') """ @@ -240,7 +240,7 @@ def __init__(self, array): @property def dtype(self): - return np.dtype("S" + str(self.array.shape[-1])) + return np.dtype(f"S{str(self.array.shape[-1])}") @property def shape(self) -> tuple[int, ...]: @@ -249,15 +249,17 @@ def shape(self) -> tuple[int, ...]: def __repr__(self): return f"{type(self).__name__}({self.array!r})" - def _vindex_get(self, key): + def _vindex_get(self, key: _IndexerKey): return _numpy_char_to_bytes(self.array.vindex[key]) - def _oindex_get(self, key): + def _oindex_get(self, key: _IndexerKey): return _numpy_char_to_bytes(self.array.oindex[key]) - def __getitem__(self, key): + def __getitem__(self, key: _IndexerKey): + from xarray.core.indexing import BasicIndexer + # require slicing the last dimension completely - key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) - if key.tuple[-1] != slice(None): + indexer = indexing.expanded_indexer(key, self.array.ndim) + if indexer[-1] != slice(None): raise IndexError("too many indices") - return _numpy_char_to_bytes(self.array[key]) + return _numpy_char_to_bytes(self.array[BasicIndexer(indexer)]) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index d31cb6e626a..98bbbbaeb2c 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -99,8 +99,7 @@ class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): >>> NativeEndiannessArray(x).dtype dtype('int16') - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> NativeEndiannessArray(x)[indexer].dtype + >>> NativeEndiannessArray(x)[(slice(None),)].dtype dtype('int16') """ @@ -137,8 +136,7 @@ class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): >>> BoolTypeArray(x).dtype dtype('bool') - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> BoolTypeArray(x)[indexer].dtype + >>> BoolTypeArray(x)[(slice(None),)].dtype dtype('bool') """ diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 7d6191883e1..2b8cd202e4e 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -10,7 +10,7 @@ from dataclasses import dataclass, field from datetime import timedelta from html import escape -from typing import TYPE_CHECKING, Any, Callable, overload +from typing import TYPE_CHECKING, Any, Callable, Literal, overload import numpy as np import pandas as pd @@ -36,7 +36,7 @@ from xarray.core.indexes import Index from xarray.core.variable import Variable - from xarray.namedarray._typing import _Shape, duckarray + from xarray.namedarray._typing import _IndexerKey, _Shape, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -324,13 +324,13 @@ class ExplicitIndexer: __slots__ = ("_key",) - def __init__(self, key: tuple[Any, ...]): + def __init__(self, key: _IndexerKey): if type(self) is ExplicitIndexer: raise TypeError("cannot instantiate base ExplicitIndexer objects") self._key = tuple(key) @property - def tuple(self) -> tuple[Any, ...]: + def tuple(self) -> _IndexerKey: return self._key def __repr__(self) -> str: @@ -516,30 +516,29 @@ class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed): __slots__ = () def get_duck_array(self): - key = BasicIndexer((slice(None),) * self.ndim) - return self[key] + return self[(slice(None),) * self.ndim] def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: # This is necessary because we apply the indexing key in self.get_duck_array() # Note this is the base class for all lazy indexing classes return np.asarray(self.get_duck_array(), dtype=dtype) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._oindex_set method should be overridden" ) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._vindex_set method should be overridden" ) @@ -575,9 +574,9 @@ def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: def get_duck_array(self): return self.array.get_duck_array() - def __getitem__(self, key: Any): - key = expanded_indexer(key, self.ndim) - indexer = self.indexer_cls(key) + def __getitem__(self, key: _IndexerKey | slice): + _key = expanded_indexer(key, self.ndim) + indexer = self.indexer_cls(_key) result = apply_indexer(self.array, indexer) @@ -623,8 +622,13 @@ def __init__(self, array: Any, key: ExplicitIndexer | None = None): self.array = as_indexable(array) self.key = key - def _updated_key(self, new_key: ExplicitIndexer) -> BasicIndexer | OuterIndexer: - iter_new_key = iter(expanded_indexer(new_key.tuple, self.ndim)) + def _updated_key( + self, new_key: ExplicitIndexer | _IndexerKey + ) -> BasicIndexer | OuterIndexer: + _new_key_tuple = ( + new_key.tuple if isinstance(new_key, ExplicitIndexer) else new_key + ) + iter_new_key = iter(expanded_indexer(_new_key_tuple, self.ndim)) full_key = [] for size, k in zip(self.array.shape, self.key.tuple): if isinstance(k, integer_types): @@ -673,31 +677,29 @@ def get_duck_array(self): def transpose(self, order): return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): return type(self)(self.array, self._updated_key(indexer)) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): array = LazilyVectorizedIndexedArray(self.array, self.key) return array.vindex[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _IndexerKey): return type(self)(self.array, self._updated_key(indexer)) - def _vindex_set(self, key: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, key: _IndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." ) - def _oindex_set(self, key: OuterIndexer, value: Any) -> None: - full_key = self._updated_key(key) - self.array.oindex[full_key] = value + def _oindex_set(self, key: _IndexerKey, value: Any) -> None: + full_key = self._updated_key(OuterIndexer(key)) + self.array.oindex[full_key.tuple] = value - def __setitem__(self, key: BasicIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(key) - full_key = self._updated_key(key) - self.array[full_key] = value + def __setitem__(self, key: _IndexerKey, value: Any) -> None: + full_key = self._updated_key(BasicIndexer(key)) + self.array[full_key.tuple] = value def __repr__(self) -> str: return f"{type(self).__name__}(array={self.array!r}, key={self.key!r})" @@ -756,25 +758,25 @@ def get_duck_array(self): def _updated_key(self, new_key: ExplicitIndexer): return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer: OuterIndexer): - return type(self)(self.array, self._updated_key(indexer)) + def _oindex_get(self, indexer: _IndexerKey): + return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) - def _vindex_get(self, indexer: VectorizedIndexer): - return type(self)(self.array, self._updated_key(indexer)) + def _vindex_get(self, indexer: _IndexerKey): + return type(self)(self.array, self._updated_key(VectorizedIndexer(indexer))) + + def __getitem__(self, indexer: _IndexerKey): - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) # If the indexed array becomes a scalar, return LazilyIndexedArray - if all(isinstance(ind, integer_types) for ind in indexer.tuple): - key = BasicIndexer(tuple(k[indexer.tuple] for k in self.key.tuple)) + if all(isinstance(ind, integer_types) for ind in indexer): + key = BasicIndexer(tuple(k[indexer] for k in self.key.tuple)) return LazilyIndexedArray(self.array, key) - return type(self)(self.array, self._updated_key(indexer)) + return type(self)(self.array, self._updated_key(BasicIndexer(indexer))) def transpose(self, order): key = VectorizedIndexer(tuple(k.transpose(order) for k in self.key.tuple)) return type(self)(self.array, key) - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." @@ -807,29 +809,27 @@ def _ensure_copied(self): def get_duck_array(self): return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order): return self.array.transpose(order) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: self._ensure_copied() self.array.vindex[indexer] = value - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: self._ensure_copied() self.array.oindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self._ensure_copied() self.array[indexer] = value @@ -857,27 +857,25 @@ def get_duck_array(self): self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order): return self.array.transpose(order) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: self.array.vindex[indexer] = value - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: self.array.oindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self.array[indexer] = value @@ -1040,29 +1038,63 @@ def explicit_indexing_adapter( return result +class CompatIndexedTuple(tuple): + """ + A tuple subclass used to transition existing backend implementations towards the use of raw tuples + for indexing by carrying additional metadata about the type of indexing being + performed ('basic', 'vectorized', or 'outer'). This class serves as a bridge, allowing + backend arrays that currently expect this metadata to function correctly while + maintaining the outward behavior of a regular tuple. + + This class is particularly useful during the phase where the backend implementations are + not yet capable of directly accepting raw tuples without additional context about + the indexing type. It ensures that these backends can still correctly interpret and + process indexing operations by providing them with the necessary contextual information. + """ + + def __new__(cls, iterable, indexer_type: Literal["basic", "vectorized", "outer"]): + obj = super().__new__(cls, iterable) + obj.indexer_type = indexer_type # type: ignore[attr-defined] + return obj + + def __repr__(self): + return f"CompatIndexedTuple({super().__repr__()}, indexer_type='{self.indexer_type}')" + + def apply_indexer(indexable, indexer: ExplicitIndexer): """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): - return indexable.vindex[indexer] + return indexable.vindex[CompatIndexedTuple(indexer.tuple, "vectorized")] elif isinstance(indexer, OuterIndexer): - return indexable.oindex[indexer] + return indexable.oindex[CompatIndexedTuple(indexer.tuple, "outer")] else: - return indexable[indexer] + return indexable[CompatIndexedTuple(indexer.tuple, "basic")] def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: """Set values in an indexable object using an indexer.""" if isinstance(indexer, VectorizedIndexer): - indexable.vindex[indexer] = value + indexable.vindex[indexer.tuple] = value elif isinstance(indexer, OuterIndexer): - indexable.oindex[indexer] = value + indexable.oindex[indexer.tuple] = value else: - indexable[indexer] = value + indexable[indexer.tuple] = value def decompose_indexer( - indexer: ExplicitIndexer, shape: _Shape, indexing_support: IndexingSupport + indexer: ExplicitIndexer | CompatIndexedTuple, + shape: _Shape, + indexing_support: IndexingSupport, ) -> tuple[ExplicitIndexer, ExplicitIndexer]: + if isinstance(indexer, CompatIndexedTuple): + # recreate the indexer object from the tuple and the type of indexing. + # This is necessary to ensure that the backend array can correctly interpret the indexing operation. + if indexer.indexer_type == "vectorized": # type: ignore[attr-defined] + indexer = VectorizedIndexer(indexer) + elif indexer.indexer_type == "outer": # type: ignore[attr-defined] + indexer = OuterIndexer(indexer) + else: + indexer = BasicIndexer(indexer) if isinstance(indexer, VectorizedIndexer): return _decompose_vectorized_indexer(indexer, shape, indexing_support) if isinstance(indexer, (BasicIndexer, OuterIndexer)): @@ -1131,10 +1163,10 @@ def _decompose_vectorized_indexer( >>> array = np.arange(36).reshape(6, 6) >>> backend_indexer = OuterIndexer((np.array([0, 1, 3]), np.array([2, 3]))) >>> # load subslice of the array - ... array = NumpyIndexingAdapter(array).oindex[backend_indexer] + ... array = NumpyIndexingAdapter(array).oindex[backend_indexer.tuple] >>> np_indexer = VectorizedIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # vectorized indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array).vindex[np_indexer] + ... NumpyIndexingAdapter(array).vindex[np_indexer.tuple] array([ 2, 21, 8]) """ assert isinstance(indexer, VectorizedIndexer) @@ -1213,10 +1245,10 @@ def _decompose_outer_indexer( >>> array = np.arange(36).reshape(6, 6) >>> backend_indexer = BasicIndexer((slice(0, 3), slice(2, 4))) >>> # load subslice of the array - ... array = NumpyIndexingAdapter(array)[backend_indexer] + ... array = NumpyIndexingAdapter(array)[backend_indexer.tuple] >>> np_indexer = OuterIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # outer indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array).oindex[np_indexer] + ... NumpyIndexingAdapter(array).oindex[np_indexer.tuple] array([[ 2, 3, 2], [14, 15, 14], [ 8, 9, 8]]) @@ -1520,25 +1552,28 @@ def __init__(self, array): def transpose(self, order): return self.array.transpose(order) - def _oindex_get(self, indexer: OuterIndexer): - key = _outer_to_numpy_indexer(indexer, self.array.shape) + def _oindex_get(self, indexer: _IndexerKey): + key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): array = NumpyVIndexAdapter(self.array) - return array[indexer.tuple] + return array[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _IndexerKey | ExplicitIndexer): array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). - key = indexer.tuple + (Ellipsis,) + key = ( + indexer.tuple + if isinstance(indexer, ExplicitIndexer) + else indexer + (Ellipsis,) + ) return array[key] - def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: + def _safe_setitem(self, array, key: _IndexerKey, value: Any) -> None: try: array[key] = value except ValueError as exc: @@ -1551,21 +1586,24 @@ def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: else: raise exc - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - key = _outer_to_numpy_indexer(indexer, self.array.shape) + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) self._safe_setitem(self.array, key, value) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: array = NumpyVIndexAdapter(self.array) - self._safe_setitem(array, indexer.tuple, value) + self._safe_setitem(array, indexer, value) - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _IndexerKey | ExplicitIndexer, value: Any) -> None: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). - key = indexer.tuple + (Ellipsis,) + key = ( + indexer.tuple + if isinstance(indexer, ExplicitIndexer) + else indexer + (Ellipsis,) + ) self._safe_setitem(array, key, value) @@ -1594,30 +1632,28 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _IndexerKey): # manual orthogonal indexing (implemented like DaskIndexingAdapter) - key = indexer.tuple + value = self.array - for axis, subkey in reversed(list(enumerate(key))): + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _IndexerKey): raise TypeError("Vectorized indexing is not supported") - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) - return self.array[indexer.tuple] + def __getitem__(self, indexer: _IndexerKey): + return self.array[indexer] - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - self.array[indexer.tuple] = value + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + self.array[indexer] = value - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: raise TypeError("Vectorized indexing is not supported") - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) - self.array[indexer.tuple] = value + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + self.array[indexer] = value def transpose(self, order): xp = self.array.__array_namespace__() @@ -1635,38 +1671,35 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, indexer: OuterIndexer): - key = indexer.tuple + def _oindex_get(self, indexer: _IndexerKey): try: - return self.array[key] + return self.array[indexer] except NotImplementedError: # manual orthogonal indexing value = self.array - for axis, subkey in reversed(list(enumerate(key))): + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey,)] return value - def _vindex_get(self, indexer: VectorizedIndexer): - return self.array.vindex[indexer.tuple] + def _vindex_get(self, indexer: _IndexerKey): + return self.array.vindex[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) - return self.array[indexer.tuple] + def __getitem__(self, indexer: _IndexerKey): + return self.array[indexer] - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer.tuple) + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer) if num_non_slices > 1: raise NotImplementedError( "xarray can't set arrays with multiple " "array indices to dask yet." ) - self.array[indexer.tuple] = value + self.array[indexer] = value - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: - self.array.vindex[indexer.tuple] = value + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + self.array.vindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) - self.array[indexer.tuple] = value + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + self.array[indexer] = value def transpose(self, order): return self.array.transpose(order) @@ -1728,13 +1761,14 @@ def _convert_scalar(self, item): # a NumPy array. return to_0d_array(item) - def _prepare_key(self, key: tuple[Any, ...]) -> tuple[Any, ...]: - if isinstance(key, tuple) and len(key) == 1: + def _prepare_key(self, key: ExplicitIndexer | _IndexerKey) -> _IndexerKey: + _key = key.tuple if isinstance(key, ExplicitIndexer) else key + if isinstance(_key, tuple) and len(_key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) - (key,) = key + (_key,) = _key - return key + return _key def _handle_result( self, result: Any @@ -1751,7 +1785,7 @@ def _handle_result( return self._convert_scalar(result) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1759,7 +1793,7 @@ def _oindex_get( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1770,7 +1804,7 @@ def _oindex_get( return self._handle_result(result) def _vindex_get( - self, indexer: VectorizedIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1778,7 +1812,7 @@ def _vindex_get( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1789,7 +1823,7 @@ def _vindex_get( return self._handle_result(result) def __getitem__( - self, indexer: ExplicitIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1797,7 +1831,7 @@ def __getitem__( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1862,7 +1896,7 @@ def _convert_scalar(self, item): return super()._convert_scalar(item) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1876,7 +1910,7 @@ def _oindex_get( return result def _vindex_get( - self, indexer: VectorizedIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1889,7 +1923,7 @@ def _vindex_get( result.level = self.level return result - def __getitem__(self, indexer: ExplicitIndexer): + def __getitem__(self, indexer: _IndexerKey): result = super().__getitem__(indexer) if isinstance(result, type(self)): result.level = self.level @@ -1911,7 +1945,7 @@ def _get_array_subset(self) -> np.ndarray: if self.size > threshold: pos = threshold // 2 indices = np.concatenate([np.arange(0, pos), np.arange(-pos, 0)]) - subset = self[OuterIndexer((indices,))] + subset = self[(indices,)] else: subset = self diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index b715973814f..243c2382472 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -95,6 +95,7 @@ def dtype(self) -> _DType_co: ... _IndexKey = Union[int, slice, "ellipsis"] _IndexKeys = tuple[Union[_IndexKey], ...] # tuple[Union[_IndexKey, None], ...] _IndexKeyLike = Union[_IndexKey, _IndexKeys] +_IndexerKey = tuple[Any, ...] _AttrsLike = Union[Mapping[Any, Any], None] diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 23fd590f4dc..64a879369f8 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -226,16 +226,6 @@ def __getitem__(self, key): return key -class IndexerMaker: - def __init__(self, indexer_cls): - self._indexer_cls = indexer_cls - - def __getitem__(self, key): - if not isinstance(key, tuple): - key = (key,) - return self._indexer_cls(key) - - def source_ndarray(array): """Given an ndarray, return the base object which holds its memory, or the object itself. diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index d7471ecbaf9..eb5e2ef6cf0 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5815,7 +5815,7 @@ def _getitem(self, key): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - la.vindex[indexer].get_duck_array() + la.vindex[indexer.tuple].get_duck_array() captured = capsys.readouterr() assert len(w) == 1 diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index 51f63ea72dd..0feac5b15eb 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -7,9 +7,7 @@ from xarray import Variable from xarray.coding import strings -from xarray.core import indexing from xarray.tests import ( - IndexerMaker, assert_array_equal, assert_identical, requires_dask, @@ -150,10 +148,9 @@ def test_StackedBytesArray() -> None: assert len(actual) == len(expected) assert_array_equal(expected, actual) - B = IndexerMaker(indexing.BasicIndexer) - assert_array_equal(expected[:1], actual[B[:1]]) + assert_array_equal(expected[:1], actual[(slice(1),)]) with pytest.raises(IndexError): - actual[B[:, :2]] + actual[slice(None), slice(2)] def test_StackedBytesArray_scalar() -> None: @@ -168,10 +165,8 @@ def test_StackedBytesArray_scalar() -> None: with pytest.raises(TypeError): len(actual) np.testing.assert_array_equal(expected, actual) - - B = IndexerMaker(indexing.BasicIndexer) with pytest.raises(IndexError): - actual[B[:2]] + actual[(slice(2),)] def test_StackedBytesArray_vectorized_indexing() -> None: @@ -179,9 +174,7 @@ def test_StackedBytesArray_vectorized_indexing() -> None: stacked = strings.StackedBytesArray(array) expected = np.array([[b"abc", b"def"], [b"def", b"abc"]]) - V = IndexerMaker(indexing.VectorizedIndexer) - indexer = V[np.array([[0, 1], [1, 0]])] - actual = stacked.vindex[indexer] + actual = stacked.vindex[(np.array([[0, 1], [1, 0]]),)] assert_array_equal(actual, expected) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 584776197e3..ecca8c0c79e 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -244,7 +244,7 @@ def get_array(self): return self.array def __getitem__(self, key): - return self.array[key.tuple] + return self.array[(key if isinstance(key, tuple) else key.tuple)] class AccessibleAsDuckArrayDataStore(backends.InMemoryDataStore): @@ -5096,28 +5096,26 @@ def test_lazy_load(self) -> None: ds.isel(time=10) ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) - def test_lazy_load_duck_array(self) -> None: + @pytest.mark.parametrize("decode_cf", [True, False]) + def test_lazy_load_duck_array(self, decode_cf) -> None: store = AccessibleAsDuckArrayDataStore() create_test_data().dump_to_store(store) - for decode_cf in [True, False]: - ds = open_dataset(store, decode_cf=decode_cf) - with pytest.raises(UnexpectedDataAccess): - ds["var1"].values + ds = open_dataset(store, decode_cf=decode_cf) + with pytest.raises(UnexpectedDataAccess): + ds["var1"].values - # these should not raise UnexpectedDataAccess: - ds.var1.data - ds.isel(time=10) - ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) - repr(ds) + # these should not raise UnexpectedDataAccess: + ds.var1.data + ds.isel(time=10) + ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) + repr(ds) - # preserve the duck array type and don't cast to array - assert isinstance(ds["var1"].load().data, DuckArrayWrapper) - assert isinstance( - ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper - ) + # preserve the duck array type and don't cast to array + assert isinstance(ds["var1"].load().data, DuckArrayWrapper) + assert isinstance(ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper) - ds.close() + ds.close() def test_dropna(self) -> None: x = np.random.randn(4, 4) diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index f019d3c789c..b5da4a75439 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -12,7 +12,6 @@ from xarray.core.indexes import PandasIndex, PandasMultiIndex from xarray.core.types import T_Xarray from xarray.tests import ( - IndexerMaker, ReturnItem, assert_array_equal, assert_identical, @@ -20,8 +19,6 @@ requires_dask, ) -B = IndexerMaker(indexing.BasicIndexer) - class TestIndexCallable: def test_getitem(self): @@ -433,7 +430,7 @@ def test_lazily_indexed_array_vindex_setitem(self) -> None: NotImplementedError, match=r"Lazy item assignment with the vectorized indexer is not yet", ): - lazy.vindex[indexer] = 0 + lazy.vindex[indexer.tuple] = 0 @pytest.mark.parametrize( "indexer_class, key, value", @@ -449,10 +446,10 @@ def test_lazily_indexed_array_setitem(self, indexer_class, key, value) -> None: if indexer_class is indexing.BasicIndexer: indexer = indexer_class(key) - lazy[indexer] = value + lazy[indexer.tuple] = value elif indexer_class is indexing.OuterIndexer: indexer = indexer_class(key) - lazy.oindex[indexer] = value + lazy.oindex[indexer.tuple] = value assert_array_equal(original[key], value) @@ -461,16 +458,16 @@ class TestCopyOnWriteArray: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - wrapped[B[:]] = 0 + wrapped[(slice(None),)] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.zeros(10)) def test_sub_array(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - child = wrapped[B[:5]] + child = wrapped[(slice(5),)] assert isinstance(child, indexing.CopyOnWriteArray) - child[B[:]] = 0 + child[(slice(None),)] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.arange(10)) assert_array_equal(child, np.zeros(5)) @@ -478,7 +475,7 @@ def test_sub_array(self) -> None: def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.CopyOnWriteArray(np.array(["foo", "bar"])) - assert np.array(x[B[0]][B[()]]) == "foo" + assert np.array(x[(0,)][()]) == "foo" class TestMemoryCachedArray: @@ -491,7 +488,7 @@ def test_wrapper(self) -> None: def test_sub_array(self) -> None: original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) - child = wrapped[B[:5]] + child = wrapped[(slice(5),)] assert isinstance(child, indexing.MemoryCachedArray) assert_array_equal(child, np.arange(5)) assert isinstance(child.array, indexing.NumpyIndexingAdapter) @@ -500,13 +497,13 @@ def test_sub_array(self) -> None: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.MemoryCachedArray(original) - wrapped[B[:]] = 0 + wrapped[(slice(None),)] = 0 assert_array_equal(original, np.zeros(10)) def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.MemoryCachedArray(np.array(["foo", "bar"])) - assert np.array(x[B[0]][B[()]]) == "foo" + assert np.array(x[(0,)][()]) == "foo" def test_base_explicit_indexer() -> None: @@ -615,7 +612,7 @@ def test_arrayize_vectorized_indexer(self) -> None: vindex, self.data.shape ) np.testing.assert_array_equal( - self.data.vindex[vindex], self.data.vindex[vindex_array] + self.data.vindex[vindex.tuple], self.data.vindex[vindex_array.tuple] ) actual = indexing._arrayize_vectorized_indexer( @@ -731,35 +728,35 @@ def test_decompose_indexers(shape, indexer_mode, indexing_support) -> None: # Dispatch to appropriate indexing method if indexer_mode.startswith("vectorized"): - expected = indexing_adapter.vindex[indexer] + expected = indexing_adapter.vindex[indexer.tuple] elif indexer_mode.startswith("outer"): - expected = indexing_adapter.oindex[indexer] + expected = indexing_adapter.oindex[indexer.tuple] else: - expected = indexing_adapter[indexer] # Basic indexing + expected = indexing_adapter[indexer.tuple] # Basic indexing if isinstance(backend_ind, indexing.VectorizedIndexer): - array = indexing_adapter.vindex[backend_ind] + array = indexing_adapter.vindex[backend_ind.tuple] elif isinstance(backend_ind, indexing.OuterIndexer): - array = indexing_adapter.oindex[backend_ind] + array = indexing_adapter.oindex[backend_ind.tuple] else: - array = indexing_adapter[backend_ind] + array = indexing_adapter[backend_ind.tuple] if len(np_ind.tuple) > 0: array_indexing_adapter = indexing.NumpyIndexingAdapter(array) if isinstance(np_ind, indexing.VectorizedIndexer): - array = array_indexing_adapter.vindex[np_ind] + array = array_indexing_adapter.vindex[np_ind.tuple] elif isinstance(np_ind, indexing.OuterIndexer): - array = array_indexing_adapter.oindex[np_ind] + array = array_indexing_adapter.oindex[np_ind.tuple] else: - array = array_indexing_adapter[np_ind] + array = array_indexing_adapter[np_ind.tuple] np.testing.assert_array_equal(expected, array) if not all(isinstance(k, indexing.integer_types) for k in np_ind.tuple): combined_ind = indexing._combine_indexers(backend_ind, shape, np_ind) assert isinstance(combined_ind, indexing.VectorizedIndexer) - array = indexing_adapter.vindex[combined_ind] + array = indexing_adapter.vindex[combined_ind.tuple] np.testing.assert_array_equal(expected, array) From 18c5c70c7c08414695f1f3abda86264f15fb88a5 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 6 May 2024 13:21:14 -0600 Subject: [PATCH 04/32] Trigger CI only if code files are modified. (#9006) * Trigger CI only if code files are modified. Fixes #8705 * Apply suggestions from code review Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --------- Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- .github/workflows/ci-additional.yaml | 6 ++++++ .github/workflows/ci.yaml | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index bc2eb8d2cac..49a9272e4f0 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -14,6 +14,12 @@ on: - 'properties/**' - 'xarray/**' - "backend-indexing" + paths: + - 'ci/**' + - '.github/**' + - '/*' # covers files such as `pyproject.toml` + - 'properties/**' + - 'xarray/**' workflow_dispatch: # allows you to trigger manually concurrency: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ca9ef397962..a4b165db06c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -14,6 +14,12 @@ on: - 'properties/**' - 'xarray/**' - "backend-indexing" + paths: + - 'ci/**' + - '.github/**' + - '/*' # covers files such as `pyproject.toml` + - 'properties/**' + - 'xarray/**' workflow_dispatch: # allows you to trigger manually concurrency: From 795daf2db5ad0b0a2ebbad4cc694e15417a124db Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Sat, 11 May 2024 18:24:10 -0700 Subject: [PATCH 05/32] fix bad merge --- .github/workflows/ci-additional.yaml | 7 +------ .github/workflows/ci.yaml | 6 ------ 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 49a9272e4f0..d9ab8a9fc3e 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -7,12 +7,6 @@ on: pull_request: branches: - "main" - paths: - - 'ci/**' - - '.github/**' - - '/*' # covers files such as `pyproject.toml` - - 'properties/**' - - 'xarray/**' - "backend-indexing" paths: - 'ci/**' @@ -20,6 +14,7 @@ on: - '/*' # covers files such as `pyproject.toml` - 'properties/**' - 'xarray/**' + workflow_dispatch: # allows you to trigger manually concurrency: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a4b165db06c..4263c313cbc 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -7,12 +7,6 @@ on: pull_request: branches: - "main" - paths: - - 'ci/**' - - '.github/**' - - '/*' # covers files such as `pyproject.toml` - - 'properties/**' - - 'xarray/**' - "backend-indexing" paths: - 'ci/**' From f2c4659c179aaca2819dd13d86c005e8914d9cf3 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Sat, 11 May 2024 21:34:26 -0400 Subject: [PATCH 06/32] Micro optimization -- use tuples throughout backend indexing (#9009) Co-authored-by: Deepak Cherian Co-authored-by: Deepak Cherian Co-authored-by: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Co-authored-by: Anderson Banihirwe --- xarray/core/indexing.py | 185 +++++++++++++++++++++------------------- 1 file changed, 96 insertions(+), 89 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 2b8cd202e4e..18f2489a505 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -36,7 +36,7 @@ from xarray.core.indexes import Index from xarray.core.variable import Variable - from xarray.namedarray._typing import _IndexerKey, _Shape, duckarray + from xarray.namedarray._typing import _Chunks, _IndexerKey, _Shape, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -218,22 +218,22 @@ def expanded_indexer(key, ndim): if not isinstance(key, tuple): # numpy treats non-tuple keys equivalent to tuples of length 1 key = (key,) - new_key = [] + new_key = () # handling Ellipsis right is a little tricky, see: # https://numpy.org/doc/stable/reference/arrays.indexing.html#advanced-indexing found_ellipsis = False for k in key: if k is Ellipsis: if not found_ellipsis: - new_key.extend((ndim + 1 - len(key)) * [slice(None)]) + new_key += (slice(None),) * (ndim + 1 - len(key)) found_ellipsis = True else: - new_key.append(slice(None)) + new_key += (slice(None),) else: - new_key.append(k) + new_key += (k,) if len(new_key) > ndim: raise IndexError("too many indices") - new_key.extend((ndim - len(new_key)) * [slice(None)]) + new_key += (slice(None),) * (ndim - len(new_key)) return tuple(new_key) @@ -388,7 +388,7 @@ def __init__(self, key: tuple[int | np.integer | slice, ...]): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key = [] + new_key: tuple[int | np.integer | slice, ...] = () for k in key: if isinstance(k, integer_types): k = int(k) @@ -398,9 +398,9 @@ def __init__(self, key: tuple[int | np.integer | slice, ...]): raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) - new_key.append(k) + new_key += (k,) - super().__init__(tuple(new_key)) + super().__init__(new_key) class OuterIndexer(ExplicitIndexer): @@ -423,7 +423,9 @@ def __init__( if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key = [] + new_key: tuple[ + int | np.integer | slice | np.ndarray[Any, np.dtype[np.generic]], ... + ] = () for k in key: if isinstance(k, integer_types): k = int(k) @@ -444,9 +446,9 @@ def __init__( raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) - new_key.append(k) + new_key += (k,) - super().__init__(tuple(new_key)) + super().__init__(new_key) class VectorizedIndexer(ExplicitIndexer): @@ -465,7 +467,7 @@ def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ... if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () ndim = None for k in key: if isinstance(k, slice): @@ -494,9 +496,9 @@ def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ... raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) - new_key.append(k) + new_key += (k,) - super().__init__(tuple(new_key)) + super().__init__(new_key) class ExplicitlyIndexed: @@ -599,7 +601,7 @@ def __getitem__(self, key: _IndexerKey | slice): class LazilyIndexedArray(ExplicitlyIndexedNDArrayMixin): """Wrap an array to make basic and outer indexing lazy.""" - __slots__ = ("array", "key") + __slots__ = ("array", "key", "_shape") def __init__(self, array: Any, key: ExplicitIndexer | None = None): """ @@ -622,6 +624,14 @@ def __init__(self, array: Any, key: ExplicitIndexer | None = None): self.array = as_indexable(array) self.key = key + shape: _Shape = () + for size, k in zip(self.array.shape, self.key.tuple): + if isinstance(k, slice): + shape += (len(range(*k.indices(size))),) + elif isinstance(k, np.ndarray): + shape += (k.size,) + self._shape = shape + def _updated_key( self, new_key: ExplicitIndexer | _IndexerKey ) -> BasicIndexer | OuterIndexer: @@ -629,27 +639,20 @@ def _updated_key( new_key.tuple if isinstance(new_key, ExplicitIndexer) else new_key ) iter_new_key = iter(expanded_indexer(_new_key_tuple, self.ndim)) - full_key = [] + full_key: tuple[int | np.integer, ...] = () for size, k in zip(self.array.shape, self.key.tuple): if isinstance(k, integer_types): - full_key.append(k) + full_key += (k,) else: - full_key.append(_index_indexer_1d(k, next(iter_new_key), size)) - full_key_tuple = tuple(full_key) + full_key += (_index_indexer_1d(k, next(iter_new_key), size),) - if all(isinstance(k, integer_types + (slice,)) for k in full_key_tuple): - return BasicIndexer(full_key_tuple) - return OuterIndexer(full_key_tuple) + if all(isinstance(k, integer_types + (slice,)) for k in full_key): + return BasicIndexer(full_key) + return OuterIndexer(full_key) @property def shape(self) -> _Shape: - shape = [] - for size, k in zip(self.array.shape, self.key.tuple): - if isinstance(k, slice): - shape.append(len(range(*k.indices(size)))) - elif isinstance(k, np.ndarray): - shape.append(k.size) - return tuple(shape) + return self._shape def get_duck_array(self): try: @@ -924,18 +927,18 @@ def _outer_to_vectorized_indexer( n_dim = len([k for k in key if not isinstance(k, integer_types)]) i_dim = 0 - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () for k, size in zip(key, shape): if isinstance(k, integer_types): - new_key.append(np.array(k).reshape((1,) * n_dim)) + new_key += (np.array(k).reshape((1,) * n_dim),) else: # np.ndarray or slice if isinstance(k, slice): k = np.arange(*k.indices(size)) assert k.dtype.kind in {"i", "u"} new_shape = [(1,) * i_dim + (k.size,) + (1,) * (n_dim - i_dim - 1)] - new_key.append(k.reshape(*new_shape)) + new_key += (k.reshape(*new_shape),) i_dim += 1 - return VectorizedIndexer(tuple(new_key)) + return VectorizedIndexer(new_key) def _outer_to_numpy_indexer(indexer: BasicIndexer | OuterIndexer, shape: _Shape): @@ -1174,8 +1177,10 @@ def _decompose_vectorized_indexer( if indexing_support is IndexingSupport.VECTORIZED: return indexer, BasicIndexer(()) - backend_indexer_elems = [] - np_indexer_elems = [] + backend_indexer_elems: tuple[ + int | np.integer | slice | np.ndarray[Any, np.dtype[np.generic]], ... + ] = () + np_indexer_elems: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () # convert negative indices indexer_elems = [ np.where(k < 0, k + s, k) if isinstance(k, np.ndarray) else k @@ -1188,17 +1193,17 @@ def _decompose_vectorized_indexer( # (but make its step positive) in the backend, # and then use all of it (slice(None)) for the in-memory portion. bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer_elems.append(bk_slice) - np_indexer_elems.append(np_slice) + backend_indexer_elems += (bk_slice,) + np_indexer_elems += (np_slice,) else: # If it is a (multidimensional) np.ndarray, just pickup the used # keys without duplication and store them as a 1d-np.ndarray. oind, vind = np.unique(k, return_inverse=True) - backend_indexer_elems.append(oind) - np_indexer_elems.append(vind.reshape(*k.shape)) + backend_indexer_elems += (oind,) + np_indexer_elems += (vind.reshape(*k.shape),) - backend_indexer = OuterIndexer(tuple(backend_indexer_elems)) - np_indexer = VectorizedIndexer(tuple(np_indexer_elems)) + backend_indexer = OuterIndexer(backend_indexer_elems) + np_indexer = VectorizedIndexer(np_indexer_elems) if indexing_support is IndexingSupport.OUTER: return backend_indexer, np_indexer @@ -1253,8 +1258,8 @@ def _decompose_outer_indexer( [14, 15, 14], [ 8, 9, 8]]) """ - backend_indexer: list[Any] = [] - np_indexer: list[Any] = [] + backend_indexer: tuple[Any, ...] = () + np_indexer: tuple[Any, ...] = () assert isinstance(indexer, (OuterIndexer, BasicIndexer)) @@ -1264,23 +1269,23 @@ def _decompose_outer_indexer( # If it is a slice, then we will slice it as-is # (but make its step positive) in the backend, bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) else: - backend_indexer.append(k) + backend_indexer += (k,) if not is_scalar(k): - np_indexer.append(slice(None)) - return type(indexer)(tuple(backend_indexer)), BasicIndexer(tuple(np_indexer)) + np_indexer += (slice(None),) + return type(indexer)(backend_indexer), BasicIndexer(np_indexer) # make indexer positive - pos_indexer: list[np.ndarray | int | np.number] = [] + pos_indexer: tuple[np.ndarray | int | np.number, ...] = () for k, s in zip(indexer.tuple, shape): if isinstance(k, np.ndarray): - pos_indexer.append(np.where(k < 0, k + s, k)) + pos_indexer += (np.where(k < 0, k + s, k),) elif isinstance(k, integer_types) and k < 0: - pos_indexer.append(k + s) + pos_indexer += (k + s,) else: - pos_indexer.append(k) + pos_indexer += (k,) indexer_elems = pos_indexer if indexing_support is IndexingSupport.OUTER_1VECTOR: @@ -1300,41 +1305,41 @@ def _decompose_outer_indexer( if isinstance(k, np.ndarray) and i != array_index: # np.ndarray key is converted to slice that covers the entire # entries of this key. - backend_indexer.append(slice(np.min(k), np.max(k) + 1)) - np_indexer.append(k - np.min(k)) + backend_indexer += (slice(np.min(k), np.max(k) + 1),) + np_indexer += (k - np.min(k),) elif isinstance(k, np.ndarray): # Remove duplicates and sort them in the increasing order pkey, ekey = np.unique(k, return_inverse=True) - backend_indexer.append(pkey) - np_indexer.append(ekey) + backend_indexer += (pkey,) + np_indexer += (ekey,) elif isinstance(k, integer_types): - backend_indexer.append(k) + backend_indexer += (k,) else: # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) - return (OuterIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) + return OuterIndexer(backend_indexer), OuterIndexer(np_indexer) if indexing_support == IndexingSupport.OUTER: for k, s in zip(indexer_elems, shape): if isinstance(k, slice): # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) elif isinstance(k, integer_types): - backend_indexer.append(k) + backend_indexer += (k,) elif isinstance(k, np.ndarray) and (np.diff(k) >= 0).all(): - backend_indexer.append(k) - np_indexer.append(slice(None)) + backend_indexer += (k,) + np_indexer += (slice(None),) else: # Remove duplicates and sort them in the increasing order oind, vind = np.unique(k, return_inverse=True) - backend_indexer.append(oind) - np_indexer.append(vind.reshape(*k.shape)) + backend_indexer += (oind,) + np_indexer += (vind.reshape(*k.shape),) - return (OuterIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) + return OuterIndexer(backend_indexer), OuterIndexer(np_indexer) # basic indexer assert indexing_support == IndexingSupport.BASIC @@ -1343,16 +1348,16 @@ def _decompose_outer_indexer( if isinstance(k, np.ndarray): # np.ndarray key is converted to slice that covers the entire # entries of this key. - backend_indexer.append(slice(np.min(k), np.max(k) + 1)) - np_indexer.append(k - np.min(k)) + backend_indexer += (slice(np.min(k), np.max(k) + 1),) + np_indexer += (k - np.min(k),) elif isinstance(k, integer_types): - backend_indexer.append(k) + backend_indexer += (k,) else: # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) - return (BasicIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) + return BasicIndexer(backend_indexer), OuterIndexer(np_indexer) def _arrayize_vectorized_indexer( @@ -1366,15 +1371,15 @@ def _arrayize_vectorized_indexer( arrays = [v for v in indexer.tuple if isinstance(v, np.ndarray)] n_dim = arrays[0].ndim if len(arrays) > 0 else 0 i_dim = 0 - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () for v, size in zip(indexer.tuple, shape): if isinstance(v, np.ndarray): - new_key.append(np.reshape(v, v.shape + (1,) * len(slices))) + new_key += (np.reshape(v, v.shape + (1,) * len(slices)),) else: # slice shape = (1,) * (n_dim + i_dim) + (-1,) + (1,) * (len(slices) - i_dim - 1) - new_key.append(np.arange(*v.indices(size)).reshape(shape)) + new_key += (np.arange(*v.indices(size)).reshape(shape),) i_dim += 1 - return VectorizedIndexer(tuple(new_key)) + return VectorizedIndexer(new_key) def _chunked_array_with_chunks_hint( @@ -1384,10 +1389,12 @@ def _chunked_array_with_chunks_hint( if len(chunks) < array.ndim: raise ValueError("not enough chunks in hint") - new_chunks = [] - for chunk, size in zip(chunks, array.shape): - new_chunks.append(chunk if size > 1 else (1,)) - return chunkmanager.from_array(array, new_chunks) # type: ignore[arg-type] + + new_chunks: _Chunks = tuple( + chunk if size > 1 else 1 for chunk, size in zip(chunks, array.shape) + ) + + return chunkmanager.from_array(array, new_chunks) def _logical_any(args): @@ -1398,22 +1405,22 @@ def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None): key = (k for k in key if not isinstance(k, slice)) chunks_hint = getattr(data, "chunks", None) - new_keys = [] + new_keys: tuple[Any, ...] = () for k in key: if isinstance(k, np.ndarray): if is_chunked_array(data): # type: ignore[arg-type] chunkmanager = get_chunked_array_type(data) - new_keys.append( - _chunked_array_with_chunks_hint(k, chunks_hint, chunkmanager) + new_keys += ( + _chunked_array_with_chunks_hint(k, chunks_hint, chunkmanager), ) elif isinstance(data, array_type("sparse")): import sparse - new_keys.append(sparse.COO.from_numpy(k)) + new_keys += (sparse.COO.from_numpy(k),) else: - new_keys.append(k) + new_keys += (k,) else: - new_keys.append(k) + new_keys += (k,) mask = _logical_any(k == -1 for k in new_keys) return mask From 8b591e0073efa86ef8f6b5b9deb7b2886a681c20 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 30 Oct 2024 19:41:21 +0000 Subject: [PATCH 07/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/zarr.py | 1 - xarray/core/indexing.py | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index aeedb1d0369..60fa51456d1 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -158,7 +158,6 @@ def raw_indexing_method(key): ) def _vindex_get(self, key: indexing.VectorizedIndexer): - def raw_indexing_method(key): return self._array.vindex[key] diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 1a968debfd7..b9ed9c2a45f 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -10,7 +10,7 @@ from dataclasses import dataclass, field from datetime import timedelta from html import escape -from typing import TYPE_CHECKING, Any, Callable, Literal, overload +from typing import TYPE_CHECKING, Any, Literal, overload import numpy as np import pandas as pd @@ -38,7 +38,7 @@ from xarray.core.indexes import Index from xarray.core.types import Self from xarray.core.variable import Variable - from xarray.namedarray._typing import _Chunks, _IndexerKey, _Shape, duckarray + from xarray.namedarray._typing import _IndexerKey, _Shape, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -771,7 +771,6 @@ def _vindex_get(self, indexer: _IndexerKey): return type(self)(self.array, self._updated_key(VectorizedIndexer(indexer))) def __getitem__(self, indexer: _IndexerKey): - # If the indexed array becomes a scalar, return LazilyIndexedArray if all(isinstance(ind, integer_types) for ind in indexer): key = BasicIndexer(tuple(k[indexer] for k in self.key.tuple)) @@ -1566,7 +1565,6 @@ def _vindex_get(self, indexer: _IndexerKey): return array[indexer] def __getitem__(self, indexer: _IndexerKey | ExplicitIndexer): - array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see From 8cc0d2916bb413673199046b4847464cf71fd389 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 30 Oct 2024 15:03:07 -0700 Subject: [PATCH 08/32] enhance type annotations and improve clarity --- xarray/core/indexing.py | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index b9ed9c2a45f..74e9a9689ab 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -38,7 +38,7 @@ from xarray.core.indexes import Index from xarray.core.types import Self from xarray.core.variable import Variable - from xarray.namedarray._typing import _IndexerKey, _Shape, duckarray + from xarray.namedarray._typing import _Chunks, _IndexerKey, _Shape, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -640,9 +640,14 @@ def __init__(self, array: Any, key: ExplicitIndexer | None = None): shape += (k.size,) self._shape = shape - def _updated_key(self, new_key: ExplicitIndexer) -> BasicIndexer | OuterIndexer: - iter_new_key = iter(expanded_indexer(new_key.tuple, self.ndim)) - full_key = [] + def _updated_key( + self, new_key: ExplicitIndexer | _IndexerKey + ) -> BasicIndexer | OuterIndexer: + _new_key_tuple = ( + new_key.tuple if isinstance(new_key, ExplicitIndexer) else new_key + ) + iter_new_key = iter(expanded_indexer(_new_key_tuple, self.ndim)) + full_key: tuple[int | np.integer, ...] = () for size, k in zip(self.array.shape, self.key.tuple, strict=True): if isinstance(k, integer_types): full_key += (k,) @@ -855,6 +860,9 @@ def __init__(self, array): def _ensure_cached(self): self.array = as_indexable(self.array.get_duck_array()) + def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: + return np.asarray(self.get_duck_array(), dtype=dtype) + def get_duck_array(self): self._ensure_cached() return self.array.get_duck_array() @@ -895,10 +903,10 @@ def as_indexable(array): return PandasIndexingAdapter(array) if is_duck_dask_array(array): return DaskIndexingAdapter(array) - if hasattr(array, "__array_namespace__"): - return ArrayApiIndexingAdapter(array) if hasattr(array, "__array_function__"): return NdArrayLikeIndexingAdapter(array) + if hasattr(array, "__array_namespace__"): + return ArrayApiIndexingAdapter(array) raise TypeError(f"Invalid array type: {type(array)}") @@ -926,7 +934,7 @@ def _outer_to_vectorized_indexer( n_dim = len([k for k in key if not isinstance(k, integer_types)]) i_dim = 0 - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () for k, size in zip(key, shape, strict=True): if isinstance(k, integer_types): new_key += (np.array(k).reshape((1,) * n_dim),) @@ -1277,7 +1285,7 @@ def _decompose_outer_indexer( return type(indexer)(backend_indexer), BasicIndexer(np_indexer) # make indexer positive - pos_indexer: list[np.ndarray | int | np.number] = [] + pos_indexer: tuple[np.ndarray | int | np.number, ...] = () for k, s in zip(indexer.tuple, shape, strict=False): if isinstance(k, np.ndarray): pos_indexer += (np.where(k < 0, k + s, k),) @@ -1370,7 +1378,7 @@ def _arrayize_vectorized_indexer( arrays = [v for v in indexer.tuple if isinstance(v, np.ndarray)] n_dim = arrays[0].ndim if len(arrays) > 0 else 0 i_dim = 0 - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () for v, size in zip(indexer.tuple, shape, strict=True): if isinstance(v, np.ndarray): new_key += (np.reshape(v, v.shape + (1,) * len(slices)),) @@ -1388,9 +1396,12 @@ def _chunked_array_with_chunks_hint( if len(chunks) < array.ndim: raise ValueError("not enough chunks in hint") - new_chunks = [] - for chunk, size in zip(chunks, array.shape, strict=False): - new_chunks.append(chunk if size > 1 else (1,)) + + new_chunks: _Chunks = tuple( + chunk if size > 1 else 1 + for chunk, size in zip(chunks, array.shape, strict=False) + ) + return chunkmanager.from_array(array, new_chunks) # type: ignore[arg-type] @@ -1773,8 +1784,9 @@ def _convert_scalar(self, item): # a NumPy array. return to_0d_array(item) - def _prepare_key(self, key: Any | tuple[Any, ...]) -> tuple[Any, ...]: - if isinstance(key, tuple) and len(key) == 1: + def _prepare_key(self, key: ExplicitIndexer | _IndexerKey) -> _IndexerKey: + _key = key.tuple if isinstance(key, ExplicitIndexer) else key + if isinstance(_key, tuple) and len(_key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) (_key,) = _key From 58846055a3d735688ebd2b3aae823590d461955e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 30 Oct 2024 16:55:54 -0700 Subject: [PATCH 09/32] Fix indexing logic to correctly handle array with __array_function__ attribute --- xarray/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 74e9a9689ab..e59bea42daf 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -903,10 +903,10 @@ def as_indexable(array): return PandasIndexingAdapter(array) if is_duck_dask_array(array): return DaskIndexingAdapter(array) - if hasattr(array, "__array_function__"): - return NdArrayLikeIndexingAdapter(array) if hasattr(array, "__array_namespace__"): return ArrayApiIndexingAdapter(array) + if hasattr(array, "__array_function__"): + return NdArrayLikeIndexingAdapter(array) raise TypeError(f"Invalid array type: {type(array)}") From 50791e018d29890a9c23292060f155db6f9b83af Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 18:49:46 -0700 Subject: [PATCH 10/32] update indexing methods to use OuterIndexer type --- xarray/coding/strings.py | 2 +- xarray/core/indexing.py | 30 ++++++++++++++++-------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 6d8dc03d8de..140f31a17e2 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -252,7 +252,7 @@ def __repr__(self): def _vindex_get(self, key: _IndexerKey): return _numpy_char_to_bytes(self.array.vindex[key]) - def _oindex_get(self, key: _IndexerKey): + def _oindex_get(self, key: indexing.OuterIndexer): return _numpy_char_to_bytes(self.array.oindex[key]) def __getitem__(self, key: _IndexerKey): diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index e59bea42daf..86e747aedc4 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -577,7 +577,7 @@ def __array__( self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None ) -> np.ndarray: if Version(np.__version__) >= Version("2.0.0"): - return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) + return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) # type: ignore[call-overload] else: return np.asarray(self.get_duck_array(), dtype=dtype) @@ -688,7 +688,7 @@ def get_duck_array(self): def transpose(self, order): return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): return type(self)(self.array, self._updated_key(indexer)) def _vindex_get(self, indexer: _IndexerKey): @@ -769,7 +769,7 @@ def get_duck_array(self): def _updated_key(self, new_key: ExplicitIndexer): return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) def _vindex_get(self, indexer: _IndexerKey): @@ -819,7 +819,7 @@ def _ensure_copied(self): def get_duck_array(self): return self.array.get_duck_array() - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) def _vindex_get(self, indexer: _IndexerKey): @@ -860,14 +860,16 @@ def __init__(self, array): def _ensure_cached(self): self.array = as_indexable(self.array.get_duck_array()) - def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: - return np.asarray(self.get_duck_array(), dtype=dtype) + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: + return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) def get_duck_array(self): self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) def _vindex_get(self, indexer: _IndexerKey): @@ -1402,7 +1404,7 @@ def _chunked_array_with_chunks_hint( for chunk, size in zip(chunks, array.shape, strict=False) ) - return chunkmanager.from_array(array, new_chunks) # type: ignore[arg-type] + return chunkmanager.from_array(array, new_chunks) def _logical_any(args): @@ -1567,7 +1569,7 @@ def __init__(self, array): def transpose(self, order): return self.array.transpose(order) - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] @@ -1646,7 +1648,7 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): # manual orthogonal indexing (implemented like DaskIndexingAdapter) value = self.array @@ -1685,7 +1687,7 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, indexer: _IndexerKey): + def _oindex_get(self, indexer: OuterIndexer): try: return self.array[indexer] except NotImplementedError: @@ -1786,7 +1788,7 @@ def _convert_scalar(self, item): def _prepare_key(self, key: ExplicitIndexer | _IndexerKey) -> _IndexerKey: _key = key.tuple if isinstance(key, ExplicitIndexer) else key - if isinstance(_key, tuple) and len(_key) == 1: + if len(_key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) (_key,) = _key @@ -1808,7 +1810,7 @@ def _handle_result( return self._convert_scalar(result) def _oindex_get( - self, indexer: _IndexerKey + self, indexer: OuterIndexer ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1924,7 +1926,7 @@ def _convert_scalar(self, item): return super()._convert_scalar(item) def _oindex_get( - self, indexer: _IndexerKey + self, indexer: OuterIndexer ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter From 014e7cf9d6871848cc1345cb2e7fa5814426874f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 18:56:56 -0700 Subject: [PATCH 11/32] remove unnecessary copy argument from __array__ method in MemoryCachedArray --- xarray/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 86e747aedc4..1447f00df05 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -863,7 +863,7 @@ def _ensure_cached(self): def __array__( self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None ) -> np.ndarray: - return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) + return np.asarray(self.get_duck_array(), dtype=dtype) def get_duck_array(self): self._ensure_cached() From 5e22be6e5baf77f0c77fc7203a046ef4ac3b1616 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 19:05:00 -0700 Subject: [PATCH 12/32] another attempt at fixing types --- xarray/core/indexing.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 1447f00df05..8fe172f6d25 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -577,7 +577,7 @@ def __array__( self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None ) -> np.ndarray: if Version(np.__version__) >= Version("2.0.0"): - return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) # type: ignore[call-overload] + return np.asarray(self.get_duck_array(), dtype=dtype, copy=copy) else: return np.asarray(self.get_duck_array(), dtype=dtype) @@ -1652,7 +1652,8 @@ def _oindex_get(self, indexer: OuterIndexer): # manual orthogonal indexing (implemented like DaskIndexingAdapter) value = self.array - for axis, subkey in reversed(list(enumerate(indexer))): + subkey: Any + for axis, subkey in reversed(list(enumerate(indexer))): # type: ignore value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value @@ -1693,7 +1694,8 @@ def _oindex_get(self, indexer: OuterIndexer): except NotImplementedError: # manual orthogonal indexing value = self.array - for axis, subkey in reversed(list(enumerate(indexer))): + subkey: Any + for axis, subkey in reversed(list(enumerate(indexer))): # type: ignore value = value[(slice(None),) * axis + (subkey,)] return value From 7056aba3d8c3ea1a8c05c9554c745b2aeea1106a Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 19:06:51 -0700 Subject: [PATCH 13/32] remove backend-indexing branch from CI workflows --- .github/workflows/ci-additional.yaml | 2 -- .github/workflows/ci.yaml | 2 -- 2 files changed, 4 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 251a11f7e99..aeac92250b6 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -3,11 +3,9 @@ on: push: branches: - "main" - - "backend-indexing" pull_request: branches: - "main" - - "backend-indexing" workflow_dispatch: # allows you to trigger manually concurrency: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 276629791f0..e0f9489e325 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -3,11 +3,9 @@ on: push: branches: - "main" - - "backend-indexing" pull_request: branches: - "main" - - "backend-indexing" workflow_dispatch: # allows you to trigger manually concurrency: From 43046e88d24b637275c7da7041d7ca0922c30b3a Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 19:09:14 -0700 Subject: [PATCH 14/32] remove unnecessary type ignore comments --- xarray/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 8fe172f6d25..5b0ea92fd84 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1653,7 +1653,7 @@ def _oindex_get(self, indexer: OuterIndexer): value = self.array subkey: Any - for axis, subkey in reversed(list(enumerate(indexer))): # type: ignore + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value @@ -1695,7 +1695,7 @@ def _oindex_get(self, indexer: OuterIndexer): # manual orthogonal indexing value = self.array subkey: Any - for axis, subkey in reversed(list(enumerate(indexer))): # type: ignore + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey,)] return value From 2a7e2f2e7bbecfe9d62531c19459082f03ed6759 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 19:16:57 -0700 Subject: [PATCH 15/32] fix: update indexing to use tuple from indexer for improved compatibility --- xarray/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 5b0ea92fd84..8a33ae35709 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1653,7 +1653,7 @@ def _oindex_get(self, indexer: OuterIndexer): value = self.array subkey: Any - for axis, subkey in reversed(list(enumerate(indexer))): + for axis, subkey in reversed(list(enumerate(indexer.tuple))): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value @@ -1695,7 +1695,7 @@ def _oindex_get(self, indexer: OuterIndexer): # manual orthogonal indexing value = self.array subkey: Any - for axis, subkey in reversed(list(enumerate(indexer))): + for axis, subkey in reversed(list(enumerate(indexer.tuple))): value = value[(slice(None),) * axis + (subkey,)] return value From ead425196ac3177207081102d0c059d6d5cc21f2 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 31 Oct 2024 19:35:29 -0700 Subject: [PATCH 16/32] more type hints --- xarray/core/indexing.py | 102 ++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 50 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 8a33ae35709..b010cb6ce66 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -209,7 +209,7 @@ def map_index_queries( return merged -def expanded_indexer(key, ndim): +def expanded_indexer(key: Any, ndim: int) -> tuple[Any, ...]: """Given a key for indexing an ndarray, return an equivalent key which is a tuple with length equal to the number of dimensions. @@ -298,7 +298,7 @@ def slice_slice(old_slice: slice, applied_slice: slice, size: int) -> slice: return slice(start, stop, step) -def _index_indexer_1d(old_indexer, applied_indexer, size: int): +def _index_indexer_1d(old_indexer: Any, applied_indexer: Any, size: int) -> Any: if isinstance(applied_indexer, slice) and applied_indexer == slice(None): # shortcut for the usual case return old_indexer @@ -525,15 +525,15 @@ def get_duck_array(self): class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed): __slots__ = () - def get_duck_array(self): + def get_duck_array(self) -> Any: return self[(slice(None),) * self.ndim] - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) @@ -581,10 +581,10 @@ def __array__( else: return np.asarray(self.get_duck_array(), dtype=dtype) - def get_duck_array(self): + def get_duck_array(self) -> Any: return self.array.get_duck_array() - def __getitem__(self, key: _IndexerKey | slice): + def __getitem__(self, key: _IndexerKey | slice) -> Any: _key = expanded_indexer(key, self.ndim) indexer = self.indexer_cls(_key) @@ -662,7 +662,7 @@ def _updated_key( def shape(self) -> _Shape: return self._shape - def get_duck_array(self): + def get_duck_array(self) -> Any: try: array = apply_indexer(self.array, self.key) except NotImplementedError as _: @@ -685,17 +685,17 @@ def get_duck_array(self): array = array.get_duck_array() return _wrap_numpy_scalars(array) - def transpose(self, order): + def transpose(self, order) -> Any: return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> LazilyIndexedArray: return type(self)(self.array, self._updated_key(indexer)) - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> Any: array = LazilyVectorizedIndexedArray(self.array, self.key) return array.vindex[indexer] - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _IndexerKey) -> LazilyIndexedArray: return type(self)(self.array, self._updated_key(indexer)) def _vindex_set(self, key: _IndexerKey, value: Any) -> None: @@ -743,7 +743,7 @@ def __init__(self, array: duckarray[Any, Any], key: ExplicitIndexer): def shape(self) -> _Shape: return np.broadcast(*self.key.tuple).shape - def get_duck_array(self): + def get_duck_array(self) -> Any: try: array = apply_indexer(self.array, self.key) except NotImplementedError as _: @@ -766,23 +766,23 @@ def get_duck_array(self): array = array.get_duck_array() return _wrap_numpy_scalars(array) - def _updated_key(self, new_key: ExplicitIndexer): + def _updated_key(self, new_key: ExplicitIndexer) -> VectorizedIndexer: return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> LazilyVectorizedIndexedArray: return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> LazilyVectorizedIndexedArray: return type(self)(self.array, self._updated_key(VectorizedIndexer(indexer))) - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _IndexerKey) -> Any: # If the indexed array becomes a scalar, return LazilyIndexedArray if all(isinstance(ind, integer_types) for ind in indexer): key = BasicIndexer(tuple(k[indexer] for k in self.key.tuple)) return LazilyIndexedArray(self.array, key) return type(self)(self.array, self._updated_key(BasicIndexer(indexer))) - def transpose(self, order): + def transpose(self, order) -> LazilyVectorizedIndexedArray: key = VectorizedIndexer(tuple(k.transpose(order) for k in self.key.tuple)) return type(self)(self.array, key) @@ -811,24 +811,24 @@ def __init__(self, array: duckarray[Any, Any]): self.array = as_indexable(array) self._copied = False - def _ensure_copied(self): + def _ensure_copied(self) -> None: if not self._copied: self.array = as_indexable(np.array(self.array)) self._copied = True - def get_duck_array(self): + def get_duck_array(self) -> Any: return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _IndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array[indexer])) - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: @@ -844,7 +844,7 @@ def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self.array[indexer] = value - def __deepcopy__(self, memo): + def __deepcopy__(self, memo) -> CopyOnWriteArray: # CopyOnWriteArray is used to wrap backend array objects, which might # point to files on disk, so we can't rely on the default deepcopy # implementation. @@ -857,7 +857,7 @@ class MemoryCachedArray(ExplicitlyIndexedNDArrayMixin): def __init__(self, array): self.array = _wrap_numpy_scalars(as_indexable(array)) - def _ensure_cached(self): + def _ensure_cached(self) -> None: self.array = as_indexable(self.array.get_duck_array()) def __array__( @@ -865,20 +865,20 @@ def __array__( ) -> np.ndarray: return np.asarray(self.get_duck_array(), dtype=dtype) - def get_duck_array(self): + def get_duck_array(self) -> Any: self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _IndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array[indexer])) - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: @@ -891,7 +891,7 @@ def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self.array[indexer] = value -def as_indexable(array): +def as_indexable(array: Any): """ This function always returns a ExplicitlyIndexed subclass, so that the vectorized indexing is always possible with the returned @@ -950,7 +950,9 @@ def _outer_to_vectorized_indexer( return VectorizedIndexer(new_key) -def _outer_to_numpy_indexer(indexer: BasicIndexer | OuterIndexer, shape: _Shape): +def _outer_to_numpy_indexer( + indexer: BasicIndexer | OuterIndexer, shape: _Shape +) -> tuple[Any, ...]: """Convert an OuterIndexer into an indexer for NumPy. Parameters @@ -1073,7 +1075,7 @@ def __repr__(self): return f"CompatIndexedTuple({super().__repr__()}, indexer_type='{self.indexer_type}')" -def apply_indexer(indexable, indexer: ExplicitIndexer): +def apply_indexer(indexable, indexer: ExplicitIndexer) -> Any: """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): return indexable.vindex[CompatIndexedTuple(indexer.tuple, "vectorized")] @@ -1411,7 +1413,7 @@ def _logical_any(args): return functools.reduce(operator.or_, args) -def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None): +def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None) -> Any: key = (k for k in key if not isinstance(k, slice)) chunks_hint = getattr(data, "chunks", None) @@ -1438,7 +1440,7 @@ def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None): def create_mask( indexer: ExplicitIndexer, shape: _Shape, data: duckarray[Any, Any] | None = None -): +) -> duckarray[bool, Any]: """Create a mask for indexing with a fill-value. Parameters @@ -1566,18 +1568,18 @@ def __init__(self, array): ) self.array = array - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> Any: key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> Any: array = NumpyVIndexAdapter(self.array) return array[indexer] - def __getitem__(self, indexer: _IndexerKey | ExplicitIndexer): + def __getitem__(self, indexer: _IndexerKey | ExplicitIndexer) -> Any: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see @@ -1648,19 +1650,19 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> Any: # manual orthogonal indexing (implemented like DaskIndexingAdapter) value = self.array subkey: Any - for axis, subkey in reversed(list(enumerate(indexer.tuple))): + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> Any: raise TypeError("Vectorized indexing is not supported") - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _IndexerKey) -> Any: return self.array[indexer] def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: @@ -1688,21 +1690,21 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: OuterIndexer) -> Any: try: return self.array[indexer] except NotImplementedError: # manual orthogonal indexing value = self.array subkey: Any - for axis, subkey in reversed(list(enumerate(indexer.tuple))): + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey,)] return value - def _vindex_get(self, indexer: _IndexerKey): + def _vindex_get(self, indexer: _IndexerKey) -> Any: return self.array.vindex[indexer] - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _IndexerKey) -> Any: return self.array[indexer] def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: @@ -1719,7 +1721,7 @@ def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self.array[indexer] = value - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) @@ -1768,7 +1770,7 @@ def get_duck_array(self) -> np.ndarray: def shape(self) -> _Shape: return (len(self.array),) - def _convert_scalar(self, item): + def _convert_scalar(self, item) -> Any: if item is pd.NaT: # work around the impossibility of casting NaT with asarray # note: it probably would be better in general to return From 76b2d5abadb5228488d702743f6baa6215da3f49 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 4 Nov 2024 16:44:37 -0800 Subject: [PATCH 17/32] update type hints for `expanded_indexer()` function --- xarray/core/indexing.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index b010cb6ce66..92ab48f4b95 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -38,7 +38,14 @@ from xarray.core.indexes import Index from xarray.core.types import Self from xarray.core.variable import Variable - from xarray.namedarray._typing import _Chunks, _IndexerKey, _Shape, duckarray + from xarray.namedarray._typing import ( + _Chunks, + _IndexerKey, + _IndexKey, + _IndexKeys, + _Shape, + duckarray, + ) from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -209,7 +216,7 @@ def map_index_queries( return merged -def expanded_indexer(key: Any, ndim: int) -> tuple[Any, ...]: +def expanded_indexer(key: _IndexerKey | _IndexKeys, ndim: int) -> _IndexKeys: """Given a key for indexing an ndarray, return an equivalent key which is a tuple with length equal to the number of dimensions. @@ -220,22 +227,22 @@ def expanded_indexer(key: Any, ndim: int) -> tuple[Any, ...]: if not isinstance(key, tuple): # numpy treats non-tuple keys equivalent to tuples of length 1 key = (key,) - new_key = () + new_key: list[_IndexKey] = [] # handling Ellipsis right is a little tricky, see: # https://numpy.org/doc/stable/reference/arrays.indexing.html#advanced-indexing found_ellipsis = False for k in key: if k is Ellipsis: if not found_ellipsis: - new_key += (slice(None),) * (ndim + 1 - len(key)) + new_key.extend([slice(None)] * (ndim + 1 - len(key))) found_ellipsis = True else: - new_key += (slice(None),) + new_key.append(slice(None)) else: - new_key += (k,) + new_key.append(k) if len(new_key) > ndim: raise IndexError("too many indices") - new_key += (slice(None),) * (ndim - len(new_key)) + new_key.extend([slice(None)] * (ndim - len(new_key))) return tuple(new_key) From ee81af3502dcf692d2972d4dcc8e0008fb22e302 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 Nov 2024 20:56:13 +0000 Subject: [PATCH 18/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 140f31a17e2..1b5eaeb36ae 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -240,7 +240,7 @@ def __init__(self, array): @property def dtype(self): - return np.dtype(f"S{str(self.array.shape[-1])}") + return np.dtype(f"S{self.array.shape[-1]!s}") @property def shape(self) -> tuple[int, ...]: From 0c86622b7549c79034b76c4378fdeaa16fc3aa49 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 10 Nov 2024 20:42:00 -0700 Subject: [PATCH 19/32] Use tuples for indexing --- xarray/backends/common.py | 9 ++- xarray/backends/h5netcdf_.py | 25 +++--- xarray/backends/netCDF4_.py | 25 +++--- xarray/backends/pydap_.py | 27 ++++--- xarray/backends/scipy_.py | 27 ++++--- xarray/backends/zarr.py | 34 ++++----- xarray/coding/strings.py | 4 +- xarray/core/indexing.py | 143 ++++++++++++++++++++++++++++++++--- xarray/namedarray/_typing.py | 3 + 9 files changed, 222 insertions(+), 75 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 6113298c8f5..c2db815fc9e 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -22,6 +22,7 @@ from xarray.core.dataset import Dataset from xarray.core.types import NestedSequence + from xarray.namedarray._typing import _IndexerKey # Create a logger object, but don't add any handlers. Leave that to user code. logger = logging.getLogger(__name__) @@ -219,18 +220,18 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): - __slots__ = () + __slots__ = ("indexing_support",) def get_duck_array(self, dtype: np.typing.DTypeLike = None): - key = indexing.BasicIndexer((slice(None),) * self.ndim) + key = (slice(None),) * self.ndim return self[key] # type: ignore [index] - def _oindex_get(self, key: indexing.OuterIndexer): + def _oindex_get(self, key: _IndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, key: indexing.VectorizedIndexer): + def _vindex_get(self, key: _IndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index b1756330d69..89360a6ebe6 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -44,26 +44,33 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) class H5NetCDFArrayWrapper(BaseNetCDF4Array): + indexing_support = indexing.IndexingSupport.OUTER_1VECTOR + def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) return ds.variables[self.variable_name] - def _oindex_get(self, key: indexing.OuterIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + return indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) - def _vindex_get(self, key: indexing.VectorizedIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + return indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) - def __getitem__(self, key: indexing.BasicIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def __getitem__(self, key: _BasicIndexerKey) -> Any: + return indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) def _getitem(self, key): diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index c097738b11d..8a96317453c 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -49,6 +49,11 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) # This lookup table maps from dtype.byteorder to a readable endian # string used by netCDF4. @@ -89,7 +94,7 @@ def get_array(self, needs_lock=True): class NetCDF4ArrayWrapper(BaseNetCDF4Array): - __slots__ = () + indexing_support = indexing.IndexingSupport.OUTER def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) @@ -100,19 +105,19 @@ def get_array(self, needs_lock=True): variable.set_auto_chartostring(False) return variable - def _oindex_get(self, key: indexing.OuterIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + def _oindex_get(self, key: _OuterIndexerKey): + return indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) - def _vindex_get(self, key: indexing.VectorizedIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + def _vindex_get(self, key: _VectorizedIndexerKey): + return indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) - def __getitem__(self, key: indexing.BasicIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + def __getitem__(self, key: _BasicIndexerKey): + return indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) def _getitem(self, key): diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 2ce3a579b2d..44b22e07036 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -29,10 +29,17 @@ from io import BufferedIOBase from xarray.core.dataset import Dataset + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) class PydapArrayWrapper(BackendArray): - def __init__(self, array): + indexing_support = indexing.IndexingSupport.BASIC + + def __init__(self, array) -> None: self.array = array @property @@ -43,19 +50,19 @@ def shape(self) -> tuple[int, ...]: def dtype(self): return self.array.dtype - def _oindex_get(self, key: indexing.OuterIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + return indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) - def _vindex_get(self, key: indexing.VectorizedIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + return indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) - def __getitem__(self, key: indexing.BasicIndexer): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + def __getitem__(self, key: _BasicIndexerKey) -> Any: + return indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) def _getitem(self, key): diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index c9991b15a13..e25ff829d1f 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -38,6 +38,11 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") @@ -56,6 +61,8 @@ def _decode_attrs(d): class ScipyArrayWrapper(BackendArray): + indexing_support = indexing.IndexingSupport.OUTER_1VECTOR + def __init__(self, variable_name, datastore): self.datastore = datastore self.variable_name = variable_name @@ -85,25 +92,25 @@ def _getitem(self, key): data = self.get_variable(needs_lock=False).data return data[key] - def _vindex_get(self, key: indexing.VectorizedIndexer): - data = indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + data = indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) return self._finalize_result(data) - def _oindex_get(self, key: indexing.OuterIndexer): - data = indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + data = indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) return self._finalize_result(data) - def __getitem__(self, key): - data = indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def __getitem__(self, key: _BasicIndexerKey) -> Any: + data = indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) return self._finalize_result(data) - def __setitem__(self, key, value): + def __setitem__(self, key, value) -> None: with self.datastore.lock: data = self.get_variable(needs_lock=False) try: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index aef8ffd0b63..dc91ee57629 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -43,6 +43,11 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) def _get_mappers(*, storage_options, store, chunk_store): @@ -182,7 +187,7 @@ def encode_zarr_attr_value(value): class ZarrArrayWrapper(BackendArray): - __slots__ = ("_array", "dtype", "shape") + indexing_support = indexing.IndexingSupport.VECTORIZED def __init__(self, zarr_array): # some callers attempt to evaluate an array if an `array` property exists on the object. @@ -205,37 +210,28 @@ def __init__(self, zarr_array): def get_array(self): return self._array - def _oindex_get(self, key: indexing.OuterIndexer): + def _oindex_get(self, key: _OuterIndexerKey) -> Any: def raw_indexing_method(key): return self._array.oindex[key] - return indexing.explicit_indexing_adapter( - key, - self._array.shape, - indexing.IndexingSupport.VECTORIZED, - raw_indexing_method, + return indexing.outer_indexing_adapter( + key, self._array.shape, self.indexing_support, raw_indexing_method ) - def _vindex_get(self, key: indexing.VectorizedIndexer): + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: def raw_indexing_method(key): return self._array.vindex[key] - return indexing.explicit_indexing_adapter( - key, - self._array.shape, - indexing.IndexingSupport.VECTORIZED, - raw_indexing_method, + return indexing.vectorized_indexing_adapter( + key, self._array.shape, self.indexing_support, raw_indexing_method ) - def __getitem__(self, key: indexing.BasicIndexer): + def __getitem__(self, key: _BasicIndexerKey) -> Any: def raw_indexing_method(key): return self._array[key] - return indexing.explicit_indexing_adapter( - key, - self._array.shape, - indexing.IndexingSupport.VECTORIZED, - raw_indexing_method, + return indexing.basic_indexing_adapter( + key, self._array.shape, self.indexing_support, raw_indexing_method ) # if self.ndim == 0: diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 1b5eaeb36ae..fd2e396a17b 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -256,10 +256,8 @@ def _oindex_get(self, key: indexing.OuterIndexer): return _numpy_char_to_bytes(self.array.oindex[key]) def __getitem__(self, key: _IndexerKey): - from xarray.core.indexing import BasicIndexer - # require slicing the last dimension completely indexer = indexing.expanded_indexer(key, self.array.ndim) if indexer[-1] != slice(None): raise IndexError("too many indices") - return _numpy_char_to_bytes(self.array[BasicIndexer(indexer)]) + return _numpy_char_to_bytes(self.array[indexer]) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index f98774581b1..716ad9670de 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1023,6 +1023,124 @@ class IndexingSupport(enum.Enum): VECTORIZED = 3 +def _finish_indexing( + raw_indexing_method: Callable[..., Any], + *, + raw_key, + numpy_indices, +) -> Any: + result = raw_indexing_method(raw_key.tuple) + if numpy_indices.tuple: + # index the loaded np.ndarray + result = apply_indexer(NumpyIndexingAdapter(result), numpy_indices) + return result + + +def basic_indexing_adapter( + key: _IndexerKey, + shape: _Shape, + indexing_support: IndexingSupport, + raw_indexing_method: Callable[..., Any], +) -> Any: + """Support explicit indexing by delegating to a raw indexing method. + + Outer and/or vectorized indexers are supported by indexing a second time + with a NumPy array. + + Parameters + ---------- + key : ExplicitIndexer + Explicit indexing object. + shape : Tuple[int, ...] + Shape of the indexed array. + indexing_support : IndexingSupport enum + Form of indexing supported by raw_indexing_method. + raw_indexing_method : callable + Function (like ndarray.__getitem__) that when called with indexing key + in the form of a tuple returns an indexed array. + + Returns + ------- + Indexing result, in the form of a duck numpy-array. + """ + raw_key, numpy_indices = _decompose_outer_indexer( + BasicIndexer(key), shape, indexing_support + ) + return _finish_indexing( + raw_indexing_method, raw_key=raw_key, numpy_indices=numpy_indices + ) + + +def outer_indexing_adapter( + key: _IndexerKey, + shape: _Shape, + indexing_support: IndexingSupport, + raw_indexing_method: Callable[..., Any], +) -> Any: + """Support explicit indexing by delegating to a raw indexing method. + + Outer and/or vectorized indexers are supported by indexing a second time + with a NumPy array. + + Parameters + ---------- + key : ExplicitIndexer + Explicit indexing object. + shape : Tuple[int, ...] + Shape of the indexed array. + indexing_support : IndexingSupport enum + Form of indexing supported by raw_indexing_method. + raw_indexing_method : callable + Function (like ndarray.__getitem__) that when called with indexing key + in the form of a tuple returns an indexed array. + + Returns + ------- + Indexing result, in the form of a duck numpy-array. + """ + raw_key, numpy_indices = _decompose_outer_indexer( + OuterIndexer(key), shape, indexing_support + ) + return _finish_indexing( + raw_indexing_method, raw_key=raw_key, numpy_indices=numpy_indices + ) + + +def vectorized_indexing_adapter( + key: _IndexerKey, + shape: _Shape, + indexing_support: IndexingSupport, + raw_indexing_method: Callable[..., Any], +) -> Any: + """Support explicit indexing by delegating to a raw indexing method. + + Outer and/or vectorized indexers are supported by indexing a second time + with a NumPy array. + + Parameters + ---------- + key : ExplicitIndexer + Explicit indexing object. + shape : Tuple[int, ...] + Shape of the indexed array. + indexing_support : IndexingSupport enum + Form of indexing supported by raw_indexing_method. + raw_indexing_method : callable + Function (like ndarray.__getitem__) that when called with indexing key + in the form of a tuple returns an indexed array. + + Returns + ------- + Indexing result, in the form of a duck numpy-array. + """ + raw_key, numpy_indices = _decompose_vectorized_indexer( + VectorizedIndexer(key), shape, indexing_support + ) + return _finish_indexing( + raw_indexing_method, raw_key=raw_key, numpy_indices=numpy_indices + ) + + def explicit_indexing_adapter( key: ExplicitIndexer, shape: _Shape, @@ -1050,13 +1168,13 @@ def explicit_indexing_adapter( ------- Indexing result, in the form of a duck numpy-array. """ - raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support) - result = raw_indexing_method(raw_key.tuple) - if numpy_indices.tuple: - # index the loaded np.ndarray - indexable = NumpyIndexingAdapter(result) - result = apply_indexer(indexable, numpy_indices) - return result + if isinstance(key, VectorizedIndexer): + return vectorized_indexing_adapter(key.tuple, shape, indexing_support) + elif isinstance(key, OuterIndexer): + return outer_indexing_adapter(key.tuple, shape, indexing_support) + elif isinstance(key, BasicIndexer): + return basic_indexing_adapter(key.tuple, shape, indexing_support) + raise TypeError(f"unexpected key type: {key}") class CompatIndexedTuple(tuple): @@ -1085,11 +1203,16 @@ def __repr__(self): def apply_indexer(indexable, indexer: ExplicitIndexer) -> Any: """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): - return indexable.vindex[CompatIndexedTuple(indexer.tuple, "vectorized")] + return indexable.vindex[indexer.tuple] elif isinstance(indexer, OuterIndexer): - return indexable.oindex[CompatIndexedTuple(indexer.tuple, "outer")] + return indexable.oindex[indexer.tuple] + elif isinstance(indexer, BasicIndexer): + return indexable[indexer.tuple] else: - return indexable[CompatIndexedTuple(indexer.tuple, "basic")] + raise TypeError( + f"Received indexer of type {type(indexer)!r}. " + "Expected BasicIndexer, OuterIndexer, or VectorizedIndexer" + ) def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 1c26924a67d..a062882ae43 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -95,6 +95,9 @@ def dtype(self) -> _DType_co: ... _IndexKeys = tuple[_IndexKey, ...] # tuple[Union[_IndexKey, None], ...] _IndexKeyLike = Union[_IndexKey, _IndexKeys] _IndexerKey = tuple[Any, ...] +_BasicIndexerKey = tuple[Any, ...] +_OuterIndexerKey = tuple[Any, ...] +_VectorizedIndexerKey = tuple[Any, ...] _AttrsLike = Union[Mapping[Any, Any], None] From 0f54b64e4f4226564e4975afe7b2f8ba0f03e9ea Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 17:51:16 -0700 Subject: [PATCH 20/32] Remove CompatIndexedTuple --- xarray/core/indexing.py | 38 ++------------------------------------ 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 716ad9670de..674dfe5c0e7 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -10,7 +10,7 @@ from dataclasses import dataclass, field from datetime import timedelta from html import escape -from typing import TYPE_CHECKING, Any, Literal, overload +from typing import TYPE_CHECKING, Any, overload import numpy as np import pandas as pd @@ -1177,29 +1177,6 @@ def explicit_indexing_adapter( raise TypeError(f"unexpected key type: {key}") -class CompatIndexedTuple(tuple): - """ - A tuple subclass used to transition existing backend implementations towards the use of raw tuples - for indexing by carrying additional metadata about the type of indexing being - performed ('basic', 'vectorized', or 'outer'). This class serves as a bridge, allowing - backend arrays that currently expect this metadata to function correctly while - maintaining the outward behavior of a regular tuple. - - This class is particularly useful during the phase where the backend implementations are - not yet capable of directly accepting raw tuples without additional context about - the indexing type. It ensures that these backends can still correctly interpret and - process indexing operations by providing them with the necessary contextual information. - """ - - def __new__(cls, iterable, indexer_type: Literal["basic", "vectorized", "outer"]): - obj = super().__new__(cls, iterable) - obj.indexer_type = indexer_type # type: ignore[attr-defined] - return obj - - def __repr__(self): - return f"CompatIndexedTuple({super().__repr__()}, indexer_type='{self.indexer_type}')" - - def apply_indexer(indexable, indexer: ExplicitIndexer) -> Any: """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): @@ -1226,19 +1203,8 @@ def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: def decompose_indexer( - indexer: ExplicitIndexer | CompatIndexedTuple, - shape: _Shape, - indexing_support: IndexingSupport, + indexer: ExplicitIndexer, shape: _Shape, indexing_support: IndexingSupport ) -> tuple[ExplicitIndexer, ExplicitIndexer]: - if isinstance(indexer, CompatIndexedTuple): - # recreate the indexer object from the tuple and the type of indexing. - # This is necessary to ensure that the backend array can correctly interpret the indexing operation. - if indexer.indexer_type == "vectorized": # type: ignore[attr-defined] - indexer = VectorizedIndexer(indexer) - elif indexer.indexer_type == "outer": # type: ignore[attr-defined] - indexer = OuterIndexer(indexer) - else: - indexer = BasicIndexer(indexer) if isinstance(indexer, VectorizedIndexer): return _decompose_vectorized_indexer(indexer, shape, indexing_support) if isinstance(indexer, BasicIndexer | OuterIndexer): From b60accd36de49ebf20ed4d8fd42af79e7028d66a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 20:36:07 -0700 Subject: [PATCH 21/32] Some typing work --- xarray/coding/strings.py | 15 +++- xarray/core/indexing.py | 147 ++++++++++++++++++----------------- xarray/namedarray/_typing.py | 8 +- 3 files changed, 91 insertions(+), 79 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index cd190e769af..37b9e548e0f 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -3,6 +3,7 @@ from __future__ import annotations from functools import partial +from typing import TYPE_CHECKING import numpy as np @@ -17,12 +18,18 @@ from xarray.core import indexing from xarray.core.utils import module_available from xarray.core.variable import Variable -from xarray.namedarray._typing import _IndexerKey from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") +if TYPE_CHECKING: + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) + def create_vlen_dtype(element_type): if element_type not in (str, bytes): @@ -249,13 +256,13 @@ def shape(self) -> tuple[int, ...]: def __repr__(self): return f"{type(self).__name__}({self.array!r})" - def _vindex_get(self, key: _IndexerKey): + def _vindex_get(self, key: _VectorizedIndexerKey): return _numpy_char_to_bytes(self.array.vindex[key]) - def _oindex_get(self, key: indexing.OuterIndexer): + def _oindex_get(self, key: _OuterIndexerKey): return _numpy_char_to_bytes(self.array.oindex[key]) - def __getitem__(self, key: _IndexerKey): + def __getitem__(self, key: _BasicIndexerKey): # require slicing the last dimension completely indexer = indexing.expanded_indexer(key, self.array.ndim) if indexer[-1] != slice(None): diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 674dfe5c0e7..c02e43c3eb4 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -39,11 +39,14 @@ from xarray.core.types import Self from xarray.core.variable import Variable from xarray.namedarray._typing import ( + _BasicIndexerKey, _Chunks, _IndexerKey, _IndexKey, _IndexKeys, + _OuterIndexerKey, _Shape, + _VectorizedIndexerKey, duckarray, ) from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -394,7 +397,7 @@ class BasicIndexer(ExplicitIndexer): __slots__ = () - def __init__(self, key: tuple[int | np.integer | slice, ...]): + def __init__(self, key: _BasicIndexerKey): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") @@ -426,9 +429,7 @@ class OuterIndexer(ExplicitIndexer): def __init__( self, - key: tuple[ - int | np.integer | slice | np.ndarray[Any, np.dtype[np.generic]], ... - ], + key: _OuterIndexerKey, ): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") @@ -473,7 +474,7 @@ class VectorizedIndexer(ExplicitIndexer): __slots__ = () - def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...]): + def __init__(self, key: _VectorizedIndexerKey): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") @@ -482,7 +483,7 @@ def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ... for k in key: if isinstance(k, slice): k = as_integer_slice(k) - elif is_duck_dask_array(k): + elif is_duck_dask_array(k): # type: ignore[arg-type] raise ValueError( "Vectorized indexing with Dask arrays is not supported. " "Please pass a numpy array by calling ``.compute``. " @@ -535,22 +536,22 @@ class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed): def get_duck_array(self) -> Any: return self[(slice(None),) * self.ndim] - def _oindex_get(self, indexer: OuterIndexer) -> Any: + def _oindex_get(self, indexer: _OuterIndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, indexer: _IndexerKey) -> Any: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) - def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._oindex_set method should be overridden" ) - def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._vindex_set method should be overridden" ) @@ -591,7 +592,7 @@ def __array__( def get_duck_array(self) -> Any: return self.array.get_duck_array() - def __getitem__(self, key: _IndexerKey | slice) -> Any: + def __getitem__(self, key) -> Any: _key = expanded_indexer(key, self.ndim) indexer = self.indexer_cls(_key) @@ -695,27 +696,27 @@ def get_duck_array(self) -> Any: def transpose(self, order) -> Any: return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer: OuterIndexer) -> LazilyIndexedArray: + def _oindex_get(self, indexer: _OuterIndexerKey) -> LazilyIndexedArray: return type(self)(self.array, self._updated_key(indexer)) - def _vindex_get(self, indexer: _IndexerKey) -> Any: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: array = LazilyVectorizedIndexedArray(self.array, self.key) return array.vindex[indexer] - def __getitem__(self, indexer: _IndexerKey) -> LazilyIndexedArray: + def __getitem__(self, indexer: _BasicIndexerKey) -> LazilyIndexedArray: return type(self)(self.array, self._updated_key(indexer)) - def _vindex_set(self, key: _IndexerKey, value: Any) -> None: + def _vindex_set(self, key: _VectorizedIndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." ) - def _oindex_set(self, key: _IndexerKey, value: Any) -> None: + def _oindex_set(self, key: _OuterIndexerKey, value: Any) -> None: full_key = self._updated_key(OuterIndexer(key)) self.array.oindex[full_key.tuple] = value - def __setitem__(self, key: _IndexerKey, value: Any) -> None: + def __setitem__(self, key: _BasicIndexerKey, value: Any) -> None: full_key = self._updated_key(BasicIndexer(key)) self.array[full_key.tuple] = value @@ -776,13 +777,15 @@ def get_duck_array(self) -> Any: def _updated_key(self, new_key: ExplicitIndexer) -> VectorizedIndexer: return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer: OuterIndexer) -> LazilyVectorizedIndexedArray: + def _oindex_get(self, indexer: _OuterIndexerKey) -> LazilyVectorizedIndexedArray: return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) - def _vindex_get(self, indexer: _IndexerKey) -> LazilyVectorizedIndexedArray: + def _vindex_get( + self, indexer: _VectorizedIndexerKey + ) -> LazilyVectorizedIndexedArray: return type(self)(self.array, self._updated_key(VectorizedIndexer(indexer))) - def __getitem__(self, indexer: _IndexerKey) -> Any: + def __getitem__(self, indexer: _BasicIndexerKey) -> Any: # If the indexed array becomes a scalar, return LazilyIndexedArray if all(isinstance(ind, integer_types) for ind in indexer): key = BasicIndexer(tuple(k[indexer] for k in self.key.tuple)) @@ -826,29 +829,28 @@ def _ensure_copied(self) -> None: def get_duck_array(self) -> Any: return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer) -> CopyOnWriteArray: + def _oindex_get(self, indexer: _OuterIndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: _IndexerKey) -> CopyOnWriteArray: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: _IndexerKey) -> CopyOnWriteArray: + def __getitem__(self, indexer: _BasicIndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order) -> Any: return self.array.transpose(order) - def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: self._ensure_copied() self.array.vindex[indexer] = value - def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: self._ensure_copied() self.array.oindex[indexer] = value - def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: self._ensure_copied() - self.array[indexer] = value def __deepcopy__(self, memo) -> CopyOnWriteArray: @@ -876,25 +878,25 @@ def get_duck_array(self) -> Any: self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer) -> MemoryCachedArray: + def _oindex_get(self, indexer: _OuterIndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: _IndexerKey) -> MemoryCachedArray: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: _IndexerKey) -> MemoryCachedArray: + def __getitem__(self, indexer: _BasicIndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order) -> Any: return self.array.transpose(order) - def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: self.array.vindex[indexer] = value - def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: self.array.oindex[indexer] = value - def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: self.array[indexer] = value @@ -943,7 +945,7 @@ def _outer_to_vectorized_indexer( n_dim = len([k for k in key if not isinstance(k, integer_types)]) i_dim = 0 - new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () for k, size in zip(key, shape, strict=True): if isinstance(k, integer_types): new_key += (np.array(k).reshape((1,) * n_dim),) @@ -1031,8 +1033,7 @@ def _finish_indexing( ) -> Any: result = raw_indexing_method(raw_key.tuple) if numpy_indices.tuple: - # index the loaded np.ndarray - result = apply_indexer(NumpyIndexingAdapter(result), numpy_indices) + result = apply_indexer(as_indexable(result), numpy_indices) return result @@ -1042,15 +1043,15 @@ def basic_indexing_adapter( indexing_support: IndexingSupport, raw_indexing_method: Callable[..., Any], ) -> Any: - """Support explicit indexing by delegating to a raw indexing method. + """Support explicit basic indexing by delegating to a raw indexing method. Outer and/or vectorized indexers are supported by indexing a second time with a NumPy array. Parameters ---------- - key : ExplicitIndexer - Explicit indexing object. + key : IndexerKey + Tuple indexer shape : Tuple[int, ...] Shape of the indexed array. indexing_support : IndexingSupport enum @@ -1077,15 +1078,12 @@ def outer_indexing_adapter( indexing_support: IndexingSupport, raw_indexing_method: Callable[..., Any], ) -> Any: - """Support explicit indexing by delegating to a raw indexing method. - - Outer and/or vectorized indexers are supported by indexing a second time - with a NumPy array. + """Support explicit outer indexing by delegating to a raw indexing method. Parameters ---------- - key : ExplicitIndexer - Explicit indexing object. + key : IndexerKey + tuple indexer shape : Tuple[int, ...] Shape of the indexed array. indexing_support : IndexingSupport enum @@ -1112,14 +1110,11 @@ def vectorized_indexing_adapter( indexing_support: IndexingSupport, raw_indexing_method: Callable[..., Any], ) -> Any: - """Support explicit indexing by delegating to a raw indexing method. - - Outer and/or vectorized indexers are supported by indexing a second time - with a NumPy array. + """Support explicit vectorized indexing by delegating to a raw indexing method. Parameters ---------- - key : ExplicitIndexer + key : IndexerKey Explicit indexing object. shape : Tuple[int, ...] Shape of the indexed array. @@ -1168,16 +1163,25 @@ def explicit_indexing_adapter( ------- Indexing result, in the form of a duck numpy-array. """ + # TODO: raise PendingDeprecationWarning here. if isinstance(key, VectorizedIndexer): - return vectorized_indexing_adapter(key.tuple, shape, indexing_support) + return vectorized_indexing_adapter( + key.tuple, shape, indexing_support, raw_indexing_method + ) elif isinstance(key, OuterIndexer): - return outer_indexing_adapter(key.tuple, shape, indexing_support) + return outer_indexing_adapter( + key.tuple, shape, indexing_support, raw_indexing_method + ) elif isinstance(key, BasicIndexer): - return basic_indexing_adapter(key.tuple, shape, indexing_support) + return basic_indexing_adapter( + key.tuple, shape, indexing_support, raw_indexing_method + ) raise TypeError(f"unexpected key type: {key}") -def apply_indexer(indexable, indexer: ExplicitIndexer) -> Any: +def apply_indexer( + indexable: ExplicitlyIndexedNDArrayMixin, indexer: ExplicitIndexer +) -> Any: """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): return indexable.vindex[indexer.tuple] @@ -1285,9 +1289,9 @@ def _decompose_vectorized_indexer( return indexer, BasicIndexer(()) backend_indexer_elems: tuple[ - int | np.integer | slice | np.ndarray[Any, np.dtype[np.generic]], ... + int | np.integer | slice | np.ndarray[Any, np.dtype[np.unsignedinteger]], ... ] = () - np_indexer_elems: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () + np_indexer_elems: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () # convert negative indices indexer_elems = [ np.where(k < 0, k + s, k) if isinstance(k, np.ndarray) else k @@ -1478,7 +1482,7 @@ def _arrayize_vectorized_indexer( arrays = [v for v in indexer.tuple if isinstance(v, np.ndarray)] n_dim = arrays[0].ndim if len(arrays) > 0 else 0 i_dim = 0 - new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () for v, size in zip(indexer.tuple, shape, strict=True): if isinstance(v, np.ndarray): new_key += (np.reshape(v, v.shape + (1,) * len(slices)),) @@ -1494,12 +1498,12 @@ def _chunked_array_with_chunks_hint( ): """Create a chunked array using the chunks hint for dimensions of size > 1.""" - if len(chunks) < array.ndim: + if len(chunks) != array.ndim: raise ValueError("not enough chunks in hint") new_chunks: _Chunks = tuple( chunk if size > 1 else 1 - for chunk, size in zip(chunks, array.shape, strict=False) + for chunk, size in zip(chunks, array.shape, strict=True) ) return chunkmanager.from_array(array, new_chunks) @@ -1667,7 +1671,7 @@ def __init__(self, array): def transpose(self, order) -> Any: return self.array.transpose(order) - def _oindex_get(self, indexer: OuterIndexer) -> Any: + def _oindex_get(self, indexer: _IndexerKey) -> Any: key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] @@ -1746,9 +1750,8 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: OuterIndexer) -> Any: + def _oindex_get(self, indexer: _IndexerKey) -> Any: # manual orthogonal indexing (implemented like DaskIndexingAdapter) - value = self.array subkey: Any for axis, subkey in reversed(list(enumerate(indexer))): @@ -1786,7 +1789,7 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, indexer: OuterIndexer) -> Any: + def _oindex_get(self, indexer: _OuterIndexerKey) -> Any: try: return self.array[indexer] except NotImplementedError: @@ -1797,13 +1800,13 @@ def _oindex_get(self, indexer: OuterIndexer) -> Any: value = value[(slice(None),) * axis + (subkey,)] return value - def _vindex_get(self, indexer: _IndexerKey) -> Any: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: return self.array.vindex[indexer] - def __getitem__(self, indexer: _IndexerKey) -> Any: + def __getitem__(self, indexer: _BasicIndexerKey) -> Any: return self.array[indexer] - def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer) if num_non_slices > 1: raise NotImplementedError( @@ -1811,10 +1814,10 @@ def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: ) self.array[indexer] = value - def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: self.array.vindex[indexer] = value - def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: self.array[indexer] = value def transpose(self, order) -> Any: @@ -1910,7 +1913,7 @@ def _handle_result( return self._convert_scalar(result) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -2026,7 +2029,7 @@ def _convert_scalar(self, item): return super()._convert_scalar(item) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _OuterIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -2040,7 +2043,7 @@ def _oindex_get( return result def _vindex_get( - self, indexer: _IndexerKey + self, indexer: _VectorizedIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -2053,7 +2056,7 @@ def _vindex_get( result.level = self.level return result - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _BasicIndexerKey): result = super().__getitem__(indexer) if isinstance(result, type(self)): result.level = self.level diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index a062882ae43..b0cf7e2fd40 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -95,9 +95,11 @@ def dtype(self) -> _DType_co: ... _IndexKeys = tuple[_IndexKey, ...] # tuple[Union[_IndexKey, None], ...] _IndexKeyLike = Union[_IndexKey, _IndexKeys] _IndexerKey = tuple[Any, ...] -_BasicIndexerKey = tuple[Any, ...] -_OuterIndexerKey = tuple[Any, ...] -_VectorizedIndexerKey = tuple[Any, ...] +_BasicIndexerKey = tuple[int | np.integer | slice, ...] +_OuterIndexerKey = tuple[ + int | np.integer | slice | np.ndarray[Any, np.dtype[np.integer]], ... +] +_VectorizedIndexerKey = tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] _AttrsLike = Union[Mapping[Any, Any], None] From 222c5c2bb4940ca5aeaddb5684251cc2d46d48b1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 20:50:00 -0700 Subject: [PATCH 22/32] more typing --- xarray/backends/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 4f1febf228e..d9ed89901a9 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -21,7 +21,7 @@ if TYPE_CHECKING: from xarray.core.dataset import Dataset from xarray.core.types import NestedSequence - from xarray.namedarray._typing import _IndexerKey + from xarray.namedarray._typing import _OuterIndexerKey, _VectorizedIndexerKey # Create a logger object, but don't add any handlers. Leave that to user code. logger = logging.getLogger(__name__) @@ -256,12 +256,12 @@ def get_duck_array(self, dtype: np.typing.DTypeLike = None): key = (slice(None),) * self.ndim return self[key] # type: ignore [index] - def _oindex_get(self, key: _IndexerKey) -> Any: + def _oindex_get(self, key: _OuterIndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, key: _IndexerKey) -> Any: + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) From 810b8224ebfeb6912f5e614b14e794ef6c0c9545 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 21:05:29 -0700 Subject: [PATCH 23/32] Fix test --- xarray/core/indexing.py | 6 +++--- xarray/tests/test_indexing.py | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index c02e43c3eb4..c5eaf036027 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -946,7 +946,7 @@ def _outer_to_vectorized_indexer( n_dim = len([k for k in key if not isinstance(k, integer_types)]) i_dim = 0 new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () - for k, size in zip(key, shape, strict=True): + for k, size in zip(key, shape, strict=False): if isinstance(k, integer_types): new_key += (np.array(k).reshape((1,) * n_dim),) else: # np.ndarray or slice @@ -1498,12 +1498,12 @@ def _chunked_array_with_chunks_hint( ): """Create a chunked array using the chunks hint for dimensions of size > 1.""" - if len(chunks) != array.ndim: + if len(chunks) < array.ndim: raise ValueError("not enough chunks in hint") new_chunks: _Chunks = tuple( chunk if size > 1 else 1 - for chunk, size in zip(chunks, array.shape, strict=True) + for chunk, size in zip(chunks, array.shape, strict=False) ) return chunkmanager.from_array(array, new_chunks) diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index b48fa76f589..fcaa637c332 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -842,13 +842,14 @@ def test_create_mask_basic_indexer() -> None: np.testing.assert_array_equal(False, actual) +@requires_dask def test_create_mask_dask() -> None: - da = pytest.importorskip("dask.array") + import dask.array as da indexer = indexing.OuterIndexer((1, slice(2), np.array([0, -1, 2]))) expected = np.array(2 * [[False, True, False]]) actual = indexing.create_mask( - indexer, (5, 5, 5), da.empty((2, 3), chunks=((1, 1), (2, 1))) + indexer, (5, 5, 5), da.empty((2, 3, 3), chunks=((1, 1), (2, 1), (3,))) ) assert actual.chunks == ((1, 1), (2, 1)) np.testing.assert_array_equal(expected, actual) From a4149654996549e955d48524243a60cd0d9c6db3 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 21:12:50 -0700 Subject: [PATCH 24/32] strict=True --- xarray/core/indexing.py | 19 +++++++++++-------- xarray/tests/test_indexing.py | 8 ++++---- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index c5eaf036027..738cc1252d2 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -946,7 +946,7 @@ def _outer_to_vectorized_indexer( n_dim = len([k for k in key if not isinstance(k, integer_types)]) i_dim = 0 new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () - for k, size in zip(key, shape, strict=False): + for k, size in zip(key, shape, strict=True): if isinstance(k, integer_types): new_key += (np.array(k).reshape((1,) * n_dim),) else: # np.ndarray or slice @@ -1375,7 +1375,7 @@ def _decompose_outer_indexer( assert isinstance(indexer, OuterIndexer | BasicIndexer) if indexing_support == IndexingSupport.VECTORIZED: - for k, s in zip(indexer.tuple, shape, strict=False): + for k, s in zip(indexer.tuple, shape, strict=True): if isinstance(k, slice): # If it is a slice, then we will slice it as-is # (but make its step positive) in the backend, @@ -1390,7 +1390,7 @@ def _decompose_outer_indexer( # make indexer positive pos_indexer: tuple[np.ndarray | int | np.number, ...] = () - for k, s in zip(indexer.tuple, shape, strict=False): + for k, s in zip(indexer.tuple, shape, strict=True): if isinstance(k, np.ndarray): pos_indexer += (np.where(k < 0, k + s, k),) elif isinstance(k, integer_types) and k < 0: @@ -1412,7 +1412,7 @@ def _decompose_outer_indexer( ] array_index = np.argmax(np.array(gains)) if len(gains) > 0 else None - for i, (k, s) in enumerate(zip(indexer_elems, shape, strict=False)): + for i, (k, s) in enumerate(zip(indexer_elems, shape, strict=True)): if isinstance(k, np.ndarray) and i != array_index: # np.ndarray key is converted to slice that covers the entire # entries of this key. @@ -1433,7 +1433,7 @@ def _decompose_outer_indexer( return OuterIndexer(backend_indexer), OuterIndexer(np_indexer) if indexing_support == IndexingSupport.OUTER: - for k, s in zip(indexer_elems, shape, strict=False): + for k, s in zip(indexer_elems, shape, strict=True): if isinstance(k, slice): # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) @@ -1455,7 +1455,7 @@ def _decompose_outer_indexer( # basic indexer assert indexing_support == IndexingSupport.BASIC - for k, s in zip(indexer_elems, shape, strict=False): + for k, s in zip(indexer_elems, shape, strict=True): if isinstance(k, np.ndarray): # np.ndarray key is converted to slice that covers the entire # entries of this key. @@ -1503,7 +1503,7 @@ def _chunked_array_with_chunks_hint( new_chunks: _Chunks = tuple( chunk if size > 1 else 1 - for chunk, size in zip(chunks, array.shape, strict=False) + for chunk, size in zip(chunks, array.shape, strict=True) ) return chunkmanager.from_array(array, new_chunks) @@ -1522,6 +1522,9 @@ def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None) -> A if isinstance(k, np.ndarray): if is_chunked_array(data): # type: ignore[arg-type] chunkmanager = get_chunked_array_type(data) + # TODO: the chunks_hint is the chunks for the whole array, + # and has nothing to do with the axes indexed by `k` + # This is why we need to use `strict-False` :/ new_keys += ( _chunked_array_with_chunks_hint(k, chunks_hint, chunkmanager), ) @@ -1570,7 +1573,7 @@ def create_mask( base_mask = _masked_result_drop_slice(key, data) slice_shape = tuple( np.arange(*k.indices(size)).size - for k, size in zip(key, shape, strict=False) + for k, size in zip(key, shape, strict=True) if isinstance(k, slice) ) expanded_mask = base_mask[(Ellipsis,) + (np.newaxis,) * len(slice_shape)] diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index fcaa637c332..ada0db889ee 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -633,7 +633,7 @@ def test_arrayize_vectorized_indexer(self) -> None: np.testing.assert_array_equal(b, np.arange(5)[:, np.newaxis]) -def get_indexers(shape, mode): +def get_indexers(shape: tuple[int, ...], mode) -> indexing.ExplicitIndexer: if mode == "vectorized": indexed_shape = (3, 4) indexer = tuple(np.random.randint(0, s, size=indexed_shape) for s in shape) @@ -662,7 +662,7 @@ def get_indexers(shape, mode): return indexing.BasicIndexer(tuple(indexer)) elif mode == "basic1": # basic indexer - return indexing.BasicIndexer((3,)) + return indexing.BasicIndexer((2,) * len(shape)) elif mode == "basic2": # basic indexer indexer = [0, 2, 4] @@ -821,14 +821,14 @@ def test_create_mask_outer_indexer() -> None: def test_create_mask_vectorized_indexer() -> None: indexer = indexing.VectorizedIndexer((np.array([0, -1, 2]), np.array([0, 1, -1]))) expected = np.array([False, True, True]) - actual = indexing.create_mask(indexer, (5,)) + actual = indexing.create_mask(indexer, (5, 5)) np.testing.assert_array_equal(expected, actual) indexer = indexing.VectorizedIndexer( (np.array([0, -1, 2]), slice(None), np.array([0, 1, -1])) ) expected = np.array([[False, True, True]] * 2).T - actual = indexing.create_mask(indexer, (5, 2)) + actual = indexing.create_mask(indexer, (5, 2, 5)) np.testing.assert_array_equal(expected, actual) From 0b99aea25eba1c736bf06c1ab9a9b2afc4d74537 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 21:21:24 -0700 Subject: [PATCH 25/32] more typing --- xarray/core/indexing.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 738cc1252d2..fdc35948a88 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1038,7 +1038,7 @@ def _finish_indexing( def basic_indexing_adapter( - key: _IndexerKey, + key: _BasicIndexerKey, shape: _Shape, indexing_support: IndexingSupport, raw_indexing_method: Callable[..., Any], @@ -1073,7 +1073,7 @@ def basic_indexing_adapter( def outer_indexing_adapter( - key: _IndexerKey, + key: _OuterIndexerKey, shape: _Shape, indexing_support: IndexingSupport, raw_indexing_method: Callable[..., Any], @@ -1105,7 +1105,7 @@ def outer_indexing_adapter( def vectorized_indexing_adapter( - key: _IndexerKey, + key: _VectorizedIndexerKey, shape: _Shape, indexing_support: IndexingSupport, raw_indexing_method: Callable[..., Any], @@ -1674,15 +1674,15 @@ def __init__(self, array): def transpose(self, order) -> Any: return self.array.transpose(order) - def _oindex_get(self, indexer: _IndexerKey) -> Any: + def _oindex_get(self, indexer: _OuterIndexerKey) -> np.ndarray: key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] - def _vindex_get(self, indexer: _IndexerKey) -> Any: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> np.ndarray: array = NumpyVIndexAdapter(self.array) return array[indexer] - def __getitem__(self, indexer: _IndexerKey | ExplicitIndexer) -> Any: + def __getitem__(self, indexer: _BasicIndexerKey) -> np.ndarray: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see @@ -1707,15 +1707,15 @@ def _safe_setitem(self, array, key: _IndexerKey, value: Any) -> None: else: raise exc - def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) self._safe_setitem(self.array, key, value) - def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: array = NumpyVIndexAdapter(self.array) self._safe_setitem(array, indexer, value) - def __setitem__(self, indexer: _IndexerKey | ExplicitIndexer, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see @@ -1753,7 +1753,7 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: _IndexerKey) -> Any: + def _oindex_get(self, indexer: _OuterIndexerKey) -> Any: # manual orthogonal indexing (implemented like DaskIndexingAdapter) value = self.array subkey: Any @@ -1761,19 +1761,19 @@ def _oindex_get(self, indexer: _IndexerKey) -> Any: value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value - def _vindex_get(self, indexer: _IndexerKey) -> Any: + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: raise TypeError("Vectorized indexing is not supported") - def __getitem__(self, indexer: _IndexerKey) -> Any: + def __getitem__(self, indexer: _BasicIndexerKey) -> Any: return self.array[indexer] - def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: self.array[indexer] = value - def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: raise TypeError("Vectorized indexing is not supported") - def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: self.array[indexer] = value def transpose(self, order): @@ -1916,7 +1916,7 @@ def _handle_result( return self._convert_scalar(result) def _oindex_get( - self, indexer: _IndexerKey + self, indexer: _OuterIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1935,7 +1935,7 @@ def _oindex_get( return self._handle_result(result) def _vindex_get( - self, indexer: _IndexerKey + self, indexer: _VectorizedIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1954,7 +1954,7 @@ def _vindex_get( return self._handle_result(result) def __getitem__( - self, indexer: _IndexerKey + self, indexer: _BasicIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter From 2ceaeac934a693aa36be192c29fab69c8845ea34 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 21:21:58 -0700 Subject: [PATCH 26/32] fix --- xarray/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index fdc35948a88..380700eb72e 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1503,7 +1503,7 @@ def _chunked_array_with_chunks_hint( new_chunks: _Chunks = tuple( chunk if size > 1 else 1 - for chunk, size in zip(chunks, array.shape, strict=True) + for chunk, size in zip(chunks, array.shape, strict=False) ) return chunkmanager.from_array(array, new_chunks) From 749da0bc0de71580230eef0152883929d9305c7e Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Nov 2024 21:28:42 -0700 Subject: [PATCH 27/32] some more fixes --- xarray/core/indexing.py | 4 ++-- xarray/tests/test_indexing.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 380700eb72e..e6e2848b157 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -478,7 +478,7 @@ def __init__(self, key: _VectorizedIndexerKey): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...] = () + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () ndim = None for k in key: if isinstance(k, slice): @@ -1573,7 +1573,7 @@ def create_mask( base_mask = _masked_result_drop_slice(key, data) slice_shape = tuple( np.arange(*k.indices(size)).size - for k, size in zip(key, shape, strict=True) + for k, size in zip(key, shape, strict=False) if isinstance(k, slice) ) expanded_mask = base_mask[(Ellipsis,) + (np.newaxis,) * len(slice_shape)] diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index ada0db889ee..5e1572cfa20 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -634,6 +634,7 @@ def test_arrayize_vectorized_indexer(self) -> None: def get_indexers(shape: tuple[int, ...], mode) -> indexing.ExplicitIndexer: + indexer: tuple[Any, ...] if mode == "vectorized": indexed_shape = (3, 4) indexer = tuple(np.random.randint(0, s, size=indexed_shape) for s in shape) @@ -859,7 +860,7 @@ def test_create_mask_dask() -> None: ) expected = np.array([[False, True, True]] * 2).T actual = indexing.create_mask( - indexer_vec, (5, 2), da.empty((3, 2), chunks=((3,), (2,))) + indexer_vec, (3, 2), da.empty((3, 2, 3), chunks=((3,), (2,), (3,))) ) assert isinstance(actual, da.Array) np.testing.assert_array_equal(expected, actual) From f58262a426ae263f40cb37c80823073e21cbe0be Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Nov 2024 07:33:59 -0700 Subject: [PATCH 28/32] little more type narrowing --- xarray/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index e6e2848b157..a27e1c0543d 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -796,7 +796,7 @@ def transpose(self, order) -> LazilyVectorizedIndexedArray: key = VectorizedIndexer(tuple(k.transpose(order) for k in self.key.tuple)) return type(self)(self.array, key) - def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." @@ -1694,7 +1694,7 @@ def __getitem__(self, indexer: _BasicIndexerKey) -> np.ndarray: ) return array[key] - def _safe_setitem(self, array, key: _IndexerKey, value: Any) -> None: + def _safe_setitem(self, array, key: _BasicIndexerKey, value: Any) -> None: try: array[key] = value except ValueError as exc: From dcd3ac9b396b2bdbb823024ca30d87ac187ef0c8 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Nov 2024 09:14:20 -0700 Subject: [PATCH 29/32] Refactor backend indexing tests --- xarray/tests/test_backends.py | 274 +++++++++++++++++----------------- 1 file changed, 138 insertions(+), 136 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index fd866cae5ee..021cf5df1d4 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -351,7 +351,144 @@ def test_dtype_coercion_error(self) -> None: ds.to_netcdf(path, format=format) -class DatasetIOBase: +class BackendIndexingTestsMixin: + def test_orthogonal_indexing(self) -> None: + in_memory = create_test_data() + with self.roundtrip(in_memory) as on_disk: + indexers = {"dim1": [1, 2, 0], "dim2": [3, 2, 0, 3], "dim3": np.arange(5)} + expected = in_memory.isel(indexers) + actual = on_disk.isel(**indexers) + # make sure the array is not yet loaded into memory + assert not actual["var1"].variable._in_memory + assert_identical(expected, actual) + # do it twice, to make sure we're switched from orthogonal -> numpy + # when we cached the values + actual = on_disk.isel(**indexers) + assert_identical(expected, actual) + + def test_vectorized_indexing(self) -> None: + in_memory = create_test_data() + with self.roundtrip(in_memory) as on_disk: + indexers = { + "dim1": DataArray([0, 2, 0], dims="a"), + "dim2": DataArray([0, 2, 3], dims="a"), + } + expected = in_memory.isel(indexers) + actual = on_disk.isel(**indexers) + # make sure the array is not yet loaded into memory + assert not actual["var1"].variable._in_memory + assert_identical(expected, actual.load()) + # do it twice, to make sure we're switched from + # vectorized -> numpy when we cached the values + actual = on_disk.isel(**indexers) + assert_identical(expected, actual) + + def multiple_indexing(indexers): + # make sure a sequence of lazy indexings certainly works. + with self.roundtrip(in_memory) as on_disk: + actual = on_disk["var3"] + expected = in_memory["var3"] + for ind in indexers: + actual = actual.isel(ind) + expected = expected.isel(ind) + # make sure the array is not yet loaded into memory + assert not actual.variable._in_memory + assert_identical(expected, actual.load()) + + # two-staged vectorized-indexing + indexers2 = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": DataArray([[0, 4], [1, 3], [2, 2]], dims=["a", "b"]), + }, + {"a": DataArray([0, 1], dims=["c"]), "b": DataArray([0, 1], dims=["c"])}, + ] + multiple_indexing(indexers2) + + # vectorized-slice mixed + indexers3 = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": slice(None, 10), + } + ] + multiple_indexing(indexers3) + + # vectorized-integer mixed + indexers4 = [ + {"dim3": 0}, + {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, + {"a": slice(None, None, 2)}, + ] + multiple_indexing(indexers4) + + # vectorized-integer mixed + indexers5 = [ + {"dim3": 0}, + {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, + {"a": 1, "b": 0}, + ] + multiple_indexing(indexers5) + + def test_vectorized_indexing_negative_step(self) -> None: + # use dask explicitly when present + open_kwargs: dict[str, Any] | None + if has_dask: + open_kwargs = {"chunks": {}} + else: + open_kwargs = None + in_memory = create_test_data() + + def multiple_indexing(indexers): + # make sure a sequence of lazy indexings certainly works. + with self.roundtrip(in_memory, open_kwargs=open_kwargs) as on_disk: + actual = on_disk["var3"] + expected = in_memory["var3"] + for ind in indexers: + actual = actual.isel(ind) + expected = expected.isel(ind) + # make sure the array is not yet loaded into memory + assert not actual.variable._in_memory + assert_identical(expected, actual.load()) + + # with negative step slice. + indexers = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": slice(-1, 1, -1), + } + ] + multiple_indexing(indexers) + + # with negative step slice. + indexers = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": slice(-1, 1, -2), + } + ] + multiple_indexing(indexers) + + def test_outer_indexing_reversed(self) -> None: + # regression test for GH6560 + ds = xr.Dataset( + {"z": (("t", "p", "y", "x"), np.ones((1, 1, 31, 40)))}, + ) + + with self.roundtrip(ds) as on_disk: + subset = on_disk.isel(t=[0], p=0).z[:, ::10, ::10][:, ::-1, :] + assert subset.sizes == subset.load().sizes + + def test_isel_dataarray(self) -> None: + # Make sure isel works lazily. GH:issue:1688 + in_memory = create_test_data() + with self.roundtrip(in_memory) as on_disk: + expected = in_memory.isel(dim2=in_memory["dim2"] < 3) + actual = on_disk.isel(dim2=on_disk["dim2"] < 3) + assert_identical(expected, actual) + + +class DatasetIOBase(BackendIndexingTestsMixin): engine: T_NetcdfEngine | None = None file_format: T_NetcdfTypes | None = None @@ -695,141 +832,6 @@ def test_roundtrip_boolean_dtype(self) -> None: assert_identical(original, actual2) assert actual2["x"].dtype == "bool" - def test_orthogonal_indexing(self) -> None: - in_memory = create_test_data() - with self.roundtrip(in_memory) as on_disk: - indexers = {"dim1": [1, 2, 0], "dim2": [3, 2, 0, 3], "dim3": np.arange(5)} - expected = in_memory.isel(indexers) - actual = on_disk.isel(**indexers) - # make sure the array is not yet loaded into memory - assert not actual["var1"].variable._in_memory - assert_identical(expected, actual) - # do it twice, to make sure we're switched from orthogonal -> numpy - # when we cached the values - actual = on_disk.isel(**indexers) - assert_identical(expected, actual) - - def test_vectorized_indexing(self) -> None: - in_memory = create_test_data() - with self.roundtrip(in_memory) as on_disk: - indexers = { - "dim1": DataArray([0, 2, 0], dims="a"), - "dim2": DataArray([0, 2, 3], dims="a"), - } - expected = in_memory.isel(indexers) - actual = on_disk.isel(**indexers) - # make sure the array is not yet loaded into memory - assert not actual["var1"].variable._in_memory - assert_identical(expected, actual.load()) - # do it twice, to make sure we're switched from - # vectorized -> numpy when we cached the values - actual = on_disk.isel(**indexers) - assert_identical(expected, actual) - - def multiple_indexing(indexers): - # make sure a sequence of lazy indexings certainly works. - with self.roundtrip(in_memory) as on_disk: - actual = on_disk["var3"] - expected = in_memory["var3"] - for ind in indexers: - actual = actual.isel(ind) - expected = expected.isel(ind) - # make sure the array is not yet loaded into memory - assert not actual.variable._in_memory - assert_identical(expected, actual.load()) - - # two-staged vectorized-indexing - indexers2 = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": DataArray([[0, 4], [1, 3], [2, 2]], dims=["a", "b"]), - }, - {"a": DataArray([0, 1], dims=["c"]), "b": DataArray([0, 1], dims=["c"])}, - ] - multiple_indexing(indexers2) - - # vectorized-slice mixed - indexers3 = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": slice(None, 10), - } - ] - multiple_indexing(indexers3) - - # vectorized-integer mixed - indexers4 = [ - {"dim3": 0}, - {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, - {"a": slice(None, None, 2)}, - ] - multiple_indexing(indexers4) - - # vectorized-integer mixed - indexers5 = [ - {"dim3": 0}, - {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, - {"a": 1, "b": 0}, - ] - multiple_indexing(indexers5) - - def test_vectorized_indexing_negative_step(self) -> None: - # use dask explicitly when present - open_kwargs: dict[str, Any] | None - if has_dask: - open_kwargs = {"chunks": {}} - else: - open_kwargs = None - in_memory = create_test_data() - - def multiple_indexing(indexers): - # make sure a sequence of lazy indexings certainly works. - with self.roundtrip(in_memory, open_kwargs=open_kwargs) as on_disk: - actual = on_disk["var3"] - expected = in_memory["var3"] - for ind in indexers: - actual = actual.isel(ind) - expected = expected.isel(ind) - # make sure the array is not yet loaded into memory - assert not actual.variable._in_memory - assert_identical(expected, actual.load()) - - # with negative step slice. - indexers = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": slice(-1, 1, -1), - } - ] - multiple_indexing(indexers) - - # with negative step slice. - indexers = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": slice(-1, 1, -2), - } - ] - multiple_indexing(indexers) - - def test_outer_indexing_reversed(self) -> None: - # regression test for GH6560 - ds = xr.Dataset( - {"z": (("t", "p", "y", "x"), np.ones((1, 1, 31, 40)))}, - ) - - with self.roundtrip(ds) as on_disk: - subset = on_disk.isel(t=[0], p=0).z[:, ::10, ::10][:, ::-1, :] - assert subset.sizes == subset.load().sizes - - def test_isel_dataarray(self) -> None: - # Make sure isel works lazily. GH:issue:1688 - in_memory = create_test_data() - with self.roundtrip(in_memory) as on_disk: - expected = in_memory.isel(dim2=in_memory["dim2"] < 3) - actual = on_disk.isel(dim2=on_disk["dim2"] < 3) - assert_identical(expected, actual) - def validate_array_type(self, ds): # Make sure that only NumpyIndexingAdapter stores a bare np.ndarray. def find_and_validate_array(obj): From 2105aa049d3ac70af2b05e237ed973f0e9e7d653 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Nov 2024 09:43:05 -0700 Subject: [PATCH 30/32] Add legacy backend indexing tests --- xarray/backends/common.py | 6 +++ xarray/backends/netCDF4_.py | 4 +- xarray/backends/pydap_.py | 4 +- xarray/backends/scipy_.py | 4 +- xarray/backends/zarr.py | 4 +- xarray/core/indexing.py | 77 +++++++++++++------------------ xarray/tests/test_backends.py | 85 +++++++++++++++++++++++++++++++++++ 7 files changed, 131 insertions(+), 53 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index d9ed89901a9..3cd2079f909 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -250,6 +250,12 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): + def get_duck_array(self, dtype: np.typing.DTypeLike = None): + key = indexing.BasicIndexer((slice(None),) * self.ndim) + return self[key] # type: ignore [index] + + +class NewBackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): __slots__ = ("indexing_support",) def get_duck_array(self, dtype: np.typing.DTypeLike = None): diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 42e593b4816..8130c264021 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -12,8 +12,8 @@ from xarray import coding from xarray.backends.common import ( BACKEND_ENTRYPOINTS, - BackendArray, BackendEntrypoint, + NewBackendArray, WritableCFDataStore, _normalize_path, datatree_from_dict_with_io_cleanup, @@ -61,7 +61,7 @@ NETCDF4_PYTHON_LOCK = combine_locks([NETCDFC_LOCK, HDF5_LOCK]) -class BaseNetCDF4Array(BackendArray): +class BaseNetCDF4Array(NewBackendArray): __slots__ = ("datastore", "dtype", "shape", "variable_name") def __init__(self, variable_name, datastore): diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 49b22d78463..2ec260a3d11 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -8,8 +8,8 @@ from xarray.backends.common import ( BACKEND_ENTRYPOINTS, AbstractDataStore, - BackendArray, BackendEntrypoint, + NewBackendArray, robust_getitem, ) from xarray.backends.store import StoreBackendEntrypoint @@ -36,7 +36,7 @@ ) -class PydapArrayWrapper(BackendArray): +class PydapArrayWrapper(NewBackendArray): indexing_support = indexing.IndexingSupport.BASIC def __init__(self, array) -> None: diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 30cd9927489..1793f619a85 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -10,8 +10,8 @@ from xarray.backends.common import ( BACKEND_ENTRYPOINTS, - BackendArray, BackendEntrypoint, + NewBackendArray, WritableCFDataStore, _normalize_path, ) @@ -59,7 +59,7 @@ def _decode_attrs(d): return {k: v if k == "_FillValue" else _decode_string(v) for (k, v) in d.items()} -class ScipyArrayWrapper(BackendArray): +class ScipyArrayWrapper(NewBackendArray): indexing_support = indexing.IndexingSupport.OUTER_1VECTOR def __init__(self, variable_name, datastore): diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index ca09e06137b..b1435a039d0 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -14,8 +14,8 @@ from xarray.backends.common import ( BACKEND_ENTRYPOINTS, AbstractWritableDataStore, - BackendArray, BackendEntrypoint, + NewBackendArray, _encode_variable_name, _normalize_path, datatree_from_dict_with_io_cleanup, @@ -185,7 +185,7 @@ def encode_zarr_attr_value(value): return encoded -class ZarrArrayWrapper(BackendArray): +class ZarrArrayWrapper(NewBackendArray): indexing_support = indexing.IndexingSupport.VECTORIZED def __init__(self, zarr_array): diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index a27e1c0543d..92af1fcd146 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -3,7 +3,6 @@ import enum import functools import operator -import warnings from collections import Counter, defaultdict from collections.abc import Callable, Hashable, Iterable, Mapping from contextlib import suppress @@ -23,6 +22,7 @@ from xarray.core.utils import ( NDArrayMixin, either_dict_or_kwargs, + emit_user_level_warning, get_valid_numpy_dtype, is_duck_array, is_duck_dask_array, @@ -609,7 +609,7 @@ def __getitem__(self, key) -> Any: BackendArray_fallback_warning_message = ( "The array `{0}` does not support indexing using the .vindex and .oindex properties. " "The __getitem__ method is being used instead. This fallback behavior will be " - "removed in a future version. Please ensure that the backend array `{1}` implements " + "removed in a future version. Please ensure that the backend array `{0}` implements " "support for the .vindex and .oindex properties to avoid potential issues." ) @@ -671,21 +671,8 @@ def shape(self) -> _Shape: return self._shape def get_duck_array(self) -> Any: - try: - array = apply_indexer(self.array, self.key) - except NotImplementedError as _: - # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ - warnings.warn( - BackendArray_fallback_warning_message.format( - self.array.__class__.__name__, self.array.__class__.__name__ - ), - category=DeprecationWarning, - stacklevel=2, - ) - array = self.array[self.key] - - # self.array[self.key] is now a numpy array when + array = apply_indexer(self.array, self.key) + # array[self.key] is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) # so we need the explicit check for ExplicitlyIndexed @@ -752,21 +739,9 @@ def shape(self) -> _Shape: return np.broadcast(*self.key.tuple).shape def get_duck_array(self) -> Any: - try: - array = apply_indexer(self.array, self.key) - except NotImplementedError as _: - # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ - warnings.warn( - BackendArray_fallback_warning_message.format( - self.array.__class__.__name__, self.array.__class__.__name__ - ), - category=PendingDeprecationWarning, - stacklevel=2, - ) - array = self.array[self.key] + array = apply_indexer(self.array, self.key) - # self.array[self.key] is now a numpy array when + # array is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) # so we need the explicit check for ExplicitlyIndexed @@ -1136,6 +1111,7 @@ def vectorized_indexing_adapter( ) +# TODO: deprecate and delete this method once it is no longer used externally def explicit_indexing_adapter( key: ExplicitIndexer, shape: _Shape, @@ -1163,26 +1139,36 @@ def explicit_indexing_adapter( ------- Indexing result, in the form of a duck numpy-array. """ - # TODO: raise PendingDeprecationWarning here. - if isinstance(key, VectorizedIndexer): - return vectorized_indexing_adapter( - key.tuple, shape, indexing_support, raw_indexing_method - ) - elif isinstance(key, OuterIndexer): - return outer_indexing_adapter( - key.tuple, shape, indexing_support, raw_indexing_method - ) - elif isinstance(key, BasicIndexer): - return basic_indexing_adapter( - key.tuple, shape, indexing_support, raw_indexing_method - ) - raise TypeError(f"unexpected key type: {key}") + + # If the array is not an ExplicitlyIndexedNDArrayMixin, + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ + emit_user_level_warning( + BackendArray_fallback_warning_message.format(""), + category=PendingDeprecationWarning, + ) + raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support) + result = raw_indexing_method(raw_key.tuple) + if numpy_indices.tuple: + indexable = NumpyIndexingAdapter(result) + result = apply_indexer(indexable, numpy_indices) + return result def apply_indexer( indexable: ExplicitlyIndexedNDArrayMixin, indexer: ExplicitIndexer ) -> Any: """Apply an indexer to an indexable object.""" + if not hasattr(indexable, "vindex") and not hasattr(indexable, "oindex"): + # This path is used by Lazily*IndexedArray.get_duck_array() + classname = type(indexable).__name__ + # If the array is not an ExplicitlyIndexedNDArrayMixin, + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ + emit_user_level_warning( + BackendArray_fallback_warning_message.format(classname), + category=PendingDeprecationWarning, + ) + return indexable[indexer] + if isinstance(indexer, VectorizedIndexer): return indexable.vindex[indexer.tuple] elif isinstance(indexer, OuterIndexer): @@ -1206,6 +1192,7 @@ def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: indexable[indexer.tuple] = value +# TODO: delete this method once explicit_indexing_adapter is no longer used externally def decompose_indexer( indexer: ExplicitIndexer, shape: _Shape, indexing_support: IndexingSupport ) -> tuple[ExplicitIndexer, ExplicitIndexer]: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 021cf5df1d4..0c9813c971d 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -39,6 +39,7 @@ open_mfdataset, save_mfdataset, ) +from xarray.backends.common import BackendArray as LegacyBackendArray from xarray.backends.common import robust_getitem from xarray.backends.h5netcdf_ import H5netcdfBackendEntrypoint from xarray.backends.netcdf3 import _nc3_dtype_coercions @@ -53,6 +54,7 @@ from xarray.coding.variables import SerializationWarning from xarray.conventions import encode_dataset_coordinates from xarray.core import indexing +from xarray.core.indexing import IndexingSupport from xarray.core.options import set_options from xarray.core.utils import module_available from xarray.namedarray.pycompat import array_type @@ -352,6 +354,9 @@ def test_dtype_coercion_error(self) -> None: class BackendIndexingTestsMixin: + def roundtrip(self, ds: Dataset, open_kwargs=None) -> Dataset: + raise NotImplementedError + def test_orthogonal_indexing(self) -> None: in_memory = create_test_data() with self.roundtrip(in_memory) as on_disk: @@ -6491,3 +6496,83 @@ def test_zarr_safe_chunk_region(tmp_path): chunk = ds.isel(region) chunk = chunk.chunk() chunk.chunk().to_zarr(store, region=region) + + +class LegacyBackendArrayWrapper(LegacyBackendArray): + def __init__(self, array: np.ndarray, indexing_support: IndexingSupport): + self.shape = array.shape + self.dtype = array.dtype + self.array = array + self.indexing_support = indexing_support + + def __getitem__(self, key: indexing.ExplicitIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def _getitem(self, key: tuple[Any, ...]) -> np.ndarray: + return self.array[key] + + +def indexing_tests(*, indexing_support: IndexingSupport): + def wrapper(cls): + class NewClass(cls): + cls.indexing_support = indexing_support + + def roundtrip(self, ds: Dataset, *, open_kwargs=None) -> Dataset: + ds = ds.copy(deep=True) + for name in list(ds.data_vars) + list( + set(ds.coords) - set(ds.xindexes) + ): + var = ds._variables[name] + ds._variables[name] = var.copy( + # These tests assume that indexing is lazy (checks ._in_memory), + # so wrapping by LazilyIndexedArray is required. + data=indexing.LazilyIndexedArray( + LegacyBackendArrayWrapper(var.data, self.indexing_support) + ) + ) + return ds + + def test_vectorized_indexing_negative_step(self) -> None: + with pytest.warns(PendingDeprecationWarning): + super().test_vectorized_indexing_negative_step() + + def test_isel_dataarray(self) -> None: + with pytest.warns(PendingDeprecationWarning): + super().test_isel_dataarray() + + def test_vectorized_indexing(self) -> None: + with pytest.warns(PendingDeprecationWarning): + super().test_vectorized_indexing() + + def test_orthogonal_indexing(self) -> None: + with pytest.warns(PendingDeprecationWarning): + super().test_orthogonal_indexing() + + def test_outer_indexing_reversed(self) -> None: + with pytest.warns(PendingDeprecationWarning): + super().test_outer_indexing_reversed() + + return NewClass + + return wrapper + + +@indexing_tests(indexing_support=IndexingSupport.BASIC) +class TestBasicIndexingLegacyBackend(BackendIndexingTestsMixin): + pass + + +@indexing_tests(indexing_support=IndexingSupport.OUTER_1VECTOR) +class TestOuter1VectorIndexingLegacyBackend(BackendIndexingTestsMixin): + pass + + +# @indexing_tests(indexing_support=IndexingSupport.OUTER) +# class TestOuterIndexingLegacyBackend(BackendIndexingTestsMixin): +# pass + +# @indexing_tests(indexing_support=IndexingSupport.VECTORIZED) +# class TestVectorizedIndexingLegacyBackend(BackendIndexingTestsMixin): +# pass From fb24e9cb2d2434ce455b6c8c3686ce8033902c33 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Nov 2024 11:51:17 -0700 Subject: [PATCH 31/32] Avoid raising deprecation warning now. --- xarray/core/indexing.py | 19 +++++++++---------- xarray/tests/test_backends.py | 30 +++++++++++++++--------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 92af1fcd146..b3c6400e4d9 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -22,7 +22,6 @@ from xarray.core.utils import ( NDArrayMixin, either_dict_or_kwargs, - emit_user_level_warning, get_valid_numpy_dtype, is_duck_array, is_duck_dask_array, @@ -1142,10 +1141,10 @@ def explicit_indexing_adapter( # If the array is not an ExplicitlyIndexedNDArrayMixin, # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ - emit_user_level_warning( - BackendArray_fallback_warning_message.format(""), - category=PendingDeprecationWarning, - ) + # emit_user_level_warning( + # BackendArray_fallback_warning_message.format(""), + # category=PendingDeprecationWarning, + # ) raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support) result = raw_indexing_method(raw_key.tuple) if numpy_indices.tuple: @@ -1160,13 +1159,13 @@ def apply_indexer( """Apply an indexer to an indexable object.""" if not hasattr(indexable, "vindex") and not hasattr(indexable, "oindex"): # This path is used by Lazily*IndexedArray.get_duck_array() - classname = type(indexable).__name__ + # classname = type(indexable).__name__ # If the array is not an ExplicitlyIndexedNDArrayMixin, # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ - emit_user_level_warning( - BackendArray_fallback_warning_message.format(classname), - category=PendingDeprecationWarning, - ) + # emit_user_level_warning( + # BackendArray_fallback_warning_message.format(classname), + # category=PendingDeprecationWarning, + # ) return indexable[indexer] if isinstance(indexer, VectorizedIndexer): diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0c9813c971d..298e46e6a67 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -6534,25 +6534,25 @@ def roundtrip(self, ds: Dataset, *, open_kwargs=None) -> Dataset: ) return ds - def test_vectorized_indexing_negative_step(self) -> None: - with pytest.warns(PendingDeprecationWarning): - super().test_vectorized_indexing_negative_step() + # def test_vectorized_indexing_negative_step(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_vectorized_indexing_negative_step() - def test_isel_dataarray(self) -> None: - with pytest.warns(PendingDeprecationWarning): - super().test_isel_dataarray() + # def test_isel_dataarray(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_isel_dataarray() - def test_vectorized_indexing(self) -> None: - with pytest.warns(PendingDeprecationWarning): - super().test_vectorized_indexing() + # def test_vectorized_indexing(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_vectorized_indexing() - def test_orthogonal_indexing(self) -> None: - with pytest.warns(PendingDeprecationWarning): - super().test_orthogonal_indexing() + # def test_orthogonal_indexing(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_orthogonal_indexing() - def test_outer_indexing_reversed(self) -> None: - with pytest.warns(PendingDeprecationWarning): - super().test_outer_indexing_reversed() + # def test_outer_indexing_reversed(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_outer_indexing_reversed() return NewClass From 60640466bd98c3a77bd48ece1d9eca07ecf6fe4d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 29 Jan 2025 22:33:47 -0700 Subject: [PATCH 32/32] fix --- xarray/core/indexing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 76bf5e6a027..3fda88956d1 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1812,18 +1812,18 @@ def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: return self.array.vindex[indexer] except IndexError as e: # TODO: upstream to dask - has_dask = any(is_duck_dask_array(i) for i in indexer.tuple) + has_dask = any(is_duck_dask_array(i) for i in indexer) # this only works for "small" 1d coordinate arrays with one chunk # it is intended for idxmin, idxmax, and allows indexing with # the nD array output of argmin, argmax if ( not has_dask - or len(indexer.tuple) > 1 + or len(indexer) > 1 or math.prod(self.array.numblocks) > 1 or self.array.ndim > 1 ): raise e - (idxr,) = indexer.tuple + (idxr,) = indexer if idxr.ndim == 0: return self.array[idxr.data] else: pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy