diff --git a/.binder/environment.yml b/.binder/environment.yml index a60eb372831..2c57bd1121c 100644 --- a/.binder/environment.yml +++ b/.binder/environment.yml @@ -2,7 +2,7 @@ name: xarray-examples channels: - conda-forge dependencies: - - python=3.10 + - python=3.11 - boto3 - bottleneck - cartopy diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 95181ae3761..345c7a8e234 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -132,7 +132,7 @@ jobs: fail_ci_if_error: false mypy-min: - name: Mypy 3.10 + name: Mypy 3.11 runs-on: "ubuntu-latest" needs: detect-ci-trigger defaults: @@ -140,7 +140,7 @@ jobs: shell: bash -l {0} env: CONDA_ENV_FILE: ci/requirements/environment.yml - PYTHON_VERSION: "3.10" + PYTHON_VERSION: "3.11" steps: - uses: actions/checkout@v4 @@ -239,7 +239,7 @@ jobs: fail_ci_if_error: false pyright39: - name: Pyright 3.10 + name: Pyright 3.11 runs-on: "ubuntu-latest" needs: detect-ci-trigger if: | @@ -252,7 +252,7 @@ jobs: shell: bash -l {0} env: CONDA_ENV_FILE: ci/requirements/environment.yml - PYTHON_VERSION: "3.10" + PYTHON_VERSION: "3.11" steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b884b246f47..b115d605ee3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -47,15 +47,18 @@ jobs: matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] # Bookend python versions - python-version: ["3.10", "3.13"] + python-version: ["3.11", "3.13"] env: [""] include: # Minimum python version: - env: "bare-minimum" - python-version: "3.10" + python-version: "3.11" + os: ubuntu-latest + - env: "bare-min-and-scipy" + python-version: "3.11" os: ubuntu-latest - env: "min-all-deps" - python-version: "3.10" + python-version: "3.11" os: ubuntu-latest # Latest python version: - env: "all-but-numba" @@ -70,7 +73,7 @@ jobs: # The mypy tests must be executed using only 1 process in order to guarantee # predictable mypy output messages for comparison to expectations. - env: "mypy" - python-version: "3.10" + python-version: "3.11" numprocesses: 1 os: ubuntu-latest - env: "mypy" diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index add9261fcaf..55be8059306 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -6,11 +6,19 @@ on: push: tags: - "v*" + pull_request: + types: [opened, reopened, synchronize, labeled] + workflow_dispatch: jobs: build-artifacts: runs-on: ubuntu-latest - if: github.repository == 'pydata/xarray' + if: ${{ github.repository == 'pydata/xarray' && ( + (contains(github.event.pull_request.labels.*.name, 'Release') && github.event_name == 'pull_request') || + github.event_name == 'release' || + github.event_name == 'workflow_dispatch' || + startsWith(github.ref, 'refs/tags/v') + ) }} steps: - uses: actions/checkout@v4 with: @@ -64,7 +72,6 @@ jobs: ls -ltrh dist - name: Verify the built dist/wheel is valid - if: github.event_name == 'push' run: | python -m pip install --upgrade pip python -m pip install dist/xarray*.whl diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aebcb151959..e7d5a8567c7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,8 +24,7 @@ repos: - id: rst-inline-touching-normal - id: text-unicode-replacement-char - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. 
- rev: v0.11.12 + rev: v0.12.1 hooks: - id: ruff-format - id: ruff diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 2f3ddd49ba8..8124c5612cf 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,46 +1,23 @@ -# Contributor Covenant Code of Conduct +# NUMFOCUS CODE OF CONDUCT -## Our Pledge +You can find the full Code of Conduct on the NumFOCUS website: https://numfocus.org/code-of-conduct -In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. +## THE SHORT VERSION -## Our Standards +NumFOCUS is dedicated to providing a harassment-free community for everyone, regardless of gender, sexual orientation, gender identity and expression, disability, physical appearance, body size, race, or religion. We do not tolerate harassment of community members in any form. -Examples of behavior that contributes to creating a positive environment include: +Be kind to others. Do not insult or put down others. Behave professionally. Remember that harassment and sexist, racist, or exclusionary jokes are not appropriate for NumFOCUS. -- Using welcoming and inclusive language -- Being respectful of differing viewpoints and experiences -- Gracefully accepting constructive criticism -- Focusing on what is best for the community -- Showing empathy towards other community members +All communication should be appropriate for a professional audience, including people of many different backgrounds. Sexual language and imagery are not appropriate. -Examples of unacceptable behavior by participants include: +Thank you for helping make this a welcoming, friendly community for all. -- The use of sexualized language or imagery and unwelcome sexual attention or advances -- Trolling, insulting/derogatory comments, and personal or political attacks -- Public or private harassment -- Publishing others' private information, such as a physical or electronic address, without explicit permission -- Other conduct which could reasonably be considered inappropriate in a professional setting +## HOW TO REPORT -## Our Responsibilities +If you feel that the Code of Conduct has been violated, feel free to submit a report by using the form: [NumFOCUS Code of Conduct Reporting Form](https://numfocus.typeform.com/to/ynjGdT?typeform-source=numfocus.org) -Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. +## WHO WILL RECEIVE YOUR REPORT -Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +Your report will be received and handled by the NumFOCUS Code of Conduct Working Group: trained and experienced contributors with diverse backgrounds. The group makes decisions independently of the project, PyData, NumFOCUS, or any other organization.
-## Scope - -This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. - -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at xarray-core-team@googlegroups.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [https://contributor-covenant.org/version/1/4][version] - -[homepage]: https://contributor-covenant.org -[version]: https://contributor-covenant.org/version/1/4/ +You can learn more about the current group members, as well as the reporting procedure here: https://numfocus.org/code-of-conduct diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index d4ca0d9c2af..e775d63871d 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -48,6 +48,8 @@ upstream https://github.com/pydata/xarray (push) release. 5. Open a PR with the release summary and whatsnew changes; in particular the release headline should get feedback from the team on what's important to include. + Apply the `Release` label to the PR to trigger a test build action. + 6. 
After merging, again ensure your main branch is synced to upstream: ```sh git pull upstream main diff --git a/asv_bench/benchmarks/dataset_io.py b/asv_bench/benchmarks/dataset_io.py index f1296a8b44f..b8afabe802e 100644 --- a/asv_bench/benchmarks/dataset_io.py +++ b/asv_bench/benchmarks/dataset_io.py @@ -678,13 +678,7 @@ def open( lock: xr.backends.locks.SerializableLock | None = None, autoclose: bool = False, ): - if lock is None: - if mode == "r": - locker = xr.backends.locks.SerializableLock() - else: - locker = xr.backends.locks.SerializableLock() - else: - locker = lock + locker = lock or xr.backends.locks.SerializableLock() manager = xr.backends.CachingFileManager( xr.backends.DummyFileManager, diff --git a/ci/release_contributors.py b/ci/release_contributors.py index dab95c651c5..50501233f60 100644 --- a/ci/release_contributors.py +++ b/ci/release_contributors.py @@ -10,7 +10,7 @@ def main(): repo = git.Repo(".") - most_recent_release = last(repo.tags) + most_recent_release = last(list(repo.tags)) # extract information from commits contributors = {} diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index ca4943bddb1..5f5db4a0f18 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -4,7 +4,7 @@ channels: - nodefaults dependencies: - aiobotocore - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/all-but-numba.yml b/ci/requirements/all-but-numba.yml index fa7ad81f198..712055a0ec2 100644 --- a/ci/requirements/all-but-numba.yml +++ b/ci/requirements/all-but-numba.yml @@ -4,9 +4,9 @@ channels: - nodefaults dependencies: # Pin a "very new numpy" (updated Sept 24, 2024) - - numpy>=2.1.1 + - numpy>=2.2 - aiobotocore - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/bare-min-and-scipy.yml b/ci/requirements/bare-min-and-scipy.yml new file mode 100644 index 00000000000..bb25af67651 --- /dev/null +++ b/ci/requirements/bare-min-and-scipy.yml @@ -0,0 +1,18 @@ +name: xarray-tests +channels: + - conda-forge + - nodefaults +dependencies: + - python=3.11 + - coveralls + - pip + - pytest + - pytest-cov + - pytest-env + - pytest-mypy-plugins + - pytest-timeout + - pytest-xdist + - numpy=1.26 + - packaging=24.1 + - pandas=2.2 + - scipy=1.13 diff --git a/ci/requirements/bare-minimum.yml b/ci/requirements/bare-minimum.yml index 02e99d34af2..fafc1aa034a 100644 --- a/ci/requirements/bare-minimum.yml +++ b/ci/requirements/bare-minimum.yml @@ -3,7 +3,7 @@ channels: - conda-forge - nodefaults dependencies: - - python=3.10 + - python=3.11 - coveralls - pip - pytest @@ -12,6 +12,6 @@ dependencies: - pytest-mypy-plugins - pytest-timeout - pytest-xdist - - numpy=1.24 - - packaging=23.1 - - pandas=2.1 + - numpy=1.26 + - packaging=24.1 + - pandas=2.2 diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 0559f393bd0..64ea08b73ff 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -23,7 +23,7 @@ dependencies: - ncdata - netcdf4 - numba - - numpy>=2 + - numpy>=2.2 - packaging - pandas - pooch diff --git a/ci/requirements/environment-3.14.yml b/ci/requirements/environment-3.14.yml index 1e6ee7ff5f9..06c4df82663 100644 --- a/ci/requirements/environment-3.14.yml +++ b/ci/requirements/environment-3.14.yml @@ -4,7 +4,7 @@ channels: - nodefaults dependencies: - aiobotocore - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/environment-windows-3.14.yml 
b/ci/requirements/environment-windows-3.14.yml index 4eb2049f2e6..dd48add6b73 100644 --- a/ci/requirements/environment-windows-3.14.yml +++ b/ci/requirements/environment-windows-3.14.yml @@ -2,7 +2,7 @@ name: xarray-tests channels: - conda-forge dependencies: - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 45cbebd38db..3213ef687d3 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -2,7 +2,7 @@ name: xarray-tests channels: - conda-forge dependencies: - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index a9499694e15..cc33d8b4681 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -4,7 +4,7 @@ channels: - nodefaults dependencies: - aiobotocore - - array-api-strict + - array-api-strict<2.4 - boto3 - bottleneck - cartopy @@ -26,7 +26,7 @@ dependencies: - numba - numbagg - numexpr - - numpy>=2 + - numpy>=2.2 - opt_einsum - packaging - pandas diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml index 03e14773d53..9183433e801 100644 --- a/ci/requirements/min-all-deps.yml +++ b/ci/requirements/min-all-deps.yml @@ -7,42 +7,38 @@ dependencies: # Run ci/min_deps_check.py to verify that this file respects the policy. # When upgrading python, numpy, or pandas, must also change # doc/user-guide/installing.rst, doc/user-guide/plotting.rst and setup.py. - - python=3.10 - - array-api-strict=1.0 # dependency for testing the array api compat - - boto3=1.29 - - bottleneck=1.3 - - cartopy=0.22 + - python=3.11 + - array-api-strict=1.1 # dependency for testing the array api compat + - boto3=1.34 + - bottleneck=1.4 + - cartopy=0.23 - cftime=1.6 - coveralls - - dask-core=2023.11 - - distributed=2023.11 - # Flox > 0.8 has a bug with numbagg versions - # It will require numbagg > 0.6 - # so we should just skip that series eventually - # or keep flox pinned for longer than necessary - - flox=0.7 + - dask-core=2024.6 + - distributed=2024.6 + - flox=0.9 - h5netcdf=1.3 # h5py and hdf5 tend to cause conflicts # for e.g. 
hdf5 1.12 conflicts with h5py=3.1 # prioritize bumping other packages instead - - h5py=3.8 - - hdf5=1.12 + - h5py=3.11 + - hdf5=1.14 - hypothesis - - iris=3.7 - - lxml=4.9 # Optional dep of pydap + - iris=3.9 + - lxml=5.1 # Optional dep of pydap - matplotlib-base=3.8 - nc-time-axis=1.4 # netcdf follows a 1.major.minor[.patch] convention # (see https://github.com/Unidata/netcdf4-python/issues/1090) - - netcdf4=1.6.0 - - numba=0.57 - - numbagg=0.6 - - numpy=1.24 - - packaging=23.2 - - pandas=2.1 - - pint=0.22 - pip - - pydap=3.5 + - netcdf4=1.6 + - numba=0.60 + - numbagg=0.8 + - numpy=1.26 + - packaging=24.1 + - pandas=2.2 + - pint=0.24 + - pydap=3.5.0 - pytest - pytest-cov - pytest-env @@ -50,9 +46,8 @@ dependencies: - pytest-timeout - pytest-xdist - rasterio=1.3 - - scipy=1.11 + - scipy=1.13 - seaborn=0.13 - - sparse=0.14 + - sparse=0.15 - toolz=0.12 - - typing_extensions=4.8 - - zarr=2.16 + - zarr=2.18 diff --git a/doc/api.rst b/doc/api.rst index b6023866eb8..df6e87c0cf8 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1576,6 +1576,7 @@ Custom Indexes CFTimeIndex indexes.RangeIndex + indexes.CoordinateTransformIndex Creating custom indexes ----------------------- @@ -1588,6 +1589,13 @@ indexes.RangeIndex.arange indexes.RangeIndex.linspace +Building custom indexes +----------------------- +.. autosummary:: + :toctree: generated/ + + indexes.CoordinateTransform + Tutorial ======== diff --git a/doc/contribute/contributing.rst b/doc/contribute/contributing.rst index e0ece730cd1..339050a7f8a 100644 --- a/doc/contribute/contributing.rst +++ b/doc/contribute/contributing.rst @@ -290,7 +290,7 @@ We'll now kick off a two-step process: .. code-block:: sh # Create and activate the build environment - conda create -c conda-forge -n xarray-tests python=3.10 + conda create -c conda-forge -n xarray-tests python=3.11 # This is for Linux and MacOS conda env update -f ci/requirements/environment.yml diff --git a/doc/contribute/index.rst b/doc/contribute/index.rst index c4461d38039..2501376467d 100644 --- a/doc/contribute/index.rst +++ b/doc/contribute/index.rst @@ -1,11 +1,12 @@ -################## -Contributors Guide -################## +######################## +Xarray Developer's Guide +######################## We welcome your skills and enthusiasm at the Xarray project! There are numerous opportunities to contribute beyond just writing code. All contributions, including bug reports, bug fixes, documentation improvements, enhancement suggestions, -and other ideas are welcome. +and other ideas are welcome. Please review our Contributor's guide for more guidance. +In this section, you will also find documentation on the internal organization of Xarray's source code, the roadmap for current development priorities, and how to engage with core maintainers of the Xarray codebase. .. toctree:: :maxdepth: 2 diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst index 4910047014e..cca54585c5f 100644 --- a/doc/getting-started-guide/installing.rst +++ b/doc/getting-started-guide/installing.rst @@ -6,10 +6,10 @@ Installation Required dependencies --------------------- -- Python (3.10 or later) -- `numpy `__ (1.23 or later) -- `packaging `__ (23.1 or later) -- `pandas `__ (2.0 or later) +- Python (3.11 or later) +- `numpy `__ (1.26 or later) +- `packaging `__ (24.1 or later) +- `pandas `__ (2.2 or later) ..
_optional-dependencies: diff --git a/doc/index.rst b/doc/index.rst index 455dd3c5e80..d540e9f79b1 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -70,5 +70,5 @@ efficient, and fun! Gallery API Reference Get Help - Contribute + Development Release Notes diff --git a/doc/user-guide/ecosystem.rst b/doc/user-guide/ecosystem.rst index 097dae55a23..c94082cc601 100644 --- a/doc/user-guide/ecosystem.rst +++ b/doc/user-guide/ecosystem.rst @@ -14,6 +14,7 @@ Geosciences - `aospy `_: Automated analysis and management of gridded climate data. - `argopy `_: xarray-based Argo data access, manipulation and visualisation for standard users as well as Argo experts. +- `cf_xarray `_: Provides an accessor (``DataArray.cf`` or ``Dataset.cf``) that allows you to interpret Climate and Forecast metadata convention attributes present on xarray objects. - `climpred `_: Analysis of ensemble forecast models for climate prediction. - `geocube `_: Tool to convert geopandas vector data into rasterized xarray data. - `GeoWombat `_: Utilities for analysis of remotely sensed and gridded raster data at scale (easily tame Landsat, Sentinel, Quickbird, and PlanetScope). diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4c07fe413e4..667bd3e3d98 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -5,6 +5,142 @@ What's New ========== +.. _whats-new.2025.07.0: + +v2025.07.0 (Jul 3, 2025) +------------------------ + +This release extends xarray's support for custom index classes, restores support for reading netCDF3 files with SciPy, updates minimum dependencies, and fixes a number of bugs. + +Thanks to the 17 contributors to this release: +Bas Nijholt, Benoit Bovy, Deepak Cherian, Dhruva Kumar Kaushal, Dimitri Papadopoulos Orfanos, Ian Hunt-Isaak, Kai Mühlbauer, Mathias Hauser, Maximilian Roos, Miguel Jimenez, Nick Hodgskin, Scott Henderson, Shuhao Cao, Spencer Clark, Stephan Hoyer, Tom Nicholas and Zsolt Cserna + +New Features +~~~~~~~~~~~~ + +- Expose :py:class:`~xarray.indexes.RangeIndex` and :py:class:`~xarray.indexes.CoordinateTransformIndex` as public API + under the ``xarray.indexes`` namespace. By `Deepak Cherian `_. +- Support zarr-python's new ``.supports_consolidated_metadata`` store property (:pull:`10457`). + By `Tom Nicholas `_. +- Better error messages when encoding data to be written to disk fails (:pull:`10464`). + By `Stephan Hoyer `_. + +Breaking changes +~~~~~~~~~~~~~~~~ + +The minimum versions of some dependencies were changed (:issue:`10417`, :pull:`10438`): +By `Dhruva Kumar Kaushal `_. + +.. list-table:: + :header-rows: 1 + :widths: 30 20 20 + + * - Dependency + - Old Version + - New Version + * - Python + - 3.10 + - 3.11 + * - array-api-strict + - 1.0 + - 1.1 + * - boto3 + - 1.29 + - 1.34 + * - bottleneck + - 1.3 + - 1.4 + * - cartopy + - 0.22 + - 0.23 + * - dask-core + - 2023.11 + - 2024.6 + * - distributed + - 2023.11 + - 2024.6 + * - flox + - 0.7 + - 0.9 + * - h5py + - 3.8 + - 3.11 + * - hdf5 + - 1.12 + - 1.14 + * - iris + - 3.7 + - 3.9 + * - lxml + - 4.9 + - 5.1 + * - matplotlib-base + - 3.7 + - 3.8 + * - numba + - 0.57 + - 0.60 + * - numbagg + - 0.6 + - 0.8 + * - numpy + - 1.24 + - 1.26 + * - packaging + - 23.2 + - 24.1 + * - pandas + - 2.1 + - 2.2 + * - pint + - 0.22 + - 0.24 + * - pydap + - N/A + - 3.5 + * - scipy + - 1.11 + - 1.13 + * - sparse + - 0.14 + - 0.15 + * - typing_extensions + - 4.8 + - Removed + * - zarr + - 2.16 + - 2.18 + +Bug fixes +~~~~~~~~~ + +- Fix Pydap test_cmp_local_file for numpy 2.3.0 changes: (1) always return arrays for all versions, and (2)
skip ``astype(str)`` for numpy >= 2.3.0 for the expected data. (:pull:`10421`) + By `Kai Mühlbauer `_. +- Fix the SciPy backend for netCDF3 files. (:issue:`8909`, :pull:`10376`) + By `Deepak Cherian `_. +- Check and fix character array string dimension names, issue warnings as needed (:issue:`6352`, :pull:`10395`). + By `Kai Mühlbauer `_. +- Fix the error message of :py:func:`testing.assert_equal` when two different :py:class:`DataTree` objects + are passed (:pull:`10440`). By `Mathias Hauser `_. +- Fix :py:func:`testing.assert_equal` with ``check_dim_order=False`` for :py:class:`DataTree` objects + (:pull:`10442`). By `Mathias Hauser `_. +- Fix Pydap backend testing. The test now forces string arrays to dtype "S" (pydap converts them to unicode type by default) and removes the conditional on the numpy version. (:issue:`10261`, :pull:`10482`) + By `Miguel Jimenez-Urias `_. +- Fix attribute overwriting bug when decoding encoded + :py:class:`numpy.timedelta64` values from disk with a dtype attribute + (:issue:`10468`, :pull:`10469`). By `Spencer Clark + `_. +- Fix default ``"_FillValue"`` dtype coercion bug when encoding + :py:class:`numpy.timedelta64` values to an on-disk format that only supports + 32-bit integers (:issue:`10466`, :pull:`10469`). By `Spencer Clark + `_. + +Internal Changes +~~~~~~~~~~~~~~~~ + +- Forward the variable name down to coders for ``AbstractWritableDataStore.encode_variable`` and subclasses (:pull:`10395`). + By `Kai Mühlbauer `_. + .. _whats-new.2025.06.1: v2025.06.1 (Jun 11, 2025) @@ -93,13 +229,6 @@ Performance - Speed up encoding of :py:class:`cftime.datetime` objects by roughly a factor of three (:pull:`8324`). By `Antoine Gibek `_. -Documentation -~~~~~~~~~~~~~ - - -Internal Changes -~~~~~~~~~~~~~~~~ - .. _whats-new.2025.04.0: v2025.04.0 (Apr 29, 2025) @@ -977,7 +1106,7 @@ New Features for example, will retain the object. However, one cannot do operations that are not possible on the ``ExtensionArray`` then, such as broadcasting. (:issue:`5287`, :issue:`8463`, :pull:`8723`) By `Ilan Gold `_. -- :py:func:`testing.assert_allclose`/:py:func:`testing.assert_equal` now accept a new argument ``check_dims="transpose"``, controlling whether a transposed array is considered equal. (:issue:`5733`, :pull:`8991`) +- :py:func:`testing.assert_allclose` / :py:func:`testing.assert_equal` now accept a new argument ``check_dims="transpose"``, controlling whether a transposed array is considered equal. (:issue:`5733`, :pull:`8991`) By `Ignacio Martinez Vazquez `_. - Added the option to avoid automatically creating 1D pandas indexes in :py:meth:`Dataset.expand_dims()`, by passing the new kwarg ``create_index_for_new_dim=False``.
(:pull:`8960`) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 9c3e6fba290..e04db83fd84 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -261,8 +261,6 @@ def assert_invariants(self): @pytest.mark.skip(reason="failure detected by hypothesis") def test_unstack_object(): - import xarray as xr - ds = xr.Dataset() ds["0"] = np.array(["", "\x000"], dtype=object) ds.stack({"1": ["0"]}).unstack() @@ -270,8 +268,6 @@ def test_unstack_object(): @pytest.mark.skip(reason="failure detected by hypothesis") def test_unstack_timedelta_index(): - import xarray as xr - ds = xr.Dataset() ds["0"] = np.array([0, 1, 2, 3], dtype="timedelta64[ns]") ds.stack({"1": ["0"]}).unstack() diff --git a/pyproject.toml b/pyproject.toml index c980c204b5f..8cfbb6851b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,6 @@ classifiers = [ "Intended Audience :: Science/Research", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", @@ -17,9 +16,9 @@ dynamic = ["version"] license = "Apache-2.0" name = "xarray" readme = "README.md" -requires-python = ">=3.10" +requires-python = ">=3.11" -dependencies = ["numpy>=1.24", "packaging>=23.2", "pandas>=2.1"] +dependencies = ["numpy>=1.26", "packaging>=24.1", "pandas>=2.2"] # We don't encode minimum requirements here (though if we can write a script to # generate the text from `min_deps_check.py`, that's welcome...). We do add @@ -27,21 +26,28 @@ dependencies = ["numpy>=1.24", "packaging>=23.2", "pandas>=2.1"] # note that it's not a direct dependency of xarray. 
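The pyproject comment above invites a script that derives these pins automatically. A minimal sketch of that idea (not part of this diff; it assumes PyYAML is installed and that conda package names map directly to PyPI names, which is not true for e.g. `dask-core`):

```python
# Hypothetical helper: turn ci/requirements/min-all-deps.yml into ">=" pins.
# Conda-only names (dask-core, matplotlib-base, ...) still need manual mapping.
import yaml

with open("ci/requirements/min-all-deps.yml") as f:
    env = yaml.safe_load(f)

pins = []
for dep in env["dependencies"]:
    # pinned entries look like "pandas=2.2"; unpinned ones like "pip" are skipped
    if isinstance(dep, str) and "=" in dep:
        name, _, version = dep.partition("=")
        pins.append(f'"{name}>={version}"')

print(", ".join(pins))
```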
[project.optional-dependencies] -accel = ["scipy", "bottleneck", "numbagg", "numba>=0.54", "flox", "opt_einsum"] +accel = [ + "scipy>=1.13", + "bottleneck", + "numbagg>=0.8", + "numba>=0.59", + "flox>=0.9", + "opt_einsum", +] complete = ["xarray[accel,etc,io,parallel,viz]"] io = [ - "netCDF4", + "netCDF4>=1.6.0", "h5netcdf", - "scipy", - 'pydap; python_version<"3.10"', - "zarr", + "pydap", + "scipy>=1.13", + "zarr>=2.18", "fsspec", "cftime", "pooch", ] -etc = ["sparse"] +etc = ["sparse>=0.15"] parallel = ["dask[complete]"] -viz = ["cartopy", "matplotlib", "nc-time-axis", "seaborn"] +viz = ["cartopy>=0.23", "matplotlib", "nc-time-axis", "seaborn"] types = [ "pandas-stubs", "scipy-stubs", @@ -249,6 +255,7 @@ extend-exclude = ["doc", "_typed_ops.pyi"] [tool.ruff.lint] extend-select = [ + "YTT", # flake8-2020 "B", # flake8-bugbear "C4", # flake8-comprehensions "ISC", # flake8-implicit-str-concat @@ -260,7 +267,10 @@ extend-select = [ "PERF", # Perflint "W", # pycodestyle warnings "PGH", # pygrep-hooks + "PLC", # Pylint Convention "PLE", # Pylint Errors + "PLR", # Pylint Refactor + "PLW", # Pylint Warnings "UP", # pyupgrade "FURB", # refurb "RUF", @@ -276,6 +286,14 @@ ignore = [ "PERF203", # try-except within a loop incurs performance overhead "E402", # module level import not at top of file "E731", # do not assign a lambda expression, use a def + "PLC0415", # `import` should be at the top-level of a file + "PLC0206", # extracting value from dictionary without calling `.items()` + "PLR091", # too many arguments / branches / statements + "PLR2004", # magic value used in comparison + "PLW0603", # using the global statement to update is discouraged + "PLW0642", # reassigned `self` variable in instance method + "PLW1641", # object does not implement `__hash__` method + "PLW2901", # `for` loop variable overwritten by assignment target "UP007", # use X | Y for type annotations "FURB105", # unnecessary empty string passed to `print` "RUF001", # string contains ambiguous unicode character @@ -288,6 +306,8 @@ ignore = [ [tool.ruff.lint.per-file-ignores] # don't enforce absolute imports "asv_bench/**" = ["TID252"] +# comparison with itself in tests +"xarray/tests/**" = ["PLR0124"] # looks like ruff bugs "xarray/core/_typed_ops.py" = ["PYI034"] "xarray/namedarray/_typing.py" = ["PYI018", "PYI046"] diff --git a/xarray/__init__.py b/xarray/__init__.py index d1001b4470a..04fb5b03867 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -1,6 +1,6 @@ from importlib.metadata import version as _version -from xarray import coders, groupers, testing, tutorial, ufuncs +from xarray import coders, groupers, indexes, testing, tutorial, ufuncs from xarray.backends.api import ( load_dataarray, load_dataset, @@ -70,6 +70,7 @@ # Sub-packages "coders", "groupers", + "indexes", "testing", "tutorial", "ufuncs", diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 79deaed927d..b80ec927b1e 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -11,6 +11,7 @@ ) from functools import partial from io import BytesIO +from itertools import starmap from numbers import Number from typing import ( TYPE_CHECKING, @@ -2109,10 +2110,9 @@ def save_mfdataset( import dask return dask.delayed( - [ - dask.delayed(_finalize_store)(w, s) - for w, s in zip(writes, stores, strict=True) - ] + list( + starmap(dask.delayed(_finalize_store), zip(writes, stores, strict=True)) + ) ) diff --git a/xarray/backends/chunks.py b/xarray/backends/chunks.py index 80aac75ecef..f17f5375976 100644 --- a/xarray/backends/chunks.py 
+++ b/xarray/backends/chunks.py @@ -51,7 +51,7 @@ def align_nd_chunks( # The ideal size of the chunks is the maximum of the two; this would avoid # that we use more memory than expected - max_chunk = max(fixed_chunk, max(var_chunks)) + max_chunk = max(fixed_chunk, *var_chunks) # The algorithm assumes that the chunks on this array are aligned except the last one # because it can be considered a partial one @@ -141,7 +141,7 @@ def build_grid_chunks( if region is None: region = slice(0, size) - region_start = region.start if region.start else 0 + region_start = region.start or 0 # Generate the zarr chunks inside the region of this dim chunks_on_region = [chunk_size - (region_start % chunk_size)] chunks_on_region.extend([chunk_size] * ((size - chunks_on_region[0]) // chunk_size)) @@ -224,7 +224,7 @@ def validate_grid_chunks_alignment( ) ) - interval_start = interval.start if interval.start else 0 + interval_start = interval.start or 0 if len(var_chunks) > 1: # The first border size is the amount of data that needs to be updated on the @@ -247,7 +247,7 @@ def validate_grid_chunks_alignment( ) if not allow_partial_chunks: - region_stop = interval.stop if interval.stop else size + region_stop = interval.stop or size error_on_last_chunk = base_error.format( var_chunk_pos=len(var_chunks) - 1, diff --git a/xarray/backends/common.py b/xarray/backends/common.py index e574f19e9d4..f478c2b882c 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -389,11 +389,25 @@ def encode(self, variables, attributes): attributes : dict-like """ - variables = {k: self.encode_variable(v) for k, v in variables.items()} - attributes = {k: self.encode_attribute(v) for k, v in attributes.items()} - return variables, attributes + encoded_variables = {} + for k, v in variables.items(): + try: + encoded_variables[k] = self.encode_variable(v) + except Exception as e: + e.add_note(f"Raised while encoding variable {k!r} with value {v!r}") + raise - def encode_variable(self, v): + encoded_attributes = {} + for k, v in attributes.items(): + try: + encoded_attributes[k] = self.encode_attribute(v) + except Exception as e: + e.add_note(f"Raised while encoding attribute {k!r} with value {v!r}") + raise + + return encoded_variables, encoded_attributes + + def encode_variable(self, v, name=None): """encode one variable""" return v @@ -641,9 +655,7 @@ def encode(self, variables, attributes): variables = { k: ensure_dtype_not_object(v, name=k) for k, v in variables.items() } - variables = {k: self.encode_variable(v) for k, v in variables.items()} - attributes = {k: self.encode_attribute(v) for k, v in attributes.items()} - return variables, attributes + return super().encode(variables, attributes) class BackendEntrypoint: diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index ba3a6d20e37..f3e434c6e5e 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -286,8 +286,8 @@ def set_dimension(self, name, length, is_unlimited=False): def set_attribute(self, key, value): self.ds.attrs[key] = value - def encode_variable(self, variable): - return _encode_nc4_variable(variable) + def encode_variable(self, variable, name=None): + return _encode_nc4_variable(variable, name=name) def prepare_variable( self, name, variable, check_encoding=False, unlimited_dims=None diff --git a/xarray/backends/memory.py b/xarray/backends/memory.py index aba767ab731..22cb47d85f2 100644 --- a/xarray/backends/memory.py +++ b/xarray/backends/memory.py @@ -5,6 +5,7 @@ import numpy as np from 
xarray.backends.common import AbstractWritableDataStore +from xarray.core import indexing from xarray.core.variable import Variable @@ -24,7 +25,12 @@ def get_attrs(self): return self._attributes def get_variables(self): - return self._variables + res = {} + for k, v in self._variables.items(): + v = v.copy(deep=True) + res[k] = v + v._data = indexing.LazilyIndexedArray(v._data) + return res def get_dimensions(self): return {d: s for v in self._variables.values() for d, s in v.dims.items()} diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index a23d247b6c3..8c3a01eba66 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -9,7 +9,6 @@ import numpy as np -from xarray import coding from xarray.backends.common import ( BACKEND_ENTRYPOINTS, BackendArray, @@ -30,6 +29,12 @@ ) from xarray.backends.netcdf3 import encode_nc3_attr_value, encode_nc3_variable from xarray.backends.store import StoreBackendEntrypoint +from xarray.coding.strings import ( + CharacterArrayCoder, + EncodedStringCoder, + create_vlen_dtype, + is_unicode_dtype, +) from xarray.coding.variables import pop_to from xarray.core import indexing from xarray.core.utils import ( @@ -73,7 +78,7 @@ def __init__(self, variable_name, datastore): # check vlen string dtype in further steps # it also prevents automatic string concatenation via # conventions.decode_cf_variable - dtype = coding.strings.create_vlen_dtype(str) + dtype = create_vlen_dtype(str) self.dtype = dtype def __setitem__(self, key, value): @@ -127,12 +132,12 @@ def _getitem(self, key): return array -def _encode_nc4_variable(var): +def _encode_nc4_variable(var, name=None): for coder in [ - coding.strings.EncodedStringCoder(allows_unicode=True), - coding.strings.CharacterArrayCoder(), + EncodedStringCoder(allows_unicode=True), + CharacterArrayCoder(), ]: - var = coder.encode(var) + var = coder.encode(var, name=name) return var @@ -164,7 +169,7 @@ def _nc4_dtype(var): if "dtype" in var.encoding: dtype = var.encoding.pop("dtype") _check_encoding_dtype_is_vlen_string(dtype) - elif coding.strings.is_unicode_dtype(var.dtype): + elif is_unicode_dtype(var.dtype): dtype = str elif var.dtype.kind in ["i", "u", "f", "c", "S"]: dtype = var.dtype @@ -535,12 +540,12 @@ def set_attribute(self, key, value): else: self.ds.setncattr(key, value) - def encode_variable(self, variable): + def encode_variable(self, variable, name=None): variable = _force_native_endianness(variable) if self.format == "NETCDF4": - variable = _encode_nc4_variable(variable) + variable = _encode_nc4_variable(variable, name=name) else: - variable = encode_nc3_variable(variable) + variable = encode_nc3_variable(variable, name=name) return variable def prepare_variable( diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index 3ae024c9760..6f66b6c1059 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -118,12 +118,12 @@ def _maybe_prepare_times(var): return data -def encode_nc3_variable(var): +def encode_nc3_variable(var, name=None): for coder in [ coding.strings.EncodedStringCoder(allows_unicode=False), coding.strings.CharacterArrayCoder(), ]: - var = coder.encode(var) + var = coder.encode(var, name=name) data = _maybe_prepare_times(var) data = coerce_nc3_dtype(data) attrs = encode_nc3_attrs(var.attrs) diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 301ea430c4c..73b719f8260 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -158,11 +158,12 @@ def get_variables(self): except 
AttributeError: from pydap.model import GroupType - _vars = list(self.ds.keys()) - # check the key is a BaseType or GridType - for var in _vars: - if isinstance(self.ds[var], GroupType): - _vars.remove(var) + _vars = [ + var + for var in self.ds.keys() + # check the key is not a BaseType or GridType + if not isinstance(self.ds[var], GroupType) + ] return FrozenDict((k, self.open_store_variable(self.ds[k])) for k in _vars) def get_attrs(self): diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 93d0e40a6e1..b98d226cac6 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -190,7 +190,7 @@ def ds(self): def open_store_variable(self, name, var): return Variable( var.dimensions, - ScipyArrayWrapper(name, self), + indexing.LazilyIndexedArray(ScipyArrayWrapper(name, self)), _decode_attrs(var._attributes), ) @@ -227,8 +227,8 @@ def set_attribute(self, key, value): value = encode_nc3_attr_value(value) setattr(self.ds, key, value) - def encode_variable(self, variable): - variable = encode_nc3_variable(variable) + def encode_variable(self, variable, name=None): + variable = encode_nc3_variable(variable, name=name) return variable def prepare_variable( diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index b86b5d0b374..48405b906cd 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -855,8 +855,8 @@ def set_dimensions(self, variables, unlimited_dims=None): def set_attributes(self, attributes): _put_attrs(self.zarr_group, attributes) - def encode_variable(self, variable): - variable = encode_zarr_variable(variable) + def encode_variable(self, variable, name=None): + variable = encode_zarr_variable(variable, name=name) return variable def encode_attribute(self, a): @@ -1184,7 +1184,7 @@ def set_variables( # with chunk boundaries, then no synchronization is required." 
# TODO: incorporate synchronizer to allow writes from multiple dask # threads - shape = zarr_shape if zarr_shape else v.shape + shape = zarr_shape or v.shape validate_grid_chunks_alignment( nd_var_chunks=v.chunks, enc_chunks=encoding["chunks"], @@ -1545,7 +1545,7 @@ def guess_can_open( ) -> bool: if isinstance(filename_or_obj, str | os.PathLike): _, ext = os.path.splitext(filename_or_obj) - return ext in {".zarr"} + return ext == ".zarr" return False @@ -1768,6 +1768,11 @@ def _get_open_params( else: missing_exc = zarr.errors.GroupNotFoundError + if _zarr_v3(): + # zarr 3.0.8 and earlier did not support this property - it was effectively assumed true + if not getattr(store, "supports_consolidated_metadata", True): + consolidated = consolidate_on_close = False + if consolidated in [None, True]: # open the root of the store, in case there is metadata consolidated there group = open_kwargs.pop("path") @@ -1825,6 +1830,7 @@ def _get_open_params( else: # this was the default for v2 and should apply to most existing Zarr data use_zarr_fill_value_as_mask = True + return ( zarr_group, consolidate_on_close, diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 510e9dafad8..e8d407b282c 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -289,20 +289,18 @@ def _shift_month(date, months, day_option: DayOption = "start"): _ = attempt_import("cftime") has_year_zero = date.has_year_zero - delta_year = (date.month + months) // 12 + year = date.year + (date.month + months) // 12 month = (date.month + months) % 12 if month == 0: month = 12 - delta_year = delta_year - 1 + year -= 1 if not has_year_zero: - if date.year < 0 and date.year + delta_year >= 0: - delta_year = delta_year + 1 - elif date.year > 0 and date.year + delta_year <= 0: - delta_year = delta_year - 1 - - year = date.year + delta_year + if date.year < 0 <= year: + year += 1 + elif year <= 0 < date.year: + year -= 1 # Silence warnings associated with generating dates with years < 1. with warnings.catch_warnings(): @@ -1421,8 +1419,6 @@ def date_range( cftime_range date_range_like """ - from xarray.coding.times import _is_standard_calendar - if tz is not None: use_cftime = False diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 053e4e76a3d..210bbf263b1 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -43,7 +43,7 @@ import math from datetime import timedelta -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any import numpy as np import pandas as pd @@ -549,7 +549,7 @@ def __rsub__(self, other): ) from err def to_datetimeindex( - self, unsafe: bool = False, time_unit: Optional[PDDatetimeUnitOptions] = None + self, unsafe: bool = False, time_unit: PDDatetimeUnitOptions | None = None ) -> pd.DatetimeIndex: """If possible, convert this index to a pandas.DatetimeIndex. 
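The `_shift_month` rewrite above folds the old `delta_year` bookkeeping into a direct `year` computation and uses chained comparisons (`date.year < 0 <= year`) for calendars without a year zero. A standalone sketch of just that adjustment, with plain integers instead of cftime dates (the function name here is illustrative, not part of the diff):

```python
# Sketch of the no-year-zero correction from _shift_month, on bare ints.
# `year` is the provisionally shifted year; calendars lacking year 0 must
# skip over it when the shift crosses zero in either direction.
def skip_missing_year_zero(original_year: int, year: int) -> int:
    if original_year < 0 <= year:
        return year + 1  # shifted forward across the missing year 0
    elif year <= 0 < original_year:
        return year - 1  # shifted backward across the missing year 0
    return year


# Moving from year -1 forward by "one year" lands on 1, not the nonexistent 0.
assert skip_missing_year_zero(-1, 0) == 1
# Moving from year 1 backward by "one year" lands on -1.
assert skip_missing_year_zero(1, 0) == -1
```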
@@ -689,8 +689,6 @@ def asi8(self): @property def calendar(self): """The calendar used by the datetimes in the index.""" - from xarray.coding.times import infer_calendar_name - if not self._data.size: return None diff --git a/xarray/coding/common.py b/xarray/coding/common.py index 1b455009668..0e8d7e1955e 100644 --- a/xarray/coding/common.py +++ b/xarray/coding/common.py @@ -63,6 +63,10 @@ def __init__(self, array, func: Callable, dtype: np.typing.DTypeLike): def dtype(self) -> np.dtype: return np.dtype(self._dtype) + def transpose(self, order): + # For elementwise functions, we can compose transpose and function application + return type(self)(self.array.transpose(order), self.func, self.dtype) + def _oindex_get(self, key): return type(self)(self.array.oindex[key], self.func, self.dtype) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 4ca6a3f0a46..8fba967240c 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -2,6 +2,7 @@ from __future__ import annotations +import re from functools import partial import numpy as np @@ -15,7 +16,7 @@ unpack_for_encoding, ) from xarray.core import indexing -from xarray.core.utils import module_available +from xarray.core.utils import emit_user_level_warning, module_available from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array @@ -113,6 +114,35 @@ def ensure_fixed_length_bytes(var: Variable) -> Variable: return var +def validate_char_dim_name(strlen, encoding, name) -> str: + """Check character array dimension naming and size and return it.""" + + if (char_dim_name := encoding.pop("char_dim_name", None)) is not None: + # 1 - extract all characters up to last number sequence + # 2 - extract last number sequence + match = re.search(r"^(.*?)(\d+)(?!.*\d)", char_dim_name) + if match: + new_dim_name = match.group(1) + if int(match.group(2)) != strlen: + emit_user_level_warning( + f"String dimension naming mismatch on variable {name!r}. {char_dim_name!r} provided by encoding, but data has length of '{strlen}'. Using '{new_dim_name}{strlen}' instead of {char_dim_name!r} to prevent possible naming clash.\n" + "To silence this warning either remove 'char_dim_name' from encoding or provide a fitting name." + ) + char_dim_name = f"{new_dim_name}{strlen}" + elif ( + original_shape := encoding.get("original_shape", [-1])[-1] + ) != -1 and original_shape != strlen: + emit_user_level_warning( + f"String dimension length mismatch on variable {name!r}. '{original_shape}' provided by encoding, but data has length of '{strlen}'. Using '{char_dim_name}{strlen}' instead of {char_dim_name!r} to prevent possible naming clash.\n" + f"To silence this warning remove 'original_shape' from encoding." 
+ ) + char_dim_name = f"{char_dim_name}{strlen}" + else: + char_dim_name = f"string{strlen}" + + return char_dim_name + + class CharacterArrayCoder(VariableCoder): """Transforms between arrays containing bytes and character arrays.""" @@ -122,10 +152,7 @@ def encode(self, variable, name=None): dims, data, attrs, encoding = unpack_for_encoding(variable) if data.dtype.kind == "S" and encoding.get("dtype") is not str: data = bytes_to_char(data) - if "char_dim_name" in encoding.keys(): - char_dim_name = encoding.pop("char_dim_name") - else: - char_dim_name = f"string{data.shape[-1]}" + char_dim_name = validate_char_dim_name(data.shape[-1], encoding, name) dims = dims + (char_dim_name,) return Variable(dims, data, attrs, encoding) @@ -221,7 +248,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): values, when accessed, are automatically stacked along the last dimension. >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer] + >>> np.array(StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer]) array(b'abc', dtype='|S3') """ @@ -250,14 +277,17 @@ def __repr__(self): return f"{type(self).__name__}({self.array!r})" def _vindex_get(self, key): - return _numpy_char_to_bytes(self.array.vindex[key]) + return type(self)(self.array.vindex[key]) def _oindex_get(self, key): - return _numpy_char_to_bytes(self.array.oindex[key]) + return type(self)(self.array.oindex[key]) def __getitem__(self, key): # require slicing the last dimension completely key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) if key.tuple[-1] != slice(None): raise IndexError("too many indices") - return _numpy_char_to_bytes(self.array[key]) + return type(self)(self.array[key]) + + def get_duck_array(self): + return _numpy_char_to_bytes(self.array.get_duck_array()) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index e6bc8ca59bd..d6567ba4c61 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -246,7 +246,7 @@ def build_pattern( ] pattern_list = [] for sep, name, sub_pattern in pieces: - pattern_list.append((sep if sep else "") + named(name, sub_pattern)) + pattern_list.append((sep or "") + named(name, sub_pattern)) # TODO: allow timezone offsets? return "^" + trailing_optional(pattern_list) + "$" @@ -1410,6 +1410,43 @@ def has_timedelta64_encoding_dtype(attrs_or_encoding: dict) -> bool: return isinstance(dtype, str) and dtype.startswith("timedelta64") +def resolve_time_unit_from_attrs_dtype( + attrs_dtype: str, name: T_Name +) -> PDDatetimeUnitOptions: + dtype = np.dtype(attrs_dtype) + resolution, _ = np.datetime_data(dtype) + resolution = cast(NPDatetimeUnitOptions, resolution) + if np.timedelta64(1, resolution) > np.timedelta64(1, "s"): + time_unit = cast(PDDatetimeUnitOptions, "s") + message = ( + f"Following pandas, xarray only supports decoding to timedelta64 " + f"values with a resolution of 's', 'ms', 'us', or 'ns'. Encoded " + f"values for variable {name!r} have a resolution of " + f"{resolution!r}. Attempting to decode to a resolution of 's'. " + f"Note, depending on the encoded values, this may lead to an " + f"OverflowError. Additionally, data will not be identically round " + f"tripped; xarray will choose an encoding dtype of " + f"'timedelta64[s]' when re-encoding." 
+ ) + emit_user_level_warning(message) + elif np.timedelta64(1, resolution) < np.timedelta64(1, "ns"): + time_unit = cast(PDDatetimeUnitOptions, "ns") + message = ( + f"Following pandas, xarray only supports decoding to timedelta64 " + f"values with a resolution of 's', 'ms', 'us', or 'ns'. Encoded " + f"values for variable {name!r} have a resolution of " + f"{resolution!r}. Attempting to decode to a resolution of 'ns'. " + f"Note, depending on the encoded values, this may lead to loss of " + f"precision. Additionally, data will not be identically round " + f"tripped; xarray will choose an encoding dtype of " + f"'timedelta64[ns]' when re-encoding." + ) + emit_user_level_warning(message) + else: + time_unit = cast(PDDatetimeUnitOptions, resolution) + return time_unit + + class CFTimedeltaCoder(VariableCoder): """Coder for CF Timedelta coding. @@ -1430,7 +1467,7 @@ class CFTimedeltaCoder(VariableCoder): def __init__( self, - time_unit: PDDatetimeUnitOptions = "ns", + time_unit: PDDatetimeUnitOptions | None = None, decode_via_units: bool = True, decode_via_dtype: bool = True, ) -> None: @@ -1442,45 +1479,18 @@ def __init__( def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): dims, data, attrs, encoding = unpack_for_encoding(variable) - has_timedelta_dtype = has_timedelta64_encoding_dtype(encoding) - if ("units" in encoding or "dtype" in encoding) and not has_timedelta_dtype: - dtype = encoding.get("dtype", None) - units = encoding.pop("units", None) + dtype = encoding.get("dtype", None) + units = encoding.pop("units", None) - # in the case of packed data we need to encode into - # float first, the correct dtype will be established - # via CFScaleOffsetCoder/CFMaskCoder - if "add_offset" in encoding or "scale_factor" in encoding: - dtype = data.dtype if data.dtype.kind == "f" else "float64" + # in the case of packed data we need to encode into + # float first, the correct dtype will be established + # via CFScaleOffsetCoder/CFMaskCoder + if "add_offset" in encoding or "scale_factor" in encoding: + dtype = data.dtype if data.dtype.kind == "f" else "float64" - else: - resolution, _ = np.datetime_data(variable.dtype) - dtype = np.int64 - attrs_dtype = f"timedelta64[{resolution}]" - units = _numpy_dtype_to_netcdf_timeunit(variable.dtype) - safe_setitem(attrs, "dtype", attrs_dtype, name=name) - # Remove dtype encoding if it exists to prevent it from - # interfering downstream in NonStringCoder. - encoding.pop("dtype", None) - - if any( - k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS - ): - raise ValueError( - f"Specifying 'add_offset' or 'scale_factor' is not " - f"supported when encoding the timedelta64 values of " - f"variable {name!r} with xarray's new default " - f"timedelta64 encoding approach. To encode {name!r} " - f"with xarray's previous timedelta64 encoding " - f"approach, which supports the 'add_offset' and " - f"'scale_factor' parameters, additionally set " - f"encoding['units'] to a unit of time, e.g. " - f"'seconds'. To proceed with encoding of {name!r} " - f"via xarray's new approach, remove any encoding " - f"entries for 'add_offset' or 'scale_factor'." 
- ) - if "_FillValue" not in encoding and "missing_value" not in encoding: - encoding["_FillValue"] = np.iinfo(np.int64).min + resolution, _ = np.datetime_data(variable.dtype) + attrs_dtype = f"timedelta64[{resolution}]" + safe_setitem(attrs, "dtype", attrs_dtype, name=name) data, units = encode_cf_timedelta(data, units, dtype) safe_setitem(attrs, "units", units, name=name) @@ -1499,54 +1509,13 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: ): dims, data, attrs, encoding = unpack_for_decoding(variable) units = pop_to(attrs, encoding, "units") - if is_dtype_decodable and self.decode_via_dtype: - if any( - k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS - ): - raise ValueError( - f"Decoding timedelta64 values via dtype is not " - f"supported when 'add_offset', or 'scale_factor' are " - f"present in encoding. Check the encoding parameters " - f"of variable {name!r}." - ) - dtype = pop_to(attrs, encoding, "dtype", name=name) - dtype = np.dtype(dtype) - resolution, _ = np.datetime_data(dtype) - resolution = cast(NPDatetimeUnitOptions, resolution) - if np.timedelta64(1, resolution) > np.timedelta64(1, "s"): - time_unit = cast(PDDatetimeUnitOptions, "s") - dtype = np.dtype("timedelta64[s]") - message = ( - f"Following pandas, xarray only supports decoding to " - f"timedelta64 values with a resolution of 's', 'ms', " - f"'us', or 'ns'. Encoded values for variable {name!r} " - f"have a resolution of {resolution!r}. Attempting to " - f"decode to a resolution of 's'. Note, depending on " - f"the encoded values, this may lead to an " - f"OverflowError. Additionally, data will not be " - f"identically round tripped; xarray will choose an " - f"encoding dtype of 'timedelta64[s]' when re-encoding." - ) - emit_user_level_warning(message) - elif np.timedelta64(1, resolution) < np.timedelta64(1, "ns"): - time_unit = cast(PDDatetimeUnitOptions, "ns") - dtype = np.dtype("timedelta64[ns]") - message = ( - f"Following pandas, xarray only supports decoding to " - f"timedelta64 values with a resolution of 's', 'ms', " - f"'us', or 'ns'. Encoded values for variable {name!r} " - f"have a resolution of {resolution!r}. Attempting to " - f"decode to a resolution of 'ns'. Note, depending on " - f"the encoded values, this may lead to loss of " - f"precision. Additionally, data will not be " - f"identically round tripped; xarray will choose an " - f"encoding dtype of 'timedelta64[ns]' " - f"when re-encoding." - ) - emit_user_level_warning(message) + if is_dtype_decodable: + attrs_dtype = attrs.pop("dtype") + if self.time_unit is None: + time_unit = resolve_time_unit_from_attrs_dtype(attrs_dtype, name) else: - time_unit = cast(PDDatetimeUnitOptions, resolution) - elif self.decode_via_units: + time_unit = self.time_unit + else: if self._emit_decode_timedelta_future_warning: emit_user_level_warning( "In a future version, xarray will not decode " @@ -1564,8 +1533,19 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: "'CFTimedeltaCoder' instance.", FutureWarning, ) - dtype = np.dtype(f"timedelta64[{self.time_unit}]") - time_unit = self.time_unit + if self.time_unit is None: + time_unit = cast(PDDatetimeUnitOptions, "ns") + else: + time_unit = self.time_unit + + # Handle edge case that decode_via_dtype=False and + # decode_via_units=True, and timedeltas were encoded with a + # dtype attribute. We need to remove the dtype attribute + # to prevent an error during round tripping. 
+ if has_timedelta_dtype: + attrs.pop("dtype") + + dtype = np.dtype(f"timedelta64[{time_unit}]") transform = partial(decode_cf_timedelta, units=units, time_unit=time_unit) data = lazy_elemwise_func(data, transform, dtype=dtype) return Variable(dims, data, attrs, encoding, fastpath=True) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 662fec4b2c4..3b7be898ccf 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -21,6 +21,7 @@ ) from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.core import dtypes, duck_array_ops, indexing +from xarray.core.types import Self from xarray.core.variable import Variable if TYPE_CHECKING: @@ -58,13 +59,16 @@ def dtype(self) -> np.dtype: return np.dtype(self.array.dtype.kind + str(self.array.dtype.itemsize)) def _oindex_get(self, key): - return np.asarray(self.array.oindex[key], dtype=self.dtype) + return type(self)(self.array.oindex[key]) def _vindex_get(self, key): - return np.asarray(self.array.vindex[key], dtype=self.dtype) + return type(self)(self.array.vindex[key]) - def __getitem__(self, key) -> np.ndarray: - return np.asarray(self.array[key], dtype=self.dtype) + def __getitem__(self, key) -> Self: + return type(self)(self.array[key]) + + def get_duck_array(self): + return duck_array_ops.astype(self.array.get_duck_array(), dtype=self.dtype) class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): @@ -96,13 +100,16 @@ def dtype(self) -> np.dtype: return np.dtype("bool") def _oindex_get(self, key): - return np.asarray(self.array.oindex[key], dtype=self.dtype) + return type(self)(self.array.oindex[key]) def _vindex_get(self, key): - return np.asarray(self.array.vindex[key], dtype=self.dtype) + return type(self)(self.array.vindex[key]) + + def __getitem__(self, key) -> Self: + return type(self)(self.array[key]) - def __getitem__(self, key) -> np.ndarray: - return np.asarray(self.array[key], dtype=self.dtype) + def get_duck_array(self): + return duck_array_ops.astype(self.array.get_duck_array(), dtype=self.dtype) def _apply_mask( diff --git a/xarray/computation/apply_ufunc.py b/xarray/computation/apply_ufunc.py index 26c757dcdf8..678c702f3f3 100644 --- a/xarray/computation/apply_ufunc.py +++ b/xarray/computation/apply_ufunc.py @@ -529,8 +529,10 @@ def apply_dataset_vfunc( out: Dataset | tuple[Dataset, ...] if signature.num_outputs > 1: out = tuple( - _fast_dataset(*args) - for args in zip(result_vars, list_of_coords, list_of_indexes, strict=True) + itertools.starmap( + _fast_dataset, + zip(result_vars, list_of_coords, list_of_indexes, strict=True), + ) ) else: (coord_vars,) = list_of_coords @@ -567,7 +569,6 @@ def apply_groupby_func(func, *args): DataArray, Variable and/or ndarray objects. 
""" from xarray.core.groupby import GroupBy, peek_at - from xarray.core.variable import Variable groupbys = [arg for arg in args if isinstance(arg, GroupBy)] assert groupbys, "must have at least one groupby to iterate over" @@ -600,9 +601,7 @@ def apply_groupby_func(func, *args): iterator = itertools.repeat(arg) iterators.append(iterator) - applied: Iterator = ( - func(*zipped_args) for zipped_args in zip(*iterators, strict=False) - ) + applied: Iterator = itertools.starmap(func, zip(*iterators, strict=False)) applied_example, applied = peek_at(applied) combine = first_groupby._combine # type: ignore[attr-defined] if isinstance(applied_example, tuple): diff --git a/xarray/computation/computation.py b/xarray/computation/computation.py index 4ec9651dc07..14b1ae6e240 100644 --- a/xarray/computation/computation.py +++ b/xarray/computation/computation.py @@ -258,7 +258,7 @@ def _cov_corr( weights: T_DataArray | None = None, dim: Dims = None, ddof: int = 0, - method: Literal["cov", "corr", None] = None, + method: Literal["cov", "corr"] | None = None, ) -> T_DataArray: """ Internal method for xr.cov() and xr.corr() so only have to @@ -574,7 +574,6 @@ def dot( array(235) """ from xarray.core.dataarray import DataArray - from xarray.core.variable import Variable if any(not isinstance(arr, Variable | DataArray) for arr in arrays): raise TypeError( diff --git a/xarray/computation/fit.py b/xarray/computation/fit.py index 7c9c25abe52..cc7289ac647 100644 --- a/xarray/computation/fit.py +++ b/xarray/computation/fit.py @@ -474,7 +474,7 @@ def _wrapper(Y, *args, **kwargs): mask = np.all([np.any(~np.isnan(x), axis=0), ~np.isnan(y)], axis=0) x = x[:, mask] y = y[mask] - if not len(y): + if y.size == 0: popt = np.full([n_params], np.nan) pcov = np.full([n_params, n_params], np.nan) return popt, pcov diff --git a/xarray/computation/nanops.py b/xarray/computation/nanops.py index 17c60b6f663..a28078540bb 100644 --- a/xarray/computation/nanops.py +++ b/xarray/computation/nanops.py @@ -105,14 +105,12 @@ def nansum(a, axis=None, dtype=None, out=None, min_count=None): def _nanmean_ddof_object(ddof, value, axis=None, dtype=None, **kwargs): """In house nanmean. 
ddof argument will be used in _nanvar method""" - from xarray.core.duck_array_ops import count, fillna, where_method - valid_count = count(value, axis=axis) value = fillna(value, 0) # As dtype inference is impossible for object dtype, we assume float # https://github.com/dask/dask/issues/3162 if dtype is None and value.dtype.kind == "O": - dtype = value.dtype if value.dtype.kind in ["cf"] else float + dtype = float data = np.sum(value, axis=axis, dtype=dtype, **kwargs) data = data / (valid_count - ddof) diff --git a/xarray/computation/rolling.py b/xarray/computation/rolling.py index 519d1f7eae6..e7718560559 100644 --- a/xarray/computation/rolling.py +++ b/xarray/computation/rolling.py @@ -131,13 +131,11 @@ def __init__( def __repr__(self) -> str: """provide a nice str repr of our rolling object""" - attrs = [ + attrs = ",".join( "{k}->{v}{c}".format(k=k, v=w, c="(center)" if c else "") for k, w, c in zip(self.dim, self.window, self.center, strict=True) - ] - return "{klass} [{attrs}]".format( - klass=self.__class__.__name__, attrs=",".join(attrs) ) + return f"{self.__class__.__name__} [{attrs}]" def __len__(self) -> int: return math.prod(self.obj.sizes[d] for d in self.dim) @@ -1106,14 +1104,12 @@ def _get_keep_attrs(self, keep_attrs): def __repr__(self) -> str: """provide a nice str repr of our coarsen object""" - attrs = [ + attrs = ",".join( f"{k}->{getattr(self, k)}" for k in self._attributes if getattr(self, k, None) is not None - ] - return "{klass} [{attrs}]".format( - klass=self.__class__.__name__, attrs=",".join(attrs) ) + return f"{self.__class__.__name__} [{attrs}]" def construct( self, diff --git a/xarray/conventions.py b/xarray/conventions.py index c9cd2a5dcdc..17f1e0666b6 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -18,7 +18,7 @@ ) from xarray.core.utils import emit_user_level_warning from xarray.core.variable import IndexVariable, Variable -from xarray.namedarray.utils import is_duck_dask_array +from xarray.namedarray.utils import is_duck_array CF_RELATED_DATA = ( "bounds", @@ -248,7 +248,15 @@ def decode_cf_variable( encoding.setdefault("dtype", original_dtype) - if not is_duck_dask_array(data): + if ( + # we don't need to lazily index duck arrays + not is_duck_array(data) + # These arrays already support lazy indexing + # OR for IndexingAdapters, it makes no sense to wrap them + and not isinstance(data, indexing.ExplicitlyIndexedNDArrayMixin) + ): + # this path applies to bare BackendArray objects. + # It is not hit for any internal Xarray backend data = indexing.LazilyIndexedArray(data) return Variable(dimensions, data, attributes, encoding=encoding, fastpath=True) @@ -784,7 +792,13 @@ def cf_encoder(variables: T_Variables, attributes: T_Attrs): # add encoding for time bounds variables if present. 
_update_bounds_encoding(variables) - new_vars = {k: encode_cf_variable(v, name=k) for k, v in variables.items()} + new_vars = {} + for k, v in variables.items(): + try: + new_vars[k] = encode_cf_variable(v, name=k) + except Exception as e: + e.add_note(f"Raised while encoding variable {k!r} with value {v!r}") + raise # Remove attrs from bounds variables (issue #2921) for var in new_vars.values(): diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index c78b38caf63..86e875cab5c 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -20,7 +20,7 @@ from xarray.namedarray.utils import is_duck_dask_array if TYPE_CHECKING: - import sys + from typing import Self from numpy.typing import DTypeLike @@ -28,11 +28,6 @@ from xarray.core.dataset import Dataset from xarray.core.types import CFCalendar - if sys.version_info >= (3, 11): - from typing import Self - else: - from typing_extensions import Self - def _season_from_months(months): """Compute season (DJF, MAM, JJA, SON) from month ordinal""" diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 06570ceba3a..0bab92963a5 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -349,7 +349,7 @@ def f(x, iind): islice = slice(-1, None) if iind == -1 else slice(iind, iind + 1) item = x[islice] - return item if item else default + return item or default return self._apply(func=f, func_args=(i,)) diff --git a/xarray/core/common.py b/xarray/core/common.py index 6181aa6a8c1..a190766b01a 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1782,7 +1782,7 @@ def _full_like_variable( other.shape, fill_value, dtype=dtype, - chunks=chunks if chunks else other.data.chunks, + chunks=chunks or other.data.chunks, **from_array_kwargs, ) else: diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 13fe0a791bb..28cbc5b7622 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -788,7 +788,7 @@ def _update_coords( # check for inconsistent state *before* modifying anything in-place dims = calculate_dimensions(variables) new_coord_names = set(coords) - for dim in dims.keys(): + for dim in dims: if dim in variables: new_coord_names.add(dim) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index c13d33872b6..0bfb0b7ab1c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -498,7 +498,7 @@ def _replace( self, variable: Variable | None = None, coords=None, - name: Hashable | None | Default = _default, + name: Hashable | Default | None = _default, attrs=_default, indexes=None, ) -> Self: @@ -520,7 +520,7 @@ def _replace( def _replace_maybe_drop_dims( self, variable: Variable, - name: Hashable | None | Default = _default, + name: Hashable | Default | None = _default, ) -> Self: if self.sizes == variable.sizes: coords = self._coords.copy() @@ -581,7 +581,7 @@ def _to_temp_dataset(self) -> Dataset: return self._to_dataset_whole(name=_THIS_ARRAY, shallow_copy=False) def _from_temp_dataset( - self, dataset: Dataset, name: Hashable | None | Default = _default + self, dataset: Dataset, name: Hashable | Default | None = _default ) -> Self: variable = dataset._variables.pop(_THIS_ARRAY) coords = dataset._variables @@ -2609,8 +2609,8 @@ def swap_dims( def expand_dims( self, - dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None, - axis: None | int | Sequence[int] = None, + dim: Hashable | Sequence[Hashable] | Mapping[Any, Any] | None = None, + axis: int | Sequence[int] | None = None, 
create_index_for_new_dim: bool = True, **dim_kwargs: Any, ) -> Self: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 367da2f60a5..6de626a159b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -791,9 +791,9 @@ def _replace( variables: dict[Hashable, Variable] | None = None, coord_names: set[Hashable] | None = None, dims: dict[Any, int] | None = None, - attrs: dict[Hashable, Any] | None | Default = _default, + attrs: dict[Hashable, Any] | Default | None = _default, indexes: dict[Hashable, Index] | None = None, - encoding: dict | None | Default = _default, + encoding: dict | Default | None = _default, inplace: bool = False, ) -> Self: """Fastpath constructor for internal use. @@ -840,7 +840,7 @@ def _replace_with_new_dims( self, variables: dict[Hashable, Variable], coord_names: set | None = None, - attrs: dict[Hashable, Any] | None | Default = _default, + attrs: dict[Hashable, Any] | Default | None = _default, indexes: dict[Hashable, Index] | None = None, inplace: bool = False, ) -> Self: @@ -855,7 +855,7 @@ def _replace_vars_and_dims( variables: dict[Hashable, Variable], coord_names: set | None = None, dims: dict[Hashable, int] | None = None, - attrs: dict[Hashable, Any] | None | Default = _default, + attrs: dict[Hashable, Any] | Default | None = _default, inplace: bool = False, ) -> Self: """Deprecated version of _replace_with_new_dims(). @@ -1359,7 +1359,6 @@ def _setitem_check(self, key, value): to avoid leaving the dataset in a partially updated state when an error occurs. """ from xarray.core.dataarray import DataArray - from xarray.structure.alignment import align if isinstance(value, Dataset): missing_vars = [ @@ -2334,9 +2333,10 @@ def info(self, buf: IO | None = None) -> None: if buf is None: # pragma: no cover buf = sys.stdout - lines = [] - lines.append("xarray.Dataset {") - lines.append("dimensions:") + lines = [ + "xarray.Dataset {", + "dimensions:", + ] for name, size in self.sizes.items(): lines.append(f"\t{name} = {size} ;") lines.append("\nvariables:") @@ -2550,7 +2550,6 @@ def _validate_indexers( + string indexers are cast to the appropriate date type if the associated index is a DatetimeIndex or CFTimeIndex """ - from xarray.coding.cftimeindex import CFTimeIndex from xarray.core.dataarray import DataArray indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) @@ -4349,8 +4348,8 @@ def swap_dims( def expand_dims( self, - dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None, - axis: None | int | Sequence[int] = None, + dim: Hashable | Sequence[Hashable] | Mapping[Any, Any] | None = None, + axis: int | Sequence[int] | None = None, create_index_for_new_dim: bool = True, **dim_kwargs: Any, ) -> Self: @@ -8267,8 +8266,6 @@ def differentiate( -------- numpy.gradient: corresponding numpy function """ - from xarray.core.variable import Variable - if coord not in self.variables and coord not in self.dims: variables_and_dims = tuple(set(self.variables.keys()).union(self.dims)) raise ValueError( @@ -9708,7 +9705,7 @@ def convert_calendar( self, calendar: CFCalendar, dim: Hashable = "time", - align_on: Literal["date", "year", None] = None, + align_on: Literal["date", "year"] | None = None, missing: Any | None = None, use_cftime: bool | None = None, ) -> Self: diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 734927fd3d1..afef2f20094 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -347,9 +347,9 @@ def _replace( # type: ignore[override] variables: dict[Hashable, Variable] | None = 
None, coord_names: set[Hashable] | None = None, dims: dict[Any, int] | None = None, - attrs: dict[Hashable, Any] | None | Default = _default, + attrs: dict[Hashable, Any] | Default | None = _default, indexes: dict[Hashable, Index] | None = None, - encoding: dict | None | Default = _default, + encoding: dict | Default | None = _default, inplace: bool = False, ) -> Dataset: """ @@ -1629,7 +1629,6 @@ def _unary_op(self, f, *args, **kwargs) -> DataTree: return self.map_over_datasets(functools.partial(f, **kwargs), *args) # type: ignore[return-value] def _binary_op(self, other, f, reflexive=False, join=None) -> DataTree: - from xarray.core.dataset import Dataset from xarray.core.groupby import GroupBy if isinstance(other, GroupBy): diff --git a/xarray/core/datatree_mapping.py b/xarray/core/datatree_mapping.py index 6262c7f19cd..f9fd5505b66 100644 --- a/xarray/core/datatree_mapping.py +++ b/xarray/core/datatree_mapping.py @@ -1,6 +1,5 @@ from __future__ import annotations -import sys from collections.abc import Callable, Mapping from typing import TYPE_CHECKING, Any, cast, overload @@ -42,7 +41,7 @@ def map_over_datasets( def map_over_datasets( - func: Callable[..., Dataset | None | tuple[Dataset | None, ...]], + func: Callable[..., Dataset | tuple[Dataset | None, ...] | None], *args: Any, kwargs: Mapping[str, Any] | None = None, ) -> DataTree | tuple[DataTree, ...]: @@ -106,7 +105,7 @@ def map_over_datasets( # Walk all trees simultaneously, applying func to all nodes that lie in same position in different trees # We don't know which arguments are DataTrees so we zip all arguments together as iterables # Store tuples of results in a dict because we don't yet know how many trees we need to rebuild to return - out_data_objects: dict[str, Dataset | None | tuple[Dataset | None, ...]] = {} + out_data_objects: dict[str, Dataset | tuple[Dataset | None, ...] | None] = {} tree_args = [arg for arg in args if isinstance(arg, DataTree)] name = result_name(tree_args) @@ -162,16 +161,12 @@ def wrapper(*args, **kwargs): def add_note(err: BaseException, msg: str) -> None: - # TODO: remove once python 3.10 can be dropped - if sys.version_info < (3, 11): - err.__notes__ = getattr(err, "__notes__", []) + [msg] # type: ignore[attr-defined] - else: - err.add_note(msg) + err.add_note(msg) def _check_single_set_return_values(path_to_node: str, obj: Any) -> int | None: """Check types returned from single evaluation of func, and return number of return values received from func.""" - if isinstance(obj, None | Dataset): + if isinstance(obj, Dataset | None): return None # no need to pack results if not isinstance(obj, tuple) or not all( diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index e98ac0f36a1..0c7d40113d6 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -49,8 +49,6 @@ def einsum(*args, **kwargs): - from xarray.core.options import OPTIONS - if OPTIONS["use_opt_einsum"] and module_available("opt_einsum"): import opt_einsum @@ -157,7 +155,7 @@ def isna(data: Any) -> bool: ------- Whether or not the data is np.nan or pd.NA """ - return data is pd.NA or data is np.nan + return data is pd.NA or data is np.nan # noqa: PLW0177 def isnull(data): @@ -193,7 +191,7 @@ def isnull(data): # types. For full consistency with pandas, we should accept None as # a null value as well as NaN, but it isn't clear how to do this # with duck typing. 
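+    # `data != data` relies on NaN comparing unequal to itself (IEEE 754),
+    # which holds for any duck array without assuming a numpy dtype.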
- return data != data + return data != data # noqa: PLR0124 def notnull(data): @@ -681,9 +679,7 @@ def timedelta_to_numeric(value, datetime_unit="ns", dtype=float): The output data type. """ - import datetime as dt - - if isinstance(value, dt.timedelta): + if isinstance(value, datetime.timedelta): out = py_timedelta_to_float(value, datetime_unit) elif isinstance(value, np.timedelta64): out = np_timedelta64_to_float(value, datetime_unit) diff --git a/xarray/core/extension_array.py b/xarray/core/extension_array.py index 7cc9db96d0d..d85f7e66b55 100644 --- a/xarray/core/extension_array.py +++ b/xarray/core/extension_array.py @@ -83,7 +83,7 @@ def __extension_duck_array__reshape( @dataclass(frozen=True) -class PandasExtensionArray(Generic[T_ExtensionArray], NDArrayMixin): +class PandasExtensionArray(NDArrayMixin, Generic[T_ExtensionArray]): """NEP-18 compliant wrapper for pandas extension arrays. Parameters diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 69359462cde..14e70b0550c 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -190,7 +190,7 @@ def format_item(x, timedelta_format=None, quote_strings=True): if hasattr(x, "dtype"): x = x.item() return repr(x) if quote_strings else x - elif hasattr(x, "dtype") and np.issubdtype(x.dtype, np.floating): + elif hasattr(x, "dtype") and np.issubdtype(x.dtype, np.floating) and x.shape == (): return f"{x.item():.4}" else: return str(x) @@ -464,7 +464,7 @@ def inherited_coords_repr(node: DataTree, col_width=None, max_rows=None): ) -def inline_index_repr(index: pd.Index, max_width=None): +def inline_index_repr(index: pd.Index, max_width: int) -> str: if hasattr(index, "_repr_inline_"): repr_ = index._repr_inline_(max_width=max_width) else: @@ -1039,15 +1039,13 @@ def diff_dataset_repr(a, b, compat): def diff_nodewise_summary(a: DataTree, b: DataTree, compat): """Iterates over all corresponding nodes, recording differences between data at each location.""" - compat_str = _compat_to_str(compat) - summary = [] for path, (node_a, node_b) in group_subtrees(a, b): a_ds, b_ds = node_a.dataset, node_b.dataset if not a_ds._all_compat(b_ds, compat): path_str = "root node" if path == "." else f"node {path!r}" - dataset_diff = diff_dataset_repr(a_ds, b_ds, compat_str) + dataset_diff = diff_dataset_repr(a_ds, b_ds, compat) data_diff = indent( "\n".join(dataset_diff.split("\n", 1)[1:]), prefix=" " ) diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index c0601e3326a..46c6709d118 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -145,7 +145,7 @@ def summarize_index(coord_names, index) -> str: name = "
".join([escape(str(n)) for n in coord_names]) index_id = f"index-{uuid.uuid4()}" - preview = escape(inline_index_repr(index)) + preview = escape(inline_index_repr(index, max_width=70)) details = short_index_repr_html(index) data_icon = _icon("icon-database") diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 1bcda765f1d..446fd1343cc 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -540,7 +540,7 @@ def factorize(self) -> EncodedGroups: _flatcodes = where(mask.data, -1, _flatcodes) full_index = pd.MultiIndex.from_product( - list(grouper.full_index.values for grouper in groupers), + [grouper.full_index.values for grouper in groupers], names=tuple(grouper.name for grouper in groupers), ) if not full_index.is_unique: diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index a785e9ea9ef..1756fb54c1b 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -479,7 +479,7 @@ def _copy(self, deep: bool = True, memo: dict[int, Any] | None = None) -> Self: def __getitem__(self, indexer: Any) -> Self: raise NotImplementedError() - def _repr_inline_(self, max_width): + def _repr_inline_(self, max_width: int) -> str: return self.__class__.__name__ @@ -717,7 +717,7 @@ def from_variables( # preserve wrapped pd.Index (if any) # accessing `.data` can load data from disk, so we only access if needed - data = var._data.array if hasattr(var._data, "array") else var.data + data = var._data if isinstance(var._data, PandasIndexingAdapter) else var.data # type: ignore[redundant-expr] # multi-index level variable: get level index if isinstance(var._data, PandasMultiIndexingAdapter): level = var._data.level @@ -1955,7 +1955,7 @@ def _wrap_index_equals( f"the signature ``{index_cls_name}.equals(self, other)`` is deprecated. " f"Please update it to " f"``{index_cls_name}.equals(self, other, *, exclude=None)`` " - "or kindly ask the maintainers of ``{index_cls_name}`` to do it. " + f"or kindly ask the maintainers of ``{index_cls_name}`` to do it. 
" "See documentation of xarray.Index.equals() for more info.", FutureWarning, ) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index e14543e646f..35278efdeaf 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -2151,7 +2151,7 @@ def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: ) def transpose(self, order: Iterable[int]) -> Self: - new_dims = tuple([self._dims[i] for i in order]) + new_dims = tuple(self._dims[i] for i in order) return type(self)(self._transform, self._coord_name, new_dims) def __repr__(self: Any) -> str: diff --git a/xarray/core/options.py b/xarray/core/options.py index adaa563d09b..90775360ab5 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -46,16 +46,16 @@ class T_Options(TypedDict): display_values_threshold: int display_style: Literal["text", "html"] display_width: int - display_expand_attrs: Literal["default", True, False] - display_expand_coords: Literal["default", True, False] - display_expand_data_vars: Literal["default", True, False] - display_expand_data: Literal["default", True, False] - display_expand_groups: Literal["default", True, False] - display_expand_indexes: Literal["default", True, False] - display_default_indexes: Literal["default", True, False] + display_expand_attrs: Literal["default"] | bool + display_expand_coords: Literal["default"] | bool + display_expand_data_vars: Literal["default"] | bool + display_expand_data: Literal["default"] | bool + display_expand_groups: Literal["default"] | bool + display_expand_indexes: Literal["default"] | bool + display_default_indexes: Literal["default"] | bool enable_cftimeindex: bool file_cache_maxsize: int - keep_attrs: Literal["default", True, False] + keep_attrs: Literal["default"] | bool warn_for_unclosed_files: bool use_bottleneck: bool use_flox: bool diff --git a/xarray/core/treenode.py b/xarray/core/treenode.py index c5d910994b6..df58f7aed6f 100644 --- a/xarray/core/treenode.py +++ b/xarray/core/treenode.py @@ -786,12 +786,12 @@ def _path_to_ancestor(self, ancestor: NamedNode) -> NodePath: raise NotFoundInTreeError( "Cannot find relative path to ancestor because nodes do not lie within the same tree" ) - if ancestor.path not in list(a.path for a in (self, *self.parents)): + if ancestor.path not in [a.path for a in (self, *self.parents)]: raise NotFoundInTreeError( "Cannot find relative path to ancestor because given node is not an ancestor of this node" ) - parents_paths = list(parent.path for parent in (self, *self.parents)) + parents_paths = [parent.path for parent in (self, *self.parents)] generation_gap = list(parents_paths).index(ancestor.path) path_upwards = "../" * generation_gap if generation_gap > 0 else "." 
return NodePath(path_upwards) diff --git a/xarray/core/types.py b/xarray/core/types.py index 1e5ae9aa342..736a11f5f17 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -1,7 +1,6 @@ from __future__ import annotations import datetime -import sys from collections.abc import Callable, Collection, Hashable, Iterator, Mapping, Sequence from types import EllipsisType from typing import ( @@ -9,7 +8,9 @@ Any, Literal, Protocol, + Self, SupportsIndex, + TypeAlias, TypeVar, Union, overload, @@ -18,21 +19,6 @@ import numpy as np import pandas as pd - -try: - if sys.version_info >= (3, 11): - from typing import Self, TypeAlias - else: - from typing import TypeAlias - - from typing_extensions import Self -except ImportError: - if TYPE_CHECKING: - raise - else: - Self: Any = None - - from numpy._typing import _SupportsDType from numpy.typing import ArrayLike @@ -195,7 +181,7 @@ def copy( # Temporary placeholder for indicating an array api compliant type. # hopefully in the future we can narrow this down more: -T_DuckArray = TypeVar("T_DuckArray", bound=Any, covariant=True) +T_DuckArray = TypeVar("T_DuckArray", bound=Any, covariant=True) # noqa: PLC0105 # For typing pandas extension arrays. T_ExtensionArray = TypeVar("T_ExtensionArray", bound=pd.api.extensions.ExtensionArray) @@ -214,7 +200,7 @@ def copy( # FYI in some cases we don't allow `None`, which this doesn't take account of. # FYI the `str` is for a size string, e.g. "16MB", supported by dask. -T_ChunkDim: TypeAlias = str | int | Literal["auto"] | None | tuple[int, ...] # noqa: PYI051 +T_ChunkDim: TypeAlias = str | int | Literal["auto"] | tuple[int, ...] | None # noqa: PYI051 T_ChunkDimFreq: TypeAlias = Union["TimeResampler", T_ChunkDim] T_ChunksFreq: TypeAlias = T_ChunkDim | Mapping[Any, T_ChunkDimFreq] # We allow the tuple form of this (though arguably we could transition to named dims only) @@ -253,16 +239,16 @@ def copy( InterpnOptions = Literal["linear", "nearest", "slinear", "cubic", "quintic", "pchip"] InterpOptions = Union[Interp1dOptions, InterpolantOptions, InterpnOptions] -DatetimeUnitOptions = Literal[ - "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", None -] +DatetimeUnitOptions = ( + Literal["W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] | None +) NPDatetimeUnitOptions = Literal["D", "h", "m", "s", "ms", "us", "ns"] PDDatetimeUnitOptions = Literal["s", "ms", "us", "ns"] -QueryEngineOptions = Literal["python", "numexpr", None] +QueryEngineOptions = Literal["python", "numexpr"] | None QueryParserOptions = Literal["pandas", "python"] -ReindexMethodOptions = Literal["nearest", "pad", "ffill", "backfill", "bfill", None] +ReindexMethodOptions = Literal["nearest", "pad", "ffill", "backfill", "bfill"] | None PadModeOptions = Literal[ "constant", @@ -281,7 +267,7 @@ def copy( T_DatasetPadConstantValues = ( T_VarPadConstantValues | Mapping[Any, T_VarPadConstantValues] ) -PadReflectOptions = Literal["even", "odd", None] +PadReflectOptions = Literal["even", "odd"] | None CFCalendar = Literal[ "standard", @@ -299,10 +285,10 @@ def copy( SideOptions = Literal["left", "right"] InclusiveOptions = Literal["both", "neither", "left", "right"] -ScaleOptions = Literal["linear", "symlog", "log", "logit", None] -HueStyleOptions = Literal["continuous", "discrete", None] +ScaleOptions = Literal["linear", "symlog", "log", "logit"] | None +HueStyleOptions = Literal["continuous", "discrete"] | None AspectOptions = Union[Literal["auto", "equal"], float, None] -ExtendOptions = Literal["neither", "both", "min", 
"max", None] +ExtendOptions = Literal["neither", "both", "min", "max"] | None _T_co = TypeVar("_T_co", covariant=True) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index c792e4ce60f..562706a1ac0 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -241,7 +241,7 @@ def equivalent(first: T, second: T) -> bool: def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool: if len(first) != len(second): return False - return all(equivalent(f, s) for f, s in zip(first, second, strict=True)) + return all(itertools.starmap(equivalent, zip(first, second, strict=True))) def peek_at(iterable: Iterable[T]) -> tuple[T, Iterator[T]]: @@ -897,7 +897,7 @@ def parse_dims_as_tuple( *, check_exists: bool = True, replace_none: Literal[False], -) -> tuple[Hashable, ...] | None | EllipsisType: ... +) -> tuple[Hashable, ...] | EllipsisType | None: ... def parse_dims_as_tuple( @@ -906,7 +906,7 @@ def parse_dims_as_tuple( *, check_exists: bool = True, replace_none: bool = True, -) -> tuple[Hashable, ...] | None | EllipsisType: +) -> tuple[Hashable, ...] | EllipsisType | None: """Parse one or more dimensions. A single dimension must be always a str, multiple dimensions @@ -958,7 +958,7 @@ def parse_dims_as_set( *, check_exists: bool = True, replace_none: Literal[False], -) -> set[Hashable] | None | EllipsisType: ... +) -> set[Hashable] | EllipsisType | None: ... def parse_dims_as_set( @@ -967,7 +967,7 @@ def parse_dims_as_set( *, check_exists: bool = True, replace_none: bool = True, -) -> set[Hashable] | None | EllipsisType: +) -> set[Hashable] | EllipsisType | None: """Like parse_dims_as_tuple, but returning a set instead of a tuple.""" # TODO: Consider removing parse_dims_as_tuple? if dim is None or dim is ...: @@ -999,7 +999,7 @@ def parse_ordered_dims( *, check_exists: bool = True, replace_none: Literal[False], -) -> tuple[Hashable, ...] | None | EllipsisType: ... +) -> tuple[Hashable, ...] | EllipsisType | None: ... def parse_ordered_dims( @@ -1008,7 +1008,7 @@ def parse_ordered_dims( *, check_exists: bool = True, replace_none: bool = True, -) -> tuple[Hashable, ...] | None | EllipsisType: +) -> tuple[Hashable, ...] | EllipsisType | None: """Parse one or more dimensions. A single dimension must be always a str, multiple dimensions @@ -1086,7 +1086,7 @@ def __get__(self, obj: None, cls) -> type[_Accessor]: ... @overload def __get__(self, obj: object, cls) -> _Accessor: ... - def __get__(self, obj: None | object, cls) -> type[_Accessor] | _Accessor: + def __get__(self, obj: object | None, cls) -> type[_Accessor] | _Accessor: if obj is None: return self._accessor @@ -1287,12 +1287,12 @@ def attempt_import(module: str) -> ModuleType: matplotlib="for plotting", hypothesis="for the `xarray.testing.strategies` submodule", ) - package_name = module.split(".")[0] # e.g. "zarr" from "zarr.storage" + package_name = module.split(".", maxsplit=1)[0] # e.g. "zarr" from "zarr.storage" install_name = install_mapping.get(package_name, package_name) reason = package_purpose.get(package_name, "") try: return importlib.import_module(module) - except (ImportError, ModuleNotFoundError) as e: + except ImportError as e: raise ImportError( f"The {install_name} package is required {reason}" " but could not be imported." 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9c753a2ffa7..00d97e868c4 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2232,8 +2232,7 @@ def coarsen_reshape(self, windows, boundary, side): for i, d in enumerate(variable.dims): if d in windows: size = variable.shape[i] - shape.append(int(size / windows[d])) - shape.append(windows[d]) + shape.extend((int(size / windows[d]), windows[d])) axis_count += 1 axes.append(i + axis_count) else: diff --git a/xarray/groupers.py b/xarray/groupers.py index f6c77d888a7..3a27d725116 100644 --- a/xarray/groupers.py +++ b/xarray/groupers.py @@ -484,8 +484,6 @@ def reset(self) -> Self: ) def _init_properties(self, group: T_Group) -> None: - from xarray import CFTimeIndex - group_as_index = safe_cast_to_index(group) offset = self.offset @@ -494,8 +492,6 @@ def _init_properties(self, group: T_Group) -> None: raise ValueError("Index must be monotonic for resampling") if isinstance(group_as_index, CFTimeIndex): - from xarray.core.resample_cftime import CFTimeGrouper - self.index_grouper = CFTimeGrouper( freq=self.freq, closed=self.closed, @@ -553,7 +549,7 @@ def factorize(self, group: T_Group) -> EncodedGroups: full_index, first_items, codes_ = self._get_index_and_items() sbins = first_items.values.astype(np.int64) group_indices: GroupIndices = tuple( - [slice(i, j) for i, j in pairwise(sbins)] + [slice(sbins[-1], None)] + list(itertools.starmap(slice, pairwise(sbins))) + [slice(sbins[-1], None)] ) unique_coord = Variable( diff --git a/xarray/indexes/__init__.py b/xarray/indexes/__init__.py index fafdb49c7e1..c53a4b8c2ce 100644 --- a/xarray/indexes/__init__.py +++ b/xarray/indexes/__init__.py @@ -3,11 +3,20 @@ """ +from xarray.core.coordinate_transform import CoordinateTransform from xarray.core.indexes import ( + CoordinateTransformIndex, Index, PandasIndex, PandasMultiIndex, ) from xarray.indexes.range_index import RangeIndex -__all__ = ["Index", "PandasIndex", "PandasMultiIndex", "RangeIndex"] +__all__ = [ + "CoordinateTransform", + "CoordinateTransformIndex", + "Index", + "PandasIndex", + "PandasMultiIndex", + "RangeIndex", +] diff --git a/xarray/indexes/range_index.py b/xarray/indexes/range_index.py index 2b9a5e5071a..34e6c2f7c00 100644 --- a/xarray/indexes/range_index.py +++ b/xarray/indexes/range_index.py @@ -9,7 +9,7 @@ from xarray.core.coordinate_transform import CoordinateTransform from xarray.core.dataarray import DataArray from xarray.core.indexes import CoordinateTransformIndex, Index, PandasIndex -from xarray.core.indexing import IndexSelResult, normalize_slice +from xarray.core.indexing import IndexSelResult from xarray.core.variable import Variable @@ -79,14 +79,10 @@ def equals( ) def slice(self, sl: slice) -> "RangeCoordinateTransform": - sl = normalize_slice(sl, self.size) - - # TODO: support reverse transform (i.e., start > stop)? 
- assert sl.start < sl.stop - - new_size = (sl.stop - sl.start) // sl.step - new_start = self.start + sl.start * self.step - new_stop = new_start + new_size * sl.step * self.step + new_range = range(self.size)[sl] + new_size = len(new_range) + new_start = self.start + new_range.start * self.step + new_stop = self.start + new_range.stop * self.step return type(self)( new_start, new_stop, new_size, self.coord_name, self.dim, dtype=self.dtype diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 2dba06a5d44..9610b96d4f9 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -1,6 +1,5 @@ from __future__ import annotations -import sys from collections.abc import Callable, Hashable, Iterable, Mapping, Sequence from enum import Enum from types import EllipsisType, ModuleType @@ -20,10 +19,7 @@ import numpy as np try: - if sys.version_info >= (3, 11): - from typing import TypeAlias - else: - from typing import TypeAlias + from typing import TypeAlias except ImportError: if TYPE_CHECKING: raise @@ -78,7 +74,7 @@ def dtype(self) -> _DType_co: ... _NormalizedChunks = tuple[tuple[int, ...], ...] # FYI in some cases we don't allow `None`, which this doesn't take account of. # # FYI the `str` is for a size string, e.g. "16MB", supported by dask. -T_ChunkDim: TypeAlias = str | int | Literal["auto"] | None | tuple[int, ...] # noqa: PYI051 +T_ChunkDim: TypeAlias = str | int | Literal["auto"] | tuple[int, ...] | None # noqa: PYI051 # We allow the tuple form of this (though arguably we could transition to named dims only) T_Chunks: TypeAlias = T_ChunkDim | Mapping[Any, T_ChunkDim] diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index f9c1919201f..dac8162ca45 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -2,9 +2,9 @@ import copy import math -import sys import warnings from collections.abc import Callable, Hashable, Iterable, Mapping, Sequence +from itertools import starmap from types import EllipsisType from typing import ( TYPE_CHECKING, @@ -85,10 +85,7 @@ PostComputeCallable: Any # type: ignore[no-redef] PostPersistCallable: Any # type: ignore[no-redef] - if sys.version_info >= (3, 11): - from typing import Self - else: - from typing_extensions import Self + from typing import Self T_NamedArray = TypeVar("T_NamedArray", bound="_NamedArray[Any]") T_NamedArrayInteger = TypeVar( @@ -849,7 +846,7 @@ def chunk( ndata = ImplicitToExplicitIndexingAdapter(data_old, OuterIndexer) # type: ignore[assignment] if is_dict_like(chunks): - chunks = tuple(chunks.get(n, s) for n, s in enumerate(ndata.shape)) + chunks = tuple(starmap(chunks.get, enumerate(ndata.shape))) data_chunked = chunkmanager.from_array(ndata, chunks, **from_array_kwargs) # type: ignore[arg-type] diff --git a/xarray/plot/dataarray_plot.py b/xarray/plot/dataarray_plot.py index ee49928aa01..537fbd5bafb 100644 --- a/xarray/plot/dataarray_plot.py +++ b/xarray/plot/dataarray_plot.py @@ -1479,7 +1479,7 @@ def newplotfunc( if subplot_kws is None: subplot_kws = dict() - if plotfunc.__name__ == "surface" and not kwargs.get("_is_facetgrid", False): + if plotfunc.__name__ == "surface" and not kwargs.get("_is_facetgrid"): if ax is None: # TODO: Importing Axes3D is no longer necessary in matplotlib >= 3.2. 
# Remove when minimum requirement of matplotlib is 3.2: @@ -1511,7 +1511,7 @@ def newplotfunc( if ( plotfunc.__name__ == "surface" - and not kwargs.get("_is_facetgrid", False) + and not kwargs.get("_is_facetgrid") and ax is not None ): import mpl_toolkits diff --git a/xarray/plot/dataset_plot.py b/xarray/plot/dataset_plot.py index 37bdcbac94e..ff508ee213c 100644 --- a/xarray/plot/dataset_plot.py +++ b/xarray/plot/dataset_plot.py @@ -630,6 +630,8 @@ def streamplot( cmap_params = kwargs.pop("cmap_params") if hue: + if xdim is not None and ydim is not None: + ds[hue] = ds[hue].transpose(ydim, xdim) kwargs["color"] = ds[hue].values # TODO: Fix this by always returning a norm with vmin, vmax in cmap_params diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py index 719b1fde619..e64348c7281 100644 --- a/xarray/plot/facetgrid.py +++ b/xarray/plot/facetgrid.py @@ -796,7 +796,7 @@ def _get_largest_lims(self) -> dict[str, tuple[float, float]]: # Find the plot with the largest xlim values: lower, upper = lims_largest[axis] for ax in self.axs.flat: - get_lim: None | Callable[[], tuple[float, float]] = getattr( + get_lim: Callable[[], tuple[float, float]] | None = getattr( ax, f"get_{axis}lim", None ) if get_lim: @@ -862,15 +862,15 @@ def _set_labels( for ax in axes: getattr(ax, f"set_{axis}label")(label, **kwargs) - def set_xlabels(self, label: None | str = None, **kwargs: Any) -> None: + def set_xlabels(self, label: str | None = None, **kwargs: Any) -> None: """Label the x axis on the bottom row of the grid.""" self._set_labels("x", self._bottom_axes, label, **kwargs) - def set_ylabels(self, label: None | str = None, **kwargs: Any) -> None: + def set_ylabels(self, label: str | None = None, **kwargs: Any) -> None: """Label the y axis on the left column of the grid.""" self._set_labels("y", self._left_axes, label, **kwargs) - def set_zlabels(self, label: None | str = None, **kwargs: Any) -> None: + def set_zlabels(self, label: str | None = None, **kwargs: Any) -> None: """Label the z axis.""" self._set_labels("z", self._left_axes, label, **kwargs) @@ -910,14 +910,14 @@ def set_titles( # Only label the ones with data if d is not None: coord, value = list(d.items()).pop() - title = nicetitle(coord, value, maxchar=maxchar) + title = nicetitle(coord, value) ax.set_title(title, size=size, **kwargs) else: # The row titles on the right edge of the grid for index, (ax, row_name, handle) in enumerate( zip(self.axs[:, -1], self.row_names, self.row_labels, strict=True) ): - title = nicetitle(coord=self._row_var, value=row_name, maxchar=maxchar) + title = nicetitle(coord=self._row_var, value=row_name) if not handle: self.row_labels[index] = ax.annotate( title, @@ -936,7 +936,7 @@ def set_titles( for index, (ax, col_name, handle) in enumerate( zip(self.axs[0, :], self.col_names, self.col_labels, strict=True) ): - title = nicetitle(coord=self._col_var, value=col_name, maxchar=maxchar) + title = nicetitle(coord=self._col_var, value=col_name) if not handle: self.col_labels[index] = ax.set_title(title, size=size, **kwargs) else: diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index a35128cadb6..fc4ca1532e5 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -248,9 +248,7 @@ def _determine_cmap_params( isinstance(levels, Iterable) and levels[0] * levels[-1] < 0 ) # kwargs not specific about divergent or not: infer defaults from data - divergent = ( - ((vmin < 0) and (vmax > 0)) or not center_is_none or levels_are_divergent - ) + divergent = (vmin < 0 < vmax) or not center_is_none or 
levels_are_divergent else: divergent = False @@ -459,8 +457,6 @@ def get_axis( ax: Axes | None = None, **subplot_kws: Any, ) -> Axes: - from xarray.core.utils import attempt_import - if TYPE_CHECKING: import matplotlib as mpl import matplotlib.pyplot as plt @@ -957,7 +953,7 @@ def _process_cmap_cbar_kwargs( cmap_kwargs = { "plot_data": data, "levels": levels, - "cmap": colors if colors else cmap, + "cmap": colors or cmap, "filled": func.__name__ != "contour", } @@ -1050,8 +1046,6 @@ def legend_elements( labels : list of str The string labels for elements of the legend. """ - import warnings - import matplotlib as mpl mlines = mpl.lines @@ -1328,7 +1322,7 @@ def _parse_size( def _parse_size( data: DataArray | None, norm: tuple[float | None, float | None, bool] | Normalize | None, -) -> None | pd.Series: +) -> pd.Series | None: import matplotlib as mpl if data is None: @@ -1593,7 +1587,7 @@ def ticks(self) -> np.ndarray | None: >>> _Normalize(a).ticks array([1, 3, 5]) """ - val: None | np.ndarray + val: np.ndarray | None if self.data_is_numeric: val = None else: @@ -1652,13 +1646,13 @@ def format(self) -> FuncFormatter: """ import matplotlib.pyplot as plt - def _func(x: Any, pos: None | Any = None): + def _func(x: Any, pos: Any | None = None): return f"{self._lookup_arr([x])[0]}" return plt.FuncFormatter(_func) @property - def func(self) -> Callable[[Any, None | Any], Any]: + def func(self) -> Callable[[Any, Any | None], Any]: """ Return a lambda function that maps self.values elements back to the original value as a numpy array. Useful with ax.legend_elements. @@ -1677,7 +1671,7 @@ def func(self) -> Callable[[Any, None | Any], Any]: array([0.5, 3. ]) """ - def _func(x: Any, pos: None | Any = None): + def _func(x: Any, pos: Any | None = None): return self._lookup_arr(x) return _func @@ -1686,8 +1680,8 @@ def _func(x: Any, pos: None | Any = None): def _determine_guide( hueplt_norm: _Normalize, sizeplt_norm: _Normalize, - add_colorbar: None | bool = None, - add_legend: None | bool = None, + add_colorbar: bool | None = None, + add_legend: bool | None = None, plotfunc_name: str | None = None, ) -> tuple[bool, bool]: if plotfunc_name == "hist": diff --git a/xarray/structure/alignment.py b/xarray/structure/alignment.py index b89dbb15964..b05271dfdf5 100644 --- a/xarray/structure/alignment.py +++ b/xarray/structure/alignment.py @@ -5,6 +5,7 @@ from collections import defaultdict from collections.abc import Callable, Hashable, Iterable, Mapping from contextlib import suppress +from itertools import starmap from typing import TYPE_CHECKING, Any, Final, Generic, TypeVar, cast, overload import numpy as np @@ -120,7 +121,9 @@ def _normalize_indexes( ) data: T_DuckArray = as_compatible_data(idx) pd_idx = safe_cast_to_index(data) - pd_idx.name = k + if pd_idx.name != k: + pd_idx = pd_idx.copy() + pd_idx.name = k if isinstance(pd_idx, pd.MultiIndex): idx = PandasMultiIndex(pd_idx, k) else: @@ -610,12 +613,14 @@ def _reindex_one( def reindex_all(self) -> None: self.results = tuple( - self._reindex_one(obj, matching_indexes, matching_index_vars) - for obj, matching_indexes, matching_index_vars in zip( - self.objects, - self.objects_matching_indexes, - self.objects_matching_index_vars, - strict=True, + starmap( + self._reindex_one, + zip( + self.objects, + self.objects_matching_indexes, + self.objects_matching_index_vars, + strict=True, + ), ) ) diff --git a/xarray/structure/chunks.py b/xarray/structure/chunks.py index e6dcd7b8b83..281cfe278f1 100644 --- a/xarray/structure/chunks.py +++ 
b/xarray/structure/chunks.py @@ -74,7 +74,7 @@ def _get_chunk(var: Variable, chunks, chunkmanager: ChunkManagerEntrypoint): # Determine the explicit requested chunks. preferred_chunks = var.encoding.get("preferred_chunks", {}) preferred_chunk_shape = tuple( - preferred_chunks.get(dim, size) for dim, size in zip(dims, shape, strict=True) + itertools.starmap(preferred_chunks.get, zip(dims, shape, strict=True)) ) if isinstance(chunks, Number) or (chunks == "auto"): chunks = dict.fromkeys(dims, chunks) @@ -138,7 +138,7 @@ def _maybe_chunk( # by providing chunks as an input to tokenize. # subtle bugs result otherwise. see GH3350 # we use str() for speed, and use the name for the final array name on the next line - token2 = tokenize(token if token else var._data, str(chunks)) + token2 = tokenize(token or var._data, str(chunks)) name2 = f"{name_prefix}{name}-{token2}" from_array_kwargs = utils.consolidate_dask_from_array_kwargs( diff --git a/xarray/structure/combine.py b/xarray/structure/combine.py index 01c14dffee4..63c3e4cc166 100644 --- a/xarray/structure/combine.py +++ b/xarray/structure/combine.py @@ -383,7 +383,7 @@ def _nested_combine( def combine_nested( datasets: DATASET_HYPERCUBE, - concat_dim: str | DataArray | None | Sequence[str | DataArray | pd.Index | None], + concat_dim: str | DataArray | Sequence[str | DataArray | pd.Index | None] | None, compat: str = "no_conflicts", data_vars: str = "all", coords: str = "different", diff --git a/xarray/structure/concat.py b/xarray/structure/concat.py index 54f006a2a0a..2f1f3c28b02 100644 --- a/xarray/structure/concat.py +++ b/xarray/structure/concat.py @@ -418,7 +418,7 @@ def process_subset_opt(opt, subset): elif opt == "all": concat_over.update( set().union( - *list(set(getattr(d, subset)) - set(d.dims) for d in datasets) + *[set(getattr(d, subset)) - set(d.dims) for d in datasets] ) ) elif opt == "minimal": diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index b2a459ba652..403186272b9 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -718,7 +718,7 @@ def merge_core( coord_names.intersection_update(variables) if explicit_coords is not None: coord_names.update(explicit_coords) - for dim in dims.keys(): + for dim in dims: if dim in variables: coord_names.add(dim) ambiguous_coords = coord_names.intersection(noncoord_names) diff --git a/xarray/testing/assertions.py b/xarray/testing/assertions.py index e524603c9a5..474a72da739 100644 --- a/xarray/testing/assertions.py +++ b/xarray/testing/assertions.py @@ -12,6 +12,7 @@ from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree +from xarray.core.datatree_mapping import map_over_datasets from xarray.core.formatting import diff_datatree_repr from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex, default_indexes from xarray.core.variable import IndexVariable, Variable @@ -85,14 +86,25 @@ def assert_isomorphic(a: DataTree, b: DataTree): def maybe_transpose_dims(a, b, check_dim_order: bool): """Helper for assert_equal/allclose/identical""" + __tracebackhide__ = True - if not isinstance(a, Variable | DataArray | Dataset): + + def _maybe_transpose_dims(a, b): + if not isinstance(a, Variable | DataArray | Dataset): + return b + if set(a.dims) == set(b.dims): + # Ensure transpose won't fail if a dimension is missing + # If this is the case, the difference will be caught by the caller + return b.transpose(*a.dims) + return b + + if check_dim_order: return b - if not check_dim_order and 
set(a.dims) == set(b.dims): - # Ensure transpose won't fail if a dimension is missing - # If this is the case, the difference will be caught by the caller - return b.transpose(*a.dims) - return b + + if isinstance(a, DataTree): + return map_over_datasets(_maybe_transpose_dims, a, b) + + return _maybe_transpose_dims(a, b) @ensure_warnings diff --git a/xarray/tests/arrays.py b/xarray/tests/arrays.py index cc4c480c437..31f380d2c66 100644 --- a/xarray/tests/arrays.py +++ b/xarray/tests/arrays.py @@ -1,3 +1,7 @@ +""" +This module contains various lazy array classes which can be wrapped and manipulated by xarray objects but will raise on data access. +""" + from collections.abc import Callable, Iterable from typing import Any @@ -6,10 +10,6 @@ from xarray.core import utils from xarray.core.indexing import ExplicitlyIndexed -""" -This module contains various lazy array classes which can be wrapped and manipulated by xarray objects but will raise on data access. -""" - class UnexpectedDataAccess(Exception): pass diff --git a/xarray/tests/conftest.py b/xarray/tests/conftest.py index 0725559fa09..a5964e166bb 100644 --- a/xarray/tests/conftest.py +++ b/xarray/tests/conftest.py @@ -234,6 +234,6 @@ def simple_datatree(create_test_datatree): return create_test_datatree() -@pytest.fixture(scope="module", params=["s", "ms", "us", "ns"]) +@pytest.fixture(params=["s", "ms", "us", "ns"]) def time_unit(request): return request.param diff --git a/xarray/tests/test_assertions.py b/xarray/tests/test_assertions.py index cef965f9854..a0a2c02d578 100644 --- a/xarray/tests/test_assertions.py +++ b/xarray/tests/test_assertions.py @@ -88,6 +88,19 @@ def test_assert_allclose_equal_transpose(func) -> None: getattr(xr.testing, func)(ds1, ds2, check_dim_order=False) +def test_assert_equal_transpose_datatree() -> None: + """Ensure `check_dim_order=False` works for transposed DataTree""" + ds = xr.Dataset(data_vars={"data": (("x", "y"), [[1, 2]])}) + + a = xr.DataTree.from_dict({"node": ds}) + b = xr.DataTree.from_dict({"node": ds.transpose("y", "x")}) + + with pytest.raises(AssertionError): + xr.testing.assert_equal(a, b) + + xr.testing.assert_equal(a, b, check_dim_order=False) + + @pytest.mark.filterwarnings("error") @pytest.mark.parametrize( "duckarray", diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index e40213e6f46..785b06a26fd 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -56,6 +56,7 @@ from xarray.conventions import encode_dataset_coordinates from xarray.core import indexing from xarray.core.options import set_options +from xarray.core.types import PDDatetimeUnitOptions from xarray.core.utils import module_available from xarray.namedarray.pycompat import array_type from xarray.tests import ( @@ -88,6 +89,7 @@ requires_scipy, requires_scipy_or_netCDF4, requires_zarr, + requires_zarr_v3, ) from xarray.tests.test_coding_times import ( _ALL_CALENDARS, @@ -116,6 +118,7 @@ if has_zarr_v3: from zarr.storage import MemoryStore as KVStore + from zarr.storage import WrapperStore ZARR_FORMATS = [2, 3] else: @@ -126,8 +129,11 @@ ) except ImportError: KVStore = None # type: ignore[assignment,misc,unused-ignore] + + WrapperStore = object # type: ignore[assignment,misc,unused-ignore] else: KVStore = None # type: ignore[assignment,misc,unused-ignore] + WrapperStore = object # type: ignore[assignment,misc,unused-ignore] ZARR_FORMATS = [] @@ -642,6 +648,16 @@ def test_roundtrip_timedelta_data(self) -> None: ) as actual: assert_identical(expected, actual) + def 
test_roundtrip_timedelta_data_via_dtype( + self, time_unit: PDDatetimeUnitOptions + ) -> None: + time_deltas = pd.to_timedelta(["1h", "2h", "NaT"]).as_unit(time_unit) # type: ignore[arg-type, unused-ignore] + expected = Dataset( + {"td": ("td", time_deltas), "td0": time_deltas[0].to_numpy()} + ) + with self.roundtrip(expected) as actual: + assert_identical(expected, actual) + def test_roundtrip_float64_data(self) -> None: expected = Dataset({"x": ("y", np.array([1.0, 2.0, np.pi], dtype="float64"))}) with self.roundtrip(expected) as actual: @@ -1427,6 +1443,25 @@ def test_string_object_warning(self) -> None: with self.roundtrip(original) as actual: assert_identical(original, actual) + @pytest.mark.parametrize( + "indexer", + ( + {"y": [1]}, + {"y": slice(2)}, + {"y": 1}, + {"x": [1], "y": [1]}, + {"x": ("x0", [0, 1]), "y": ("x0", [0, 1])}, + ), + ) + def test_indexing_roundtrip(self, indexer) -> None: + # regression test for GH8909 + ds = xr.Dataset() + ds["A"] = xr.DataArray([[1, "a"], [2, "b"]], dims=["x", "y"]) + with self.roundtrip(ds) as ds2: + expected = ds2.sel(indexer) + with self.roundtrip(expected) as actual: + assert_identical(actual, expected) + class NetCDFBase(CFEncodedBase): """Tests for all netCDF3 and netCDF4 backends.""" @@ -2370,16 +2405,18 @@ def test_read_non_consolidated_warning(self) -> None: self.save( expected, store_target=store, consolidated=False, **self.version_kwargs ) - with pytest.warns( - RuntimeWarning, - match="Failed to open Zarr store with consolidated", - ): - with xr.open_zarr(store, **self.version_kwargs) as ds: - assert_identical(ds, expected) + if getattr(store, "supports_consolidated_metadata", True): + with pytest.warns( + RuntimeWarning, + match="Failed to open Zarr store with consolidated", + ): + with xr.open_zarr(store, **self.version_kwargs) as ds: + assert_identical(ds, expected) def test_non_existent_store(self) -> None: with pytest.raises( - FileNotFoundError, match="(No such file or directory|Unable to find group)" + FileNotFoundError, + match="(No such file or directory|Unable to find group|No group found)", ): xr.open_zarr(f"{uuid.uuid4()}") @@ -3582,7 +3619,7 @@ def test_append(self) -> None: @requires_dask @pytest.mark.skipif( - sys.version_info.major == 3 and sys.version_info.minor < 11, + sys.version_info < (3, 11), reason="zarr too old", ) def test_region_write(self) -> None: @@ -3725,6 +3762,42 @@ def test_chunk_key_encoding_v2(self) -> None: assert actual["var1"].encoding["chunks"] == (2, 2) +class NoConsolidatedMetadataSupportStore(WrapperStore): + """ + Store that explicitly does not support consolidated metadata. + + Useful as a proxy for stores like Icechunk, see https://github.com/zarr-developers/zarr-python/pull/3119. 
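+
+    With ``supports_consolidated_metadata = False``, callers are expected to
+    skip reading and writing consolidated metadata for this store.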
+ """ + + supports_consolidated_metadata = False + + def __init__( + self, + store, + *, + read_only: bool = False, + ) -> None: + self._store = store.with_read_only(read_only=read_only) + + def with_read_only( + self, read_only: bool = False + ) -> NoConsolidatedMetadataSupportStore: + return type(self)( + store=self._store, + read_only=read_only, + ) + + +@requires_zarr_v3 +class TestZarrNoConsolidatedMetadataSupport(ZarrBase): + @contextlib.contextmanager + def create_zarr_target(self): + # TODO the zarr version would need to be >3.08 for the supports_consolidated_metadata property to have any effect + yield NoConsolidatedMetadataSupportStore( + zarr.storage.MemoryStore({}, read_only=False) + ) + + @requires_zarr @pytest.mark.skipif( ON_WINDOWS, @@ -3784,13 +3857,11 @@ def assert_expected_files(expected: list[str], store: str) -> None: ] ) - assert set(expected) == set( - [ - file.lstrip("c/") - for file in ls - if (file not in (".zattrs", ".zarray", "zarr.json")) - ] - ) + assert set(expected) == { + file.lstrip("c/") + for file in ls + if (file not in (".zattrs", ".zarray", "zarr.json")) + } # The zarr format is set by the `default_zarr_format` # pytest fixture that acts on a superclass @@ -5408,7 +5479,9 @@ def convert_to_pydap_dataset(self, original): ds = DatasetType("bears", **original.attrs) for key, var in original.data_vars.items(): - ds[key] = BaseType(key, var.values, dims=var.dims, **var.attrs) + ds[key] = BaseType( + key, var.values, dtype=var.values.dtype.kind, dims=var.dims, **var.attrs + ) # check all dims are stored in ds for d in original.coords: ds[d] = BaseType(d, original[d].values, dims=(d,), **original[d].attrs) @@ -5417,11 +5490,12 @@ def convert_to_pydap_dataset(self, original): @contextlib.contextmanager def create_datasets(self, **kwargs): with open_example_dataset("bears.nc") as expected: + # print("QQ0:", expected["bears"].load()) pydap_ds = self.convert_to_pydap_dataset(expected) actual = open_dataset(PydapDataStore(pydap_ds)) - # TODO solve this workaround: # netcdf converts string to byte not unicode - expected["bears"] = expected["bears"].astype(str) + # fixed in pydap 3.5.6. https://github.com/pydap/pydap/issues/510 + actual["bears"].values = actual["bears"].values.astype("S") yield actual, expected def test_cmp_local_file(self) -> None: @@ -5441,7 +5515,9 @@ def test_cmp_local_file(self) -> None: assert_equal(actual[{"l": 2}], expected[{"l": 2}]) with self.create_datasets() as (actual, expected): - assert_equal(actual.isel(i=0, j=-1), expected.isel(i=0, j=-1)) + # always return arrays and not scalars + # scalars will be promoted to unicode for numpy >= 2.3.0 + assert_equal(actual.isel(i=[0], j=[-1]), expected.isel(i=[0], j=[-1])) with self.create_datasets() as (actual, expected): assert_equal(actual.isel(j=slice(1, 2)), expected.isel(j=slice(1, 2))) @@ -5463,7 +5539,6 @@ def test_compatible_to_netcdf(self) -> None: with create_tmp_file() as tmp_file: actual.to_netcdf(tmp_file) with open_dataset(tmp_file) as actual2: - actual2["bears"] = actual2["bears"].astype(str) assert_equal(actual2, expected) @requires_dask @@ -5481,9 +5556,11 @@ def create_dap2_datasets(self, **kwargs): # in pydap 3.5.0, urls defaults to dap2. url = "http://test.opendap.org/opendap/data/nc/bears.nc" actual = open_dataset(url, engine="pydap", **kwargs) + # pydap <3.5.6 converts to unicode dtype=|U. Not what + # xarray expects. Thus force to bytes dtype. pydap >=3.5.6 + # does not convert to unicode. 
https://github.com/pydap/pydap/issues/510 + actual["bears"].values = actual["bears"].values.astype("S") with open_example_dataset("bears.nc") as expected: - # workaround to restore string which is converted to byte - expected["bears"] = expected["bears"].astype(str) yield actual, expected def output_grid_deprecation_warning_dap2dataset(self): @@ -5496,7 +5573,8 @@ def create_dap4_dataset(self, **kwargs): actual = open_dataset(url, engine="pydap", **kwargs) with open_example_dataset("bears.nc") as expected: # workaround to restore string which is converted to byte - expected["bears"] = expected["bears"].astype(str) + # only needed for pydap <3.5.6 https://github.com/pydap/pydap/issues/510 + expected["bears"].values = expected["bears"].values.astype("S") yield actual, expected def test_session(self) -> None: diff --git a/xarray/tests/test_backends_common.py b/xarray/tests/test_backends_common.py index dc89ecefbfe..33da027ac97 100644 --- a/xarray/tests/test_backends_common.py +++ b/xarray/tests/test_backends_common.py @@ -1,9 +1,13 @@ from __future__ import annotations +import re + import numpy as np import pytest +import xarray as xr from xarray.backends.common import _infer_dtype, robust_getitem +from xarray.tests import requires_scipy class DummyFailure(Exception): @@ -43,3 +47,16 @@ def test_robust_getitem() -> None: def test_infer_dtype_error_on_mixed_types(data): with pytest.raises(ValueError, match="unable to infer dtype on variable"): _infer_dtype(data, "test") + + +@requires_scipy +def test_encoding_failure_note(): + # Create an arbitrary value that cannot be encoded in netCDF3 + ds = xr.Dataset({"invalid": np.array([2**63 - 1], dtype=np.int64)}) + with pytest.raises( + ValueError, + match=re.escape( + "Raised while encoding variable 'invalid' with value <xarray.Variable" + ), + ): + ds.to_netcdf() diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index 17179a44a8a..e7971a311f5 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -139,6 +139,45 @@ def test_CharacterArrayCoder_char_dim_name(original, expected_char_dim_name) -> assert roundtripped.dims[-1] == original.dims[-1] +@pytest.mark.parametrize( [ "original", "expected_char_dim_name", "expected_char_dim_length", "warning_message", ], [ ( Variable(("x",), [b"ab", b"cde"], encoding={"char_dim_name": "foo4"}), "foo3", 3, "String dimension naming mismatch", ), ( Variable( ("x",), [b"ab", b"cde"], encoding={"original_shape": (2, 4), "char_dim_name": "foo"}, ), "foo3", 3, "String dimension length mismatch", ), ], ) +def test_CharacterArrayCoder_dim_mismatch_warnings( original, expected_char_dim_name, expected_char_dim_length, warning_message ) -> None: coder = strings.CharacterArrayCoder() with pytest.warns(UserWarning, match=warning_message): encoded = coder.encode(original) roundtripped = coder.decode(encoded) assert encoded.dims[-1] == expected_char_dim_name assert encoded.sizes[expected_char_dim_name] == expected_char_dim_length assert roundtripped.encoding["char_dim_name"] == expected_char_dim_name assert roundtripped.dims[-1] == original.dims[-1] + def test_StackedBytesArray() -> None: array = np.array([[b"a", b"b", b"c"], [b"d", b"e", b"f"]], dtype="S") actual = strings.StackedBytesArray(array) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 
8a021d4d2d5..af29716fec0 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -2,7 +2,7 @@ import warnings from datetime import datetime, timedelta -from itertools import product +from itertools import product, starmap from typing import Literal import numpy as np @@ -20,7 +20,6 @@ ) from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder from xarray.coding.times import ( - _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS, _encode_datetime_with_cftime, _netcdf_to_numpy_timeunit, _numpy_to_netcdf_timeunit, @@ -576,7 +575,7 @@ def test_infer_datetime_units_with_NaT(dates, expected) -> None: @pytest.mark.parametrize(("date_args", "expected"), _CFTIME_DATETIME_UNITS_TESTS) def test_infer_cftime_datetime_units(calendar, date_args, expected) -> None: date_type = _all_cftime_date_types()[calendar] - dates = [date_type(*args) for args in date_args] + dates = list(starmap(date_type, date_args)) assert expected == infer_datetime_units(dates) @@ -1824,8 +1823,9 @@ def test_encode_cf_timedelta_small_dtype_missing_value(use_dask) -> None: assert_equal(variable, decoded) -_DECODE_TIMEDELTA_TESTS = { +_DECODE_TIMEDELTA_VIA_UNITS_TESTS = { "default": (True, None, np.dtype("timedelta64[ns]"), True), + "decode_timedelta=True": (True, True, np.dtype("timedelta64[ns]"), False), "decode_timedelta=False": (True, False, np.dtype("int64"), False), "inherit-time_unit-from-decode_times": ( CFDatetimeCoder(time_unit="s"), @@ -1856,16 +1856,16 @@ def test_encode_cf_timedelta_small_dtype_missing_value(use_dask) -> None: @pytest.mark.parametrize( ("decode_times", "decode_timedelta", "expected_dtype", "warns"), - list(_DECODE_TIMEDELTA_TESTS.values()), - ids=list(_DECODE_TIMEDELTA_TESTS.keys()), + list(_DECODE_TIMEDELTA_VIA_UNITS_TESTS.values()), + ids=list(_DECODE_TIMEDELTA_VIA_UNITS_TESTS.keys()), ) -def test_decode_timedelta( +def test_decode_timedelta_via_units( decode_times, decode_timedelta, expected_dtype, warns ) -> None: timedeltas = pd.timedelta_range(0, freq="D", periods=3) - encoding = {"units": "days"} - var = Variable(["time"], timedeltas, encoding=encoding) - encoded = conventions.encode_cf_variable(var) + attrs = {"units": "days"} + var = Variable(["time"], timedeltas, encoding=attrs) + encoded = Variable(["time"], np.array([0, 1, 2]), attrs=attrs) if warns: with pytest.warns(FutureWarning, match="decode_timedelta"): decoded = conventions.decode_cf_variable( @@ -1885,6 +1885,57 @@ def test_decode_timedelta( assert decoded.dtype == expected_dtype +_DECODE_TIMEDELTA_VIA_DTYPE_TESTS = { + "default": (True, None, np.dtype("timedelta64[ns]")), + "decode_timedelta=False": (True, False, np.dtype("int64")), + "decode_timedelta=True": (True, True, np.dtype("timedelta64[ns]")), + "inherit-time_unit-from-decode_times": ( + CFDatetimeCoder(time_unit="s"), + None, + np.dtype("timedelta64[s]"), + ), + "set-time_unit-via-CFTimedeltaCoder-decode_times=True": ( + True, + CFTimedeltaCoder(time_unit="s"), + np.dtype("timedelta64[s]"), + ), + "set-time_unit-via-CFTimedeltaCoder-decode_times=False": ( + False, + CFTimedeltaCoder(time_unit="s"), + np.dtype("timedelta64[s]"), + ), + "override-time_unit-from-decode_times": ( + CFDatetimeCoder(time_unit="ns"), + CFTimedeltaCoder(time_unit="s"), + np.dtype("timedelta64[s]"), + ), +} + + +@pytest.mark.parametrize( + ("decode_times", "decode_timedelta", "expected_dtype"), + list(_DECODE_TIMEDELTA_VIA_DTYPE_TESTS.values()), + ids=list(_DECODE_TIMEDELTA_VIA_DTYPE_TESTS.keys()), +) +def test_decode_timedelta_via_dtype( + decode_times, 
decode_timedelta, expected_dtype +) -> None: + timedeltas = pd.timedelta_range(0, freq="D", periods=3) + encoding = {"units": "days"} + var = Variable(["time"], timedeltas, encoding=encoding) + encoded = conventions.encode_cf_variable(var) + assert encoded.attrs["dtype"] == "timedelta64[ns]" + assert encoded.attrs["units"] == encoding["units"] + decoded = conventions.decode_cf_variable( + "foo", encoded, decode_times=decode_times, decode_timedelta=decode_timedelta + ) + if decode_timedelta is False: + assert_equal(encoded, decoded) + else: + assert_equal(var, decoded) + assert decoded.dtype == expected_dtype + + def test_lazy_decode_timedelta_unexpected_dtype() -> None: attrs = {"units": "seconds"} encoded = Variable(["time"], [0, 0.5, 1], attrs=attrs) @@ -1940,7 +1991,12 @@ def test_duck_array_decode_times(calendar) -> None: def test_decode_timedelta_mask_and_scale( decode_timedelta: bool, mask_and_scale: bool ) -> None: - attrs = {"units": "nanoseconds", "_FillValue": np.int16(-1), "add_offset": 100000.0} + attrs = { + "dtype": "timedelta64[ns]", + "units": "nanoseconds", + "_FillValue": np.int16(-1), + "add_offset": 100000.0, + } encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) decoded = conventions.decode_cf_variable( "foo", encoded, mask_and_scale=mask_and_scale, decode_timedelta=decode_timedelta @@ -1958,19 +2014,17 @@ def test_decode_floating_point_timedelta_no_serialization_warning() -> None: decoded.load() -def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions) -> None: +def test_timedelta64_coding_via_dtype(time_unit: PDDatetimeUnitOptions) -> None: timedeltas = np.array([0, 1, "NaT"], dtype=f"timedelta64[{time_unit}]") variable = Variable(["time"], timedeltas) - expected_dtype = f"timedelta64[{time_unit}]" expected_units = _numpy_to_netcdf_timeunit(time_unit) encoded = conventions.encode_cf_variable(variable) - assert encoded.attrs["dtype"] == expected_dtype + assert encoded.attrs["dtype"] == f"timedelta64[{time_unit}]" assert encoded.attrs["units"] == expected_units - assert encoded.attrs["_FillValue"] == np.iinfo(np.int64).min decoded = conventions.decode_cf_variable("timedeltas", encoded) - assert decoded.encoding["dtype"] == expected_dtype + assert decoded.encoding["dtype"] == np.dtype("int64") assert decoded.encoding["units"] == expected_units assert_identical(decoded, variable) @@ -1981,7 +2035,7 @@ def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions) -> None: assert reencoded.dtype == encoded.dtype -def test_literal_timedelta_coding_non_pandas_coarse_resolution_warning() -> None: +def test_timedelta_coding_via_dtype_non_pandas_coarse_resolution_warning() -> None: attrs = {"dtype": "timedelta64[D]", "units": "days"} encoded = Variable(["time"], [0, 1, 2], attrs=attrs) with pytest.warns(UserWarning, match="xarray only supports"): @@ -1994,7 +2048,7 @@ def test_literal_timedelta_coding_non_pandas_coarse_resolution_warning() -> None @pytest.mark.xfail(reason="xarray does not recognize picoseconds as time-like") -def test_literal_timedelta_coding_non_pandas_fine_resolution_warning() -> None: +def test_timedelta_coding_via_dtype_non_pandas_fine_resolution_warning() -> None: attrs = {"dtype": "timedelta64[ps]", "units": "picoseconds"} encoded = Variable(["time"], [0, 1000, 2000], attrs=attrs) with pytest.warns(UserWarning, match="xarray only supports"): @@ -2006,17 +2060,16 @@ def test_literal_timedelta_coding_non_pandas_fine_resolution_warning() -> None: assert decoded.dtype == np.dtype("timedelta64[ns]") 
-@pytest.mark.parametrize("attribute", ["dtype", "units"]) -def test_literal_timedelta_decode_invalid_encoding(attribute) -> None: +def test_timedelta_decode_via_dtype_invalid_encoding() -> None: attrs = {"dtype": "timedelta64[s]", "units": "seconds"} - encoding = {attribute: "foo"} + encoding = {"units": "foo"} encoded = Variable(["time"], [0, 1, 2], attrs=attrs, encoding=encoding) with pytest.raises(ValueError, match="failed to prevent"): conventions.decode_cf_variable("timedeltas", encoded) @pytest.mark.parametrize("attribute", ["dtype", "units"]) -def test_literal_timedelta_encode_invalid_attribute(attribute) -> None: +def test_timedelta_encode_via_dtype_invalid_attribute(attribute) -> None: timedeltas = pd.timedelta_range(0, freq="D", periods=3) attrs = {attribute: "foo"} variable = Variable(["time"], timedeltas, attrs=attrs) @@ -2024,23 +2077,6 @@ def test_literal_timedelta_encode_invalid_attribute(attribute) -> None: conventions.encode_cf_variable(variable) -@pytest.mark.parametrize("invalid_key", _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS) -def test_literal_timedelta_encoding_invalid_key_error(invalid_key) -> None: - encoding = {invalid_key: 1.0} - timedeltas = pd.timedelta_range(0, freq="D", periods=3) - variable = Variable(["time"], timedeltas, encoding=encoding) - with pytest.raises(ValueError, match=invalid_key): - conventions.encode_cf_variable(variable) - - -@pytest.mark.parametrize("invalid_key", _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS) -def test_literal_timedelta_decoding_invalid_key_error(invalid_key) -> None: - attrs = {invalid_key: 1.0, "dtype": "timedelta64[s]", "units": "seconds"} - variable = Variable(["time"], [0, 1, 2], attrs=attrs) - with pytest.raises(ValueError, match=invalid_key): - conventions.decode_cf_variable("foo", variable) - - @pytest.mark.parametrize( ("decode_via_units", "decode_via_dtype", "attrs", "expect_timedelta64"), [ @@ -2058,12 +2094,6 @@ def test_literal_timedelta_decoding_invalid_key_error(invalid_key) -> None: def test_timedelta_decoding_options( decode_via_units, decode_via_dtype, attrs, expect_timedelta64 ) -> None: - # Note with literal timedelta encoding, we always add a _FillValue, even - # if one is not present in the original encoding parameters, which is why - # we ensure one is defined here when "dtype" is present in attrs. - if "dtype" in attrs: - attrs["_FillValue"] = np.iinfo(np.int64).min - array = np.array([0, 1, 2], dtype=np.dtype("int64")) encoded = Variable(["time"], array, attrs=attrs) @@ -2083,7 +2113,11 @@ def test_timedelta_decoding_options( # Confirm we exactly roundtrip. 
reencoded = conventions.encode_cf_variable(decoded) - assert_identical(reencoded, encoded) + + expected = encoded.copy() + if "dtype" not in attrs and decode_via_units: + expected.attrs["dtype"] = "timedelta64[s]" + assert_identical(reencoded, expected) def test_timedelta_encoding_explicit_non_timedelta64_dtype() -> None: @@ -2093,20 +2127,21 @@ def test_timedelta_encoding_explicit_non_timedelta64_dtype() -> None: encoded = conventions.encode_cf_variable(variable) assert encoded.attrs["units"] == "days" + assert encoded.attrs["dtype"] == "timedelta64[ns]" assert encoded.dtype == np.dtype("int32") - with pytest.warns(FutureWarning, match="timedelta"): - decoded = conventions.decode_cf_variable("foo", encoded) + decoded = conventions.decode_cf_variable("foo", encoded) assert_identical(decoded, variable) reencoded = conventions.encode_cf_variable(decoded) assert_identical(reencoded, encoded) assert encoded.attrs["units"] == "days" + assert encoded.attrs["dtype"] == "timedelta64[ns]" assert encoded.dtype == np.dtype("int32") @pytest.mark.parametrize("mask_attribute", ["_FillValue", "missing_value"]) -def test_literal_timedelta64_coding_with_mask( +def test_timedelta64_coding_via_dtype_with_mask( time_unit: PDDatetimeUnitOptions, mask_attribute: str ) -> None: timedeltas = np.array([0, 1, "NaT"], dtype=f"timedelta64[{time_unit}]") @@ -2122,7 +2157,7 @@ def test_literal_timedelta64_coding_with_mask( assert encoded[-1] == mask decoded = conventions.decode_cf_variable("timedeltas", encoded) - assert decoded.encoding["dtype"] == expected_dtype + assert decoded.encoding["dtype"] == np.dtype("int64") assert decoded.encoding["units"] == expected_units assert decoded.encoding[mask_attribute] == mask assert np.isnat(decoded[-1]) @@ -2144,7 +2179,7 @@ def test_roundtrip_0size_timedelta(time_unit: PDDatetimeUnitOptions) -> None: assert encoded.dtype == encoding["dtype"] assert encoded.attrs["units"] == encoding["units"] decoded = conventions.decode_cf_variable("foo", encoded, decode_timedelta=True) - assert decoded.dtype == np.dtype("=m8[ns]") + assert decoded.dtype == np.dtype(f"=m8[{time_unit}]") with assert_no_warnings(): decoded.load() assert decoded.dtype == np.dtype("=m8[s]") diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 91a380e840f..569013b43dc 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1397,7 +1397,7 @@ def test_apply_dask_new_output_sizes_not_supplied_same_dim_names() -> None: da, input_core_dims=[["i", "j"]], output_core_dims=[["i", "j"]], - exclude_dims=set(("i", "j")), + exclude_dims={"i", "j"}, dask="parallelized", ) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 961df78154e..ce792c83740 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -555,10 +555,10 @@ def test_decode_cf_time_kwargs(self, time_unit) -> None: class CFEncodedInMemoryStore(WritableCFDataStore, InMemoryDataStore): - def encode_variable(self, var): + def encode_variable(self, var, name=None): """encode one variable""" coder = coding.strings.EncodedStringCoder(allows_unicode=True) - var = coder.encode(var) + var = coder.encode(var, name=name) return var diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 50870ca6976..eefa3c2b4f8 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1372,7 +1372,7 @@ def test_map_blocks_da_ds_with_template(obj): # Check that indexes are written into the graph directly dsk = 
dict(actual.__dask_graph__()) - assert len({k for k in dsk if "x-coordinate" in k}) + assert {k for k in dsk if "x-coordinate" in k} assert all( isinstance(v, PandasIndex) for k, v in dsk.items() if "x-coordinate" in k ) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b2b9ae314c4..45c169bb931 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2920,7 +2920,7 @@ def test_reduce_keepdims(self) -> None: expected = DataArray( orig.data.mean(keepdims=True), dims=orig.dims, - coords={k: v for k, v in coords.items() if k in ["c"]}, + coords={k: v for k, v in coords.items() if k == "c"}, ) assert_equal(actual, expected) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index b17ea252a58..3e0734c8a1a 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1214,7 +1214,7 @@ def test_chunk_by_frequency(self, freq: str, calendar: str, add_gap: bool) -> No import dask.array N = 365 * 2 - ΔN = 28 + ΔN = 28 # noqa: PLC2401 time = xr.date_range( "2001-01-01", periods=N + ΔN, freq="D", calendar=calendar ).to_numpy(copy=True) @@ -2369,6 +2369,19 @@ def test_reindex_str_dtype(self, dtype) -> None: assert_identical(expected, actual) assert actual.x.dtype == expected.x.dtype + def test_reindex_with_multiindex_level(self) -> None: + # test for https://github.com/pydata/xarray/issues/10347 + mindex = pd.MultiIndex.from_product( + [[100, 200, 300], [1, 2, 3, 4]], names=["x", "y"] + ) + y_idx = PandasIndex(mindex.levels[1], "y") + + ds1 = xr.Dataset(coords={"y": [1, 2, 3]}) + ds2 = xr.Dataset(coords=xr.Coordinates.from_xindex(y_idx)) + + actual = ds1.reindex(y=ds2.y) + assert_identical(actual, ds2) + @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"foo": 2, "bar": 1}]) def test_align_fill_value(self, fill_value) -> None: x = Dataset({"foo": DataArray([1, 2], dims=["x"], coords={"x": [1, 2]})}) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 82c624b9bf6..2bf079a7cbd 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -837,7 +837,7 @@ def test_nones(self) -> None: def test_full(self, simple_datatree) -> None: dt = simple_datatree - paths = list(node.path for node in dt.subtree) + paths = [node.path for node in dt.subtree] assert paths == [ "/", "/set1", @@ -1433,7 +1433,6 @@ def test_doc_example(self) -> None: def _exact_match(message: str) -> str: return re.escape(dedent(message).strip()) - return "^" + re.escape(dedent(message.rstrip())) + "$" class TestInheritance: @@ -1657,7 +1656,7 @@ def test_drop_nodes(self) -> None: # test drop multiple nodes dropped = sue.drop_nodes(names=["Mary", "Kate"]) - assert not set(["Mary", "Kate"]).intersection(set(dropped.children)) + assert not {"Mary", "Kate"}.intersection(set(dropped.children)) assert "Ashley" in dropped.children # test raise @@ -2310,7 +2309,7 @@ class TestUFuncs: @pytest.mark.xfail(reason="__array_ufunc__ not implemented yet") def test_tree(self, create_test_datatree): dt = create_test_datatree() - expected = create_test_datatree(modify=lambda ds: np.sin(ds)) + expected = create_test_datatree(modify=np.sin) result_tree = np.sin(dt) assert_equal(result_tree, expected) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index eaafe2d4536..5928581de30 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -587,7 +587,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim): if dask and not 
has_dask: pytest.skip("requires dask") - if dask and skipna is False and dtype in [np.bool_]: + if dask and skipna is False and dtype == np.bool_: pytest.skip("dask does not compute object-typed array") rtol = 1e-04 if dtype == np.float32 else 1e-05 diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 2e0925c1b9a..88c2c819405 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -101,6 +101,9 @@ def test_format_item(self) -> None: (np.float16(1.1234), "1.123"), (np.float32(1.0111111), "1.011"), (np.float64(22.222222), "22.22"), + (np.zeros((1, 1)), "[[0.]]"), + (np.zeros(2), "[0. 0.]"), + (np.zeros((2, 2)), "[[0. 0.]\n [0. 0.]]"), ] for item, expected in cases: actual = formatting.format_item(item) @@ -718,6 +721,27 @@ def test_diff_datatree_repr_node_data(self): actual = formatting.diff_datatree_repr(dt_1, dt_2, "identical") assert actual == expected + def test_diff_datatree_repr_equals(self) -> None: + ds1 = xr.Dataset(data_vars={"data": ("y", [5, 2])}) + ds2 = xr.Dataset(data_vars={"data": (("x", "y"), [[5, 2]])}) + dt1 = xr.DataTree.from_dict({"node": ds1}) + dt2 = xr.DataTree.from_dict({"node": ds2}) + + expected = dedent( + """\ + Left and right DataTree objects are not equal + + Data at node 'node' does not match: + Differing dimensions: + (y: 2) != (x: 1, y: 2) + Differing data variables: + L data (y) int64 16B 5 2 + R data (x, y) int64 16B 5 2""" + ) + + actual = formatting.diff_datatree_repr(dt1, dt2, "equals") + assert actual == expected + def test_inline_variable_array_repr_custom_repr() -> None: class CustomArray: diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index a64dfc97bb6..54cc21b5d2c 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1321,8 +1321,7 @@ def test_groupby_properties(self) -> None: grouped = self.da.groupby("abc") expected_groups = {"a": range(9), "c": [9], "b": range(10, 20)} assert expected_groups.keys() == grouped.groups.keys() - for key in expected_groups: - expected_group = expected_groups[key] + for key, expected_group in expected_groups.items(): actual_group = grouped.groups[key] # TODO: array_api doesn't allow slice: @@ -3257,8 +3256,6 @@ def test_shuffle_simple() -> None: def test_shuffle_by(chunks, expected_chunks): import dask.array - from xarray.groupers import UniqueGrouper - da = xr.DataArray( dims="x", data=dask.array.arange(10, chunks=chunks), diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index 3d6fbcf025f..7d5a9bf3db4 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -124,7 +124,7 @@ def test_interpolate_1d(method: InterpOptions, dim: str, case: int) -> None: if not has_scipy: pytest.skip("scipy is not installed.") - if not has_dask and case in [1]: + if not has_dask and case == 1: pytest.skip("dask is not installed in the environment.") da = get_example_data(case) @@ -433,7 +433,7 @@ def test_interpolate_nd_with_nan() -> None: "case", [pytest.param(0, id="no_chunk"), pytest.param(1, id="chunk_y")] ) def test_interpolate_scalar(method: InterpOptions, case: int) -> None: - if not has_dask and case in [1]: + if not has_dask and case == 1: pytest.skip("dask is not installed in the environment.") da = get_example_data(case) @@ -463,7 +463,7 @@ def func(obj, new_x): "case", [pytest.param(3, id="no_chunk"), pytest.param(4, id="chunked")] ) def test_interpolate_nd_scalar(method: InterpOptions, case: int) -> None: - if not has_dask and case in [4]: + if not has_dask and case == 
4: pytest.skip("dask is not installed in the environment.") da = get_example_data(case) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 3c7d83d2825..bfe0b81e4b6 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -235,7 +235,7 @@ def test_1d_x_y_kw(self) -> None: z = np.arange(10) da = DataArray(np.cos(z), dims=["z"], coords=[z], name="f") - xy: list[list[None | str]] = [[None, None], [None, "z"], ["z", None]] + xy: list[list[str | None]] = [[None, None], [None, "z"], ["z", None]] f, axs = plt.subplots(3, 1, squeeze=False) for aa, (x, y) in enumerate(xy): @@ -828,7 +828,7 @@ def test_slice_in_title_single_item_array(self) -> None: darray = self.darray.expand_dims({"d": np.array([10.009])}) darray.plot.line(x="period") title = plt.gca().get_title() - assert "d = 10.01" == title + assert "d = [10.009]" == title class TestPlotStep(PlotTestCase): @@ -2698,9 +2698,9 @@ class TestDatasetStreamplotPlots(PlotTestCase): def setUp(self) -> None: das = [ DataArray( - np.random.randn(3, 3, 2, 2), + np.random.randn(3, 4, 2, 2), dims=["x", "y", "row", "col"], - coords=[range(k) for k in [3, 3, 2, 2]], + coords=[range(k) for k in [3, 4, 2, 2]], ) for _ in [1, 2] ] @@ -2789,7 +2789,7 @@ def test_accessor(self) -> None: def test_add_guide( self, add_guide: bool | None, - hue_style: Literal["continuous", "discrete", None], + hue_style: Literal["continuous", "discrete"] | None, legend: bool, colorbar: bool, ) -> None: diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index b4817d7442f..e2129229c2c 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -2,6 +2,7 @@ import sys from importlib.metadata import EntryPoint, EntryPoints +from itertools import starmap from unittest import mock import pytest @@ -48,7 +49,7 @@ def dummy_duplicated_entrypoints(): ["engine2", "xarray.tests.test_plugins:backend_1", "xarray.backends"], ["engine2", "xarray.tests.test_plugins:backend_2", "xarray.backends"], ] - eps = [EntryPoint(name, value, group) for name, value, group in specs] + eps = list(starmap(EntryPoint, specs)) return eps @@ -91,7 +92,7 @@ def test_backends_dict_from_pkg() -> None: ["engine1", "xarray.tests.test_plugins:backend_1", "xarray.backends"], ["engine2", "xarray.tests.test_plugins:backend_2", "xarray.backends"], ] - entrypoints = [EntryPoint(name, value, group) for name, value, group in specs] + entrypoints = list(starmap(EntryPoint, specs)) engines = plugins.backends_dict_from_pkg(entrypoints) assert len(engines) == 2 assert engines.keys() == {"engine1", "engine2"} diff --git a/xarray/tests/test_range_index.py b/xarray/tests/test_range_index.py index 3a30650ebda..d0644ba73a2 100644 --- a/xarray/tests/test_range_index.py +++ b/xarray/tests/test_range_index.py @@ -121,6 +121,28 @@ def test_range_index_isel() -> None: expected = create_dataset_arange(0.0, 1.0, 0.2) assert_identical(actual, expected, check_default_indexes=False) + actual = ds.isel(x=slice(None, None, -1)) + expected = create_dataset_arange(0.9, -0.1, -0.1) + assert_identical(actual, expected, check_default_indexes=False) + + actual = ds.isel(x=slice(None, 4, -1)) + expected = create_dataset_arange(0.9, 0.4, -0.1) + assert_identical(actual, expected, check_default_indexes=False) + + actual = ds.isel(x=slice(8, 4, -1)) + expected = create_dataset_arange(0.8, 0.4, -0.1) + assert_identical(actual, expected, check_default_indexes=False) + + actual = ds.isel(x=slice(8, None, -1)) + expected = create_dataset_arange(0.8, -0.1, -0.1) + 
assert_identical(actual, expected, check_default_indexes=False) + + # https://github.com/pydata/xarray/issues/10441 + ds2 = create_dataset_arange(0.0, 3.0, 0.1) + actual = ds2.isel(x=slice(4, None, 3)) + expected = create_dataset_arange(0.4, 3.0, 0.3) + assert_identical(actual, expected, check_default_indexes=False) + # scalar actual = ds.isel(x=0) expected = xr.Dataset(coords={"x": 0.0}) @@ -220,6 +242,6 @@ def test_range_index_repr() -> None: def test_range_index_repr_inline() -> None: index = RangeIndex.arange(0.0, 1.0, 0.1, dim="x") - actual = index._repr_inline_(max_width=None) + actual = index._repr_inline_(max_width=70) expected = "RangeIndex (start=0, stop=1, step=0.1)" assert actual == expected diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index 3d7f5657567..d93216a3ccc 100644 --- a/xarray/tests/test_rolling.py +++ b/xarray/tests/test_rolling.py @@ -341,7 +341,7 @@ def test_ndrolling_reduce( assert_allclose(actual, expected) assert actual.sizes == expected.sizes - if name in ["mean"]: + if name == "mean": # test our reimplementation of nanmean using np.nanmean expected = getattr(rolling_obj.construct({"time": "tw", "x": "xw"}), name)( ["tw", "xw"] diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index ab4ec36ea97..d98d72d9876 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -2650,7 +2650,7 @@ def test_searchsorted(self, func, unit, error, dtype): data_array = xr.DataArray(data=array) scalar_types = (int, float) - args = list(value * unit for value in func.args) + args = [value * unit for value in func.args] kwargs = { key: (value * unit if isinstance(value, scalar_types) else value) for key, value in func.kwargs.items() @@ -2708,7 +2708,7 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype): data_array = xr.DataArray(data=array) scalar_types = (int, float) - args = list(value * unit for value in func.args) + args = [value * unit for value in func.args] kwargs = { key: (value * unit if isinstance(value, scalar_types) else value) for key, value in func.kwargs.items() diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 9873b271033..0e6bbf29a45 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -300,7 +300,7 @@ def test_parse_dims_set() -> None: @pytest.mark.parametrize( "dim", [pytest.param(None, id="None"), pytest.param(..., id="ellipsis")] ) -def test_parse_dims_replace_none(dim: None | EllipsisType) -> None: +def test_parse_dims_replace_none(dim: EllipsisType | None) -> None: all_dims = ("a", "b", 1, ("b", "c")) # selection of different Hashables actual = utils.parse_dims_as_tuple(dim, all_dims, replace_none=True) assert actual == all_dims diff --git a/xarray/tutorial.py b/xarray/tutorial.py index ec832694a99..70e7fc68f7e 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -85,7 +85,7 @@ def _check_netcdf_engine_installed(name): def open_dataset( name: str, cache: bool = True, - cache_dir: None | str | os.PathLike = None, + cache_dir: str | os.PathLike | None = None, *, engine: T_Engine = None, **kws, @@ -216,7 +216,7 @@ def load_dataset(*args, **kwargs) -> Dataset: return ds.load() -def scatter_example_dataset(*, seed: None | int = None) -> Dataset: +def scatter_example_dataset(*, seed: int | None = None) -> Dataset: """ Create an example dataset. 
@@ -255,7 +255,7 @@ def scatter_example_dataset(*, seed: None | int = None) -> Dataset: def open_datatree( name: str, cache: bool = True, - cache_dir: None | str | os.PathLike = None, + cache_dir: str | os.PathLike | None = None, *, engine: T_Engine = None, **kws, diff --git a/xarray/ufuncs.py b/xarray/ufuncs.py index e25657216fd..83acbde858b 100644 --- a/xarray/ufuncs.py +++ b/xarray/ufuncs.py @@ -39,7 +39,7 @@ def get_array_namespace(*args): names = [module.__name__ for module in xps] raise ValueError(f"Mixed array types {names} are not supported.") - return next(iter(xps)) if len(xps) else np + return next(iter(xps)) if xps else np class _ufunc_wrapper(ABC): diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 8812a1abb22..15319e2f6c8 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -692,8 +692,7 @@ def write_methods(filepath, generators, preamble): f.write(preamble) for gen in generators: for lines in gen.generate_methods(): - for line in lines: - f.write(line + "\n") + f.writelines(line + "\n" for line in lines) if __name__ == "__main__": diff --git a/xarray/util/generate_ops.py b/xarray/util/generate_ops.py index 3300bbf594a..74fec786afa 100644 --- a/xarray/util/generate_ops.py +++ b/xarray/util/generate_ops.py @@ -133,7 +133,7 @@ def {{ method }}(self, *args: Any, **kwargs: Any) -> Self: # We require a "hack" to tell type checkers that e.g. Variable + DataArray = DataArray # In reality this returns NotImplemented, but this is not a valid type in python 3.9. # Therefore, we return DataArray. In reality this would call DataArray.__add__(Variable) -# TODO: change once python 3.10 is the minimum. +# TODO: change once python 3.11 is the minimum. # # Mypy seems to require that __iadd__ and __add__ have the same signature. # This requires some extra type: ignores[misc] in the inplace methods :/ @@ -222,32 +222,33 @@ def unops() -> list[OpsType]: # ruff does not reformat everything. When reformatting, the # type-ignores end up in the wrong line :/ -ops_info = {} -# TODO add inplace ops for DataTree? -ops_info["DataTreeOpsMixin"] = binops(other_type="DtCompatible") + unops() -ops_info["DatasetOpsMixin"] = ( - binops_overload(other_type="DsCompatible", overload_types=["DataTree"]) - + inplace(other_type="DsCompatible", type_ignore="misc") - + unops() -) -ops_info["DataArrayOpsMixin"] = ( - binops_overload(other_type="DaCompatible", overload_types=["Dataset", "DataTree"]) - + inplace(other_type="DaCompatible", type_ignore="misc") - + unops() -) -ops_info["VariableOpsMixin"] = ( - binops_overload( - other_type="VarCompatible", overload_types=["T_DA", "Dataset", "DataTree"] - ) - + inplace(other_type="VarCompatible", type_ignore="misc") - + unops() -) -ops_info["DatasetGroupByOpsMixin"] = binops( - other_type="Dataset | DataArray", return_type="Dataset" -) -ops_info["DataArrayGroupByOpsMixin"] = binops( - other_type="T_Xarray", return_type="T_Xarray" -) +ops_info = { + # TODO add inplace ops for DataTree? 
+ "DataTreeOpsMixin": binops(other_type="DtCompatible") + unops(), + "DatasetOpsMixin": ( + binops_overload(other_type="DsCompatible", overload_types=["DataTree"]) + + inplace(other_type="DsCompatible", type_ignore="misc") + + unops() + ), + "DataArrayOpsMixin": ( + binops_overload( + other_type="DaCompatible", overload_types=["Dataset", "DataTree"] + ) + + inplace(other_type="DaCompatible", type_ignore="misc") + + unops() + ), + "VariableOpsMixin": ( + binops_overload( + other_type="VarCompatible", overload_types=["T_DA", "Dataset", "DataTree"] + ) + + inplace(other_type="VarCompatible", type_ignore="misc") + + unops() + ), + "DatasetGroupByOpsMixin": binops( + other_type="Dataset | DataArray", return_type="Dataset" + ), + "DataArrayGroupByOpsMixin": binops(other_type="T_Xarray", return_type="T_Xarray"), +} MODULE_PREAMBLE = '''\ """Mixin classes with arithmetic operators.""" pFad - Phonifier reborn