From e809ae182f1ae45d30a83731a3312033732ce162 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Thu, 1 Dec 2022 17:34:21 +0000 Subject: [PATCH 01/13] mark tests as needing pyarrow --- dask/dataframe/io/tests/test_parquet.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dask/dataframe/io/tests/test_parquet.py b/dask/dataframe/io/tests/test_parquet.py index c12f956f35f..4624066c80d 100644 --- a/dask/dataframe/io/tests/test_parquet.py +++ b/dask/dataframe/io/tests/test_parquet.py @@ -661,6 +661,7 @@ def write_partition(df, i): assert_eq(df, ddf2, check_index=False) +@PYARROW_MARK @pytest.mark.xfail( not PANDAS_GT_130, reason=( @@ -3005,6 +3006,7 @@ def test_chunksize_aggregate_files(tmpdir, write_engine, read_engine, aggregate_ assert_eq(df1[["c", "d"]], df2[["c", "d"]], check_index=False) +@PYARROW_MARK @pytest.mark.parametrize("metadata", [True, False]) @pytest.mark.parametrize("chunksize", [None, 1024, 4096, "1MiB"]) def test_chunksize(tmpdir, chunksize, engine, metadata): @@ -3998,6 +4000,7 @@ def test_metadata_task_size(tmpdir, engine, write_metadata_file, metadata_task_s assert_eq(ddf2b, ddf2c) +@PYARROW_MARK @pytest.mark.parametrize("partition_on", ("b", None)) def test_extra_file(tmpdir, engine, partition_on): # Check that read_parquet can handle spark output From f8d22c95e91727c026b7ed7cd5b67e2c4cb6f53b Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Thu, 1 Dec 2022 17:37:20 +0000 Subject: [PATCH 02/13] test on python 3.11 --- .github/workflows/tests.yml | 2 +- continuous_integration/environment-3.11.yaml | 77 ++++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 continuous_integration/environment-3.11.yaml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 06df8ce5fc8..8fcd87d5d04 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -23,7 +23,7 @@ jobs: fail-fast: false matrix: os: ["windows-latest", "ubuntu-latest", "macos-latest"] - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11"] exclude: - os: "macos-latest" python-version: "3.8" diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml new file mode 100644 index 00000000000..b5b0700fa55 --- /dev/null +++ b/continuous_integration/environment-3.11.yaml @@ -0,0 +1,77 @@ +# This job includes coverage +name: test-environment +channels: + - conda-forge + - nodefaults +dependencies: + # required dependencies + - python=3.11 + - packaging + - numpy + - pandas + # test dependencies + - pre-commit + - pytest + - pytest-cov + - pytest-rerunfailures + - pytest-timeout + - pytest-xdist + - moto + - flask + - fastparquet>=0.8.0 + - h5py + - pytables + # - zarr + # `tiledb-py=0.17.5` lead to strange seg faults in CI, However 0.18 is needed for 3.11 + # We should unpin when possible. + # https://github.com/dask/dask/pull/9569 + - tiledb-py + # - pyspark + - tiledb>=2.5.0 + - xarray + - fsspec + - sqlalchemy>=1.4.0 + # - pyarrow needs 10+ for python3.11, conda-forge only has v9 + - coverage + - jsonschema + # # other -- IO + - boto3 + - botocore + # Temporary restriction until https://github.com/dask/distributed/issues/7173 is resolved + - bokeh + - httpretty + - aiohttp + # # Need recent version of s3fs to support newer aiobotocore versions + # # https://github.com/dask/s3fs/issues/514 + - s3fs>=2021.8.0 + - click + - cloudpickle + - crick + - cytoolz + - distributed + - ipython + - ipycytoscape + - lz4 + # https://github.com/numba/numba/issues/8304 + # - numba # not supported on 3.11 + - partd + - psutil + - requests + - scikit-image + - scikit-learn + - scipy + - toolz + - python-snappy + # - sparse needs numba + - cachey + - python-graphviz + - python-xxhash + - mmh3 + - jinja2 + - pip + # The nightly pyarrow / arrow-cpp packages currently don't install with latest + # protobuf / abseil, see https://github.com/dask/dask/issues/9449 + - libprotobuf=3.19 + - pip: + - git+https://github.com/graingert/distributed@python-311 + - pyarrow # pyarrow on conda-forge is 9.0 needs 10+ for python3.11 From 92b8f9794176f4a5fe0f42b92f4185e11eaa6867 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 7 Dec 2022 11:21:44 +0000 Subject: [PATCH 03/13] test additional imports on 3.11 --- .github/workflows/additional.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/additional.yml b/.github/workflows/additional.yml index 2cde74f9a10..e8866c175cf 100644 --- a/.github/workflows/additional.yml +++ b/.github/workflows/additional.yml @@ -65,7 +65,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout source uses: actions/checkout@v3.1.0 From e28b604d75b7f84d33a5d97cabc6d3704da8bb5c Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 7 Dec 2022 16:25:50 +0000 Subject: [PATCH 04/13] skip hdf tests that segfault due to pandas bug --- dask/dataframe/io/tests/test_hdf.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/dask/dataframe/io/tests/test_hdf.py b/dask/dataframe/io/tests/test_hdf.py index 4f47b2401d5..2f72e3e229e 100644 --- a/dask/dataframe/io/tests/test_hdf.py +++ b/dask/dataframe/io/tests/test_hdf.py @@ -1,5 +1,6 @@ import os import pathlib +import sys from time import sleep import numpy as np @@ -46,6 +47,10 @@ def test_to_hdf(): tm.assert_frame_equal(df, out[:]) +@pytest.mark.skipif( + sys.version_info >= (3, 11), + reason="segfaults due to https://github.com/pandas-dev/pandas/issues/50105", +) def test_to_hdf_multiple_nodes(): pytest.importorskip("tables") df = pd.DataFrame( @@ -388,6 +393,10 @@ def test_to_hdf_link_optimizations(): assert dependency_depth(d.dask) == 2 + a.npartitions +@pytest.mark.skipif( + sys.version_info >= (3, 11), + reason="segfaults due to https://github.com/pandas-dev/pandas/issues/50105", +) @pytest.mark.slow def test_to_hdf_lock_delays(): pytest.importorskip("tables") @@ -478,6 +487,10 @@ def test_to_hdf_exceptions(): a.to_hdf(hdf, "/data_*_*") +@pytest.mark.skipif( + sys.version_info >= (3, 11), + reason="segfaults due to https://github.com/pandas-dev/pandas/issues/50105", +) @pytest.mark.parametrize("scheduler", ["sync", "threads", "processes"]) @pytest.mark.parametrize("npartitions", [1, 4, 10]) def test_to_hdf_schedulers(scheduler, npartitions): @@ -679,6 +692,10 @@ def test_read_hdf_multiply_open(): dd.read_hdf(fn, "/data", chunksize=2, mode="r") +@pytest.mark.skipif( + sys.version_info >= (3, 11), + reason="segfaults due to https://github.com/pandas-dev/pandas/issues/50105", +) def test_read_hdf_multiple(): pytest.importorskip("tables") df = pd.DataFrame( From c7fc0a85515843bcee12bb097fcb713824385219 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 14 Dec 2022 19:34:23 +0000 Subject: [PATCH 05/13] Apply suggestions from code review Co-authored-by: crusaderky --- continuous_integration/environment-3.11.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index b5b0700fa55..3e1de8f4e4e 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -23,7 +23,6 @@ dependencies: - pytables # - zarr # `tiledb-py=0.17.5` lead to strange seg faults in CI, However 0.18 is needed for 3.11 - # We should unpin when possible. # https://github.com/dask/dask/pull/9569 - tiledb-py # - pyspark From a91326b6fd73e4d05edb51733fdd976a61e34083 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 14 Dec 2022 19:34:32 +0000 Subject: [PATCH 06/13] Update continuous_integration/environment-3.11.yaml --- continuous_integration/environment-3.11.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index 3e1de8f4e4e..bfb653836c6 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -72,5 +72,5 @@ dependencies: # protobuf / abseil, see https://github.com/dask/dask/issues/9449 - libprotobuf=3.19 - pip: - - git+https://github.com/graingert/distributed@python-311 + - git+https://github.com/dask/distributed - pyarrow # pyarrow on conda-forge is 9.0 needs 10+ for python3.11 From 608077ebcebe66f115d4f2c6d8dc4166947512dc Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 14 Dec 2022 19:35:23 +0000 Subject: [PATCH 07/13] Apply suggestions from code review Co-authored-by: crusaderky --- continuous_integration/environment-3.11.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index bfb653836c6..d582a3f0814 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -30,7 +30,7 @@ dependencies: - xarray - fsspec - sqlalchemy>=1.4.0 - # - pyarrow needs 10+ for python3.11, conda-forge only has v9 + - pyarrow>=10 - coverage - jsonschema # # other -- IO From 3ec2337223ef7361b728f8fabeaec3fcd43536fb Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 14 Dec 2022 19:35:46 +0000 Subject: [PATCH 08/13] Apply suggestions from code review --- continuous_integration/environment-3.11.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index d582a3f0814..5c00912bc8d 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -73,4 +73,3 @@ dependencies: - libprotobuf=3.19 - pip: - git+https://github.com/dask/distributed - - pyarrow # pyarrow on conda-forge is 9.0 needs 10+ for python3.11 From 13397c69a519dd0122ad8ccdaa08c5c3f4facf5f Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Thu, 15 Dec 2022 15:59:35 -0600 Subject: [PATCH 09/13] Remove unneeded libprotobuf pin --- continuous_integration/environment-3.11.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index 5c00912bc8d..4246639ec5f 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -68,8 +68,5 @@ dependencies: - mmh3 - jinja2 - pip - # The nightly pyarrow / arrow-cpp packages currently don't install with latest - # protobuf / abseil, see https://github.com/dask/dask/issues/9449 - - libprotobuf=3.19 - pip: - git+https://github.com/dask/distributed From e4ed63d5f1210c87c5409d601c551363be20c2fe Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Thu, 15 Dec 2022 16:37:27 -0600 Subject: [PATCH 10/13] Update issue link and add setup.py classifier --- dask/dataframe/io/tests/test_hdf.py | 8 ++++---- setup.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/dask/dataframe/io/tests/test_hdf.py b/dask/dataframe/io/tests/test_hdf.py index f22fb87824c..94249ff1993 100644 --- a/dask/dataframe/io/tests/test_hdf.py +++ b/dask/dataframe/io/tests/test_hdf.py @@ -49,7 +49,7 @@ def test_to_hdf(): @pytest.mark.skipif( sys.version_info >= (3, 11), - reason="segfaults due to https://github.com/pandas-dev/pandas/issues/50105", + reason="segfaults due to https://github.com/PyTables/PyTables/issues/977", ) def test_to_hdf_multiple_nodes(): pytest.importorskip("tables") @@ -395,7 +395,7 @@ def test_to_hdf_link_optimizations(): @pytest.mark.skipif( sys.version_info >= (3, 11), - reason="segfaults due to https://github.com/pandas-dev/pandas/issues/50105", + reason="segfaults due to https://github.com/PyTables/PyTables/issues/977", ) @pytest.mark.slow def test_to_hdf_lock_delays(): @@ -489,7 +489,7 @@ def test_to_hdf_exceptions(): @pytest.mark.skipif( sys.version_info >= (3, 11), - reason="segfaults due to https://github.com/pandas-dev/pandas/issues/50105", + reason="segfaults due to https://github.com/PyTables/PyTables/issues/977", ) @pytest.mark.parametrize("scheduler", ["sync", "threads", "processes"]) @pytest.mark.parametrize("npartitions", [1, 4, 10]) @@ -694,7 +694,7 @@ def test_read_hdf_multiply_open(): @pytest.mark.skipif( sys.version_info >= (3, 11), - reason="segfaults due to https://github.com/pandas-dev/pandas/issues/50105", + reason="segfaults due to https://github.com/PyTables/PyTables/issues/977", ) def test_read_hdf_multiple(): pytest.importorskip("tables") diff --git a/setup.py b/setup.py index 8f20c4dfb86..629bf6ec95d 100755 --- a/setup.py +++ b/setup.py @@ -83,6 +83,7 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering", "Topic :: System :: Distributed Computing", ], From 1b4bbde6b4d02775344f38dcf57d718333428fad Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Thu, 15 Dec 2022 16:42:47 -0600 Subject: [PATCH 11/13] Use _PY_VERSION --- dask/dataframe/io/tests/test_hdf.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dask/dataframe/io/tests/test_hdf.py b/dask/dataframe/io/tests/test_hdf.py index 94249ff1993..a8defb9246d 100644 --- a/dask/dataframe/io/tests/test_hdf.py +++ b/dask/dataframe/io/tests/test_hdf.py @@ -1,14 +1,15 @@ import os import pathlib -import sys from time import sleep import numpy as np import pandas as pd import pytest +from packaging.version import Version import dask import dask.dataframe as dd +from dask.compatibility import _PY_VERSION from dask.dataframe._compat import tm from dask.dataframe.optimize import optimize_dataframe_getitem from dask.dataframe.utils import assert_eq @@ -48,7 +49,7 @@ def test_to_hdf(): @pytest.mark.skipif( - sys.version_info >= (3, 11), + _PY_VERSION >= Version("3.11"), reason="segfaults due to https://github.com/PyTables/PyTables/issues/977", ) def test_to_hdf_multiple_nodes(): @@ -394,7 +395,7 @@ def test_to_hdf_link_optimizations(): @pytest.mark.skipif( - sys.version_info >= (3, 11), + _PY_VERSION >= Version("3.11"), reason="segfaults due to https://github.com/PyTables/PyTables/issues/977", ) @pytest.mark.slow @@ -488,7 +489,7 @@ def test_to_hdf_exceptions(): @pytest.mark.skipif( - sys.version_info >= (3, 11), + _PY_VERSION >= Version("3.11"), reason="segfaults due to https://github.com/PyTables/PyTables/issues/977", ) @pytest.mark.parametrize("scheduler", ["sync", "threads", "processes"]) @@ -693,7 +694,7 @@ def test_read_hdf_multiply_open(): @pytest.mark.skipif( - sys.version_info >= (3, 11), + _PY_VERSION >= Version("3.11"), reason="segfaults due to https://github.com/PyTables/PyTables/issues/977", ) def test_read_hdf_multiple(): From 0e8bb6122923a54ba8bc168da220ab6a86fe6094 Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Fri, 16 Dec 2022 09:42:23 -0600 Subject: [PATCH 12/13] Retrigger CI From d2585447f56818bcb6e3c23533a96888ea885bd5 Mon Sep 17 00:00:00 2001 From: James Bourbeau Date: Fri, 16 Dec 2022 12:24:29 -0600 Subject: [PATCH 13/13] Remove tiledb from Python 3.11 build --- continuous_integration/environment-3.11.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml index 4246639ec5f..190ef10942e 100644 --- a/continuous_integration/environment-3.11.yaml +++ b/continuous_integration/environment-3.11.yaml @@ -24,9 +24,9 @@ dependencies: # - zarr # `tiledb-py=0.17.5` lead to strange seg faults in CI, However 0.18 is needed for 3.11 # https://github.com/dask/dask/pull/9569 - - tiledb-py + # - tiledb-py # crashes on Python 3.11 # - pyspark - - tiledb>=2.5.0 + # - tiledb>=2.5.0 # crashes on Python 3.11 - xarray - fsspec - sqlalchemy>=1.4.0