diff --git a/.github/workflows/additional.yml b/.github/workflows/additional.yml
index 8d808aac677..d73d79dad8b 100644
--- a/.github/workflows/additional.yml
+++ b/.github/workflows/additional.yml
@@ -65,7 +65,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
     steps:
       - name: Checkout source
         uses: actions/checkout@v3.2.0
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 2742022bca5..0bbbfe593df 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -23,7 +23,7 @@ jobs:
       fail-fast: false
       matrix:
         os: ["windows-latest", "ubuntu-latest", "macos-latest"]
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
         exclude:
           - os: "macos-latest"
             python-version: "3.8"
diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml
new file mode 100644
index 00000000000..190ef10942e
--- /dev/null
+++ b/continuous_integration/environment-3.11.yaml
@@ -0,0 +1,72 @@
+# This job includes coverage
+name: test-environment
+channels:
+  - conda-forge
+  - nodefaults
+dependencies:
+  # required dependencies
+  - python=3.11
+  - packaging
+  - numpy
+  - pandas
+  # test dependencies
+  - pre-commit
+  - pytest
+  - pytest-cov
+  - pytest-rerunfailures
+  - pytest-timeout
+  - pytest-xdist
+  - moto
+  - flask
+  - fastparquet>=0.8.0
+  - h5py
+  - pytables
+  # - zarr
+  # `tiledb-py=0.17.5` led to strange segfaults in CI; however, 0.18 is needed for 3.11
+  # https://github.com/dask/dask/pull/9569
+  # - tiledb-py  # crashes on Python 3.11
+  # - pyspark
+  # - tiledb>=2.5.0  # crashes on Python 3.11
+  - xarray
+  - fsspec
+  - sqlalchemy>=1.4.0
+  - pyarrow>=10
+  - coverage
+  - jsonschema
+  # other -- IO
+  - boto3
+  - botocore
+  # Temporary restriction until https://github.com/dask/distributed/issues/7173 is resolved
+  - bokeh
+  - httpretty
+  - aiohttp
+  # Need recent version of s3fs to support newer aiobotocore versions
+  # https://github.com/dask/s3fs/issues/514
+  - s3fs>=2021.8.0
+  - click
+  - cloudpickle
+  - crick
+  - cytoolz
+  - distributed
+  - ipython
+  - ipycytoscape
+  - lz4
+  # https://github.com/numba/numba/issues/8304
+  # - numba  # not supported on 3.11
+  - partd
+  - psutil
+  - requests
+  - scikit-image
+  - scikit-learn
+  - scipy
+  - toolz
+  - python-snappy
+  # - sparse  # needs numba
+  - cachey
+  - python-graphviz
+  - python-xxhash
+  - mmh3
+  - jinja2
+  - pip
+  - pip:
+      - git+https://github.com/dask/distributed
diff --git a/dask/dataframe/io/tests/test_hdf.py b/dask/dataframe/io/tests/test_hdf.py
index 5ed102b23ab..a8defb9246d 100644
--- a/dask/dataframe/io/tests/test_hdf.py
+++ b/dask/dataframe/io/tests/test_hdf.py
@@ -5,9 +5,11 @@
 import numpy as np
 import pandas as pd
 import pytest
+from packaging.version import Version
 
 import dask
 import dask.dataframe as dd
+from dask.compatibility import _PY_VERSION
 from dask.dataframe._compat import tm
 from dask.dataframe.optimize import optimize_dataframe_getitem
 from dask.dataframe.utils import assert_eq
@@ -46,6 +48,10 @@ def test_to_hdf():
         tm.assert_frame_equal(df, out[:])
 
 
+@pytest.mark.skipif(
+    _PY_VERSION >= Version("3.11"),
+    reason="segfaults due to https://github.com/PyTables/PyTables/issues/977",
+)
 def test_to_hdf_multiple_nodes():
     pytest.importorskip("tables")
     df = pd.DataFrame(
@@ -388,6 +394,10 @@ def test_to_hdf_link_optimizations():
     assert dependency_depth(d.dask) == 2 + a.npartitions
 
 
+@pytest.mark.skipif(
+    _PY_VERSION >= Version("3.11"),
+    reason="segfaults due to https://github.com/PyTables/PyTables/issues/977",
+)
 @pytest.mark.slow
 def test_to_hdf_lock_delays():
     pytest.importorskip("tables")
@@ -478,6 +488,10 @@ def test_to_hdf_exceptions():
         a.to_hdf(hdf, "/data_*_*")
 
 
+@pytest.mark.skipif(
+    _PY_VERSION >= Version("3.11"),
+    reason="segfaults due to https://github.com/PyTables/PyTables/issues/977",
+)
 @pytest.mark.parametrize("scheduler", ["sync", "threads", "processes"])
 @pytest.mark.parametrize("npartitions", [1, 4, 10])
 def test_to_hdf_schedulers(scheduler, npartitions):
@@ -679,6 +693,10 @@ def test_read_hdf_multiply_open():
         dd.read_hdf(fn, "/data", chunksize=2, mode="r")
 
 
+@pytest.mark.skipif(
+    _PY_VERSION >= Version("3.11"),
+    reason="segfaults due to https://github.com/PyTables/PyTables/issues/977",
+)
 def test_read_hdf_multiple():
     pytest.importorskip("tables")
     df = pd.DataFrame(
diff --git a/dask/dataframe/io/tests/test_parquet.py b/dask/dataframe/io/tests/test_parquet.py
index 5a1089a693d..4326813be83 100644
--- a/dask/dataframe/io/tests/test_parquet.py
+++ b/dask/dataframe/io/tests/test_parquet.py
@@ -661,6 +661,7 @@ def write_partition(df, i):
     assert_eq(df, ddf2, check_index=False)
 
 
+@PYARROW_MARK
 @pytest.mark.xfail(
     not PANDAS_GT_130,
     reason=(
@@ -3005,6 +3006,7 @@ def test_chunksize_aggregate_files(tmpdir, write_engine, read_engine, aggregate_
     assert_eq(df1[["c", "d"]], df2[["c", "d"]], check_index=False)
 
 
+@PYARROW_MARK
 @pytest.mark.parametrize("metadata", [True, False])
 @pytest.mark.parametrize("chunksize", [None, 1024, 4096, "1MiB"])
 def test_chunksize(tmpdir, chunksize, engine, metadata):
@@ -3998,6 +4000,7 @@ def test_metadata_task_size(tmpdir, engine, write_metadata_file, metadata_task_s
     assert_eq(ddf2b, ddf2c)
 
 
+@PYARROW_MARK
 @pytest.mark.parametrize("partition_on", ("b", None))
 def test_extra_file(tmpdir, engine, partition_on):
     # Check that read_parquet can handle spark output
diff --git a/setup.py b/setup.py
index 8f20c4dfb86..629bf6ec95d 100755
--- a/setup.py
+++ b/setup.py
@@ -83,6 +83,7 @@
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
         "Topic :: Scientific/Engineering",
         "Topic :: System :: Distributed Computing",
     ],
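Note on the version gate used in test_hdf.py: the new skip markers compare a `_PY_VERSION` constant against `Version("3.11")`. Below is a minimal sketch of how such a constant can be derived from the running interpreter and consumed by pytest; the exact definition in `dask/compatibility.py` may differ, and `test_needs_pytables` is a hypothetical test name for illustration only.

    import sys

    import pytest
    from packaging.version import Version

    # Assumption: the constant mirrors the running interpreter's version,
    # e.g. Version("3.11.0"), so it compares cleanly against Version("3.11").
    _PY_VERSION = Version(".".join(map(str, sys.version_info[:3])))


    @pytest.mark.skipif(
        _PY_VERSION >= Version("3.11"),
        reason="segfaults due to https://github.com/PyTables/PyTables/issues/977",
    )
    def test_needs_pytables():
        # Also skip (rather than fail) when PyTables is not installed at all.
        tables = pytest.importorskip("tables")
        assert tables is not None

The marker is evaluated at collection time, so on Python 3.11 the test body never runs and the PyTables segfault is never triggered.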
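Similarly, the `@PYARROW_MARK` decorator added to three tests in test_parquet.py gates them on pyarrow being importable. A sketch of the usual shape of such a marker, assuming it wraps a plain import check (the real definition already exists in test_parquet.py and may differ in detail; `test_requires_pyarrow` is a hypothetical example):

    import pytest

    # Assumption: the marker is built from a module-level importability probe.
    try:
        import pyarrow  # noqa: F401

        HAS_PYARROW = True
    except ImportError:
        HAS_PYARROW = False

    PYARROW_MARK = pytest.mark.skipif(not HAS_PYARROW, reason="pyarrow not installed")


    @PYARROW_MARK
    def test_requires_pyarrow():
        import pyarrow as pa

        # Runs only in environments where pyarrow imported successfully.
        assert pa.table({"a": [1, 2, 3]}).num_rows == 3

Defining the skip condition once and reusing it as a decorator keeps the pyarrow-dependent tests consistent, which matters here because the new 3.11 environment pins `pyarrow>=10` while other CI environments may omit it.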