From a532127c1e16f1306265eaed8ca670ff0830ff81 Mon Sep 17 00:00:00 2001
From: Jacob Hayes
Date: Wed, 30 Nov 2022 00:20:44 -0500
Subject: [PATCH 1/2] Fix pandas 1.5+ FutureWarning in .str.split(..., expand=True)

---
 dask/dataframe/accessor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dask/dataframe/accessor.py b/dask/dataframe/accessor.py
index 05213d54fa9..f56e976040a 100644
--- a/dask/dataframe/accessor.py
+++ b/dask/dataframe/accessor.py
@@ -265,7 +265,7 @@ def _split(self, method, pat=None, n=-1, expand=False):
                 delimiter = " " if pat is None else pat
                 meta = self._series._meta._constructor(
                     [delimiter.join(["a"] * (n + 1))],
-                    index=self._series._meta_nonempty[:1].index,
+                    index=self._series._meta_nonempty.iloc[:1].index,
                 )
                 meta = getattr(meta.str, method)(n=n, expand=expand, pat=pat)
         else:

From d218d22cf5bff8c567c795bef7e292170f9b2f54 Mon Sep 17 00:00:00 2001
From: Jacob Hayes
Date: Wed, 30 Nov 2022 14:27:15 -0500
Subject: [PATCH 2/2] Add test_str_split_no_warning

Co-authored-by: Paul Hobson
---
 dask/dataframe/tests/test_accessors.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/dask/dataframe/tests/test_accessors.py b/dask/dataframe/tests/test_accessors.py
index 318474168ef..ca54d75ad6a 100644
--- a/dask/dataframe/tests/test_accessors.py
+++ b/dask/dataframe/tests/test_accessors.py
@@ -294,6 +294,17 @@ def test_str_accessor_split_expand_more_columns():
     ds.str.split(n=10, expand=True).compute()
 
 
+@pytest.mark.parametrize("index", [None, [0]], ids=["range_index", "other index"])
+def test_str_split_no_warning(index):
+    df = pd.DataFrame({"a": ["a\nb"]}, index=index)
+    ddf = dd.from_pandas(df, npartitions=1)
+
+    pd_a = df["a"].str.split("\n", n=1, expand=True)
+    dd_a = ddf["a"].str.split("\n", n=1, expand=True)
+
+    assert_eq(dd_a, pd_a)
+
+
 def test_string_nullable_types(df_ddf):
     df, ddf = df_ddf
     assert_eq(ddf.string_col.str.count("A"), df.string_col.str.count("A"))