Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates for fastparquet evolution #9650

Merged
merged 8 commits on Nov 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion dask/dataframe/io/parquet/fastparquet.py
Expand Up @@ -401,7 +401,7 @@ def _collect_dataset_info(
# Find all files if we are not using a _metadata file
if ignore_metadata_file or not _metadata_exists:
# For now, we need to discover every file under paths[0]
paths, base, fns = _sort_and_analyze_paths(fs.find(base), fs)
paths, base, fns = _sort_and_analyze_paths(fs.find(base), fs, root=base)
_update_paths = False
for fn in ["_metadata", "_common_metadata"]:
try:
Expand Down Expand Up @@ -490,6 +490,7 @@ def _collect_dataset_info(
raise ValueError(
"No partition-columns should be written in the \n"
"file unless they are ALL written in the file.\n"
"This restriction is removed as of fastparquet 0.8.4\n"
"columns: {} | partitions: {}".format(pf.columns, pf.cats.keys())
)

Expand Down
10 changes: 7 additions & 3 deletions dask/dataframe/io/tests/test_parquet.py
Expand Up @@ -3155,13 +3155,17 @@ def test_partitioned_column_overlap(tmpdir, engine, write_cols):
else:
path = str(tmpdir)

if write_cols == ["part", "kind", "col"]:
expect = pd.concat([_df1, _df2], ignore_index=True)
if engine == "fastparquet" and fastparquet_version > parse_version("0.8.3"):
# columns will change order and partitions will be categorical
result = dd.read_parquet(path, engine=engine)
assert result.compute().reset_index(drop=True).to_dict() == expect.to_dict()
elif write_cols == ["part", "kind", "col"]:
result = dd.read_parquet(path, engine=engine)
expect = pd.concat([_df1, _df2], ignore_index=True)
assert_eq(result, expect, check_index=False)
else:
# For now, partial overlap between partition columns and
# real columns is not allowed
# real columns is not allowed for pyarrow or older fastparquet
[Review note: martindurant marked this conversation as resolved.]
with pytest.raises(ValueError):
dd.read_parquet(path, engine=engine)

Expand Down