diff --git a/dask/dataframe/io/parquet/fastparquet.py b/dask/dataframe/io/parquet/fastparquet.py index 67d6bcbd902..7d6e5934a11 100644 --- a/dask/dataframe/io/parquet/fastparquet.py +++ b/dask/dataframe/io/parquet/fastparquet.py @@ -401,7 +401,7 @@ def _collect_dataset_info( # Find all files if we are not using a _metadata file if ignore_metadata_file or not _metadata_exists: # For now, we need to discover every file under paths[0] - paths, base, fns = _sort_and_analyze_paths(fs.find(base), fs) + paths, base, fns = _sort_and_analyze_paths(fs.find(base), fs, root=base) _update_paths = False for fn in ["_metadata", "_common_metadata"]: try: @@ -490,6 +490,7 @@ def _collect_dataset_info( raise ValueError( "No partition-columns should be written in the \n" "file unless they are ALL written in the file.\n" + "This restriction is removed as of fastparquet 0.8.4\n" "columns: {} | partitions: {}".format(pf.columns, pf.cats.keys()) ) diff --git a/dask/dataframe/io/tests/test_parquet.py b/dask/dataframe/io/tests/test_parquet.py index 57aeb4defc2..5e6e47a9f1f 100644 --- a/dask/dataframe/io/tests/test_parquet.py +++ b/dask/dataframe/io/tests/test_parquet.py @@ -3155,13 +3155,17 @@ def test_partitioned_column_overlap(tmpdir, engine, write_cols): else: path = str(tmpdir) - if write_cols == ["part", "kind", "col"]: + expect = pd.concat([_df1, _df2], ignore_index=True) + if engine == "fastparquet" and fastparquet_version > parse_version("0.8.3"): + # columns will change order and partitions will be categorical + result = dd.read_parquet(path, engine=engine) + assert result.compute().reset_index(drop=True).to_dict() == expect.to_dict() + elif write_cols == ["part", "kind", "col"]: result = dd.read_parquet(path, engine=engine) - expect = pd.concat([_df1, _df2], ignore_index=True) assert_eq(result, expect, check_index=False) else: # For now, partial overlap between partition columns and - # real columns is not allowed + # real columns is not allowed for pyarrow or older fastparquet with pytest.raises(ValueError): dd.read_parquet(path, engine=engine)