parametrize tests
fjetter committed Dec 19, 2023
1 parent 66891a7 commit 9112797
Showing 1 changed file with 18 additions and 20 deletions.
dask/dataframe/tests/test_dataframe.py: 18 additions, 20 deletions
@@ -1236,7 +1236,9 @@ def test_drop_duplicates_subset():
             assert_eq(df.drop_duplicates(ss, **kwarg), ddf.drop_duplicates(ss, **kwarg))
 
 
-def test_drop_duplicates_keep_stable():
+@pytest.mark.parametrize("keep", ["first", "last"])
+@pytest.mark.parametrize("shuffle_method", ["p2p", "tasks"])
+def test_drop_duplicates_keep_stable(keep, shuffle_method):
     pytest.importorskip("distributed")
     from distributed.utils_test import gen_cluster
 
@@ -1246,26 +1248,22 @@ def test_drop_duplicates_keep_stable():
         scheduler_kwargs={"dashboard_address": False},
     )
     async def _(c, s, a, b):
-        # If we defined all of this as pytest fixtures, we'd need to restart the
-        # cluster for each test case which is not worth it
-        for shuffle_method in ["tasks", "p2p"]:
-            for keep in ["first", "last"]:
-                pdf = pd.DataFrame(
-                    {"x": [1, 2, 3, 4, 5, 6] * 10, "y": list("abdabd") * 10},
-                    index=pd.Series(list(range(0, 30)) * 2),
-                )
-                df = dd.from_pandas(pdf, npartitions=2)
-                result_pd = pdf.drop_duplicates(subset=["x"], keep=keep)
-                result_dd = await c.compute(
-                    df.drop_duplicates(
-                        subset=["x"],
-                        keep=keep,
-                        split_out=df.npartitions,
-                        shuffle=shuffle_method,
-                    )
-                )
+        pdf = pd.DataFrame(
+            {"x": [1, 2, 3, 4, 5, 6] * 10, "y": list("abdabd") * 10},
+            index=pd.Series(list(range(0, 30)) * 2),
+        )
+        df = dd.from_pandas(pdf, npartitions=2)
+        result_pd = pdf.drop_duplicates(subset=["x"], keep=keep)
+        result_dd = await c.compute(
+            df.drop_duplicates(
+                subset=["x"],
+                keep=keep,
+                split_out=df.npartitions,
+                shuffle=shuffle_method,
+            )
+        )
 
-                dd.assert_eq(result_pd, result_dd)
+        dd.assert_eq(result_pd, result_dd)
 
     _()
 

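For context, stacking @pytest.mark.parametrize decorators as in the diff above makes pytest collect one test item per combination of the stacked arguments, so the nested for loops that previously ran inside a single gen_cluster test become four independently reported cases. A minimal, self-contained sketch of the pattern (the test name and assertions below are illustrative, not part of this commit):

import pytest

@pytest.mark.parametrize("keep", ["first", "last"])
@pytest.mark.parametrize("shuffle_method", ["p2p", "tasks"])
def test_parametrize_cross_product(keep, shuffle_method):
    # pytest generates one case per (shuffle_method, keep) pair, i.e. four
    # items here, each passing or failing on its own.
    assert keep in ("first", "last")
    assert shuffle_method in ("p2p", "tasks")

Individual combinations can then be selected on the command line, e.g. pytest -k "p2p and last". The comment removed in the diff notes that the loops were originally kept inside one test body to avoid restarting the cluster per case; with the parametrized version each case starts its own gen_cluster, trading some runtime for clearer per-case reporting.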