Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test query-planning in gpuCI #11060

Merged
8 commits merged on May 10, 2024
10 changes: 5 additions & 5 deletions continuous_integration/gpuci/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ cd "$WORKSPACE"
# Determine CUDA release version
export CUDA_REL=${CUDA_VERSION%.*}

# TODO: remove once RAPIDS 24.06 has full support for dask-expr
export DASK_DATAFRAME__QUERY_PLANNING=false

################################################################################
# SETUP - Check environment
################################################################################
Expand Down Expand Up @@ -57,5 +54,8 @@ conda info
conda config --show-sources
conda list --show-channel-urls

rapids-logger "Python py.test for dask"
py.test $WORKSPACE -n 3 -v -m gpu --junitxml="$WORKSPACE/junit-dask.xml" --cov-config="$WORKSPACE/pyproject.toml" --cov=dask --cov-report=xml:"$WORKSPACE/dask-coverage.xml" --cov-report term
rapids-logger "Python py.test for dask (LEGACY)"
DASK_DATAFRAME__QUERY_PLANNING=False py.test $WORKSPACE -n 3 -v -m gpu --junitxml="$WORKSPACE/junit-dask-legacy.xml" --cov-config="$WORKSPACE/pyproject.toml" --cov=dask --cov-report=xml:"$WORKSPACE/dask-coverage-legacy.xml" --cov-report term

rapids-logger "Python py.test for dask (NEW)"
DASK_DATAFRAME__QUERY_PLANNING=True py.test $WORKSPACE -n 3 -v -m gpu --junitxml="$WORKSPACE/junit-dask.xml" --cov-config="$WORKSPACE/pyproject.toml" --cov=dask --cov-report=xml:"$WORKSPACE/dask-coverage.xml" --cov-report term
rjzamora marked this conversation as resolved.
Show resolved Hide resolved
14 changes: 13 additions & 1 deletion dask/dataframe/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5184,7 +5184,19 @@ def test_datetime_loc_open_slicing():
assert_eq(df[0].loc["02.02.2015":], ddf[0].loc["02.02.2015":])


@pytest.mark.parametrize("gpu", [False, pytest.param(True, marks=pytest.mark.gpu)])
@pytest.mark.parametrize(
"gpu",
[
False,
pytest.param(
True,
marks=[
pytest.mark.gpu,
pytest.mark.xfail(DASK_EXPR_ENABLED, reason="not supported"),
rjzamora marked this conversation as resolved.
Show resolved Hide resolved
],
),
],
)
def test_to_datetime(gpu):
xd = pd if not gpu else pytest.importorskip("cudf")

Expand Down
4 changes: 3 additions & 1 deletion dask/dataframe/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3035,7 +3035,9 @@ def test_groupby_apply_cudf(group_keys):
)

assert_eq(res_pd, res_dd)
assert_eq(res_dd, res_dc)
# Pandas and cudf return different `index.name`
# for empty MultiIndex (use `check_names=False`)
assert_eq(res_dd, res_dc, check_names=False)


@pytest.mark.parametrize("sort", [True, False])
Expand Down
13 changes: 12 additions & 1 deletion dask/dataframe/tests/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1135,7 +1135,18 @@ def test_merge_how_raises():


@pytest.mark.parametrize("parts", [(3, 3), (3, 1), (1, 3)])
@pytest.mark.parametrize("how", ["leftsemi", "leftanti"])
@pytest.mark.parametrize(
"how",
[
"leftsemi",
pytest.param(
"leftanti",
marks=pytest.mark.xfail(
DASK_EXPR_ENABLED, reason="leftanti is not supported yet"
),
),
],
)
@pytest.mark.parametrize(
"engine",
[
Expand Down