Skip to content

Commit

Permalink
Resolve test failures from fugue=0.8.0 (#990)
Browse files Browse the repository at this point in the history
* Resolve fugue failures

* Bump fugue min version to 0.7.3
  • Loading branch information
charlesbluca committed Jan 9, 2023
1 parent 472eff0 commit fa36ac0
Show file tree
Hide file tree
Showing 9 changed files with 26 additions and 25 deletions.
2 changes: 1 addition & 1 deletion continuous_integration/environment-3.10-dev.yaml
Expand Up @@ -5,7 +5,7 @@ channels:
dependencies:
- dask>=2022.3.0
- fastapi>=0.69.0
- fugue>=0.7.0
- fugue>=0.7.3
- intake>=0.6.0
- jsonschema
- lightgbm
Expand Down
2 changes: 1 addition & 1 deletion continuous_integration/environment-3.8-dev.yaml
Expand Up @@ -5,7 +5,7 @@ channels:
dependencies:
- dask=2022.3.0
- fastapi=0.69.0
- fugue=0.7.0
- fugue=0.7.3
- intake=0.6.0
- jsonschema
- lightgbm
Expand Down
2 changes: 1 addition & 1 deletion continuous_integration/environment-3.9-dev.yaml
Expand Up @@ -5,7 +5,7 @@ channels:
dependencies:
- dask>=2022.3.0
- fastapi>=0.69.0
- fugue>=0.7.0
- fugue>=0.7.3
- intake>=0.6.0
- jsonschema
- lightgbm
Expand Down
2 changes: 1 addition & 1 deletion continuous_integration/gpuci/environment.yaml
Expand Up @@ -8,7 +8,7 @@ channels:
dependencies:
- dask>=2022.3.0
- fastapi>=0.69.0
- fugue>=0.7.0
- fugue>=0.7.3
- intake>=0.6.0
- jsonschema
- lightgbm
Expand Down
2 changes: 1 addition & 1 deletion dask_sql/physical/rel/custom/metrics.py
Expand Up @@ -36,7 +36,7 @@ def accuracy_score(
Otherwise, return the fraction of correctly classified samples.
sample_weight : 1d array-like, optional
Sample weights.
.. versionadded:: 0.7.0
.. versionadded:: 0.7.3
Returns
-------
score : scalar dask Array
Expand Down
2 changes: 1 addition & 1 deletion docs/environment.yml
Expand Up @@ -9,7 +9,7 @@ dependencies:
- dask-sphinx-theme>=2.0.3
- dask>=2022.3.0
- pandas>=1.4.0
- fugue>=0.7.0
- fugue>=0.7.3
- jpype1>=1.0.2
- fastapi>=0.69.0
- uvicorn>=0.13.4
Expand Down
2 changes: 1 addition & 1 deletion docs/requirements-docs.txt
Expand Up @@ -3,7 +3,7 @@ sphinx-tabs
dask-sphinx-theme>=3.0.0
dask>=2022.3.0
pandas>=1.4.0
fugue>=0.7.0
fugue>=0.7.3
fastapi>=0.69.0
uvicorn>=0.13.4
tzlocal>=2.1
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -65,7 +65,7 @@
"black==22.3.0",
"isort==5.7.0",
],
"fugue": ["fugue>=0.7.0"],
"fugue": ["fugue>=0.7.3"],
},
entry_points={
"console_scripts": [
Expand Down
35 changes: 18 additions & 17 deletions tests/integration/test_fugue.py
Expand Up @@ -8,44 +8,45 @@

fugue_sql = pytest.importorskip("fugue_sql")

# needs to be imported after the check for fugue
if fugue_sql:
from dask_sql.integrations.fugue import DaskSQLExecutionEngine, fsql_dask
from dask_sql.integrations.fugue import fsql_dask # noqa: E402


@skip_if_external_scheduler
def test_simple_statement(client):
with fugue_sql.FugueSQLWorkflow(DaskSQLExecutionEngine) as dag:
df = dag.df([[0, "hello"], [1, "world"]], "a:int64,b:str")
dag("SELECT * FROM df WHERE a > 0 YIELD DATAFRAME AS result")
result = dag.run()
def test_fugue_workflow(client):
dag = fugue_sql.FugueSQLWorkflow()
df = dag.df([[0, "hello"], [1, "world"]], "a:int64,b:str")
dag("SELECT * FROM df WHERE a > 0 YIELD DATAFRAME AS result")

result = dag.run("dask")
return_df = result["result"].as_pandas()
assert_eq(return_df, pd.DataFrame({"a": [1], "b": ["world"]}))

# A more elegant way to do things
result = dag.run(client)
return_df = result["result"].as_pandas()
assert_eq(return_df, pd.DataFrame({"a": [1], "b": ["world"]}))


@skip_if_external_scheduler
def test_fugue_fsql(client):
pdf = pd.DataFrame([[0, "hello"], [1, "world"]], columns=["a", "b"])
result = fugue_sql.fsql(
dag = fugue_sql.fsql(
"SELECT * FROM df WHERE a > 0 YIELD DATAFRAME AS result",
df=pdf,
).run("dask")
)

result = dag.run("dask")
return_df = result["result"].as_pandas()
assert_eq(return_df, pd.DataFrame({"a": [1], "b": ["world"]}))

result = fugue_sql.fsql(
"SELECT * FROM df WHERE a > 0 YIELD DATAFRAME AS result",
df=pdf,
).run(client)

result = dag.run(client)
return_df = result["result"].as_pandas()
assert_eq(return_df, pd.DataFrame({"a": [1], "b": ["world"]}))


# TODO: Revisit fixing this on an independant cluster (without dask-sql) based on the
# discussion in https://github.com/dask-contrib/dask-sql/issues/407
@skip_if_external_scheduler
def test_fsql(client):
def test_dask_fsql(client):
def assert_fsql(df: pd.DataFrame) -> None:
assert_eq(df, pd.DataFrame({"a": [1]}))

Expand Down

0 comments on commit fa36ac0

Please sign in to comment.