New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Stop using Java8 because we no longer support spark < 3.0 #5234
Changes from 7 commits
6f1b8f9
a9bce56
6ecdfeb
5ce1e8b
107de7f
52ad3d2
e90cdf5
927e97e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,7 +23,6 @@ | |
import posixpath | ||
import re | ||
import shutil | ||
import traceback | ||
import uuid | ||
import yaml | ||
|
||
|
@@ -71,10 +70,6 @@ | |
_logger = logging.getLogger(__name__) | ||
|
||
|
||
def _format_exception(ex): | ||
return "".join(traceback.format_exception(type(ex), ex, ex.__traceback__)) | ||
Comment on lines
-74
to
-75
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removed this unused function to run cross-version tests for Spark. |
||
|
||
|
||
def get_default_pip_requirements(): | ||
""" | ||
:return: A list of default pip requirements for MLflow Models produced by this flavor. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -189,3 +189,20 @@ def test_mleap_module_model_save_with_invalid_sample_input_type_raises_exception | |
mlflow.spark.save_model( | ||
spark_model=spark_model_iris.model, path=model_path, sample_input=invalid_input | ||
) | ||
|
||
|
||
@pytest.mark.large | ||
def test_spark_module_model_save_with_mleap_and_unsupported_transformer_raises_exception( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Moved this test here from |
||
spark_model_iris, model_path | ||
): | ||
class CustomTransformer(JavaModel): | ||
def _transform(self, dataset): | ||
return dataset | ||
|
||
unsupported_pipeline = Pipeline(stages=[CustomTransformer()]) | ||
unsupported_model = unsupported_pipeline.fit(spark_model_iris.spark_df) | ||
|
||
with pytest.raises(ValueError, match="CustomTransformer"): | ||
mlflow.spark.save_model( | ||
spark_model=unsupported_model, path=model_path, sample_input=spark_model_iris.spark_df | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,12 +9,12 @@ | |
from pyspark.ml.classification import LogisticRegression | ||
from pyspark.ml.feature import VectorAssembler | ||
from pyspark.ml.pipeline import Pipeline | ||
from pyspark.ml.wrapper import JavaModel | ||
import pytest | ||
from sklearn import datasets | ||
import shutil | ||
from collections import namedtuple | ||
import yaml | ||
from packaging.version import Version | ||
|
||
import mlflow | ||
import mlflow.pyfunc.scoring_server as pyfunc_scoring_server | ||
|
@@ -62,6 +62,22 @@ def spark_custom_env(tmpdir): | |
# other tests. | ||
@pytest.fixture(scope="session", autouse=True) | ||
def spark_context(): | ||
if Version(pyspark.__version__) < Version("3.1"): | ||
# A workaround for this issue: | ||
# https://stackoverflow.com/questions/62109276/errorjava-lang-unsupportedoperationexception-for-pyspark-pandas-udf-documenta | ||
spark_home = ( | ||
os.environ.get("SPARK_HOME") | ||
if "SPARK_HOME" in os.environ | ||
else os.path.dirname(pyspark.__file__) | ||
) | ||
conf_dir = os.path.join(spark_home, "conf") | ||
os.makedirs(conf_dir, exist_ok=True) | ||
with open(os.path.join(conf_dir, "spark-defaults.conf"), "w") as f: | ||
conf = """ | ||
spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" | ||
spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" | ||
""" | ||
f.write(conf) | ||
Comment on lines
+65
to
+80
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A workaround for an issue with Spark < 3.1, Java 11, and There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1 writing to the spark-defaults config file. Good solution for this. |
||
conf = pyspark.SparkConf() | ||
max_tries = 3 | ||
for num_tries in range(max_tries): | ||
|
@@ -635,23 +651,6 @@ def test_pyspark_version_is_logged_without_dev_suffix(spark_model_iris): | |
assert any(x == f"pyspark=={unaffected_version}" for x in pip_deps) | ||
|
||
|
||
@pytest.mark.large | ||
def test_spark_module_model_save_with_mleap_and_unsupported_transformer_raises_exception( | ||
spark_model_iris, model_path | ||
): | ||
class CustomTransformer(JavaModel): | ||
def _transform(self, dataset): | ||
return dataset | ||
|
||
unsupported_pipeline = Pipeline(stages=[CustomTransformer()]) | ||
unsupported_model = unsupported_pipeline.fit(spark_model_iris.spark_df) | ||
|
||
with pytest.raises(ValueError, match="CustomTransformer"): | ||
sparkm.save_model( | ||
spark_model=unsupported_model, path=model_path, sample_input=spark_model_iris.spark_df | ||
) | ||
|
||
|
||
def test_shutil_copytree_without_file_permissions(tmpdir): | ||
src_dir = tmpdir.mkdir("src-dir") | ||
dst_dir = tmpdir.mkdir("dst-dir") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Change for running cross-version tests for MLeap.