
Commit

resolve conflicts
Signed-off-by: harupy <hkawamura0130@gmail.com>
harupy committed Jan 7, 2022
2 parents 341898e + d5b0b55 commit a357c88
Showing 9 changed files with 74 additions and 40 deletions.
4 changes: 3 additions & 1 deletion .github/actions/setup-python/action.yml
@@ -24,8 +24,10 @@ runs:
python_version="3.6.13"
elif [[ "$python_version" == "3.7" ]]; then
python_version="3.7.12"
elif [[ "$python_version" == "3.8" ]]; then
python_version="3.8.12"
else
echo "Invalid python version: '$python_version'. Must be one of ['3.6', '3.7']"
echo "Invalid python version: '$python_version'. Must be one of ['3.6', '3.7', '3.8']"
exit 1
fi
echo "::set-output name=version::$python_version"
19 changes: 15 additions & 4 deletions .github/workflows/cross-version-tests.yml
@@ -103,16 +103,27 @@ jobs:
with:
repository: ${{ github.event.inputs.repository }}
ref: ${{ github.event.inputs.ref }}
- name: Get Java version
id: get-java-version
run: |
if [ "${{ matrix.package }}" = "mleap" ]
then
java_version=8
else
java_version=11
fi
echo "::set-output name=version::$java_version"
- uses: actions/setup-java@v2
with:
# GitHub Actions' Ubuntu 20.04 image uses Java 11 (which is incompatible with Spark 2.4.x) by default:
# https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-README.md#java
java-version: 8
java-version: ${{ steps.get-java-version.outputs.version }}
distribution: "adopt"
- name: Get python version
id: get-python-version
run: |
if [[ "${{ matrix.version }}" = "dev" ]] || \
if [[ "${{ matrix.package }}" = "statsmodels" && "${{ matrix.version }}" = "dev" ]]
then
python_version=3.8
elif [[ "${{ matrix.version }}" = "dev" ]] || \
[[ "${{ matrix.package }}" = "scikit-learn" && ! "${{ matrix.version }}" =~ ^0\.* ]] || \
[[ "${{ matrix.package }}" = "statsmodels" ]] || \
[[ "${{ matrix.package }}" = "tensorflow" && ! "${{ matrix.version }}" =~ ^2\.[0-6] ]] || \
16 changes: 9 additions & 7 deletions .github/workflows/master.yml
@@ -54,6 +54,10 @@ jobs:
working-directory: mlflow/R/mlflow
steps:
- uses: actions/checkout@v2
- uses: actions/setup-java@v2
with:
java-version: 11
distribution: 'adopt'
- uses: r-lib/actions/setup-r@v1
# This step dumps the current set of R dependencies and R version into files to be used
# as a cache key when caching/restoring R dependencies.
@@ -155,9 +159,7 @@ jobs:
python-version: 3.6
- uses: actions/setup-java@v2
with:
# GitHub Actions' Ubuntu 20.04 image uses Java 11 (which is incompatible with Spark 2.4.x) by default:
# https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-README.md#java
java-version: 8
java-version: 11
distribution: 'adopt'
- uses: ./.github/actions/cache-pip
- name: Install dependencies
@@ -203,7 +205,7 @@ jobs:
- name: Set up Java
uses: actions/setup-java@v2
with:
java-version: 8
java-version: 11
distribution: 'adopt'
- name: Install dependencies
run: |
@@ -272,7 +274,7 @@ jobs:
python-version: 3.6
- uses: actions/setup-java@v2
with:
java-version: 8
java-version: 11
distribution: 'adopt'
- uses: ./.github/actions/cache-pip
- name: Install dependencies
@@ -300,7 +302,7 @@ jobs:
python-version: 3.6
- uses: actions/setup-java@v2
with:
java-version: 8
java-version: 11
distribution: 'adopt'
- name: Install dependencies
env:
@@ -345,7 +347,7 @@ jobs:
python-version: 3.6
- uses: actions/setup-java@v2
with:
java-version: 8
java-version: 11
distribution: 'adopt'
- name: Install dependencies
env:
13 changes: 9 additions & 4 deletions dev/install-common-deps.sh
@@ -27,18 +27,19 @@ else
fi
export MLFLOW_HOME=$(pwd)

req_files=""
# Install Python test dependencies only if we're running Python tests
if [[ "$INSTALL_SMALL_PYTHON_DEPS" == "true" ]]; then
# When downloading large packages from PyPI, the connection is sometimes aborted by the
# remote host. See https://github.com/pypa/pip/issues/8510.
# As a workaround, we retry installation of large packages.
retry-with-backoff pip install -r requirements/small-requirements.txt
req_files+=" -r requirements/small-requirements.txt"
fi
if [[ "$INSTALL_SKINNY_PYTHON_DEPS" == "true" ]]; then
retry-with-backoff pip install -r requirements/skinny-requirements.txt
req_files+=" -r requirements/skinny-requirements.txt"
fi
if [[ "$INSTALL_LARGE_PYTHON_DEPS" == "true" ]]; then
retry-with-backoff pip install -r requirements/large-requirements.txt
req_files+=" -r requirements/large-requirements.txt"

# Install prophet's dependencies beforehand, otherwise pip would fail to build a wheel for prophet
if [[ -z "$(pip cache list prophet --format abspath)" ]]; then
@@ -49,7 +50,11 @@ if [[ "$INSTALL_LARGE_PYTHON_DEPS" == "true" ]]; then
rm -rf $tmp_dir
fi

retry-with-backoff pip install -r requirements/extra-ml-requirements.txt
req_files+=" -r requirements/extra-ml-requirements.txt"
fi

if [[ ! -z $req_files ]]; then
retry-with-backoff pip install $req_files
fi

# Install `mlflow-test-plugin` without dependencies
3 changes: 3 additions & 0 deletions mlflow/mleap.py
@@ -211,6 +211,9 @@ def add_to_model(mlflow_model, path, spark_model, sample_input):
from pyspark.ml.pipeline import PipelineModel
from pyspark.sql import DataFrame
import mleap.version

# This import statement adds `serializeToBundle` and `deserializeFromBundle` to `Transformer`:
# https://github.com/combust/mleap/blob/37f6f61634798118e2c2eb820ceeccf9d234b810/python/mleap/pyspark/spark_support.py#L32-L33
from mleap.pyspark.spark_support import SimpleSparkSerializer # pylint: disable=unused-import
from py4j.protocol import Py4JError

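For context on the comment added above: importing SimpleSparkSerializer from mleap.pyspark.spark_support is done purely for its side effect of attaching serializeToBundle and deserializeFromBundle to pyspark's Transformer. The following is a hedged sketch of how those patched methods are typically used; it is not part of this commit, the MLeap API may differ across versions, and `fitted_pipeline` / `sample_df` are hypothetical placeholders.

# Hedged sketch, not part of this commit: typical use of the methods that the
# side-effect import above attaches to pyspark Transformers.
from pyspark.ml import PipelineModel
from mleap.pyspark.spark_support import SimpleSparkSerializer  # noqa: F401 (imported for side effects)

# `fitted_pipeline` (a fitted PipelineModel) and `sample_df` (a DataFrame it can
# transform) are hypothetical placeholders, not objects from this repository.
fitted_pipeline.serializeToBundle("jar:file:/tmp/model.zip", fitted_pipeline.transform(sample_df))
restored = PipelineModel.deserializeFromBundle("jar:file:/tmp/model.zip")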
5 changes: 0 additions & 5 deletions mlflow/spark.py
@@ -23,7 +23,6 @@
import posixpath
import re
import shutil
import traceback
import uuid
import yaml

@@ -71,10 +70,6 @@
_logger = logging.getLogger(__name__)


def _format_exception(ex):
return "".join(traceback.format_exception(type(ex), ex, ex.__traceback__))


def get_default_pip_requirements():
"""
:return: A list of default pip requirements for MLflow Models produced by this flavor.
2 changes: 1 addition & 1 deletion mlflow/statsmodels.py
@@ -12,9 +12,9 @@
https://www.statsmodels.org/stable/_modules/statsmodels/base/model.html#Results
"""
import logging
import os
import yaml
import logging

import mlflow
from mlflow import pyfunc
17 changes: 17 additions & 0 deletions tests/mleap/test_mleap_model_export.py
@@ -189,3 +189,20 @@ def test_mleap_module_model_save_with_invalid_sample_input_type_raises_exception
        mlflow.spark.save_model(
            spark_model=spark_model_iris.model, path=model_path, sample_input=invalid_input
        )


@pytest.mark.large
def test_spark_module_model_save_with_mleap_and_unsupported_transformer_raises_exception(
    spark_model_iris, model_path
):
    class CustomTransformer(JavaModel):
        def _transform(self, dataset):
            return dataset

    unsupported_pipeline = Pipeline(stages=[CustomTransformer()])
    unsupported_model = unsupported_pipeline.fit(spark_model_iris.spark_df)

    with pytest.raises(ValueError, match="CustomTransformer"):
        mlflow.spark.save_model(
            spark_model=unsupported_model, path=model_path, sample_input=spark_model_iris.spark_df
        )
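The test above relies on MLeap having no serializer for the custom JavaModel subclass, so mlflow.spark.save_model raises a ValueError naming the unsupported stage. As a hedged aside (my reading of the API, not something this commit changes): the MLeap flavor is only attempted when `sample_input` is supplied, so the same model can still be saved with just the plain Spark flavor by omitting it.

# Hedged sketch, not part of this commit: omitting `sample_input` skips the MLeap
# flavor, so saving the same `unsupported_model` (placeholder from the test above)
# should not raise. The target path is a hypothetical example.
import mlflow.spark

mlflow.spark.save_model(spark_model=unsupported_model, path="/tmp/spark-flavor-only")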
35 changes: 17 additions & 18 deletions tests/spark/test_spark_model_export.py
@@ -9,12 +9,12 @@
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.pipeline import Pipeline
from pyspark.ml.wrapper import JavaModel
import pytest
from sklearn import datasets
import shutil
from collections import namedtuple
import yaml
from packaging.version import Version

import mlflow
import mlflow.pyfunc.scoring_server as pyfunc_scoring_server
@@ -62,6 +62,22 @@ def spark_custom_env(tmpdir):
# other tests.
@pytest.fixture(scope="session", autouse=True)
def spark_context():
    if Version(pyspark.__version__) < Version("3.1"):
        # A workaround for this issue:
        # https://stackoverflow.com/questions/62109276/errorjava-lang-unsupportedoperationexception-for-pyspark-pandas-udf-documenta
        spark_home = (
            os.environ.get("SPARK_HOME")
            if "SPARK_HOME" in os.environ
            else os.path.dirname(pyspark.__file__)
        )
        conf_dir = os.path.join(spark_home, "conf")
        os.makedirs(conf_dir, exist_ok=True)
        with open(os.path.join(conf_dir, "spark-defaults.conf"), "w") as f:
            conf = """
spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true"
spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true"
"""
            f.write(conf)
    conf = pyspark.SparkConf()
    max_tries = 3
    for num_tries in range(max_tries):
@@ -635,23 +651,6 @@ def test_pyspark_version_is_logged_without_dev_suffix(spark_model_iris):
assert any(x == f"pyspark=={unaffected_version}" for x in pip_deps)


@pytest.mark.large
def test_spark_module_model_save_with_mleap_and_unsupported_transformer_raises_exception(
    spark_model_iris, model_path
):
    class CustomTransformer(JavaModel):
        def _transform(self, dataset):
            return dataset

    unsupported_pipeline = Pipeline(stages=[CustomTransformer()])
    unsupported_model = unsupported_pipeline.fit(spark_model_iris.spark_df)

    with pytest.raises(ValueError, match="CustomTransformer"):
        sparkm.save_model(
            spark_model=unsupported_model, path=model_path, sample_input=spark_model_iris.spark_df
        )


def test_shutil_copytree_without_file_permissions(tmpdir):
src_dir = tmpdir.mkdir("src-dir")
dst_dir = tmpdir.mkdir("dst-dir")
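Regarding the spark_context fixture change shown above: `io.netty.tryReflectionSetAccessible=true` is the flag Arrow-based code (pandas UDFs) needs on Java 9+ with older Spark releases, and it is written into spark-defaults.conf rather than set on a SparkConf, presumably because `spark.driver.extraJavaOptions` cannot take effect when set programmatically after the driver JVM has already started in client mode. Below is a small sketch (not part of this commit) of the SPARK_HOME fallback the fixture uses, extracted as a standalone helper.

# Hedged sketch, not part of this commit: locate the conf directory the same way the
# fixture does. For pip-installed PySpark, the package directory doubles as SPARK_HOME.
import os
import pyspark


def default_spark_conf_dir():
    spark_home = os.environ.get("SPARK_HOME", os.path.dirname(pyspark.__file__))
    return os.path.join(spark_home, "conf")


print(default_spark_conf_dir())  # e.g. <site-packages>/pyspark/conf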
