From d44567feb2d17f328d6d0a580948882c3d9b9d5c Mon Sep 17 00:00:00 2001 From: Harutaka Kawamura Date: Thu, 13 Jan 2022 10:17:59 +0900 Subject: [PATCH] Unpin alembic (#5249) * unpin Signed-off-by: harupy Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * pin alembic Signed-off-by: harupy Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * unpin Signed-off-by: harupy Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * update migration file Signed-off-by: harupy Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * handle error Signed-off-by: harupy Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * test Signed-off-by: harupy Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * remove try catch Signed-off-by: harupy Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * pin alembic Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * do not pin mysql version Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * upgrade mysql Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * list images Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * docker image ls Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * add check for killed status Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * grep Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * fix Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * specify commands Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * try old alembic Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * nit Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * unpin Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- .github/workflows/master.yml | 17 +++++------ ...491aaa_drop_duplicate_killed_constraint.py | 5 +++- ...pdate_run_status_constraint_with_killed.py | 30 +++++++++++++++---- setup.py | 2 +- tests/db/Dockerfile | 5 ++-- tests/db/Dockerfile.mssql | 7 ++--- tests/db/build_wheel.sh | 9 ++++++ tests/db/docker-compose.yml | 7 ++++- tests/db/run_checks.py | 6 ++++ 9 files changed, 64 insertions(+), 24 deletions(-) create mode 100755 tests/db/build_wheel.sh diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index a3eaa964f281f..070ada3d0d895 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -161,21 +161,20 @@ jobs: ./dev/run-large-python-tests.sh # Separate build and run to make it easier to explore logs - name: Run database initialization tests - build + working-directory: tests/db run: | - python setup.py bdist_wheel - cp -r dist tests/db - cd tests/db + ./build_wheel.sh docker-compose pull + docker image ls | grep -E '(REPOSITORY|postgres|mysql|mssql)' docker-compose build - name: Run database initialization tests - run working-directory: tests/db run: | - docker-compose run mlflow-sqlite python run_checks.py --schema-output schemas/sqlite.sql - docker-compose run mlflow-postgres python run_checks.py --schema-output schemas/postgres.sql - docker-compose run mlflow-mysql python run_checks.py --schema-output schemas/mysql.sql - docker-compose run mlflow-mssql ./init-mssql-db.sh - docker-compose run mlflow-mssql python run_checks.py --schema-output schemas/mssql.sql - docker-compose down --rmi all --volumes --remove-orphans + docker-compose run mlflow-sqlite + docker-compose run mlflow-postgres + docker-compose run mlflow-mysql + docker-compose run mlflow-mssql + docker-compose down --volumes --remove-orphans --rmi all - name: Run anaconda compatibility tests run: | ./dev/test-anaconda-compatibility.sh "anaconda3:2020.11" diff --git a/mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py b/mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py index 53f037714dab0..6c1c631e7b069 100644 --- a/mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py +++ b/mlflow/store/db_migrations/versions/0a8213491aaa_drop_duplicate_killed_constraint.py @@ -31,7 +31,10 @@ def upgrade(): # operation is expected to fail under certain circumstances, we execute `drop_constraint()` # outside of the batch operation context. try: - op.drop_constraint(constraint_name="status", table_name="runs", type_="check") + # For other database backends, the status check constraint is dropped by + # cfd24bdc0731_update_run_status_constraint_with_killed.py + if op.get_bind().engine.name == "mysql": + op.drop_constraint(constraint_name="status", table_name="runs", type_="check") except Exception as e: _logger.warning( "Failed to drop check constraint. Dropping check constraints may not be supported" diff --git a/mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py b/mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py index d2244185228df..beb02a2d3549e 100644 --- a/mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py +++ b/mlflow/store/db_migrations/versions/cfd24bdc0731_update_run_status_constraint_with_killed.py @@ -5,11 +5,14 @@ Create Date: 2019-10-11 15:55:10.853449 """ +import alembic from alembic import op +from packaging.version import Version +from sqlalchemy import CheckConstraint, Enum + from mlflow.entities import RunStatus, ViewType from mlflow.entities.lifecycle_stage import LifecycleStage from mlflow.store.tracking.dbmodels.models import SqlRun, SourceTypes -from sqlalchemy import CheckConstraint, Enum # revision identifiers, used by Alembic. revision = "cfd24bdc0731" @@ -17,14 +20,15 @@ branch_labels = None depends_on = None -new_run_statuses = [ +old_run_statuses = [ RunStatus.to_string(RunStatus.SCHEDULED), RunStatus.to_string(RunStatus.FAILED), RunStatus.to_string(RunStatus.FINISHED), RunStatus.to_string(RunStatus.RUNNING), - RunStatus.to_string(RunStatus.KILLED), ] +new_run_statuses = [*old_run_statuses, RunStatus.to_string(RunStatus.KILLED)] + # Certain SQL backends (e.g., SQLite) do not preserve CHECK constraints during migrations. # For these backends, CHECK constraints must be specified as table arguments. Here, we define # the collection of CHECK constraints that should be preserved when performing the migration. @@ -40,13 +44,29 @@ def upgrade(): - with op.batch_alter_table("runs", table_args=check_constraint_table_args) as batch_op: + # In alembic >= 1.7.0, `table_args` is unnecessary since CHECK constraints are preserved + # during migrations. + table_args = ( + [] if Version(alembic.__version__) >= Version("1.7.0") else check_constraint_table_args + ) + with op.batch_alter_table("runs", table_args=table_args) as batch_op: # Transform the "status" column to an `Enum` and define a new check constraint. Specify # `native_enum=False` to create a check constraint rather than a # database-backend-dependent enum (see https://docs.sqlalchemy.org/en/13/core/ # type_basics.html#sqlalchemy.types.Enum.params.native_enum) batch_op.alter_column( - "status", type_=Enum(*new_run_statuses, create_constraint=True, native_enum=False) + "status", + type_=Enum( + *new_run_statuses, + create_constraint=True, + native_enum=False, + ), + existing_type=Enum( + *old_run_statuses, + create_constraint=True, + native_enum=False, + name="status", + ), ) diff --git a/setup.py b/setup.py index fa89da45814fe..371766fada576 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,7 @@ def package_files(directory): other capabilities. """ CORE_REQUIREMENTS = SKINNY_REQUIREMENTS + [ - "alembic<=1.4.1", + "alembic", # Required "docker>=4.0.0", "Flask", diff --git a/tests/db/Dockerfile b/tests/db/Dockerfile index 5a3210a81ad88..d0601c68bdba2 100644 --- a/tests/db/Dockerfile +++ b/tests/db/Dockerfile @@ -2,8 +2,7 @@ FROM python:3.6 WORKDIR /tmp/mlflow -COPY dist ./dist - -RUN pip install dist/*.whl RUN pip install psycopg2 pymysql mysqlclient +COPY dist ./dist +RUN pip install dist/mlflow-*.whl RUN pip list diff --git a/tests/db/Dockerfile.mssql b/tests/db/Dockerfile.mssql index 3cb56c1e9b968..6f3516edff0e9 100644 --- a/tests/db/Dockerfile.mssql +++ b/tests/db/Dockerfile.mssql @@ -2,11 +2,9 @@ FROM python:3.6 WORKDIR /tmp/mlflow -COPY dist ./dist - # apt-get and system utilities RUN apt-get update && apt-get install -y \ - curl apt-transport-https debconf-utils \ + curl apt-transport-https debconf-utils \ && rm -rf /var/lib/apt/lists/* # adding custom MS repository @@ -16,6 +14,7 @@ RUN curl https://packages.microsoft.com/config/ubuntu/20.04/prod.list > /etc/apt # install SQL Server drivers and tools RUN apt-get update && ACCEPT_EULA=Y apt-get install -y mssql-tools unixodbc-dev -RUN pip install dist/*.whl RUN pip install pyodbc +COPY dist ./dist +RUN pip install dist/mlflow-*.whl RUN pip list diff --git a/tests/db/build_wheel.sh b/tests/db/build_wheel.sh new file mode 100755 index 0000000000000..b97b4468c0137 --- /dev/null +++ b/tests/db/build_wheel.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -ex + +rm -rf dist +prefix=$(git rev-parse --show-prefix) +pushd $(git rev-parse --show-cdup) +python setup.py bdist_wheel --dist-dir $prefix/dist +popd diff --git a/tests/db/docker-compose.yml b/tests/db/docker-compose.yml index 66b9b4a6605c3..60f037bb3ff60 100644 --- a/tests/db/docker-compose.yml +++ b/tests/db/docker-compose.yml @@ -17,15 +17,17 @@ services: - .:/tmp/mlflow environment: MLFLOW_TRACKING_URI: postgresql://mlflowuser:mlflowpassword@postgres:5432/mlflowdb + command: python run_checks.py --schema-output schemas/postgres.sql mysql: - image: mysql:5.7 + image: mysql restart: always environment: MYSQL_ROOT_PASSWORD: root-password MYSQL_DATABASE: mlflowdb MYSQL_USER: mlflowuser MYSQL_PASSWORD: mlflowpassword + command: mysqld --default-authentication-plugin=mysql_native_password mlflow-mysql: depends_on: @@ -36,6 +38,7 @@ services: - .:/tmp/mlflow environment: MLFLOW_TRACKING_URI: mysql://mlflowuser:mlflowpassword@mysql:3306/mlflowdb + command: python run_checks.py --schema-output schemas/mysql.sql mssql: image: mcr.microsoft.com/mssql/server @@ -54,6 +57,7 @@ services: - .:/tmp/mlflow environment: MLFLOW_TRACKING_URI: mssql+pyodbc://mlflowuser:Mlfl*wpassword1@mssql/mlflowdb?driver=ODBC+Driver+17+for+SQL+Server + command: bash -ex -c "./init-mssql-db.sh && python run_checks.py --schema-output schemas/mssql.sql" mlflow-sqlite: depends_on: @@ -64,3 +68,4 @@ services: - .:/tmp/mlflow environment: MLFLOW_TRACKING_URI: sqlite:////tmp/mlflow/mlflowdb + command: python run_checks.py --schema-output schemas/sqlite.sql diff --git a/tests/db/run_checks.py b/tests/db/run_checks.py index f79ce1e1a4aad..a358f5a0e214e 100644 --- a/tests/db/run_checks.py +++ b/tests/db/run_checks.py @@ -35,6 +35,12 @@ def run_logging_operations(): ) print(mlflow.get_run(run.info.run_id)) + # Ensure the following migration scripts work correctly: + # - cfd24bdc0731_update_run_status_constraint_with_killed.py + # - 0a8213491aaa_drop_duplicate_killed_constraint.py + client = mlflow.tracking.MlflowClient() + client.set_terminated(run_id=run.info.run_id, status="KILLED") + def get_db_schema(): engine = sqlalchemy.create_engine(mlflow.get_tracking_uri())