Skip to content

Commit

Permalink
Print table schema in DB initialization test (#5248)
Browse files Browse the repository at this point in the history
* Print out table schemas in DB initialization check

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* rename

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* fix arg name

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* rename to run_logging_operations

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* remove COPY log.py

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* move assert

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* print schema after logging

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* fix

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* add sqlite

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* update step

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* move parse_args

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* separate step

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* pull

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* remove comments

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>
  • Loading branch information
harupy committed Jan 11, 2022
1 parent d3ddd59 commit 499284d
Show file tree
Hide file tree
Showing 10 changed files with 96 additions and 46 deletions.
21 changes: 11 additions & 10 deletions .github/workflows/master.yml
Expand Up @@ -159,22 +159,23 @@ jobs:
SPARK_LOCAL_IP: 127.0.0.1
run: |
./dev/run-large-python-tests.sh
- name: Run database initialization tests
# Separate build and run to make it easier to explore logs
- name: Run database initialization tests - build
run: |
# Build wheel and copy it under tests/db
python setup.py bdist_wheel
cp -r dist tests/db
# Run tests
cd tests/db
docker-compose pull
docker-compose build
docker-compose run mlflow-postgres python log.py
docker-compose run mlflow-mysql python log.py
- name: Run database initialization tests - run
working-directory: tests/db
run: |
docker-compose run mlflow-sqlite python run_checks.py --schema-output schemas/sqlite.sql
docker-compose run mlflow-postgres python run_checks.py --schema-output schemas/postgres.sql
docker-compose run mlflow-mysql python run_checks.py --schema-output schemas/mysql.sql
docker-compose run mlflow-mssql ./init-mssql-db.sh
docker-compose run mlflow-mssql python log.py
# Clean up
docker-compose down --rmi all --volumes
docker-compose run mlflow-mssql python run_checks.py --schema-output schemas/mssql.sql
docker-compose down --rmi all --volumes --remove-orphans
- name: Run anaconda compatibility tests
run: |
./dev/test-anaconda-compatibility.sh "anaconda3:2020.11"
Expand Down
1 change: 0 additions & 1 deletion tests/db/.dockerignore
@@ -1,6 +1,5 @@
**

!dist/*.whl
!log.py
!init-mssql-db.sh
!init-mssql-db.sql
2 changes: 2 additions & 0 deletions tests/db/.gitignore
@@ -0,0 +1,2 @@
schemas
mlflowdb
2 changes: 0 additions & 2 deletions tests/db/Dockerfile
Expand Up @@ -7,5 +7,3 @@ COPY dist ./dist
RUN pip install dist/*.whl
RUN pip install psycopg2 pymysql mysqlclient
RUN pip list

COPY log.py .
6 changes: 0 additions & 6 deletions tests/db/Dockerfile.mssql
Expand Up @@ -19,9 +19,3 @@ RUN apt-get update && ACCEPT_EULA=Y apt-get install -y mssql-tools unixodbc-dev
RUN pip install dist/*.whl
RUN pip install pyodbc
RUN pip list

COPY log.py .
COPY init-mssql-db.sh .
COPY init-mssql-db.sql .

RUN chmod +x init-mssql-db.sh
16 changes: 16 additions & 0 deletions tests/db/docker-compose.yml
Expand Up @@ -13,6 +13,8 @@ services:
- postgres
build:
context: .
volumes:
- .:/tmp/mlflow
environment:
MLFLOW_TRACKING_URI: postgresql://mlflowuser:mlflowpassword@postgres:5432/mlflowdb

Expand All @@ -30,6 +32,8 @@ services:
- mysql
build:
context: .
volumes:
- .:/tmp/mlflow
environment:
MLFLOW_TRACKING_URI: mysql://mlflowuser:mlflowpassword@mysql:3306/mlflowdb

Expand All @@ -46,5 +50,17 @@ services:
build:
context: .
dockerfile: Dockerfile.mssql
volumes:
- .:/tmp/mlflow
environment:
MLFLOW_TRACKING_URI: mssql+pyodbc://mlflowuser:Mlfl*wpassword1@mssql/mlflowdb?driver=ODBC+Driver+17+for+SQL+Server

# Service that runs the checks against a SQLite tracking store (a file, not a database server).
mlflow-sqlite:
# NOTE(review): the SQLite URI below does not reference the postgres service; this
# dependency looks copied from the other services - confirm whether it is needed.
depends_on:
- postgres
build:
context: .
volumes:
# Mount this directory at /tmp/mlflow inside the container.
- .:/tmp/mlflow
environment:
# Four slashes denote an absolute path: the database file is /tmp/mlflow/mlflowdb,
# which lives inside the mounted directory above.
MLFLOW_TRACKING_URI: sqlite:////tmp/mlflow/mlflowdb
Empty file modified tests/db/init-mssql-db.sh 100644 → 100755
Empty file.
Empty file modified tests/db/init-mssql-db.sql 100644 → 100755
Empty file.
27 changes: 0 additions & 27 deletions tests/db/log.py

This file was deleted.

67 changes: 67 additions & 0 deletions tests/db/run_checks.py
@@ -0,0 +1,67 @@
import os
import argparse

import sqlalchemy
from sqlalchemy.schema import MetaData, CreateTable

import mlflow
from mlflow.tracking._tracking_service.utils import _TRACKING_URI_ENV_VAR


class MockModel(mlflow.pyfunc.PythonModel):
    """No-op pyfunc model; it exists only so that there is a model to log and register."""

    def load_context(self, context):
        """Do nothing: this model has no artifacts or state to load."""
        return None

    def predict(self, context, model_input):
        """Ignore ``model_input`` and return ``None``."""
        return None


def parse_args(argv=None):
    """
    Parse the command-line options of this script.

    :param argv: Optional list of argument strings to parse. ``None`` (the default) makes
                 ``argparse`` read the process arguments, which is the original behavior.
    :return: An ``argparse.Namespace`` whose ``schema_output`` attribute holds the value of
             the required ``--schema-output`` option.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--schema-output", required=True, help="Output path of DB schema")
    return parser.parse_args(argv)


def run_logging_operations():
    """
    Exercise the logging APIs against the configured tracking backend.

    Within a single run this logs one param, one metric, one tag and a ``MockModel``
    pyfunc model registered under the name "mock", then prints what ``mlflow.get_run``
    returns for that run.
    """
    with mlflow.start_run() as active_run:
        tracking_uri = mlflow.get_tracking_uri()
        print("Tracking URI:", tracking_uri)

        mlflow.log_param("p", "param")
        mlflow.log_metric("m", 1.0)
        mlflow.set_tag("t", "tag")

        # Logging with `registered_model_name` also registers the model.
        log_model_kwargs = {
            "artifact_path": "model",
            "python_model": MockModel(),
            "registered_model_name": "mock",
        }
        mlflow.pyfunc.log_model(**log_model_kwargs)

        run_id = active_run.info.run_id
        fetched_run = mlflow.get_run(run_id)
        print(fetched_run)


def get_db_schema():
    """
    Return the ``CREATE TABLE`` statements for all tables in the tracking database.

    The tables are reflected from the database at ``mlflow.get_tracking_uri()`` and emitted
    in the order given by ``MetaData.sorted_tables``.

    :return: A single string containing the statements one after another, with trailing
             whitespace stripped from every line.
    """
    engine = sqlalchemy.create_engine(mlflow.get_tracking_uri())
    try:
        # Pass the engine to `reflect` instead of binding it to the `MetaData` object:
        # `MetaData(bind=...)` is deprecated in SQLAlchemy 1.4 and removed in 2.0.
        created_tables_metadata = MetaData()
        created_tables_metadata.reflect(bind=engine)
    finally:
        # The engine is not used after reflection; release its connection pool.
        engine.dispose()
    # Write out table schema as described in
    # https://docs.sqlalchemy.org/en/13/faq/metadata_schema.html#how-can-i-get-the-create-table-drop-table-output-as-a-string
    lines = []
    for table in created_tables_metadata.sorted_tables:
        # Apply `str.rstrip` to drop trailing whitespace from each rendered line.
        lines.extend(line.rstrip() for line in str(CreateTable(table)).splitlines())
    return "\n".join(lines)


def main():
    """
    Run the logging operations, then print the resulting DB schema and write it to a file.

    Requires the tracking URI environment variable to be set. The schema is written to the
    path given by ``--schema-output``; its parent directory is created if it is missing.
    """
    assert (
        _TRACKING_URI_ENV_VAR in os.environ
    ), "The tracking URI environment variable must be set"

    args = parse_args()
    run_logging_operations()
    schema = get_db_schema()
    title = "Schema"
    print("=" * 10, title, "=" * 10)
    print(schema)
    # Footer is as wide as the header: two runs of 10 "=" plus the two separating spaces.
    print("=" * (20 + 2 + len(title)))
    # `os.path.dirname` returns "" for a bare file name and `os.makedirs("")` raises
    # `FileNotFoundError`, so only create the directory when the path actually has one.
    output_dir = os.path.dirname(args.schema_output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(args.schema_output, "w") as f:
        f.write(schema)


# Run the checks only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

0 comments on commit 499284d

Please sign in to comment.