Skip to content

Commit

Permalink
[updater] update models 110 to 130 (#1218)
Browse files Browse the repository at this point in the history
* WIP

* WIP

* WIP

* update

* update

* move sandbox.py to common utils;
add invoker_cmd_sandbox_version to ymir controller

* update

* add sandbox state: SANDBOX_STATE_UNKNOWN

* WIP: test cases for invoker_cmd_sandbox_version.py

* add RequestType.SANDBOX_VERSION

* fix invoker_cmd_sandbox_version test case

* controller: fix flake8 and mypy errors

* update

* update

* update

* update

* remove unused imports

* update

* fix ymir 1.1.0 mir_command files

* update

* update

* update

* WIP

* get mir_command_pb2 from git

* update

* update

* add env version check

* leave main.py for ymir_app unchanged

* update

* update

* app, ctl and cmd use YMIR_VERSION from mir/version.py

* using YMIR_VERSION from mir.version

* update

* remove SandboxInfo

* update

* update

* UpdateErrorCode -> UpdaterErrorCode

* WIP

* update test case for controller sandbox version invoker

* update

* update

* update

* update

* update

* update

* update

* update

* update

* revert deps.py

* update

* update

* update

* labels.py: add validator for ymir_version

* remove update_errors

* remove some unused error codes

* update doc

* update

* labels.py: change validator
update start.py: remove UpdateError

* remove EXPECTED_YMIR_VERSION

* update

* update

* update

* split sandbox.py into sandbox_util.py and sandbox_updater.py

* update

* update

* update

* SANDBOX_VERSION -> CMD_VERSION_GET

* update

* update

* update

* update ignore files; add some logging to sandbox_util

* update

* WIP

* fix dependency of step updater 110 to 130

* update mirpb for 130

* add mso110 as reader

* WIP: step updater

* WIP

* move some functions from mir_storage to mir_storage_ops

* update

* update

* WIP

* 110 -> 130 update dev done

* add user labels updater

* remove git config file mode false from dockerfile backend (moved into git.config)

* add step updater doc;
add class_names when training finished

* add class_names when training finished

* update

* revert changes

* revert changes to mir storage ops in ymir-cmd

* remove unused code

* update

* add update sub command to ymir.sh

* update

* update

* update

* rename variables

* update

* add update_proto_py in step updater 110->130

* ignore all pyi files in ymir updater

* remove pyi files

* update ignore

* WIP: updating models

* add a smaller pydantic module;
WIP: model update

* WIP

* stop service before update

* update model structure

* update

* update

* update

* update

* add ymir version to model package

* update

* update

* update

* update

* update

* update

* in ymir-info.yaml: ymir_version -> package_version

* set backup dir to sandbox/ymir-models-bk and sandbox/sandbox-bk

* update

* update

* update

* update

* in ModelStorage: package_version is a required field

* check model structure before update

* update

* update

Co-authored-by: ymir <ymir@intellif.com>
  • Loading branch information
fenrir-z and ymir committed Oct 10, 2022
1 parent 4e5bda7 commit a831843
Show file tree
Hide file tree
Showing 14 changed files with 182 additions and 42 deletions.
76 changes: 48 additions & 28 deletions ymir/backend/src/common/common_utils/sandbox_updater.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
import logging
import os
import shutil
from typing import Callable, List
from typing import Callable, List, Optional, Tuple

import yaml

from common_utils.sandbox_util import detect_users_and_repos, SandboxError
from common_utils.sandbox_util import detect_users_and_repos
from common_utils.version import ymir_salient_version
from id_definition.error_codes import UpdaterErrorCode

from update_1_1_0_to_1_3_0.step_updater import update_all as update_110_130
from update_1_1_0_to_1_3_0.step_updater import update_models as update_models_110_130
from update_1_1_0_to_1_3_0.step_updater import update_repo as update_repo_110_130


_RepoUpdaterType = Callable[[str, str, str], None]
_ModelsUpdaterType = Callable[[str], None]
_StepUpdaterType = Tuple[Optional[_RepoUpdaterType], Optional[_ModelsUpdaterType]]


def update(sandbox_root: str, assets_root: str, models_root: str, src_ver: str, dst_ver: str) -> None:
Expand All @@ -21,56 +23,74 @@ def update(sandbox_root: str, assets_root: str, models_root: str, src_ver: str,
logging.info(f"nothing to update {src_ver} -> {dst_ver}")
return

_backup(sandbox_root)
_backup(sandbox_root=sandbox_root, models_root=models_root)

# update
user_to_repos = detect_users_and_repos(sandbox_root)
try:
for repo_func in steps:
for user_id, repo_ids in user_to_repos.items():
for repo_id in repo_ids:
repo_func(os.path.join(sandbox_root, user_id, repo_id), assets_root, models_root)
for repo_func, models_func in steps:
# update user repos
if repo_func:
for user_id, repo_ids in user_to_repos.items():
for repo_id in repo_ids:
repo_func(os.path.join(sandbox_root, user_id, repo_id), assets_root, models_root)
# update models
if models_func:
models_func(models_root)

for user_id in user_to_repos:
_update_user_labels(label_path=os.path.join(sandbox_root, user_id, 'labels.yaml'), dst_ver=dst_ver)
except Exception as e:
_roll_back(sandbox_root)
_roll_back(sandbox_root=sandbox_root, models_root=models_root)
raise e

# cleanup
shutil.rmtree(os.path.join(sandbox_root, 'backup'))
shutil.rmtree(os.path.join(sandbox_root, 'sandbox-bk'))
shutil.rmtree(os.path.join(sandbox_root, 'ymir-models-bk'))


def _backup(sandbox_root: str) -> None:
backup_dir = os.path.join(sandbox_root, 'backup')
os.makedirs(backup_dir, exist_ok=True)
if os.listdir(backup_dir):
raise SandboxError(error_code=UpdaterErrorCode.BACKUP_DIR_NOT_EMPTY,
error_message=f"Backup directory not empty: {backup_dir}")

def _backup(sandbox_root: str, models_root: str) -> None:
# user dirs in sandbox_root
sandbox_backup_dir = os.path.join(sandbox_root, 'sandbox-bk')
os.makedirs(sandbox_backup_dir, exist_ok=False)
for user_id in detect_users_and_repos(sandbox_root):
shutil.copytree(src=os.path.join(sandbox_root, user_id), dst=os.path.join(backup_dir, user_id), symlinks=True)
shutil.copytree(src=os.path.join(sandbox_root, user_id),
dst=os.path.join(sandbox_backup_dir, user_id),
symlinks=True)

models_backup_dir = os.path.join(sandbox_root, 'ymir-models-bk')
shutil.copytree(src=models_root, dst=models_backup_dir)


def _roll_back(sandbox_root: str) -> None:
backup_dir = os.path.join(sandbox_root, 'backup')
def _roll_back(sandbox_root: str, models_root: str) -> None:
sandbox_backup_dir = os.path.join(sandbox_root, 'sandbox-bk')
for user_id in detect_users_and_repos(sandbox_root):
src_user_dir = os.path.join(backup_dir, user_id)
src_user_dir = os.path.join(sandbox_backup_dir, user_id)
dst_user_dir = os.path.join(sandbox_root, user_id)
shutil.rmtree(dst_user_dir)
shutil.copytree(src=src_user_dir, dst=dst_user_dir, symlinks=True)

shutil.rmtree(os.path.join(sandbox_root, 'backup'))

shutil.move(src=src_user_dir, dst=dst_user_dir)

# models_root
models_backup_dir = os.path.join(sandbox_root, 'ymir-models-bk')
for model_hash in os.listdir(models_backup_dir):
src_model_path = os.path.join(models_backup_dir, model_hash)
if not os.path.isfile(src_model_path):
continue
dst_model_path = os.path.join(models_root, model_hash)
os.remove(dst_model_path)
shutil.move(src=src_model_path, dst=dst_model_path)

shutil.rmtree(sandbox_backup_dir)
shutil.rmtree(models_backup_dir)
logging.info('roll back done')


def _get_update_steps(src_ver: str, dst_ver: str) -> List[_RepoUpdaterType]:
def _get_update_steps(src_ver: str, dst_ver: str) -> List[_StepUpdaterType]:
eq_src_ver = ymir_salient_version(src_ver)
eq_dst_ver = ymir_salient_version(dst_ver)

_UPDATE_NODES: List[str] = ['1.1.0', '1.3.0']
_UPDATE_FUNCS: List[_RepoUpdaterType] = [update_110_130]
_UPDATE_FUNCS: List[_StepUpdaterType] = [(update_repo_110_130, update_models_110_130)]
return _UPDATE_FUNCS[_UPDATE_NODES.index(eq_src_ver):_UPDATE_NODES.index(eq_dst_ver)]


Expand Down
3 changes: 1 addition & 2 deletions ymir/backend/src/common/id_definition/error_codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,5 +176,4 @@ class APIErrorCode(IntEnum):

class UpdaterErrorCode(IntEnum):
INVALID_USER_LABEL_FILE = 170001
BACKUP_DIR_NOT_EMPTY = 170002
SANDBOX_VERSION_NOT_SUPPORTED = 170003
SANDBOX_VERSION_NOT_SUPPORTED = 170002
6 changes: 6 additions & 0 deletions ymir/command/mir/commands/import_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import shutil
import tarfile
from mir.version import DEFAULT_YMIR_SRC_VERSION, YMIR_VERSION, ymir_model_salient_version

import yaml

Expand Down Expand Up @@ -54,6 +55,11 @@ def run_with_args(mir_root: str, dst_rev: str, src_revs: str, work_dir: str, pac

with open(os.path.join(extract_model_dir_path, 'ymir-info.yaml'), 'r') as f:
ymir_info_dict = yaml.safe_load(f.read())

package_version = ymir_info_dict.get('package_version', DEFAULT_YMIR_SRC_VERSION)
if ymir_model_salient_version(package_version) != ymir_model_salient_version(YMIR_VERSION):
raise MirRuntimeError(error_code=MirCode.RC_CMD_INVALID_MODEL_PACKAGE_VERSION,
error_message=f"Invalid model package version: {package_version}")
model_storage = models.ModelStorage.parse_obj(ymir_info_dict)

logging.info(f"importing model with storage: {model_storage}")
Expand Down
4 changes: 3 additions & 1 deletion ymir/command/mir/commands/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import time
from subprocess import CalledProcessError
from typing import Any, Dict, List, Optional, Tuple
from mir.version import ymir_model_salient_version, YMIR_VERSION

from tensorboardX import SummaryWriter
import yaml
Expand Down Expand Up @@ -35,7 +36,8 @@ def _find_and_save_model(out_root: str, model_upload_location: str, executor_con
type=mirpb.TaskType.TaskTypeTraining),
stages=model_stages,
best_stage_name=best_stage_name,
attachments=attachments)
attachments=attachments,
package_version=ymir_model_salient_version(YMIR_VERSION))
models.pack_and_copy_models(model_storage=model_storage,
model_dir_path=out_model_dir,
model_location=model_upload_location)
Expand Down
1 change: 1 addition & 0 deletions ymir/command/mir/tools/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class MirCode(IntEnum):
RC_CMD_OPENPAI_ERROR = 160012
RC_CMD_NO_ANNOTATIONS = 160013
RC_CMD_CAN_NOT_CALC_CONFUSION_MATRIX = 160014
RC_CMD_INVALID_MODEL_PACKAGE_VERSION = 160015 # invalid model package version
RC_CMD_ERROR_UNKNOWN = 169999


Expand Down
1 change: 1 addition & 0 deletions ymir/command/mir/tools/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class ModelStorage(BaseModel):
model_hash: str = ''
stage_name: str = ''
attachments: Dict[str, List[str]] = {}
package_version: str = Field(..., min_length=1)

@property
def class_names(self) -> List[str]:
Expand Down
11 changes: 11 additions & 0 deletions ymir/command/mir/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,14 @@ def ymir_salient_version(ver: str) -> str:
'1.3.0': '1.3.0',
}
return _SALIENT_VERSIONS[ver]


def ymir_model_salient_version(ver: str) -> str:
    """
    Look up the model package version that corresponds to a ymir version.

    Args:
        ver: ymir version string.

    Returns:
        The model package version registered for `ver`.

    Raises:
        KeyError: if `ver` is not one of the registered versions.
    """
    # Each registered version currently maps to itself.
    known_versions = (DEFAULT_YMIR_SRC_VERSION, '1.3.0')
    package_versions = {known: known for known in known_versions}
    return package_versions[ver]
4 changes: 3 additions & 1 deletion ymir/command/tests/unit/test_cmd_import_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from mir.protos import mir_command_pb2 as mirpb
from mir.tools import mir_storage_ops, models, settings as mir_settings
from mir.tools.code import MirCode
from mir.version import YMIR_VERSION
from tests import utils as test_utils


Expand Down Expand Up @@ -64,7 +65,8 @@ def _prepare_model(self):
'mAP': 0.5
},
stages={mss.stage_name: mss},
best_stage_name=mss.stage_name)
best_stage_name=mss.stage_name,
package_version=YMIR_VERSION)
with open(os.path.join(self._src_model_root, 'ymir-info.yaml'), 'w') as f:
yaml.safe_dump(model_storage.dict(), f)
with tarfile.open(self._src_model_package_path, 'w:gz') as tar_gz_f:
Expand Down
4 changes: 3 additions & 1 deletion ymir/command/tests/unit/test_cmd_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import time
import unittest
from unittest import mock
from mir.version import YMIR_VERSION

import yaml

Expand Down Expand Up @@ -90,7 +91,8 @@ def _prepare_model(self):
'dst_rev': 'a'
},
stages={model_stage.stage_name: model_stage},
best_stage_name=model_stage.stage_name)
best_stage_name=model_stage.stage_name,
package_version=YMIR_VERSION)

with open(os.path.join(self._models_location, 'ymir-info.yaml'), 'w') as f:
yaml.dump(model_storage.dict(), f)
Expand Down
4 changes: 3 additions & 1 deletion ymir/command/tests/unit/test_cmd_mining.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from unittest import mock

from google.protobuf.json_format import ParseDict
from mir.version import YMIR_VERSION, ymir_model_salient_version
import yaml

from mir.commands.mining import CmdMining
Expand Down Expand Up @@ -89,7 +90,8 @@ def _mock_prepare_model(*args, **kwargs):
stages={mss.stage_name: mss},
best_stage_name=mss.stage_name,
model_hash='xyz',
stage_name=mss.stage_name)
stage_name=mss.stage_name,
package_version=ymir_model_salient_version(YMIR_VERSION))
return ms

# protected: custom: env prepare
Expand Down
7 changes: 4 additions & 3 deletions ymir/command/tests/unit/test_cmd_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@

from mir.commands import training
from mir.protos import mir_command_pb2 as mirpb
from mir.tools import mir_repo_utils, mir_storage_ops, models, settings as mir_settings, mir_storage
from mir.tools import mir_storage_ops, models, settings as mir_settings, mir_storage
from mir.tools.code import MirCode
from mir.tools.errors import MirRuntimeError
from mir.tools.mir_storage import sha1sum_for_file
from mir.version import ymir_model_salient_version, YMIR_VERSION
from tests import utils as test_utils


Expand Down Expand Up @@ -235,7 +235,8 @@ def __mock_process_model_storage(*args, **kwargs):
},
stages={mss.stage_name: mss},
best_stage_name=mss.stage_name,
model_hash='xyz')
model_hash='xyz',
package_version=ymir_model_salient_version(YMIR_VERSION))
return ms

# public: test cases
Expand Down
3 changes: 3 additions & 0 deletions ymir/updater/app/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
fasteners==0.16.3
protobuf==3.18.1
pyyaml==5.4.1
pydantic==1.9.0

--no-binary=pydantic
11 changes: 11 additions & 0 deletions ymir/updater/app/tools.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@

import os
import re
from typing import List

from mir.scm.cmd import CmdScm


# repo funcs
def get_repo_tags(mir_root: str) -> List[str]:
git_cmd = CmdScm(working_dir=mir_root, scm_executable='git')
tags: str = git_cmd.tag()
Expand All @@ -13,3 +16,11 @@ def get_repo_tags(mir_root: str) -> List[str]:
def remove_old_tag(mir_root: str, tag: str) -> None:
git_cmd = CmdScm(working_dir=mir_root, scm_executable='git')
git_cmd.tag(['-d', tag])


# detect models
def get_model_hashes(models_root: str) -> List[str]:
    """
    List the model package files found directly under `models_root`.

    An entry counts as a model hash when its name is exactly 40 characters
    long and it is a regular file (directories are ignored).

    Args:
        models_root: directory to scan (not recursive).

    Returns:
        Matching entry names, in the order reported by `os.listdir`.
    """
    # fullmatch instead of match + `$`: `$` also matches just before a
    # trailing newline, which would let a 41-character name slip through.
    hash_pattern = re.compile(r'.{40}')
    return [
        name for name in os.listdir(models_root)
        if hash_pattern.fullmatch(name) and os.path.isfile(os.path.join(models_root, name))
    ]

0 comments on commit a831843

Please sign in to comment.