From ae7d7af4c36069f020c23810e89d6f54ae31084b Mon Sep 17 00:00:00 2001 From: gtarpenning Date: Mon, 7 Nov 2022 17:38:30 -0800 Subject: [PATCH 1/3] first pass, massive improvements in fetch efficiency --- wandb/sdk/launch/utils.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/wandb/sdk/launch/utils.py b/wandb/sdk/launch/utils.py index 76c2a577ad9..2c1e78f541f 100644 --- a/wandb/sdk/launch/utils.py +++ b/wandb/sdk/launch/utils.py @@ -414,6 +414,24 @@ def apply_patch(patch_string: str, dst_dir: str) -> None: raise wandb.Error("Failed to apply diff.patch associated with run.") +def _make_refspec_from_version(version: Optional[str]) -> List[str]: + """ + Helper to create a refspec that checks for the existence of origin/main + and the version, if provided. + """ + refspec = [ + "+refs/heads/main*:refs/remotes/origin/main*", + "+refs/heads/master*:refs/remotes/origin/master*", + ] + if not version or version in ["main", "master"]: + return refspec + elif len(version) == 40: # if hash, only return hash + # TODO(gst): need better check for SHA here + return [f"+{version}"] + else: + return refspec + [f"+refs/heads/{version}*:refs/remotes/origin/{version}*"] + + def _fetch_git_repo(dst_dir: str, uri: str, version: Optional[str]) -> str: """Clones the git repo at ``uri`` into ``dst_dir``. @@ -428,7 +446,8 @@ def _fetch_git_repo(dst_dir: str, uri: str, version: Optional[str]) -> str: _logger.info("Fetching git repo") repo = git.Repo.init(dst_dir) origin = repo.create_remote("origin", uri) - origin.fetch() + refspec = _make_refspec_from_version(version) + origin.fetch(refspec=refspec, depth=1) if version is not None: try: From 9ce39c9f6879ecb1306fd4018126eced0db90f9c Mon Sep 17 00:00:00 2001 From: gtarpenning Date: Mon, 7 Nov 2022 18:23:30 -0800 Subject: [PATCH 2/3] wip with explicit sha handling --- wandb/sdk/launch/utils.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/wandb/sdk/launch/utils.py b/wandb/sdk/launch/utils.py index 2c1e78f541f..a5614c2e778 100644 --- a/wandb/sdk/launch/utils.py +++ b/wandb/sdk/launch/utils.py @@ -419,17 +419,21 @@ def _make_refspec_from_version(version: Optional[str]) -> List[str]: Helper to create a refspec that checks for the existence of origin/main and the version, if provided. """ - refspec = [ + + def _is_sha1(version: str): + if len(version) == 40: + return True + return False + + if version: + if _is_sha1(version): + return [f"+{version}"] + else: + return [f"+refs/heads/{version}*:refs/remotes/origin/{version}*"] + return [ "+refs/heads/main*:refs/remotes/origin/main*", "+refs/heads/master*:refs/remotes/origin/master*", ] - if not version or version in ["main", "master"]: - return refspec - elif len(version) == 40: # if hash, only return hash - # TODO(gst): need better check for SHA here - return [f"+{version}"] - else: - return refspec + [f"+refs/heads/{version}*:refs/remotes/origin/{version}*"] def _fetch_git_repo(dst_dir: str, uri: str, version: Optional[str]) -> str: @@ -443,11 +447,17 @@ def _fetch_git_repo(dst_dir: str, uri: str, version: Optional[str]) -> str: # executable is available on the PATH, so we only want to fail if we actually need it. import git # type: ignore + import time + _logger.info("Fetching git repo") repo = git.Repo.init(dst_dir) origin = repo.create_remote("origin", uri) refspec = _make_refspec_from_version(version) + + tic = time.perf_counter() origin.fetch(refspec=refspec, depth=1) + toc = time.perf_counter() + wandb.termlog(f"fetched: {[x.name for x in origin.refs]} in {toc-tic:0.4f} seconds") if version is not None: try: From e56b68bb2732aae18ae63c466c1584ff2599244c Mon Sep 17 00:00:00 2001 From: gtarpenning Date: Mon, 7 Nov 2022 18:24:27 -0800 Subject: [PATCH 3/3] stripped down version --- wandb/sdk/launch/utils.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/wandb/sdk/launch/utils.py b/wandb/sdk/launch/utils.py index a5614c2e778..54bdabb21aa 100644 --- a/wandb/sdk/launch/utils.py +++ b/wandb/sdk/launch/utils.py @@ -419,17 +419,9 @@ def _make_refspec_from_version(version: Optional[str]) -> List[str]: Helper to create a refspec that checks for the existence of origin/main and the version, if provided. """ - - def _is_sha1(version: str): - if len(version) == 40: - return True - return False - if version: - if _is_sha1(version): - return [f"+{version}"] - else: - return [f"+refs/heads/{version}*:refs/remotes/origin/{version}*"] + return [f"+{version}"] + return [ "+refs/heads/main*:refs/remotes/origin/main*", "+refs/heads/master*:refs/remotes/origin/master*", @@ -447,17 +439,11 @@ def _fetch_git_repo(dst_dir: str, uri: str, version: Optional[str]) -> str: # executable is available on the PATH, so we only want to fail if we actually need it. import git # type: ignore - import time - _logger.info("Fetching git repo") repo = git.Repo.init(dst_dir) origin = repo.create_remote("origin", uri) refspec = _make_refspec_from_version(version) - - tic = time.perf_counter() origin.fetch(refspec=refspec, depth=1) - toc = time.perf_counter() - wandb.termlog(f"fetched: {[x.name for x in origin.refs]} in {toc-tic:0.4f} seconds") if version is not None: try: