Skip to content

Commit

Permalink
fix(cli): Deflake wandb verify (#4438)
Browse files Browse the repository at this point in the history
fix(cli): Deflake wandb verify
  • Loading branch information
vanpelt authored and andrewtruong committed Dec 2, 2022
1 parent 75ea890 commit 2556966
Showing 1 changed file with 21 additions and 5 deletions.
26 changes: 21 additions & 5 deletions wandb/sdk/verify/verify.py
Expand Up @@ -23,6 +23,11 @@
# Minimum retry count; presumably consumed by the retry helper used in the
# checks — TODO confirm against its use elsewhere in the file.
MIN_RETRYS = 3
# U+2705 (check-mark emoji).
CHECKMARK = "\u2705"
# U+274C (cross-mark emoji).
RED_X = "\u274C"
# Id generated once, at import time, via wandb.util.generate_id(); nice_id()
# prepends it to the name it is given.
ID_PREFIX = wandb.util.generate_id()


def nice_id(name):
    """Return ``name`` prefixed with the module-level ``ID_PREFIX`` and a hyphen.

    The result has the form ``"<ID_PREFIX>-<name>"``.
    """
    return "-".join((ID_PREFIX, name))


def print_results(
Expand Down Expand Up @@ -125,7 +130,9 @@ def check_run(api: Api) -> bool:
f.write("test")
f.close()

with wandb.init(reinit=True, config=config, project=PROJECT_NAME) as run:
with wandb.init(
id=nice_id("check_run"), reinit=True, config=config, project=PROJECT_NAME
) as run:
run_id = run.id
entity = run.entity
logged = True
Expand Down Expand Up @@ -180,7 +187,9 @@ def check_run(api: Api) -> bool:
# TODO: (kdg) refactor this so it doesn't rely on an exception handler
try:
read_file = retry_fn(partial(prev_run.file, filepath))
read_file = read_file.download(replace=True)
# There's a race where the file may not have been processed from the queue
# yet, so we retry the download.
read_file = retry_fn(partial(read_file.download, replace=True))
except Exception:
failed_test_strings.append(
"Unable to download file. Check SQS configuration, topic configuration and bucket permissions."
Expand Down Expand Up @@ -261,7 +270,10 @@ def log_use_download_artifact(
add_extra_file: bool,
) -> Tuple[bool, Optional["ArtifactAPI"], List[str]]:
with wandb.init(
reinit=True, project=PROJECT_NAME, config={"test": "artifact log"}
id=nice_id("log_artifact"),
reinit=True,
project=PROJECT_NAME,
config={"test": "artifact log"},
) as log_art_run:

if add_extra_file:
Expand All @@ -277,6 +289,7 @@ def log_use_download_artifact(
return False, None, failed_test_strings

with wandb.init(
id=nice_id("use_artifact"),
project=PROJECT_NAME,
config={"test": "artifact use"},
) as use_art_run:
Expand Down Expand Up @@ -363,7 +376,10 @@ def check_graphql_put(api: Api, host: str) -> Tuple[bool, Optional[str]]:
f.write("test2")
f.close()
with wandb.init(
reinit=True, project=PROJECT_NAME, config={"test": "put to graphql"}
id=nice_id("graphql_put"),
reinit=True,
project=PROJECT_NAME,
config={"test": "put to graphql"},
) as run:
wandb.save(gql_fp)
public_api = wandb.Api()
Expand All @@ -378,7 +394,7 @@ def check_graphql_put(api: Api, host: str) -> Tuple[bool, Optional[str]]:
try:
read_file = retry_fn(partial(prev_run.file, gql_fp))
url = read_file.url
read_file = read_file.download(replace=True)
read_file = retry_fn(partial(read_file.download, replace=True))
except Exception:
failed_test_strings.append(
"Unable to read file successfully saved through a put request. Check SQS configurations, bucket permissions and topic configs."
Expand Down

0 comments on commit 2556966

Please sign in to comment.