From e15e9e279defc1d4231dcf53520912258d552009 Mon Sep 17 00:00:00 2001 From: Chris Van Pelt Date: Thu, 3 Nov 2022 16:50:11 -0700 Subject: [PATCH 1/2] Deflake wandb verify --- wandb/sdk/verify/verify.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/wandb/sdk/verify/verify.py b/wandb/sdk/verify/verify.py index 3d0b1829cb1..4a0d55668f4 100644 --- a/wandb/sdk/verify/verify.py +++ b/wandb/sdk/verify/verify.py @@ -23,6 +23,11 @@ MIN_RETRYS = 3 CHECKMARK = "\u2705" RED_X = "\u274C" +ID_PREFIX = wandb.util.generate_id() + + +def nice_id(name): + return ID_PREFIX + "-" + name def print_results( @@ -125,7 +130,7 @@ def check_run(api: Api) -> bool: f.write("test") f.close() - with wandb.init(reinit=True, config=config, project=PROJECT_NAME) as run: + with wandb.init(id=nice_id("check_run"), reinit=True, config=config, project=PROJECT_NAME) as run: run_id = run.id entity = run.entity logged = True @@ -180,7 +185,9 @@ def check_run(api: Api) -> bool: # TODO: (kdg) refactor this so it doesn't rely on an exception handler try: read_file = retry_fn(partial(prev_run.file, filepath)) - read_file = read_file.download(replace=True) + # There's a race where the file hasn't been processed in the queue, + # we just retry until we get a download + read_file = retry_fn(partial(read_file.download, replace=True)) except Exception: failed_test_strings.append( "Unable to download file. Check SQS configuration, topic configuration and bucket permissions." @@ -261,7 +268,7 @@ def log_use_download_artifact( add_extra_file: bool, ) -> Tuple[bool, Optional["ArtifactAPI"], List[str]]: with wandb.init( - reinit=True, project=PROJECT_NAME, config={"test": "artifact log"} + id=nice_id("log_artifact"), reinit=True, project=PROJECT_NAME, config={"test": "artifact log"} ) as log_art_run: if add_extra_file: @@ -277,6 +284,7 @@ def log_use_download_artifact( return False, None, failed_test_strings with wandb.init( + id=nice_id("use_artifact"), project=PROJECT_NAME, config={"test": "artifact use"}, ) as use_art_run: @@ -363,7 +371,7 @@ def check_graphql_put(api: Api, host: str) -> Tuple[bool, Optional[str]]: f.write("test2") f.close() with wandb.init( - reinit=True, project=PROJECT_NAME, config={"test": "put to graphql"} + id=nice_id("graphql_put"), reinit=True, project=PROJECT_NAME, config={"test": "put to graphql"} ) as run: wandb.save(gql_fp) public_api = wandb.Api() @@ -378,7 +386,7 @@ def check_graphql_put(api: Api, host: str) -> Tuple[bool, Optional[str]]: try: read_file = retry_fn(partial(prev_run.file, gql_fp)) url = read_file.url - read_file = read_file.download(replace=True) + read_file = retry_fn(partial(read_file.download, replace=True)) except Exception: failed_test_strings.append( "Unable to read file successfully saved through a put request. Check SQS configurations, bucket permissions and topic configs." From e28f9cc0c891a6dd8a2fb8bbfcd583c553217a70 Mon Sep 17 00:00:00 2001 From: Chris Van Pelt Date: Thu, 3 Nov 2022 18:04:40 -0700 Subject: [PATCH 2/2] Formatting --- wandb/sdk/verify/verify.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/wandb/sdk/verify/verify.py b/wandb/sdk/verify/verify.py index 4a0d55668f4..4baa4830062 100644 --- a/wandb/sdk/verify/verify.py +++ b/wandb/sdk/verify/verify.py @@ -130,7 +130,9 @@ def check_run(api: Api) -> bool: f.write("test") f.close() - with wandb.init(id=nice_id("check_run"), reinit=True, config=config, project=PROJECT_NAME) as run: + with wandb.init( + id=nice_id("check_run"), reinit=True, config=config, project=PROJECT_NAME + ) as run: run_id = run.id entity = run.entity logged = True @@ -268,7 +270,10 @@ def log_use_download_artifact( add_extra_file: bool, ) -> Tuple[bool, Optional["ArtifactAPI"], List[str]]: with wandb.init( - id=nice_id("log_artifact"), reinit=True, project=PROJECT_NAME, config={"test": "artifact log"} + id=nice_id("log_artifact"), + reinit=True, + project=PROJECT_NAME, + config={"test": "artifact log"}, ) as log_art_run: if add_extra_file: @@ -371,7 +376,10 @@ def check_graphql_put(api: Api, host: str) -> Tuple[bool, Optional[str]]: f.write("test2") f.close() with wandb.init( - id=nice_id("graphql_put"), reinit=True, project=PROJECT_NAME, config={"test": "put to graphql"} + id=nice_id("graphql_put"), + reinit=True, + project=PROJECT_NAME, + config={"test": "put to graphql"}, ) as run: wandb.save(gql_fp) public_api = wandb.Api()