Skip to content

Commit

Permalink
fix_resume_logger (#3375)
Browse files: browse the repository at this point in the history
Signed-off-by: Peng Zhang <pengz@uber.com>
  • Loading branch information
irasit committed Jan 24, 2022
1 parent d261b5e commit e0e982f
Showing 1 changed file with 20 additions and 9 deletions.
29 changes: 20 additions & 9 deletions horovod/spark/lightning/remote.py
Expand Up @@ -123,15 +123,26 @@ def train(serialized_model):
train_logger = TensorBoardLogger(logs_path)
print(f"Setup logger: Using TensorBoardLogger: {train_logger}")

elif isinstance(logger, CometLogger) and logger._experiment_key is None:
# Resume logger experiment key if passed correctly from CPU.
train_logger = CometLogger(
save_dir=logs_path,
api_key=logger.api_key,
experiment_key=logger_experiment_key,
)

print(f"Setup logger: Resume comet logger: {vars(train_logger)}")
elif isinstance(logger, CometLogger):
if logger._experiment_key:
# use logger passed in.
train_logger = logger
train_logger._save_dir = logs_path
print(f"Setup logger: change save_dir of the logger to {logs_path}")

elif logger_experiment_key:
# Resume logger experiment with new log path if key passed correctly from CPU.
train_logger = CometLogger(
save_dir=logs_path,
api_key=logger.api_key,
experiment_key=logger_experiment_key,
)

print(f"Setup logger: Resume comet logger: {vars(train_logger)}")

else:
print(f"Failed to setup or resume comet logger. origin logger: {vars(logger)}")

else:
# use logger passed in.
train_logger = logger
Expand Down

0 comments on commit e0e982f

Please sign in to comment.