From eac6c3fadd78306170026d40708f85a1eee8944f Mon Sep 17 00:00:00 2001
From: Luca Antiga
Date: Mon, 21 Nov 2022 21:53:25 +0100
Subject: [PATCH] Switch from tensorboard to tensorboardx in logger (#15728)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Switch from tensorboard to tensorboardx in logger
* Warn if log_graph is set to True but tensorboard is not installed
* Fix warning message formatting
* Apply suggestions from code review
* simplify for TBX as required pkg
* docs example
* chlog
* tbx 2.2

Co-authored-by: Luca Antiga
Co-authored-by: William Falcon
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Carlos Mocholí
Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Co-authored-by: Jirka
(cherry picked from commit 9c2eb52c866d98768c3fb43c61cf035b2e3da0ae)
---
 requirements/pytorch/base.txt                 |  2 +-
 requirements/pytorch/extra.txt                |  1 -
 requirements/pytorch/test.txt                 |  3 +++
 src/pytorch_lightning/CHANGELOG.md            |  4 ++-
 src/pytorch_lightning/loggers/tensorboard.py  | 25 +++++++++++++++----
 tests/tests_pytorch/conftest.py               |  1 +
 .../tests_pytorch/loggers/test_tensorboard.py |  2 ++
 7 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt
index ad9573493ae6f..374fbcb41e54a 100644
--- a/requirements/pytorch/base.txt
+++ b/requirements/pytorch/base.txt
@@ -6,7 +6,7 @@ torch>=1.9.*, <=1.13.0
 tqdm>=4.57.0, <4.65.0
 PyYAML>=5.4, <=6.0
 fsspec[http]>2021.06.0, <2022.8.0
-tensorboard>=2.9.1, <2.12.0
+tensorboardX>=2.2, <=2.5.1  # min version is set by torch.onnx missing attribute
 torchmetrics>=0.7.0, <0.10.1  # needed for using fixed compare_version
 packaging>=17.0, <=21.3
 typing-extensions>=4.0.0, <=4.4.0
diff --git a/requirements/pytorch/extra.txt b/requirements/pytorch/extra.txt
index 471f0aafbd50b..3eb221d020230 100644
--- a/requirements/pytorch/extra.txt
+++ b/requirements/pytorch/extra.txt
@@ -7,4 +7,3 @@ omegaconf>=2.0.5, <2.3.0
 hydra-core>=1.0.5, <1.3.0
 jsonargparse[signatures]>=4.15.2, <4.16.0
 rich>=10.14.0, !=10.15.0.a, <13.0.0
-protobuf<=3.20.1  # strict  # an extra is updating protobuf, this pin prevents TensorBoard failure
diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt
index 5ba99b269e002..d27e3677690a5 100644
--- a/requirements/pytorch/test.txt
+++ b/requirements/pytorch/test.txt
@@ -14,3 +14,6 @@ psutil<5.9.4  # for `DeviceStatsMonitor`
 pandas>1.0, <1.5.2  # needed in benchmarks
 fastapi<0.87.0
 uvicorn<0.19.1
+
+tensorboard>=2.9.1, <2.12.0
+protobuf<=3.20.1  # strict  # an extra is updating protobuf, this pin prevents TensorBoard failure
diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md
index 5420f2ed2a446..b22ee5c8f994c 100644
--- a/src/pytorch_lightning/CHANGELOG.md
+++ b/src/pytorch_lightning/CHANGELOG.md
@@ -16,6 +16,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Temporarily removed support for Hydra multi-run ([#15737](https://github.com/Lightning-AI/lightning/pull/15737))
 
+- Switch from `tensorboard` to `tensorboardx` in `TensorBoardLogger` ([#15728](https://github.com/Lightning-AI/lightning/pull/15728))
+
+
 ### Fixed
 
 -
 
@@ -46,7 +49,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ## [1.8.0] - 2022-11-01
 
-
 ### Added
 
 - Added support for requeueing slurm array jobs ([#15040](https://github.com/Lightning-AI/lightning/pull/15040))
diff --git a/src/pytorch_lightning/loggers/tensorboard.py b/src/pytorch_lightning/loggers/tensorboard.py
index 50d6e95add25b..1c840a3dea7e1 100644
--- a/src/pytorch_lightning/loggers/tensorboard.py
+++ b/src/pytorch_lightning/loggers/tensorboard.py
@@ -22,9 +22,10 @@
 from typing import Any, Dict, Mapping, Optional, Union
 
 import numpy as np
+from lightning_utilities.core.imports import RequirementCache
+from tensorboardX import SummaryWriter
+from tensorboardX.summary import hparams
 from torch import Tensor
-from torch.utils.tensorboard import SummaryWriter
-from torch.utils.tensorboard.summary import hparams
 
 import pytorch_lightning as pl
 from lightning_lite.utilities.cloud_io import get_filesystem
@@ -38,6 +39,8 @@
 
 log = logging.getLogger(__name__)
 
+_TENSORBOARD_AVAILABLE = RequirementCache("tensorboard")
+
 if _OMEGACONF_AVAILABLE:
     from omegaconf import Container, OmegaConf
 
@@ -46,7 +49,7 @@ class TensorBoardLogger(Logger):
     r"""
     Log to local file system in `TensorBoard <https://www.tensorflow.org/tensorboard>`_ format.
 
-    Implemented using :class:`~torch.utils.tensorboard.SummaryWriter`. Logs are saved to
+    Implemented using :class:`~tensorboardX.SummaryWriter`. Logs are saved to
     ``os.path.join(save_dir, name, version)``. This is the default logger in Lightning, it comes
     preinstalled.
 
@@ -77,11 +80,20 @@ class TensorBoardLogger(Logger):
         sub_dir: Sub-directory to group TensorBoard logs. If a sub_dir argument is passed
             then logs are saved in ``/save_dir/name/version/sub_dir/``. Defaults to ``None`` in which
             logs are saved in ``/save_dir/name/version/``.
-        \**kwargs: Additional arguments used by :class:`SummaryWriter` can be passed as keyword
+        \**kwargs: Additional arguments used by :class:`tensorboardX.SummaryWriter` can be passed as keyword
             arguments in this logger. To automatically flush to disk, `max_queue` sets the size
             of the queue for pending logs before flushing. `flush_secs` determines how many seconds
             elapses before flushing.
 
+    Example:
+        >>> import shutil, tempfile
+        >>> tmp = tempfile.mkdtemp()
+        >>> tbl = TensorBoardLogger(tmp)
+        >>> tbl.log_hyperparams({"epochs": 5, "optimizer": "Adam"})
+        >>> tbl.log_metrics({"acc": 0.75})
+        >>> tbl.log_metrics({"acc": 0.9})
+        >>> tbl.finalize("success")
+        >>> shutil.rmtree(tmp)
     """
     NAME_HPARAMS_FILE = "hparams.yaml"
     LOGGER_JOIN_CHAR = "-"
@@ -103,7 +115,10 @@ def __init__(
         self._name = name or ""
         self._version = version
         self._sub_dir = None if sub_dir is None else os.fspath(sub_dir)
-        self._log_graph = log_graph
+        if log_graph and not _TENSORBOARD_AVAILABLE:
+            rank_zero_warn("You set `TensorBoardLogger(log_graph=True)` but `tensorboard` is not available.")
+        self._log_graph = log_graph and _TENSORBOARD_AVAILABLE
+
         self._default_hp_metric = default_hp_metric
         self._prefix = prefix
         self._fs = get_filesystem(save_dir)
diff --git a/tests/tests_pytorch/conftest.py b/tests/tests_pytorch/conftest.py
index 2f5607828a232..a4ddd88a39ae5 100644
--- a/tests/tests_pytorch/conftest.py
+++ b/tests/tests_pytorch/conftest.py
@@ -75,6 +75,7 @@ def restore_env_variables():
         "CUDA_MODULE_LOADING",  # leaked since PyTorch 1.13
         "KMP_INIT_AT_FORK",  # leaked since PyTorch 1.13
         "KMP_DUPLICATE_LIB_OK",  # leaked since PyTorch 1.13
+        "CRC32C_SW_MODE",  # leaked by tensorboardX
     }
     leaked_vars.difference_update(allowlist)
     assert not leaked_vars, f"test is leaking environment variable(s): {set(leaked_vars)}"
diff --git a/tests/tests_pytorch/loggers/test_tensorboard.py b/tests/tests_pytorch/loggers/test_tensorboard.py
index 90d15c06d7bf1..ddab738269904 100644
--- a/tests/tests_pytorch/loggers/test_tensorboard.py
+++ b/tests/tests_pytorch/loggers/test_tensorboard.py
@@ -24,6 +24,7 @@
 from pytorch_lightning import Trainer
 from pytorch_lightning.demos.boring_classes import BoringModel
 from pytorch_lightning.loggers import TensorBoardLogger
+from pytorch_lightning.loggers.tensorboard import _TENSORBOARD_AVAILABLE
 from pytorch_lightning.utilities.imports import _OMEGACONF_AVAILABLE
 
 from tests_pytorch.helpers.runif import RunIf
@@ -220,6 +221,7 @@ def test_tensorboard_log_graph(tmpdir, example_input_array):
     logger.log_graph(model, example_input_array)
 
 
+@pytest.mark.skipif(not _TENSORBOARD_AVAILABLE, reason=str(_TENSORBOARD_AVAILABLE))
 def test_tensorboard_log_graph_warning_no_example_input_array(tmpdir):
     """test that log graph throws warning if model.example_input_array is None."""
     model = BoringModel()
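
With `tensorboardX` as the required backend, `log_graph=True` is only honoured when the optional `tensorboard` package is importable; otherwise the constructor warns and disables graph logging. Below is a minimal Python sketch of that gate (illustrative only, not part of the patch), assuming `lightning_utilities` is installed; `resolve_log_graph` is a hypothetical helper and `warnings.warn` stands in for Lightning's `rank_zero_warn`.

    # Sketch only; mirrors the gate added in TensorBoardLogger.__init__ above.
    import warnings

    from lightning_utilities.core.imports import RequirementCache

    # Same module-level probe the patch adds to loggers/tensorboard.py.
    _TENSORBOARD_AVAILABLE = RequirementCache("tensorboard")


    def resolve_log_graph(log_graph: bool) -> bool:
        # Hypothetical helper: warn and drop the flag when `tensorboard` is missing,
        # so a tensorboardX-only install still constructs the logger cleanly.
        if log_graph and not _TENSORBOARD_AVAILABLE:
            warnings.warn("`log_graph=True` requires the `tensorboard` package, which is not installed.")
        return log_graph and bool(_TENSORBOARD_AVAILABLE)


    # True when `tensorboard` is installed; False (plus a warning) with tensorboardX alone.
    print(resolve_log_graph(True))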