Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CSV Logger #1005

Merged
merged 4 commits into from
Nov 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions catalyst/callbacks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
VerboseLogger,
ConsoleLogger,
TensorboardLogger,
CSVLogger,
)
from catalyst.callbacks.meter import MeterMetricsCallback
from catalyst.callbacks.metric import (
Expand Down
101 changes: 101 additions & 0 deletions catalyst/callbacks/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,9 +274,110 @@ def on_stage_end(self, runner: "IRunner"):
logger.close()


class CSVLogger(ILoggerCallback):
    """Logs metrics to a per-loader csv file on epoch end.

    For every loader a ``<loader_key>_log/logs.csv`` file is created under
    ``runner.logdir``; each row holds the epoch number (``step``) followed
    by the selected metric values, preceded by a single header row.
    """

    def __init__(
        self, metric_names: List[str] = None,
    ):
        """
        Args:
            metric_names: list of metric names to log,
                if None - logs everything
        """
        super().__init__(order=CallbackOrder.logging, node=CallbackNode.master)
        self.metrics_to_log = metric_names
        # loader_key -> open file handle for that loader's csv log
        self.loggers = {}
        # loader_key -> True once the header row has been written
        self.header_created = {}

    def _metric_names(self, metrics: Dict[str, float]) -> List[str]:
        # Shared by header and row writing so column order always matches.
        if self.metrics_to_log is None:
            return sorted(metrics.keys())
        return self.metrics_to_log

    def on_loader_start(self, runner: "IRunner") -> None:
        """
        On loader start action: opens (once) the csv file for this loader.

        Args:
            runner: current runner
        """
        if runner.loader_key not in self.loggers:
            log_dir = os.path.join(runner.logdir, f"{runner.loader_key}_log")
            os.makedirs(log_dir, exist_ok=True)
            # "a+" keeps rows from previous runs that reuse the same logdir
            self.loggers[runner.loader_key] = open(
                os.path.join(log_dir, "logs.csv"), "a+"
            )
            self.header_created[runner.loader_key] = False

    def _log_metrics(
        self, metrics: Dict[str, float], step: int, loader_key: str
    ):
        # Writes one csv row: step followed by the metric values.
        names = self._metric_names(metrics)
        row = ",".join([str(step)] + [str(metrics[name]) for name in names])
        self.loggers[loader_key].write(row + "\n")

    def _make_header(self, metrics: Dict[str, float], loader_key: str):
        # Writes the csv header row: "step" followed by the metric names.
        names = self._metric_names(metrics)
        header = ",".join(["step"] + list(names))
        self.loggers[loader_key].write(header + "\n")

    def on_epoch_end(self, runner: "IRunner"):
        """
        Logs metrics here: one csv row per loader for the finished epoch.

        Args:
            runner: runner for experiment
        """
        if runner.logdir is None:
            return
        per_loader_metrics = split_dict_to_subdicts(
            dct=runner.epoch_metrics,
            prefixes=list(runner.loaders.keys()),
            extra_key="_base",
        )
        # NOTE: loop variable renamed so it no longer shadows the dict
        # being iterated (previous code reused ``per_loader_metrics``).
        for loader_key, loader_metrics in per_loader_metrics.items():
            # "_base" holds metrics not tied to a particular loader - skip
            if "base" in loader_key:
                continue
            if not self.header_created[loader_key]:
                self._make_header(
                    metrics=loader_metrics, loader_key=loader_key
                )
                self.header_created[loader_key] = True
            self._log_metrics(
                metrics=loader_metrics,
                step=runner.global_epoch,
                loader_key=loader_key,
            )

    def on_stage_end(self, runner: "IRunner") -> None:
        """
        Closes loggers

        Args:
            runner: runner for experiment
        """
        for logger in self.loggers.values():
            logger.close()


# Public API of this module: logger callbacks exported to callers.
__all__ = [
    "ILoggerCallback",
    "ConsoleLogger",
    "TensorboardLogger",
    "VerboseLogger",
    "CSVLogger",
]
48 changes: 48 additions & 0 deletions catalyst/callbacks/tests/test_csv_logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# flake8: noqa
import os

import pytest

import torch
from torch.utils.data import DataLoader, TensorDataset

from catalyst.callbacks.logging import CSVLogger
from catalyst.dl import SupervisedRunner


def test_logger():
    """Smoke test: CSVLogger creates one csv per loader with a header row
    plus one row per epoch after a short training run."""
    # data
    num_samples, num_features = int(1e4), int(1e1)
    X, y = torch.rand(num_samples, num_features), torch.rand(num_samples)
    dataset = TensorDataset(X, y)
    loader = DataLoader(dataset, batch_size=32, num_workers=1)
    loaders = {"train": loader, "valid": loader}

    # model, criterion, optimizer, scheduler
    model = torch.nn.Linear(num_features, 1)
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters())
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6])

    # model training
    runner = SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        callbacks=[CSVLogger()],
        loaders=loaders,
        logdir="./logdir/test_csv",
        num_epochs=8,
        verbose=True,
    )
    # each loader gets its own csv log file
    assert os.path.exists("./logdir/test_csv/train_log/logs.csv")
    assert os.path.exists("./logdir/test_csv/valid_log/logs.csv")
    with open("./logdir/test_csv/train_log/logs.csv", "r") as log:
        lines = log.readlines()
    # first row is the header; loss must be among the logged columns
    assert "step,loss" in lines[0]
    # header + one row per epoch (8 epochs)
    assert len(lines) == 9