diff --git a/catalyst/callbacks/early_stop.py b/catalyst/callbacks/early_stop.py index 11e3986197..9cf1b112ee 100644 --- a/catalyst/callbacks/early_stop.py +++ b/catalyst/callbacks/early_stop.py @@ -7,7 +7,46 @@ class CheckRunCallback(Callback): - """Executes only a pipeline part from the ``Experiment``.""" + """Executes only a pipeline part from the ``Experiment``. + + Minimal working example (Notebook API): + + .. code-block:: python + + import torch + from torch.utils.data import DataLoader, TensorDataset + from catalyst import dl + + # data + num_samples, num_features = int(1e4), int(1e1) + X, y = torch.rand(num_samples, num_features), torch.rand(num_samples) + dataset = TensorDataset(X, y) + loader = DataLoader(dataset, batch_size=32, num_workers=1) + loaders = {"train": loader, "valid": loader} + + # model, criterion, optimizer, scheduler + model = torch.nn.Linear(num_features, 1) + criterion = torch.nn.MSELoss() + optimizer = torch.optim.Adam(model.parameters()) + scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6]) + + # model training + runner = dl.SupervisedRunner() + runner.train( + model=model, + criterion=criterion, + optimizer=optimizer, + scheduler=scheduler, + loaders=loaders, + logdir="./logdir", + num_epochs=8, + verbose=True, + callbacks=[ + dl.CheckRunCallback(num_batch_steps=3, num_epoch_steps=3) + ] + ) + + """ def __init__(self, num_batch_steps: int = 3, num_epoch_steps: int = 2): """ @@ -41,24 +80,42 @@ def on_batch_end(self, runner: "IRunner"): class EarlyStoppingCallback(Callback): """Early exit based on metric. - Example of usage in notebook API: + Minimal working example (Notebook API): .. 
code-block:: python - runner = SupervisedRunner() + import torch + from torch.utils.data import DataLoader, TensorDataset + from catalyst import dl + + # data + num_samples, num_features = int(1e4), int(1e1) + X, y = torch.rand(num_samples, num_features), torch.rand(num_samples) + dataset = TensorDataset(X, y) + loader = DataLoader(dataset, batch_size=32, num_workers=1) + loaders = {"train": loader, "valid": loader} + + # model, criterion, optimizer, scheduler + model = torch.nn.Linear(num_features, 1) + criterion = torch.nn.MSELoss() + optimizer = torch.optim.Adam(model.parameters()) + scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6]) + + # model training + runner = dl.SupervisedRunner() runner.train( - ... - callbacks=[ - ... - EarlyStoppingCallback( - patience=5, - metric="my_metric", - minimize=True, - ) - ... - ] + model=model, + criterion=criterion, + optimizer=optimizer, + scheduler=scheduler, + loaders=loaders, + logdir="./logdir", + num_epochs=8, + verbose=True, + callbacks=[ + dl.EarlyStoppingCallback(patience=2, metric="loss", minimize=True) + ] ) - ... 
Example of usage in config API: diff --git a/catalyst/metrics/__init__.py b/catalyst/metrics/__init__.py index 04b7a45e79..8437923844 100644 --- a/catalyst/metrics/__init__.py +++ b/catalyst/metrics/__init__.py @@ -1,16 +1,4 @@ # flake8: noqa -from catalyst.metrics.accuracy import accuracy, multi_label_accuracy -from catalyst.metrics.avg_precision import avg_precision, mean_avg_precision -from catalyst.metrics.auc import auc -from catalyst.metrics.cmc_score import cmc_score, cmc_score_count -from catalyst.metrics.ndcg import dcg, ndcg -from catalyst.metrics.dice import dice, calculate_dice -from catalyst.metrics.f1_score import f1_score, fbeta_score -from catalyst.metrics.hitrate import hitrate -from catalyst.metrics.classification import precision_recall_fbeta_support -from catalyst.metrics.precision import precision -from catalyst.metrics.recall import recall -from catalyst.metrics.focal import sigmoid_focal_loss, reduced_focal_loss from catalyst.metrics.functional import ( process_multilabel_components, get_binary_statistics, @@ -20,6 +8,18 @@ wrap_class_metric2dict, wrap_topk_metric2dict, ) +from catalyst.metrics.classification import precision_recall_fbeta_support + +from catalyst.metrics.accuracy import accuracy, multi_label_accuracy +from catalyst.metrics.auc import auc +from catalyst.metrics.avg_precision import avg_precision, mean_avg_precision +from catalyst.metrics.cmc_score import cmc_score, cmc_score_count +from catalyst.metrics.dice import dice, calculate_dice +from catalyst.metrics.f1_score import f1_score, fbeta_score +from catalyst.metrics.focal import sigmoid_focal_loss, reduced_focal_loss +from catalyst.metrics.hitrate import hitrate from catalyst.metrics.iou import iou, jaccard from catalyst.metrics.mrr import mrr -from catalyst.metrics.precision import average_precision +from catalyst.metrics.ndcg import dcg, ndcg +from catalyst.metrics.precision import average_precision, precision +from catalyst.metrics.recall import recall diff --git 
a/catalyst/metrics/recall.py b/catalyst/metrics/recall.py index 52319a2b70..4a65f388a8 100644 --- a/catalyst/metrics/recall.py +++ b/catalyst/metrics/recall.py @@ -37,3 +37,6 @@ def recall( ) return recall_score + + +__all__ = ["recall"] diff --git a/docs/api/metrics.rst b/docs/api/metrics.rst index 00acbf14b0..dfda80dad6 100644 --- a/docs/api/metrics.rst +++ b/docs/api/metrics.rst @@ -78,13 +78,6 @@ MRR MAP ------------------------ -.. automodule:: catalyst.metrics.mean_avg_precision - :members: - :undoc-members: - :show-inheritance: - -AP ------------------------- .. automodule:: catalyst.metrics.avg_precision :members: :undoc-members: @@ -97,16 +90,16 @@ NDCG :undoc-members: :show-inheritance: -DCG +Precision ------------------------ -.. automodule:: catalyst.metrics.dcg +.. automodule:: catalyst.metrics.precision :members: :undoc-members: :show-inheritance: -Precision +Recall ------------------------ -.. automodule:: catalyst.metrics.precision +.. automodule:: catalyst.metrics.recall :members: :undoc-members: :show-inheritance: diff --git a/docs/contributing/how_to_start.rst b/docs/contributing/how_to_start.rst deleted file mode 100644 index d2884757d1..0000000000 --- a/docs/contributing/how_to_start.rst +++ /dev/null @@ -1,123 +0,0 @@ -How to start -============================================================================== - -Issues ------- - -We use `GitHub issues`_ for bug reports and feature requests. - -Step-by-step guide -^^^^^^^^^^^^^^^^^^ - -New feature -''''''''''' - -1. Make an issue with your feature description; -2. We shall discuss the design and its implementation details; -3. Once we agree that the plan looks good, go ahead and implement it. - -Bugfix -'''''' - -1. Goto `GitHub issues`_; -2. Pick an issue and comment on the task that you want to work on this - feature; -3. If you need more context on a specific issue, please ask, and we will - discuss the details. 
- -Once you finish implementing a feature or bugfix, please send a Pull -Request. - -If you are not familiar with creating a Pull Request, here are some -guides: - -- http://stackoverflow.com/questions/14680711/how-to-do-a-github-pull-request -- https://help.github.com/articles/creating-a-pull-request/ - -Contribution best practices -''''''''''''''''''''''''''' - -1. Install requirements - -.. code-block:: bash - - brew install bash # for MacOS users - pip install -r requirements/requirements.txt -r requirements/requirements-dev.txt - -2. Break your work into small, single-purpose updates if possible. It's much harder to merge in a large change with a lot of disjoint features. -3. Submit the update as a GitHub pull request against the `master` branch. -4. Make sure that you provide docstrings for all your new methods and classes. -5. Add new unit tests for your code. -6. Check the codestyle -7. Make sure that your code passes the unit tests - - -Codestyle -^^^^^^^^^ - -Do not forget to check the codestyle for your PR with - -.. code-block:: bash - - catalyst-make-codestyle && catalyst-check-codestyle - -Make sure to have your python packages complied with `requirements/requirements.txt` and `requirements/requirements-dev.txt` to get codestyle run clean. - - -Unit tests -^^^^^^^^^^ - -Do not forget to check that your code passes the unit tests - -.. code-block:: bash - - pytest . - - -Documentation -------------- - -Catalyst uses `Google style`_ for formatting `docstrings`_. Length of line -inside docstrings block must be limited to 80 characters to fit into -Jupyter documentation popups. - -Check that you have written working docs -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: bash - - make check-docs - -The command requires Sphinx and some sphinx-specific libraries. -If you don't want to install them, you may make a catalyst-dev container - -.. 
code-block:: bash - - make docker-dev - docker run \\ - -v `pwd`/:/workspace/ \\ - catalyst-dev:latest \\ - bash -c "make check-docs" - -To build docs add environment variable REMOVE_BUILDS=0 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: bash - - REMOVE_BUILDS=0 make check-docs - -or through docker - -.. code-block:: bash - - docker run \\ - -v `pwd`/:/workspace/ \\ - catalyst-dev:latest \\ - bash -c "REMOVE_BUILDS=0 make check-docs" - -The docs will be stored in `builds/` folder. - - -.. _GitHub issues: https://github.com/catalyst-team/catalyst/issues -.. _Google style: http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html -.. _docstrings: https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings \ No newline at end of file diff --git a/docs/core/callback.rst b/docs/core/callback.rst new file mode 100644 index 0000000000..4f2614c2ab --- /dev/null +++ b/docs/core/callback.rst @@ -0,0 +1,3 @@ +Callback +============================================================================== + diff --git a/docs/core/experiment.rst b/docs/core/experiment.rst new file mode 100644 index 0000000000..1e8490b55c --- /dev/null +++ b/docs/core/experiment.rst @@ -0,0 +1,29 @@ +Experiment +============================================================================== + +Experiment - an abstraction that contains information about the experiment +- a model, a criterion, an optimizer, a scheduler, and their hyperparameters. +It also holds information about the data and transformations to apply. +The Experiment knows **what** you would like to run. + +Each deep learning project has several main components. +These primitives define what we want to use during the experiment: + +- the data +- the model(s) +- the optimizer(s) +- the loss(es) +- and the scheduler(s) if we need them. 
+ +These are the abstractions that Experiment covers in Catalyst, +with a few modifications for easier experiment monitoring +and hyperparameters logging. For each stage of our experiment, +the Experiment provides interfaces to all primitives above + the callbacks. + +.. image:: https://raw.githubusercontent.com/catalyst-team/catalyst-pics/master/third_party_pics/catalyst102-experiment.png + :alt: Experiment + + +--- + +- what is the difference between different Experiments? \ No newline at end of file diff --git a/docs/core/runner.rst b/docs/core/runner.rst new file mode 100644 index 0000000000..eff4a1fa8b --- /dev/null +++ b/docs/core/runner.rst @@ -0,0 +1,6 @@ +Runner +============================================================================== + +--- + +- what is the difference between different Runners? \ No newline at end of file diff --git a/docs/faq/amp.rst b/docs/faq/amp.rst new file mode 100644 index 0000000000..801218ab2a --- /dev/null +++ b/docs/faq/amp.rst @@ -0,0 +1,9 @@ +Mixed precision training +============================================================================== + +- How to use Nvidia Apex? +- How to use torch.amp? + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw \ No newline at end of file diff --git a/docs/faq/checkpointing.rst b/docs/faq/checkpointing.rst new file mode 100644 index 0000000000..db1ec46329 --- /dev/null +++ b/docs/faq/checkpointing.rst @@ -0,0 +1,12 @@ +[WIP] Model checkpointing +============================================================================== + +- how to load best model? +- notebook and config api +- how to save model? +- how to load model? +- what's the difference between checkpoint and checkpoint_full? + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. 
_`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw \ No newline at end of file diff --git a/docs/contributing/codestyle.rst b/docs/faq/config_api.rst similarity index 60% rename from docs/contributing/codestyle.rst rename to docs/faq/config_api.rst index 3b92a95ea2..37a07199e3 100644 --- a/docs/contributing/codestyle.rst +++ b/docs/faq/config_api.rst @@ -1,4 +1,4 @@ -Codestyle +[WIP] Config API ============================================================================== Hi, @@ -11,4 +11,8 @@ But don't feel upset, check out the kitty... `Kittylyst`_ tutorial. .. image:: https://raw.githubusercontent.com/Scitator/kittylyst/master/assets/kitty.jpg :target: https://github.com/Scitator/kittylyst - :alt: kitty \ No newline at end of file + :alt: kitty + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw diff --git a/docs/faq/data.rst b/docs/faq/data.rst new file mode 100644 index 0000000000..8040697076 --- /dev/null +++ b/docs/faq/data.rst @@ -0,0 +1,321 @@ +Dataflow +============================================================================== + +Base dataflow +---------------------------------------------------- +Catalyst uses the "key-value is all you need" approach. +Speaking so, it expects key-value outputs from your Dataset/Dataloader. + +Example dataflow: + +.. code-block:: python + + class MyDataset: + + def __get_item__(self, index): + ... + return {"features": np.ndarray, "targets": np.ndarray} + + class MyModel: + + def forward(self, features): + ... 
+ return logits + + class MyRunner: + + def _handle_batch(self, batch): + # on this step we also have self.input = batch = {"features": ..., "targets": ...} + logits = self.model(batch["features"]) + loss = self.criterion(logits, batch["targets"]) + self.output = {"logits": logits} + # this is useful for other components of the pipeline + + loader = MyDataset() + model = MyModel() + runner = MyRunner() + runner.train(...) + +.. note:: + + ``SupervisedRunner`` has data preprocessing features to transform + tuple/list-based data into key-value. + +Such an approach is easily extensible for any number of features, targets and +very convenient to read, thanks to "automatic naming documentation" - keys for the values: + +.. code-block:: python + + class MyDataset: + + def __getitem__(self, index): + ... + return {"features": np.ndarray, "extra_features": np.ndarray, "targets": np.ndarray} + + class MyModel: + + def forward(self, features, extra_features): + ... + return logits + + class MyRunner: + + def _handle_batch(self, batch): + # on this step we also have self.input = batch = {"features": ..., "extra_features": ...,"targets": ...} + logits = self.model(batch["features"], batch["extra_features"]) + loss = self.criterion(logits, batch["targets"]) + self.output = {"logits": logits} + # this is useful for other components of the pipeline + + loader = MyDataset() + model = MyModel() + runner = MyRunner() + runner.train(...) + +Moreover, if some of the features are not required anymore - +you don't have to rewrite your code: + +.. code-block:: python + + class MyDataset: + + def __getitem__(self, index): + ... + return {"features": np.ndarray, "extra_features": np.ndarray, "targets": np.ndarray} + + class MyModel: + + def forward(self, features): + ... 
+ return logits + + class MyRunner: + + def _handle_batch(self, batch): + # on this step we also have self.input = batch = {"features": ..., "extra_features": ...,"targets": ...} + logits = self.model(batch["features"]) + loss = self.criterion(logits, batch["targets"]) + self.output = {"logits": logits} + # this is useful for other components of the pipeline + + loader = MyDataset() + model = MyModel() + runner = MyRunner() + runner.train(...) + + +Key-value storage is also used to store the datasets/loaders for the experiment. +In this case we also need to use ``OrderedDict`` to ensure correct epoch handling - +that your model will first train on some ``train`` dataset +and only then will be evaluated on some ``valid`` dataset: + +.. code-block:: python + + train_loader = MyDataset(...) + valid_loader = MyDataset(...) + loaders = OrderedDict({"train": train_loader, "valid": valid_loader}) + model = MyModel() + runner = MyRunner() + runner.train(model=model, loaders=loaders) + +Catalyst uses the following "automatic naming documentation" for loader keys handling: + +- if loader_key starts with "train" - it's our train datasource, we need to run forward and backward passes on it. +- if loader_key starts with "valid" - it's our validation datasource, we need to run forward, but not the backward pass on it. +- if loader_key starts with "infer" - it's our datasource for model inference, we need to run forward, but not the backward pass on it. + +Multiple datasources +---------------------------------------------------- +Thanks to the key-value approach, +it's possible to handle any number of datasets/loaders +without code changes or tricks with Datasets concatenation, etc: + +.. code-block:: python + + train_loader = MyDataset(...) + train2_loader = MyDataset(...) + valid_loader = MyDataset(...) + valid2_loader = MyDataset(...) 
+ loaders = OrderedDict({ + "train": train_loader, + "train2": train2_loader, + "valid": valid_loader, + "valid2": valid2_loader, + }) + model = MyModel() + runner = MyRunner() + runner.train(model=model, loaders=loaders) + +What is even more interesting, you could also do something like: + +.. code-block:: python + + train_loader = MyDataset(...) + train2_loader = MyDataset(...) + valid_loader = MyDataset(...) + valid2_loader = MyDataset(...) + loaders = OrderedDict({ + "train": train_loader, + "valid": valid_loader, + "train2": train2_loader, + "valid2": valid2_loader, + + }) + model = MyModel() + runner = MyRunner() + runner.train(model=model, loaders=loaders) + +Once again, it's also valid to do something like: + +.. code-block:: python + + train_loader = MyDataset(...) + train2_loader = MyDataset(...) + valid_loader = MyDataset(...) + valid2_loader = MyDataset(...) + loaders = OrderedDict({ + "train": concat_datasets(train_loader, train2_loader), + "valid": concat_datasets(valid_loader, valid2_loader), + }) + model = MyModel() + runner = MyRunner() + runner.train(model=model, loaders=loaders) + + +Loader for model selection +---------------------------------------------------- +In case of multiple loaders, you could easily select one for model selection +with ``valid_loader`` param in the ``runner.train``. +For example, to use ``valid2`` loader as your +model selection one you could do the following: + +.. code-block:: python + + train_loader = MyDataset(...) + train2_loader = MyDataset(...) + valid_loader = MyDataset(...) + valid2_loader = MyDataset(...) + loaders = OrderedDict({ + "train": train_loader, + "train2": train2_loader, + "valid": valid_loader, + "valid2": valid2_loader, + }) + model = MyModel() + runner = MyRunner() + runner.train(model=model, loaders=loaders, valid_loader="valid2") + +.. note:: + + By default, Catalyst uses + ``valid_loader=valid`` for model selection. 
+ + +Metric for model selection +---------------------------------------------------- +Suppose, you are using a number of different metrics in your pipeline: + +.. code-block:: python + + class MyRunner: + + def _handle_batch(self, batch): + # on this step we also have self.input = batch = {"features": ..., "targets": ...} + logits = self.model(batch["features"]) + loss = self.criterion(logits, batch["targets"]) + accuracy01, accuracy03 = accuracy(logits, batch["targets"], topk=(1, 3)) + self.batch_metrics.update(**{ + "loss": loss, + "accuracy01": accuracy01, + "accuracy03": accuracy03, + }) + self.output = {"logits": logits} + # this is useful for other components of the pipeline + + loaders = ... + model = ... + runner = MyRunner() + runner.train(model=model, loaders=loaders) + +You could select one for model selection with ``main_metric`` and ``minimize_metric`` +params in the ``runner.train``. For example, to use ``accuracy01`` metric +as your model selection one you could do the following: + +.. code-block:: python + + class MyRunner: + + def _handle_batch(self, batch): + # on this step we also have self.input = batch = {"features": ..., "targets": ...} + logits = self.model(batch["features"]) + loss = self.criterion(logits, batch["targets"]) + accuracy01, accuracy03 = accuracy(logits, batch["targets"], topk=(1, 3)) + self.batch_metrics.update(**{ + "loss": loss, + "accuracy01": accuracy01, + "accuracy03": accuracy03, + }) + self.output = {"logits": logits} + # this is useful for other components of the pipeline + + loaders = ... + model = ... + runner = MyRunner() + # since we would like to maximize our model accuracy... + runner.train(model=model, loaders=loaders, main_metric="accuracy01", minimize_metric=False) + +.. note:: + + By default, Catalyst uses + ``main_metric=loss`` and ``minimize_metric=True`` + for model selection. 
+ +Use part of the data +---------------------------------------------------- +If you would like to use only some part of your data from the loader +(for example, you would like to check your pipeline and overfit for one small portion of the data), +you could use ``BatchLimitLoaderWrapper``: + +.. code-block:: python + + train_loader = BatchLimitLoaderWrapper(MyDataset(...), num_batches=1) + valid_loader = MyDataset(...) + loaders = OrderedDict({"train": train_loader, "valid": valid_loader}) + model = MyModel() + runner = MyRunner() + runner.train(model=model, loaders=loaders) + +As a more user-friendly approach with ``runner.train``: + +.. code-block:: python + + train_loader = MyDataset(...) + valid_loader = MyDataset(...) + loaders = OrderedDict({"train": train_loader, "valid": valid_loader}) + model = MyModel() + runner = MyRunner() + # here we overfit for one batch per loader + runner.train(model=model, loaders=loaders, overfit=True) + +And a more convenient and customizable way: + +.. code-block:: python + + train_loader = MyDataset(...) + valid_loader = MyDataset(...) + loaders = OrderedDict({"train": train_loader, "valid": valid_loader}) + model = MyModel() + runner = MyRunner() + # here we overfit for 10 batches in `train` loader + # and half of the `valid` loader + runner.train( + model=model, + loaders=loaders, + callbacks=[dl.BatchOverfitCallback(train=10, valid=0.5)] + ) + +---- + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw diff --git a/docs/faq/ddp.rst b/docs/faq/ddp.rst new file mode 100644 index 0000000000..629263d63d --- /dev/null +++ b/docs/faq/ddp.rst @@ -0,0 +1,9 @@ +[WIP] Distributed training +============================================================================== + +- How to run experiments in distributed mode? +- (?) 
How to collect metrics in distributed mode in the right way? + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw \ No newline at end of file diff --git a/docs/faq/debugging.rst b/docs/faq/debugging.rst new file mode 100644 index 0000000000..9c3fef15e5 --- /dev/null +++ b/docs/faq/debugging.rst @@ -0,0 +1,83 @@ +Model debugging +============================================================================== + +Pipeline debugging +---------------------------------------------------- +To check pipeline correctness, that everything is working correctly +and does not throw any errors, we suggest using ``CheckRunCallback``. +You could find more information about it :doc:`here <early_stopping>`. + +To check model convergence within the pipeline, +we suggest using ``BatchOverfitCallback``. +You could find more information about it :doc:`here <data>`. + +Python debugging +---------------------------------------------------- +For python debugging we suggest using ``ipdb``. You could install it with: + +.. code-block:: bash + + pip install ipdb + +After that you could stop the pipeline in the place you prefer, for example: + +.. 
code-block:: python + + import os + import torch + from torch.nn import functional as F + from torch.utils.data import DataLoader + from catalyst import dl, metrics + from catalyst.data.cv import ToTensor + from catalyst.contrib.datasets import MNIST + + model = torch.nn.Linear(28 * 28, 10) + optimizer = torch.optim.Adam(model.parameters(), lr=0.02) + + loaders = { + "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32), + "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32), + } + + class CustomRunner(dl.Runner): + + def predict_batch(self, batch): + # model inference step + return self.model(batch[0].to(self.device).view(batch[0].size(0), -1)) + + def _handle_batch(self, batch): + # model train/valid step + x, y = batch + y_hat = self.model(x.view(x.size(0), -1)) + + # let's stop before metric computation, but after model forward pass + import ipdb; ipdb.set_trace() + # <--- we will stop here ---> + loss = F.cross_entropy(y_hat, y) + accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3)) + self.batch_metrics.update( + {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03} + ) + + + if self.is_train_loader: + loss.backward() + self.optimizer.step() + self.optimizer.zero_grad() + + runner = CustomRunner() + # model training + runner.train( + model=model, + optimizer=optimizer, + loaders=loaders, + logdir="./logs", + num_epochs=5, + verbose=True, + load_best_on_end=True, + ) + + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. 
_`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw \ No newline at end of file diff --git a/docs/faq/dp.rst b/docs/faq/dp.rst new file mode 100644 index 0000000000..c13bbb5434 --- /dev/null +++ b/docs/faq/dp.rst @@ -0,0 +1,6 @@ +[WIP] DataParallel training (single/multi-gpu) +============================================================================== + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw \ No newline at end of file diff --git a/docs/faq/early_stopping.rst b/docs/faq/early_stopping.rst new file mode 100644 index 0000000000..8fc1f15c3f --- /dev/null +++ b/docs/faq/early_stopping.rst @@ -0,0 +1,118 @@ +Early stopping +============================================================================== + +Early stopping +---------------------------------------------------- + +To use experiment early stopping you could use ``EarlyStoppingCallback``: + +.. 
code-block:: python + + import torch + from torch.utils.data import DataLoader, TensorDataset + from catalyst import dl + + # data + num_samples, num_features = int(1e4), int(1e1) + X, y = torch.rand(num_samples, num_features), torch.rand(num_samples) + dataset = TensorDataset(X, y) + loader = DataLoader(dataset, batch_size=32, num_workers=1) + loaders = {"train": loader, "valid": loader} + + # model, criterion, optimizer, scheduler + model = torch.nn.Linear(num_features, 1) + criterion = torch.nn.MSELoss() + optimizer = torch.optim.Adam(model.parameters()) + scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6]) + + # model training + runner = dl.SupervisedRunner() + runner.train( + model=model, + criterion=criterion, + optimizer=optimizer, + scheduler=scheduler, + loaders=loaders, + logdir="./logdir", + num_epochs=8, + verbose=True, + callbacks=[dl.EarlyStoppingCallback(patience=2, metric="loss", minimize=True)] + ) + +Pipeline checking +---------------------------------------------------- +You could also check the pipeline +(run only 3 batches per loader, and 3 epochs per stage) +with ``CheckRunCallback``: + +.. 
code-block:: python + + import torch + from torch.utils.data import DataLoader, TensorDataset + from catalyst import dl + + # data + num_samples, num_features = int(1e4), int(1e1) + X, y = torch.rand(num_samples, num_features), torch.rand(num_samples) + dataset = TensorDataset(X, y) + loader = DataLoader(dataset, batch_size=32, num_workers=1) + loaders = {"train": loader, "valid": loader} + + # model, criterion, optimizer, scheduler + model = torch.nn.Linear(num_features, 1) + criterion = torch.nn.MSELoss() + optimizer = torch.optim.Adam(model.parameters()) + scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6]) + + # model training + runner = dl.SupervisedRunner() + runner.train( + model=model, + criterion=criterion, + optimizer=optimizer, + scheduler=scheduler, + loaders=loaders, + logdir="./logdir", + num_epochs=8, + verbose=True, + callbacks=[dl.CheckRunCallback(num_batch_steps=3, num_epoch_steps=3)] + ) + +You could also use ``runner.train(..., check=True)`` with Notebook API approach: + +.. 
code-block:: python + + import torch + from torch.utils.data import DataLoader, TensorDataset + from catalyst import dl + + # data + num_samples, num_features = int(1e4), int(1e1) + X, y = torch.rand(num_samples, num_features), torch.rand(num_samples) + dataset = TensorDataset(X, y) + loader = DataLoader(dataset, batch_size=32, num_workers=1) + loaders = {"train": loader, "valid": loader} + + # model, criterion, optimizer, scheduler + model = torch.nn.Linear(num_features, 1) + criterion = torch.nn.MSELoss() + optimizer = torch.optim.Adam(model.parameters()) + scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6]) + + # model training + runner = dl.SupervisedRunner() + runner.train( + model=model, + criterion=criterion, + optimizer=optimizer, + scheduler=scheduler, + loaders=loaders, + logdir="./logdir", + num_epochs=8, + verbose=True, + check=True, + ) + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw \ No newline at end of file diff --git a/docs/faq/finetuning.rst b/docs/faq/finetuning.rst new file mode 100644 index 0000000000..ed027e814d --- /dev/null +++ b/docs/faq/finetuning.rst @@ -0,0 +1,6 @@ +[WIP] Finetuning +============================================================================== + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. 
_`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw \ No newline at end of file diff --git a/docs/faq/inference.rst b/docs/faq/inference.rst new file mode 100644 index 0000000000..d60cecea55 --- /dev/null +++ b/docs/faq/inference.rst @@ -0,0 +1,92 @@ +Inference +============================================================================== + +To use your model in the inference mode, +you could redefine the ``Runner.predict_batch``. + +Suppose you have the following classification pipeline: + +.. code-block:: python + + import os + import torch + from torch.nn import functional as F + from torch.utils.data import DataLoader + from torchvision.datasets import MNIST + from torchvision.transforms import ToTensor + from catalyst import dl, metrics + + model = torch.nn.Linear(28 * 28, 10) + criterion = torch.nn.CrossEntropyLoss() + optimizer = torch.optim.Adam(model.parameters(), lr=0.02) + + loaders = { + "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32), + "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32), + } + + class CustomRunner(dl.Runner): + + # <--- model inference step ---> + def predict_batch(self, batch): + return self.model(batch[0].to(self.device).view(batch[0].size(0), -1)) + # <--- model inference step ---> + + def _handle_batch(self, batch): + # model train/valid step + x, y = batch + y_hat = self.model(x.view(x.size(0), -1)) + + loss = self.criterion(y_hat, y) + accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3)) + self.batch_metrics.update( + {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03} + ) + + if self.is_train_loader: + loss.backward() + self.optimizer.step() + self.optimizer.zero_grad() + + runner = CustomRunner() + # model training + runner.train( + model=model, + criterion=criterion, + optimizer=optimizer, + loaders=loaders, + logdir="./logs", + 
num_epochs=5, + verbose=True, + load_best_on_end=True, + ) + +Now you could easily predict your data with the Runner-specified logic. + +Predict batch +---------------------------------------------------- +If you want to predict one batch: + +.. code-block:: python + + batch_prediction = runner.predict_batch(next(iter(loaders["valid"]))) + # which would be the same as + batch_model_prediction = model(next(iter(loaders["valid"]))[0].view(32, -1)) + batch_prediction == batch_model_prediction + >>> True + +Predict loader +---------------------------------------------------- +If you want to predict the entire loader: + +.. code-block:: python + + for prediction in runner.predict_loader(loader=loaders["valid"]): + assert prediction.detach().cpu().numpy().shape[-1] == 10 + +The ``runner.predict_loader`` method just iteratively goes through the loader batches, +makes model predictions and yields the results. + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw \ No newline at end of file diff --git a/docs/faq/logging.rst b/docs/faq/logging.rst new file mode 100644 index 0000000000..f569da91db --- /dev/null +++ b/docs/faq/logging.rst @@ -0,0 +1,110 @@ +Logging +============================================================================== + +Metrics logging +---------------------------------------------------- +Catalyst supports a variety of metrics storages during the experiment + +- ``runner.batch_metrics`` - dictionary, flat storage for batch metrics. + :: + + runner.batch_metrics = {"loss": ..., "accuracy": ..., "iou": ...} + +- ``runner.loader_metrics`` - dictionary with aggregated batch statistics for loader (mean over all batches) and global loader metrics, like AUC. 
+ :: + + runner.loader_metrics = {"loss": ..., "accuracy": ..., "auc": ...} + +- ``runner.epoch_metrics`` - dictionary with summarized metrics for different loaders and global epoch metrics, like lr, momentum. + :: + + runner.epoch_metrics = { + "train_loss": ..., "train_auc": ..., "valid_loss": ..., + "lr": ..., "momentum": ..., + } + +- ``runner.valid_metrics`` - dictionary with validation metrics for the current epoch. + :: + + runner.valid_metrics = {"loss": ..., "accuracy": ..., "auc": ...} + +- ``runner.best_valid_metrics`` - dictionary with best validation metrics during the whole training process. + :: + + runner.best_valid_metrics = {"loss": ..., "accuracy": ..., "auc": ...} + +You could log any new metric in a straightforward way: + +.. code-block:: python + + import os + import torch + from torch.nn import functional as F + from torch.utils.data import DataLoader + from catalyst import dl, metrics + from catalyst.data.cv import ToTensor + from catalyst.contrib.datasets import MNIST + + model = torch.nn.Linear(28 * 28, 10) + optimizer = torch.optim.Adam(model.parameters(), lr=0.02) + + loaders = { + "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32), + "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32), + } + + class CustomRunner(dl.Runner): + + def predict_batch(self, batch): + # model inference step + return self.model(batch[0].to(self.device).view(batch[0].size(0), -1)) + + def _handle_batch(self, batch): + # model train/valid step + x, y = batch + y_hat = self.model(x.view(x.size(0), -1)) + + loss = F.cross_entropy(y_hat, y) + accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3)) + # <--- logging ---> + # here we are adding loss, accuracy01 and accuracy03 to the batch metrics + self.batch_metrics.update( + {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03} + ) + # <--- logging ---> + + if self.is_train_loader: + loss.backward() + 
self.optimizer.step() + self.optimizer.zero_grad() + + runner = CustomRunner() + # model training + runner.train( + model=model, + optimizer=optimizer, + loaders=loaders, + logdir="./logs", + num_epochs=5, + verbose=True, + load_best_on_end=True, + ) + +[WIP] Metrics logging with callback +---------------------------------------------------- + +- todo + +[WIP] Supported loggers +---------------------------------------------------- + +- console +- txt +- Tensorboard +- Alchemy +- Neptune +- Weights and Biases + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw \ No newline at end of file diff --git a/docs/faq/lr_finder.rst b/docs/faq/lr_finder.rst new file mode 100644 index 0000000000..7b2b13f645 --- /dev/null +++ b/docs/faq/lr_finder.rst @@ -0,0 +1,21 @@ +[WIP] Learning Rate Finder +============================================================================== + +- How to find optimal learning rate? +- based on https://sgugger.github.io/how-do-you-find-a-good-learning-rate.html + +Hi, + +Still work in progress here. + +But don't feel upset, check out the kitty... `Kittylyst`_ tutorial. + +.. _`Kittylyst`: https://github.com/Scitator/kittylyst + +.. image:: https://raw.githubusercontent.com/Scitator/kittylyst/master/assets/kitty.jpg + :target: https://github.com/Scitator/kittylyst + :alt: kitty + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. 
_`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw diff --git a/docs/faq/multi_components.rst b/docs/faq/multi_components.rst new file mode 100644 index 0000000000..cec50ac5eb --- /dev/null +++ b/docs/faq/multi_components.rst @@ -0,0 +1,282 @@ +Multiple components +============================================================================== + +Thanks to the Catalyst "key-value is all you need" approach, +it's very easy to run experiments in a multi-component setup +(several models, criteria, optimizers, schedulers). + +Suppose you have the following classification pipeline: + +.. code-block:: python + + import os + import torch + from torch.nn import functional as F + from torch.utils.data import DataLoader + from torchvision.datasets import MNIST + from torchvision.transforms import ToTensor + from catalyst import dl, metrics + + model = torch.nn.Linear(28 * 28, 10) + criterion = torch.nn.CrossEntropyLoss() + optimizer = torch.optim.Adam(model.parameters(), lr=0.02) + + loaders = { + "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32), + "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32), + } + + class CustomRunner(dl.Runner): + + def predict_batch(self, batch): + # model inference step + return self.model(batch[0].to(self.device).view(batch[0].size(0), -1)) + + def _handle_batch(self, batch): + # model train/valid step + x, y = batch + y_hat = self.model(x.view(x.size(0), -1)) + + loss = self.criterion(y_hat, y) + accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3)) + self.batch_metrics.update( + {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03} + ) + + if self.is_train_loader: + loss.backward() + self.optimizer.step() + self.optimizer.zero_grad() + + runner = CustomRunner() + # model training + runner.train( + model=model, + criterion=criterion, + optimizer=optimizer, 
+ loaders=loaders, + logdir="./logs", + num_epochs=5, + verbose=True, + load_best_on_end=True, + ) + +Multi-model +---------------------------------------------------- +Multi-model example: + +.. code-block:: python + + import os + import torch + from torch.nn import functional as F + from torch.utils.data import DataLoader + from torchvision.datasets import MNIST + from torchvision.transforms import ToTensor + from catalyst import dl, metrics + + # <--- multi-model setup ---> + encoder = torch.nn.Linear(28 * 28, 128) + head = torch.nn.Linear(128, 10) + model = {"encoder": encoder, "head": head} + optimizer = torch.optim.Adam([ + {'params': encoder.parameters()}, + {'params': head.parameters()}, + ], lr=0.02) + # <--- multi-model setup ---> + criterion = torch.nn.CrossEntropyLoss() + + loaders = { + "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32), + "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32), + } + + class CustomRunner(dl.Runner): + + def predict_batch(self, batch): + # model inference step + return self.model(batch[0].to(self.device).view(batch[0].size(0), -1)) + + def _handle_batch(self, batch): + # model train/valid step + x, y = batch + # <--- multi-model usage ---> + x_ = self.model["encoder"](x.view(x.size(0), -1)) + y_hat = self.model["head"](x_) + # <--- multi-model usage ---> + + loss = self.criterion(y_hat, y) + accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3)) + self.batch_metrics.update( + {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03} + ) + + if self.is_train_loader: + loss.backward() + self.optimizer.step() + self.optimizer.zero_grad() + + runner = CustomRunner() + # model training + runner.train( + model=model, + criterion=criterion, + optimizer=optimizer, + loaders=loaders, + logdir="./logs", + num_epochs=5, + verbose=True, + load_best_on_end=True, + ) + +As you can see, the only thing you need to 
do - just wrap the model with key-value. +That's it, simple enough, no extra abstractions required. + +Multi-optimizer +---------------------------------------------------- +Multi-optimizer example: + +.. code-block:: python + + import os + import torch + from torch.nn import functional as F + from torch.utils.data import DataLoader + from torchvision.datasets import MNIST + from torchvision.transforms import ToTensor + from catalyst import dl, metrics + + # <--- multi-model/optimizer setup ---> + encoder = torch.nn.Linear(28 * 28, 128) + head = torch.nn.Linear(128, 10) + model = {"encoder": encoder, "head": head} + optimizer = { + "encoder": torch.optim.Adam(encoder.parameters(), lr=0.02), + "head": torch.optim.Adam(head.parameters(), lr=0.001), + } + # <--- multi-model/optimizer setup ---> + criterion = torch.nn.CrossEntropyLoss() + + loaders = { + "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32), + "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32), + } + + class CustomRunner(dl.Runner): + + def predict_batch(self, batch): + # model inference step + return self.model(batch[0].to(self.device).view(batch[0].size(0), -1)) + + def _handle_batch(self, batch): + # model train/valid step + x, y = batch + # <--- multi-model/optimizer usage ---> + x_ = self.model["encoder"](x.view(x.size(0), -1)) + y_hat = self.model["head"](x_) + # <--- multi-model/optimizer usage ---> + + loss = self.criterion(y_hat, y) + accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3)) + self.batch_metrics.update( + {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03} + ) + + if self.is_train_loader: + loss.backward() + # <--- multi-model/optimizer usage ---> + self.optimizer["encoder"].step() + self.optimizer["head"].step() + self.optimizer["encoder"].zero_grad() + self.optimizer["head"].zero_grad() + # <--- multi-model/optimizer usage ---> + + runner = 
CustomRunner() + # model training + runner.train( + model=model, + criterion=criterion, + optimizer=optimizer, + loaders=loaders, + logdir="./logs", + num_epochs=5, + verbose=True, + load_best_on_end=True, + ) + +The same thing here - we could wrap our optimizers with key-value too and use them in a straightforward way. + +Multi-criterion +---------------------------------------------------- +Multi-criterion example: + +.. code-block:: python + + import os + import torch + from torch.nn import functional as F + from torch.utils.data import DataLoader + from torchvision.datasets import MNIST + from torchvision.transforms import ToTensor + from catalyst import dl, metrics + + model = torch.nn.Linear(28 * 28, 10) + optimizer = torch.optim.Adam(model.parameters(), lr=0.02) + # <--- multi-criterion setup ---> + criterion = { + "multi-class": torch.nn.CrossEntropyLoss(), + "multi-label": torch.nn.BCEWithLogitsLoss(), + } + # <--- multi-criterion setup ---> + + loaders = { + "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32), + "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32), + } + + class CustomRunner(dl.Runner): + + def predict_batch(self, batch): + # model inference step + return self.model(batch[0].to(self.device).view(batch[0].size(0), -1)) + + def _handle_batch(self, batch): + # model train/valid step + x, y = batch + y_hat = self.model(x.view(x.size(0), -1)) + + # <--- multi-criterion usage ---> + loss_multi_class = self.criterion["multi-class"](y_hat, y) + loss_multi_label = self.criterion["multi-label"](y_hat, F.one_hot(y, 10).to(torch.float32)) + loss = loss_multi_class + loss_multi_label + # <--- multi-criterion usage ---> + + accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3)) + self.batch_metrics.update( + {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03} + ) + + if self.is_train_loader: + loss.backward() + 
self.optimizer.step() + self.optimizer.zero_grad() + + runner = CustomRunner() + # model training + runner.train( + model=model, + criterion=criterion, + optimizer=optimizer, + loaders=loaders, + logdir="./logs", + num_epochs=5, + verbose=True, + load_best_on_end=True, + ) + +Same approach here - just use key-value storage to pass criterion through the experiment. + + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw \ No newline at end of file diff --git a/docs/faq/optuna.rst b/docs/faq/optuna.rst new file mode 100644 index 0000000000..499506e02c --- /dev/null +++ b/docs/faq/optuna.rst @@ -0,0 +1,91 @@ +Optuna integration +============================================================================== + +Notebook API +---------------------------------------------------- + +You can easily use Optuna for hyperparameters optimization: + +.. 
code-block:: python + + import os + import optuna + import torch + from torch import nn + from torch.utils.data import DataLoader + from catalyst import dl + from catalyst.data.cv import ToTensor + from catalyst.contrib.datasets import MNIST + from catalyst.contrib.nn import Flatten + + + def objective(trial): + lr = trial.suggest_loguniform("lr", 1e-3, 1e-1) + num_hidden = int(trial.suggest_loguniform("num_hidden", 32, 128)) + + loaders = { + "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32), + "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32), + } + model = nn.Sequential( + Flatten(), nn.Linear(784, num_hidden), nn.ReLU(), nn.Linear(num_hidden, 10) + ) + optimizer = torch.optim.Adam(model.parameters(), lr=lr) + criterion = nn.CrossEntropyLoss() + + runner = dl.SupervisedRunner() + runner.train( + model=model, + loaders=loaders, + criterion=criterion, + optimizer=optimizer, + callbacks=[ + dl.OptunaCallback(trial), + dl.AccuracyCallback(num_classes=10), + ], + num_epochs=10, + main_metric="accuracy01", + minimize_metric=False, + ) + return runner.best_valid_metrics[runner.main_metric] + + study = optuna.create_study( + direction="maximize", + pruner=optuna.pruners.MedianPruner( + n_startup_trials=1, n_warmup_steps=0, interval_steps=1 + ), + ) + study.optimize(objective, n_trials=10, timeout=300) + print(study.best_value, study.best_params) + +Config API +---------------------------------------------------- + +Firstly, prepare the Optuna-based config. For example, like: + +.. code-block:: yaml + + model_params: + model: SimpleNet + num_filters1: "int(trial.suggest_loguniform('num_filters1', 4, 32))" + num_filters2: "int(trial.suggest_loguniform('num_filters2', 4, 32))" + num_hiddens1: "int(trial.suggest_loguniform('num_hiddens1', 32, 128))" + num_hiddens2: "int(trial.suggest_loguniform('num_hiddens2', 32, 128))" + ... + +After that you can easily run: + +.. 
code-block:: bash + + catalyst-dl tune --config=/path/to/config.yml --verbose + +And visualize current training progress with: + +.. code-block:: bash + + CUDA_VISIBLE_DEVICES="" tensorboard --logdir=/path/to/logdir + + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw \ No newline at end of file diff --git a/docs/faq/slurm.rst b/docs/faq/slurm.rst new file mode 100644 index 0000000000..67c652afd8 --- /dev/null +++ b/docs/faq/slurm.rst @@ -0,0 +1,20 @@ +[WIP] Slurm training +============================================================================== + +- How to run Catalyst experiments on SLURM? + +Hi, + +Still work in progress here. + +But don't feel upset, check out the kitty... `Kittylyst`_ tutorial. + +.. _`Kittylyst`: https://github.com/Scitator/kittylyst + +.. image:: https://raw.githubusercontent.com/Scitator/kittylyst/master/assets/kitty.jpg + :target: https://github.com/Scitator/kittylyst + :alt: kitty + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw diff --git a/docs/contributing/contributors.rst b/docs/faq/stages.rst similarity index 58% rename from docs/contributing/contributors.rst rename to docs/faq/stages.rst index 9fd8844499..04059eb602 100644 --- a/docs/contributing/contributors.rst +++ b/docs/faq/stages.rst @@ -1,4 +1,4 @@ -Contributors +[WIP] Multi-stage experiments ============================================================================== Hi, @@ -11,4 +11,8 @@ But don't feel upset, check out the kitty... `Kittylyst`_ tutorial. .. 
image:: https://raw.githubusercontent.com/Scitator/kittylyst/master/assets/kitty.jpg :target: https://github.com/Scitator/kittylyst - :alt: kitty \ No newline at end of file + :alt: kitty + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw diff --git a/docs/faq/tpu.rst b/docs/faq/tpu.rst new file mode 100644 index 0000000000..782aacc686 --- /dev/null +++ b/docs/faq/tpu.rst @@ -0,0 +1,8 @@ +[WIP] TPU training +============================================================================== + +- How to run Catalyst experiments on TPU? + +If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion. + +.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw diff --git a/docs/getting_started/quickstart.rst b/docs/getting_started/quickstart.rst index b3cfc74c5f..50a69eb7a0 100644 --- a/docs/getting_started/quickstart.rst +++ b/docs/getting_started/quickstart.rst @@ -49,7 +49,7 @@ Let's define **what** we are experimenting with: Step 4 - Accelerate it with Catalyst ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Let's define **how** we are running the experiment: +Let's define **how** we are running the experiment (in pure PyTorch): .. code-block:: python diff --git a/docs/index.rst b/docs/index.rst index 646edd5bf7..69123d36d5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -221,14 +221,22 @@ Indices and tables CV - Variational AutoEncoder CV - GAN - Engine - AMP - Engine - DDP - Engine - TPU + Engine - AMP/DDP/TPU AutoML - Catalyst with Optuna tutorials/ddp +.. toctree:: + :caption: Core + :maxdepth: 2 + :hidden: + + core/experiment + core/runner + core/callback +.. core/engine + .. 
toctree:: :caption: FAQ :maxdepth: 2 @@ -236,15 +244,36 @@ Indices and tables faq/intro + faq/data + faq/lr_finder + + faq/dp + faq/amp + faq/ddp + faq/slurm + faq/tpu + + faq/multi_components + faq/early_stopping + faq/checkpointing + faq/debugging + faq/logging + faq/inference + faq/finetuning + + faq/stages + faq/config_api + faq/optuna + + .. toctree:: :caption: Contributing guide :maxdepth: 2 :hidden: - contributing/how_to_start - contributing/codestyle - contributing/contributors - + How to start + Codestyle + Acknowledgments .. toctree::