diff --git a/catalyst/callbacks/early_stop.py b/catalyst/callbacks/early_stop.py
index 11e3986197..9cf1b112ee 100644
--- a/catalyst/callbacks/early_stop.py
+++ b/catalyst/callbacks/early_stop.py
@@ -7,7 +7,46 @@
 
 
 class CheckRunCallback(Callback):
-    """Executes only a pipeline part from the ``Experiment``."""
+    """Executes only a pipeline part from the ``Experiment``.
+
+    Minimal working example (Notebook API):
+
+    .. code-block:: python
+
+        import torch
+        from torch.utils.data import DataLoader, TensorDataset
+        from catalyst import dl
+
+        # data
+        num_samples, num_features = int(1e4), int(1e1)
+        X, y = torch.rand(num_samples, num_features), torch.rand(num_samples)
+        dataset = TensorDataset(X, y)
+        loader = DataLoader(dataset, batch_size=32, num_workers=1)
+        loaders = {"train": loader, "valid": loader}
+
+        # model, criterion, optimizer, scheduler
+        model = torch.nn.Linear(num_features, 1)
+        criterion = torch.nn.MSELoss()
+        optimizer = torch.optim.Adam(model.parameters())
+        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6])
+
+        # model training
+        runner = dl.SupervisedRunner()
+        runner.train(
+            model=model,
+            criterion=criterion,
+            optimizer=optimizer,
+            scheduler=scheduler,
+            loaders=loaders,
+            logdir="./logdir",
+            num_epochs=8,
+            verbose=True,
+            callbacks=[
+                dl.CheckRunCallback(num_batch_steps=3, num_epoch_steps=3)
+            ]
+        )
+
+    """
 
     def __init__(self, num_batch_steps: int = 3, num_epoch_steps: int = 2):
         """
@@ -41,24 +80,42 @@ def on_batch_end(self, runner: "IRunner"):
 class EarlyStoppingCallback(Callback):
     """Early exit based on metric.
 
-    Example of usage in notebook API:
+    Minimal working example (Notebook API):
 
     .. code-block:: python
 
-        runner = SupervisedRunner()
+        import torch
+        from torch.utils.data import DataLoader, TensorDataset
+        from catalyst import dl
+
+        # data
+        num_samples, num_features = int(1e4), int(1e1)
+        X, y = torch.rand(num_samples, num_features), torch.rand(num_samples)
+        dataset = TensorDataset(X, y)
+        loader = DataLoader(dataset, batch_size=32, num_workers=1)
+        loaders = {"train": loader, "valid": loader}
+
+        # model, criterion, optimizer, scheduler
+        model = torch.nn.Linear(num_features, 1)
+        criterion = torch.nn.MSELoss()
+        optimizer = torch.optim.Adam(model.parameters())
+        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6])
+
+        # model training
+        runner = dl.SupervisedRunner()
         runner.train(
-            ...
-            callbacks=[
-                ...
-                EarlyStoppingCallback(
-                    patience=5,
-                    metric="my_metric",
-                    minimize=True,
-                )
-                ...
-            ]
+          model=model,
+          criterion=criterion,
+          optimizer=optimizer,
+          scheduler=scheduler,
+          loaders=loaders,
+          logdir="./logdir",
+          num_epochs=8,
+          verbose=True,
+          callbacks=[
+            dl.EarlyStoppingCallback(patience=2, metric="loss", minimize=True)
+          ]
         )
-        ...
 
     Example of usage in config API:
 
diff --git a/catalyst/metrics/__init__.py b/catalyst/metrics/__init__.py
index 04b7a45e79..8437923844 100644
--- a/catalyst/metrics/__init__.py
+++ b/catalyst/metrics/__init__.py
@@ -1,16 +1,4 @@
 # flake8: noqa
-from catalyst.metrics.accuracy import accuracy, multi_label_accuracy
-from catalyst.metrics.avg_precision import avg_precision, mean_avg_precision
-from catalyst.metrics.auc import auc
-from catalyst.metrics.cmc_score import cmc_score, cmc_score_count
-from catalyst.metrics.ndcg import dcg, ndcg
-from catalyst.metrics.dice import dice, calculate_dice
-from catalyst.metrics.f1_score import f1_score, fbeta_score
-from catalyst.metrics.hitrate import hitrate
-from catalyst.metrics.classification import precision_recall_fbeta_support
-from catalyst.metrics.precision import precision
-from catalyst.metrics.recall import recall
-from catalyst.metrics.focal import sigmoid_focal_loss, reduced_focal_loss
 from catalyst.metrics.functional import (
     process_multilabel_components,
     get_binary_statistics,
@@ -20,6 +8,18 @@
     wrap_class_metric2dict,
     wrap_topk_metric2dict,
 )
+from catalyst.metrics.classification import precision_recall_fbeta_support
+
+from catalyst.metrics.accuracy import accuracy, multi_label_accuracy
+from catalyst.metrics.auc import auc
+from catalyst.metrics.avg_precision import avg_precision, mean_avg_precision
+from catalyst.metrics.cmc_score import cmc_score, cmc_score_count
+from catalyst.metrics.dice import dice, calculate_dice
+from catalyst.metrics.f1_score import f1_score, fbeta_score
+from catalyst.metrics.focal import sigmoid_focal_loss, reduced_focal_loss
+from catalyst.metrics.hitrate import hitrate
 from catalyst.metrics.iou import iou, jaccard
 from catalyst.metrics.mrr import mrr
-from catalyst.metrics.precision import average_precision
+from catalyst.metrics.ndcg import dcg, ndcg
+from catalyst.metrics.precision import average_precision, precision
+from catalyst.metrics.recall import recall
diff --git a/catalyst/metrics/recall.py b/catalyst/metrics/recall.py
index 52319a2b70..4a65f388a8 100644
--- a/catalyst/metrics/recall.py
+++ b/catalyst/metrics/recall.py
@@ -37,3 +37,6 @@ def recall(
     )
 
     return recall_score
+
+
+__all__ = ["recall"]
diff --git a/docs/api/metrics.rst b/docs/api/metrics.rst
index 00acbf14b0..dfda80dad6 100644
--- a/docs/api/metrics.rst
+++ b/docs/api/metrics.rst
@@ -78,13 +78,6 @@ MRR
 
 MAP
 ------------------------
-.. automodule:: catalyst.metrics.mean_avg_precision
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-AP
-------------------------
 .. automodule:: catalyst.metrics.avg_precision
     :members:
     :undoc-members:
@@ -97,16 +90,16 @@ NDCG
     :undoc-members:
     :show-inheritance:
 
-DCG
+Precision
 ------------------------
-.. automodule:: catalyst.metrics.dcg
+.. automodule:: catalyst.metrics.precision
     :members:
     :undoc-members:
     :show-inheritance:
 
-Precision
+Recall
 ------------------------
-.. automodule:: catalyst.metrics.precision
+.. automodule:: catalyst.metrics.recall
     :members:
     :undoc-members:
     :show-inheritance:
diff --git a/docs/contributing/how_to_start.rst b/docs/contributing/how_to_start.rst
deleted file mode 100644
index d2884757d1..0000000000
--- a/docs/contributing/how_to_start.rst
+++ /dev/null
@@ -1,123 +0,0 @@
-How to start
-==============================================================================
-
-Issues
-------
-
-We use `GitHub issues`_ for bug reports and feature requests.
-
-Step-by-step guide
-^^^^^^^^^^^^^^^^^^
-
-New feature
-'''''''''''
-
-1. Make an issue with your feature description;
-2. We shall discuss the design and its implementation details;
-3. Once we agree that the plan looks good, go ahead and implement it.
-
-Bugfix
-''''''
-
-1. Goto `GitHub issues`_;
-2. Pick an issue and comment on the task that you want to work on this
-   feature;
-3. If you need more context on a specific issue, please ask, and we will
-   discuss the details.
-
-Once you finish implementing a feature or bugfix, please send a Pull
-Request.
-
-If you are not familiar with creating a Pull Request, here are some
-guides:
-
-- http://stackoverflow.com/questions/14680711/how-to-do-a-github-pull-request
-- https://help.github.com/articles/creating-a-pull-request/
-
-Contribution best practices
-'''''''''''''''''''''''''''
-
-1. Install requirements
-
-.. code-block:: bash
-
-    brew install bash # for MacOS users
-    pip install -r requirements/requirements.txt -r requirements/requirements-dev.txt
-
-2. Break your work into small, single-purpose updates if possible. It's much harder to merge in a large change with a lot of disjoint features.
-3. Submit the update as a GitHub pull request against the `master` branch.
-4. Make sure that you provide docstrings for all your new methods and classes.
-5. Add new unit tests for your code.
-6. Check the codestyle
-7. Make sure that your code passes the unit tests
-
-
-Codestyle
-^^^^^^^^^
-
-Do not forget to check the codestyle for your PR with
-
-.. code-block:: bash
-
-    catalyst-make-codestyle && catalyst-check-codestyle
-
-Make sure to have your python packages complied with `requirements/requirements.txt` and `requirements/requirements-dev.txt` to get codestyle run clean.
-
-
-Unit tests
-^^^^^^^^^^
-
-Do not forget to check that your code passes the unit tests
-
-.. code-block:: bash
-
-    pytest .
-
-
-Documentation
--------------
-
-Catalyst uses `Google style`_ for formatting `docstrings`_. Length of line
-inside docstrings block must be limited to 80 characters to fit into
-Jupyter documentation popups.
-
-Check that you have written working docs
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-.. code-block:: bash
-
-    make check-docs
-
-The command requires Sphinx and some sphinx-specific libraries.
-If you don't want to install them, you may make a catalyst-dev container
-
-.. code-block:: bash
-
-    make docker-dev
-    docker run \\
-        -v `pwd`/:/workspace/ \\
-        catalyst-dev:latest \\
-        bash -c "make check-docs"
-
-To build docs add environment variable REMOVE_BUILDS=0
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-.. code-block:: bash
-
-    REMOVE_BUILDS=0 make check-docs
-
-or through docker
-
-.. code-block:: bash
-
-    docker run \\
-        -v `pwd`/:/workspace/ \\
-        catalyst-dev:latest \\
-        bash -c "REMOVE_BUILDS=0 make check-docs"
-
-The docs will be stored in `builds/` folder.
-
-
-.. _GitHub issues: https://github.com/catalyst-team/catalyst/issues
-.. _Google style: http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
-.. _docstrings: https://github.com/google/styleguide/blob/gh-pages/pyguide.md#38-comments-and-docstrings
\ No newline at end of file
diff --git a/docs/core/callback.rst b/docs/core/callback.rst
new file mode 100644
index 0000000000..4f2614c2ab
--- /dev/null
+++ b/docs/core/callback.rst
@@ -0,0 +1,3 @@
+Callback
+==============================================================================
+
diff --git a/docs/core/experiment.rst b/docs/core/experiment.rst
new file mode 100644
index 0000000000..1e8490b55c
--- /dev/null
+++ b/docs/core/experiment.rst
@@ -0,0 +1,29 @@
+Experiment
+==============================================================================
+
+Experiment is an abstraction that contains information about the experiment:
+a model, a criterion, an optimizer, a scheduler, and their hyperparameters.
+It also holds information about the data and transformations to apply.
+The Experiment knows **what** you would like to run.
+
+Each deep learning project has several main components.
+These primitives define what we want to use during the experiment:
+
+- the data
+- the model(s)
+- the optimizer(s)
+- the loss(es)
+- and the scheduler(s) if we need them.
+
+These are the abstractions that Experiment covers in Catalyst,
+with a few modifications for easier experiment monitoring
+and hyperparameters logging. For each stage of our experiment,
+the Experiment provides interfaces to all primitives above + the callbacks.
+
+.. image:: https://raw.githubusercontent.com/catalyst-team/catalyst-pics/master/third_party_pics/catalyst102-experiment.png
+    :alt: Experiment
+
+
+----
+
+- what is the difference between different Experiments?
\ No newline at end of file
diff --git a/docs/core/runner.rst b/docs/core/runner.rst
new file mode 100644
index 0000000000..eff4a1fa8b
--- /dev/null
+++ b/docs/core/runner.rst
@@ -0,0 +1,6 @@
+Runner
+==============================================================================
+
+----
+
+- what is the difference between different Runners?
\ No newline at end of file
diff --git a/docs/faq/amp.rst b/docs/faq/amp.rst
new file mode 100644
index 0000000000..801218ab2a
--- /dev/null
+++ b/docs/faq/amp.rst
@@ -0,0 +1,9 @@
+Mixed precision training
+==============================================================================
+
+- How to use Nvidia Apex?
+- How to use torch.amp?
+
+If you haven't found the answer to your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
\ No newline at end of file
diff --git a/docs/faq/checkpointing.rst b/docs/faq/checkpointing.rst
new file mode 100644
index 0000000000..db1ec46329
--- /dev/null
+++ b/docs/faq/checkpointing.rst
@@ -0,0 +1,12 @@
+[WIP] Model checkpointing
+==============================================================================
+
+- how to load the best model?
+- notebook and config api
+- how to save model?
+- how to load model?
+- what's the difference between checkpoint and checkpoint_full?
+
+If you haven't found the answer to your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
\ No newline at end of file
diff --git a/docs/contributing/codestyle.rst b/docs/faq/config_api.rst
similarity index 60%
rename from docs/contributing/codestyle.rst
rename to docs/faq/config_api.rst
index 3b92a95ea2..37a07199e3 100644
--- a/docs/contributing/codestyle.rst
+++ b/docs/faq/config_api.rst
@@ -1,4 +1,4 @@
-Codestyle
+[WIP] Config API
 ==============================================================================
 
 Hi,
@@ -11,4 +11,8 @@ But don't feel upset, check out the kitty... `Kittylyst`_ tutorial.
 
 .. image:: https://raw.githubusercontent.com/Scitator/kittylyst/master/assets/kitty.jpg
     :target: https://github.com/Scitator/kittylyst
-    :alt: kitty
\ No newline at end of file
+    :alt: kitty
+
+If you haven't found the answer to your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
diff --git a/docs/faq/data.rst b/docs/faq/data.rst
new file mode 100644
index 0000000000..8040697076
--- /dev/null
+++ b/docs/faq/data.rst
@@ -0,0 +1,321 @@
+Dataflow
+==============================================================================
+
+Base dataflow
+----------------------------------------------------
+Catalyst uses the "key-value is all you need" approach.
+In other words, it expects key-value outputs from your Dataset/Dataloader.
+
+Example dataflow:
+
+.. code-block:: python
+
+    class MyDataset:
+
+        def __getitem__(self, index):
+            ...
+            return {"features": np.ndarray, "targets": np.ndarray}
+
+    class MyModel:
+
+        def forward(self, features):
+            ...
+            return logits
+
+    class MyRunner:
+
+        def _handle_batch(self, batch):
+            # on this step we also have self.input = batch = {"features": ..., "targets": ...}
+            logits = self.model(batch["features"])
+            loss = self.criterion(logits, batch["targets"])
+            self.output = {"logits": logits}
+            # this is useful for other components of the pipeline
+
+    loader = MyDataset()
+    model = MyModel()
+    runner = MyRunner()
+    runner.train(...)
+
+.. note::
+
+    ``SupervisedRunner`` has data preprocessing features to transform
+    tuple/list-based data into key-value.
+
+Such an approach easily extends to any number of features and targets and is
+very convenient to read, thanks to "automatic naming documentation" - keys for the values:
+
+.. code-block:: python
+
+    class MyDataset:
+
+        def __getitem__(self, index):
+            ...
+            return {"features": np.ndarray, "extra_features": np.ndarray, "targets": np.ndarray}
+
+    class MyModel:
+
+        def forward(self, features, extra_features):
+            ...
+            return logits
+
+    class MyRunner:
+
+        def _handle_batch(self, batch):
+            # on this step we also have self.input = batch = {"features": ..., "extra_features": ...,"targets": ...}
+            logits = self.model(batch["features"], batch["extra_features"])
+            loss = self.criterion(logits, batch["targets"])
+            self.output = {"logits": logits}
+            # this is useful for other components of the pipeline
+
+    loader = MyDataset()
+    model = MyModel()
+    runner = MyRunner()
+    runner.train(...)
+
+Moreover, if some of the features are not required anymore -
+you don't have to rewrite your code:
+
+.. code-block:: python
+
+    class MyDataset:
+
+        def __getitem__(self, index):
+            ...
+            return {"features": np.ndarray, "extra_features": np.ndarray, "targets": np.ndarray}
+
+    class MyModel:
+
+        def forward(self, features):
+            ...
+            return logits
+
+    class MyRunner:
+
+        def _handle_batch(self, batch):
+            # on this step we also have self.input = batch = {"features": ..., "extra_features": ...,"targets": ...}
+            logits = self.model(batch["features"])
+            loss = self.criterion(logits, batch["targets"])
+            self.output = {"logits": logits}
+            # this is useful for other components of the pipeline
+
+    loader = MyDataset()
+    model = MyModel()
+    runner = MyRunner()
+    runner.train(...)
+
+
+Key-value storage is also used to store the datasets/loaders for the experiment.
+In this case we also need to use ``OrderedDict`` to ensure correct epoch handling -
+so that your model is first trained on some ``train`` dataset
+and only then evaluated on some ``valid`` dataset:
+
+.. code-block:: python
+
+    train_loader = MyDataset(...)
+    valid_loader = MyDataset(...)
+    loaders = OrderedDict(train=train_loader, valid=valid_loader)
+    model = MyModel()
+    runner = MyRunner()
+    runner.train(model=model, loaders=loaders)
+
+Catalyst uses the following "automatic naming documentation" for loader keys handling:
+
+- if loader_key starts with "train" - it's our train datasource, we need to run forward and backward passes on it.
+- if loader_key starts with "valid" - it's our validation datasource, we need to run forward, but not the backward pass on it.
+- if loader_key starts with "infer" - it's our datasource for model inference, we need to run forward, but not the backward pass on it.
+
+Multiple datasources
+----------------------------------------------------
+Thanks to key-value approach,
+it's possible to handle any number of datasets/loaders
+without code changes or tricks with Datasets concatenation, etc:
+
+.. code-block:: python
+
+    train_loader = MyDataset(...)
+    train2_loader = MyDataset(...)
+    valid_loader = MyDataset(...)
+    valid2_loader = MyDataset(...)
+    loaders = OrderedDict(
+        train=train_loader,
+        train2=train2_loader,
+        valid=valid_loader,
+        valid2=valid2_loader,
+    )
+    model = MyModel()
+    runner = MyRunner()
+    runner.train(model=model, loaders=loaders)
+
+What is even more interesting, you could also do something like:
+
+.. code-block:: python
+
+    train_loader = MyDataset(...)
+    train2_loader = MyDataset(...)
+    valid_loader = MyDataset(...)
+    valid2_loader = MyDataset(...)
+    loaders = OrderedDict(
+        train=train_loader,
+        valid=valid_loader,
+        train2=train2_loader,
+        valid2=valid2_loader,
+
+    )
+    model = MyModel()
+    runner = MyRunner()
+    runner.train(model=model, loaders=loaders)
+
+Once again, it's also valid to do something like:
+
+.. code-block:: python
+
+    train_loader = MyDataset(...)
+    train2_loader = MyDataset(...)
+    valid_loader = MyDataset(...)
+    valid2_loader = MyDataset(...)
+    loaders = OrderedDict(
+        train=concat_datasets(train_loader, train2_loader),
+        valid=concat_datasets(valid_loader, valid2_loader),
+    )
+    model = MyModel()
+    runner = MyRunner()
+    runner.train(model=model, loaders=loaders)
+
+
+Loader for model selection
+----------------------------------------------------
+In case of multiple loaders, you could easily select one for model selection
+with ``valid_loader`` param in the ``runner.train``.
+For example, to use ``valid2`` loaders as your
+model selection one you could do the following:
+
+.. code-block:: python
+
+    train_loader = MyDataset(...)
+    train2_loader = MyDataset(...)
+    valid_loader = MyDataset(...)
+    valid2_loader = MyDataset(...)
+    loaders = OrderedDict(
+        train=train_loader,
+        train2=train2_loader,
+        valid=valid_loader,
+        valid2=valid2_loader,
+    )
+    model = MyModel()
+    runner = MyRunner()
+    runner.train(model=model, loaders=loaders, valid_loader="valid2")
+
+.. note::
+
+    By default, Catalyst uses
+    ``valid_loader=valid`` for model selection.
+
+
+Metric for model selection
+----------------------------------------------------
+Suppose, you are using a number of different metrics in your pipeline:
+
+.. code-block:: python
+
+    class MyRunner:
+
+        def _handle_batch(self, batch):
+            # on this step we also have self.input = batch = {"features": ..., "targets": ...}
+            logits = self.model(batch["features"])
+            loss = self.criterion(logits, batch["targets"])
+            accuracy01, accuracy03 = accuracy(logits, batch["targets"], topk=(1, 3))
+            self.batch_metrics.update(**{
+                "loss": loss,
+                "accuracy01": accuracy01,
+                "accuracy03": accuracy03,
+            })
+            self.output = {"logits": logits}
+            # this is useful for other components of the pipeline
+
+    loaders = ...
+    model = ...
+    runner = MyRunner()
+    runner.train(model=model, loaders=loaders)
+
+You could select one for model selection with ``main_metric`` and ``minimize_metric``
+params in the ``runner.train``. For example, to use ``accuracy01`` metric
+as your model selection one you could do the following:
+
+.. code-block:: python
+
+    class MyRunner:
+
+        def _handle_batch(self, batch):
+            # on this step we also have self.input = batch = {"features": ..., "targets": ...}
+            logits = self.model(batch["features"])
+            loss = self.criterion(logits, batch["targets"])
+            accuracy01, accuracy03 = accuracy(logits, batch["targets"], topk=(1, 3))
+            self.batch_metrics.update(**{
+                "loss": loss,
+                "accuracy01": accuracy01,
+                "accuracy03": accuracy03,
+            })
+            self.output = {"logits": logits}
+            # this is useful for other components of the pipeline
+
+    loaders = ...
+    model = ...
+    runner = MyRunner()
+    # since we would like to maximize our model accuracy...
+    runner.train(model=model, loaders=loaders, main_metric="accuracy01", minimize_metric=False)
+
+.. note::
+
+    By default, Catalyst uses
+    ``main_metric=loss`` and ``minimize_metric=True``
+    for model selection.
+
+Use part of the data
+----------------------------------------------------
+If you would like to use only some part of your data from the loader
+(for example, you would like to check your pipeline and overfit for one small portion of the data),
+you could use ``BatchLimitLoaderWrapper``:
+
+.. code-block:: python
+
+    train_loader = BatchLimitLoaderWrapper(MyDataset(...), num_batches=1)
+    valid_loader = MyDataset(...)
+    loaders = OrderedDict(train=train_loader, valid=valid_loader)
+    model = MyModel()
+    runner = MyRunner()
+    runner.train(model=model, loaders=loaders)
+
+As a more user-friendly approach with ``runner.train``:
+
+.. code-block:: python
+
+    train_loader = MyDataset(...)
+    valid_loader = MyDataset(...)
+    loaders = OrderedDict(train=train_loader, valid=valid_loader)
+    model = MyModel()
+    runner = MyRunner()
+    # here we overfit for one batch per loader
+    runner.train(model=model, loaders=loaders, overfit=True)
+
+And a more convenient and customizable way:
+
+.. code-block:: python
+
+    train_loader = MyDataset(...)
+    valid_loader = MyDataset(...)
+    loaders = OrderedDict(train=train_loader, valid=valid_loader)
+    model = MyModel()
+    runner = MyRunner()
+    # here we overfit for 10 batches in `train` loader
+    # and half of the `valid` loader
+    runner.train(
+        model=model,
+        loaders=loaders,
+        callbacks=[dl.BatchOverfitCallback(train=10, valid=0.5)]
+    )
+
+----
+
+If you haven't found the answer to your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
diff --git a/docs/faq/ddp.rst b/docs/faq/ddp.rst
new file mode 100644
index 0000000000..629263d63d
--- /dev/null
+++ b/docs/faq/ddp.rst
@@ -0,0 +1,9 @@
+[WIP] Distributed training
+==============================================================================
+
+- How to run experiments in distributed mode?
+- (?) How to collect metrics in distributed mode in the right way?
+
+If you haven't found the answer to your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
\ No newline at end of file
diff --git a/docs/faq/debugging.rst b/docs/faq/debugging.rst
new file mode 100644
index 0000000000..9c3fef15e5
--- /dev/null
+++ b/docs/faq/debugging.rst
@@ -0,0 +1,83 @@
+Model debugging
+==============================================================================
+
+Pipeline debugging
+----------------------------------------------------
+To check pipeline correctness, i.e. that everything is working correctly
+and does not throw any errors, we suggest using ``CheckRunCallback``.
+You could find more information about it :doc:`here <early_stopping>`.
+
+To check model convergence within the pipeline,
+we suggest using ``BatchOverfitCallback``.
+You could find more information about it :doc:`here <data>`.
+
+Python debugging
+----------------------------------------------------
+For python debugging we suggest using ``ipdb``. You could install it with:
+
+.. code-block:: bash
+
+    pip install ipdb
+
+After that you could stop the pipeline in the place you prefer, for example:
+
+.. code-block:: python
+
+    import os
+    import torch
+    from torch.nn import functional as F
+    from torch.utils.data import DataLoader
+    from catalyst import dl, metrics
+    from catalyst.data.cv import ToTensor
+    from catalyst.contrib.datasets import MNIST
+
+    model = torch.nn.Linear(28 * 28, 10)
+    optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
+
+    loaders = {
+        "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32),
+        "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32),
+    }
+
+    class CustomRunner(dl.Runner):
+
+        def predict_batch(self, batch):
+            # model inference step
+            return self.model(batch[0].to(self.device).view(batch[0].size(0), -1))
+
+        def _handle_batch(self, batch):
+            # model train/valid step
+            x, y = batch
+            y_hat = self.model(x.view(x.size(0), -1))
+
+            # let's stop before metric computation, but after model forward pass
+            import ipdb; ipdb.set_trace()
+            # <--- we will stop here --->
+            loss = F.cross_entropy(y_hat, y)
+            accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3))
+            self.batch_metrics.update(
+                {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
+            )
+
+
+            if self.is_train_loader:
+                loss.backward()
+                self.optimizer.step()
+                self.optimizer.zero_grad()
+
+    runner = CustomRunner()
+    # model training
+    runner.train(
+        model=model,
+        optimizer=optimizer,
+        loaders=loaders,
+        logdir="./logs",
+        num_epochs=5,
+        verbose=True,
+        load_best_on_end=True,
+    )
+
+
+If you haven't found the answer to your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
\ No newline at end of file
diff --git a/docs/faq/dp.rst b/docs/faq/dp.rst
new file mode 100644
index 0000000000..c13bbb5434
--- /dev/null
+++ b/docs/faq/dp.rst
@@ -0,0 +1,6 @@
+[WIP] DataParallel training (single/multi-gpu)
+==============================================================================
+
+If you haven't found the answer to your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
\ No newline at end of file
diff --git a/docs/faq/early_stopping.rst b/docs/faq/early_stopping.rst
new file mode 100644
index 0000000000..8fc1f15c3f
--- /dev/null
+++ b/docs/faq/early_stopping.rst
@@ -0,0 +1,118 @@
+Early stopping
+==============================================================================
+
+Early stopping
+----------------------------------------------------
+
+To use experiment early stopping you could use ``EarlyStoppingCallback``:
+
+.. code-block:: python
+
+    import torch
+    from torch.utils.data import DataLoader, TensorDataset
+    from catalyst import dl
+
+    # data
+    num_samples, num_features = int(1e4), int(1e1)
+    X, y = torch.rand(num_samples, num_features), torch.rand(num_samples)
+    dataset = TensorDataset(X, y)
+    loader = DataLoader(dataset, batch_size=32, num_workers=1)
+    loaders = {"train": loader, "valid": loader}
+
+    # model, criterion, optimizer, scheduler
+    model = torch.nn.Linear(num_features, 1)
+    criterion = torch.nn.MSELoss()
+    optimizer = torch.optim.Adam(model.parameters())
+    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6])
+
+    # model training
+    runner = dl.SupervisedRunner()
+    runner.train(
+        model=model,
+        criterion=criterion,
+        optimizer=optimizer,
+        scheduler=scheduler,
+        loaders=loaders,
+        logdir="./logdir",
+        num_epochs=8,
+        verbose=True,
+        callbacks=[dl.EarlyStoppingCallback(patience=2, metric="loss", minimize=True)]
+    )
+
+Pipeline checking
+----------------------------------------------------
+You could also check the pipeline
+(run only 3 batches per loader, and 3 epochs per stage)
+with ``CheckRunCallback``:
+
+.. code-block:: python
+
+    import torch
+    from torch.utils.data import DataLoader, TensorDataset
+    from catalyst import dl
+
+    # data
+    num_samples, num_features = int(1e4), int(1e1)
+    X, y = torch.rand(num_samples, num_features), torch.rand(num_samples)
+    dataset = TensorDataset(X, y)
+    loader = DataLoader(dataset, batch_size=32, num_workers=1)
+    loaders = {"train": loader, "valid": loader}
+
+    # model, criterion, optimizer, scheduler
+    model = torch.nn.Linear(num_features, 1)
+    criterion = torch.nn.MSELoss()
+    optimizer = torch.optim.Adam(model.parameters())
+    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6])
+
+    # model training
+    runner = dl.SupervisedRunner()
+    runner.train(
+        model=model,
+        criterion=criterion,
+        optimizer=optimizer,
+        scheduler=scheduler,
+        loaders=loaders,
+        logdir="./logdir",
+        num_epochs=8,
+        verbose=True,
+        callbacks=[dl.CheckRunCallback(num_batch_steps=3, num_epoch_steps=3)]
+    )
+
+You could also use ``runner.train(..., check=True)`` with Notebook API approach:
+
+.. code-block:: python
+
+    import torch
+    from torch.utils.data import DataLoader, TensorDataset
+    from catalyst import dl
+
+    # data
+    num_samples, num_features = int(1e4), int(1e1)
+    X, y = torch.rand(num_samples, num_features), torch.rand(num_samples)
+    dataset = TensorDataset(X, y)
+    loader = DataLoader(dataset, batch_size=32, num_workers=1)
+    loaders = {"train": loader, "valid": loader}
+
+    # model, criterion, optimizer, scheduler
+    model = torch.nn.Linear(num_features, 1)
+    criterion = torch.nn.MSELoss()
+    optimizer = torch.optim.Adam(model.parameters())
+    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6])
+
+    # model training
+    runner = dl.SupervisedRunner()
+    runner.train(
+        model=model,
+        criterion=criterion,
+        optimizer=optimizer,
+        scheduler=scheduler,
+        loaders=loaders,
+        logdir="./logdir",
+        num_epochs=8,
+        verbose=True,
+        check=True,
+    )
+
+If you haven't found the answer to your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
\ No newline at end of file
diff --git a/docs/faq/finetuning.rst b/docs/faq/finetuning.rst
new file mode 100644
index 0000000000..ed027e814d
--- /dev/null
+++ b/docs/faq/finetuning.rst
@@ -0,0 +1,6 @@
+[WIP] Finetuning
+==============================================================================
+
+If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
\ No newline at end of file
diff --git a/docs/faq/inference.rst b/docs/faq/inference.rst
new file mode 100644
index 0000000000..d60cecea55
--- /dev/null
+++ b/docs/faq/inference.rst
@@ -0,0 +1,92 @@
+Inference
+==============================================================================
+
+To use your model in the inference mode,
+you could redefine the ``Runner.predict_batch``.
+
+Suppose you have the following classification pipeline:
+
+.. code-block:: python
+
+    import os
+    import torch
+    from torch.nn import functional as F
+    from torch.utils.data import DataLoader
+    from torchvision.datasets import MNIST
+    from torchvision.transforms import ToTensor
+    from catalyst import dl, metrics
+
+    model = torch.nn.Linear(28 * 28, 10)
+    criterion = torch.nn.CrossEntropyLoss()
+    optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
+
+    loaders = {
+        "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32),
+        "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32),
+    }
+
+    class CustomRunner(dl.Runner):
+
+        # <--- model inference step --->
+        def predict_batch(self, batch):
+            return self.model(batch[0].to(self.device).view(batch[0].size(0), -1))
+        # <--- model inference step --->
+
+        def _handle_batch(self, batch):
+            # model train/valid step
+            x, y = batch
+            y_hat = self.model(x.view(x.size(0), -1))
+
+            loss = self.criterion(y_hat, y)
+            accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3))
+            self.batch_metrics.update(
+                {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
+            )
+
+            if self.is_train_loader:
+                loss.backward()
+                self.optimizer.step()
+                self.optimizer.zero_grad()
+
+    runner = CustomRunner()
+    # model training
+    runner.train(
+        model=model,
+        criterion=criterion,
+        optimizer=optimizer,
+        loaders=loaders,
+        logdir="./logs",
+        num_epochs=5,
+        verbose=True,
+        load_best_on_end=True,
+    )
+
+Now you could easily predict your data with the Runner-specified logic.
+
+Predict batch
+----------------------------------------------------
+If you want to predict one batch:
+
+.. code-block:: python
+
+    batch_prediction = runner.predict_batch(next(iter(loaders["valid"])))
+    # which would be the same as
+    batch_model_prediction = model(next(iter(loaders["valid"]))[0].view(32, -1))
+    batch_prediction == batch_model_prediction
+    >>> True
+
+Predict loader
+----------------------------------------------------
+If you want to predict entire loader:
+
+.. code-block:: python
+
+    for prediction in runner.predict_loader(loader=loaders["valid"]):
+        assert prediction.detach().cpu().numpy().shape[-1] == 10
+
+The ``runner.predict_loader`` method just iteratively goes through the loader batches,
+makes model predictions and yields the results.
+
+If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
\ No newline at end of file
diff --git a/docs/faq/logging.rst b/docs/faq/logging.rst
new file mode 100644
index 0000000000..f569da91db
--- /dev/null
+++ b/docs/faq/logging.rst
@@ -0,0 +1,110 @@
+Logging
+==============================================================================
+
+Metrics logging
+----------------------------------------------------
+Catalyst supports a variety of metric storages during the experiment:
+
+- ``runner.batch_metrics`` - dictionary, flat storage for batch metrics.
+    ::
+
+        runner.batch_metrics = {"loss": ..., "accuracy": ..., "iou": ...}
+
+- ``runner.loader_metrics`` - dictionary with aggregated batch statistics for loader (mean over all batches) and global loader metrics, like AUC.
+    ::
+
+        runner.loader_metrics = {"loss": ..., "accuracy": ..., "auc": ...}
+
+- ``runner.epoch_metrics`` - dictionary with summarized metrics for different loaders and global epoch metrics, like lr, momentum.
+    ::
+
+        runner.epoch_metrics = {
+            "train_loss": ..., "train_auc": ..., "valid_loss": ...,
+            "lr": ..., "momentum": ...,
+        }
+
+- ``runner.valid_metrics`` - dictionary with validation metrics for current epoch.
+    ::
+
+        runner.valid_metrics = {"loss": ..., "accuracy": ..., "auc": ...}
+
+- ``runner.best_valid_metrics`` - dictionary with best validation metrics during whole training process.
+    ::
+
+        runner.best_valid_metrics = {"loss": ..., "accuracy": ..., "auc": ...}
+
+You could log any new metric in a straightforward way:
+
+.. code-block:: python
+
+    import os
+    import torch
+    from torch.nn import functional as F
+    from torch.utils.data import DataLoader
+    from catalyst import dl, metrics
+    from catalyst.data.cv import ToTensor
+    from catalyst.contrib.datasets import MNIST
+
+    model = torch.nn.Linear(28 * 28, 10)
+    optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
+
+    loaders = {
+        "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32),
+        "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32),
+    }
+
+    class CustomRunner(dl.Runner):
+
+        def predict_batch(self, batch):
+            # model inference step
+            return self.model(batch[0].to(self.device).view(batch[0].size(0), -1))
+
+        def _handle_batch(self, batch):
+            # model train/valid step
+            x, y = batch
+            y_hat = self.model(x.view(x.size(0), -1))
+
+            loss = F.cross_entropy(y_hat, y)
+            accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3))
+            # <--- logging --->
+            # here we are adding loss, accuracy01 and accuracy03 to the batch metrics
+            self.batch_metrics.update(
+                {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
+            )
+            # <--- logging --->
+
+            if self.is_train_loader:
+                loss.backward()
+                self.optimizer.step()
+                self.optimizer.zero_grad()
+
+    runner = CustomRunner()
+    # model training
+    runner.train(
+        model=model,
+        optimizer=optimizer,
+        loaders=loaders,
+        logdir="./logs",
+        num_epochs=5,
+        verbose=True,
+        load_best_on_end=True,
+    )
+
+[WIP] Metrics logging with callback
+----------------------------------------------------
+
+- todo
+
+[WIP] Supported loggers
+----------------------------------------------------
+
+- console
+- txt
+- Tensorboard
+- Alchemy
+- Neptune
+- Weights and Biases
+
+If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
\ No newline at end of file
diff --git a/docs/faq/lr_finder.rst b/docs/faq/lr_finder.rst
new file mode 100644
index 0000000000..7b2b13f645
--- /dev/null
+++ b/docs/faq/lr_finder.rst
@@ -0,0 +1,21 @@
+[WIP] Learning Rate Finder
+==============================================================================
+
+- How to find optimal learning rate?
+- based on https://sgugger.github.io/how-do-you-find-a-good-learning-rate.html
+
+Hi,
+
+Still work in progress here.
+
+But don't feel upset, check out the kitty... `Kittylyst`_ tutorial.
+
+.. _`Kittylyst`: https://github.com/Scitator/kittylyst
+
+.. image:: https://raw.githubusercontent.com/Scitator/kittylyst/master/assets/kitty.jpg
+    :target: https://github.com/Scitator/kittylyst
+    :alt: kitty
+
+If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
diff --git a/docs/faq/multi_components.rst b/docs/faq/multi_components.rst
new file mode 100644
index 0000000000..cec50ac5eb
--- /dev/null
+++ b/docs/faq/multi_components.rst
@@ -0,0 +1,282 @@
+Multiple components
+==============================================================================
+
+Thanks to Catalyst's "key-value is all you need" approach,
+it's very easy to run experiments in a multi-component setup
+(several models, criterions, optimizers, schedulers).
+
+Suppose you have the following classification pipeline:
+
+.. code-block:: python
+
+    import os
+    import torch
+    from torch.nn import functional as F
+    from torch.utils.data import DataLoader
+    from torchvision.datasets import MNIST
+    from torchvision.transforms import ToTensor
+    from catalyst import dl, metrics
+
+    model = torch.nn.Linear(28 * 28, 10)
+    criterion = torch.nn.CrossEntropyLoss()
+    optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
+
+    loaders = {
+        "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32),
+        "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32),
+    }
+
+    class CustomRunner(dl.Runner):
+
+        def predict_batch(self, batch):
+            # model inference step
+            return self.model(batch[0].to(self.device).view(batch[0].size(0), -1))
+
+        def _handle_batch(self, batch):
+            # model train/valid step
+            x, y = batch
+            y_hat = self.model(x.view(x.size(0), -1))
+
+            loss = self.criterion(y_hat, y)
+            accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3))
+            self.batch_metrics.update(
+                {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
+            )
+
+            if self.is_train_loader:
+                loss.backward()
+                self.optimizer.step()
+                self.optimizer.zero_grad()
+
+    runner = CustomRunner()
+    # model training
+    runner.train(
+        model=model,
+        criterion=criterion,
+        optimizer=optimizer,
+        loaders=loaders,
+        logdir="./logs",
+        num_epochs=5,
+        verbose=True,
+        load_best_on_end=True,
+    )
+
+Multi-model
+----------------------------------------------------
+Multi-model example:
+
+.. code-block:: python
+
+    import os
+    import torch
+    from torch.nn import functional as F
+    from torch.utils.data import DataLoader
+    from torchvision.datasets import MNIST
+    from torchvision.transforms import ToTensor
+    from catalyst import dl, metrics
+
+    # <--- multi-model setup --->
+    encoder = torch.nn.Linear(28 * 28, 128)
+    head = torch.nn.Linear(128, 10)
+    model = {"encoder": encoder, "head": head}
+    optimizer = torch.optim.Adam([
+        {'params': encoder.parameters()},
+        {'params': head.parameters()},
+    ], lr=0.02)
+    # <--- multi-model setup --->
+    criterion = torch.nn.CrossEntropyLoss()
+
+    loaders = {
+        "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32),
+        "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32),
+    }
+
+    class CustomRunner(dl.Runner):
+
+        def predict_batch(self, batch):
+            # model inference step
+            return self.model["head"](self.model["encoder"](batch[0].to(self.device).view(batch[0].size(0), -1)))
+
+        def _handle_batch(self, batch):
+            # model train/valid step
+            x, y = batch
+            # <--- multi-model usage --->
+            x_ = self.model["encoder"](x.view(x.size(0), -1))
+            y_hat = self.model["head"](x_)
+            # <--- multi-model usage --->
+
+            loss = self.criterion(y_hat, y)
+            accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3))
+            self.batch_metrics.update(
+                {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
+            )
+
+            if self.is_train_loader:
+                loss.backward()
+                self.optimizer.step()
+                self.optimizer.zero_grad()
+
+    runner = CustomRunner()
+    # model training
+    runner.train(
+        model=model,
+        criterion=criterion,
+        optimizer=optimizer,
+        loaders=loaders,
+        logdir="./logs",
+        num_epochs=5,
+        verbose=True,
+        load_best_on_end=True,
+    )
+
+As you can see, the only thing you need to do is wrap the models in a key-value dictionary.
+That's it: simple enough, no extra abstractions required.
+
+Multi-optimizer
+----------------------------------------------------
+Multi-optimizer example:
+
+.. code-block:: python
+
+    import os
+    import torch
+    from torch.nn import functional as F
+    from torch.utils.data import DataLoader
+    from torchvision.datasets import MNIST
+    from torchvision.transforms import ToTensor
+    from catalyst import dl, metrics
+
+    # <--- multi-model/optimizer setup --->
+    encoder = torch.nn.Linear(28 * 28, 128)
+    head = torch.nn.Linear(128, 10)
+    model = {"encoder": encoder, "head": head}
+    optimizer = {
+        "encoder": torch.optim.Adam(encoder.parameters(), lr=0.02),
+        "head": torch.optim.Adam(head.parameters(), lr=0.001),
+    }
+    # <--- multi-model/optimizer setup --->
+    criterion = torch.nn.CrossEntropyLoss()
+
+    loaders = {
+        "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32),
+        "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32),
+    }
+
+    class CustomRunner(dl.Runner):
+
+        def predict_batch(self, batch):
+            # model inference step
+            return self.model["head"](self.model["encoder"](batch[0].to(self.device).view(batch[0].size(0), -1)))
+
+        def _handle_batch(self, batch):
+            # model train/valid step
+            x, y = batch
+            # <--- multi-model/optimizer usage --->
+            x_ = self.model["encoder"](x.view(x.size(0), -1))
+            y_hat = self.model["head"](x_)
+            # <--- multi-model/optimizer usage --->
+
+            loss = self.criterion(y_hat, y)
+            accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3))
+            self.batch_metrics.update(
+                {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
+            )
+
+            if self.is_train_loader:
+                loss.backward()
+                # <--- multi-model/optimizer usage --->
+                self.optimizer["encoder"].step()
+                self.optimizer["head"].step()
+                self.optimizer["encoder"].zero_grad()
+                self.optimizer["head"].zero_grad()
+                # <--- multi-model/optimizer usage --->
+
+    runner = CustomRunner()
+    # model training
+    runner.train(
+        model=model,
+        criterion=criterion,
+        optimizer=optimizer,
+        loaders=loaders,
+        logdir="./logs",
+        num_epochs=5,
+        verbose=True,
+        load_best_on_end=True,
+    )
+
+The same thing here - we could wrap our optimizers with key-value too and use them in a straightforward way.
+
+Multi-criterion
+----------------------------------------------------
+Multi-criterion example:
+
+.. code-block:: python
+
+    import os
+    import torch
+    from torch.nn import functional as F
+    from torch.utils.data import DataLoader
+    from torchvision.datasets import MNIST
+    from torchvision.transforms import ToTensor
+    from catalyst import dl, metrics
+
+    model = torch.nn.Linear(28 * 28, 10)
+    optimizer = torch.optim.Adam(model.parameters(), lr=0.02)
+    # <--- multi-criterion setup --->
+    criterion = {
+        "multi-class": torch.nn.CrossEntropyLoss(),
+        "multi-label": torch.nn.BCEWithLogitsLoss(),
+    }
+    # <--- multi-criterion setup --->
+
+    loaders = {
+        "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32),
+        "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32),
+    }
+
+    class CustomRunner(dl.Runner):
+
+        def predict_batch(self, batch):
+            # model inference step
+            return self.model(batch[0].to(self.device).view(batch[0].size(0), -1))
+
+        def _handle_batch(self, batch):
+            # model train/valid step
+            x, y = batch
+            y_hat = self.model(x.view(x.size(0), -1))
+
+            # <--- multi-criterion usage --->
+            loss_multi_class = self.criterion["multi-class"](y_hat, y)
+            loss_multi_label = self.criterion["multi-label"](y_hat, F.one_hot(y, 10).to(torch.float32))
+            loss = loss_multi_class + loss_multi_label
+            # <--- multi-criterion usage --->
+
+            accuracy01, accuracy03 = metrics.accuracy(y_hat, y, topk=(1, 3))
+            self.batch_metrics.update(
+                {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
+            )
+
+            if self.is_train_loader:
+                loss.backward()
+                self.optimizer.step()
+                self.optimizer.zero_grad()
+
+    runner = CustomRunner()
+    # model training
+    runner.train(
+        model=model,
+        criterion=criterion,
+        optimizer=optimizer,
+        loaders=loaders,
+        logdir="./logs",
+        num_epochs=5,
+        verbose=True,
+        load_best_on_end=True,
+    )
+
+Same approach here - just use key-value storage to pass the criterions through the experiment.
+
+
+If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
\ No newline at end of file
diff --git a/docs/faq/optuna.rst b/docs/faq/optuna.rst
new file mode 100644
index 0000000000..499506e02c
--- /dev/null
+++ b/docs/faq/optuna.rst
@@ -0,0 +1,91 @@
+Optuna integration
+==============================================================================
+
+Notebook API
+----------------------------------------------------
+
+You can easily use Optuna for hyperparameters optimization:
+
+.. code-block:: python
+
+    import os
+    import optuna
+    import torch
+    from torch import nn
+    from torch.utils.data import DataLoader
+    from catalyst import dl
+    from catalyst.data.cv import ToTensor
+    from catalyst.contrib.datasets import MNIST
+    from catalyst.contrib.nn import Flatten
+
+
+    def objective(trial):
+        lr = trial.suggest_loguniform("lr", 1e-3, 1e-1)
+        num_hidden = int(trial.suggest_loguniform("num_hidden", 32, 128))
+
+        loaders = {
+            "train": DataLoader(MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()), batch_size=32),
+            "valid": DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()), batch_size=32),
+        }
+        model = nn.Sequential(
+            Flatten(), nn.Linear(784, num_hidden), nn.ReLU(), nn.Linear(num_hidden, 10)
+        )
+        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
+        criterion = nn.CrossEntropyLoss()
+
+        runner = dl.SupervisedRunner()
+        runner.train(
+            model=model,
+            loaders=loaders,
+            criterion=criterion,
+            optimizer=optimizer,
+            callbacks=[
+                dl.OptunaCallback(trial),
+                dl.AccuracyCallback(num_classes=10),
+            ],
+            num_epochs=10,
+            main_metric="accuracy01",
+            minimize_metric=False,
+        )
+        return runner.best_valid_metrics[runner.main_metric]
+
+    study = optuna.create_study(
+        direction="maximize",
+        pruner=optuna.pruners.MedianPruner(
+            n_startup_trials=1, n_warmup_steps=0, interval_steps=1
+        ),
+    )
+    study.optimize(objective, n_trials=10, timeout=300)
+    print(study.best_value, study.best_params)
+
+Config API
+----------------------------------------------------
+
+Firstly, prepare the Optuna-based config. For example, like:
+
+.. code-block:: yaml
+
+    model_params:
+        model: SimpleNet
+        num_filters1: "int(trial.suggest_loguniform('num_filters1', 4, 32))"
+        num_filters2: "int(trial.suggest_loguniform('num_filters2', 4, 32))"
+        num_hiddens1: "int(trial.suggest_loguniform('num_hiddens1', 32, 128))"
+        num_hiddens2: "int(trial.suggest_loguniform('num_hiddens2', 32, 128))"
+        ...
+
+After that you can easily run:
+
+.. code-block:: bash
+
+    catalyst-dl tune --config=/path/to/config.yml --verbose
+
+And visualize current training progress with:
+
+.. code-block:: bash
+
+    CUDA_VISIBLE_DEVICES="" tensorboard --logdir=/path/to/logdir
+
+
+If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
\ No newline at end of file
diff --git a/docs/faq/slurm.rst b/docs/faq/slurm.rst
new file mode 100644
index 0000000000..67c652afd8
--- /dev/null
+++ b/docs/faq/slurm.rst
@@ -0,0 +1,20 @@
+[WIP] Slurm training
+==============================================================================
+
+- How to run Catalyst experiments on SLURM?
+
+Hi,
+
+Still work in progress here.
+
+But don't feel upset, check out the kitty... `Kittylyst`_ tutorial.
+
+.. _`Kittylyst`: https://github.com/Scitator/kittylyst
+
+.. image:: https://raw.githubusercontent.com/Scitator/kittylyst/master/assets/kitty.jpg
+    :target: https://github.com/Scitator/kittylyst
+    :alt: kitty
+
+If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
diff --git a/docs/contributing/contributors.rst b/docs/faq/stages.rst
similarity index 58%
rename from docs/contributing/contributors.rst
rename to docs/faq/stages.rst
index 9fd8844499..04059eb602 100644
--- a/docs/contributing/contributors.rst
+++ b/docs/faq/stages.rst
@@ -1,4 +1,4 @@
-Contributors
+[WIP] Multi-stage experiments
 ==============================================================================
 
 Hi,
@@ -11,4 +11,8 @@ But don't feel upset, check out the kitty... `Kittylyst`_ tutorial.
 
 .. image:: https://raw.githubusercontent.com/Scitator/kittylyst/master/assets/kitty.jpg
     :target: https://github.com/Scitator/kittylyst
-    :alt: kitty
\ No newline at end of file
+    :alt: kitty
+
+If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
diff --git a/docs/faq/tpu.rst b/docs/faq/tpu.rst
new file mode 100644
index 0000000000..782aacc686
--- /dev/null
+++ b/docs/faq/tpu.rst
@@ -0,0 +1,8 @@
+[WIP] TPU training
+==============================================================================
+
+- How to run Catalyst experiments on TPU?
+
+If you haven't found the answer for your question, feel free to `join our slack`_ for the discussion.
+
+.. _`join our slack`: https://join.slack.com/t/catalyst-team-core/shared_invite/zt-d9miirnn-z86oKDzFMKlMG4fgFdZafw
diff --git a/docs/getting_started/quickstart.rst b/docs/getting_started/quickstart.rst
index b3cfc74c5f..50a69eb7a0 100644
--- a/docs/getting_started/quickstart.rst
+++ b/docs/getting_started/quickstart.rst
@@ -49,7 +49,7 @@ Let's define **what** we are experimenting with:
 
 Step 4 - Accelerate it with Catalyst
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Let's define **how** we are running the experiment:
+Let's define **how** we are running the experiment (in pure PyTorch):
 
 .. code-block:: python
 
diff --git a/docs/index.rst b/docs/index.rst
index 646edd5bf7..69123d36d5 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -221,14 +221,22 @@ Indices and tables
     CV - Variational AutoEncoder <https://github.com/catalyst-team/catalyst#minimal-examples>
     CV - GAN <https://github.com/catalyst-team/catalyst#minimal-examples>
 
-    Engine - AMP <https://github.com/catalyst-team/catalyst#minimal-examples>
-    Engine - DDP <https://github.com/catalyst-team/catalyst#minimal-examples>
-    Engine - TPU <https://github.com/catalyst-team/catalyst#minimal-examples>
+    Engine - AMP/DDP/TPU <https://github.com/catalyst-team/catalyst#minimal-examples>
 
     AutoML - Catalyst with Optuna <https://github.com/catalyst-team/catalyst#minimal-examples>
 
     tutorials/ddp
 
+.. toctree::
+    :caption: Core
+    :maxdepth: 2
+    :hidden:
+
+    core/experiment
+    core/runner
+    core/callback
+..    core/engine
+
 .. toctree::
     :caption: FAQ
     :maxdepth: 2
@@ -236,15 +244,36 @@ Indices and tables
 
     faq/intro
 
+    faq/data
+    faq/lr_finder
+
+    faq/dp
+    faq/amp
+    faq/ddp
+    faq/slurm
+    faq/tpu
+
+    faq/multi_components
+    faq/early_stopping
+    faq/checkpointing
+    faq/debugging
+    faq/logging
+    faq/inference
+    faq/finetuning
+
+    faq/stages
+    faq/config_api
+    faq/optuna
+
+
 .. toctree::
     :caption: Contributing guide
     :maxdepth: 2
     :hidden:
 
-    contributing/how_to_start
-    contributing/codestyle
-    contributing/contributors
-
+    How to start <https://github.com/catalyst-team/catalyst/blob/master/CONTRIBUTING.md>
+    Codestyle <https://github.com/catalyst-team/codestyle>
+    Acknowledgments <https://github.com/catalyst-team/catalyst#acknowledgments>
 
 
 .. toctree::