diff --git a/README.md b/README.md
index 40e0f7a87a..352f3dcba5 100644
--- a/README.md
+++ b/README.md
@@ -29,11 +29,13 @@
 Catalyst is a PyTorch framework for Deep Learning Research and Development.
-Catalyst focuses on reproducibility, rapid experimentation, and codebase reuse
+It focuses on reproducibility, rapid experimentation, and codebase reuse
 so you can create something new rather than write yet another train loop.
-Break the cycle, use Catalyst!
+Break the cycle – use the Catalyst!
 
-Read more about our vision in the [Project Manifest](https://github.com/catalyst-team/catalyst/blob/master/MANIFEST.md). Catalyst is a part of the [PyTorch Ecosystem](https://pytorch.org/ecosystem/). [Catalyst Ecosystem](https://docs.google.com/presentation/d/1D-yhVOg6OXzjo9K_-IS5vSHLPIUxp1PEkFGnpRcNCNU/edit?usp=sharing) consists of:
+Read more about our vision in the [Project Manifest](https://github.com/catalyst-team/catalyst/blob/master/MANIFEST.md).
+Catalyst is a part of the [PyTorch Ecosystem](https://pytorch.org/ecosystem/).
+[Catalyst Ecosystem](https://docs.google.com/presentation/d/1D-yhVOg6OXzjo9K_-IS5vSHLPIUxp1PEkFGnpRcNCNU/edit?usp=sharing) consists of:
 - [Alchemy](https://github.com/catalyst-team/alchemy) - experiments logging & visualization
 - [Catalyst](https://github.com/catalyst-team/catalyst) - accelerated deep learning R&D
 - [Reaction](https://github.com/catalyst-team/reaction) - convenient deep learning model serving
diff --git a/catalyst/data/loader.py b/catalyst/data/loader.py
index 06a2bc96b0..a6247a24ff 100644
--- a/catalyst/data/loader.py
+++ b/catalyst/data/loader.py
@@ -1,4 +1,5 @@
 from typing import Any, Callable, Iterable, Union
+from itertools import tee
 import queue
 import sys
 import threading
@@ -106,7 +107,7 @@ def __init__(self, loader: DataLoader, num_batches: Union[int, float]):
             )
             num_batches = int(len(loader) * num_batches)
 
-        self.iterator = iter(self.origin)
+        self._iterator = iter(self.origin)
         self.iteration_index = 0
         self.num_batches = num_batches
 
@@ -117,7 +118,7 @@ def __iter__(self):
             iterator object
         """
         self.iteration_index = 0
-        self.iterator = iter(self.origin)
+        self._iterator, self.iterator = tee(self._iterator)
         return self
 
     def __next__(self):
@@ -130,7 +131,7 @@ def __next__(self):
             raise StopIteration()
         self.iteration_index += 1
         if self.iteration_index % self.num_batches == 0:
-            self.iterator = iter(self.origin)
+            self._iterator, self.iterator = tee(self._iterator)
         batch = next(self.iterator)
         return batch
 
diff --git a/catalyst/data/tests/test_loader.py b/catalyst/data/tests/test_loader.py
new file mode 100644
index 0000000000..dc3aec6d81
--- /dev/null
+++ b/catalyst/data/tests/test_loader.py
@@ -0,0 +1,36 @@
+# flake8: noqa
+import torch
+from torch.utils.data import DataLoader, TensorDataset
+
+from catalyst.data.loader import BatchLimitLoaderWrapper
+
+
+def test_batch_limit1() -> None:
+    for shuffle in (False, True):
+        num_samples, num_features = int(1e2), int(1e1)
+        X, y = torch.rand(num_samples, num_features), torch.rand(num_samples)
+        dataset = TensorDataset(X, y)
+        loader = DataLoader(dataset, batch_size=4, num_workers=1, shuffle=shuffle)
+        loader = BatchLimitLoaderWrapper(loader, num_batches=1)
+
+        batch1 = next(iter(loader))[0]
+        batch2 = next(iter(loader))[0]
+        batch3 = next(iter(loader))[0]
+        assert all(torch.isclose(x, y).all() for x, y in zip(batch1, batch2))
+        assert all(torch.isclose(x, y).all() for x, y in zip(batch2, batch3))
+
+
+def test_batch_limit2() -> None:
+    for shuffle in (False, True):
+        num_samples, num_features = int(1e2), int(1e1)
+        X, y = torch.rand(num_samples, num_features), torch.rand(num_samples)
+        dataset = TensorDataset(X, y)
+        loader = DataLoader(dataset, batch_size=4, num_workers=1, shuffle=shuffle)
+        loader = BatchLimitLoaderWrapper(loader, num_batches=2)
+
+        batch1 = next(iter(loader))[0]
+        batch2 = next(iter(loader))[0]
+        batch3 = next(iter(loader))[0]
+        batch4 = next(iter(loader))[0]
+        assert all(torch.isclose(x, y).all() for x, y in zip(batch1, batch3))
+        assert all(torch.isclose(x, y).all() for x, y in zip(batch2, batch4))
diff --git a/docs/index.rst b/docs/index.rst
index 40248b75fa..7e9889e010 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -10,10 +10,12 @@ PyTorch framework for Deep Learning R&D.
 --------------------------------------------------------------------------------
 
 It focuses on reproducibility, rapid experimentation, and codebase reuse
-so you can **create** something new rather than write another regular train loop.
+so you can **create** something new rather than write yet another train loop.
 Break the cycle - use the Catalyst_!
 
-Project manifest_. Part of `PyTorch Ecosystem`_. Part of `Catalyst Ecosystem`_:
+Read more about our vision in the `Project Manifest`_. Catalyst is a part of the `PyTorch Ecosystem`_.
+
+`Catalyst Ecosystem`_ consists of:
 - Alchemy_ - experiments logging & visualization
 - Catalyst_ - accelerated deep learning R&D
 - Reaction_ - convenient deep learning models serving
@@ -25,7 +27,7 @@ Project manifest_. Part of `PyTorch Ecosystem`_. Part of `Catalyst Ecosystem`_:
 .. _Alchemy: https://github.com/catalyst-team/alchemy
 .. _Catalyst: https://github.com/catalyst-team/catalyst
 .. _Reaction: https://github.com/catalyst-team/reaction
-.. _manifest: https://github.com/catalyst-team/catalyst/blob/master/MANIFEST.md
+.. _`Project Manifest`: https://github.com/catalyst-team/catalyst/blob/master/MANIFEST.md
 .. _Catalyst at AI Landscape: https://landscape.lfai.foundation/selected=catalyst
 
 Getting started
diff --git a/examples/README.md b/examples/README.md
index 6a8da4ef8d..c259193c95 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -2,34 +2,9 @@
 
 ## Python API
 
-1. [demo notebook](<./notebooks/demo 21xx.ipynb>) [![Open In Colab]()
-    - minimal examples
-    - Runner customization
-    - DL and RL pipelines
-1. [classification tutorial](./notebooks/classification-tutorial.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/catalyst-team/catalyst/blob/master/examples/notebooks/classification-tutorial.ipynb)
-    - dataset preparation (raw images -> train/valid/infer splits)
-    - augmentations usage example
-    - pretrained model finetuning
-    - various classification metrics
-    - metrics visualizaiton
-    - FocalLoss and OneCycle usage examples
-    - class imbalance handling
-    - model inference
-1. [segmentation tutorial](notebooks/segmentation-tutorial.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/catalyst-team/catalyst/blob/master/examples/notebooks/segmentation-tutorial.ipynb)
-    - car segmentation dataset
-    - augmentations with [albumentations](https://github.com/albu/albumentations) library
-    - training in FP16 with [NVIDIA Apex](https://github.com/NVIDIA/apex)
-    - using segmentation models from `catalyst/contrib/models/segmentation`
-    - training with multiple criterion (Dice + IoU + BCE) example
-    - Lookahead + RAdam optimizer usage example
-    - tensorboard logs visualization
-    - predictions visualization
-    - Test-time augmentations with [ttach](https://github.com/qubvel/ttach) library
-1. [Pruning tutorial](notebooks/Pruning.ipynb)[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/catalyst-team/catalyst/blob/master/examples/notebooks/Pruning.ipynb)
-    - Pruning intro
-    - Lottery ticket hypothesis
-    - Catalyst pruning callback
-    - Loading training result from logs
+Catalyst Python API examples can be found in the
+[minimal examples](https://github.com/catalyst-team/catalyst#minimal-examples)
+and [notebook section](https://github.com/catalyst-team/catalyst#notebooks).
 
 ----
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 48e9c5e454..de2d6c6139 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -3,7 +3,7 @@ numpy>=1.16.4
 torch>=1.3.0
 
 # Config API
-PyYAML
+PyYAML>=5.1
 
 # for future development:
 # tensorboardX provides tensorboard support for any framework
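
A quick usage sketch (not part of the diff) of what the `tee`-based iterator in `catalyst/data/loader.py` changes in practice: restarting iteration over a `BatchLimitLoaderWrapper` now replays the same underlying batches rather than drawing a fresh shuffle, which is exactly what the new tests assert. The toy dataset below is illustrative only.

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

from catalyst.data.loader import BatchLimitLoaderWrapper

# Illustrative toy data: 100 samples with 10 features each.
X, y = torch.rand(100, 10), torch.rand(100)
loader = DataLoader(TensorDataset(X, y), batch_size=4, shuffle=True)

# Restrict the loader to its first batch only.
limited = BatchLimitLoaderWrapper(loader, num_batches=1)

# With the tee-based iterator, every fresh iteration replays the same batch,
# even though the wrapped DataLoader shuffles (mirrors test_batch_limit1).
features1, _ = next(iter(limited))
features2, _ = next(iter(limited))
assert torch.isclose(features1, features2).all()
```

Under the previous implementation, `__iter__` called `iter(self.origin)`, so a shuffling loader handed back a different first batch on every restart; `tee` keeps the wrapper deterministic across restarts.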