diff --git a/CHANGELOG.md b/CHANGELOG.md index fa9bd9c0ce71be..c5cc63cbee1632 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,270 +4,193 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - ## [1.6.0] - 2022-MM-DD ### Added +- Add new `VERBOSE` log level to provide useful logs for monitoring and debugging production use cases - Added a flag `SLURMEnvironment(auto_requeue=True|False)` to control whether Lightning handles the requeuing ([#10601](https://github.com/PyTorchLightning/pytorch-lightning/issues/10601)) - - Fault Tolerant Manual - * Add `_SupportsStateDict` protocol to detect if classes are stateful ([#10646](https://github.com/PyTorchLightning/pytorch-lightning/issues/10646)) - * Add `_FaultTolerantMode` enum used to track different supported fault tolerant modes ([#10645](https://github.com/PyTorchLightning/pytorch-lightning/issues/10645)) - * Add a `_rotate_worker_indices` utility to reload the state according the latest worker ([#10647](https://github.com/PyTorchLightning/pytorch-lightning/issues/10647)) - * Add stateful workers ([#10674](https://github.com/PyTorchLightning/pytorch-lightning/issues/10674)) - * Add an utility to collect the states across processes ([#10639](https://github.com/PyTorchLightning/pytorch-lightning/issues/10639)) - * Add logic to reload the states across data loading components ([#10699](https://github.com/PyTorchLightning/pytorch-lightning/issues/10699)) - * Cleanup some fault tolerant utilities ([#10703](https://github.com/PyTorchLightning/pytorch-lightning/issues/10703)) - * Enable Fault Tolerant Manual Training ([#10707](https://github.com/PyTorchLightning/pytorch-lightning/issues/10707)) - * Broadcast the `_terminate_gracefully` to all processes and add support for DDP ([#10638](https://github.com/PyTorchLightning/pytorch-lightning/issues/10638)) + - Add `_SupportsStateDict` protocol to detect if classes are stateful ([#10646](https://github.com/PyTorchLightning/pytorch-lightning/issues/10646)) + - Add `_FaultTolerantMode` enum used to track different supported fault tolerant modes ([#10645](https://github.com/PyTorchLightning/pytorch-lightning/issues/10645)) + - Add a `_rotate_worker_indices` utility to reload the state according to the latest worker ([#10647](https://github.com/PyTorchLightning/pytorch-lightning/issues/10647)) + - Add stateful workers ([#10674](https://github.com/PyTorchLightning/pytorch-lightning/issues/10674)) + - Add a utility to collect the states across processes ([#10639](https://github.com/PyTorchLightning/pytorch-lightning/issues/10639)) + - Add logic to reload the states across data loading components ([#10699](https://github.com/PyTorchLightning/pytorch-lightning/issues/10699)) + - Clean up some fault tolerant utilities ([#10703](https://github.com/PyTorchLightning/pytorch-lightning/issues/10703)) + - Enable Fault Tolerant Manual Training ([#10707](https://github.com/PyTorchLightning/pytorch-lightning/issues/10707)) + - Broadcast the `_terminate_gracefully` flag to all processes and add support for DDP ([#10638](https://github.com/PyTorchLightning/pytorch-lightning/issues/10638)) - Added support for re-instantiation of custom (subclasses of) `DataLoaders` returned in the `*_dataloader()` methods, i.e., automatic replacement of samplers now works with custom types of `DataLoader` ([#10680](https://github.com/PyTorchLightning/pytorch-lightning/issues/10680)) - - Added a function to validate if fault tolerant training is
supported. ([#10465](https://github.com/PyTorchLightning/pytorch-lightning/issues/10465)) - - Show a better error message when a custom `DataLoader` implementation is not implemented correctly and needs to be reconstructed ([#10719](https://github.com/PyTorchLightning/pytorch-lightning/issues/10719)) - - Added support for `--lr_scheduler=ReduceLROnPlateau` to the `LightningCLI` ([#10860](https://github.com/PyTorchLightning/pytorch-lightning/issues/10860)) - - Added `LightningCLI.configure_optimizers` to override the `configure_optimizers` return value ([#10860](https://github.com/PyTorchLightning/pytorch-lightning/issues/10860)) - - Added a warning that shows when `max_epochs` in the `Trainer` is not set ([#10700](https://github.com/PyTorchLightning/pytorch-lightning/issues/10700)) - ### Changed - Raised exception in `init_dist_connection()` when torch distributed is not available ([#10418](https://github.com/PyTorchLightning/pytorch-lightning/issues/10418)) - - The `monitor` argument in the `EarlyStopping` callback is no longer optional ([#10328](https://github.com/PyTorchLightning/pytorch-lightning/pull/10328)) - - Do not fail if batch size could not be inferred for logging when using DeepSpeed ([#10438](https://github.com/PyTorchLightning/pytorch-lightning/issues/10438)) - - Raised `MisconfigurationException` when `enable_progress_bar=False` and a progress bar instance has been passed in the callback list ([#10520](https://github.com/PyTorchLightning/pytorch-lightning/issues/10520)) - - Moved `trainer.connectors.env_vars_connector._defaults_from_env_vars` to `utilities.argparse._defaults_from_env_vars` ([#10501](https://github.com/PyTorchLightning/pytorch-lightning/pull/10501)) - - Changes in `LightningCLI` required for the new major release of jsonargparse v4.0.0 ([#10426](https://github.com/PyTorchLightning/pytorch-lightning/pull/10426)) - - Renamed `refresh_rate_per_second` parameter to `refresh_rate` for `RichProgressBar` signature ([#10497](https://github.com/PyTorchLightning/pytorch-lightning/pull/10497)) - - Moved ownership of the `PrecisionPlugin` into `TrainingTypePlugin` and updated all references ([#10570](https://github.com/PyTorchLightning/pytorch-lightning/pull/10570)) - - Fault Tolerant relies on `signal.SIGTERM` to gracefully exit instead of `signal.SIGUSR1` ([#10605](https://github.com/PyTorchLightning/pytorch-lightning/pull/10605)) - - Raised an error if the `batch_size` cannot be inferred from the current batch when it contains a string or is a custom batch object ([#10541](https://github.com/PyTorchLightning/pytorch-lightning/pull/10541)) - - The validation loop is now disabled when `overfit_batches > 0` is set in the Trainer ([#9709](https://github.com/PyTorchLightning/pytorch-lightning/pull/9709)) - - Moved optimizer-related logic from `Accelerator` to `TrainingTypePlugin` ([#10596](https://github.com/PyTorchLightning/pytorch-lightning/pull/10596)) - - Moved `batch_to_device` method from `Accelerator` to `TrainingTypePlugin` ([#10649](https://github.com/PyTorchLightning/pytorch-lightning/pull/10649)) - - The `DDPSpawnPlugin` no longer overrides the `post_dispatch` plugin hook ([#10034](https://github.com/PyTorchLightning/pytorch-lightning/pull/10034)) - - The `LightningModule.{add_to_queue,get_from_queue}` hooks no longer get a `torch.multiprocessing.SimpleQueue` and instead receive a list-based queue ([#10034](https://github.com/PyTorchLightning/pytorch-lightning/pull/10034)) - - Changed `training_step`, `validation_step`, `test_step` and `predict_step` method
signatures in `Accelerator` and updated input from caller side ([#10908](https://github.com/PyTorchLightning/pytorch-lightning/pull/10908)) - - Changed the name of the temporary checkpoint that the `DDPSpawnPlugin` and related plugins save ([#10934](https://github.com/PyTorchLightning/pytorch-lightning/pull/10934)) - ### Deprecated - Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/PyTorchLightning/pytorch-lightning/issues/10103)) - - Deprecated `DistributedType` in favor of `_StrategyType` ([#10505](https://github.com/PyTorchLightning/pytorch-lightning/pull/10505)) - - Deprecated the `precision_plugin` constructor argument from `Accelerator` ([#10570](https://github.com/PyTorchLightning/pytorch-lightning/pull/10570)) - - Deprecated `DeviceType` in favor of `_AcceleratorType` ([#10503](https://github.com/PyTorchLightning/pytorch-lightning/pull/10503)) - - Deprecated the property `Trainer.slurm_job_id` in favor of the new `SLURMEnvironment.job_id()` method ([#10622](https://github.com/PyTorchLightning/pytorch-lightning/pull/10622)) - - Deprecated the access to the attribute `IndexBatchSamplerWrapper.batch_indices` in favor of `IndexBatchSamplerWrapper.seen_batch_indices` ([#10870](https://github.com/PyTorchLightning/pytorch-lightning/pull/10870)) - - Deprecated `TrainingTypePlugin.post_dispatch` in favor of `TrainingTypePlugin.teardown` ([#10939](https://github.com/PyTorchLightning/pytorch-lightning/pull/10939)) - ### Removed - Removed deprecated parameter `method` in `pytorch_lightning.utilities.model_helpers.is_overridden` ([#10507](https://github.com/PyTorchLightning/pytorch-lightning/pull/10507)) - - Remove deprecated method `ClusterEnvironment.creates_children` ([#10339](https://github.com/PyTorchLightning/pytorch-lightning/issues/10339)) - - Removed deprecated `TrainerModelHooksMixin.is_function_implemented` and `TrainerModelHooksMixin.has_arg` ([#10322](https://github.com/PyTorchLightning/pytorch-lightning/pull/10322)) - - Removed deprecated `pytorch_lightning.utilities.device_dtype_mixin.DeviceDtypeModuleMixin` in favor of `pytorch_lightning.core.mixins.device_dtype_mixin.DeviceDtypeModuleMixin` ([#10442](https://github.com/PyTorchLightning/pytorch-lightning/pull/10442)) - - Removed deprecated `LightningModule.loaded_optimizer_states_dict` property ([#10346](https://github.com/PyTorchLightning/pytorch-lightning/pull/10346)) - - Removed deprecated `Trainer.fit(train_dataloader=)`, `Trainer.validate(val_dataloaders=)`, and `Trainer.test(test_dataloader=)` ([#10325](https://github.com/PyTorchLightning/pytorch-lightning/pull/10325)) - -- Removed deprecated `has_prepared_data`, `has_setup_fit`, `has_setup_validate`, `has_setup_test`, `has_setup_predict`, `has_teardown_fit`, `has_teardown_validate`, `has_teardown_test` and `has_teardown_predict` datamodule lifecycle properties ([#10350](https://github.com/PyTorchLightning/pytorch-lightning/pull/10350)) - +- Removed deprecated `has_prepared_data`, `has_setup_fit`, `has_setup_validate`, `has_setup_test`, `has_setup_predict`, `has_teardown_fit`, `has_teardown_validate`, `has_teardown_test` and `has_teardown_predict` datamodule lifecycle properties ([#10350](https://github.com/PyTorchLightning/pytorch-lightning/pull/10350)) - Removed deprecated `every_n_val_epochs` parameter of ModelCheckpoint ([#10366](https://github.com/PyTorchLightning/pytorch-lightning/pull/10366)) - - Removed deprecated `import pytorch_lightning.profiler.profilers` in favor of `import 
pytorch_lightning.profiler` ([#10443](https://github.com/PyTorchLightning/pytorch-lightning/pull/10443)) - - Removed deprecated property `configure_slurm_ddp` from accelerator connector ([#10370](https://github.com/PyTorchLightning/pytorch-lightning/pull/10370)) - - Removed deprecated arguments `num_nodes` and `sync_batchnorm` from `DDPPlugin`, `DDPSpawnPlugin`, `DeepSpeedPlugin` ([#10357](https://github.com/PyTorchLightning/pytorch-lightning/pull/10357)) - - Removed deprecated property `is_slurm_managing_tasks` from AcceleratorConnector ([#10353](https://github.com/PyTorchLightning/pytorch-lightning/pull/10353)) - - Removed deprecated `LightningModule.log(tbptt_reduce_fx, tbptt_reduce_token, sync_dist_op)` ([#10423](https://github.com/PyTorchLightning/pytorch-lightning/pull/10423)) - - Removed deprecated `Plugin.task_idx` ([#10441](https://github.com/PyTorchLightning/pytorch-lightning/pull/10441)) - - Removed deprecated method `master_params` from PrecisionPlugin ([#10372](https://github.com/PyTorchLightning/pytorch-lightning/pull/10372)) - - Removed the automatic detachment of "extras" returned from `training_step`. For example, `return {'loss': ..., 'foo': foo.detach()}` will now be necessary if `foo` has gradients which you do not want to store ([#10424](https://github.com/PyTorchLightning/pytorch-lightning/pull/10424)) - - Removed deprecated passthrough methods and properties from `Accelerator` base class: - * ([#10403](https://github.com/PyTorchLightning/pytorch-lightning/pull/10403)) - * ([#10448](https://github.com/PyTorchLightning/pytorch-lightning/pull/10448)) -- Removed deprecated signature for `transfer_batch_to_device` hook. The new argument `dataloader_idx` is now required ([#10480](https://github.com/PyTorchLightning/pytorch-lightning/pull/10480)) + - ([#10403](https://github.com/PyTorchLightning/pytorch-lightning/pull/10403)) + - ([#10448](https://github.com/PyTorchLightning/pytorch-lightning/pull/10448)) +- Removed deprecated signature for `transfer_batch_to_device` hook.
The new argument `dataloader_idx` is now required ([#10480](https://github.com/PyTorchLightning/pytorch-lightning/pull/10480)) - Removed deprecated `utilities.distributed.rank_zero_{warn/deprecation}` ([#10451](https://github.com/PyTorchLightning/pytorch-lightning/pull/10451)) - - Removed deprecated `mode` argument from `ModelSummary` class ([#10449](https://github.com/PyTorchLightning/pytorch-lightning/pull/10449)) - - Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/PyTorchLightning/pytorch-lightning/pull/10482)) - - Removed deprecated `disable_validation` property from Trainer ([#10450](https://github.com/PyTorchLightning/pytorch-lightning/pull/10450)) - - Removed deprecated `CheckpointConnector.hpc_load` property in favor of `CheckpointConnector.restore` ([#10525](https://github.com/PyTorchLightning/pytorch-lightning/pull/10525)) - - Removed deprecated `reload_dataloaders_every_epoch` from `Trainer` in favor of `reload_dataloaders_every_n_epochs` ([#10481](https://github.com/PyTorchLightning/pytorch-lightning/pull/10481)) - - Removed the `precision_plugin` attribute from `Accelerator` in favor of its equivalent attribute `precision_plugin` in the `TrainingTypePlugin` ([#10570](https://github.com/PyTorchLightning/pytorch-lightning/pull/10570)) - - Removed `DeepSpeedPlugin.{precision,amp_type,amp_level}` properties ([#10657](https://github.com/PyTorchLightning/pytorch-lightning/pull/10657)) - - Removed argument `return_result` from the `DDPSpawnPlugin.spawn()` method ([#10867](https://github.com/PyTorchLightning/pytorch-lightning/pull/10867)) - - Removed the property `TrainingTypePlugin.results` and corresponding properties in subclasses ([#10034](https://github.com/PyTorchLightning/pytorch-lightning/pull/10034)) - - Removed the `mp_queue` attribute from `DDPSpawnPlugin` and `TPUSpawnPlugin` ([#10034](https://github.com/PyTorchLightning/pytorch-lightning/pull/10034)) - - Removed unnecessary `_move_optimizer_state` method overrides from `TPUSpawnPlugin` and `SingleTPUPlugin` ([#10849](https://github.com/PyTorchLightning/pytorch-lightning/pull/10849)) - - Removed `model_sharded_context` method from `Accelerator` ([#10886](https://github.com/PyTorchLightning/pytorch-lightning/pull/10886)) - - Removed method `pre_dispatch` from the `PrecisionPlugin` ([#10887](https://github.com/PyTorchLightning/pytorch-lightning/pull/10887)) - - Removed method `setup_optimizers_in_pre_dispatch` from the `strategies` and moved the same logic into the `setup` and `pre_dispatch` methods ([#10906](https://github.com/PyTorchLightning/pytorch-lightning/pull/10906)) - - Removed methods `pre_dispatch`, `dispatch` and `post_dispatch` from the `Accelerator` ([#10885](https://github.com/PyTorchLightning/pytorch-lightning/pull/10885)) - - Removed methods `training_step`, `test_step`, `validation_step` and `predict_step` from the `Accelerator` ([#10890](https://github.com/PyTorchLightning/pytorch-lightning/pull/10890)) - ### Fixed - Fixed an issue with `SignalConnector` not restoring the default signal handlers on teardown when running on SLURM or with fault-tolerant training enabled ([#10611](https://github.com/PyTorchLightning/pytorch-lightning/pull/10611)) - - Fixed `SignalConnector._has_already_handler` check for callable type ([#10483](https://github.com/PyTorchLightning/pytorch-lightning/pull/10483)) -
- Disabled batch_size extraction for torchmetric instances because they accumulate the metrics internally ([#10815](https://github.com/PyTorchLightning/pytorch-lightning/pull/10815)) - - Fixed importing `torch_xla.debug` for `torch-xla<1.8` ([#10836](https://github.com/PyTorchLightning/pytorch-lightning/pull/10836)) - - Fixed an issue to return the results for each dataloader separately instead of duplicating them for each one ([#10810](https://github.com/PyTorchLightning/pytorch-lightning/pull/10810)) - - Improved the exception message if the `rich` version is less than `10.2.2` ([#10839](https://github.com/PyTorchLightning/pytorch-lightning/pull/10839)) - - Fixed uploading the best model checkpoint in NeptuneLogger ([#10369](https://github.com/PyTorchLightning/pytorch-lightning/pull/10369)) - - Fixed early schedule reset logic in the PyTorch profiler that was causing a data leak ([#10837](https://github.com/PyTorchLightning/pytorch-lightning/pull/10837)) - - Fixed a bug that caused incorrect batch indices to be passed to the `BasePredictionWriter` hooks when using a dataloader with `num_workers > 0` ([#10870](https://github.com/PyTorchLightning/pytorch-lightning/pull/10870)) - - Fixed an issue with item assignment on the logger on rank > 0 for loggers that support it ([#10917](https://github.com/PyTorchLightning/pytorch-lightning/pull/10917)) - - Fixed an issue with `DDPSpawnPlugin` and related plugins leaving a temporary checkpoint behind ([#10934](https://github.com/PyTorchLightning/pytorch-lightning/pull/10934)) - - Fixed a `TypeError` failure in the `signal_connector` `teardown` method by adding a `None` check ([#10961](https://github.com/PyTorchLightning/pytorch-lightning/pull/10961)) - ## [1.5.4] - 2021-11-30 ### Fixed @@ -282,7 +205,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed PyTorch 1.6 support ([#10367](https://github.com/PyTorchLightning/pytorch-lightning/pull/10367), [#10738](https://github.com/PyTorchLightning/pytorch-lightning/pull/10738)) - ## [1.5.3] - 2021-11-24 ### Fixed @@ -296,7 +218,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed the `{validation,test}_step` outputs getting moved to CPU with `Trainer(move_metrics_to_cpu=True)` ([#10631](https://github.com/PyTorchLightning/pytorch-lightning/pull/10631)) - Fixed an issue with collecting logged test results with multiple dataloaders ([#10522](https://github.com/PyTorchLightning/pytorch-lightning/pull/10522)) - ## [1.5.2] - 2021-11-16 ### Fixed @@ -311,7 +232,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Do not fail if batch size could not be inferred for logging when using DeepSpeed ([#10438](https://github.com/PyTorchLightning/pytorch-lightning/issues/10438)) - Fixed propagation of device and dtype information to submodules of LightningLite when they inherit from `DeviceDtypeModuleMixin` ([#10559](https://github.com/PyTorchLightning/pytorch-lightning/issues/10559)) - ## [1.5.1] - 2021-11-09 ### Fixed @@ -327,7 +247,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed an issue with inferring the dataset type in fault-tolerant training ([#10432](https://github.com/PyTorchLightning/pytorch-lightning/pull/10432)) - Fixed dataloader workers with `persistent_workers` being deleted on every iteration ([#10434](https://github.com/PyTorchLightning/pytorch-lightning/pull/10434)) - ## [1.5.0] - 2021-11-02 ### Added @@ -338,13 +257,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added a flavor of `training_step` that takes `dataloader_iter` as an argument ([#8807](https://github.com/PyTorchLightning/pytorch-lightning/pull/8807)) - Added a `state_key` property to the `Callback` base class ([#6886](https://github.com/PyTorchLightning/pytorch-lightning/pull/6886)) - Added progress tracking to loops: - * Integrated `TrainingEpochLoop.total_batch_idx` ([#8598](https://github.com/PyTorchLightning/pytorch-lightning/pull/8598)) - * Added `BatchProgress` and integrated `TrainingEpochLoop.is_last_batch` ([#9657](https://github.com/PyTorchLightning/pytorch-lightning/pull/9657)) - * Avoid optional `Tracker` attributes ([#9320](https://github.com/PyTorchLightning/pytorch-lightning/pull/9320)) - * Reset `current` progress counters when restarting an epoch loop that had already finished ([#9371](https://github.com/PyTorchLightning/pytorch-lightning/pull/9371)) - * Call `reset_on_restart` in the loop's `reset` hook instead of when loading a checkpoint ([#9561](https://github.com/PyTorchLightning/pytorch-lightning/pull/9561)) - * Use `completed` over `processed` in `reset_on_restart` ([#9656](https://github.com/PyTorchLightning/pytorch-lightning/pull/9656)) - * Renamed `reset_on_epoch` to `reset_on_run` ([#9658](https://github.com/PyTorchLightning/pytorch-lightning/pull/9658)) + - Integrated `TrainingEpochLoop.total_batch_idx` ([#8598](https://github.com/PyTorchLightning/pytorch-lightning/pull/8598)) + - Added `BatchProgress` and integrated `TrainingEpochLoop.is_last_batch` ([#9657](https://github.com/PyTorchLightning/pytorch-lightning/pull/9657)) + - Avoid optional `Tracker` attributes ([#9320](https://github.com/PyTorchLightning/pytorch-lightning/pull/9320)) + - Reset `current` progress counters when restarting an epoch loop that had already finished ([#9371](https://github.com/PyTorchLightning/pytorch-lightning/pull/9371)) + - Call `reset_on_restart` in the loop's `reset` hook instead of when loading a checkpoint ([#9561](https://github.com/PyTorchLightning/pytorch-lightning/pull/9561)) + - Use `completed` over `processed` in `reset_on_restart` ([#9656](https://github.com/PyTorchLightning/pytorch-lightning/pull/9656)) + - Renamed `reset_on_epoch` to `reset_on_run` ([#9658](https://github.com/PyTorchLightning/pytorch-lightning/pull/9658)) - Added `batch_size` and `rank_zero_only` arguments for `log_dict` to match `log` ([#8628](https://github.com/PyTorchLightning/pytorch-lightning/pull/8628)) - Added a check for unique GPU ids ([#8666](https://github.com/PyTorchLightning/pytorch-lightning/pull/8666)) - Added `ResultCollection` state_dict to the Loop `state_dict` and added support for distributed reload ([#8641](https://github.com/PyTorchLightning/pytorch-lightning/pull/8641)) @@ -353,61 +272,61 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Added a warning to `WandbLogger` when reusing a wandb run ([#8714](https://github.com/PyTorchLightning/pytorch-lightning/pull/8714)) - Added `log_graph` argument for `watch` method of `WandbLogger` ([#8662](https://github.com/PyTorchLightning/pytorch-lightning/pull/8662)) - `LightningCLI` additions: - * Added `LightningCLI(run=False|True)` to choose whether to run a `Trainer` subcommand ([#8751](https://github.com/PyTorchLightning/pytorch-lightning/pull/8751)) - * Added support to call any trainer function from the `LightningCLI` via subcommands ([#7508](https://github.com/PyTorchLightning/pytorch-lightning/pull/7508)) - * Allow easy trainer re-instantiation ([#7508](https://github.com/PyTorchLightning/pytorch-lightning/pull/9241)) - * Automatically register all optimizers and learning rate schedulers ([#9565](https://github.com/PyTorchLightning/pytorch-lightning/pull/9565)) - * Allow registering custom optimizers and learning rate schedulers without subclassing the CLI ([#9565](https://github.com/PyTorchLightning/pytorch-lightning/pull/9565)) - * Support shorthand notation to instantiate optimizers and learning rate schedulers ([#9565](https://github.com/PyTorchLightning/pytorch-lightning/pull/9565)) - * Support passing lists of callbacks via command line ([#8815](https://github.com/PyTorchLightning/pytorch-lightning/pull/8815)) - * Support shorthand notation to instantiate models ([#9588](https://github.com/PyTorchLightning/pytorch-lightning/pull/9588)) - * Support shorthand notation to instantiate datamodules ([#10011](https://github.com/PyTorchLightning/pytorch-lightning/pull/10011)) - * Added `multifile` option to `LightningCLI` to enable/disable config saving to preserve multiple files structure ([#9073](https://github.com/PyTorchLightning/pytorch-lightning/pull/9073)) + - Added `LightningCLI(run=False|True)` to choose whether to run a `Trainer` subcommand ([#8751](https://github.com/PyTorchLightning/pytorch-lightning/pull/8751)) + - Added support to call any trainer function from the `LightningCLI` via subcommands ([#7508](https://github.com/PyTorchLightning/pytorch-lightning/pull/7508)) + - Allow easy trainer re-instantiation ([#7508](https://github.com/PyTorchLightning/pytorch-lightning/pull/9241)) + - Automatically register all optimizers and learning rate schedulers ([#9565](https://github.com/PyTorchLightning/pytorch-lightning/pull/9565)) + - Allow registering custom optimizers and learning rate schedulers without subclassing the CLI ([#9565](https://github.com/PyTorchLightning/pytorch-lightning/pull/9565)) + - Support shorthand notation to instantiate optimizers and learning rate schedulers ([#9565](https://github.com/PyTorchLightning/pytorch-lightning/pull/9565)) + - Support passing lists of callbacks via command line ([#8815](https://github.com/PyTorchLightning/pytorch-lightning/pull/8815)) + - Support shorthand notation to instantiate models ([#9588](https://github.com/PyTorchLightning/pytorch-lightning/pull/9588)) + - Support shorthand notation to instantiate datamodules ([#10011](https://github.com/PyTorchLightning/pytorch-lightning/pull/10011)) + - Added `multifile` option to `LightningCLI` to enable/disable config saving to preserve multiple files structure ([#9073](https://github.com/PyTorchLightning/pytorch-lightning/pull/9073)) - Fault-tolerant training: - * Added `FastForwardSampler` and `CaptureIterableDataset` injection to data loading utilities ([#8366](https://github.com/PyTorchLightning/pytorch-lightning/pull/8366)) - * Added `DataFetcher` to control fetching 
flow ([#8890](https://github.com/PyTorchLightning/pytorch-lightning/pull/8890)) - * Added `SharedCycleIteratorState` to prevent infinite loop ([#8889](https://github.com/PyTorchLightning/pytorch-lightning/pull/8889)) - * Added `CaptureMapDataset` for state management in map-style datasets ([#8891](https://github.com/PyTorchLightning/pytorch-lightning/pull/8891)) - * Added Fault Tolerant Training to `DataFetcher` ([#8891](https://github.com/PyTorchLightning/pytorch-lightning/pull/8891)) - * Replaced old prefetch iterator with new `DataFetcher` in training loop ([#8953](https://github.com/PyTorchLightning/pytorch-lightning/pull/8953)) - * Added partial support for global random state fault-tolerance in map-style datasets ([#8950](https://github.com/PyTorchLightning/pytorch-lightning/pull/8950)) - * Converted state to tuple explicitly when setting Python random state ([#9401](https://github.com/PyTorchLightning/pytorch-lightning/pull/9401)) - * Added support for restarting an optimizer loop (multiple optimizers) ([#9537](https://github.com/PyTorchLightning/pytorch-lightning/pull/9537)) - * Added support for restarting within Evaluation Loop ([#9563](https://github.com/PyTorchLightning/pytorch-lightning/pull/9563)) - * Added mechanism to detect that a signal has been sent so the Trainer can gracefully exit ([#9566](https://github.com/PyTorchLightning/pytorch-lightning/pull/9566)) - * Added support for skipping ahead to validation during the auto-restart of fitting ([#9681](https://github.com/PyTorchLightning/pytorch-lightning/pull/9681)) - * Added support for auto-restart if a fault-tolerant checkpoint is available ([#9722](https://github.com/PyTorchLightning/pytorch-lightning/pull/9722)) + - Added `FastForwardSampler` and `CaptureIterableDataset` injection to data loading utilities ([#8366](https://github.com/PyTorchLightning/pytorch-lightning/pull/8366)) + - Added `DataFetcher` to control fetching flow ([#8890](https://github.com/PyTorchLightning/pytorch-lightning/pull/8890)) + - Added `SharedCycleIteratorState` to prevent infinite loop ([#8889](https://github.com/PyTorchLightning/pytorch-lightning/pull/8889)) + - Added `CaptureMapDataset` for state management in map-style datasets ([#8891](https://github.com/PyTorchLightning/pytorch-lightning/pull/8891)) + - Added Fault Tolerant Training to `DataFetcher` ([#8891](https://github.com/PyTorchLightning/pytorch-lightning/pull/8891)) + - Replaced old prefetch iterator with new `DataFetcher` in training loop ([#8953](https://github.com/PyTorchLightning/pytorch-lightning/pull/8953)) + - Added partial support for global random state fault-tolerance in map-style datasets ([#8950](https://github.com/PyTorchLightning/pytorch-lightning/pull/8950)) + - Converted state to tuple explicitly when setting Python random state ([#9401](https://github.com/PyTorchLightning/pytorch-lightning/pull/9401)) + - Added support for restarting an optimizer loop (multiple optimizers) ([#9537](https://github.com/PyTorchLightning/pytorch-lightning/pull/9537)) + - Added support for restarting within Evaluation Loop ([#9563](https://github.com/PyTorchLightning/pytorch-lightning/pull/9563)) + - Added mechanism to detect that a signal has been sent so the Trainer can gracefully exit ([#9566](https://github.com/PyTorchLightning/pytorch-lightning/pull/9566)) + - Added support for skipping ahead to validation during the auto-restart of fitting ([#9681](https://github.com/PyTorchLightning/pytorch-lightning/pull/9681)) + - Added support for auto-restart if a fault-tolerant checkpoint 
is available ([#9722](https://github.com/PyTorchLightning/pytorch-lightning/pull/9722)) - Checkpoint saving and loading extensibility: - * Added `CheckpointIO` plugin to expose checkpoint IO from training type plugin ([#8743](https://github.com/PyTorchLightning/pytorch-lightning/pull/8743)) - * Refactored `CheckpointConnector` to offload validation logic to the `CheckpointIO` plugin ([#9045](https://github.com/PyTorchLightning/pytorch-lightning/pull/9045)) - * Added `remove_checkpoint` to `CheckpointIO` plugin by moving the responsibility out of the `ModelCheckpoint` callback ([#9373](https://github.com/PyTorchLightning/pytorch-lightning/pull/9373)) - * Added `XLACheckpointIO` plugin ([#9972](https://github.com/PyTorchLightning/pytorch-lightning/pull/9972)) + - Added `CheckpointIO` plugin to expose checkpoint IO from training type plugin ([#8743](https://github.com/PyTorchLightning/pytorch-lightning/pull/8743)) + - Refactored `CheckpointConnector` to offload validation logic to the `CheckpointIO` plugin ([#9045](https://github.com/PyTorchLightning/pytorch-lightning/pull/9045)) + - Added `remove_checkpoint` to `CheckpointIO` plugin by moving the responsibility out of the `ModelCheckpoint` callback ([#9373](https://github.com/PyTorchLightning/pytorch-lightning/pull/9373)) + - Added `XLACheckpointIO` plugin ([#9972](https://github.com/PyTorchLightning/pytorch-lightning/pull/9972)) - Loop customization: - * Added `Closure` and `AbstractClosure` classes ([#8642](https://github.com/PyTorchLightning/pytorch-lightning/pull/8642)) - * Refactored `TrainingBatchLoop` and extracted `OptimizerLoop`, splitting off automatic optimization into its own loop ([#9191](https://github.com/PyTorchLightning/pytorch-lightning/pull/9191)) - * Removed `TrainingBatchLoop.backward()`; manual optimization now calls directly into `Accelerator.backward()` and automatic optimization handles backward in new `OptimizerLoop` ([#9265](https://github.com/PyTorchLightning/pytorch-lightning/pull/9265)) - * Extracted `ManualOptimization` logic from `TrainingBatchLoop` into its own separate loop class ([#9266](https://github.com/PyTorchLightning/pytorch-lightning/pull/9266)) - * Added `OutputResult` and `ManualResult` classes ([#9437](https://github.com/PyTorchLightning/pytorch-lightning/pull/9437), [#9424](https://github.com/PyTorchLightning/pytorch-lightning/pull/9424)) - * Marked `OptimizerLoop.backward` as protected ([#9514](https://github.com/PyTorchLightning/pytorch-lightning/pull/9514)) - * Marked `FitLoop.should_accumulate` as protected ([#9515](https://github.com/PyTorchLightning/pytorch-lightning/pull/9515)) - * Marked several methods in `PredictionLoop` as protected: `on_predict_start`, `on_predict_epoch_end`, `on_predict_end`, `on_predict_model_eval` ([#9516](https://github.com/PyTorchLightning/pytorch-lightning/pull/9516)) - * Marked several methods in `EvaluationLoop` as protected: `get_max_batches`, `on_evaluation_model_eval`, `on_evaluation_model_train`, `on_evaluation_start`, `on_evaluation_epoch_start`, `on_evaluation_epoch_end`, `on_evaluation_end`, `reload_evaluation_dataloaders` ([#9516](https://github.com/PyTorchLightning/pytorch-lightning/pull/9516)) - * Marked several methods in `EvaluationEpochLoop` as protected: `on_evaluation_batch_start`, `evaluation_step`, `evaluation_step_end` ([#9516](https://github.com/PyTorchLightning/pytorch-lightning/pull/9516)) - * Added `yielding_training_step` example ([#9983](https://github.com/PyTorchLightning/pytorch-lightning/pull/9983)) + - Added `Closure` and 
`AbstractClosure` classes ([#8642](https://github.com/PyTorchLightning/pytorch-lightning/pull/8642)) + - Refactored `TrainingBatchLoop` and extracted `OptimizerLoop`, splitting off automatic optimization into its own loop ([#9191](https://github.com/PyTorchLightning/pytorch-lightning/pull/9191)) + - Removed `TrainingBatchLoop.backward()`; manual optimization now calls directly into `Accelerator.backward()` and automatic optimization handles backward in new `OptimizerLoop` ([#9265](https://github.com/PyTorchLightning/pytorch-lightning/pull/9265)) + - Extracted `ManualOptimization` logic from `TrainingBatchLoop` into its own separate loop class ([#9266](https://github.com/PyTorchLightning/pytorch-lightning/pull/9266)) + - Added `OutputResult` and `ManualResult` classes ([#9437](https://github.com/PyTorchLightning/pytorch-lightning/pull/9437), [#9424](https://github.com/PyTorchLightning/pytorch-lightning/pull/9424)) + - Marked `OptimizerLoop.backward` as protected ([#9514](https://github.com/PyTorchLightning/pytorch-lightning/pull/9514)) + - Marked `FitLoop.should_accumulate` as protected ([#9515](https://github.com/PyTorchLightning/pytorch-lightning/pull/9515)) + - Marked several methods in `PredictionLoop` as protected: `on_predict_start`, `on_predict_epoch_end`, `on_predict_end`, `on_predict_model_eval` ([#9516](https://github.com/PyTorchLightning/pytorch-lightning/pull/9516)) + - Marked several methods in `EvaluationLoop` as protected: `get_max_batches`, `on_evaluation_model_eval`, `on_evaluation_model_train`, `on_evaluation_start`, `on_evaluation_epoch_start`, `on_evaluation_epoch_end`, `on_evaluation_end`, `reload_evaluation_dataloaders` ([#9516](https://github.com/PyTorchLightning/pytorch-lightning/pull/9516)) + - Marked several methods in `EvaluationEpochLoop` as protected: `on_evaluation_batch_start`, `evaluation_step`, `evaluation_step_end` ([#9516](https://github.com/PyTorchLightning/pytorch-lightning/pull/9516)) + - Added `yielding_training_step` example ([#9983](https://github.com/PyTorchLightning/pytorch-lightning/pull/9983)) - Added support for saving and loading state of multiple callbacks of the same type ([#7187](https://github.com/PyTorchLightning/pytorch-lightning/pull/7187)) - Added DeepSpeed Stage 1 support ([#8974](https://github.com/PyTorchLightning/pytorch-lightning/pull/8974)) - Added `Python dataclass` support for `LightningDataModule` ([#8272](https://github.com/PyTorchLightning/pytorch-lightning/issues/8272)) - Added sanitization of tensors when they get logged as hyperparameters in `TensorBoardLogger` ([#9031](https://github.com/PyTorchLightning/pytorch-lightning/pull/9031)) - Added `InterBatchParallelDataFetcher` ([#9020](https://github.com/PyTorchLightning/pytorch-lightning/pull/9020)) - Added `DataLoaderIterDataFetcher` ([#9020](https://github.com/PyTorchLightning/pytorch-lightning/pull/9020)) -- Added `DataFetcher` within `Fit / Evaluation` Loop ([#9047](https://github.com/PyTorchLightning/pytorch-lightning/pull/9047)) +- Added `DataFetcher` within `Fit / Evaluation` Loop ([#9047](https://github.com/PyTorchLightning/pytorch-lightning/pull/9047)) - Added a friendly error message when DDP attempts to spawn new distributed processes with rank > 0 ([#9005](https://github.com/PyTorchLightning/pytorch-lightning/pull/9005)) - Added Rich integration: - * Added Rich progress bar ([#8929](https://github.com/PyTorchLightning/pytorch-lightning/pull/8929), [#9559](https://github.com/PyTorchLightning/pytorch-lightning/pull/9559)) - * Added Support for iterable datasets 
([#9734](https://github.com/PyTorchLightning/pytorch-lightning/pull/9734)) - * Added `RichModelSummary` callback ([#9546](https://github.com/PyTorchLightning/pytorch-lightning/pull/9546)) - * Added `configure_columns` method to `RichProgressBar` ([#10288](https://github.com/PyTorchLightning/pytorch-lightning/pull/10288)) - * Added `leave` argument to `RichProgressBar` ([#10301](https://github.com/PyTorchLightning/pytorch-lightning/pull/10301)) + - Added Rich progress bar ([#8929](https://github.com/PyTorchLightning/pytorch-lightning/pull/8929), [#9559](https://github.com/PyTorchLightning/pytorch-lightning/pull/9559)) + - Added support for iterable datasets ([#9734](https://github.com/PyTorchLightning/pytorch-lightning/pull/9734)) + - Added `RichModelSummary` callback ([#9546](https://github.com/PyTorchLightning/pytorch-lightning/pull/9546)) + - Added `configure_columns` method to `RichProgressBar` ([#10288](https://github.com/PyTorchLightning/pytorch-lightning/pull/10288)) + - Added `leave` argument to `RichProgressBar` ([#10301](https://github.com/PyTorchLightning/pytorch-lightning/pull/10301)) - Added input validation logic for precision ([#9080](https://github.com/PyTorchLightning/pytorch-lightning/pull/9080)) - Added support for CPU AMP autocast ([#9084](https://github.com/PyTorchLightning/pytorch-lightning/pull/9084)) - Added `on_exception` callback hook ([#9183](https://github.com/PyTorchLightning/pytorch-lightning/pull/9183)) @@ -428,25 +347,25 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added `init_meta_context`, `materialize_module` utilities ([#9920](https://github.com/PyTorchLightning/pytorch-lightning/pull/9920)) - Added `TPUPrecisionPlugin` ([#10020](https://github.com/PyTorchLightning/pytorch-lightning/pull/10020)) - Added `torch.bfloat16` support: - * Added bfloat16 support for Lightning Trainer ([#9049](https://github.com/PyTorchLightning/pytorch-lightning/pull/9049)) - * Renamed `TPUHalfPrecisionPlugin` to `TPUBf16PrecisionPlugin` ([#10026](https://github.com/PyTorchLightning/pytorch-lightning/pull/10026)) - * Default to `precision=bf16` on CPU when `precision=16` is passed ([#10033](https://github.com/PyTorchLightning/pytorch-lightning/pull/10033)) - * Added support for `torch.autocast` ([#10053](https://github.com/PyTorchLightning/pytorch-lightning/pull/10053)) + - Added bfloat16 support for Lightning Trainer ([#9049](https://github.com/PyTorchLightning/pytorch-lightning/pull/9049)) + - Renamed `TPUHalfPrecisionPlugin` to `TPUBf16PrecisionPlugin` ([#10026](https://github.com/PyTorchLightning/pytorch-lightning/pull/10026)) + - Default to `precision=bf16` on CPU when `precision=16` is passed ([#10033](https://github.com/PyTorchLightning/pytorch-lightning/pull/10033)) + - Added support for `torch.autocast` ([#10053](https://github.com/PyTorchLightning/pytorch-lightning/pull/10053)) - Added `kfold` example for loop customization ([#9965](https://github.com/PyTorchLightning/pytorch-lightning/pull/9965)) - LightningLite: - * Added `PrecisionPlugin.forward_context`, making it the default implementation for all `{train,val,test,predict}_step_context()` methods ([#9988](https://github.com/PyTorchLightning/pytorch-lightning/pull/9988)) - * Added `DDPSpawnPlugin.spawn()` for spawning new processes of a given function ([#10018](https://github.com/PyTorchLightning/pytorch-lightning/pull/10018), [#10022](https://github.com/PyTorchLightning/pytorch-lightning/pull/10022)) - * Added `TrainingTypePlugin.{_setup_model, _setup_optimizer}` methods
([#9994](https://github.com/PyTorchLightning/pytorch-lightning/pull/9994), [#10064](https://github.com/PyTorchLightning/pytorch-lightning/pull/10064)) - * Implemented `DataParallelPlugin._setup_model` ([#10010](https://github.com/PyTorchLightning/pytorch-lightning/pull/10010)) - * Implemented `DeepSpeedPlugin._setup_model_and_optimizers` ([#10009](https://github.com/PyTorchLightning/pytorch-lightning/pull/10009), [#10064](https://github.com/PyTorchLightning/pytorch-lightning/pull/10064)) - * Implemented `{DDPShardedPlugin,DDPShardedSpawnPlugin}._setup_model_and_optimizers` ([#10028](https://github.com/PyTorchLightning/pytorch-lightning/pull/10028), [#10064](https://github.com/PyTorchLightning/pytorch-lightning/pull/10064)) - * Added optional `model` argument to the `optimizer_step` methods in accelerators and plugins ([#10023](https://github.com/PyTorchLightning/pytorch-lightning/pull/10023)) - * Updated precision attributes in `DeepSpeedPlugin` ([#10164](https://github.com/PyTorchLightning/pytorch-lightning/pull/10164)) - * Added the ability to return a result from rank 0 in `DDPSpawnPlugin.spawn` ([#10162](https://github.com/PyTorchLightning/pytorch-lightning/pull/10162)) - * Added `pytorch_lightning.lite` package ([#10175](https://github.com/PyTorchLightning/pytorch-lightning/pull/10175)) - * Added `LightningLite` documentation ([#10043](https://github.com/PyTorchLightning/pytorch-lightning/pull/10043)) - * Added `LightningLite` examples ([#9987](https://github.com/PyTorchLightning/pytorch-lightning/pull/9987)) - * Make the `_LiteDataLoader` an iterator and add supports for custom dataloader ([#10279](https://github.com/PyTorchLightning/pytorch-lightning/pull/10279)) + - Added `PrecisionPlugin.forward_context`, making it the default implementation for all `{train,val,test,predict}_step_context()` methods ([#9988](https://github.com/PyTorchLightning/pytorch-lightning/pull/9988)) + - Added `DDPSpawnPlugin.spawn()` for spawning new processes of a given function ([#10018](https://github.com/PyTorchLightning/pytorch-lightning/pull/10018), [#10022](https://github.com/PyTorchLightning/pytorch-lightning/pull/10022)) + - Added `TrainingTypePlugin.{_setup_model, _setup_optimizer}` methods ([#9994](https://github.com/PyTorchLightning/pytorch-lightning/pull/9994), [#10064](https://github.com/PyTorchLightning/pytorch-lightning/pull/10064)) + - Implemented `DataParallelPlugin._setup_model` ([#10010](https://github.com/PyTorchLightning/pytorch-lightning/pull/10010)) + - Implemented `DeepSpeedPlugin._setup_model_and_optimizers` ([#10009](https://github.com/PyTorchLightning/pytorch-lightning/pull/10009), [#10064](https://github.com/PyTorchLightning/pytorch-lightning/pull/10064)) + - Implemented `{DDPShardedPlugin,DDPShardedSpawnPlugin}._setup_model_and_optimizers` ([#10028](https://github.com/PyTorchLightning/pytorch-lightning/pull/10028), [#10064](https://github.com/PyTorchLightning/pytorch-lightning/pull/10064)) + - Added optional `model` argument to the `optimizer_step` methods in accelerators and plugins ([#10023](https://github.com/PyTorchLightning/pytorch-lightning/pull/10023)) + - Updated precision attributes in `DeepSpeedPlugin` ([#10164](https://github.com/PyTorchLightning/pytorch-lightning/pull/10164)) + - Added the ability to return a result from rank 0 in `DDPSpawnPlugin.spawn` ([#10162](https://github.com/PyTorchLightning/pytorch-lightning/pull/10162)) + - Added `pytorch_lightning.lite` package ([#10175](https://github.com/PyTorchLightning/pytorch-lightning/pull/10175)) + - Added 
`LightningLite` documentation ([#10043](https://github.com/PyTorchLightning/pytorch-lightning/pull/10043)) + - Added `LightningLite` examples ([#9987](https://github.com/PyTorchLightning/pytorch-lightning/pull/9987)) + - Make the `_LiteDataLoader` an iterator and add support for custom dataloaders ([#10279](https://github.com/PyTorchLightning/pytorch-lightning/pull/10279)) - Added `use_omegaconf` argument to `save_hparams_to_yaml` plugin ([#9170](https://github.com/PyTorchLightning/pytorch-lightning/pull/9170)) - Added `ckpt_path` argument for `Trainer.fit()` ([#10061](https://github.com/PyTorchLightning/pytorch-lightning/pull/10061)) - Added `auto_device_count` method to `Accelerators` ([#10222](https://github.com/PyTorchLightning/pytorch-lightning/pull/10222)) @@ -470,10 +389,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Saved checkpoints will no longer use the type of a `Callback` as the key to avoid issues with unpickling ([#6886](https://github.com/PyTorchLightning/pytorch-lightning/pull/6886)) - Improved string conversion for `ResultCollection` ([#8622](https://github.com/PyTorchLightning/pytorch-lightning/pull/8622)) - `LightningCLI` changes: - * `LightningCLI.init_parser` now returns the parser instance ([#8721](https://github.com/PyTorchLightning/pytorch-lightning/pull/8721)) - * `LightningCLI.add_core_arguments_to_parser`, `LightningCLI.parse_arguments` now take a `parser` argument ([#8721](https://github.com/PyTorchLightning/pytorch-lightning/pull/8721)) - * `LightningCLI.instantiate_trainer` now takes a config and a list of callbacks ([#8721](https://github.com/PyTorchLightning/pytorch-lightning/pull/8721)) - * Split `LightningCLI.add_core_arguments_to_parser` into `LightningCLI.add_default_arguments_to_parser` + `LightningCLI.add_core_arguments_to_parser` ([#8721](https://github.com/PyTorchLightning/pytorch-lightning/pull/8721)) + - `LightningCLI.init_parser` now returns the parser instance ([#8721](https://github.com/PyTorchLightning/pytorch-lightning/pull/8721)) + - `LightningCLI.add_core_arguments_to_parser`, `LightningCLI.parse_arguments` now take a `parser` argument ([#8721](https://github.com/PyTorchLightning/pytorch-lightning/pull/8721)) + - `LightningCLI.instantiate_trainer` now takes a config and a list of callbacks ([#8721](https://github.com/PyTorchLightning/pytorch-lightning/pull/8721)) + - Split `LightningCLI.add_core_arguments_to_parser` into `LightningCLI.add_default_arguments_to_parser` + `LightningCLI.add_core_arguments_to_parser` ([#8721](https://github.com/PyTorchLightning/pytorch-lightning/pull/8721)) - The accelerator and training type plugin `setup` hooks no longer have a `model` argument ([#8536](https://github.com/PyTorchLightning/pytorch-lightning/pull/8536)) - The accelerator and training type plugin `update_global_step` hook has been removed ([#8856](https://github.com/PyTorchLightning/pytorch-lightning/pull/8856)) - The coverage of `self.log`-ing in any `LightningModule` or `Callback` hook has been improved ([#8498](https://github.com/PyTorchLightning/pytorch-lightning/pull/8498)) @@ -489,7 +408,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- The tuner now uses a unique filename to save a temporary checkpoint ([#9682](https://github.com/PyTorchLightning/pytorch-lightning/pull/9682)) - Changed `HorovodPlugin.all_gather` to return a `torch.Tensor` instead of a list ([#9696](https://github.com/PyTorchLightning/pytorch-lightning/pull/9696)) - Changed Trainer connectors to be protected attributes: - * Configuration Validator ([#9779](https://github.com/PyTorchLightning/pytorch-lightning/pull/9779)) + - Configuration Validator ([#9779](https://github.com/PyTorchLightning/pytorch-lightning/pull/9779)) - The `current_epoch` and `global_step` attributes now get restored irrespective of the Trainer task ([#9413](https://github.com/PyTorchLightning/pytorch-lightning/pull/9413)) - Trainer now raises an exception when requesting `amp_level` with native `amp_backend` ([#9755](https://github.com/PyTorchLightning/pytorch-lightning/pull/9755)) - Updated the logic to check for accumulation steps with deepspeed ([#9826](https://github.com/PyTorchLightning/pytorch-lightning/pull/9826)) @@ -626,7 +545,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `batch_size` in `ResultCollection` not being reset to 1 on epoch end ([#10242](https://github.com/PyTorchLightning/pytorch-lightning/pull/10242)) - Fixed `distrib_type` not being set when training plugin instances are being passed to the Trainer ([#10251](https://github.com/PyTorchLightning/pytorch-lightning/pull/10251)) - ## [1.4.9] - 2021-09-30 - Fixed `lr_find` to generate the same results on multiple calls ([#9704](https://github.com/PyTorchLightning/pytorch-lightning/pull/9704)) @@ -634,7 +552,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed input validation for `gradient_clip_val`, `gradient_clip_algorithm`, `track_grad_norm` and `terminate_on_nan` Trainer arguments ([#9595](https://github.com/PyTorchLightning/pytorch-lightning/pull/9595)) - Reset metrics before each task starts ([#9410](https://github.com/PyTorchLightning/pytorch-lightning/pull/9410)) - ## [1.4.8] - 2021-09-22 - Fixed error reporting in DDP process reconciliation when processes are launched by an external agent ([#9389](https://github.com/PyTorchLightning/pytorch-lightning/pull/9389)) @@ -642,7 +559,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `add_argparse_args` raising `TypeError` when args are typed as `typing.Generic` in Python 3.6 ([#9554](https://github.com/PyTorchLightning/pytorch-lightning/pull/9554)) - Fixed back-compatibility for saving hyperparameters from a single container and inferring its argument name by reverting [#9125](https://github.com/PyTorchLightning/pytorch-lightning/pull/9125) ([#9642](https://github.com/PyTorchLightning/pytorch-lightning/pull/9642)) - ## [1.4.7] - 2021-09-14 - Fixed logging of NaN parameters ([#9364](https://github.com/PyTorchLightning/pytorch-lightning/pull/9364)) @@ -651,15 +567,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed collision of user argument when using ShardedDDP ([#9512](https://github.com/PyTorchLightning/pytorch-lightning/pull/9512)) - Fixed DeepSpeed crash for RNNs ([#9489](https://github.com/PyTorchLightning/pytorch-lightning/pull/9489)) - ## [1.4.6] - 2021-09-07 - Fixed an issue with export to ONNX format when a model has multiple inputs ([#8800](https://github.com/PyTorchLightning/pytorch-lightning/pull/8800)) - Removed deprecation warnings being called for `on_{task}_dataloader` ([#9279](https://github.com/PyTorchLightning/pytorch-lightning/pull/9279)) - Fixed save/load/resume from checkpoint for DeepSpeed Plugin ( - [#8397](https://github.com/PyTorchLightning/pytorch-lightning/pull/8397), - [#8644](https://github.com/PyTorchLightning/pytorch-lightning/pull/8644), - [#8627](https://github.com/PyTorchLightning/pytorch-lightning/pull/8627)) + [#8397](https://github.com/PyTorchLightning/pytorch-lightning/pull/8397), + [#8644](https://github.com/PyTorchLightning/pytorch-lightning/pull/8644), + [#8627](https://github.com/PyTorchLightning/pytorch-lightning/pull/8627)) - Fixed `EarlyStopping` running on train epoch end when `check_val_every_n_epoch>1` is set ([#9156](https://github.com/PyTorchLightning/pytorch-lightning/pull/9156)) - Fixed an issue with logger outputs not being finalized correctly after prediction runs ([#8333](https://github.com/PyTorchLightning/pytorch-lightning/issues/8333)) - Fixed the Apex and DeepSpeed plugin closure running after the `on_before_optimizer_step` hook ([#9288](https://github.com/PyTorchLightning/pytorch-lightning/issues/9288)) @@ -671,7 +586,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed inspection of other args when a container is specified in `save_hyperparameters` ([#9125](https://github.com/PyTorchLightning/pytorch-lightning/pull/9125)) - Fixed signature of `Timer.on_train_epoch_end` and `StochasticWeightAveraging.on_train_epoch_end` to prevent unwanted deprecation warnings ([#9347](https://github.com/PyTorchLightning/pytorch-lightning/pull/9347)) - ## [1.4.5] - 2021-08-31 - Fixed reduction using `self.log(sync_dist=True, reduce_fx={mean,max})` ([#9142](https://github.com/PyTorchLightning/pytorch-lightning/pull/9142)) @@ -679,13 +593,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed the CometLogger so it no longer modifies the metrics in place; it now creates a copy of the metrics before performing any operations ([#9150](https://github.com/PyTorchLightning/pytorch-lightning/pull/9150)) - Fixed `DDP` "CUDA error: initialization error" due to a `copy` instead of `deepcopy` on `ResultCollection` ([#9239](https://github.com/PyTorchLightning/pytorch-lightning/pull/9239)) - ## [1.4.4] - 2021-08-24 - Fixed a bug in the binary search mode of auto batch size scaling where an exception was raised if the first trainer run resulted in OOM ([#8954](https://github.com/PyTorchLightning/pytorch-lightning/pull/8954)) - Fixed a bug causing logging with `log_gpu_memory='min_max'` not to work ([#9013](https://github.com/PyTorchLightning/pytorch-lightning/pull/9013)) - ## [1.4.3] - 2021-08-17 - Fixed plateau scheduler stepping on incomplete epoch ([#8861](https://github.com/PyTorchLightning/pytorch-lightning/pull/8861)) @@ -695,7 +607,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed lost reference to `_Metadata` object in `ResultMetricCollection` ([#8932](https://github.com/PyTorchLightning/pytorch-lightning/pull/8932)) - Ensure the existence of `DDPPlugin._sync_dir` in `reconciliate_processes` ([#8939](https://github.com/PyTorchLightning/pytorch-lightning/pull/8939)) - ## [1.4.2] - 2021-08-10 - Fixed recursive call for `apply_to_collection(include_none=False)` ([#8719](https://github.com/PyTorchLightning/pytorch-lightning/pull/8719)) @@ -703,7 +614,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed comments and exception message for `metrics_to_scalars` ([#8782](https://github.com/PyTorchLightning/pytorch-lightning/pull/8782/)) - Fixed a typo in the LightningLoggerBase.after_save_checkpoint docstring ([#8737](https://github.com/PyTorchLightning/pytorch-lightning/pull/8737/)) - ## [1.4.1] - 2021-08-03 - Fixed `trainer.fit_loop.split_idx` always returning `None` ([#8601](https://github.com/PyTorchLightning/pytorch-lightning/pull/8601)) @@ -715,7 +625,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed a deadlock issue with DDP and torchelastic ([#8655](https://github.com/PyTorchLightning/pytorch-lightning/pull/8655)) - Fixed `accelerator=ddp` choice for CPU ([#8645](https://github.com/PyTorchLightning/pytorch-lightning/pull/8645)) - ## [1.4.0] - 2021-07-27 ### Added @@ -729,14 +638,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added `ModelPruning(prune_on_train_epoch_end=True|False)` to choose when to apply pruning ([#7704](https://github.com/PyTorchLightning/pytorch-lightning/pull/7704)) - Added support for checkpointing based on a provided time interval during training ([#7515](https://github.com/PyTorchLightning/pytorch-lightning/pull/7515)) - Progress tracking - * Added dataclasses for progress tracking ([#6603](https://github.com/PyTorchLightning/pytorch-lightning/pull/6603), + - Added dataclasses for progress tracking ([#6603](https://github.com/PyTorchLightning/pytorch-lightning/pull/6603), [#7574](https://github.com/PyTorchLightning/pytorch-lightning/pull/7574), [#8140](https://github.com/PyTorchLightning/pytorch-lightning/pull/8140), [#8362](https://github.com/PyTorchLightning/pytorch-lightning/pull/8362)) - * Add `{,load_}state_dict` to the progress tracking dataclasses ([#8140](https://github.com/PyTorchLightning/pytorch-lightning/pull/8140)) - * Connect the progress tracking dataclasses to the loops ([#8244](https://github.com/PyTorchLightning/pytorch-lightning/pull/8244), + - Add `{,load_}state_dict` to the progress tracking dataclasses ([#8140](https://github.com/PyTorchLightning/pytorch-lightning/pull/8140)) - Connect the progress tracking dataclasses to the loops ([#8244](https://github.com/PyTorchLightning/pytorch-lightning/pull/8244), [#8362](https://github.com/PyTorchLightning/pytorch-lightning/pull/8362)) - * Do not reset the progress tracking dataclasses total counters ([#8475](https://github.com/PyTorchLightning/pytorch-lightning/pull/8475)) + - Do not reset the progress tracking dataclasses total counters ([#8475](https://github.com/PyTorchLightning/pytorch-lightning/pull/8475)) - Added support for passing a `LightningDataModule` positionally as the second argument to `trainer.{validate,test,predict}` ([#7431](https://github.com/PyTorchLightning/pytorch-lightning/pull/7431)) - Added argument `trainer.predict(ckpt_path)` ([#7430](https://github.com/PyTorchLightning/pytorch-lightning/pull/7430)) - Added
`clip_grad_by_value` support for TPUs ([#7025](https://github.com/PyTorchLightning/pytorch-lightning/pull/7025)) @@ -755,13 +664,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added the `on_before_optimizer_step` hook ([#8048](https://github.com/PyTorchLightning/pytorch-lightning/pull/8048)) - Added IPU Accelerator ([#7867](https://github.com/PyTorchLightning/pytorch-lightning/pull/7867)) - Fault-tolerant training - * Added `{,load_}state_dict` to `ResultCollection` ([#7948](https://github.com/PyTorchLightning/pytorch-lightning/pull/7948)) - * Added `{,load_}state_dict` to `Loops` ([#8197](https://github.com/PyTorchLightning/pytorch-lightning/pull/8197)) - * Added `FastForwardSampler` and `CaptureIterableDataset` ([#8307](https://github.com/PyTorchLightning/pytorch-lightning/pull/8307)) - * Set `Loop.restarting=False` at the end of the first iteration ([#8362](https://github.com/PyTorchLightning/pytorch-lightning/pull/8362)) - * Save the loops state with the checkpoint (opt-in) ([#8362](https://github.com/PyTorchLightning/pytorch-lightning/pull/8362)) - * Save a checkpoint to restore the state on exception (opt-in) ([#8362](https://github.com/PyTorchLightning/pytorch-lightning/pull/8362)) - * Added `state_dict` and `load_state_dict` utilities for `CombinedLoader` + utilities for dataloader ([#8364](https://github.com/PyTorchLightning/pytorch-lightning/pull/8364)) + - Added `{,load_}state_dict` to `ResultCollection` ([#7948](https://github.com/PyTorchLightning/pytorch-lightning/pull/7948)) + - Added `{,load_}state_dict` to `Loops` ([#8197](https://github.com/PyTorchLightning/pytorch-lightning/pull/8197)) + - Added `FastForwardSampler` and `CaptureIterableDataset` ([#8307](https://github.com/PyTorchLightning/pytorch-lightning/pull/8307)) + - Set `Loop.restarting=False` at the end of the first iteration ([#8362](https://github.com/PyTorchLightning/pytorch-lightning/pull/8362)) + - Save the loops state with the checkpoint (opt-in) ([#8362](https://github.com/PyTorchLightning/pytorch-lightning/pull/8362)) + - Save a checkpoint to restore the state on exception (opt-in) ([#8362](https://github.com/PyTorchLightning/pytorch-lightning/pull/8362)) + - Added `state_dict` and `load_state_dict` utilities for `CombinedLoader` + utilities for dataloader ([#8364](https://github.com/PyTorchLightning/pytorch-lightning/pull/8364)) - Added `rank_zero_only` to `LightningModule.log` function ([#7966](https://github.com/PyTorchLightning/pytorch-lightning/pull/7966)) - Added `metric_attribute` to `LightningModule.log` function ([#7966](https://github.com/PyTorchLightning/pytorch-lightning/pull/7966)) - Added a warning if `Trainer(log_every_n_steps)` is a value too high for the training dataloader ([#7734](https://github.com/PyTorchLightning/pytorch-lightning/pull/7734)) @@ -805,33 +714,33 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
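As a usage sketch for the `rank_zero_only` and `metric_attribute` arguments of `LightningModule.log` added above (the module, metric, and key names are illustrative; assumes `torchmetrics` is installed):

```python
import torch
import torchmetrics
import pytorch_lightning as pl


class LitClassifier(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)
        self.train_acc = torchmetrics.Accuracy()

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.layer(x)
        loss = torch.nn.functional.cross_entropy(logits, y)
        # Log only from global rank 0, avoiding cross-rank synchronization.
        self.log("loss", loss, rank_zero_only=True)
        # When the logged name differs from the attribute name, metric_attribute
        # tells Lightning which module attribute holds the metric object.
        self.train_acc(logits, y)
        self.log("acc", self.train_acc, metric_attribute="train_acc")
        return loss
```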
- `ModelCheckpoint` now runs at the end of the training epoch by default ([#8389](https://github.com/PyTorchLightning/pytorch-lightning/pull/8389)) - `EarlyStopping` now runs at the end of the training epoch by default ([#8286](https://github.com/PyTorchLightning/pytorch-lightning/pull/8286)) - Refactored Loops - * Moved attributes `global_step`, `current_epoch`, `max/min_steps`, `max/min_epochs`, `batch_idx`, and `total_batch_idx` to TrainLoop ([#7437](https://github.com/PyTorchLightning/pytorch-lightning/pull/7437)) - * Refactored result handling in training loop ([#7506](https://github.com/PyTorchLightning/pytorch-lightning/pull/7506)) - * Moved attributes `hiddens` and `split_idx` to TrainLoop ([#7507](https://github.com/PyTorchLightning/pytorch-lightning/pull/7507)) - * Refactored the logic around manual and automatic optimization inside the optimizer loop ([#7526](https://github.com/PyTorchLightning/pytorch-lightning/pull/7526)) - * Simplified "should run validation" logic ([#7682](https://github.com/PyTorchLightning/pytorch-lightning/pull/7682)) - * Simplified logic for updating the learning rate for schedulers ([#7682](https://github.com/PyTorchLightning/pytorch-lightning/pull/7682)) - * Removed the `on_epoch` guard from the "should stop" validation check ([#7701](https://github.com/PyTorchLightning/pytorch-lightning/pull/7701)) - * Refactored internal loop interface; added new classes `FitLoop`, `TrainingEpochLoop`, `TrainingBatchLoop` ([#7871](https://github.com/PyTorchLightning/pytorch-lightning/pull/7871), [#8077](https://github.com/PyTorchLightning/pytorch-lightning/pull/8077)) - * Removed `pytorch_lightning/trainer/training_loop.py` ([#7985](https://github.com/PyTorchLightning/pytorch-lightning/pull/7985)) - * Refactored evaluation loop interface; added new classes `DataLoaderLoop`, `EvaluationLoop`, `EvaluationEpochLoop` ([#7990](https://github.com/PyTorchLightning/pytorch-lightning/pull/7990), [#8077](https://github.com/PyTorchLightning/pytorch-lightning/pull/8077)) - * Removed `pytorch_lightning/trainer/evaluation_loop.py` ([#8056](https://github.com/PyTorchLightning/pytorch-lightning/pull/8056)) - * Restricted public access to several internal functions ([#8024](https://github.com/PyTorchLightning/pytorch-lightning/pull/8024)) - * Refactored trainer `_run_*` functions and separate evaluation loops ([#8065](https://github.com/PyTorchLightning/pytorch-lightning/pull/8065)) - * Refactored prediction loop interface; added new classes `PredictionLoop`, `PredictionEpochLoop` ([#7700](https://github.com/PyTorchLightning/pytorch-lightning/pull/7700), [#8077](https://github.com/PyTorchLightning/pytorch-lightning/pull/8077)) - * Removed `pytorch_lightning/trainer/predict_loop.py` ([#8094](https://github.com/PyTorchLightning/pytorch-lightning/pull/8094)) - * Moved result teardown to the loops ([#8245](https://github.com/PyTorchLightning/pytorch-lightning/pull/8245)) - * Improve `Loop` API to better handle children `state_dict` and `progress` ([#8334](https://github.com/PyTorchLightning/pytorch-lightning/pull/8334)) + - Moved attributes `global_step`, `current_epoch`, `max/min_steps`, `max/min_epochs`, `batch_idx`, and `total_batch_idx` to TrainLoop ([#7437](https://github.com/PyTorchLightning/pytorch-lightning/pull/7437)) + - Refactored result handling in training loop ([#7506](https://github.com/PyTorchLightning/pytorch-lightning/pull/7506)) + - Moved attributes `hiddens` and `split_idx` to TrainLoop ([#7507](https://github.com/PyTorchLightning/pytorch-lightning/pull/7507)) + - 
Refactored the logic around manual and automatic optimization inside the optimizer loop ([#7526](https://github.com/PyTorchLightning/pytorch-lightning/pull/7526)) + - Simplified "should run validation" logic ([#7682](https://github.com/PyTorchLightning/pytorch-lightning/pull/7682)) + - Simplified logic for updating the learning rate for schedulers ([#7682](https://github.com/PyTorchLightning/pytorch-lightning/pull/7682)) + - Removed the `on_epoch` guard from the "should stop" validation check ([#7701](https://github.com/PyTorchLightning/pytorch-lightning/pull/7701)) + - Refactored internal loop interface; added new classes `FitLoop`, `TrainingEpochLoop`, `TrainingBatchLoop` ([#7871](https://github.com/PyTorchLightning/pytorch-lightning/pull/7871), [#8077](https://github.com/PyTorchLightning/pytorch-lightning/pull/8077)) + - Removed `pytorch_lightning/trainer/training_loop.py` ([#7985](https://github.com/PyTorchLightning/pytorch-lightning/pull/7985)) + - Refactored evaluation loop interface; added new classes `DataLoaderLoop`, `EvaluationLoop`, `EvaluationEpochLoop` ([#7990](https://github.com/PyTorchLightning/pytorch-lightning/pull/7990), [#8077](https://github.com/PyTorchLightning/pytorch-lightning/pull/8077)) + - Removed `pytorch_lightning/trainer/evaluation_loop.py` ([#8056](https://github.com/PyTorchLightning/pytorch-lightning/pull/8056)) + - Restricted public access to several internal functions ([#8024](https://github.com/PyTorchLightning/pytorch-lightning/pull/8024)) + - Refactored trainer `_run_*` functions and separate evaluation loops ([#8065](https://github.com/PyTorchLightning/pytorch-lightning/pull/8065)) + - Refactored prediction loop interface; added new classes `PredictionLoop`, `PredictionEpochLoop` ([#7700](https://github.com/PyTorchLightning/pytorch-lightning/pull/7700), [#8077](https://github.com/PyTorchLightning/pytorch-lightning/pull/8077)) + - Removed `pytorch_lightning/trainer/predict_loop.py` ([#8094](https://github.com/PyTorchLightning/pytorch-lightning/pull/8094)) + - Moved result teardown to the loops ([#8245](https://github.com/PyTorchLightning/pytorch-lightning/pull/8245)) + - Improve `Loop` API to better handle children `state_dict` and `progress` ([#8334](https://github.com/PyTorchLightning/pytorch-lightning/pull/8334)) - Refactored logging - * Renamed and moved `core/step_result.py` to `trainer/connectors/logger_connector/result.py` ([#7736](https://github.com/PyTorchLightning/pytorch-lightning/pull/7736)) - * Dramatically simplify the `LoggerConnector` ([#7882](https://github.com/PyTorchLightning/pytorch-lightning/pull/7882)) - * `trainer.{logged,progress_bar,callback}_metrics` are now updated on-demand ([#7882](https://github.com/PyTorchLightning/pytorch-lightning/pull/7882)) - * Completely overhaul the `Result` object in favor of `ResultMetric` ([#7882](https://github.com/PyTorchLightning/pytorch-lightning/pull/7882)) - * Improve epoch-level reduction time and overall memory usage ([#7882](https://github.com/PyTorchLightning/pytorch-lightning/pull/7882)) - * Allow passing `self.log(batch_size=...)` ([#7891](https://github.com/PyTorchLightning/pytorch-lightning/pull/7891)) - * Each of the training loops now keeps its own results collection ([#7891](https://github.com/PyTorchLightning/pytorch-lightning/pull/7891)) - * Remove `EpochResultStore` and `HookResultStore` in favor of `ResultCollection` ([#7909](https://github.com/PyTorchLightning/pytorch-lightning/pull/7909)) - * Remove `MetricsHolder` 
([#7909](https://github.com/PyTorchLightning/pytorch-lightning/pull/7909)) + - Renamed and moved `core/step_result.py` to `trainer/connectors/logger_connector/result.py` ([#7736](https://github.com/PyTorchLightning/pytorch-lightning/pull/7736)) + - Dramatically simplify the `LoggerConnector` ([#7882](https://github.com/PyTorchLightning/pytorch-lightning/pull/7882)) + - `trainer.{logged,progress_bar,callback}_metrics` are now updated on-demand ([#7882](https://github.com/PyTorchLightning/pytorch-lightning/pull/7882)) + - Completely overhaul the `Result` object in favor of `ResultMetric` ([#7882](https://github.com/PyTorchLightning/pytorch-lightning/pull/7882)) + - Improve epoch-level reduction time and overall memory usage ([#7882](https://github.com/PyTorchLightning/pytorch-lightning/pull/7882)) + - Allow passing `self.log(batch_size=...)` ([#7891](https://github.com/PyTorchLightning/pytorch-lightning/pull/7891)) + - Each of the training loops now keeps its own results collection ([#7891](https://github.com/PyTorchLightning/pytorch-lightning/pull/7891)) + - Remove `EpochResultStore` and `HookResultStore` in favor of `ResultCollection` ([#7909](https://github.com/PyTorchLightning/pytorch-lightning/pull/7909)) + - Remove `MetricsHolder` ([#7909](https://github.com/PyTorchLightning/pytorch-lightning/pull/7909)) - Moved `ignore_scalar_return_in_dp` warning suppression to the DataParallelPlugin class ([#7421](https://github.com/PyTorchLightning/pytorch-lightning/pull/7421/)) - Changed the behaviour when logging evaluation step metrics to no longer append `/epoch_*` to the metric name ([#7351](https://github.com/PyTorchLightning/pytorch-lightning/pull/7351)) - Raised `ValueError` when a `None` value is `self.log`-ed ([#7771](https://github.com/PyTorchLightning/pytorch-lightning/pull/7771)) @@ -847,7 +756,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Accelerator hooks are called regardless of whether `LightningModule` overrides the same hooks ([#7826](https://github.com/PyTorchLightning/pytorch-lightning/pull/7826)) - Moved profilers to their own file ([#7822](https://github.com/PyTorchLightning/pytorch-lightning/pull/7822)) - The `on_after_backward` hook is now called on accumulating iterations. Use the `on_before_optimizer_step` hook to mimic the old behaviour ([#8328](https://github.com/PyTorchLightning/pytorch-lightning/pull/8328)) -- The mixed precision loss is no longer unscaled before the `on_after_backward` hook. Use the `on_before_optimizer_step` hook to mimic the old behaviour ([#8328](https://github.com/PyTorchLightning/pytorch-lightning/pull/8328)) +- The mixed precision loss is no longer unscaled before the `on_after_backward` hook. Use the `on_before_optimizer_step` hook to mimic the old behaviour ([#8328](https://github.com/PyTorchLightning/pytorch-lightning/pull/8328)) - The `TrainingTypePlugin.{pre,post}_backward` hooks no longer take the `optimizer, opt_idx, should_accumulate` arguments ([#8328](https://github.com/PyTorchLightning/pytorch-lightning/pull/8328)) - The `PrecisionPlugin.backward` hooks no longer return a value ([#8328](https://github.com/PyTorchLightning/pytorch-lightning/pull/8328)) - The `PrecisionPlugin.backward` hooks no longer take a `should_accumulate` argument ([#8328](https://github.com/PyTorchLightning/pytorch-lightning/pull/8328)) @@ -944,7 +853,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `lr_scheduler` with metric (e.g. 
`torch.optim.lr_scheduler.ReduceLROnPlateau`) when using `automatic_optimization = False` (see the sketch below) ([#7643](https://github.com/PyTorchLightning/pytorch-lightning/pull/7643)) - Fixed `DeepSpeed` breaking with no schedulers ([#8580](https://github.com/PyTorchLightning/pytorch-lightning/pull/8580)) - ## [1.3.8] - 2021-07-01 ### Fixed @@ -959,7 +867,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed NCCL error when selecting non-consecutive device ids ([#8165](https://github.com/PyTorchLightning/pytorch-lightning/pull/8165)) - Fixed SWA to also work with `IterableDataset` ([#8172](https://github.com/PyTorchLightning/pytorch-lightning/pull/8172)) - ## [1.3.7] - 2021-06-22 ### Fixed @@ -971,7 +878,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed moving the best score to device in `EarlyStopping` callback for TPU devices ([#7959](https://github.com/PyTorchLightning/pytorch-lightning/pull/7959)) - Fixed access to `callback_metrics` in `ddp_spawn` ([#7916](https://github.com/PyTorchLightning/pytorch-lightning/pull/7916)) - ## [1.3.6] - 2021-06-15 ### Fixed @@ -981,7 +887,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed setting `worker_init_fn` to seed dataloaders correctly when using DDP ([#7942](https://github.com/PyTorchLightning/pytorch-lightning/pull/7942)) - Fixed `BaseFinetuning` callback to properly handle parent modules with parameters ([#7931](https://github.com/PyTorchLightning/pytorch-lightning/pull/7931)) - ## [1.3.5] - 2021-06-08 ### Added @@ -1000,7 +905,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Moved `training_output` validation to after `train_step_end` ([#7868](https://github.com/PyTorchLightning/pytorch-lightning/pull/7868)) - ## [1.3.4] - 2021-06-01 ### Fixed @@ -1008,7 +912,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed info message when max training time is reached ([#7780](https://github.com/PyTorchLightning/pytorch-lightning/pull/7780)) - Fixed missing `__len__` method in `IndexBatchSamplerWrapper` ([#7681](https://github.com/PyTorchLightning/pytorch-lightning/pull/7681)) - ## [1.3.3] - 2021-05-27 ### Changed @@ -1024,7 +927,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed global step update when the epoch is skipped ([#7677](https://github.com/PyTorchLightning/pytorch-lightning/pull/7677)) - Fixed training loop total batch counter when accumulate grad batches was enabled ([#7692](https://github.com/PyTorchLightning/pytorch-lightning/pull/7692)) - ## [1.3.2] - 2021-05-18 ### Changed @@ -1038,7 +940,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed setting correct `DistribType` for `ddp_cpu` (spawn) backend ([#7492](https://github.com/PyTorchLightning/pytorch-lightning/pull/7492)) - Fixed incorrect number of calls to LR scheduler when `check_val_every_n_epoch > 1` ([#7032](https://github.com/PyTorchLightning/pytorch-lightning/pull/7032)) - ## [1.3.1] - 2021-05-11 ### Fixed @@ -1047,7 +948,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
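The scheduler fixes above mostly concern metric-driven schedulers such as `ReduceLROnPlateau`. Under automatic optimization these are wired up in `configure_optimizers` with a `monitor` key; under manual optimization, `scheduler.step(metric)` is called by the user. A minimal sketch of the automatic case (the model and monitored key are illustrative):

```python
import torch
import pytorch_lightning as pl


class LitModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def validation_step(self, batch, batch_idx):
        x, y = batch
        loss = torch.nn.functional.cross_entropy(self.layer(x), y)
        # The logged key must match the "monitor" entry below.
        self.log("val_loss", loss)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min")
        return {
            "optimizer": optimizer,
            "lr_scheduler": {"scheduler": scheduler, "monitor": "val_loss"},
        }
```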
- Fixed `Trainer.current_epoch` not getting restored after tuning ([#7434](https://github.com/PyTorchLightning/pytorch-lightning/pull/7434)) - Fixed local rank displayed in console log ([#7395](https://github.com/PyTorchLightning/pytorch-lightning/pull/7395)) - ## [1.3.0] - 2021-05-06 ### Added @@ -1059,10 +959,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added utils for NaN/Inf detection for gradients and parameters ([#6834](https://github.com/PyTorchLightning/pytorch-lightning/pull/6834)) - Added more explicit exception message when trying to execute `trainer.test()` or `trainer.validate()` with `fast_dev_run=True` ([#6667](https://github.com/PyTorchLightning/pytorch-lightning/pull/6667)) - Added `LightningCLI` class to provide simple reproducibility with minimum boilerplate training CLI ( - [#4492](https://github.com/PyTorchLightning/pytorch-lightning/pull/4492), - [#6862](https://github.com/PyTorchLightning/pytorch-lightning/pull/6862), - [#7156](https://github.com/PyTorchLightning/pytorch-lightning/pull/7156), - [#7299](https://github.com/PyTorchLightning/pytorch-lightning/pull/7299)) + [#4492](https://github.com/PyTorchLightning/pytorch-lightning/pull/4492), + [#6862](https://github.com/PyTorchLightning/pytorch-lightning/pull/6862), + [#7156](https://github.com/PyTorchLightning/pytorch-lightning/pull/7156), + [#7299](https://github.com/PyTorchLightning/pytorch-lightning/pull/7299)) - Added `gradient_clip_algorithm` argument to Trainer for gradient clipping by value ([#6123](https://github.com/PyTorchLightning/pytorch-lightning/pull/6123)). - Added a way to print to terminal without breaking up the progress bar ([#5470](https://github.com/PyTorchLightning/pytorch-lightning/pull/5470)) - Added support to checkpoint after training steps in `ModelCheckpoint` callback ([#6146](https://github.com/PyTorchLightning/pytorch-lightning/pull/6146)) @@ -1085,10 +985,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
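A minimal sketch of the `LightningCLI` entry point added above (in the 1.3–1.5 series it lives under `pytorch_lightning.utilities.cli`; the model and datamodule imports are placeholders):

```python
# train.py -- run e.g.: python train.py --trainer.max_epochs=5
from pytorch_lightning.utilities.cli import LightningCLI

from my_project import MyDataModule, MyModel  # placeholder imports

if __name__ == "__main__":
    # Builds command-line options for the model, datamodule, and Trainer,
    # then instantiates everything and calls trainer.fit().
    LightningCLI(MyModel, MyDataModule)
```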
- Added support for DDP communication hooks ([#6736](https://github.com/PyTorchLightning/pytorch-lightning/pull/6736)) - Added `artifact_location` argument to `MLFlowLogger` which will be passed to the `MlflowClient.create_experiment` call ([#6677](https://github.com/PyTorchLightning/pytorch-lightning/pull/6677)) - Added `model` parameter to precision plugins' `clip_gradients` signature ( - [#6764](https://github.com/PyTorchLightning/pytorch-lightning/pull/6764), - [#7231](https://github.com/PyTorchLightning/pytorch-lightning/pull/7231)) + [#6764](https://github.com/PyTorchLightning/pytorch-lightning/pull/6764), + [#7231](https://github.com/PyTorchLightning/pytorch-lightning/pull/7231)) - Added `is_last_batch` attribute to `Trainer` ([#6825](https://github.com/PyTorchLightning/pytorch-lightning/pull/6825)) -- Added `LightningModule.lr_schedulers()` for manual optimization ([#6567](https://github.com/PyTorchLightning/pytorch-lightning/pull/6567)) +- Added `LightningModule.lr_schedulers()` for manual optimization ([#6567](https://github.com/PyTorchLightning/pytorch-lightning/pull/6567)) - Added `MpModelWrapper` in TPU Spawn ([#7045](https://github.com/PyTorchLightning/pytorch-lightning/pull/7045)) - Added `max_time` Trainer argument to limit training time ([#6823](https://github.com/PyTorchLightning/pytorch-lightning/pull/6823)) - Added `on_predict_{batch,epoch}_{start,end}` hooks ([#7141](https://github.com/PyTorchLightning/pytorch-lightning/pull/7141)) @@ -1103,11 +1003,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added warning when missing `Callback` and using `resume_from_checkpoint` ([#7254](https://github.com/PyTorchLightning/pytorch-lightning/pull/7254)) - DeepSpeed single file saving ([#6900](https://github.com/PyTorchLightning/pytorch-lightning/pull/6900)) - Added Training type Plugins Registry ( - [#6982](https://github.com/PyTorchLightning/pytorch-lightning/pull/6982), - [#7063](https://github.com/PyTorchLightning/pytorch-lightning/pull/7063), - [#7214](https://github.com/PyTorchLightning/pytorch-lightning/pull/7214), - [#7224](https://github.com/PyTorchLightning/pytorch-lightning/pull/7224) -) + [#6982](https://github.com/PyTorchLightning/pytorch-lightning/pull/6982), + [#7063](https://github.com/PyTorchLightning/pytorch-lightning/pull/7063), + [#7214](https://github.com/PyTorchLightning/pytorch-lightning/pull/7214), + [#7224](https://github.com/PyTorchLightning/pytorch-lightning/pull/7224) + ) - Add `ignore` param to `save_hyperparameters` ([#6056](https://github.com/PyTorchLightning/pytorch-lightning/pull/6056)) ### Changed @@ -1116,16 +1016,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed `EarlyStopping` callback from by default running `EarlyStopping.on_validation_end` if only training is run. 
Set `check_on_train_epoch_end` to run the callback at the end of the train epoch instead of at the end of the validation epoch ([#7069](https://github.com/PyTorchLightning/pytorch-lightning/pull/7069)) - Renamed `pytorch_lightning.callbacks.swa` to `pytorch_lightning.callbacks.stochastic_weight_avg` ([#6259](https://github.com/PyTorchLightning/pytorch-lightning/pull/6259)) - Refactor `RunningStage` and `TrainerState` usage ( - [#4945](https://github.com/PyTorchLightning/pytorch-lightning/pull/4945), - [#7173](https://github.com/PyTorchLightning/pytorch-lightning/pull/7173)) - * Added `RunningStage.SANITY_CHECKING` - * Added `TrainerFn.{FITTING,VALIDATING,TESTING,PREDICTING,TUNING}` - * Changed `trainer.evaluating` to return `True` if validating or testing + [#4945](https://github.com/PyTorchLightning/pytorch-lightning/pull/4945), + [#7173](https://github.com/PyTorchLightning/pytorch-lightning/pull/7173)) + - Added `RunningStage.SANITY_CHECKING` + - Added `TrainerFn.{FITTING,VALIDATING,TESTING,PREDICTING,TUNING}` + - Changed `trainer.evaluating` to return `True` if validating or testing - Changed `setup()` and `teardown()` stage argument to take any of `{fit,validate,test,predict}` ([#6386](https://github.com/PyTorchLightning/pytorch-lightning/pull/6386)) - Changed profilers to save separate report files per state and rank ([#6621](https://github.com/PyTorchLightning/pytorch-lightning/pull/6621)) - The trainer no longer tries to save a checkpoint on exception or run callback's `on_train_end` functions ([#6864](https://github.com/PyTorchLightning/pytorch-lightning/pull/6864)) - Changed `PyTorchProfiler` to use `torch.autograd.profiler.record_function` to record functions ([#6349](https://github.com/PyTorchLightning/pytorch-lightning/pull/6349)) -- Disabled `lr_scheduler.step()` in manual optimization ([#6825](https://github.com/PyTorchLightning/pytorch-lightning/pull/6825)) +- Disabled `lr_scheduler.step()` in manual optimization ([#6825](https://github.com/PyTorchLightning/pytorch-lightning/pull/6825)) - Changed warnings and recommendations for dataloaders in `ddp_spawn` ([#6762](https://github.com/PyTorchLightning/pytorch-lightning/pull/6762)) - `pl.seed_everything` will now also set the seed on the `DistributedSampler` ([#7024](https://github.com/PyTorchLightning/pytorch-lightning/pull/7024)) - Changed default setting for communication of multi-node training using `DDPShardedPlugin` ([#6937](https://github.com/PyTorchLightning/pytorch-lightning/pull/6937)) @@ -1137,9 +1037,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
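The `EarlyStopping` behavior change described above is controlled by the `check_on_train_epoch_end` flag introduced in the same PR; a short sketch (the monitored key is illustrative and assumed to be logged during training):

```python
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping

# Evaluate the stopping condition on a train-time metric at the end of each
# training epoch instead of at the end of validation.
early_stop = EarlyStopping(
    monitor="train_loss", mode="min", patience=3, check_on_train_epoch_end=True
)
trainer = Trainer(callbacks=[early_stop])
```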
- Removed ModelSummary validation from train loop `on_trainer_init` ([#6610](https://github.com/PyTorchLightning/pytorch-lightning/pull/6610)) - Moved `save_function` to accelerator ([#6689](https://github.com/PyTorchLightning/pytorch-lightning/pull/6689)) - Updated DeepSpeed ZeRO ([#6546](https://github.com/PyTorchLightning/pytorch-lightning/pull/6546), - [#6752](https://github.com/PyTorchLightning/pytorch-lightning/pull/6752), - [#6142](https://github.com/PyTorchLightning/pytorch-lightning/pull/6142), - [#6321](https://github.com/PyTorchLightning/pytorch-lightning/pull/6321)) + [#6752](https://github.com/PyTorchLightning/pytorch-lightning/pull/6752), + [#6142](https://github.com/PyTorchLightning/pytorch-lightning/pull/6142), + [#6321](https://github.com/PyTorchLightning/pytorch-lightning/pull/6321)) - Improved verbose logging for `EarlyStopping` callback ([#6811](https://github.com/PyTorchLightning/pytorch-lightning/pull/6811)) - Run `ddp_spawn` dataloader checks on Windows ([#6930](https://github.com/PyTorchLightning/pytorch-lightning/pull/6930)) - Updated mlflow to use `resolve_tags` ([#6746](https://github.com/PyTorchLightning/pytorch-lightning/pull/6746)) @@ -1169,20 +1069,20 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Deprecated `@auto_move_data` in favor of `trainer.predict` ([#6993](https://github.com/PyTorchLightning/pytorch-lightning/pull/6993)) - Deprecated `Callback.on_load_checkpoint(checkpoint)` in favor of `Callback.on_load_checkpoint(trainer, pl_module, checkpoint)` ([#7253](https://github.com/PyTorchLightning/pytorch-lightning/pull/7253)) - Deprecated metrics in favor of `torchmetrics` ( - [#6505](https://github.com/PyTorchLightning/pytorch-lightning/pull/6505), - [#6530](https://github.com/PyTorchLightning/pytorch-lightning/pull/6530), - [#6540](https://github.com/PyTorchLightning/pytorch-lightning/pull/6540), - [#6547](https://github.com/PyTorchLightning/pytorch-lightning/pull/6547), - [#6515](https://github.com/PyTorchLightning/pytorch-lightning/pull/6515), - [#6572](https://github.com/PyTorchLightning/pytorch-lightning/pull/6572), - [#6573](https://github.com/PyTorchLightning/pytorch-lightning/pull/6573), - [#6584](https://github.com/PyTorchLightning/pytorch-lightning/pull/6584), - [#6636](https://github.com/PyTorchLightning/pytorch-lightning/pull/6636), - [#6637](https://github.com/PyTorchLightning/pytorch-lightning/pull/6637), - [#6649](https://github.com/PyTorchLightning/pytorch-lightning/pull/6649), - [#6659](https://github.com/PyTorchLightning/pytorch-lightning/pull/6659), - [#7131](https://github.com/PyTorchLightning/pytorch-lightning/pull/7131), -) + [#6505](https://github.com/PyTorchLightning/pytorch-lightning/pull/6505), + [#6530](https://github.com/PyTorchLightning/pytorch-lightning/pull/6530), + [#6540](https://github.com/PyTorchLightning/pytorch-lightning/pull/6540), + [#6547](https://github.com/PyTorchLightning/pytorch-lightning/pull/6547), + [#6515](https://github.com/PyTorchLightning/pytorch-lightning/pull/6515), + [#6572](https://github.com/PyTorchLightning/pytorch-lightning/pull/6572), + [#6573](https://github.com/PyTorchLightning/pytorch-lightning/pull/6573), + [#6584](https://github.com/PyTorchLightning/pytorch-lightning/pull/6584), + [#6636](https://github.com/PyTorchLightning/pytorch-lightning/pull/6636), + [#6637](https://github.com/PyTorchLightning/pytorch-lightning/pull/6637), + [#6649](https://github.com/PyTorchLightning/pytorch-lightning/pull/6649), + 
[#6659](https://github.com/PyTorchLightning/pytorch-lightning/pull/6659), + [#7131](https://github.com/PyTorchLightning/pytorch-lightning/pull/7131), + ) - Deprecated the `LightningModule.datamodule` getter and setter methods; access them through `Trainer.datamodule` instead ([#7168](https://github.com/PyTorchLightning/pytorch-lightning/pull/7168)) - Deprecated the use of `Trainer(gpus="i")` (string) for selecting the i-th GPU; from v1.5 this will set the number of GPUs instead of the index ([#6388](https://github.com/PyTorchLightning/pytorch-lightning/pull/6388)) @@ -1197,8 +1097,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Removed passing a `ModelCheckpoint` instance to `Trainer(checkpoint_callback)` ([#6166](https://github.com/PyTorchLightning/pytorch-lightning/pull/6166)) - Removed deprecated Trainer argument `enable_pl_optimizer` and `automatic_optimization` ([#6163](https://github.com/PyTorchLightning/pytorch-lightning/pull/6163)) - Removed deprecated metrics ([#6161](https://github.com/PyTorchLightning/pytorch-lightning/pull/6161)) - * from `pytorch_lightning.metrics.functional.classification` removed `to_onehot`, `to_categorical`, `get_num_classes`, `roc`, `multiclass_roc`, `average_precision`, `precision_recall_curve`, `multiclass_precision_recall_curve` - * from `pytorch_lightning.metrics.functional.reduction` removed `reduce`, `class_reduce` + - from `pytorch_lightning.metrics.functional.classification` removed `to_onehot`, `to_categorical`, `get_num_classes`, `roc`, `multiclass_roc`, `average_precision`, `precision_recall_curve`, `multiclass_precision_recall_curve` + - from `pytorch_lightning.metrics.functional.reduction` removed `reduce`, `class_reduce` - Removed deprecated `ModelCheckpoint` arguments `prefix`, `mode="auto"` ([#6162](https://github.com/PyTorchLightning/pytorch-lightning/pull/6162)) - Removed `mode='auto'` from `EarlyStopping` ([#6167](https://github.com/PyTorchLightning/pytorch-lightning/pull/6167)) - Removed `epoch` and `step` arguments from `ModelCheckpoint.format_checkpoint_name()`, these are now included in the `metrics` argument ([#7344](https://github.com/PyTorchLightning/pytorch-lightning/pull/7344)) @@ -1249,19 +1149,19 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
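The metrics deprecation above amounts to swapping import paths: the classes moved to the standalone `torchmetrics` package under the same names. A short sketch of the migration:

```python
# Before (deprecated in v1.3):
#   from pytorch_lightning.metrics import Accuracy
# After -- the standalone package provides the same class:
import torch
from torchmetrics import Accuracy

accuracy = Accuracy()
preds = torch.tensor([0, 1, 1, 0])
target = torch.tensor([0, 1, 0, 0])
print(accuracy(preds, target))  # tensor(0.7500) -- 3 of 4 correct
```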
- Fixed custom init args for `WandbLogger` ([#6989](https://github.com/PyTorchLightning/pytorch-lightning/pull/6989)) - Fixed a bug where an error would be raised if the train dataloader sometimes produced None for a batch ([#7342](https://github.com/PyTorchLightning/pytorch-lightning/pull/7342)) - Fixed examples ( - [#6600](https://github.com/PyTorchLightning/pytorch-lightning/pull/6600), - [#6638](https://github.com/PyTorchLightning/pytorch-lightning/pull/6638), - [#7096](https://github.com/PyTorchLightning/pytorch-lightning/pull/7096), - [#7246](https://github.com/PyTorchLightning/pytorch-lightning/pull/7246), - [#6357](https://github.com/PyTorchLightning/pytorch-lightning/pull/6357), - [#6476](https://github.com/PyTorchLightning/pytorch-lightning/pull/6476), - [#6294](https://github.com/PyTorchLightning/pytorch-lightning/pull/6294), - [#6373](https://github.com/PyTorchLightning/pytorch-lightning/pull/6373), - [#6088](https://github.com/PyTorchLightning/pytorch-lightning/pull/6088), - [#7398](https://github.com/PyTorchLightning/pytorch-lightning/pull/7398) -) + [#6600](https://github.com/PyTorchLightning/pytorch-lightning/pull/6600), + [#6638](https://github.com/PyTorchLightning/pytorch-lightning/pull/6638), + [#7096](https://github.com/PyTorchLightning/pytorch-lightning/pull/7096), + [#7246](https://github.com/PyTorchLightning/pytorch-lightning/pull/7246), + [#6357](https://github.com/PyTorchLightning/pytorch-lightning/pull/6357), + [#6476](https://github.com/PyTorchLightning/pytorch-lightning/pull/6476), + [#6294](https://github.com/PyTorchLightning/pytorch-lightning/pull/6294), + [#6373](https://github.com/PyTorchLightning/pytorch-lightning/pull/6373), + [#6088](https://github.com/PyTorchLightning/pytorch-lightning/pull/6088), + [#7398](https://github.com/PyTorchLightning/pytorch-lightning/pull/7398) + ) - Resolved schedule step bug for PyTorch Profiler ([#6674](https://github.com/PyTorchLightning/pytorch-lightning/pull/6674), - [#6681](https://github.com/PyTorchLightning/pytorch-lightning/pull/6681)) + [#6681](https://github.com/PyTorchLightning/pytorch-lightning/pull/6681)) - Updated logic for checking TPU availability ([#6767](https://github.com/PyTorchLightning/pytorch-lightning/pull/6767)) - Resolved missed TPU rendezvous ([#6781](https://github.com/PyTorchLightning/pytorch-lightning/pull/6781)) - Fixed auto-scaling mode when calling the `tune` method on the trainer ([#7321](https://github.com/PyTorchLightning/pytorch-lightning/pull/7321)) @@ -1277,9 +1177,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `set_default_tensor_type` to `torch.DoubleTensor` with `precision=64` ([#7108](https://github.com/PyTorchLightning/pytorch-lightning/pull/7108)) - Fixed `NeptuneLogger.log_text(step=None)` ([#7194](https://github.com/PyTorchLightning/pytorch-lightning/pull/7194)) - Fixed importing torchtext batch ([#6365](https://github.com/PyTorchLightning/pytorch-lightning/pull/6365), - [#6323](https://github.com/PyTorchLightning/pytorch-lightning/pull/6323), - [#6211](https://github.com/PyTorchLightning/pytorch-lightning/pull/6211)) - + [#6323](https://github.com/PyTorchLightning/pytorch-lightning/pull/6323), + [#6211](https://github.com/PyTorchLightning/pytorch-lightning/pull/6211)) ## [1.2.9] - 2021-04-20 ### Fixed @@ -1289,7 +1188,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed multi-gpu join for Horovod ([#6954](https://github.com/PyTorchLightning/pytorch-lightning/pull/6954)) - Fixed parsing for pre-release package versions ([#6999](https://github.com/PyTorchLightning/pytorch-lightning/pull/6999)) - ## [1.2.8] - 2021-04-14 ### Added @@ -1311,23 +1209,21 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed DDP_SPAWN compatibility with bug_report_model.py ([#6892](https://github.com/PyTorchLightning/pytorch-lightning/pull/6892)) - Fixed bug where `BaseFinetuning.flatten_modules()` was duplicating leaf node parameters ([#6879](https://github.com/PyTorchLightning/pytorch-lightning/pull/6879)) - Set better defaults for `rank_zero_only.rank` when training is launched with SLURM and torchelastic: - * Support SLURM and torchelastic global rank environment variables ([#5715](https://github.com/PyTorchLightning/pytorch-lightning/pull/5715)) - * Remove hardcoding of local rank in accelerator connector ([#6878](https://github.com/PyTorchLightning/pytorch-lightning/pull/6878)) - + - Support SLURM and torchelastic global rank environment variables ([#5715](https://github.com/PyTorchLightning/pytorch-lightning/pull/5715)) + - Remove hardcoding of local rank in accelerator connector ([#6878](https://github.com/PyTorchLightning/pytorch-lightning/pull/6878)) ## [1.2.7] - 2021-04-06 ### Fixed - Fixed a bug with omegaconf and `xm.save` ([#6741](https://github.com/PyTorchLightning/pytorch-lightning/pull/6741)) -- Fixed an issue with IterableDataset when __len__ is not defined ([#6828](https://github.com/PyTorchLightning/pytorch-lightning/pull/6828)) +- Fixed an issue with IterableDataset when `__len__` is not defined (see the sketch below) ([#6828](https://github.com/PyTorchLightning/pytorch-lightning/pull/6828)) - Sanitize None params during pruning ([#6836](https://github.com/PyTorchLightning/pytorch-lightning/pull/6836)) - Enforce an epoch scheduler interval when using SWA ([#6588](https://github.com/PyTorchLightning/pytorch-lightning/pull/6588)) - Fixed TPU Colab hang issue, post training ([#6816](https://github.com/PyTorchLightning/pytorch-lightning/pull/6816)) - Fixed a bug where `TensorBoardLogger` would give a warning and not log correctly to a symbolic link `save_dir` ([#6730](https://github.com/PyTorchLightning/pytorch-lightning/pull/6730)) - Fixed bug where `predict` could not be used when `progress_bar_refresh_rate=0` ([#6884](https://github.com/PyTorchLightning/pytorch-lightning/pull/6884)) - ## [1.2.6] - 2021-03-30 ### Changed @@ -1346,7 +1242,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed a bug where gradients were disabled after calling `Trainer.predict` ([#6657](https://github.com/PyTorchLightning/pytorch-lightning/pull/6657)) - Fixed bug where no TPUs were detected in a TPU pod env ([#6719](https://github.com/PyTorchLightning/pytorch-lightning/pull/6719)) - ## [1.2.5] - 2021-03-23 ### Changed @@ -1361,7 +1256,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed duplicate logs appearing in console when using the python logging module ([#6275](https://github.com/PyTorchLightning/pytorch-lightning/pull/6275)) - Added Autocast in validation, test and predict modes for Native AMP ([#6565](https://github.com/PyTorchLightning/pytorch-lightning/pull/6565)) - ## [1.2.4] - 2021-03-16 ### Changed @@ -1380,7 +1274,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
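The `IterableDataset` fix above concerns datasets that define `__iter__` but no `__len__`. With such loaders Lightning cannot infer an epoch length, so batch-based limits should be given as integers. A minimal sketch (the stream contents are illustrative):

```python
import torch
from torch.utils.data import DataLoader, IterableDataset
from pytorch_lightning import Trainer


class Stream(IterableDataset):
    """An iterable dataset with no __len__, e.g. an unbounded data stream."""

    def __iter__(self):
        while True:
            yield torch.randn(32)


loader = DataLoader(Stream(), batch_size=16)
# With an unsized loader, cap each epoch with an integer batch count;
# trainer.fit(model, loader) would then run 100 batches per epoch.
trainer = Trainer(limit_train_batches=100, max_epochs=1)
```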
- Fixed an exception in the layer summary when the model contains `torch.jit` scripted submodules ([#6511](https://github.com/PyTorchLightning/pytorch-lightning/pull/6511)) - Fixed train loop config validation being run during `Trainer.predict` ([#6541](https://github.com/PyTorchLightning/pytorch-lightning/pull/6541)) - ## [1.2.3] - 2021-03-09 ### Fixed @@ -1389,7 +1282,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `_stable_1d_sort` to work when `n >= N` ([#6177](https://github.com/PyTorchLightning/pytorch-lightning/pull/6177)) - Fixed `AttributeError` when `logger=None` on TPU ([#6221](https://github.com/PyTorchLightning/pytorch-lightning/pull/6221)) - Fixed PyTorch Profiler with `emit_nvtx` ([#6260](https://github.com/PyTorchLightning/pytorch-lightning/pull/6260)) -- Fixed `trainer.test` from `best_path` hangs after calling `trainer.fit` ([#6272](https://github.com/PyTorchLightning/pytorch-lightning/pull/6272)) +- Fixed `trainer.test` from `best_path` hanging after calling `trainer.fit` ([#6272](https://github.com/PyTorchLightning/pytorch-lightning/pull/6272)) - Fixed `SingleTPU` calling `all_gather` ([#6296](https://github.com/PyTorchLightning/pytorch-lightning/pull/6296)) - Ensure we check DeepSpeed/Sharded in multi-node DDP ([#6297](https://github.com/PyTorchLightning/pytorch-lightning/pull/6297)) - Check `LightningOptimizer` doesn't delete optimizer hooks ([#6305](https://github.com/PyTorchLightning/pytorch-lightning/pull/6305)) @@ -1397,7 +1290,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Ensure that clip gradients is only called if the value is greater than 0 (see the sketch below) ([#6330](https://github.com/PyTorchLightning/pytorch-lightning/pull/6330)) - Fixed `Trainer` not resetting `lightning_optimizers` when calling `Trainer.fit()` multiple times ([#6372](https://github.com/PyTorchLightning/pytorch-lightning/pull/6372)) - ## [1.2.2] - 2021-03-02 ### Added @@ -1418,7 +1310,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Prevent `WandbLogger` from dropping values ([#5931](https://github.com/PyTorchLightning/pytorch-lightning/pull/5931)) - Fixed error thrown when using valid distributed mode in multi-node ([#6297](https://github.com/PyTorchLightning/pytorch-lightning/pull/6297)) - ## [1.2.1] - 2021-02-23 ### Fixed @@ -1428,7 +1319,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed error message for AMP + CPU incompatibility ([#6107](https://github.com/PyTorchLightning/pytorch-lightning/pull/6107)) - Disabled batch transfer in DP mode ([#6093](https://github.com/PyTorchLightning/pytorch-lightning/pull/6093)) - ## [1.2.0] - 2021-02-18 ### Added @@ -1453,8 +1343,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
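Relating to the gradient-clipping guard above: clipping is configured on the Trainer, and a value of `0` disables it; the `gradient_clip_algorithm` flag (added in v1.3) switches between norm- and value-based clipping. A short sketch:

```python
from pytorch_lightning import Trainer

# Clip gradients to a maximum L2 norm of 0.5 (gradient_clip_val=0 disables clipping).
trainer = Trainer(gradient_clip_val=0.5)

# Or clip each gradient element by value instead of by norm.
trainer = Trainer(gradient_clip_val=0.5, gradient_clip_algorithm="value")
```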
- Added missing val/test hooks in `LightningModule` ([#5467](https://github.com/PyTorchLightning/pytorch-lightning/pull/5467)) - The `Recall` and `Precision` metrics (and their functional counterparts `recall` and `precision`) can now be generalized to Recall@K and Precision@K with the use of the `top_k` parameter ([#4842](https://github.com/PyTorchLightning/pytorch-lightning/pull/4842)) - Added `ModelPruning` Callback ([#5618](https://github.com/PyTorchLightning/pytorch-lightning/pull/5618), - [#5825](https://github.com/PyTorchLightning/pytorch-lightning/pull/5825), - [#6045](https://github.com/PyTorchLightning/pytorch-lightning/pull/6045)) + [#5825](https://github.com/PyTorchLightning/pytorch-lightning/pull/5825), + [#6045](https://github.com/PyTorchLightning/pytorch-lightning/pull/6045)) - Added `PyTorchProfiler` ([#5560](https://github.com/PyTorchLightning/pytorch-lightning/pull/5560)) - Added compositional metrics ([#5464](https://github.com/PyTorchLightning/pytorch-lightning/pull/5464)) - Added Trainer method `predict(...)` for high-performance predictions (see the sketch below) ([#5579](https://github.com/PyTorchLightning/pytorch-lightning/pull/5579)) @@ -1462,7 +1352,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added AUC/AUROC class interface ([#5479](https://github.com/PyTorchLightning/pytorch-lightning/pull/5479)) - Added `PredictLoop` object ([#5752](https://github.com/PyTorchLightning/pytorch-lightning/pull/5752)) - Added `QuantizationAwareTraining` callback ([#5706](https://github.com/PyTorchLightning/pytorch-lightning/pull/5706), - [#6040](https://github.com/PyTorchLightning/pytorch-lightning/pull/6040)) + [#6040](https://github.com/PyTorchLightning/pytorch-lightning/pull/6040)) - Added `LightningModule.configure_callbacks` to enable the definition of model-specific callbacks ([#5621](https://github.com/PyTorchLightning/pytorch-lightning/pull/5621)) - Added `dim` to `PSNR` metric for mean-squared-error reduction ([#5957](https://github.com/PyTorchLightning/pytorch-lightning/pull/5957)) - Added proximal policy optimization template to pl_examples ([#5394](https://github.com/PyTorchLightning/pytorch-lightning/pull/5394)) @@ -1474,7 +1364,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added `PL_TORCH_DISTRIBUTED_BACKEND` env variable to select backend ([#5981](https://github.com/PyTorchLightning/pytorch-lightning/pull/5981)) - Added `Trainer` flag to activate Stochastic Weight Averaging (SWA) `Trainer(stochastic_weight_avg=True)` ([#6038](https://github.com/PyTorchLightning/pytorch-lightning/pull/6038)) - Added DeepSpeed integration ([#5954](https://github.com/PyTorchLightning/pytorch-lightning/pull/5954), - [#6042](https://github.com/PyTorchLightning/pytorch-lightning/pull/6042)) + [#6042](https://github.com/PyTorchLightning/pytorch-lightning/pull/6042)) ### Changed @@ -1493,24 +1383,24 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
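A usage sketch for the `predict(...)` method added above — it collects the outputs of `predict_step`, which by default simply calls `forward` (the model import and shapes are placeholders):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning import Trainer

from my_project import MyModel  # placeholder import

model = MyModel()
loader = DataLoader(TensorDataset(torch.randn(256, 32)), batch_size=64)

trainer = Trainer()
# Returns a list with one prediction (tensor or collection) per batch.
predictions = trainer.predict(model, dataloaders=loader)
```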
- Made `LightningModule.global_rank`, `LightningModule.local_rank` and `LightningModule.logger` read-only properties ([#5730](https://github.com/PyTorchLightning/pytorch-lightning/pull/5730)) - Forced `ModelCheckpoint` callbacks to run after all others to guarantee all states are saved to the checkpoint ([#5731](https://github.com/PyTorchLightning/pytorch-lightning/pull/5731)) - Refactored Accelerators and Plugins: - * Added base classes for plugins ([#5715](https://github.com/PyTorchLightning/pytorch-lightning/pull/5715)) - * Added parallel plugins for DP, DDP, DDPSpawn, DDP2 and Horovod ([#5714](https://github.com/PyTorchLightning/pytorch-lightning/pull/5714)) - * Precision Plugins ([#5718](https://github.com/PyTorchLightning/pytorch-lightning/pull/5718)) - * Added new Accelerators for CPU, GPU and TPU ([#5719](https://github.com/PyTorchLightning/pytorch-lightning/pull/5719)) - * Added RPC and Sharded plugins ([#5732](https://github.com/PyTorchLightning/pytorch-lightning/pull/5732)) - * Added missing `LightningModule`-wrapper logic to new plugins and accelerator ([#5734](https://github.com/PyTorchLightning/pytorch-lightning/pull/5734)) - * Moved device-specific teardown logic from training loop to accelerator ([#5973](https://github.com/PyTorchLightning/pytorch-lightning/pull/5973)) - * Moved accelerator_connector.py to the connectors subfolder ([#6033](https://github.com/PyTorchLightning/pytorch-lightning/pull/6033)) - * Trainer only references accelerator ([#6039](https://github.com/PyTorchLightning/pytorch-lightning/pull/6039)) - * Made parallel devices optional across all plugins ([#6051](https://github.com/PyTorchLightning/pytorch-lightning/pull/6051)) - * Cleaning ([#5948](https://github.com/PyTorchLightning/pytorch-lightning/pull/5948), - [#5949](https://github.com/PyTorchLightning/pytorch-lightning/pull/5949), - [#5950](https://github.com/PyTorchLightning/pytorch-lightning/pull/5950)) + - Added base classes for plugins ([#5715](https://github.com/PyTorchLightning/pytorch-lightning/pull/5715)) + - Added parallel plugins for DP, DDP, DDPSpawn, DDP2 and Horovod ([#5714](https://github.com/PyTorchLightning/pytorch-lightning/pull/5714)) + - Precision Plugins ([#5718](https://github.com/PyTorchLightning/pytorch-lightning/pull/5718)) + - Added new Accelerators for CPU, GPU and TPU ([#5719](https://github.com/PyTorchLightning/pytorch-lightning/pull/5719)) + - Added RPC and Sharded plugins ([#5732](https://github.com/PyTorchLightning/pytorch-lightning/pull/5732)) + - Added missing `LightningModule`-wrapper logic to new plugins and accelerator ([#5734](https://github.com/PyTorchLightning/pytorch-lightning/pull/5734)) + - Moved device-specific teardown logic from training loop to accelerator ([#5973](https://github.com/PyTorchLightning/pytorch-lightning/pull/5973)) + - Moved accelerator_connector.py to the connectors subfolder ([#6033](https://github.com/PyTorchLightning/pytorch-lightning/pull/6033)) + - Trainer only references accelerator ([#6039](https://github.com/PyTorchLightning/pytorch-lightning/pull/6039)) + - Made parallel devices optional across all plugins ([#6051](https://github.com/PyTorchLightning/pytorch-lightning/pull/6051)) + - Cleaning ([#5948](https://github.com/PyTorchLightning/pytorch-lightning/pull/5948), + [#5949](https://github.com/PyTorchLightning/pytorch-lightning/pull/5949), + [#5950](https://github.com/PyTorchLightning/pytorch-lightning/pull/5950)) - Enabled `self.log` in callbacks ([#5094](https://github.com/PyTorchLightning/pytorch-lightning/pull/5094)) - 
Renamed xxx_AVAILABLE as protected ([#5082](https://github.com/PyTorchLightning/pytorch-lightning/pull/5082)) - Unified module names in Utils ([#5199](https://github.com/PyTorchLightning/pytorch-lightning/pull/5199)) - Separated utils: imports & enums ([#5256](https://github.com/PyTorchLightning/pytorch-lightning/pull/5256) - [#5874](https://github.com/PyTorchLightning/pytorch-lightning/pull/5874)) + [#5874](https://github.com/PyTorchLightning/pytorch-lightning/pull/5874)) - Refactor: clean trainer device & distributed getters ([#5300](https://github.com/PyTorchLightning/pytorch-lightning/pull/5300)) - Simplified training phase as LightningEnum ([#5419](https://github.com/PyTorchLightning/pytorch-lightning/pull/5419)) - Updated metrics to use LightningEnum ([#5689](https://github.com/PyTorchLightning/pytorch-lightning/pull/5689)) @@ -1530,10 +1420,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Deprecated `LightningDistributedDataParallel` in favor of new wrapper module `LightningDistributedModule` ([#5185](https://github.com/PyTorchLightning/pytorch-lightning/pull/5185)) - Deprecated `LightningDataParallel` in favor of new wrapper module `LightningParallelModule` ([#5670](https://github.com/PyTorchLightning/pytorch-lightning/pull/5670)) - Renamed utils modules ([#5199](https://github.com/PyTorchLightning/pytorch-lightning/pull/5199)) - * `argparse_utils` >> `argparse` - * `model_utils` >> `model_helpers` - * `warning_utils` >> `warnings` - * `xla_device_utils` >> `xla_device` + - `argparse_utils` >> `argparse` + - `model_utils` >> `model_helpers` + - `warning_utils` >> `warnings` + - `xla_device_utils` >> `xla_device` - Deprecated using `'val_loss'` to set the `ModelCheckpoint` monitor ([#6012](https://github.com/PyTorchLightning/pytorch-lightning/pull/6012)) - Deprecated `.get_model()` with explicit `.lightning_module` property ([#6035](https://github.com/PyTorchLightning/pytorch-lightning/pull/6035)) - Deprecated Trainer attribute `accelerator_backend` in favor of `accelerator` ([#6034](https://github.com/PyTorchLightning/pytorch-lightning/pull/6034)) @@ -1567,14 +1457,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
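Following the `'val_loss'` deprecation above, the checkpointed quantity is passed to `ModelCheckpoint` explicitly; a short sketch (the key is illustrative and must match something logged via `self.log`):

```python
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint

# Monitor an explicitly logged metric instead of the implicit 'val_loss'
# default, keeping the three best checkpoints.
checkpoint_cb = ModelCheckpoint(monitor="val_loss", mode="min", save_top_k=3)
trainer = Trainer(callbacks=[checkpoint_cb])
```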
- Fixed synchronization issues with TPU training ([#6027](https://github.com/PyTorchLightning/pytorch-lightning/pull/6027)) - Fixed `hparams.yaml` being saved twice when using `TensorBoardLogger` ([#5953](https://github.com/PyTorchLightning/pytorch-lightning/pull/5953)) - Fixed basic examples ([#5912](https://github.com/PyTorchLightning/pytorch-lightning/pull/5912), - [#5985](https://github.com/PyTorchLightning/pytorch-lightning/pull/5985)) + [#5985](https://github.com/PyTorchLightning/pytorch-lightning/pull/5985)) - Fixed `fairscale` compatibility with PyTorch 1.8 ([#5996](https://github.com/PyTorchLightning/pytorch-lightning/pull/5996)) - Ensured `process_dataloader` is called when `tpu_cores > 1` to use Parallel DataLoader ([#6015](https://github.com/PyTorchLightning/pytorch-lightning/pull/6015)) - Attempted SLURM auto resume call when non-shell call fails ([#6002](https://github.com/PyTorchLightning/pytorch-lightning/pull/6002)) - Fixed wrapping optimizers upon assignment ([#6006](https://github.com/PyTorchLightning/pytorch-lightning/pull/6006)) - Fixed allowing hashing of metrics with lists in their state ([#5939](https://github.com/PyTorchLightning/pytorch-lightning/pull/5939)) - ## [1.1.8] - 2021-02-08 ### Fixed @@ -1582,20 +1471,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Separated epoch validation from step validation ([#5208](https://github.com/PyTorchLightning/pytorch-lightning/pull/5208)) - Fixed `toggle_optimizers` not handling all optimizer parameters ([#5775](https://github.com/PyTorchLightning/pytorch-lightning/pull/5775)) - ## [1.1.7] - 2021-02-03 ### Fixed - Fixed `TensorBoardLogger` not closing `SummaryWriter` on `finalize` ([#5696](https://github.com/PyTorchLightning/pytorch-lightning/pull/5696)) -- Fixed filtering of pytorch "unsqueeze" warning when using DP ([#5622](https://github.com/PyTorchLightning/pytorch-lightning/pull/5622)) +- Fixed filtering of pytorch "unsqueeze" warning when using DP ([#5622](https://github.com/PyTorchLightning/pytorch-lightning/pull/5622)) - Fixed `num_classes` argument in F1 metric ([#5663](https://github.com/PyTorchLightning/pytorch-lightning/pull/5663)) - Fixed `log_dir` property ([#5537](https://github.com/PyTorchLightning/pytorch-lightning/pull/5537)) - Fixed a race condition in `ModelCheckpoint` when checking if a checkpoint file exists ([#5144](https://github.com/PyTorchLightning/pytorch-lightning/pull/5144)) - Remove unnecessary intermediate layers in Dockerfiles ([#5697](https://github.com/PyTorchLightning/pytorch-lightning/pull/5697)) - Fixed auto learning rate ordering ([#5638](https://github.com/PyTorchLightning/pytorch-lightning/pull/5638)) - ## [1.1.6] - 2021-01-26 ### Changed @@ -1606,7 +1493,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Fixed -- Fixed `toggle_optimizer` to reset `requires_grad` state ([#5574](https://github.com/PyTorchLightning/pytorch-lightning/pull/5574)) +- Fixed `toggle_optimizer` to reset `requires_grad` state ([#5574](https://github.com/PyTorchLightning/pytorch-lightning/pull/5574)) - Fixed FileNotFoundError for best checkpoint when using DDP with Hydra ([#5629](https://github.com/PyTorchLightning/pytorch-lightning/pull/5629)) - Fixed an error when logging a progress bar metric with a reserved name ([#5620](https://github.com/PyTorchLightning/pytorch-lightning/pull/5620)) - Fixed `Metric`'s `state_dict` not being included when using child modules ([#5614](https://github.com/PyTorchLightning/pytorch-lightning/pull/5614)) @@ -1615,7 +1502,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed tensor printing in `trainer.test()` ([#5138](https://github.com/PyTorchLightning/pytorch-lightning/pull/5138)) - Fixed not using dataloader when `hparams` is present ([#4559](https://github.com/PyTorchLightning/pytorch-lightning/pull/4559)) - ## [1.1.5] - 2021-01-19 ### Fixed @@ -1625,7 +1511,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `reinit_scheduler_properties` with correct optimizer ([#5519](https://github.com/PyTorchLightning/pytorch-lightning/pull/5519)) - Fixed `val_check_interval` with `fast_dev_run` ([#5540](https://github.com/PyTorchLightning/pytorch-lightning/pull/5540)) - ## [1.1.4] - 2021-01-12 ### Added @@ -1669,7 +1554,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `LightningOptimizer` AMP bug ([#5191](https://github.com/PyTorchLightning/pytorch-lightning/pull/5191)) - Fixed keys being cast to strings in `_flatten_dict` ([#5354](https://github.com/PyTorchLightning/pytorch-lightning/pull/5354)) - ## [1.1.2] - 2020-12-23 ### Added @@ -1691,7 +1575,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Updated `DALIClassificationLoader` to not use deprecated arguments ([#4925](https://github.com/PyTorchLightning/pytorch-lightning/pull/4925)) - Corrected call to `torch.no_grad` ([#5124](https://github.com/PyTorchLightning/pytorch-lightning/pull/5124)) - ## [1.1.1] - 2020-12-15 ### Added @@ -1716,12 +1599,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Do not warn when the `name` key is used in the `lr_scheduler` dict ([#5057](https://github.com/PyTorchLightning/pytorch-lightning/pull/5057)) - Check if optimizer supports closure ([#4981](https://github.com/PyTorchLightning/pytorch-lightning/pull/4981)) - Add deprecated metric utility functions back to functional ( - [#5067](https://github.com/PyTorchLightning/pytorch-lightning/pull/5067), - [#5068](https://github.com/PyTorchLightning/pytorch-lightning/pull/5068)) + [#5067](https://github.com/PyTorchLightning/pytorch-lightning/pull/5067), + [#5068](https://github.com/PyTorchLightning/pytorch-lightning/pull/5068)) - Allow any input in `to_onnx` and `to_torchscript` (see the sketch below) ([#4378](https://github.com/PyTorchLightning/pytorch-lightning/pull/4378)) - Fixed `DDPHPCAccelerator` hanging in DDP construction by calling `init_device` ([#5157](https://github.com/PyTorchLightning/pytorch-lightning/pull/5157)) - ## [1.1.0] - 2020-12-09 ### Added @@ -1734,11 +1616,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
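The `to_onnx`/`to_torchscript` entry above refers to the export helpers on `LightningModule`; `to_torchscript` scripts the module by default and can also trace it given example inputs. A minimal sketch (the model and shapes are illustrative):

```python
import torch
from torch import nn
import pytorch_lightning as pl


class LitModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(32, 2)

    def forward(self, x):
        return self.layer(x)


model = LitModel()
scripted = model.to_torchscript()  # scripting is the default method
traced = model.to_torchscript(method="trace", example_inputs=torch.randn(1, 32))
torch.jit.save(scripted, "model.pt")
```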
- Added option to log momentum ([#4384](https://github.com/PyTorchLightning/pytorch-lightning/pull/4384)) - Added `current_score` to `ModelCheckpoint.on_save_checkpoint` ([#4721](https://github.com/PyTorchLightning/pytorch-lightning/pull/4721)) - Added logging using `self.log` in train and evaluation for epoch end hooks ( - [#4552](https://github.com/PyTorchLightning/pytorch-lightning/pull/4552), - [#4495](https://github.com/PyTorchLightning/pytorch-lightning/pull/4495), - [#4439](https://github.com/PyTorchLightning/pytorch-lightning/pull/4439), - [#4684](https://github.com/PyTorchLightning/pytorch-lightning/pull/4684), - [#4913](https://github.com/PyTorchLightning/pytorch-lightning/pull/4913)) + [#4552](https://github.com/PyTorchLightning/pytorch-lightning/pull/4552), + [#4495](https://github.com/PyTorchLightning/pytorch-lightning/pull/4495), + [#4439](https://github.com/PyTorchLightning/pytorch-lightning/pull/4439), + [#4684](https://github.com/PyTorchLightning/pytorch-lightning/pull/4684), + [#4913](https://github.com/PyTorchLightning/pytorch-lightning/pull/4913)) - Added ability for DDP plugin to modify optimizer state saving ([#4675](https://github.com/PyTorchLightning/pytorch-lightning/pull/4675)) - Added `prefix` argument in loggers ([#4557](https://github.com/PyTorchLightning/pytorch-lightning/pull/4557)) - Added printing of total num of params, trainable and non-trainable params in ModelSummary ([#4521](https://github.com/PyTorchLightning/pytorch-lightning/pull/4521)) @@ -1746,17 +1628,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added custom `Apex` and `NativeAMP` as `Precision plugins` ([#4355](https://github.com/PyTorchLightning/pytorch-lightning/pull/4355)) - Added `DALI MNIST` example ([#3721](https://github.com/PyTorchLightning/pytorch-lightning/pull/3721)) - Added `sharded plugin` for DDP for multi-gpu training memory optimizations ( - [#4639](https://github.com/PyTorchLightning/pytorch-lightning/pull/4639), - [#4686](https://github.com/PyTorchLightning/pytorch-lightning/pull/4686), - [#4737](https://github.com/PyTorchLightning/pytorch-lightning/pull/4737), - [#4773](https://github.com/PyTorchLightning/pytorch-lightning/pull/4773)) + [#4639](https://github.com/PyTorchLightning/pytorch-lightning/pull/4639), + [#4686](https://github.com/PyTorchLightning/pytorch-lightning/pull/4686), + [#4737](https://github.com/PyTorchLightning/pytorch-lightning/pull/4737), + [#4773](https://github.com/PyTorchLightning/pytorch-lightning/pull/4773)) - Added `experiment_id` to the NeptuneLogger ([#3462](https://github.com/PyTorchLightning/pytorch-lightning/pull/3462)) - Added `Pytorch Geometric` integration example with Lightning ([#4568](https://github.com/PyTorchLightning/pytorch-lightning/pull/4568)) - Added `all_gather` method to `LightningModule` which allows gradient based tensor synchronizations for use-cases such as negative sampling. ([#5012](https://github.com/PyTorchLightning/pytorch-lightning/pull/5012)) - Enabled `self.log` in most functions ([#4969](https://github.com/PyTorchLightning/pytorch-lightning/pull/4969)) - Added changeable extension variable for `ModelCheckpoint` ([#4977](https://github.com/PyTorchLightning/pytorch-lightning/pull/4977)) - ### Changed - Tuner algorithms will be skipped if `fast_dev_run=True` ([#3903](https://github.com/PyTorchLightning/pytorch-lightning/pull/3903)) @@ -1768,7 +1649,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
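A sketch of the `all_gather` addition above: inside any `LightningModule` hook it gathers a tensor across processes, and `sync_grads=True` keeps the operation differentiable. The encoder and loss below are illustrative placeholders:

```python
import pytorch_lightning as pl


class ContrastiveModel(pl.LightningModule):
    def training_step(self, batch, batch_idx):
        embeddings = self.encoder(batch)  # assumes an encoder submodule
        # Gather embeddings from every DDP process; sync_grads=True lets
        # gradients flow back through the gather (e.g. for negative sampling).
        all_embeddings = self.all_gather(embeddings, sync_grads=True)
        return self.contrastive_loss(embeddings, all_embeddings)  # placeholder
```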
- Updated `fast_dev_run` to accept an integer representing num_batches ([#4629](https://github.com/PyTorchLightning/pytorch-lightning/pull/4629)) - Refactored optimizer ([#4658](https://github.com/PyTorchLightning/pytorch-lightning/pull/4658)) - ### Deprecated - Deprecated `prefix` argument in `ModelCheckpoint` ([#4765](https://github.com/PyTorchLightning/pytorch-lightning/pull/4765)) @@ -1789,7 +1669,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed allowing logger to support indexing ([#4595](https://github.com/PyTorchLightning/pytorch-lightning/pull/4595)) - Fixed DDP and manual_optimization ([#4976](https://github.com/PyTorchLightning/pytorch-lightning/pull/4976)) - ## [1.0.8] - 2020-11-24 ### Added @@ -1807,7 +1686,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Do not override `PYTHONWARNINGS` ([#4700](https://github.com/PyTorchLightning/pytorch-lightning/pull/4700)) - Moved `init_ddp_connection` from `DDP` to `DDPPlugin` ([#4407](https://github.com/PyTorchLightning/pytorch-lightning/pull/4407)) - ### Fixed - Fixed checkpoint `hparams` dict casting when `omegaconf` is available ([#4770](https://github.com/PyTorchLightning/pytorch-lightning/pull/4770)) @@ -1828,15 +1706,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Change Metrics `persistent` default mode to `False` ([#4685](https://github.com/PyTorchLightning/pytorch-lightning/pull/4685)) - LoggerConnector log_metrics will use `total_batch_idx` instead of `global_step` when logging on `training step` ([#4738](https://github.com/PyTorchLightning/pytorch-lightning/pull/4738)) - ### Fixed - Prevent crash if `sync_dist=True` on CPU ([#4626](https://github.com/PyTorchLightning/pytorch-lightning/pull/4626)) - Fixed average pbar Metrics ([#4534](https://github.com/PyTorchLightning/pytorch-lightning/pull/4534)) - Fixed `setup` callback hook to correctly pass the LightningModule through ([#4608](https://github.com/PyTorchLightning/pytorch-lightning/pull/4608)) - Allowed decorating model init while saving `hparams` inside ([#4662](https://github.com/PyTorchLightning/pytorch-lightning/pull/4662)) -- Fixed `split_idx` set by `LoggerConnector` in `on_trainer_init` to `Trainer` ([#4697](https://github.com/PyTorchLightning/pytorch-lightning/pull/4697)) - +- Fixed `split_idx` set by `LoggerConnector` in `on_trainer_init` to `Trainer` ([#4697](https://github.com/PyTorchLightning/pytorch-lightning/pull/4697)) ## [1.0.6] - 2020-11-11 @@ -1848,15 +1724,14 @@
- Added congratulations at the end of our notebooks ([#4555](https://github.com/PyTorchLightning/pytorch-lightning/pull/4555)) - Added parameters `move_metrics_to_cpu` in Trainer to disable gpu leak ([#4592](https://github.com/PyTorchLightning/pytorch-lightning/pull/4592)) - ### Changed - Changed `fsspec` to tuner ([#4458](https://github.com/PyTorchLightning/pytorch-lightning/pull/4458)) - Unify SLURM/TorchElastic under backend plugin ([#4578](https://github.com/PyTorchLightning/pytorch-lightning/pull/4578), - [#4580](https://github.com/PyTorchLightning/pytorch-lightning/pull/4580), - [#4581](https://github.com/PyTorchLightning/pytorch-lightning/pull/4581), - [#4582](https://github.com/PyTorchLightning/pytorch-lightning/pull/4582), - [#4583](https://github.com/PyTorchLightning/pytorch-lightning/pull/4583)) + [#4580](https://github.com/PyTorchLightning/pytorch-lightning/pull/4580), + [#4581](https://github.com/PyTorchLightning/pytorch-lightning/pull/4581), + [#4582](https://github.com/PyTorchLightning/pytorch-lightning/pull/4582), + [#4583](https://github.com/PyTorchLightning/pytorch-lightning/pull/4583)) ### Fixed @@ -1900,10 +1775,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed TorchScript trace method's data to device and docstring ([#4360](https://github.com/PyTorchLightning/pytorch-lightning/pull/4360)) - Fixed CSV logger warning ([#4419](https://github.com/PyTorchLightning/pytorch-lightning/pull/4419)) - Fixed skip DDP parameter sync ([#4301](https://github.com/PyTorchLightning/pytorch-lightning/pull/4301)) -- Fixed `WandbLogger` _sanitize_callable function ([#4422](https://github.com/PyTorchLightning/pytorch-lightning/pull/4422)) +- Fixed `WandbLogger` \_sanitize_callable function ([#4422](https://github.com/PyTorchLightning/pytorch-lightning/pull/4422)) - Fixed `AMP Native` `_unscale` gradient ([#4441](https://github.com/PyTorchLightning/pytorch-lightning/pull/4441)) - ## [1.0.4] - 2020-10-27 ### Added @@ -1957,7 +1831,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `hparams` assign in init ([#4189](https://github.com/PyTorchLightning/pytorch-lightning/pull/4189)) - Fixed overwrite check for model hooks ([#4010](https://github.com/PyTorchLightning/pytorch-lightning/pull/4010)) - ## [1.0.2] - 2020-10-15 ### Added @@ -1978,14 +1851,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `hparams` saving - save the state when `save_hyperparameters()` is called [in `__init__`] ([#4163](https://github.com/PyTorchLightning/pytorch-lightning/pull/4163)) - Fixed runtime failure while exporting `hparams` to yaml ([#4158](https://github.com/PyTorchLightning/pytorch-lightning/pull/4158)) - ## [1.0.1] - 2020-10-14 ### Added - Added getstate/setstate method for torch.save serialization ([#4127](https://github.com/PyTorchLightning/pytorch-lightning/pull/4127)) - ## [1.0.0] - 2020-10-13 ### Added @@ -2001,23 +1872,23 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Added `LightningModule.toggle_optimizer` ([#4058](https://github.com/PyTorchLightning/pytorch-lightning/pull/4058)) - Added `LightningModule.manual_backward` ([#4063](https://github.com/PyTorchLightning/pytorch-lightning/pull/4063)) - Added `output` argument to `*_batch_end` hooks ([#3965](https://github.com/PyTorchLightning/pytorch-lightning/pull/3965), - [#3966](https://github.com/PyTorchLightning/pytorch-lightning/pull/3966)) + [#3966](https://github.com/PyTorchLightning/pytorch-lightning/pull/3966)) - Added `output` argument to `*_epoch_end` hooks ([#3967](https://github.com/PyTorchLightning/pytorch-lightning/pull/3967)) ### Changed - Integrated metrics API with self.log ([#3961](https://github.com/PyTorchLightning/pytorch-lightning/pull/3961)) - Decoupled Apex ([#4052](https://github.com/PyTorchLightning/pytorch-lightning/pull/4052), - [#4054](https://github.com/PyTorchLightning/pytorch-lightning/pull/4054), - [#4055](https://github.com/PyTorchLightning/pytorch-lightning/pull/4055), - [#4056](https://github.com/PyTorchLightning/pytorch-lightning/pull/4056), - [#4058](https://github.com/PyTorchLightning/pytorch-lightning/pull/4058), - [#4060](https://github.com/PyTorchLightning/pytorch-lightning/pull/4060), - [#4061](https://github.com/PyTorchLightning/pytorch-lightning/pull/4061), - [#4062](https://github.com/PyTorchLightning/pytorch-lightning/pull/4062), - [#4063](https://github.com/PyTorchLightning/pytorch-lightning/pull/4063), - [#4064](https://github.com/PyTorchLightning/pytorch-lightning/pull/4064), - [#4065](https://github.com/PyTorchLightning/pytorch-lightning/pull/4065)) + [#4054](https://github.com/PyTorchLightning/pytorch-lightning/pull/4054), + [#4055](https://github.com/PyTorchLightning/pytorch-lightning/pull/4055), + [#4056](https://github.com/PyTorchLightning/pytorch-lightning/pull/4056), + [#4058](https://github.com/PyTorchLightning/pytorch-lightning/pull/4058), + [#4060](https://github.com/PyTorchLightning/pytorch-lightning/pull/4060), + [#4061](https://github.com/PyTorchLightning/pytorch-lightning/pull/4061), + [#4062](https://github.com/PyTorchLightning/pytorch-lightning/pull/4062), + [#4063](https://github.com/PyTorchLightning/pytorch-lightning/pull/4063), + [#4064](https://github.com/PyTorchLightning/pytorch-lightning/pull/4064), + [#4065](https://github.com/PyTorchLightning/pytorch-lightning/pull/4065)) - Renamed all backends to `Accelerator` ([#4066](https://github.com/PyTorchLightning/pytorch-lightning/pull/4066)) - Enabled manual returns ([#4089](https://github.com/PyTorchLightning/pytorch-lightning/pull/4089)) @@ -2036,7 +1907,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed to print scaler value in progress bar ([#4053](https://github.com/PyTorchLightning/pytorch-lightning/pull/4053)) - Fixed mismatch between docstring and code regarding when `on_load_checkpoint` hook is called ([#3996](https://github.com/PyTorchLightning/pytorch-lightning/pull/3996)) - ## [0.10.0] - 2020-10-07 ### Added @@ -2059,137 +1929,137 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
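The 1.0.0 entries above add `LightningModule.toggle_optimizer` (#4058) and `LightningModule.manual_backward` (#4063) and enable manual returns. A rough sketch of how they combine in a manual-optimization `training_step`; this assumes a later API where `automatic_optimization` is a settable flag and `untoggle_optimizer` exists, and early signatures also took an `optimizer_idx`:

```python
import torch
import pytorch_lightning as pl

class ManualOptModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.automatic_optimization = False  # opt out of automatic optimization
        self.layer = torch.nn.Linear(32, 1)

    def training_step(self, batch, batch_idx):
        opt = self.optimizers()
        self.toggle_optimizer(opt)  # restrict requires_grad to this optimizer's params
        loss = self.layer(batch).pow(2).mean()
        opt.zero_grad()
        self.manual_backward(loss)  # replaces loss.backward(), honoring precision plugins
        opt.step()
        self.untoggle_optimizer(opt)

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)
```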
### Changed - Refactored accelerator backends: - * moved TPU `xxx_step` to backend ([#3118](https://github.com/PyTorchLightning/pytorch-lightning/pull/3118)) - * refactored DDP backend `forward` ([#3119](https://github.com/PyTorchLightning/pytorch-lightning/pull/3119)) - * refactored GPU backend `__step` ([#3120](https://github.com/PyTorchLightning/pytorch-lightning/pull/3120)) - * refactored Horovod backend ([#3121](https://github.com/PyTorchLightning/pytorch-lightning/pull/3121), - [#3122](https://github.com/PyTorchLightning/pytorch-lightning/pull/3122)) - * remove obscure forward call in eval + CPU backend `___step` ([#3123](https://github.com/PyTorchLightning/pytorch-lightning/pull/3123)) - * reduced all simplified forward ([#3126](https://github.com/PyTorchLightning/pytorch-lightning/pull/3126)) - * added hook base method ([#3127](https://github.com/PyTorchLightning/pytorch-lightning/pull/3127)) - * refactor eval loop to use hooks - use `test_mode` for if so we can split later ([#3129](https://github.com/PyTorchLightning/pytorch-lightning/pull/3129)) - * moved `___step_end` hooks ([#3130](https://github.com/PyTorchLightning/pytorch-lightning/pull/3130)) - * training forward refactor ([#3134](https://github.com/PyTorchLightning/pytorch-lightning/pull/3134)) - * training AMP scaling refactor ([#3135](https://github.com/PyTorchLightning/pytorch-lightning/pull/3135)) - * eval step scaling factor ([#3136](https://github.com/PyTorchLightning/pytorch-lightning/pull/3136)) - * add eval loop object to streamline eval loop ([#3138](https://github.com/PyTorchLightning/pytorch-lightning/pull/3138)) - * refactored dataloader process hook ([#3139](https://github.com/PyTorchLightning/pytorch-lightning/pull/3139)) - * refactored inner eval loop ([#3141](https://github.com/PyTorchLightning/pytorch-lightning/pull/3141)) - * final inner eval loop hooks ([#3154](https://github.com/PyTorchLightning/pytorch-lightning/pull/3154)) - * clean up hooks in `run_evaluation` ([#3156](https://github.com/PyTorchLightning/pytorch-lightning/pull/3156)) - * clean up data reset ([#3161](https://github.com/PyTorchLightning/pytorch-lightning/pull/3161)) - * expand eval loop out ([#3165](https://github.com/PyTorchLightning/pytorch-lightning/pull/3165)) - * moved hooks around in eval loop ([#3195](https://github.com/PyTorchLightning/pytorch-lightning/pull/3195)) - * remove `_evaluate` fx ([#3197](https://github.com/PyTorchLightning/pytorch-lightning/pull/3197)) - * `Trainer.fit` hook clean up ([#3198](https://github.com/PyTorchLightning/pytorch-lightning/pull/3198)) - * DDPs train hooks ([#3203](https://github.com/PyTorchLightning/pytorch-lightning/pull/3203)) - * refactor DDP backend ([#3204](https://github.com/PyTorchLightning/pytorch-lightning/pull/3204), - [#3207](https://github.com/PyTorchLightning/pytorch-lightning/pull/3207), - [#3208](https://github.com/PyTorchLightning/pytorch-lightning/pull/3208), - [#3209](https://github.com/PyTorchLightning/pytorch-lightning/pull/3209), - [#3210](https://github.com/PyTorchLightning/pytorch-lightning/pull/3210)) - * reduced accelerator selection ([#3211](https://github.com/PyTorchLightning/pytorch-lightning/pull/3211)) - * group prepare data hook ([#3212](https://github.com/PyTorchLightning/pytorch-lightning/pull/3212)) - * added data connector ([#3285](https://github.com/PyTorchLightning/pytorch-lightning/pull/3285)) - * modular is_overridden ([#3290](https://github.com/PyTorchLightning/pytorch-lightning/pull/3290)) - * adding `Trainer.tune()` 
([#3293](https://github.com/PyTorchLightning/pytorch-lightning/pull/3293)) - * move `run_pretrain_routine` -> `setup_training` ([#3294](https://github.com/PyTorchLightning/pytorch-lightning/pull/3294)) - * move train outside of setup training ([#3297](https://github.com/PyTorchLightning/pytorch-lightning/pull/3297)) - * move `prepare_data` to data connector ([#3307](https://github.com/PyTorchLightning/pytorch-lightning/pull/3307)) - * moved accelerator router ([#3309](https://github.com/PyTorchLightning/pytorch-lightning/pull/3309)) - * train loop refactor - moving train loop to own object ([#3310](https://github.com/PyTorchLightning/pytorch-lightning/pull/3310), - [#3312](https://github.com/PyTorchLightning/pytorch-lightning/pull/3312), - [#3313](https://github.com/PyTorchLightning/pytorch-lightning/pull/3313), - [#3314](https://github.com/PyTorchLightning/pytorch-lightning/pull/3314)) - * duplicate data interface definition up into DataHooks class ([#3344](https://github.com/PyTorchLightning/pytorch-lightning/pull/3344)) - * inner train loop ([#3359](https://github.com/PyTorchLightning/pytorch-lightning/pull/3359), - [#3361](https://github.com/PyTorchLightning/pytorch-lightning/pull/3361), - [#3362](https://github.com/PyTorchLightning/pytorch-lightning/pull/3362), - [#3363](https://github.com/PyTorchLightning/pytorch-lightning/pull/3363), - [#3365](https://github.com/PyTorchLightning/pytorch-lightning/pull/3365), - [#3366](https://github.com/PyTorchLightning/pytorch-lightning/pull/3366), - [#3367](https://github.com/PyTorchLightning/pytorch-lightning/pull/3367), - [#3368](https://github.com/PyTorchLightning/pytorch-lightning/pull/3368), - [#3369](https://github.com/PyTorchLightning/pytorch-lightning/pull/3369), - [#3370](https://github.com/PyTorchLightning/pytorch-lightning/pull/3370), - [#3371](https://github.com/PyTorchLightning/pytorch-lightning/pull/3371), - [#3372](https://github.com/PyTorchLightning/pytorch-lightning/pull/3372), - [#3373](https://github.com/PyTorchLightning/pytorch-lightning/pull/3373), - [#3374](https://github.com/PyTorchLightning/pytorch-lightning/pull/3374), - [#3375](https://github.com/PyTorchLightning/pytorch-lightning/pull/3375), - [#3376](https://github.com/PyTorchLightning/pytorch-lightning/pull/3376), - [#3385](https://github.com/PyTorchLightning/pytorch-lightning/pull/3385), - [#3388](https://github.com/PyTorchLightning/pytorch-lightning/pull/3388), - [#3397](https://github.com/PyTorchLightning/pytorch-lightning/pull/3397)) - * all logging related calls in a connector ([#3395](https://github.com/PyTorchLightning/pytorch-lightning/pull/3395)) - * device parser ([#3400](https://github.com/PyTorchLightning/pytorch-lightning/pull/3400), - [#3405](https://github.com/PyTorchLightning/pytorch-lightning/pull/3405)) - * added model connector ([#3407](https://github.com/PyTorchLightning/pytorch-lightning/pull/3407)) - * moved eval loop logging to loggers ([#3408](https://github.com/PyTorchLightning/pytorch-lightning/pull/3408)) - * moved eval loop (#3412[#3408](https://github.com/PyTorchLightning/pytorch-lightning/pull/3408)) - * trainer/separate argparse ([#3421](https://github.com/PyTorchLightning/pytorch-lightning/pull/3421), - [#3428](https://github.com/PyTorchLightning/pytorch-lightning/pull/3428), - [#3432](https://github.com/PyTorchLightning/pytorch-lightning/pull/3432)) - * move `lr_finder` ([#3434](https://github.com/PyTorchLightning/pytorch-lightning/pull/3434)) - * organize args (#[#3435](https://github.com/PyTorchLightning/pytorch-lightning/pull/3435), 
- [#3442](https://github.com/PyTorchLightning/pytorch-lightning/pull/3442), - [#3447](https://github.com/PyTorchLightning/pytorch-lightning/pull/3447), - [#3448](https://github.com/PyTorchLightning/pytorch-lightning/pull/3448), - [#3449](https://github.com/PyTorchLightning/pytorch-lightning/pull/3449), - [#3456](https://github.com/PyTorchLightning/pytorch-lightning/pull/3456)) - * move specific accelerator code ([#3457](https://github.com/PyTorchLightning/pytorch-lightning/pull/3457)) - * group connectors ([#3472](https://github.com/PyTorchLightning/pytorch-lightning/pull/3472)) - * accelerator connector methods x/n ([#3469](https://github.com/PyTorchLightning/pytorch-lightning/pull/3469), - [#3470](https://github.com/PyTorchLightning/pytorch-lightning/pull/3470), - [#3474](https://github.com/PyTorchLightning/pytorch-lightning/pull/3474)) - * merge backends x/n ([#3476](https://github.com/PyTorchLightning/pytorch-lightning/pull/3476), - [#3477](https://github.com/PyTorchLightning/pytorch-lightning/pull/3477), - [#3478](https://github.com/PyTorchLightning/pytorch-lightning/pull/3478), - [#3480](https://github.com/PyTorchLightning/pytorch-lightning/pull/3480), - [#3482](https://github.com/PyTorchLightning/pytorch-lightning/pull/3482)) - * apex plugin ([#3502](https://github.com/PyTorchLightning/pytorch-lightning/pull/3502)) - * precision plugins ([#3504](https://github.com/PyTorchLightning/pytorch-lightning/pull/3504)) - * Result - make monitor default to `checkpoint_on` to simplify ([#3571](https://github.com/PyTorchLightning/pytorch-lightning/pull/3571)) - * reference to the Trainer on the `LightningDataModule` ([#3684](https://github.com/PyTorchLightning/pytorch-lightning/pull/3684)) - * add `.log` to lightning module ([#3686](https://github.com/PyTorchLightning/pytorch-lightning/pull/3686), - [#3699](https://github.com/PyTorchLightning/pytorch-lightning/pull/3699), - [#3701](https://github.com/PyTorchLightning/pytorch-lightning/pull/3701), - [#3704](https://github.com/PyTorchLightning/pytorch-lightning/pull/3704), - [#3715](https://github.com/PyTorchLightning/pytorch-lightning/pull/3715)) - * enable tracking original metric when step and epoch are both true ([#3685](https://github.com/PyTorchLightning/pytorch-lightning/pull/3685)) - * deprecated results obj, added support for simpler comms ([#3681](https://github.com/PyTorchLightning/pytorch-lightning/pull/3681)) - * move backends back to individual files ([#3712](https://github.com/PyTorchLightning/pytorch-lightning/pull/3712)) - * fixes logging for eval steps ([#3763](https://github.com/PyTorchLightning/pytorch-lightning/pull/3763)) - * decoupled DDP, DDP spawn ([#3733](https://github.com/PyTorchLightning/pytorch-lightning/pull/3733), - [#3766](https://github.com/PyTorchLightning/pytorch-lightning/pull/3766), - [#3767](https://github.com/PyTorchLightning/pytorch-lightning/pull/3767), - [#3774](https://github.com/PyTorchLightning/pytorch-lightning/pull/3774), - [#3802](https://github.com/PyTorchLightning/pytorch-lightning/pull/3802), - [#3806](https://github.com/PyTorchLightning/pytorch-lightning/pull/3806), - [#3817](https://github.com/PyTorchLightning/pytorch-lightning/pull/3817), - [#3819](https://github.com/PyTorchLightning/pytorch-lightning/pull/3819), - [#3927](https://github.com/PyTorchLightning/pytorch-lightning/pull/3927)) - * remove weight loading hack for ddp_cpu ([#3808](https://github.com/PyTorchLightning/pytorch-lightning/pull/3808)) - * separate `torchelastic` from DDP 
([#3810](https://github.com/PyTorchLightning/pytorch-lightning/pull/3810)) - * separate SLURM from DDP ([#3809](https://github.com/PyTorchLightning/pytorch-lightning/pull/3809)) - * decoupled DDP2 ([#3816](https://github.com/PyTorchLightning/pytorch-lightning/pull/3816)) - * bug fix with logging val epoch end + monitor ([#3812](https://github.com/PyTorchLightning/pytorch-lightning/pull/3812)) - * callback system and init DDP ([#3836](https://github.com/PyTorchLightning/pytorch-lightning/pull/3836)) - * adding compute environments ([#3837](https://github.com/PyTorchLightning/pytorch-lightning/pull/3837), [#3842](https://github.com/PyTorchLightning/pytorch-lightning/pull/3842)) - * epoch can now log independently ([#3843](https://github.com/PyTorchLightning/pytorch-lightning/pull/3843)) - * test selecting the correct backend. temp backends while slurm and TorchElastic are decoupled ([#3848](https://github.com/PyTorchLightning/pytorch-lightning/pull/3848)) - * fixed `init_slurm_connection` causing hostname errors ([#3856](https://github.com/PyTorchLightning/pytorch-lightning/pull/3856)) - * moves init apex from LM to apex connector ([#3923](https://github.com/PyTorchLightning/pytorch-lightning/pull/3923)) - * moves sync bn to each backend ([#3925](https://github.com/PyTorchLightning/pytorch-lightning/pull/3925)) - * moves configure ddp to each backend ([#3924](https://github.com/PyTorchLightning/pytorch-lightning/pull/3924)) + - moved TPU `xxx_step` to backend ([#3118](https://github.com/PyTorchLightning/pytorch-lightning/pull/3118)) + - refactored DDP backend `forward` ([#3119](https://github.com/PyTorchLightning/pytorch-lightning/pull/3119)) + - refactored GPU backend `__step` ([#3120](https://github.com/PyTorchLightning/pytorch-lightning/pull/3120)) + - refactored Horovod backend ([#3121](https://github.com/PyTorchLightning/pytorch-lightning/pull/3121), + [#3122](https://github.com/PyTorchLightning/pytorch-lightning/pull/3122)) + - remove obscure forward call in eval + CPU backend `___step` ([#3123](https://github.com/PyTorchLightning/pytorch-lightning/pull/3123)) + - reduced all simplified forward ([#3126](https://github.com/PyTorchLightning/pytorch-lightning/pull/3126)) + - added hook base method ([#3127](https://github.com/PyTorchLightning/pytorch-lightning/pull/3127)) + - refactor eval loop to use hooks - use `test_mode` for if so we can split later ([#3129](https://github.com/PyTorchLightning/pytorch-lightning/pull/3129)) + - moved `___step_end` hooks ([#3130](https://github.com/PyTorchLightning/pytorch-lightning/pull/3130)) + - training forward refactor ([#3134](https://github.com/PyTorchLightning/pytorch-lightning/pull/3134)) + - training AMP scaling refactor ([#3135](https://github.com/PyTorchLightning/pytorch-lightning/pull/3135)) + - eval step scaling factor ([#3136](https://github.com/PyTorchLightning/pytorch-lightning/pull/3136)) + - add eval loop object to streamline eval loop ([#3138](https://github.com/PyTorchLightning/pytorch-lightning/pull/3138)) + - refactored dataloader process hook ([#3139](https://github.com/PyTorchLightning/pytorch-lightning/pull/3139)) + - refactored inner eval loop ([#3141](https://github.com/PyTorchLightning/pytorch-lightning/pull/3141)) + - final inner eval loop hooks ([#3154](https://github.com/PyTorchLightning/pytorch-lightning/pull/3154)) + - clean up hooks in `run_evaluation` ([#3156](https://github.com/PyTorchLightning/pytorch-lightning/pull/3156)) + - clean up data reset 
([#3161](https://github.com/PyTorchLightning/pytorch-lightning/pull/3161)) + - expand eval loop out ([#3165](https://github.com/PyTorchLightning/pytorch-lightning/pull/3165)) + - moved hooks around in eval loop ([#3195](https://github.com/PyTorchLightning/pytorch-lightning/pull/3195)) + - remove `_evaluate` fx ([#3197](https://github.com/PyTorchLightning/pytorch-lightning/pull/3197)) + - `Trainer.fit` hook clean up ([#3198](https://github.com/PyTorchLightning/pytorch-lightning/pull/3198)) + - DDPs train hooks ([#3203](https://github.com/PyTorchLightning/pytorch-lightning/pull/3203)) + - refactor DDP backend ([#3204](https://github.com/PyTorchLightning/pytorch-lightning/pull/3204), + [#3207](https://github.com/PyTorchLightning/pytorch-lightning/pull/3207), + [#3208](https://github.com/PyTorchLightning/pytorch-lightning/pull/3208), + [#3209](https://github.com/PyTorchLightning/pytorch-lightning/pull/3209), + [#3210](https://github.com/PyTorchLightning/pytorch-lightning/pull/3210)) + - reduced accelerator selection ([#3211](https://github.com/PyTorchLightning/pytorch-lightning/pull/3211)) + - group prepare data hook ([#3212](https://github.com/PyTorchLightning/pytorch-lightning/pull/3212)) + - added data connector ([#3285](https://github.com/PyTorchLightning/pytorch-lightning/pull/3285)) + - modular is_overridden ([#3290](https://github.com/PyTorchLightning/pytorch-lightning/pull/3290)) + - adding `Trainer.tune()` ([#3293](https://github.com/PyTorchLightning/pytorch-lightning/pull/3293)) + - move `run_pretrain_routine` -> `setup_training` ([#3294](https://github.com/PyTorchLightning/pytorch-lightning/pull/3294)) + - move train outside of setup training ([#3297](https://github.com/PyTorchLightning/pytorch-lightning/pull/3297)) + - move `prepare_data` to data connector ([#3307](https://github.com/PyTorchLightning/pytorch-lightning/pull/3307)) + - moved accelerator router ([#3309](https://github.com/PyTorchLightning/pytorch-lightning/pull/3309)) + - train loop refactor - moving train loop to own object ([#3310](https://github.com/PyTorchLightning/pytorch-lightning/pull/3310), + [#3312](https://github.com/PyTorchLightning/pytorch-lightning/pull/3312), + [#3313](https://github.com/PyTorchLightning/pytorch-lightning/pull/3313), + [#3314](https://github.com/PyTorchLightning/pytorch-lightning/pull/3314)) + - duplicate data interface definition up into DataHooks class ([#3344](https://github.com/PyTorchLightning/pytorch-lightning/pull/3344)) + - inner train loop ([#3359](https://github.com/PyTorchLightning/pytorch-lightning/pull/3359), + [#3361](https://github.com/PyTorchLightning/pytorch-lightning/pull/3361), + [#3362](https://github.com/PyTorchLightning/pytorch-lightning/pull/3362), + [#3363](https://github.com/PyTorchLightning/pytorch-lightning/pull/3363), + [#3365](https://github.com/PyTorchLightning/pytorch-lightning/pull/3365), + [#3366](https://github.com/PyTorchLightning/pytorch-lightning/pull/3366), + [#3367](https://github.com/PyTorchLightning/pytorch-lightning/pull/3367), + [#3368](https://github.com/PyTorchLightning/pytorch-lightning/pull/3368), + [#3369](https://github.com/PyTorchLightning/pytorch-lightning/pull/3369), + [#3370](https://github.com/PyTorchLightning/pytorch-lightning/pull/3370), + [#3371](https://github.com/PyTorchLightning/pytorch-lightning/pull/3371), + [#3372](https://github.com/PyTorchLightning/pytorch-lightning/pull/3372), + [#3373](https://github.com/PyTorchLightning/pytorch-lightning/pull/3373), + 
[#3374](https://github.com/PyTorchLightning/pytorch-lightning/pull/3374), + [#3375](https://github.com/PyTorchLightning/pytorch-lightning/pull/3375), + [#3376](https://github.com/PyTorchLightning/pytorch-lightning/pull/3376), + [#3385](https://github.com/PyTorchLightning/pytorch-lightning/pull/3385), + [#3388](https://github.com/PyTorchLightning/pytorch-lightning/pull/3388), + [#3397](https://github.com/PyTorchLightning/pytorch-lightning/pull/3397)) + - all logging related calls in a connector ([#3395](https://github.com/PyTorchLightning/pytorch-lightning/pull/3395)) + - device parser ([#3400](https://github.com/PyTorchLightning/pytorch-lightning/pull/3400), + [#3405](https://github.com/PyTorchLightning/pytorch-lightning/pull/3405)) + - added model connector ([#3407](https://github.com/PyTorchLightning/pytorch-lightning/pull/3407)) + - moved eval loop logging to loggers ([#3408](https://github.com/PyTorchLightning/pytorch-lightning/pull/3408)) + - moved eval loop ([#3412](https://github.com/PyTorchLightning/pytorch-lightning/pull/3412), [#3408](https://github.com/PyTorchLightning/pytorch-lightning/pull/3408)) + - trainer/separate argparse ([#3421](https://github.com/PyTorchLightning/pytorch-lightning/pull/3421), + [#3428](https://github.com/PyTorchLightning/pytorch-lightning/pull/3428), + [#3432](https://github.com/PyTorchLightning/pytorch-lightning/pull/3432)) + - move `lr_finder` ([#3434](https://github.com/PyTorchLightning/pytorch-lightning/pull/3434)) + - organize args ([#3435](https://github.com/PyTorchLightning/pytorch-lightning/pull/3435), + [#3442](https://github.com/PyTorchLightning/pytorch-lightning/pull/3442), + [#3447](https://github.com/PyTorchLightning/pytorch-lightning/pull/3447), + [#3448](https://github.com/PyTorchLightning/pytorch-lightning/pull/3448), + [#3449](https://github.com/PyTorchLightning/pytorch-lightning/pull/3449), + [#3456](https://github.com/PyTorchLightning/pytorch-lightning/pull/3456)) + - move specific accelerator code ([#3457](https://github.com/PyTorchLightning/pytorch-lightning/pull/3457)) + - group connectors ([#3472](https://github.com/PyTorchLightning/pytorch-lightning/pull/3472)) + - accelerator connector methods x/n ([#3469](https://github.com/PyTorchLightning/pytorch-lightning/pull/3469), + [#3470](https://github.com/PyTorchLightning/pytorch-lightning/pull/3470), + [#3474](https://github.com/PyTorchLightning/pytorch-lightning/pull/3474)) + - merge backends x/n ([#3476](https://github.com/PyTorchLightning/pytorch-lightning/pull/3476), + [#3477](https://github.com/PyTorchLightning/pytorch-lightning/pull/3477), + [#3478](https://github.com/PyTorchLightning/pytorch-lightning/pull/3478), + [#3480](https://github.com/PyTorchLightning/pytorch-lightning/pull/3480), + [#3482](https://github.com/PyTorchLightning/pytorch-lightning/pull/3482)) + - apex plugin ([#3502](https://github.com/PyTorchLightning/pytorch-lightning/pull/3502)) + - precision plugins ([#3504](https://github.com/PyTorchLightning/pytorch-lightning/pull/3504)) + - Result - make monitor default to `checkpoint_on` to simplify ([#3571](https://github.com/PyTorchLightning/pytorch-lightning/pull/3571)) + - reference to the Trainer on the `LightningDataModule` ([#3684](https://github.com/PyTorchLightning/pytorch-lightning/pull/3684)) + - add `.log` to lightning module ([#3686](https://github.com/PyTorchLightning/pytorch-lightning/pull/3686), + [#3699](https://github.com/PyTorchLightning/pytorch-lightning/pull/3699), + [#3701](https://github.com/PyTorchLightning/pytorch-lightning/pull/3701), +
[#3704](https://github.com/PyTorchLightning/pytorch-lightning/pull/3704), + [#3715](https://github.com/PyTorchLightning/pytorch-lightning/pull/3715)) + - enable tracking original metric when step and epoch are both true ([#3685](https://github.com/PyTorchLightning/pytorch-lightning/pull/3685)) + - deprecated results obj, added support for simpler comms ([#3681](https://github.com/PyTorchLightning/pytorch-lightning/pull/3681)) + - move backends back to individual files ([#3712](https://github.com/PyTorchLightning/pytorch-lightning/pull/3712)) + - fixes logging for eval steps ([#3763](https://github.com/PyTorchLightning/pytorch-lightning/pull/3763)) + - decoupled DDP, DDP spawn ([#3733](https://github.com/PyTorchLightning/pytorch-lightning/pull/3733), + [#3766](https://github.com/PyTorchLightning/pytorch-lightning/pull/3766), + [#3767](https://github.com/PyTorchLightning/pytorch-lightning/pull/3767), + [#3774](https://github.com/PyTorchLightning/pytorch-lightning/pull/3774), + [#3802](https://github.com/PyTorchLightning/pytorch-lightning/pull/3802), + [#3806](https://github.com/PyTorchLightning/pytorch-lightning/pull/3806), + [#3817](https://github.com/PyTorchLightning/pytorch-lightning/pull/3817), + [#3819](https://github.com/PyTorchLightning/pytorch-lightning/pull/3819), + [#3927](https://github.com/PyTorchLightning/pytorch-lightning/pull/3927)) + - remove weight loading hack for ddp_cpu ([#3808](https://github.com/PyTorchLightning/pytorch-lightning/pull/3808)) + - separate `torchelastic` from DDP ([#3810](https://github.com/PyTorchLightning/pytorch-lightning/pull/3810)) + - separate SLURM from DDP ([#3809](https://github.com/PyTorchLightning/pytorch-lightning/pull/3809)) + - decoupled DDP2 ([#3816](https://github.com/PyTorchLightning/pytorch-lightning/pull/3816)) + - bug fix with logging val epoch end + monitor ([#3812](https://github.com/PyTorchLightning/pytorch-lightning/pull/3812)) + - callback system and init DDP ([#3836](https://github.com/PyTorchLightning/pytorch-lightning/pull/3836)) + - adding compute environments ([#3837](https://github.com/PyTorchLightning/pytorch-lightning/pull/3837), [#3842](https://github.com/PyTorchLightning/pytorch-lightning/pull/3842)) + - epoch can now log independently ([#3843](https://github.com/PyTorchLightning/pytorch-lightning/pull/3843)) + - test selecting the correct backend. 
temp backends while slurm and TorchElastic are decoupled ([#3848](https://github.com/PyTorchLightning/pytorch-lightning/pull/3848)) + - fixed `init_slurm_connection` causing hostname errors ([#3856](https://github.com/PyTorchLightning/pytorch-lightning/pull/3856)) + - moves init apex from LM to apex connector ([#3923](https://github.com/PyTorchLightning/pytorch-lightning/pull/3923)) + - moves sync bn to each backend ([#3925](https://github.com/PyTorchLightning/pytorch-lightning/pull/3925)) + - moves configure ddp to each backend ([#3924](https://github.com/PyTorchLightning/pytorch-lightning/pull/3924)) - Deprecation warning ([#3844](https://github.com/PyTorchLightning/pytorch-lightning/pull/3844)) - Changed `LearningRateLogger` to `LearningRateMonitor` ([#3251](https://github.com/PyTorchLightning/pytorch-lightning/pull/3251)) - Used `fsspec` instead of `gfile` for all IO ([#3320](https://github.com/PyTorchLightning/pytorch-lightning/pull/3320)) - * Swaped `torch.load` for `fsspec` load in DDP spawn backend ([#3787](https://github.com/PyTorchLightning/pytorch-lightning/pull/3787)) - * Swaped `torch.load` for `fsspec` load in cloud_io loading ([#3692](https://github.com/PyTorchLightning/pytorch-lightning/pull/3692)) - * Added support for `to_disk()` to use remote filepaths with `fsspec` ([#3930](https://github.com/PyTorchLightning/pytorch-lightning/pull/3930)) - * Updated model_checkpoint's to_yaml to use `fsspec` open ([#3801](https://github.com/PyTorchLightning/pytorch-lightning/pull/3801)) - * Fixed `fsspec` is inconsistent when doing `fs.ls` ([#3805](https://github.com/PyTorchLightning/pytorch-lightning/pull/3805)) + - Swapped `torch.load` for `fsspec` load in DDP spawn backend ([#3787](https://github.com/PyTorchLightning/pytorch-lightning/pull/3787)) + - Swapped `torch.load` for `fsspec` load in cloud_io loading ([#3692](https://github.com/PyTorchLightning/pytorch-lightning/pull/3692)) + - Added support for `to_disk()` to use remote filepaths with `fsspec` ([#3930](https://github.com/PyTorchLightning/pytorch-lightning/pull/3930)) + - Updated model_checkpoint's to_yaml to use `fsspec` open ([#3801](https://github.com/PyTorchLightning/pytorch-lightning/pull/3801)) + - Fixed `fsspec` being inconsistent when doing `fs.ls` ([#3805](https://github.com/PyTorchLightning/pytorch-lightning/pull/3805)) - Refactor `GPUStatsMonitor` to improve training speed ([#3257](https://github.com/PyTorchLightning/pytorch-lightning/pull/3257)) - Changed IoU score behavior for classes absent in target and pred ([#3098](https://github.com/PyTorchLightning/pytorch-lightning/pull/3098)) - Changed IoU `remove_bg` bool to `ignore_index` optional int ([#3098](https://github.com/PyTorchLightning/pytorch-lightning/pull/3098)) @@ -2201,15 +2071,15 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Enable `None` model checkpoint default ([#3669](https://github.com/PyTorchLightning/pytorch-lightning/pull/3669)) - Skipped `best_model_path` if `checkpoint_callback` is `None` ([#2962](https://github.com/PyTorchLightning/pytorch-lightning/pull/2962)) - Used `raise ..
from ..` to explicitly chain exceptions ([#3750](https://github.com/PyTorchLightning/pytorch-lightning/pull/3750)) -- Mocking loggers ([#3596](https://github.com/PyTorchLightning/pytorch-lightning/pull/3596), - [#3617](https://github.com/PyTorchLightning/pytorch-lightning/pull/3617), - [#3851](https://github.com/PyTorchLightning/pytorch-lightning/pull/3851), - [#3859](https://github.com/PyTorchLightning/pytorch-lightning/pull/3859), - [#3884](https://github.com/PyTorchLightning/pytorch-lightning/pull/3884), - [#3853](https://github.com/PyTorchLightning/pytorch-lightning/pull/3853), - [#3910](https://github.com/PyTorchLightning/pytorch-lightning/pull/3910), - [#3889](https://github.com/PyTorchLightning/pytorch-lightning/pull/3889), - [#3926](https://github.com/PyTorchLightning/pytorch-lightning/pull/3926)) +- Mocking loggers ([#3596](https://github.com/PyTorchLightning/pytorch-lightning/pull/3596), + [#3617](https://github.com/PyTorchLightning/pytorch-lightning/pull/3617), + [#3851](https://github.com/PyTorchLightning/pytorch-lightning/pull/3851), + [#3859](https://github.com/PyTorchLightning/pytorch-lightning/pull/3859), + [#3884](https://github.com/PyTorchLightning/pytorch-lightning/pull/3884), + [#3853](https://github.com/PyTorchLightning/pytorch-lightning/pull/3853), + [#3910](https://github.com/PyTorchLightning/pytorch-lightning/pull/3910), + [#3889](https://github.com/PyTorchLightning/pytorch-lightning/pull/3889), + [#3926](https://github.com/PyTorchLightning/pytorch-lightning/pull/3926)) - Write predictions in LightningModule instead of EvalResult [#3882](https://github.com/PyTorchLightning/pytorch-lightning/pull/3882) ### Deprecated @@ -2221,19 +2091,19 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Removed - Removed experimental Metric API ([#3943](https://github.com/PyTorchLightning/pytorch-lightning/pull/3943), - [#3949](https://github.com/PyTorchLightning/pytorch-lightning/pull/3949), - [#3946](https://github.com/PyTorchLightning/pytorch-lightning/pull/3946)), listed changes before final removal: - * Added `EmbeddingSimilarity` metric ([#3349](https://github.com/PyTorchLightning/pytorch-lightning/pull/3349), [#3358](https://github.com/PyTorchLightning/pytorch-lightning/pull/3358)) - * Added hooks to metric module interface ([#2528](https://github.com/PyTorchLightning/pytorch-lightning/pull/2528)) - * Added error when AUROC metric is used for multiclass problems ([#3350](https://github.com/PyTorchLightning/pytorch-lightning/pull/3350)) - * Fixed `ModelCheckpoint` with `save_top_k=-1` option not tracking the best models when a monitor metric is available ([#3735](https://github.com/PyTorchLightning/pytorch-lightning/pull/3735)) - * Fixed counter-intuitive error being thrown in `Accuracy` metric for zero target tensor ([#3764](https://github.com/PyTorchLightning/pytorch-lightning/pull/3764)) - * Fixed aggregation of metrics ([#3517](https://github.com/PyTorchLightning/pytorch-lightning/pull/3517)) - * Fixed Metric aggregation ([#3321](https://github.com/PyTorchLightning/pytorch-lightning/pull/3321)) - * Fixed RMSLE metric ([#3188](https://github.com/PyTorchLightning/pytorch-lightning/pull/3188)) - * Renamed `reduction` to `class_reduction` in classification metrics ([#3322](https://github.com/PyTorchLightning/pytorch-lightning/pull/3322)) - * Changed `class_reduction` similar to sklearn for classification metrics ([#3322](https://github.com/PyTorchLightning/pytorch-lightning/pull/3322)) - * Renaming of precision recall metric 
([#3308](https://github.com/PyTorchLightning/pytorch-lightning/pull/3308)) + [#3949](https://github.com/PyTorchLightning/pytorch-lightning/pull/3949), + [#3946](https://github.com/PyTorchLightning/pytorch-lightning/pull/3946)), listed changes before final removal: + - Added `EmbeddingSimilarity` metric ([#3349](https://github.com/PyTorchLightning/pytorch-lightning/pull/3349), [#3358](https://github.com/PyTorchLightning/pytorch-lightning/pull/3358)) + - Added hooks to metric module interface ([#2528](https://github.com/PyTorchLightning/pytorch-lightning/pull/2528)) + - Added error when AUROC metric is used for multiclass problems ([#3350](https://github.com/PyTorchLightning/pytorch-lightning/pull/3350)) + - Fixed `ModelCheckpoint` with `save_top_k=-1` option not tracking the best models when a monitor metric is available ([#3735](https://github.com/PyTorchLightning/pytorch-lightning/pull/3735)) + - Fixed counter-intuitive error being thrown in `Accuracy` metric for zero target tensor ([#3764](https://github.com/PyTorchLightning/pytorch-lightning/pull/3764)) + - Fixed aggregation of metrics ([#3517](https://github.com/PyTorchLightning/pytorch-lightning/pull/3517)) + - Fixed Metric aggregation ([#3321](https://github.com/PyTorchLightning/pytorch-lightning/pull/3321)) + - Fixed RMSLE metric ([#3188](https://github.com/PyTorchLightning/pytorch-lightning/pull/3188)) + - Renamed `reduction` to `class_reduction` in classification metrics ([#3322](https://github.com/PyTorchLightning/pytorch-lightning/pull/3322)) + - Changed `class_reduction` similar to sklearn for classification metrics ([#3322](https://github.com/PyTorchLightning/pytorch-lightning/pull/3322)) + - Renaming of precision recall metric ([#3308](https://github.com/PyTorchLightning/pytorch-lightning/pull/3308)) ### Fixed @@ -2282,18 +2152,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added - Added SyncBN for DDP ([#2801](https://github.com/PyTorchLightning/pytorch-lightning/pull/2801), - [#2838](https://github.com/PyTorchLightning/pytorch-lightning/pull/2838)) + [#2838](https://github.com/PyTorchLightning/pytorch-lightning/pull/2838)) - Added basic `CSVLogger` ([#2721](https://github.com/PyTorchLightning/pytorch-lightning/pull/2721)) - Added SSIM metrics ([#2671](https://github.com/PyTorchLightning/pytorch-lightning/pull/2671)) - Added BLEU metrics ([#2535](https://github.com/PyTorchLightning/pytorch-lightning/pull/2535)) - Added support to export a model to ONNX format ([#2596](https://github.com/PyTorchLightning/pytorch-lightning/pull/2596)) - Added support for `Trainer(num_sanity_val_steps=-1)` to check all validation data before training ([#2246](https://github.com/PyTorchLightning/pytorch-lightning/pull/2246)) - Added struct. output: - * tests for val loop flow ([#2605](https://github.com/PyTorchLightning/pytorch-lightning/pull/2605)) - * `EvalResult` support for train and val. loop ([#2615](https://github.com/PyTorchLightning/pytorch-lightning/pull/2615), - [#2651](https://github.com/PyTorchLightning/pytorch-lightning/pull/2651)) - * weighted average in results obj ([#2930](https://github.com/PyTorchLightning/pytorch-lightning/pull/2930)) - * fix result obj DP auto reduce ([#3013](https://github.com/PyTorchLightning/pytorch-lightning/pull/3013)) + - tests for val loop flow ([#2605](https://github.com/PyTorchLightning/pytorch-lightning/pull/2605)) + - `EvalResult` support for train and val. 
loop ([#2615](https://github.com/PyTorchLightning/pytorch-lightning/pull/2615), + [#2651](https://github.com/PyTorchLightning/pytorch-lightning/pull/2651)) + - weighted average in results obj ([#2930](https://github.com/PyTorchLightning/pytorch-lightning/pull/2930)) + - fix result obj DP auto reduce ([#3013](https://github.com/PyTorchLightning/pytorch-lightning/pull/3013)) - Added class `LightningDataModule` ([#2668](https://github.com/PyTorchLightning/pytorch-lightning/pull/2668)) - Added support for PyTorch 1.6 ([#2745](https://github.com/PyTorchLightning/pytorch-lightning/pull/2745)) - Added call DataModule hooks implicitly in trainer ([#2755](https://github.com/PyTorchLightning/pytorch-lightning/pull/2755)) @@ -2316,10 +2186,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Truncated long version numbers in progress bar ([#2594](https://github.com/PyTorchLightning/pytorch-lightning/pull/2594)) - Enabling val/test loop disabling ([#2692](https://github.com/PyTorchLightning/pytorch-lightning/pull/2692)) - Refactored into `accelerator` module: - * GPU training ([#2704](https://github.com/PyTorchLightning/pytorch-lightning/pull/2704)) - * TPU training ([#2708](https://github.com/PyTorchLightning/pytorch-lightning/pull/2708)) - * DDP(2) backend ([#2796](https://github.com/PyTorchLightning/pytorch-lightning/pull/2796)) - * Retrieve last logged val from result by key ([#3049](https://github.com/PyTorchLightning/pytorch-lightning/pull/3049)) + - GPU training ([#2704](https://github.com/PyTorchLightning/pytorch-lightning/pull/2704)) + - TPU training ([#2708](https://github.com/PyTorchLightning/pytorch-lightning/pull/2708)) + - DDP(2) backend ([#2796](https://github.com/PyTorchLightning/pytorch-lightning/pull/2796)) + - Retrieve last logged val from result by key ([#3049](https://github.com/PyTorchLightning/pytorch-lightning/pull/3049)) - Using `.comet.config` file for `CometLogger` ([#1913](https://github.com/PyTorchLightning/pytorch-lightning/pull/1913)) - Updated hooks arguments - breaking for `setup` and `teardown` ([#2850](https://github.com/PyTorchLightning/pytorch-lightning/pull/2850)) - Using `gfile` to support remote directories ([#2164](https://github.com/PyTorchLightning/pytorch-lightning/pull/2164)) @@ -2336,11 +2206,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Removed - Removed deprecated: ([#2760](https://github.com/PyTorchLightning/pytorch-lightning/pull/2760)) - * core decorator `data_loader` - * Module hook `on_sanity_check_start` and loading `load_from_metrics` - * package `pytorch_lightning.logging` - * Trainer arguments: `show_progress_bar`, `num_tpu_cores`, `use_amp`, `print_nan_grads` - * LR Finder argument `num_accumulation_steps` + - core decorator `data_loader` + - Module hook `on_sanity_check_start` and loading `load_from_metrics` + - package `pytorch_lightning.logging` + - Trainer arguments: `show_progress_bar`, `num_tpu_cores`, `use_amp`, `print_nan_grads` + - LR Finder argument `num_accumulation_steps` ### Fixed @@ -2404,7 +2274,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
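For the `LightningDataModule` class added above (#2668), a minimal sketch of the hook layout; method names come from the public API, and the set of hooks grew in later releases:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset
import pytorch_lightning as pl

class RandomDataModule(pl.LightningDataModule):
    def prepare_data(self):
        # download or generate data once (not called on every process)
        pass

    def setup(self, stage=None):
        # assign splits; called on every process when using DDP
        self.train_set = TensorDataset(torch.randn(64, 32))

    def train_dataloader(self):
        return DataLoader(self.train_set, batch_size=8)
```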
- Fixed Trainer `.fit()` returning the last instead of the best weights in "ddp_spawn" ([#2565](https://github.com/PyTorchLightning/pytorch-lightning/pull/2565)) - Fixed passing (do not pass) TPU weights back on test ([#2566](https://github.com/PyTorchLightning/pytorch-lightning/pull/2566)) - Fixed DDP tests and `.test()` ([#2512](https://github.com/PyTorchLightning/pytorch-lightning/pull/2512), - [#2570](https://github.com/PyTorchLightning/pytorch-lightning/pull/2570)) + [#2570](https://github.com/PyTorchLightning/pytorch-lightning/pull/2570)) ## [0.8.4] - 2020-07-01 @@ -2426,7 +2296,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed TPU saving dir ([fc26078e](https://github.com/PyTorchLightning/pytorch-lightning/commit/fc26078e395f8a001f4c6dd7b3fe7ca202f914a3), [04e68f02](https://github.com/PyTorchLightning/pytorch-lightning/commit/04e68f022fc03dd5f1555ee86dea997d42a448ad)) - Fixed logging on rank 0 only ([#2425](https://github.com/PyTorchLightning/pytorch-lightning/pull/2425)) - ## [0.8.3] - 2020-06-29 ### Fixed @@ -2445,7 +2314,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed epoch indexing to start from 0 instead of 1 ([#2289](https://github.com/PyTorchLightning/pytorch-lightning/pull/2289)) - Refactor Model `backward` ([#2276](https://github.com/PyTorchLightning/pytorch-lightning/pull/2276)) - Refactored `training_batch` + tests to verify correctness ([#2327](https://github.com/PyTorchLightning/pytorch-lightning/pull/2327), - [#2328](https://github.com/PyTorchLightning/pytorch-lightning/pull/2328)) + [#2328](https://github.com/PyTorchLightning/pytorch-lightning/pull/2328)) - Refactored training loop ([#2336](https://github.com/PyTorchLightning/pytorch-lightning/pull/2336)) - Made optimization steps for hooks ([#2363](https://github.com/PyTorchLightning/pytorch-lightning/pull/2363)) - Changed default apex level to 'O2' ([#2362](https://github.com/PyTorchLightning/pytorch-lightning/pull/2362)) @@ -2458,10 +2327,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed parsing TPU arguments and TPU tests ([#2094](https://github.com/PyTorchLightning/pytorch-lightning/pull/2094)) - Fixed the number of batches in case of multiple dataloaders and `limit_{*}_batches` ([#1920](https://github.com/PyTorchLightning/pytorch-lightning/pull/1920), - [#2226](https://github.com/PyTorchLightning/pytorch-lightning/pull/2226)) + [#2226](https://github.com/PyTorchLightning/pytorch-lightning/pull/2226)) - Fixed an issue with forward hooks not being removed after model summary ([#2298](https://github.com/PyTorchLightning/pytorch-lightning/pull/2298)) - Fix for `load_from_checkpoint()` not working with absolute path on Windows ([#2294](https://github.com/PyTorchLightning/pytorch-lightning/pull/2294)) -- Fixed an issue how _has_len handles `NotImplementedError` e.g. raised by `torchtext.data.Iterator` ([#2293](https://github.com/PyTorchLightning/pytorch-lightning/pull/2293)), ([#2307](https://github.com/PyTorchLightning/pytorch-lightning/pull/2307)) +- Fixed an issue with how \_has_len handles `NotImplementedError` e.g.
raised by `torchtext.data.Iterator` ([#2293](https://github.com/PyTorchLightning/pytorch-lightning/pull/2293)), ([#2307](https://github.com/PyTorchLightning/pytorch-lightning/pull/2307)) - Fixed `average_precision` metric ([#2319](https://github.com/PyTorchLightning/pytorch-lightning/pull/2319)) - Fixed ROC metric for CUDA tensors ([#2304](https://github.com/PyTorchLightning/pytorch-lightning/pull/2304)) - Fixed lost compatibility with custom datatypes implementing `.to` ([#2335](https://github.com/PyTorchLightning/pytorch-lightning/pull/2335)) @@ -2474,7 +2343,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed use model ref for tear down ([#2360](https://github.com/PyTorchLightning/pytorch-lightning/pull/2360)) - Fixed logger crash on DDP ([#2388](https://github.com/PyTorchLightning/pytorch-lightning/pull/2388)) - Fixed several issues with early stopping and checkpoint callbacks ([#1504](https://github.com/PyTorchLightning/pytorch-lightning/pull/1504), - [#2391](https://github.com/PyTorchLightning/pytorch-lightning/pull/2391)) + [#2391](https://github.com/PyTorchLightning/pytorch-lightning/pull/2391)) - Fixed loading past checkpoints from v0.7.x ([#2405](https://github.com/PyTorchLightning/pytorch-lightning/pull/2405)) - Fixed loading model without arguments ([#2403](https://github.com/PyTorchLightning/pytorch-lightning/pull/2403)) - Fixed Windows compatibility issue ([#2358](https://github.com/PyTorchLightning/pytorch-lightning/pull/2358)) @@ -2485,8 +2354,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed the `load_from_checkpoint` path detected as URL bug ([#2244](https://github.com/PyTorchLightning/pytorch-lightning/pull/2244)) - Fixed hooks - added barrier ([#2245](https://github.com/PyTorchLightning/pytorch-lightning/pull/2245), - [#2257](https://github.com/PyTorchLightning/pytorch-lightning/pull/2257), - [#2260](https://github.com/PyTorchLightning/pytorch-lightning/pull/220)) + [#2257](https://github.com/PyTorchLightning/pytorch-lightning/pull/2257), + [#2260](https://github.com/PyTorchLightning/pytorch-lightning/pull/2260)) - Fixed `hparams` - remove frame inspection on `self.hparams` ([#2253](https://github.com/PyTorchLightning/pytorch-lightning/pull/2253)) - Fixed setup and on fit calls ([#2252](https://github.com/PyTorchLightning/pytorch-lightning/pull/2252)) - Fixed GPU template ([#2255](https://github.com/PyTorchLightning/pytorch-lightning/pull/2255)) @@ -2497,14 +2366,14 @@
- Added `overfit_batches`, `limit_{val|test}_batches` flags (overfit now uses training set for all three) ([#2213](https://github.com/PyTorchLightning/pytorch-lightning/pull/2213)) - Added metrics - * Base classes ([#1326](https://github.com/PyTorchLightning/pytorch-lightning/pull/1326), - [#1877](https://github.com/PyTorchLightning/pytorch-lightning/pull/1877)) - * Sklearn metrics classes ([#1327](https://github.com/PyTorchLightning/pytorch-lightning/pull/1327)) - * Native torch metrics ([#1488](https://github.com/PyTorchLightning/pytorch-lightning/pull/1488), - [#2062](https://github.com/PyTorchLightning/pytorch-lightning/pull/2062)) - * docs for all Metrics ([#2184](https://github.com/PyTorchLightning/pytorch-lightning/pull/2184), - [#2209](https://github.com/PyTorchLightning/pytorch-lightning/pull/2209)) - * Regression metrics ([#2221](https://github.com/PyTorchLightning/pytorch-lightning/pull/2221)) + - Base classes ([#1326](https://github.com/PyTorchLightning/pytorch-lightning/pull/1326), + [#1877](https://github.com/PyTorchLightning/pytorch-lightning/pull/1877)) + - Sklearn metrics classes ([#1327](https://github.com/PyTorchLightning/pytorch-lightning/pull/1327)) + - Native torch metrics ([#1488](https://github.com/PyTorchLightning/pytorch-lightning/pull/1488), + [#2062](https://github.com/PyTorchLightning/pytorch-lightning/pull/2062)) + - docs for all Metrics ([#2184](https://github.com/PyTorchLightning/pytorch-lightning/pull/2184), + [#2209](https://github.com/PyTorchLightning/pytorch-lightning/pull/2209)) + - Regression metrics ([#2221](https://github.com/PyTorchLightning/pytorch-lightning/pull/2221)) - Allow dataloaders without sampler field present ([#1907](https://github.com/PyTorchLightning/pytorch-lightning/pull/1907)) - Added option `save_last` to save the model at the end of every epoch in `ModelCheckpoint` ([#1908](https://github.com/PyTorchLightning/pytorch-lightning/pull/1908)) - Early stopping checks `on_validation_end` ([#1458](https://github.com/PyTorchLightning/pytorch-lightning/pull/1458)) @@ -2534,23 +2403,23 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
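A hypothetical use of the batch-limiting flags from the entry above (#2213): `overfit_batches`, `limit_val_batches`, and `limit_test_batches`. In later releases an int means a fixed batch count and a float a fraction of the dataset:

```python
from pytorch_lightning import Trainer

# Run evaluation on a fraction or a fixed number of batches.
trainer = Trainer(limit_val_batches=0.25, limit_test_batches=5)

# Or deliberately overfit on a handful of training batches for debugging.
debug_trainer = Trainer(overfit_batches=10)
```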
### Deprecated - Deprecated flags: ([#2213](https://github.com/PyTorchLightning/pytorch-lightning/pull/2213)) - * `overfit_pct` in favour of `overfit_batches` - * `val_percent_check` in favour of `limit_val_batches` - * `test_percent_check` in favour of `limit_test_batches` + - `overfit_pct` in favour of `overfit_batches` + - `val_percent_check` in favour of `limit_val_batches` + - `test_percent_check` in favour of `limit_test_batches` - Deprecated `ModelCheckpoint`'s attributes `best` and `kth_best_model` ([#1799](https://github.com/PyTorchLightning/pytorch-lightning/pull/1799)) - Dropped official support/testing for older PyTorch versions <1.3 ([#1917](https://github.com/PyTorchLightning/pytorch-lightning/pull/1917)) - Deprecated Trainer `proc_rank` in favour of `global_rank` ([#2166](https://github.com/PyTorchLightning/pytorch-lightning/pull/2166), - [#2269](https://github.com/PyTorchLightning/pytorch-lightning/pull/2269)) + [#2269](https://github.com/PyTorchLightning/pytorch-lightning/pull/2269)) ### Removed - Removed unintended Trainer argument `progress_bar_callback`, the callback should be passed in by `Trainer(callbacks=[...])` instead ([#1855](https://github.com/PyTorchLightning/pytorch-lightning/pull/1855)) - Removed obsolete `self._device` in Trainer ([#1849](https://github.com/PyTorchLightning/pytorch-lightning/pull/1849)) - Removed deprecated API ([#2073](https://github.com/PyTorchLightning/pytorch-lightning/pull/2073)) - * Packages: `pytorch_lightning.pt_overrides`, `pytorch_lightning.root_module` - * Modules: `pytorch_lightning.logging.comet_logger`, `pytorch_lightning.logging.mlflow_logger`, `pytorch_lightning.logging.test_tube_logger`, `pytorch_lightning.overrides.override_data_parallel`, `pytorch_lightning.core.model_saving`, `pytorch_lightning.core.root_module` - * Trainer arguments: `add_row_log_interval`, `default_save_path`, `gradient_clip`, `nb_gpu_nodes`, `max_nb_epochs`, `min_nb_epochs`, `nb_sanity_val_steps` - * Trainer attributes: `nb_gpu_nodes`, `num_gpu_nodes`, `gradient_clip`, `max_nb_epochs`, `min_nb_epochs`, `nb_sanity_val_steps`, `default_save_path`, `tng_tqdm_dic` + - Packages: `pytorch_lightning.pt_overrides`, `pytorch_lightning.root_module` + - Modules: `pytorch_lightning.logging.comet_logger`, `pytorch_lightning.logging.mlflow_logger`, `pytorch_lightning.logging.test_tube_logger`, `pytorch_lightning.overrides.override_data_parallel`, `pytorch_lightning.core.model_saving`, `pytorch_lightning.core.root_module` + - Trainer arguments: `add_row_log_interval`, `default_save_path`, `gradient_clip`, `nb_gpu_nodes`, `max_nb_epochs`, `min_nb_epochs`, `nb_sanity_val_steps` + - Trainer attributes: `nb_gpu_nodes`, `num_gpu_nodes`, `gradient_clip`, `max_nb_epochs`, `min_nb_epochs`, `nb_sanity_val_steps`, `default_save_path`, `tng_tqdm_dic` ### Fixed @@ -2571,7 +2440,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed an issue with the model summary and `example_input_array` depending on a specific ordering of the submodules in a LightningModule ([#1773](https://github.com/PyTorchLightning/pytorch-lightning/pull/1773)) - Fixed TPU logging ([#2230](https://github.com/PyTorchLightning/pytorch-lightning/pull/2230)) - Fixed PID port + duplicate `rank_zero` logging ([#2140](https://github.com/PyTorchLightning/pytorch-lightning/pull/2140), - [#2231](https://github.com/PyTorchLightning/pytorch-lightning/pull/2231)) + [#2231](https://github.com/PyTorchLightning/pytorch-lightning/pull/2231)) ## [0.7.6] - 2020-05-16 @@ -2586,9 +2455,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added option to provide seed to random generators to ensure reproducibility ([#1572](https://github.com/PyTorchLightning/pytorch-lightning/pull/1572)) - Added override for hparams in `load_from_ckpt` ([#1797](https://github.com/PyTorchLightning/pytorch-lightning/pull/1797)) - Added support for multi-node distributed execution under `torchelastic` ([#1811](https://github.com/PyTorchLightning/pytorch-lightning/pull/1811), - [#1818](https://github.com/PyTorchLightning/pytorch-lightning/pull/1818)) + [#1818](https://github.com/PyTorchLightning/pytorch-lightning/pull/1818)) - Added using `store_true` for bool args ([#1822](https://github.com/PyTorchLightning/pytorch-lightning/pull/1822), - [#1842](https://github.com/PyTorchLightning/pytorch-lightning/pull/1842)) + [#1842](https://github.com/PyTorchLightning/pytorch-lightning/pull/1842)) - Added dummy logger for internally disabling logging for some features ([#1836](https://github.com/PyTorchLightning/pytorch-lightning/pull/1836)) ### Changed @@ -2642,12 +2511,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed CPU DDP breaking change and DDP change ([#1635](https://github.com/PyTorchLightning/pytorch-lightning/pull/1635)) - Tested pickling ([#1636](https://github.com/PyTorchLightning/pytorch-lightning/pull/1636)) - ## [0.7.4] - 2020-04-26 ### Added -- Added flag `replace_sampler_ddp` to manually disable sampler replacement in DDP ([#1513](https://github.com/PyTorchLightning/pytorch-lightning/pull/1513)) +- Added flag `replace_sampler_ddp` to manually disable sampler replacement in DDP ([#1513](https://github.com/PyTorchLightning/pytorch-lightning/pull/1513)) - Added `auto_select_gpus` flag to trainer that enables automatic selection of available GPUs on exclusive mode systems. - Added learning rate finder ([#1347](https://github.com/PyTorchLightning/pytorch-lightning/pull/1347)) - Added support for DDP mode in clusters without SLURM ([#1387](https://github.com/PyTorchLightning/pytorch-lightning/pull/1387)) @@ -2658,7 +2526,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added [Horovod](http://horovod.ai) support as a distributed backend `Trainer(distributed_backend='horovod')` ([#1529](https://github.com/PyTorchLightning/pytorch-lightning/pull/1529)) - Added support for 8 core distributed training on Kaggle TPUs ([#1568](https://github.com/PyTorchLightning/pytorch-lightning/pull/1568)) - Added support for native AMP ([#1561](https://github.com/PyTorchLightning/pytorch-lightning/pull/1561), - [#1580](https://github.com/PyTorchLightning/pytorch-lightning/pull/1580)) + [#1580](https://github.com/PyTorchLightning/pytorch-lightning/pull/1580)) ### Changed @@ -2690,7 +2558,7 @@
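The reproducibility option above (#1572) seeds all random generators at once. A sketch of its use, assuming the `seed_everything` name the helper carries in later releases:

```python
import pytorch_lightning as pl

# Assumed entry point: `seed_everything` seeds Python's `random`, NumPy and torch.
pl.seed_everything(42)
```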

### Changed

@@ -2690,7 +2558,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Fixed a bug that caused the `callbacks` Trainer argument to reference a global variable ([#1534](https://github.com/PyTorchLightning/pytorch-lightning/pull/1534)).
- Fixed a bug that set all boolean CLI arguments from `Trainer.add_argparse_args` always to True ([#1571](https://github.com/PyTorchLightning/pytorch-lightning/pull/1571))
- Fixed do not copy the batch when training on a single GPU ([#1576](https://github.com/PyTorchLightning/pytorch-lightning/pull/1576),
-  [#1579](https://github.com/PyTorchLightning/pytorch-lightning/pull/1579))
+  [#1579](https://github.com/PyTorchLightning/pytorch-lightning/pull/1579))
- Fixed soft checkpoint removing on DDP ([#1408](https://github.com/PyTorchLightning/pytorch-lightning/pull/1408))
- Fixed automatic parser bug ([#1585](https://github.com/PyTorchLightning/pytorch-lightning/pull/1585))
- Fixed bool conversion from string ([#1606](https://github.com/PyTorchLightning/pytorch-lightning/pull/1606))

@@ -2709,7 +2577,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Fixed gradient clipping ([#1438](https://github.com/PyTorchLightning/pytorch-lightning/pull/1438))
- Fixed pretty print ([#1441](https://github.com/PyTorchLightning/pytorch-lightning/pull/1441))
-
## [0.7.2] - 2020-04-07

### Added

@@ -2780,12 +2647,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Fixed a bug that would cause `trainer.test()` to run on the validation set when overloading `validation_epoch_end` and `test_end` ([#1353](https://github.com/PyTorchLightning/pytorch-lightning/pull/1353))
- Fixed `WandbLogger.watch` - use of the watch method without importing `wandb` ([#1311](https://github.com/PyTorchLightning/pytorch-lightning/pull/1311))
- Fixed `WandbLogger` to be used with 'ddp' - allow reinits in sub-processes ([#1149](https://github.com/PyTorchLightning/pytorch-lightning/pull/1149),
-  [#1360](https://github.com/PyTorchLightning/pytorch-lightning/pull/1360))
+  [#1360](https://github.com/PyTorchLightning/pytorch-lightning/pull/1360))
- Made `training_epoch_end` behave like `validation_epoch_end` ([#1357](https://github.com/PyTorchLightning/pytorch-lightning/pull/1357))
- Fixed `fast_dev_run` running validation twice ([#1365](https://github.com/PyTorchLightning/pytorch-lightning/pull/1365))
- Fixed pickle error from quick patch `__code__` ([#1352](https://github.com/PyTorchLightning/pytorch-lightning/pull/1352))
- Fixed memory leak on GPU0 ([#1094](https://github.com/PyTorchLightning/pytorch-lightning/pull/1094),
-  [#1349](https://github.com/PyTorchLightning/pytorch-lightning/pull/1349))
+  [#1349](https://github.com/PyTorchLightning/pytorch-lightning/pull/1349))
- Fixed checkpointing interval ([#1272](https://github.com/PyTorchLightning/pytorch-lightning/pull/1272))
- Fixed validation and training loops run the partial dataset ([#1192](https://github.com/PyTorchLightning/pytorch-lightning/pull/1192))
- Fixed running `on_validation_end` only on main process in DDP ([#1125](https://github.com/PyTorchLightning/pytorch-lightning/pull/1125))
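The `training_epoch_end` change (#1357) means the hook receives the collected `training_step` outputs, mirroring `validation_epoch_end`. A sketch of the hook pair under the 0.7-era API (class and metric names are illustrative; the rest of the module is elided):

```python
import torch
import pytorch_lightning as pl


class LitModel(pl.LightningModule):
    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = torch.nn.functional.mse_loss(self(x), y)
        return {"loss": loss}

    def training_epoch_end(self, outputs):
        # `outputs` is the list of dicts returned by training_step over
        # the epoch, just as validation_epoch_end receives its outputs.
        avg_loss = torch.stack([o["loss"] for o in outputs]).mean()
        return {"log": {"train_loss_epoch": avg_loss}}
```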

@@ -2817,7 +2684,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Added `train_dataloader`, `val_dataloader` and `test_dataloader` arguments to `Trainer.fit()`, for alternative data parsing ([#759](https://github.com/PyTorchLightning/pytorch-lightning/pull/759))
- Added Tensor Processing Unit (TPU) support ([#868](https://github.com/PyTorchLightning/pytorch-lightning/pull/868))
- Added semantic segmentation example ([#751](https://github.com/PyTorchLightning/pytorch-lightning/pull/751),[#876](https://github.com/PyTorchLightning/pytorch-lightning/pull/876),
-  [#881](https://github.com/PyTorchLightning/pytorch-lightning/pull/881))
+  [#881](https://github.com/PyTorchLightning/pytorch-lightning/pull/881))
- Split callbacks in multiple files ([#849](https://github.com/PyTorchLightning/pytorch-lightning/pull/849))
- Support for user defined callbacks ([#889](https://github.com/PyTorchLightning/pytorch-lightning/pull/889) and [#950](https://github.com/PyTorchLightning/pytorch-lightning/pull/950))
- Added support for multiple loggers to be passed to `Trainer` as an iterable (e.g. list, tuple, etc.) ([#903](https://github.com/PyTorchLightning/pytorch-lightning/pull/903))

@@ -2825,10 +2692,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Added support for logging `hparams` as dict ([#1029](https://github.com/PyTorchLightning/pytorch-lightning/pull/1029))
- Checkpoint and early stopping now work without val. step ([#1041](https://github.com/PyTorchLightning/pytorch-lightning/pull/1041))
- Support graceful training cleanup after Keyboard Interrupt ([#856](https://github.com/PyTorchLightning/pytorch-lightning/pull/856),
-  [#1019](https://github.com/PyTorchLightning/pytorch-lightning/pull/1019))
+  [#1019](https://github.com/PyTorchLightning/pytorch-lightning/pull/1019))
- Added type hints for function arguments ([#912](https://github.com/PyTorchLightning/pytorch-lightning/pull/912), )
- Added default `argparser` for `Trainer` ([#952](https://github.com/PyTorchLightning/pytorch-lightning/pull/1023),
-  [#1023](https://github.com/PyTorchLightning/pytorch-lightning/pull/1023))
+  [#1023](https://github.com/PyTorchLightning/pytorch-lightning/pull/1023))
- Added TPU gradient clipping ([#963](https://github.com/PyTorchLightning/pytorch-lightning/pull/963))
- Added max/min number of steps in `Trainer` ([#728](https://github.com/PyTorchLightning/pytorch-lightning/pull/728))
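Several of these additions compose naturally. A hypothetical setup combining an iterable of loggers (#903) with step-based run bounds (#728); the logger choices and directories are illustrative:

```python
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import CSVLogger, TensorBoardLogger

trainer = Trainer(
    logger=[TensorBoardLogger("logs/"), CSVLogger("logs/")],  # any iterable of loggers
    min_steps=100,
    max_steps=1_000,
)
# Dataloaders can also be handed straight to Trainer.fit() per #759.
```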

@@ -2853,10 +2720,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Deprecated `pytorch_lightning.logging` ([#767](https://github.com/PyTorchLightning/pytorch-lightning/pull/767))
- Deprecated `LightningModule.load_from_metrics` in favour of `LightningModule.load_from_checkpoint` ([#995](https://github.com/PyTorchLightning/pytorch-lightning/pull/995),
-  [#1079](https://github.com/PyTorchLightning/pytorch-lightning/pull/1079))
+  [#1079](https://github.com/PyTorchLightning/pytorch-lightning/pull/1079))
- Deprecated `@data_loader` decorator ([#926](https://github.com/PyTorchLightning/pytorch-lightning/pull/926))
- Deprecated model steps `training_end`, `validation_end` and `test_end` ([#1051](https://github.com/PyTorchLightning/pytorch-lightning/pull/1051),
-  [#1056](https://github.com/PyTorchLightning/pytorch-lightning/pull/1056))
+  [#1056](https://github.com/PyTorchLightning/pytorch-lightning/pull/1056))

### Removed

diff --git a/pytorch_lightning/__init__.py b/pytorch_lightning/__init__.py
index c9d914573fe71d..8138e5929a1dce 100644
--- a/pytorch_lightning/__init__.py
+++ b/pytorch_lightning/__init__.py
@@ -4,6 +4,18 @@
 
 from pytorch_lightning.__about__ import *  # noqa: F401, F403
 
+VERBOSE = 15  # between logging.INFO and logging.DEBUG, used for logging in production use cases
+
+
+def verbose(self, message, *args, **kws):
+    if self.isEnabledFor(VERBOSE):
+        self._log(VERBOSE, message, args, **kws)
+
+
+logging.addLevelName(VERBOSE, "VERBOSE")
+logging.verbose = verbose
+logging.Logger.verbose = verbose
+
 _root_logger = logging.getLogger()
 _logger = logging.getLogger(__name__)
 _logger.setLevel(logging.INFO)
diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py
index 62d198536b877d..07b9ea9664c1f2 100644
--- a/pytorch_lightning/plugins/training_type/ddp.py
+++ b/pytorch_lightning/plugins/training_type/ddp.py
@@ -100,6 +100,7 @@ def __init__(
             checkpoint_io=checkpoint_io,
             precision_plugin=precision_plugin,
         )
+        log.verbose(f"Initializing DDP: {self.__class__.__name__}")
         self.interactive_ddp_procs = []
         self._num_nodes = 1
         self.sync_batchnorm = False
@@ -222,6 +223,7 @@ def _call_children_scripts(self):
         self._rank_0_has_called_call_children_scripts = True
 
     def setup_distributed(self):
+        log.verbose(f"{self.__class__.__name__}: setting up distributed...")
         reset_seed()
 
         # determine which process we are and world size
@@ -329,6 +331,7 @@ def _reinit_optimizers_with_post_localSGD(self, warmup_steps: int):
         trainer.convert_to_lightning_optimizers()
 
     def configure_ddp(self) -> None:
+        log.verbose(f"{self.__class__.__name__}: configuring DDP...")
         self.pre_configure_ddp()
         self._model = self._setup_model(LightningDistributedModule(self.model))
         self._register_ddp_hooks()
@@ -377,6 +380,7 @@ def pre_backward(self, closure_loss: torch.Tensor) -> None:
             prepare_for_backward(self.model, closure_loss)
 
     def model_to_device(self):
+        log.verbose(f"{self.__class__.__name__}: moving model to device [{self.root_device}]...")
         self.model.to(self.root_device)
 
     def reduce(self, tensor, group: Optional[Any] = None, reduce_op: Union[ReduceOp, str] = "mean") -> torch.Tensor:
@@ -497,12 +501,14 @@ def reconciliate_processes(self, trace: str) -> None:
         raise DeadlockDetectedException(f"DeadLock detected from rank: {self.global_rank} \n {trace}")
 
     def teardown(self) -> None:
+        log.verbose(f"{self.__class__.__name__}: tearing down plugin...")
         super().teardown()
         if isinstance(self.model, DistributedDataParallel):
             self.model = self.lightning_module
 
         if self.on_gpu:
             # GPU teardown
+            log.verbose(f"{self.__class__.__name__}: moving model to CPU...")
             self.lightning_module.cpu()
             # clean up memory
             torch.cuda.empty_cache()
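A self-contained sketch of what the registration in `pytorch_lightning/__init__.py` does and how a logger opts into the new level (not part of the patch; the `demo` logger name is illustrative). Note that the module-level alias `logging.verbose` only works if a `Logger` is passed explicitly as the first argument, since the helper expects `self`; the sketch below uses the `Logger` method form:

```python
import logging

VERBOSE = 15  # sits between logging.DEBUG (10) and logging.INFO (20)


def verbose(self, message, *args, **kws):
    # Mirror of the patched helper: only emit when VERBOSE is enabled.
    if self.isEnabledFor(VERBOSE):
        self._log(VERBOSE, message, args, **kws)


logging.addLevelName(VERBOSE, "VERBOSE")
logging.Logger.verbose = verbose

logging.basicConfig(level=VERBOSE)
log = logging.getLogger("demo")
log.verbose("shown at VERBOSE")  # emitted: effective level is 15
log.setLevel(logging.INFO)
log.verbose("hidden at INFO")    # suppressed: INFO (20) > VERBOSE (15)
```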
diff --git a/pytorch_lightning/plugins/training_type/fully_sharded.py b/pytorch_lightning/plugins/training_type/fully_sharded.py
index d1b1257622beb8..3f12bc16de4af6 100644
--- a/pytorch_lightning/plugins/training_type/fully_sharded.py
+++ b/pytorch_lightning/plugins/training_type/fully_sharded.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import contextlib
 from typing import Dict, Generator, List, Optional
+import logging
 
 import torch
 
@@ -30,6 +31,8 @@
     from fairscale.nn import default_auto_wrap_policy, enable_wrap
     from fairscale.nn.data_parallel import FullyShardedDataParallel
 
+log = logging.getLogger(__name__)
+
 
 class DDPFullyShardedPlugin(DDPPlugin):
 
@@ -129,6 +132,7 @@ def setup_distributed(self) -> None:
 
     @contextlib.contextmanager
     def model_sharded_context(self) -> Generator:
+        log.verbose(f"{self.__class__.__name__}: entered model_sharded_context.")
         precision = self.precision_plugin.precision
 
         def wrap_policy(*args, **kwargs):
@@ -150,7 +154,10 @@ def wrap_policy(*args, **kwargs):
         ):
             yield
 
+        log.verbose(f"{self.__class__.__name__}: exiting model_sharded_context.")
+
     def configure_ddp(self) -> None:
+        log.verbose(f"{self.__class__.__name__}: configuring DDP... (cpu_offload: [{self.cpu_offload}])")
         if not self.cpu_offload:
             # When using CPU Offload, FSDP will manage the CUDA movement for us.
             # Note: this would be problematic for large model (which could not fit in one GPU)
@@ -170,6 +177,7 @@ def pre_dispatch(self, trainer: "pl.Trainer") -> None:
         self.setup_optimizers(trainer)
 
     def model_to_device(self) -> None:
+        log.verbose(f"{self.__class__.__name__}: moving model to device [{self.root_device}]...")
         # ensure we update the device type in the lightning module
         self.lightning_module.to(self.root_device)
 
diff --git a/pytorch_lightning/trainer/connectors/checkpoint_connector.py b/pytorch_lightning/trainer/connectors/checkpoint_connector.py
index 92cad3b1180060..0a2143e2769de8 100644
--- a/pytorch_lightning/trainer/connectors/checkpoint_connector.py
+++ b/pytorch_lightning/trainer/connectors/checkpoint_connector.py
@@ -14,6 +14,7 @@
 
 import os
 import re
+import logging
 from typing import Any, Dict, Optional
 
 import torch
 
@@ -35,6 +36,9 @@
     from omegaconf import Container
 
 
+log: logging.Logger = logging.getLogger(__name__)
+
+
 class CheckpointConnector:
     def __init__(self, trainer: "pl.Trainer", resume_from_checkpoint: Optional[_PATH] = None) -> None:
         self.trainer = trainer
@@ -70,6 +74,7 @@ def resume_start(self, checkpoint_path: Optional[_PATH] = None) -> None:
         self.resume_checkpoint_path = self.hpc_resume_path or checkpoint_path
         checkpoint_path = self.resume_checkpoint_path
         if not checkpoint_path:
+            log.verbose("`checkpoint_path` not specified. Skipping checkpoint loading.")
             return
 
         rank_zero_info(f"Restoring states from the checkpoint path at {checkpoint_path}")
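Assuming this patch is applied, a downstream user could surface the new plugin and connector messages by lowering the package logger below `INFO`. A minimal sketch (the training setup itself is elided; `pl.VERBOSE` is the constant added in `pytorch_lightning/__init__.py` above):

```python
import logging

import pytorch_lightning as pl  # the import registers the VERBOSE level (15)

# Let the new log.verbose(...) calls in the DDP/FSDP plugins and the
# CheckpointConnector come through alongside the existing INFO logs.
logging.getLogger("pytorch_lightning").setLevel(pl.VERBOSE)
```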