From 620ac56329612164b6cd75b57b9cef491b0cfb94 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Fri, 18 Mar 2022 14:55:55 +0800 Subject: [PATCH 01/20] add paddle.optimizer.lr.CyclicLR --- python/paddle/optimizer/lr.py | 168 +++++++++++++++++++++++++++++++++- 1 file changed, 166 insertions(+), 2 deletions(-) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 9d55b8d1d2f12..c79ec5bbd6427 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -33,7 +33,8 @@ 'LambdaDecay', 'ReduceOnPlateau', 'CosineAnnealingDecay', - 'MultiplicativeDecay' + 'MultiplicativeDecay', + 'CyclicLR' ] @@ -992,7 +993,8 @@ def __init__(self, self.milestones = milestones self.gamma = gamma - super(MultiStepDecay, self).__init__(learning_rate, last_epoch, verbose) + super(MultiStepDecay, self).__init__( + learning_rate, last_epoch, verbose) def get_lr(self): for i in range(len(self.milestones)): @@ -1589,3 +1591,165 @@ def get_lr(self): return self.last_lr * self.lr_lambda(self.last_epoch) else: return self.base_lr + + +class CyclicLR(LRScheduler): + r""" + + Set the learning rate using a cyclic learning rate scheduler, which cycles the learning rate between two boundaries with a constant frequrncy. + The distance between the two boundaries can be csaled on a per-iteration or per-cycle basis. + + It has been proposed in `Cyclic Learning Rates for Training Netural Networks `_. + + According to the paper, the cyclic learning rate schedule has three build-in scale methods: + + * "triangular": A basic traingular cycle without amplitude scaling. + * "triangular2": A basic triangular cycle that scales initial amplitude by half each cycle. + * "exp_range": A cycle that scales initial amplitude by half each cycle. + + The initial amplitude is defined as max_learning_rate - base_learning_rate. + + Args: + base_learning_rate (float): Initial learning rate which is the lower boundary in the cycle. + max_learning_rate (float): Upper learning rate in the cycle. Functionally, it defines the cycle amplitude (max_learning_rate - base_learning_rate). + The lr at any cycle is the sum of base_lr and some scaling of the amplitude; + therefore max_learning_rate may not actually be reached depending on scaling function. + step_size_up (int): Step number of trainning iterations in the increasing half of a cycle. + step_size_down (int): Step number of trainning iterations in the decreasing half of a cycle. + mode (str): one of 'triangular', 'triangular2' or 'exp_ragne'.Values correspond to policies detailed above. + If scale_fn is not None, this argument is ignored. Default: 'triangular' + gamma (float): Constant in 'exp_range' scaling function: gamma**(cycle iterations) Default: 1.0 + scale_fn (function, optional): Custom scaling policy defined by a single argument lambda function, + where 0 <= scale_fn(x) <= 1 for all x >= 0. + If specified, then 'mode' is ignored. Default: None + scale_mode (str, optional): One of 'cycle' or 'iterations'. Defines whether scale_fn is evaluated on cycle number + or cycle iterations (total iterations since start of training). Default: 'cycle' + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + verose: (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` . + + Returns: + ``CyclicLR`` instance to schedule learning rate. + + Examples: + + .. 
code-block:: python + + import paddle + import numpy as np + + # train on default dynamic graph mode + linear = paddle.nn.Linear(10, 10) + scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, paramters=linear.parameters()) + for epoch in range(5): + for batch_id in range(20): + x = paddle.uniform([10, 10]) + out = linear(x) + loss = paddle.mean(out) + loss.backward() + sgd.step() + sgd.clear_gradients() + scheduler.step() # You should update learning rate each step + + # train on static graph mode + paddle.enable_static() + main_prog = paddle.static.Program() + start_prog = paddle.static.Program() + with paddle.static.program_guard(main_prog, start_prog): + x = paddle.static.data(name='x', shape=[None, 4, 5]) + y = paddle.static.data(name='y', shape=[None, 4, 5]) + z = paddle.static.nn.fc(x, 100) + loss = paddle.mean(z) + scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True) + sgd = paddle.optimizer.SGD(learning_rate=scheduler) + sgd.minimize(loss) + + exe = paddle.static.Executor() + exe.run(start_prog) + for epoch in range(5): + for batch_id in range(20): + out = exe.run( + main_prog, + feed={ + 'x': np.random.randn(3, 4, 5).astype('float32'), + 'y': np.random.randn(3, 4, 5).astype('float32') + }, + fetch_list=loss.name) + scheduler.step() # You should update learning rate each step + """ + + def __init__(self, + base_learning_rate, + max_learning_rate, + step_size_up, + step_size_down, + mode='triangular', + gamma=1., + scale_fn=None, + scale_mode='cycle', + last_epoch=-1, + verbose=False): + self.max_lr = max_learning_rate + + if step_size_up <= 0: + raise ValueError("'step_size_up' must be a positive integer.") + if step_size_down <= 0: + raise ValueError("'step_size_down' must be a positive integer.") + + step_size_up = float(step_size_up) + step_size_down = float( + step_size_down) if step_size_down is not None else step_size_up + self.total_size = step_size_up + step_size_down + self.step_ratio = step_size_up / self.total_size + + if mode not in ['triangular', 'triangular2', 'exp_range'] and scale_fn is None: + raise ValueError( + "'mode' is invalid and 'scale_fn' is None, make sure one of 'mode' or 'scale_fn' is valid") + if scale_mode not in ['cycle', 'iterations']: + raise ValueError( + "'scale_mode' must be one of 'cycle' or 'iterations") + + self.mode = mode + self.gamma = gamma + + if scale_fn is None: + if self.mode == 'triangular': + self.scale_fn = self._triangular_scale_fn + self.scale_mode = 'cycle' + elif self.mode == 'triangular2': + self.scale_fn = self._triangular2_scale_fn + self.scale_mode = 'cycle' + elif self.mode == 'exp_range': + self.scale_fn = self._exp_range_scale_fn + self.scale_mode = 'iterations' + else: + self.scale_fn = scale_fn + self.scale_mode = scale_mode + super().__init__(base_learning_rate, last_epoch, verbose) + + def _triangular_scale_fn(self, x): + return 1. + + def _triangular2_scale_fn(self, x): + return 1 / (2. ** (x - 1)) + + def _exp_range_scale_fn(self, x): + return self.gamma ** x + + def get_lr(self): + cycle = math.floor(1 + self.last_epoch / self.total_size) + x = 1. 
+ self.last_epoch / self.total_size - cycle + + if x <= self.step_ratio: + scale_factor = x / self.step_ratio + else: + scale_factor = (x - 1) / (self.step_ratio - 1) + + base_height = (self.max_lr - self.base_lr) * scale_factor + + if self.scale_mode == 'cycle': + lr = self.base_lr + base_height * self.scale_fn(cycle) + else: + lr = self.base_lr + base_height * self.scale_fn(self.last_epoch) + + return lr From 4fe10c4f30a729d6989cf7ecfbd1894115f66533 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Fri, 18 Mar 2022 14:56:26 +0800 Subject: [PATCH 02/20] add unittest of CyclicLR --- .../tests/unittests/test_lr_scheduler.py | 94 ++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py index 6d94144fc7788..470386e9c879c 100644 --- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py @@ -51,7 +51,8 @@ def is_better(current, best, m, n): var_list[2] = cooldown var_list[3] = 0 new_lr = var_list[1] * decay_rate - var_list[1] = new_lr if var_list[1] - new_lr > 1e-8 else var_list[1] + var_list[1] = new_lr if var_list[1] - \ + new_lr > 1e-8 else var_list[1] return var_list[1] @@ -321,6 +322,47 @@ def step_lr(epoch_num, learning_rate, step_size, gamma=0.1, verbose=False): return learning_rate * math.pow(gamma, epoch_num // step_size) +def cyclic_lr(epoch_num, + base_learning_rate, + max_learning_rate, + step_size_up, + step_size_down, + mode, + gamma=0.1, + scale_fn=None, + scale_mode='cycle', + verbose=False): + total_steps = step_size_up + step_size_down + step_ratio = step_size_up / total_steps + if scale_fn is None: + if mode == 'triangular': + def scale_fn(x): return 1. + scale_mode = 'cycle' + elif mode == 'triangular2': + def scale_fn(x): return 1 / (2. ** (x - 1)) + scale_mode = 'cycle' + elif mode == 'exp_range': + def scale_fn(x): return gamma**(x) + scale_mode = 'interations' + + cycle = math.floor(1 + epoch_num / total_steps) + x = 1. 
+ epoch_num / total_steps - cycle + + if x <= step_ratio: + scale_factor = x / step_ratio + else: + scale_factor = (x - 1) / (step_ratio - 1) + + base_height = (max_learning_rate - base_learning_rate) * scale_factor + + if scale_mode == 'cycle': + lr = base_learning_rate + base_height * scale_fn(cycle) + else: + lr = base_learning_rate + base_height * scale_fn(epoch_num) + + return lr + + class TestLRScheduler(unittest.TestCase): def _test_static(self, python_func, paddle_api, kwarg, place): scheduler = paddle_api(**kwarg) @@ -527,6 +569,56 @@ def test_scheduler(self): "learning_rate": 0.5, "T_max": 10, "verbose": False + }), (cyclic_lr, paddle.optimizer.lr.CyclicLR, { + "base_learning_rate": 0.5, + "max_learning_rate": 1.0, + "step_size_up": 15, + "step_size_down": 5, + "mode": 'triangular', + "gamma": 1., + "scale_mode": None, + "scale_mode": 'cycle', + "verbose": False + }), (cyclic_lr, paddle.optimizer.lr.CyclicLR, { + "base_learning_rate": 0.5, + "max_learning_rate": 1.0, + "step_size_up": 15, + "step_size_down": 5, + "mode": 'triangular2', + "gamma": 1., + "scale_mode": None, + "scale_mode": 'cycle', + "verbose": False + }), (cyclic_lr, paddle.optimizer.lr.CyclicLR, { + "base_learning_rate": 0.5, + "max_learning_rate": 1.0, + "step_size_up": 15, + "step_size_down": 5, + "mode": 'exp_range', + "gamma": 0.8, + "scale_mode": None, + "scale_mode": 'cycle', + "verbose": False + }), (cyclic_lr, paddle.optimizer.lr.CyclicLR, { + "base_learning_rate": 0.5, + "max_learning_rate": 1.0, + "step_size_up": 15, + "step_size_down": 5, + "mode": 'exp_range', + "gamma": 1., + "scale_mode": lambda x: 0.95**x, + "scale_mode": 'cycle', + "verbose": False + }), (cyclic_lr, paddle.optimizer.lr.CyclicLR, { + "base_learning_rate": 0.5, + "max_learning_rate": 1.0, + "step_size_up": 15, + "step_size_down": 5, + "mode": 'exp_range', + "gamma": 1., + "scale_mode": lambda x: 0.95, + "scale_mode": 'iterations', + "verbose": False })] for python_func, paddle_api, kwarg in func_api_kwargs: From 41a2fe0f7fab6fa3521bb41820bca6561683f59a Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Mon, 28 Mar 2022 20:36:12 +0800 Subject: [PATCH 03/20] fix code format --- .../fluid/tests/unittests/test_lr_scheduler.py | 17 +++++++++++++---- python/paddle/optimizer/lr.py | 13 +++++++------ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py index 470386e9c879c..226a6a23f984a 100644 --- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py @@ -336,14 +336,23 @@ def cyclic_lr(epoch_num, step_ratio = step_size_up / total_steps if scale_fn is None: if mode == 'triangular': - def scale_fn(x): return 1. + + def scale_fn(x): + return 1. + scale_mode = 'cycle' elif mode == 'triangular2': - def scale_fn(x): return 1 / (2. ** (x - 1)) + + def scale_fn(x): + return 1 / (2.**(x - 1)) + scale_mode = 'cycle' elif mode == 'exp_range': - def scale_fn(x): return gamma**(x) - scale_mode = 'interations' + + def scale_fn(x): + return gamma**x + + scale_mode = 'iterations' cycle = math.floor(1 + epoch_num / total_steps) x = 1. 
+ epoch_num / total_steps - cycle diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index c79ec5bbd6427..1bbfc6ac025bd 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -993,8 +993,7 @@ def __init__(self, self.milestones = milestones self.gamma = gamma - super(MultiStepDecay, self).__init__( - learning_rate, last_epoch, verbose) + super(MultiStepDecay, self).__init__(learning_rate, last_epoch, verbose) def get_lr(self): for i in range(len(self.milestones)): @@ -1702,9 +1701,11 @@ def __init__(self, self.total_size = step_size_up + step_size_down self.step_ratio = step_size_up / self.total_size - if mode not in ['triangular', 'triangular2', 'exp_range'] and scale_fn is None: + if mode not in ['triangular', 'triangular2', 'exp_range' + ] and scale_fn is None: raise ValueError( - "'mode' is invalid and 'scale_fn' is None, make sure one of 'mode' or 'scale_fn' is valid") + "'mode' is invalid and 'scale_fn' is None, make sure one of 'mode' or 'scale_fn' is valid" + ) if scale_mode not in ['cycle', 'iterations']: raise ValueError( "'scale_mode' must be one of 'cycle' or 'iterations") @@ -1731,10 +1732,10 @@ def _triangular_scale_fn(self, x): return 1. def _triangular2_scale_fn(self, x): - return 1 / (2. ** (x - 1)) + return 1 / (2.**(x - 1)) def _exp_range_scale_fn(self, x): - return self.gamma ** x + return self.gamma**x def get_lr(self): cycle = math.floor(1 + self.last_epoch / self.total_size) From d47a8c07001e9290833270256ac2dca611f17633 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Thu, 31 Mar 2022 19:05:18 +0800 Subject: [PATCH 04/20] fix bug --- python/paddle/fluid/tests/unittests/test_lr_scheduler.py | 3 +-- python/paddle/optimizer/lr.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py index c9cf9e68af007..e231c1fae1191 100644 --- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py @@ -51,8 +51,7 @@ def is_better(current, best, m, n): var_list[2] = cooldown var_list[3] = 0 new_lr = var_list[1] * decay_rate - var_list[1] = new_lr if var_list[1] - \ - new_lr > 1e-8 else var_list[1] + var_list[1] = new_lr if var_list[1] - new_lr > 1e-8 else var_list[1] return var_list[1] diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 536167f72b24a..988b899a44af9 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -1639,7 +1639,7 @@ class CyclicLR(LRScheduler): # train on default dynamic graph mode linear = paddle.nn.Linear(10, 10) scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True) - sgd = paddle.optimizer.SGD(learning_rate=scheduler, paramters=linear.parameters()) + sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) for epoch in range(5): for batch_id in range(20): x = paddle.uniform([10, 10]) From efdb2fe9384c97bfab39a207661c4a09dd01f880 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Sat, 2 Apr 2022 22:59:28 +0800 Subject: [PATCH 05/20] try --- python/paddle/optimizer/lr.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 988b899a44af9..7587e8520bd9a 100644 --- a/python/paddle/optimizer/lr.py +++ 
b/python/paddle/optimizer/lr.py @@ -1598,7 +1598,7 @@ class CyclicLR(LRScheduler): Set the learning rate using a cyclic learning rate scheduler, which cycles the learning rate between two boundaries with a constant frequrncy. The distance between the two boundaries can be csaled on a per-iteration or per-cycle basis. - It has been proposed in `Cyclic Learning Rates for Training Netural Networks `_. + It has been proposed in `Cyclic Learning Rates for Training Neural Networks `_. According to the paper, the cyclic learning rate schedule has three build-in scale methods: @@ -1610,21 +1610,23 @@ class CyclicLR(LRScheduler): Args: base_learning_rate (float): Initial learning rate which is the lower boundary in the cycle. - max_learning_rate (float): Upper learning rate in the cycle. Functionally, it defines the cycle amplitude (max_learning_rate - base_learning_rate). - The lr at any cycle is the sum of base_lr and some scaling of the amplitude; + max_learning_rate (float): Upper learning rate in the cycle. + Functionally, it defines the cycle amplitude (max_learning_rate - base_learning_rate). + The lr at any cycle is the sum of base_lr and some scaling of the amplitude; therefore max_learning_rate may not actually be reached depending on scaling function. - step_size_up (int): Step number of trainning iterations in the increasing half of a cycle. - step_size_down (int): Step number of trainning iterations in the decreasing half of a cycle. - mode (str): one of 'triangular', 'triangular2' or 'exp_ragne'.Values correspond to policies detailed above. + step_size_up (int): Step number of training iterations in the increasing half of a cycle. + step_size_down (int): Step number of training iterations in the decreasing half of a cycle. + mode (str): one of 'triangular', 'triangular2' or 'exp_range'.Values correspond to policies detailed above. If scale_fn is not None, this argument is ignored. Default: 'triangular' gamma (float): Constant in 'exp_range' scaling function: gamma**(cycle iterations) Default: 1.0 scale_fn (function, optional): Custom scaling policy defined by a single argument lambda function, where 0 <= scale_fn(x) <= 1 for all x >= 0. If specified, then 'mode' is ignored. Default: None - scale_mode (str, optional): One of 'cycle' or 'iterations'. Defines whether scale_fn is evaluated on cycle number - or cycle iterations (total iterations since start of training). Default: 'cycle' - last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. - verose: (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` . + scale_mode (str, optional): One of 'cycle' or 'iterations'. Defines whether scale_fn is evaluated on cycle + number or cycle iterations (total iterations since start of training). Default: 'cycle' + last_epoch (int, optional): The index of last epoch. Can be set to restart training. + Default: -1, means initial learning rate. + verbose: (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` . Returns: ``CyclicLR`` instance to schedule learning rate. 
@@ -1659,7 +1661,8 @@ class CyclicLR(LRScheduler): y = paddle.static.data(name='y', shape=[None, 4, 5]) z = paddle.static.nn.fc(x, 100) loss = paddle.mean(z) - scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True) + scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, + max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler) sgd.minimize(loss) From fc0d68ce0c5660e8d1e611dbad70d4d6d566ffc3 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Sun, 3 Apr 2022 15:42:55 +0800 Subject: [PATCH 06/20] fix CI-Coverage --- .../tests/unittests/test_lr_scheduler.py | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py index e231c1fae1191..1278b8c8019b4 100644 --- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py @@ -517,6 +517,18 @@ def test_scheduler(self): with self.assertRaises(ValueError): paddle.optimizer.lr.MultiStepDecay( learning_rate=0.5, milestones=[1, 2, 3], gamma=2) + with self.assertRaises(ValueError): + paddle.optimizer.lr.CyclicLR( + base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=-1) + with self.assertRaises(ValueError): + paddle.optimizer.lr.CyclicLR( + base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=500, step_size_down=-1) + with self.assertRaises(ValueError): + paddle.optimizer.lr.CyclicLR( + base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=500, step_size_down=500, mode='t') + with self.assertRaises(ValueError): + paddle.optimizer.lr.CyclicLR( + base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=500, step_size_down=-1, scale_mode='c') func_api_kwargs = [(noam_lr, paddle.optimizer.lr.NoamDecay, { "d_model": 0.01, @@ -584,7 +596,7 @@ def test_scheduler(self): "step_size_down": 5, "mode": 'triangular', "gamma": 1., - "scale_mode": None, + "scale_fn": None, "scale_mode": 'cycle', "verbose": False }), (cyclic_lr, paddle.optimizer.lr.CyclicLR, { @@ -594,7 +606,7 @@ def test_scheduler(self): "step_size_down": 5, "mode": 'triangular2', "gamma": 1., - "scale_mode": None, + "scale_fn": None, "scale_mode": 'cycle', "verbose": False }), (cyclic_lr, paddle.optimizer.lr.CyclicLR, { @@ -604,7 +616,7 @@ def test_scheduler(self): "step_size_down": 5, "mode": 'exp_range', "gamma": 0.8, - "scale_mode": None, + "scale_fn": None, "scale_mode": 'cycle', "verbose": False }), (cyclic_lr, paddle.optimizer.lr.CyclicLR, { @@ -614,7 +626,7 @@ def test_scheduler(self): "step_size_down": 5, "mode": 'exp_range', "gamma": 1., - "scale_mode": lambda x: 0.95**x, + "scale_fn": lambda x: 0.95**x, "scale_mode": 'cycle', "verbose": False }), (cyclic_lr, paddle.optimizer.lr.CyclicLR, { @@ -624,7 +636,7 @@ def test_scheduler(self): "step_size_down": 5, "mode": 'exp_range', "gamma": 1., - "scale_mode": lambda x: 0.95, + "scale_fn": lambda x: 0.95, "scale_mode": 'iterations', "verbose": False })] From 3893c1cfde8e9ae5b59b1c3ef615b23ec143b48b Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Sun, 3 Apr 2022 15:43:36 +0800 Subject: [PATCH 07/20] fix ValueError --- python/paddle/optimizer/lr.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 7587e8520bd9a..ddd92dc72357c 
100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -1693,14 +1693,15 @@ def __init__(self, verbose=False): self.max_lr = max_learning_rate + step_size_up = float(step_size_up) + step_size_down = float( + step_size_down) if step_size_down is not None else step_size_up + if step_size_up <= 0: raise ValueError("'step_size_up' must be a positive integer.") if step_size_down <= 0: raise ValueError("'step_size_down' must be a positive integer.") - step_size_up = float(step_size_up) - step_size_down = float( - step_size_down) if step_size_down is not None else step_size_up self.total_size = step_size_up + step_size_down self.step_ratio = step_size_up / self.total_size From e0b05583ac6efa726e656ed316c4e9d55bf6bdb6 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Sun, 3 Apr 2022 16:29:00 +0800 Subject: [PATCH 08/20] fix arguments assgin --- python/paddle/optimizer/lr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index ddd92dc72357c..48b7dbfd09fff 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -1684,7 +1684,7 @@ def __init__(self, base_learning_rate, max_learning_rate, step_size_up, - step_size_down, + step_size_down=None, mode='triangular', gamma=1., scale_fn=None, From e9acc0eb7603155410bac397fb3aedc839bdcc63 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Sun, 3 Apr 2022 22:54:48 +0800 Subject: [PATCH 09/20] fix code format and retry pulling develop to pass ci --- .../fluid/tests/unittests/test_lr_scheduler.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py index 1278b8c8019b4..ffd386b9ccaa2 100644 --- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py @@ -522,13 +522,24 @@ def test_scheduler(self): base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=-1) with self.assertRaises(ValueError): paddle.optimizer.lr.CyclicLR( - base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=500, step_size_down=-1) + base_learning_rate=0.5, + max_learning_rate=1.0, + step_size_up=500, + step_size_down=-1) with self.assertRaises(ValueError): paddle.optimizer.lr.CyclicLR( - base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=500, step_size_down=500, mode='t') + base_learning_rate=0.5, + max_learning_rate=1.0, + step_size_up=500, + step_size_down=500, + mode='test') with self.assertRaises(ValueError): paddle.optimizer.lr.CyclicLR( - base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=500, step_size_down=-1, scale_mode='c') + base_learning_rate=0.5, + max_learning_rate=1.0, + step_size_up=500, + step_size_down=-1, + scale_mode='test') func_api_kwargs = [(noam_lr, paddle.optimizer.lr.NoamDecay, { "d_model": 0.01, From 4189f26eeaaf464a0a89c4c656fe336fcfb1514c Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Sat, 9 Apr 2022 14:32:34 +0800 Subject: [PATCH 10/20] fix typo --- python/paddle/fluid/tests/unittests/test_lr_scheduler.py | 2 +- python/paddle/optimizer/lr.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py index ffd386b9ccaa2..935a8d5657ad5 100644 --- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py 
+++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py @@ -637,7 +637,7 @@ def test_scheduler(self): "step_size_down": 5, "mode": 'exp_range', "gamma": 1., - "scale_fn": lambda x: 0.95**x, + "scale_fn": lambda x: 0.85**x, "scale_mode": 'cycle', "verbose": False }), (cyclic_lr, paddle.optimizer.lr.CyclicLR, { diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 48b7dbfd09fff..d2bf9c2b73ed1 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -1596,13 +1596,13 @@ class CyclicLR(LRScheduler): r""" Set the learning rate using a cyclic learning rate scheduler, which cycles the learning rate between two boundaries with a constant frequrncy. - The distance between the two boundaries can be csaled on a per-iteration or per-cycle basis. + The distance between the two boundaries can be scaled on a per-iteration or per-cycle basis. It has been proposed in `Cyclic Learning Rates for Training Neural Networks `_. According to the paper, the cyclic learning rate schedule has three build-in scale methods: - * "triangular": A basic traingular cycle without amplitude scaling. + * "triangular": A basic triangular cycle without amplitude scaling. * "triangular2": A basic triangular cycle that scales initial amplitude by half each cycle. * "exp_range": A cycle that scales initial amplitude by half each cycle. From ab8cc5e1fcdbdab983f0d8d20dbc3affd283b2f9 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Sat, 14 May 2022 15:54:46 +0800 Subject: [PATCH 11/20] Refactor --- .../tests/unittests/test_lr_scheduler.py | 43 ++++--- python/paddle/optimizer/lr.py | 111 ++++++++++++------ 2 files changed, 102 insertions(+), 52 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py index 935a8d5657ad5..2cc42f0742518 100644 --- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py @@ -327,7 +327,7 @@ def cyclic_lr(epoch_num, step_size_up, step_size_down, mode, - gamma=0.1, + exp_gamma=0.1, scale_fn=None, scale_mode='cycle', verbose=False): @@ -349,11 +349,12 @@ def scale_fn(x): elif mode == 'exp_range': def scale_fn(x): - return gamma**x + return exp_gamma**x scale_mode = 'iterations' cycle = math.floor(1 + epoch_num / total_steps) + iterations = epoch_num x = 1. 
+ epoch_num / total_steps - cycle if x <= step_ratio: @@ -363,12 +364,7 @@ def scale_fn(x): base_height = (max_learning_rate - base_learning_rate) * scale_factor - if scale_mode == 'cycle': - lr = base_learning_rate + base_height * scale_fn(cycle) - else: - lr = base_learning_rate + base_height * scale_fn(epoch_num) - - return lr + return base_learning_rate + base_height * scale_fn(eval(scale_mode)) class TestLRScheduler(unittest.TestCase): @@ -517,9 +513,28 @@ def test_scheduler(self): with self.assertRaises(ValueError): paddle.optimizer.lr.MultiStepDecay( learning_rate=0.5, milestones=[1, 2, 3], gamma=2) + with self.assertRaises(TypeError): + paddle.optimizer.lr.CyclicLR( + base_learning_rate=0.5, + max_learning_rate='test', + step_size_up=10) + with self.assertRaises(ValueError): + paddle.optimizer.lr.CyclicLR( + base_learning_rate=0.5, max_learning_rate=-1, step_size_up=10) + with self.assertRaises(TypeError): + paddle.optimizer.lr.CyclicLR( + base_learning_rate=0.5, + max_learning_rate=1.0, + step_size_up='test') with self.assertRaises(ValueError): paddle.optimizer.lr.CyclicLR( base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=-1) + with self.assertRaises(TypeError): + paddle.optimizer.lr.CyclicLR( + base_learning_rate=0.5, + max_learning_rate=1.0, + step_size_up=500, + step_size_down='test') with self.assertRaises(ValueError): paddle.optimizer.lr.CyclicLR( base_learning_rate=0.5, @@ -606,7 +621,7 @@ def test_scheduler(self): "step_size_up": 15, "step_size_down": 5, "mode": 'triangular', - "gamma": 1., + "exp_gamma": 1., "scale_fn": None, "scale_mode": 'cycle', "verbose": False @@ -616,7 +631,7 @@ def test_scheduler(self): "step_size_up": 15, "step_size_down": 5, "mode": 'triangular2', - "gamma": 1., + "exp_gamma": 1., "scale_fn": None, "scale_mode": 'cycle', "verbose": False @@ -626,7 +641,7 @@ def test_scheduler(self): "step_size_up": 15, "step_size_down": 5, "mode": 'exp_range', - "gamma": 0.8, + "exp_gamma": 0.8, "scale_fn": None, "scale_mode": 'cycle', "verbose": False @@ -636,8 +651,8 @@ def test_scheduler(self): "step_size_up": 15, "step_size_down": 5, "mode": 'exp_range', - "gamma": 1., - "scale_fn": lambda x: 0.85**x, + "exp_gamma": 1., + "scale_fn": lambda x: 0.95**x, "scale_mode": 'cycle', "verbose": False }), (cyclic_lr, paddle.optimizer.lr.CyclicLR, { @@ -646,7 +661,7 @@ def test_scheduler(self): "step_size_up": 15, "step_size_down": 5, "mode": 'exp_range', - "gamma": 1., + "exp_gamma": 1., "scale_fn": lambda x: 0.95, "scale_mode": 'iterations', "verbose": False diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index b9043dbb609a8..ce7b0cd5ec217 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -1588,7 +1588,7 @@ def __init__(self, learning_rate, lr_lambda, last_epoch=-1, verbose=False): verbose) def get_lr(self): - cur_lr = self.base_lr + cur_lr = self.base_lr for epoch in range(1, self.last_epoch + 1): cur_lr = cur_lr * self.lr_lambda(epoch) return cur_lr @@ -1596,8 +1596,9 @@ def get_lr(self): class CyclicLR(LRScheduler): r""" - - Set the learning rate using a cyclic learning rate scheduler, which cycles the learning rate between two boundaries with a constant frequrncy. + Set the learning rate according to a cyclic learning rate scheduler. + The scheduler regards the process of learning rate adjustment as one cycle after another. + It cycles the learning rate between two boundaries with a constant frequency. The distance between the two boundaries can be scaled on a per-iteration or per-cycle basis. 
It has been proposed in `Cyclic Learning Rates for Training Neural Networks `_. @@ -1611,19 +1612,23 @@ class CyclicLR(LRScheduler): The initial amplitude is defined as max_learning_rate - base_learning_rate. Args: - base_learning_rate (float): Initial learning rate which is the lower boundary in the cycle. - max_learning_rate (float): Upper learning rate in the cycle. - Functionally, it defines the cycle amplitude (max_learning_rate - base_learning_rate). - The lr at any cycle is the sum of base_lr and some scaling of the amplitude; - therefore max_learning_rate may not actually be reached depending on scaling function. - step_size_up (int): Step number of training iterations in the increasing half of a cycle. - step_size_down (int): Step number of training iterations in the decreasing half of a cycle. - mode (str): one of 'triangular', 'triangular2' or 'exp_range'.Values correspond to policies detailed above. - If scale_fn is not None, this argument is ignored. Default: 'triangular' - gamma (float): Constant in 'exp_range' scaling function: gamma**(cycle iterations) Default: 1.0 - scale_fn (function, optional): Custom scaling policy defined by a single argument lambda function, - where 0 <= scale_fn(x) <= 1 for all x >= 0. - If specified, then 'mode' is ignored. Default: None + base_learning_rate (float): Initial learning rate, which is the lower boundary in the cycle. The paper recommends + that set the base_learning_rate to 1/3 or 1/4 of max_learning_rate. + max_learning_rate (float): Upper learning rate in the cycle. It defines the cycle amplitude. + Since there is some scaling operation during process of learning rate adjustment, + max_learning_rate may not actually be reached. + step_size_up (int): Number of training steps, which is used to increase learning rate in a cycle. + The step size of one cycle will be defined by step_size_up + step_size_down. According to the paper, step + size should be set as at least 3 or 4 times steps in one epoch. + step_size_down (int, optional): Number of training steps, which is used to decrease learning rate in a cycle. + If not specified, it's value will initialize to `` step_size_up `` . Default: None + mode (str, optional): one of 'triangular', 'triangular2' or 'exp_range'. + If scale_fn is specified, this argument will be ignored. Default: 'triangular' + exp_gamma (float): Constant in 'exp_range' scaling function: gamma**(cycle iterations). + Used only when mode = 'exp_range'. Default: 1.0 + scale_fn (function, optional): A custom scaling function, which is used to replace three build-in methods. + It should only have one argument. For all x >= 0, 0 <= scale_fn(x) <= 1. + If specified, then 'mode' will be ignored. Default: None scale_mode (str, optional): One of 'cycle' or 'iterations'. Defines whether scale_fn is evaluated on cycle number or cycle iterations (total iterations since start of training). Default: 'cycle' last_epoch (int, optional): The index of last epoch. Can be set to restart training. @@ -1688,36 +1693,68 @@ def __init__(self, step_size_up, step_size_down=None, mode='triangular', - gamma=1., + exp_gamma=1., scale_fn=None, scale_mode='cycle', last_epoch=-1, verbose=False): - self.max_lr = max_learning_rate + # check type and value of max_learning_rate + if not isinstance(max_learning_rate, (float, int)): + raise TypeError( + "'max_learning_rate' must be 'float' or 'int', but received {}". 
+ format(type(max_learning_rate))) + if max_learning_rate < 0: + raise ValueError( + "'max_learning_rate' must be a positive integer, but received {}". + format(max_learning_rate)) + + # check type and value of step_size_up + if not isinstance(step_size_up, int): + raise TypeError( + "The type of 'step_size_up' must be int, but received {}". + format(type(step_size_up))) + if step_size_up <= 0: + raise ValueError( + "'step_size_up' must be a positive integer, but received {}". + format(step_size_up)) + + # check type and value of step_size_down + if step_size_down is not None: + if not isinstance(step_size_down, int): + raise TypeError( + "The type of 'step_size_up' must be int, but received {}". + format(type(step_size_down))) + if step_size_down <= 0: + raise ValueError( + "'step_size_up' must be a positive integer, but received {}". + format(step_size_down)) + + # check type of exp_gamma + if not isinstance(exp_gamma, float): + raise TypeError( + "The type of 'exp_gamma' must be int, but received {}".format( + type(exp_gamma))) step_size_up = float(step_size_up) step_size_down = float( step_size_down) if step_size_down is not None else step_size_up - if step_size_up <= 0: - raise ValueError("'step_size_up' must be a positive integer.") - if step_size_down <= 0: - raise ValueError("'step_size_down' must be a positive integer.") - - self.total_size = step_size_up + step_size_down - self.step_ratio = step_size_up / self.total_size + self.cycle_size = step_size_up + step_size_down + self.step_up_pct = step_size_up / self.cycle_size + self.max_lr = float(max_learning_rate) + self.amplitude = self.max_lr - base_learning_rate if mode not in ['triangular', 'triangular2', 'exp_range' ] and scale_fn is None: raise ValueError( - "'mode' is invalid and 'scale_fn' is None, make sure one of 'mode' or 'scale_fn' is valid" + "'mode' is invalid and 'scale_fn' is not specified, make sure one of 'mode' or 'scale_fn' is valid" ) if scale_mode not in ['cycle', 'iterations']: raise ValueError( "'scale_mode' must be one of 'cycle' or 'iterations") self.mode = mode - self.gamma = gamma + self.gamma = exp_gamma # only for exp_range mode if scale_fn is None: if self.mode == 'triangular': @@ -1744,20 +1781,18 @@ def _exp_range_scale_fn(self, x): return self.gamma**x def get_lr(self): - cycle = math.floor(1 + self.last_epoch / self.total_size) - x = 1. + self.last_epoch / self.total_size - cycle + iterations = self.last_epoch + + cycle = 1 + iterations // self.cycle_size + pct_per_cycle = 1. 
+ iterations / self.cycle_size - cycle - if x <= self.step_ratio: - scale_factor = x / self.step_ratio + if pct_per_cycle <= self.step_up_pct: + scale_factor = pct_per_cycle / self.step_up_pct else: - scale_factor = (x - 1) / (self.step_ratio - 1) + scale_factor = (1 - pct_per_cycle) / (1 - self.step_up_pct) - base_height = (self.max_lr - self.base_lr) * scale_factor + base_height = self.amplitude * scale_factor - if self.scale_mode == 'cycle': - lr = self.base_lr + base_height * self.scale_fn(cycle) - else: - lr = self.base_lr + base_height * self.scale_fn(self.last_epoch) + lr = self.base_lr + base_height * self.scale_fn(eval(self.scale_mode)) return lr - From 301256cbfcbbe46d19c630800a79334aab1ac0b6 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Sat, 14 May 2022 16:24:23 +0800 Subject: [PATCH 12/20] fix function-redefined in test_lr_scheduler.py --- .../tests/unittests/test_lr_scheduler.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py index 2cc42f0742518..bef86c8f27937 100644 --- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py @@ -333,24 +333,25 @@ def cyclic_lr(epoch_num, verbose=False): total_steps = step_size_up + step_size_down step_ratio = step_size_up / total_steps - if scale_fn is None: - if mode == 'triangular': - def scale_fn(x): - return 1. + def triangular(x): + return 1. - scale_mode = 'cycle' - elif mode == 'triangular2': + def triangular2(x): + return 1 / (2.**(x - 1)) - def scale_fn(x): - return 1 / (2.**(x - 1)) + def exp_range(x): + return exp_gamma**x + if scale_fn is None: + if mode == 'triangular': + scale_fn = triangular + scale_mode = 'cycle' + elif mode == 'triangular2': + scale_fn = triangular2 scale_mode = 'cycle' elif mode == 'exp_range': - - def scale_fn(x): - return exp_gamma**x - + scale_fn = exp_range scale_mode = 'iterations' cycle = math.floor(1 + epoch_num / total_steps) From 558ee62542ef23297dee653c6ad404213fd3a075 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Mon, 16 May 2022 22:18:14 +0800 Subject: [PATCH 13/20] update --- python/paddle/fluid/tests/unittests/test_lr_scheduler.py | 8 ++++++++ python/paddle/optimizer/lr.py | 6 +++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py index bef86c8f27937..b43fd91decbd4 100644 --- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py @@ -514,34 +514,41 @@ def test_scheduler(self): with self.assertRaises(ValueError): paddle.optimizer.lr.MultiStepDecay( learning_rate=0.5, milestones=[1, 2, 3], gamma=2) + # check type of max_learning_rate with self.assertRaises(TypeError): paddle.optimizer.lr.CyclicLR( base_learning_rate=0.5, max_learning_rate='test', step_size_up=10) + # check value of max_learning_rate with self.assertRaises(ValueError): paddle.optimizer.lr.CyclicLR( base_learning_rate=0.5, max_learning_rate=-1, step_size_up=10) + # check type of step_size_up with self.assertRaises(TypeError): paddle.optimizer.lr.CyclicLR( base_learning_rate=0.5, max_learning_rate=1.0, step_size_up='test') + # check value of step_size_up with self.assertRaises(ValueError): paddle.optimizer.lr.CyclicLR( base_learning_rate=0.5, max_learning_rate=1.0, 
step_size_up=-1) + # check type of step_size_down with self.assertRaises(TypeError): paddle.optimizer.lr.CyclicLR( base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=500, step_size_down='test') + # check type of step_size_down with self.assertRaises(ValueError): paddle.optimizer.lr.CyclicLR( base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=500, step_size_down=-1) + # check value of mode with self.assertRaises(ValueError): paddle.optimizer.lr.CyclicLR( base_learning_rate=0.5, @@ -549,6 +556,7 @@ def test_scheduler(self): step_size_up=500, step_size_down=500, mode='test') + # check type value of scale_mode with self.assertRaises(ValueError): paddle.optimizer.lr.CyclicLR( base_learning_rate=0.5, diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index ce7b0cd5ec217..aa39ce2b6d225 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -1722,17 +1722,17 @@ def __init__(self, if step_size_down is not None: if not isinstance(step_size_down, int): raise TypeError( - "The type of 'step_size_up' must be int, but received {}". + "The type of 'step_size_down' must be int, but received {}". format(type(step_size_down))) if step_size_down <= 0: raise ValueError( - "'step_size_up' must be a positive integer, but received {}". + "'step_size_down' must be a positive integer, but received {}". format(step_size_down)) # check type of exp_gamma if not isinstance(exp_gamma, float): raise TypeError( - "The type of 'exp_gamma' must be int, but received {}".format( + "The type of 'exp_gamma' must be float, but received {}".format( type(exp_gamma))) step_size_up = float(step_size_up) From 12932f19ff93e3a32c428b8bbde07988616a9812 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Mon, 16 May 2022 22:42:47 +0800 Subject: [PATCH 14/20] fix conflict --- python/paddle/optimizer/lr.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 881388d293650..d82f8c9ed39e8 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -1600,11 +1600,15 @@ class OneCycleLR(LRScheduler): Sets the learning rate according to the one cycle learning rate scheduler. The scheduler adjusts the learning rate from an initial learning rate to the maximum learning rate and then from that maximum learning rate to the minimum learning rate, which is much less than the initial learning rate. + It has been proposed in `Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates `_. + Please note that the default behaviour of this scheduler follows the fastai implementation of one cycle, which claims that “unpublished work has shown even better results by using only two phases”. If you want the behaviour of this scheduler to be consistent with the paper, please set ``three_phase=True`` . + Also note that you should update learning rate each step. + Args: max_learning_rate (float): The maximum learning rate. It is a python float number. Functionally, it defines the initial learning rate by ``divide_factor`` . @@ -1624,12 +1628,16 @@ class OneCycleLR(LRScheduler): 2. Then it will directly decrease to minimum learning rate. last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` . + Returns: ``OneCycleLR`` instance to schedule learning rate. + Examples: .. 
code-block:: python + import paddle import numpy as np + # train on default dynamic graph mode linear = paddle.nn.Linear(10, 10) scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True) @@ -1643,6 +1651,7 @@ class OneCycleLR(LRScheduler): sgd.step() sgd.clear_gradients() scheduler.step() # You should update learning rate each step + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() @@ -1655,6 +1664,7 @@ class OneCycleLR(LRScheduler): scheduler = paddle.optimizer.lr.OneCycleLR(max_learning_rate=1.0, total_steps=100, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler) sgd.minimize(loss) + exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(5): @@ -1800,12 +1810,17 @@ class CyclicLR(LRScheduler): The scheduler regards the process of learning rate adjustment as one cycle after another. It cycles the learning rate between two boundaries with a constant frequency. The distance between the two boundaries can be scaled on a per-iteration or per-cycle basis. + It has been proposed in `Cyclic Learning Rates for Training Neural Networks `_. + According to the paper, the cyclic learning rate schedule has three build-in scale methods: + * "triangular": A basic triangular cycle without amplitude scaling. * "triangular2": A basic triangular cycle that scales initial amplitude by half each cycle. * "exp_range": A cycle that scales initial amplitude by half each cycle. + The initial amplitude is defined as max_learning_rate - base_learning_rate. + Args: base_learning_rate (float): Initial learning rate, which is the lower boundary in the cycle. The paper recommends that set the base_learning_rate to 1/3 or 1/4 of max_learning_rate. @@ -1829,12 +1844,16 @@ class CyclicLR(LRScheduler): last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. verbose: (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` . + Returns: ``CyclicLR`` instance to schedule learning rate. + Examples: .. 
code-block:: python + import paddle import numpy as np + # train on default dynamic graph mode linear = paddle.nn.Linear(10, 10) scheduler = paddle.optimizer.lr.CyclicLR(base_learning_rate=0.5, max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True) @@ -1848,6 +1867,7 @@ class CyclicLR(LRScheduler): sgd.step() sgd.clear_gradients() scheduler.step() # You should update learning rate each step + # train on static graph mode paddle.enable_static() main_prog = paddle.static.Program() @@ -1861,6 +1881,7 @@ class CyclicLR(LRScheduler): max_learning_rate=1.0, step_size_up=15, step_size_down=5, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler) sgd.minimize(loss) + exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(5): From b9d63552f419cac590466dbdc58830b551cac08c Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Fri, 20 May 2022 17:57:41 +0800 Subject: [PATCH 15/20] update --- python/paddle/optimizer/lr.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index d82f8c9ed39e8..7120410c69186 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -1815,11 +1815,12 @@ class CyclicLR(LRScheduler): According to the paper, the cyclic learning rate schedule has three build-in scale methods: - * "triangular": A basic triangular cycle without amplitude scaling. - * "triangular2": A basic triangular cycle that scales initial amplitude by half each cycle. - * "exp_range": A cycle that scales initial amplitude by half each cycle. + * "triangular": A basic triangular cycle without any amplitude scaling. + * "triangular2": A basic triangular cycle that reduce initial amplitude by half each cycle. + * "exp_range": A cycle that scales initial amplitude by half each cycle. Scale function defined as :math:`gamma^{iterations}` The initial amplitude is defined as max_learning_rate - base_learning_rate. + Also note that you should update learning rate each step. Args: base_learning_rate (float): Initial learning rate, which is the lower boundary in the cycle. The paper recommends From ef0b38346e3da0a0350124345cb2e8013d8f0302 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Fri, 20 May 2022 18:01:58 +0800 Subject: [PATCH 16/20] gamma->exp_gamma --- python/paddle/optimizer/lr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 7120410c69186..c2111b5c2523f 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -1835,7 +1835,7 @@ class CyclicLR(LRScheduler): If not specified, it's value will initialize to `` step_size_up `` . Default: None mode (str, optional): one of 'triangular', 'triangular2' or 'exp_range'. If scale_fn is specified, this argument will be ignored. Default: 'triangular' - exp_gamma (float): Constant in 'exp_range' scaling function: gamma**(cycle iterations). + exp_gamma (float): Constant in 'exp_range' scaling function: exp_gamma**(cycle iterations). Used only when mode = 'exp_range'. Default: 1.0 scale_fn (function, optional): A custom scaling function, which is used to replace three build-in methods. It should only have one argument. For all x >= 0, 0 <= scale_fn(x) <= 1. 
From 03d7fac195a978bba95e1bbf45f9f90d7ca4166d Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Sun, 22 May 2022 16:36:00 +0800 Subject: [PATCH 17/20] polish docs --- python/paddle/optimizer/lr.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index c2111b5c2523f..3663fc92946c8 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -1806,7 +1806,7 @@ def get_lr(self): class CyclicLR(LRScheduler): r""" - Set the learning rate according to a cyclic learning rate scheduler. + Set the learning rate according to the cyclic learning rate (CLR) scheduler. The scheduler regards the process of learning rate adjustment as one cycle after another. It cycles the learning rate between two boundaries with a constant frequency. The distance between the two boundaries can be scaled on a per-iteration or per-cycle basis. @@ -1817,7 +1817,7 @@ class CyclicLR(LRScheduler): * "triangular": A basic triangular cycle without any amplitude scaling. * "triangular2": A basic triangular cycle that reduce initial amplitude by half each cycle. - * "exp_range": A cycle that scales initial amplitude by half each cycle. Scale function defined as :math:`gamma^{iterations}` + * "exp_range": A cycle that scales initial amplitude by scale function which is defined as :math:`gamma^{iterations}` . The initial amplitude is defined as max_learning_rate - base_learning_rate. Also note that you should update learning rate each step. @@ -1825,7 +1825,7 @@ class CyclicLR(LRScheduler): Args: base_learning_rate (float): Initial learning rate, which is the lower boundary in the cycle. The paper recommends that set the base_learning_rate to 1/3 or 1/4 of max_learning_rate. - max_learning_rate (float): Upper learning rate in the cycle. It defines the cycle amplitude. + max_learning_rate (float): Maximum learning rate in the cycle. It defines the cycle amplitude as above. Since there is some scaling operation during process of learning rate adjustment, max_learning_rate may not actually be reached. step_size_up (int): Number of training steps, which is used to increase learning rate in a cycle. @@ -1835,15 +1835,13 @@ class CyclicLR(LRScheduler): If not specified, it's value will initialize to `` step_size_up `` . Default: None mode (str, optional): one of 'triangular', 'triangular2' or 'exp_range'. If scale_fn is specified, this argument will be ignored. Default: 'triangular' - exp_gamma (float): Constant in 'exp_range' scaling function: exp_gamma**(cycle iterations). - Used only when mode = 'exp_range'. Default: 1.0 + exp_gamma (float): Constant in 'exp_range' scaling function: exp_gamma**iterations. Used only when mode = 'exp_range'. Default: 1.0 scale_fn (function, optional): A custom scaling function, which is used to replace three build-in methods. It should only have one argument. For all x >= 0, 0 <= scale_fn(x) <= 1. If specified, then 'mode' will be ignored. Default: None scale_mode (str, optional): One of 'cycle' or 'iterations'. Defines whether scale_fn is evaluated on cycle number or cycle iterations (total iterations since start of training). Default: 'cycle' - last_epoch (int, optional): The index of last epoch. Can be set to restart training. - Default: -1, means initial learning rate. + last_epoch (int, optional): The index of last epoch. Can be set to restart training.Default: -1, means initial learning rate. 
verbose: (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` . Returns: From 5ebf5c138a3a8679f7b20b681dc1be74b8ead2a0 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Wed, 1 Jun 2022 13:38:03 +0800 Subject: [PATCH 18/20] fix code-style --- python/paddle/fluid/tests/unittests/test_lr_scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py index 5b29b7051f400..65a5b3506b7df 100644 --- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py @@ -755,7 +755,7 @@ def test_scheduler(self): "anneal_strategy": 'linear', "phase_pct": 0.2, "three_phase": True, - }),(cyclic_lr, paddle.optimizer.lr.CyclicLR, { + }), (cyclic_lr, paddle.optimizer.lr.CyclicLR, { "base_learning_rate": 0.5, "max_learning_rate": 1.0, "step_size_up": 15, From f86f9e301a83ef29f95b35eb00b464e736bbe518 Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Tue, 7 Jun 2022 14:18:35 +0800 Subject: [PATCH 19/20] adjust code format again --- python/paddle/optimizer/lr.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index feeef58d0b094..498d364519ec0 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -1903,8 +1903,8 @@ def __init__(self, format(type(max_learning_rate))) if max_learning_rate < 0: raise ValueError( - "'max_learning_rate' must be a positive integer, but received {}". - format(max_learning_rate)) + "'max_learning_rate' must be a positive integer, but received {}" + .format(max_learning_rate)) # check type and value of step_size_up if not isinstance(step_size_up, int): @@ -1924,8 +1924,8 @@ def __init__(self, format(type(step_size_down))) if step_size_down <= 0: raise ValueError( - "'step_size_down' must be a positive integer, but received {}". - format(step_size_down)) + "'step_size_down' must be a positive integer, but received {}" + .format(step_size_down)) # check type of exp_gamma if not isinstance(exp_gamma, float): From 07db9a0e2543681b51babb6e27971c5f90adb57c Mon Sep 17 00:00:00 2001 From: Asthestarsfalll <1186454801@qq.com> Date: Tue, 7 Jun 2022 17:41:52 +0800 Subject: [PATCH 20/20] change format of __all__ in lr.py --- python/paddle/optimizer/lr.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 498d364519ec0..4d7d128e05e49 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -20,10 +20,22 @@ from ..fluid.framework import _in_legacy_dygraph __all__ = [ # noqa - 'LRScheduler', 'NoamDecay', 'PiecewiseDecay', 'NaturalExpDecay', - 'InverseTimeDecay', 'PolynomialDecay', 'LinearWarmup', 'ExponentialDecay', - 'MultiStepDecay', 'StepDecay', 'LambdaDecay', 'ReduceOnPlateau', - 'CosineAnnealingDecay', 'MultiplicativeDecay', 'OneCycleLR', 'CyclicLR' + 'LRScheduler', + 'NoamDecay', + 'PiecewiseDecay', + 'NaturalExpDecay', + 'InverseTimeDecay', + 'PolynomialDecay', + 'LinearWarmup', + 'ExponentialDecay', + 'MultiStepDecay', + 'StepDecay', + 'LambdaDecay', + 'ReduceOnPlateau', + 'CosineAnnealingDecay', + 'MultiplicativeDecay', + 'OneCycleLR', + 'CyclicLR', ]
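
Editor's note on the schedule this series implements: none of the patches carries a standalone illustration of the cyclic learning-rate arithmetic itself, so a minimal sketch is given below for reference. It is illustrative only — the function name ``cyclic_lr`` and its default arguments are assumptions chosen to mirror the values used in the docstring examples (base 0.5, max 1.0, step_size_up 15, step_size_down 5) and are not part of the Paddle API. The arithmetic follows the refactored ``CyclicLR.get_lr()`` added above, with ``scale_mode`` fixed to each built-in mode's default ('cycle' for the triangular modes, 'iterations' for 'exp_range'); custom ``scale_fn`` / ``scale_mode`` combinations are omitted for brevity.

.. code-block:: python

    def cyclic_lr(iteration,
                  base_lr=0.5, max_lr=1.0,
                  step_size_up=15, step_size_down=5,
                  mode='triangular', exp_gamma=1.0):
        """Learning rate at a given global iteration (0-based)."""
        cycle_size = float(step_size_up + step_size_down)
        step_up_pct = step_size_up / cycle_size

        # Index of the current cycle and the fraction of it already covered.
        cycle = 1 + iteration // cycle_size
        pct_per_cycle = 1.0 + iteration / cycle_size - cycle

        # Triangle wave: rise for step_size_up steps, fall for step_size_down.
        if pct_per_cycle <= step_up_pct:
            scale_factor = pct_per_cycle / step_up_pct
        else:
            scale_factor = (1 - pct_per_cycle) / (1 - step_up_pct)

        # Amplitude scaling for the three built-in modes.
        if mode == 'triangular':
            scale = 1.0                          # constant amplitude
        elif mode == 'triangular2':
            scale = 1.0 / (2.0 ** (cycle - 1))   # halve the amplitude each cycle
        elif mode == 'exp_range':
            scale = exp_gamma ** iteration       # exponential decay per iteration
        else:
            raise ValueError("unknown mode: %s" % mode)

        return base_lr + (max_lr - base_lr) * scale_factor * scale


    if __name__ == '__main__':
        # First two cycles (20 steps each with the defaults above): the rate
        # climbs from 0.5 to 1.0 over 15 steps, then falls back over 5 steps.
        for it in range(40):
            print(it, round(cyclic_lr(it), 4))

With the defaults, step 0 yields 0.5, step 15 reaches the peak of 1.0, step 20 returns to 0.5, and 'triangular2' halves each successive peak (1.0, 0.75, ...), which matches the values the unit test's reference ``cyclic_lr`` helper computes.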