From 92db47f7ac02578f831cf487cb6b3ad0f150f2d7 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Mon, 14 Sep 2020 21:59:06 +0300 Subject: [PATCH 01/23] cosface loss --- catalyst/contrib/nn/criterion/__init__.py | 1 + catalyst/contrib/nn/criterion/cosface.py | 96 +++++++++++++++++++++ catalyst/contrib/nn/tests/test_criterion.py | 46 ++++++++++ 3 files changed, 143 insertions(+) create mode 100644 catalyst/contrib/nn/criterion/cosface.py diff --git a/catalyst/contrib/nn/criterion/__init__.py b/catalyst/contrib/nn/criterion/__init__.py index 70d4be2f77..843e3fc1a3 100644 --- a/catalyst/contrib/nn/criterion/__init__.py +++ b/catalyst/contrib/nn/criterion/__init__.py @@ -36,3 +36,4 @@ TripletMarginLossWithSampler, ) from catalyst.contrib.nn.criterion.wing import WingLoss +from catalyst.contrib.nn.criterion.cosface import CosFaceLoss diff --git a/catalyst/contrib/nn/criterion/cosface.py b/catalyst/contrib/nn/criterion/cosface.py new file mode 100644 index 0000000000..2ca1ac7f43 --- /dev/null +++ b/catalyst/contrib/nn/criterion/cosface.py @@ -0,0 +1,96 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.loss import _WeightedLoss + + +class CosFaceLoss(_WeightedLoss): + """Implementation of CosFace loss for metric learning. + + .. _CosFace: Large Margin Cosine Loss for Deep Face Recognition + https://arxiv.org/abs/1801.09414 + """ + + def __init__( + self, + embedding_size: int, + num_classes: int, + s: float = 64.0, + m: float = 0.35, + weight: torch.Tensor = None, + size_average=None, + ignore_index: int = -100, + reduce=None, + reduction: str = "mean", + ): + """ + Args: + embedding_size (int): size of each input sample. + num_classes (int): size of each output sample. + s (float): norm of input feature, + Default: ``64.0``. + m (float): margin. + Default: ``0.35``. + weight (float, optional): – a manual rescaling weight given to each class. + If given, has to be a Tensor of size `num_classes`. + Default: ``None``. 
+ size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, + the losses are averaged over each loss element in the batch. Note that for + some losses, there are multiple elements per sample. If the field :attr:`size_average` + is set to ``False``, the losses are instead summed for each minibatch. Ignored + when reduce is ``False``. + Default: ``True`` + reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the + losses are averaged or summed over observations for each minibatch depending + on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per + batch element instead and ignores :attr:`size_average`. + Default: ``True`` + reduction (string, optional): Specifies the reduction to apply to the output: + ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, + ``'mean'``: the sum of the output will be divided by the number of + elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` + and :attr:`reduce` are in the process of being deprecated, and in the meantime, + specifying either of those two args will override :attr:`reduction`. + Default: ``'mean'`` + """ + super(CosFaceLoss, self).__init__( + weight, size_average, reduce, reduction + ) + self.ignore_index = ignore_index + self.embedding_size = embedding_size + self.num_classes = num_classes + self.s = s + self.m = m + + self.projection = nn.Parameter( + torch.FloatTensor(num_classes, embedding_size) + ) + nn.init.xavier_uniform_(self.projection) + + def forward( + self, input: torch.Tensor, target: torch.Tensor + ) -> torch.Tensor: + """ + Args: + input (torch.Tensor): input features, + expected shapes BxF. + target (torch.Tensor): target classes, + expected shapes B. + + Returns: + torch.Tensor with loss value. 
+ """ + cosine = F.linear(F.normalize(input), F.normalize(self.projection)) + phi = cosine - self.m + one_hot = torch.zeros(cosine.size()).to(input.device) + one_hot.scatter_(1, target.view(-1, 1).long(), 1) + logits = (one_hot * phi) + ((1.0 - one_hot) * cosine) + logits *= self.s + + return F.cross_entropy( + logits, + target, + weight=self.weight, + ignore_index=self.ignore_index, + reduction=self.reduction, + ) diff --git a/catalyst/contrib/nn/tests/test_criterion.py b/catalyst/contrib/nn/tests/test_criterion.py index 01cadc42cb..3be51b937a 100644 --- a/catalyst/contrib/nn/tests/test_criterion.py +++ b/catalyst/contrib/nn/tests/test_criterion.py @@ -1,7 +1,10 @@ +import torch +import numpy as np from catalyst.contrib.nn import criterion as module from catalyst.contrib.nn.criterion import ( CircleLoss, TripletMarginLossWithSampler, + CosFaceLoss, ) from catalyst.data import AllTripletsSampler @@ -19,3 +22,46 @@ def test_criterion_init(): else: instance = module_class() assert instance is not None + + +def test_cosface_loss(): + emb_size = 4 + n_classes = 3 + s = 3.0 + m = 0.1 + loss_fn = CosFaceLoss(emb_size, n_classes, s, m) + + features = np.array( + [ + [1, 2, 3, 4], + [5, 6, 7, 8], + ], + dtype="f", + ) + target = np.array([0, 2], dtype="l") + projection = np.array( + [ + [0.1, 0.2, 0.3, 0.4], + [1.1, 3.2, 5.3, 0.4], + [0.1, 0.2, 6.3, 0.4], + ], + dtype="f", + ) + + loss_fn.projection.data = torch.from_numpy(projection) + + def normalize(matr): + return matr / np.sqrt((matr ** 2).sum(axis=1))[:, np.newaxis] + + normalized_features = normalize(features) + normalized_projection = normalize(projection) + + cosine = normalized_features @ normalized_projection.T + phi = cosine - m + + mask = np.array([[1, 0, 0], [0, 0, 1]], dtype="l") + feats = (mask * phi + (1.0 - mask) * cosine) * s + + expected_loss = 1.3651 + actual = loss_fn(torch.from_numpy(features), torch.LongTensor(target)) + assert abs(expected_loss - actual.item()) < 1e-5 From 
e63eedfcd1fa9b993b9090f1adfc00226308b493 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Mon, 14 Sep 2020 22:24:27 +0300 Subject: [PATCH 02/23] pep fixes --- catalyst/contrib/nn/criterion/cosface.py | 47 +++++++++++++++--------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/catalyst/contrib/nn/criterion/cosface.py b/catalyst/contrib/nn/criterion/cosface.py index 2ca1ac7f43..cb4fb8704b 100644 --- a/catalyst/contrib/nn/criterion/cosface.py +++ b/catalyst/contrib/nn/criterion/cosface.py @@ -1,7 +1,7 @@ import torch import torch.nn as nn import torch.nn.functional as F -from torch.nn.modules.loss import _WeightedLoss +from torch.nn.modules.loss import _WeightedLoss # noqa class CosFaceLoss(_WeightedLoss): @@ -31,26 +31,39 @@ def __init__( Default: ``64.0``. m (float): margin. Default: ``0.35``. - weight (float, optional): – a manual rescaling weight given to each class. - If given, has to be a Tensor of size `num_classes`. + weight (float, optional): – a manual rescaling weight + given to each class. If given, has to be a Tensor + of size `num_classes`. Default: ``None``. - size_average (bool, optional): Deprecated (see :attr:`reduction`). By default, - the losses are averaged over each loss element in the batch. Note that for - some losses, there are multiple elements per sample. If the field :attr:`size_average` - is set to ``False``, the losses are instead summed for each minibatch. Ignored + size_average (bool, optional): + Deprecated (see :attr:`reduction`). + By default, the losses are averaged over each + loss element in the batch. Note that for + some losses, there are multiple elements + per sample. If the field :attr:`size_average` + is set to ``False``, the losses are instead + summed for each minibatch. Ignored when reduce is ``False``. Default: ``True`` - reduce (bool, optional): Deprecated (see :attr:`reduction`). 
By default, the - losses are averaged or summed over observations for each minibatch depending - on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per - batch element instead and ignores :attr:`size_average`. + reduce (bool, optional): + Deprecated (see :attr:`reduction`). + By default, the losses are averaged or summed + over observations for each minibatch depending + on :attr:`size_average`. When :attr:`reduce` is + ``False``, returns a loss per batch element + instead and ignores :attr:`size_average`. Default: ``True`` - reduction (string, optional): Specifies the reduction to apply to the output: - ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, - ``'mean'``: the sum of the output will be divided by the number of - elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average` - and :attr:`reduce` are in the process of being deprecated, and in the meantime, - specifying either of those two args will override :attr:`reduction`. + reduction (string, optional): Specifies the reduction + to apply to the output: + ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: + no reduction will be applied, ``'mean'``: the sum + of the output will be divided by the number of + elements in the output, ``'sum'``: the output will + be summed. Note: :attr:`size_average` + and :attr:`reduce` are in the process of being + deprecated, and in the meantime, specifying either + of those two args will + override :attr:`reduction`. 
Default: ``'mean'`` """ super(CosFaceLoss, self).__init__( From 5539c7f08314e282ba1ededcc48f222c3b14c4c0 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Mon, 14 Sep 2020 22:28:19 +0300 Subject: [PATCH 03/23] fixed link --- catalyst/contrib/nn/criterion/cosface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalyst/contrib/nn/criterion/cosface.py b/catalyst/contrib/nn/criterion/cosface.py index cb4fb8704b..57b063162d 100644 --- a/catalyst/contrib/nn/criterion/cosface.py +++ b/catalyst/contrib/nn/criterion/cosface.py @@ -7,7 +7,7 @@ class CosFaceLoss(_WeightedLoss): """Implementation of CosFace loss for metric learning. - .. _CosFace: Large Margin Cosine Loss for Deep Face Recognition + .. _CosFace: Large Margin Cosine Loss for Deep Face Recognition: https://arxiv.org/abs/1801.09414 """ From df4a32f4f9f5c4ad6e5afccbb287b891a786135a Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Mon, 14 Sep 2020 23:22:32 +0300 Subject: [PATCH 04/23] more tests --- catalyst/contrib/nn/tests/test_criterion.py | 58 +++++++++++++++++---- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/catalyst/contrib/nn/tests/test_criterion.py b/catalyst/contrib/nn/tests/test_criterion.py index 3be51b937a..41c0e906b8 100644 --- a/catalyst/contrib/nn/tests/test_criterion.py +++ b/catalyst/contrib/nn/tests/test_criterion.py @@ -29,7 +29,6 @@ def test_cosface_loss(): n_classes = 3 s = 3.0 m = 0.1 - loss_fn = CosFaceLoss(emb_size, n_classes, s, m) features = np.array( [ @@ -48,20 +47,57 @@ def test_cosface_loss(): dtype="f", ) + loss_fn = CosFaceLoss(emb_size, n_classes, s, m, reduction="none") loss_fn.projection.data = torch.from_numpy(projection) def normalize(matr): - return matr / np.sqrt((matr ** 2).sum(axis=1))[:, np.newaxis] + return ( + matr / np.sqrt((matr ** 2).sum(axis=1))[:, np.newaxis] + ) # for each row - normalized_features = normalize(features) - normalized_projection = normalize(projection) + def softmax(x): + e_x = np.exp(x - np.max(x)) + 
return e_x / e_x.sum(1)[:, np.newaxis] # for each row - cosine = normalized_features @ normalized_projection.T - phi = cosine - m + def cross_entropy(preds, targs, axis=None): + print(softmax(preds)) + return -(targs * np.log(softmax(preds))).sum(axis) - mask = np.array([[1, 0, 0], [0, 0, 1]], dtype="l") - feats = (mask * phi + (1.0 - mask) * cosine) * s + normalized_features = normalize(features) # 2x4 + normalized_projection = normalize(projection) # 3x4 - expected_loss = 1.3651 - actual = loss_fn(torch.from_numpy(features), torch.LongTensor(target)) - assert abs(expected_loss - actual.item()) < 1e-5 + cosine = normalized_features @ normalized_projection.T # 2x4 * 4x3 = 2x3 + phi = cosine - m # 2x3 + + mask = np.array([[1, 0, 0], [0, 0, 1]], dtype="l") # one_hot(target) + feats = (mask * phi + (1.0 - mask) * cosine) * s # 2x3 + + expected_loss = cross_entropy(feats, mask, 1) + actual = ( + loss_fn(torch.from_numpy(features), torch.LongTensor(target)) + .detach() + .numpy() + ) + assert np.allclose(expected_loss, actual) + + loss_fn = CosFaceLoss(emb_size, n_classes, s, m, reduction="mean") + loss_fn.projection.data = torch.from_numpy(projection) + + expected_loss = cross_entropy(feats, mask, 1) + actual = ( + loss_fn(torch.from_numpy(features), torch.LongTensor(target)) + .detach() + .numpy() + ) + assert np.isclose(expected_loss.mean(), actual) + + loss_fn = CosFaceLoss(emb_size, n_classes, s, m, reduction="sum") + loss_fn.projection.data = torch.from_numpy(projection) + + expected_loss = cross_entropy(feats, mask, 1) + actual = ( + loss_fn(torch.from_numpy(features), torch.LongTensor(target)) + .detach() + .numpy() + ) + assert np.isclose(expected_loss.sum(), actual) \ No newline at end of file From a9734e119a6c63ba1c789a88e90f87353193152d Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Mon, 14 Sep 2020 23:24:46 +0300 Subject: [PATCH 05/23] ignore flake --- catalyst/contrib/nn/tests/test_criterion.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/catalyst/contrib/nn/tests/test_criterion.py b/catalyst/contrib/nn/tests/test_criterion.py index 41c0e906b8..c7ba8137b8 100644 --- a/catalyst/contrib/nn/tests/test_criterion.py +++ b/catalyst/contrib/nn/tests/test_criterion.py @@ -1,3 +1,4 @@ +# flake8: noqa import torch import numpy as np from catalyst.contrib.nn import criterion as module From e6c1588c8184c961aa0e95a7f1414150369f2422 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Sun, 20 Sep 2020 16:11:09 +0300 Subject: [PATCH 06/23] cosface now is a layer, softmax, cosface, tests --- catalyst/contrib/nn/criterion/__init__.py | 1 - catalyst/contrib/nn/criterion/cosface.py | 109 ---------- catalyst/contrib/nn/modules/__init__.py | 4 + catalyst/contrib/nn/modules/arcface.py | 86 ++++++++ catalyst/contrib/nn/modules/cosface.py | 75 +++++++ catalyst/contrib/nn/modules/softmax.py | 49 +++++ catalyst/contrib/nn/tests/test_criterion.py | 84 +------- catalyst/contrib/nn/tests/test_modules.py | 212 ++++++++++++++++++++ 8 files changed, 429 insertions(+), 191 deletions(-) delete mode 100644 catalyst/contrib/nn/criterion/cosface.py create mode 100644 catalyst/contrib/nn/modules/arcface.py create mode 100644 catalyst/contrib/nn/modules/cosface.py create mode 100644 catalyst/contrib/nn/modules/softmax.py create mode 100644 catalyst/contrib/nn/tests/test_modules.py diff --git a/catalyst/contrib/nn/criterion/__init__.py b/catalyst/contrib/nn/criterion/__init__.py index 843e3fc1a3..70d4be2f77 100644 --- a/catalyst/contrib/nn/criterion/__init__.py +++ b/catalyst/contrib/nn/criterion/__init__.py @@ -36,4 +36,3 @@ TripletMarginLossWithSampler, ) from catalyst.contrib.nn.criterion.wing import WingLoss -from catalyst.contrib.nn.criterion.cosface import CosFaceLoss diff --git a/catalyst/contrib/nn/criterion/cosface.py b/catalyst/contrib/nn/criterion/cosface.py deleted file mode 100644 index 57b063162d..0000000000 --- a/catalyst/contrib/nn/criterion/cosface.py +++ /dev/null @@ -1,109 +0,0 @@ -import torch -import torch.nn as 
nn -import torch.nn.functional as F -from torch.nn.modules.loss import _WeightedLoss # noqa - - -class CosFaceLoss(_WeightedLoss): - """Implementation of CosFace loss for metric learning. - - .. _CosFace: Large Margin Cosine Loss for Deep Face Recognition: - https://arxiv.org/abs/1801.09414 - """ - - def __init__( - self, - embedding_size: int, - num_classes: int, - s: float = 64.0, - m: float = 0.35, - weight: torch.Tensor = None, - size_average=None, - ignore_index: int = -100, - reduce=None, - reduction: str = "mean", - ): - """ - Args: - embedding_size (int): size of each input sample. - num_classes (int): size of each output sample. - s (float): norm of input feature, - Default: ``64.0``. - m (float): margin. - Default: ``0.35``. - weight (float, optional): – a manual rescaling weight - given to each class. If given, has to be a Tensor - of size `num_classes`. - Default: ``None``. - size_average (bool, optional): - Deprecated (see :attr:`reduction`). - By default, the losses are averaged over each - loss element in the batch. Note that for - some losses, there are multiple elements - per sample. If the field :attr:`size_average` - is set to ``False``, the losses are instead - summed for each minibatch. Ignored - when reduce is ``False``. - Default: ``True`` - reduce (bool, optional): - Deprecated (see :attr:`reduction`). - By default, the losses are averaged or summed - over observations for each minibatch depending - on :attr:`size_average`. When :attr:`reduce` is - ``False``, returns a loss per batch element - instead and ignores :attr:`size_average`. - Default: ``True`` - reduction (string, optional): Specifies the reduction - to apply to the output: - ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: - no reduction will be applied, ``'mean'``: the sum - of the output will be divided by the number of - elements in the output, ``'sum'``: the output will - be summed. 
Note: :attr:`size_average` - and :attr:`reduce` are in the process of being - deprecated, and in the meantime, specifying either - of those two args will - override :attr:`reduction`. - Default: ``'mean'`` - """ - super(CosFaceLoss, self).__init__( - weight, size_average, reduce, reduction - ) - self.ignore_index = ignore_index - self.embedding_size = embedding_size - self.num_classes = num_classes - self.s = s - self.m = m - - self.projection = nn.Parameter( - torch.FloatTensor(num_classes, embedding_size) - ) - nn.init.xavier_uniform_(self.projection) - - def forward( - self, input: torch.Tensor, target: torch.Tensor - ) -> torch.Tensor: - """ - Args: - input (torch.Tensor): input features, - expected shapes BxF. - target (torch.Tensor): target classes, - expected shapes B. - - Returns: - torch.Tensor with loss value. - """ - cosine = F.linear(F.normalize(input), F.normalize(self.projection)) - phi = cosine - self.m - one_hot = torch.zeros(cosine.size()).to(input.device) - one_hot.scatter_(1, target.view(-1, 1).long(), 1) - logits = (one_hot * phi) + ((1.0 - one_hot) * cosine) - logits *= self.s - - return F.cross_entropy( - logits, - target, - weight=self.weight, - ignore_index=self.ignore_index, - reduction=self.reduction, - ) diff --git a/catalyst/contrib/nn/modules/__init__.py b/catalyst/contrib/nn/modules/__init__.py index c4f4d015eb..96401825eb 100644 --- a/catalyst/contrib/nn/modules/__init__.py +++ b/catalyst/contrib/nn/modules/__init__.py @@ -31,3 +31,7 @@ scSE, cSE, ) + +from catalyst.contrib.nn.modules.softmax import SoftMax +from catalyst.contrib.nn.modules.arcface import ArcFace +from catalyst.contrib.nn.modules.cosface import CosFace diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py new file mode 100644 index 0000000000..3a86d1674b --- /dev/null +++ b/catalyst/contrib/nn/modules/arcface.py @@ -0,0 +1,86 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class 
ArcFace(nn.Module): + """Implementation of ArcFace loss for metric learning. + + .. _ArcFace: Additive Angular Margin Loss for Deep Face Recognition: + https://arxiv.org/abs/1801.07698v1 + + Example: + >>> layer = ArcFace(5, 10, s=1.31, m=0.5) + >>> loss_fn = nn.CrossEntropyLoss() + >>> embedding = torch.randn(3, 5, requires_grad=True) + >>> target = torch.empty(3, dtype=torch.long).random_(5) + >>> output = layer(embedding, target) + >>> loss = loss_fn(output, target) + >>> loss.backward() + + """ + + def __init__( + self, + in_features: int, + out_features: int, + s: float = 64.0, + m: float = 0.5, + ): + """ + Args: + in_features (int): size of each input sample. + out_features (int): size of each output sample. + s (float, optional): norm of input feature, + Default: ``64.0``. + m (float, optional): margin. + Default: ``0.5``. + """ + super(ArcFace, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.s = s + self.m = m + + self.cos_m = math.cos(m) + self.sin_m = math.sin(m) + self.th = math.cos(math.pi - m) + self.mm = math.sin(math.pi - m) * m + + self.weight = nn.Parameter( + torch.FloatTensor(out_features, in_features) + ) + nn.init.xavier_uniform_(self.weight) + + def __repr__(self) -> str: + return "ArcFace(in_features={},out_features={},s={},m={})".format( + self.in_features, self.out_features, self.s, self.m + ) + + def forward( + self, input: torch.Tensor, target: torch.Tensor + ) -> torch.Tensor: + """ + Args: + input (torch.Tensor): input features, + expected shapes BxF. + target (torch.Tensor): target classes, + expected shapes B. + + Returns: + torch.Tensor with logits. 
+ """ + cosine = F.linear(F.normalize(input), F.normalize(self.weight)) + sine = torch.sqrt(1.0 - torch.pow(cosine, 2)) + phi = cosine * self.cos_m - sine * self.sin_m + + phi = torch.where(cosine > self.th, phi, cosine - self.mm) + + one_hot = torch.zeros(cosine.size()).to(input.device) + one_hot.scatter_(1, target.view(-1, 1).long(), 1) + logits = (one_hot * phi) + ((1.0 - one_hot) * cosine) + logits *= self.s + + return logits diff --git a/catalyst/contrib/nn/modules/cosface.py b/catalyst/contrib/nn/modules/cosface.py new file mode 100644 index 0000000000..5c1b4aa720 --- /dev/null +++ b/catalyst/contrib/nn/modules/cosface.py @@ -0,0 +1,75 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class CosFace(nn.Module): + """Implementation of CosFace loss for metric learning. + + .. _CosFace: Large Margin Cosine Loss for Deep Face Recognition: + https://arxiv.org/abs/1801.09414 + + Example: + >>> layer = CosFace(5, 10, s=1.31, m=0.1) + >>> loss_fn = nn.CrossEntropyLoss() + >>> embedding = torch.randn(3, 5, requires_grad=True) + >>> target = torch.empty(3, dtype=torch.long).random_(5) + >>> output = layer(embedding, target) + >>> loss = loss_fn(output, target) + >>> loss.backward() + + """ + + def __init__( + self, + in_features: int, + out_features: int, + s: float = 64.0, + m: float = 0.35, + ): + """ + Args: + in_features (int): size of each input sample. + out_features (int): size of each output sample. + s (float, optional): norm of input feature, + Default: ``64.0``. + m (float, optional): margin. + Default: ``0.35``. 
+ """ + super(CosFace, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.s = s + self.m = m + + self.weight = nn.Parameter( + torch.FloatTensor(out_features, in_features) + ) + nn.init.xavier_uniform_(self.weight) + + def __repr__(self) -> str: + return "CosFace(in_features={},out_features={},s={},m={})".format( + self.in_features, self.out_features, self.s, self.m + ) + + def forward( + self, input: torch.Tensor, target: torch.Tensor + ) -> torch.Tensor: + """ + Args: + input (torch.Tensor): input features, + expected shapes BxF. + target (torch.Tensor): target classes, + expected shapes B. + + Returns: + torch.Tensor with logits. + """ + cosine = F.linear(F.normalize(input), F.normalize(self.weight)) + phi = cosine - self.m + one_hot = torch.zeros(cosine.size()).to(input.device) + one_hot.scatter_(1, target.view(-1, 1).long(), 1) + logits = (one_hot * phi) + ((1.0 - one_hot) * cosine) + logits *= self.s + + return logits diff --git a/catalyst/contrib/nn/modules/softmax.py b/catalyst/contrib/nn/modules/softmax.py new file mode 100644 index 0000000000..1f094cd50e --- /dev/null +++ b/catalyst/contrib/nn/modules/softmax.py @@ -0,0 +1,49 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class SoftMax(nn.Module): + """Implementation of SoftMax head for metric learning. + + Example: + >>> layer = SoftMax(5, 10) + >>> loss_fn = nn.CrossEntropyLoss() + >>> embedding = torch.randn(3, 5, requires_grad=True) + >>> target = torch.empty(3, dtype=torch.long).random_(5) + >>> output = layer(embedding) + >>> loss = loss_fn(output, target) + >>> loss.backward() + + """ + + def __init__(self, in_features: int, num_classes: int): + """ + Args: + in_features (int): size of each input sample. + num_classes (int): size of each output sample. 
+ """ + super(SoftMax, self).__init__() + self.in_features = in_features + self.out_features = num_classes + self.weight = nn.Parameter(torch.FloatTensor(num_classes, in_features)) + self.bias = nn.Parameter(torch.FloatTensor(num_classes)) + + nn.init.xavier_uniform_(self.weight) + nn.init.zeros_(self.bias) + + def __repr__(self) -> str: + return "SoftMax(in_features={},out_features={})".format( + self.in_features, self.out_features + ) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + """ + Args: + input (torch.Tensor): input features, + expected shapes BxF. + + Returns: + torch.Tensor with loss value. + """ + return F.linear(input, self.weight, self.bias) diff --git a/catalyst/contrib/nn/tests/test_criterion.py b/catalyst/contrib/nn/tests/test_criterion.py index c7ba8137b8..f230071790 100644 --- a/catalyst/contrib/nn/tests/test_criterion.py +++ b/catalyst/contrib/nn/tests/test_criterion.py @@ -1,11 +1,12 @@ # flake8: noqa -import torch import numpy as np + +import torch + from catalyst.contrib.nn import criterion as module from catalyst.contrib.nn.criterion import ( CircleLoss, TripletMarginLossWithSampler, - CosFaceLoss, ) from catalyst.data import AllTripletsSampler @@ -23,82 +24,3 @@ def test_criterion_init(): else: instance = module_class() assert instance is not None - - -def test_cosface_loss(): - emb_size = 4 - n_classes = 3 - s = 3.0 - m = 0.1 - - features = np.array( - [ - [1, 2, 3, 4], - [5, 6, 7, 8], - ], - dtype="f", - ) - target = np.array([0, 2], dtype="l") - projection = np.array( - [ - [0.1, 0.2, 0.3, 0.4], - [1.1, 3.2, 5.3, 0.4], - [0.1, 0.2, 6.3, 0.4], - ], - dtype="f", - ) - - loss_fn = CosFaceLoss(emb_size, n_classes, s, m, reduction="none") - loss_fn.projection.data = torch.from_numpy(projection) - - def normalize(matr): - return ( - matr / np.sqrt((matr ** 2).sum(axis=1))[:, np.newaxis] - ) # for each row - - def softmax(x): - e_x = np.exp(x - np.max(x)) - return e_x / e_x.sum(1)[:, np.newaxis] # for each row - - def 
cross_entropy(preds, targs, axis=None): - print(softmax(preds)) - return -(targs * np.log(softmax(preds))).sum(axis) - - normalized_features = normalize(features) # 2x4 - normalized_projection = normalize(projection) # 3x4 - - cosine = normalized_features @ normalized_projection.T # 2x4 * 4x3 = 2x3 - phi = cosine - m # 2x3 - - mask = np.array([[1, 0, 0], [0, 0, 1]], dtype="l") # one_hot(target) - feats = (mask * phi + (1.0 - mask) * cosine) * s # 2x3 - - expected_loss = cross_entropy(feats, mask, 1) - actual = ( - loss_fn(torch.from_numpy(features), torch.LongTensor(target)) - .detach() - .numpy() - ) - assert np.allclose(expected_loss, actual) - - loss_fn = CosFaceLoss(emb_size, n_classes, s, m, reduction="mean") - loss_fn.projection.data = torch.from_numpy(projection) - - expected_loss = cross_entropy(feats, mask, 1) - actual = ( - loss_fn(torch.from_numpy(features), torch.LongTensor(target)) - .detach() - .numpy() - ) - assert np.isclose(expected_loss.mean(), actual) - - loss_fn = CosFaceLoss(emb_size, n_classes, s, m, reduction="sum") - loss_fn.projection.data = torch.from_numpy(projection) - - expected_loss = cross_entropy(feats, mask, 1) - actual = ( - loss_fn(torch.from_numpy(features), torch.LongTensor(target)) - .detach() - .numpy() - ) - assert np.isclose(expected_loss.sum(), actual) \ No newline at end of file diff --git a/catalyst/contrib/nn/tests/test_modules.py b/catalyst/contrib/nn/tests/test_modules.py new file mode 100644 index 0000000000..e971f1bfbe --- /dev/null +++ b/catalyst/contrib/nn/tests/test_modules.py @@ -0,0 +1,212 @@ +# flake8: noqa +import numpy as np + +import torch +import torch.nn as nn + +from catalyst.contrib.nn.modules import ArcFace, CosFace, SoftMax + + +def normalize(m: np.ndarray) -> np.ndarray: + m_s = np.sqrt((m ** 2).sum(axis=1))[:, np.newaxis] # for each row + return m / m_s + + +def softmax(x: np.ndarray) -> np.ndarray: + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum(1)[:, np.newaxis] # for each row + + +def 
cross_entropy( + preds: np.ndarray, targs: np.ndarray, axis: int = 1 +) -> float: + return -(targs * np.log(softmax(preds))).sum(axis) + + +def test_softmax(): + emb_size = 4 + n_classes = 3 + + # fmt: off + features = np.array( + [ + [1, 2, 3, 4], + [5, 6, 7, 8], + ], + dtype="f", + ) + target = np.array([0, 2], dtype="l") + weight = np.array( + [ + [0.1, 0.2, 0.3, 0.4], + [1.1, 3.2, 5.3, 0.4], + [0.1, 0.2, 6.3, 0.4], + ], + dtype="f", + ) + bias = np.array([0.2, 0.01, 0.1], dtype="f") + # fmt: on + + layer = SoftMax(emb_size, n_classes) + layer.weight.data = torch.from_numpy(weight) + layer.bias.data = torch.from_numpy(bias) + + expected = features @ weight.T + bias + actual = layer(torch.from_numpy(features)).detach().numpy() + assert np.allclose(expected, actual) + + +def test_arcface_with_cross_entropy_loss(): + emb_size = 4 + n_classes = 3 + s = 3.0 + m = 0.5 + + # fmt: off + features = np.array( + [ + [1, 2, 3, 4], + [5, 6, 7, 8], + ], + dtype="f", + ) + target = np.array([0, 2], dtype="l") + weight = np.array( + [ + [0.1, 0.2, 0.3, 0.4], + [1.1, 3.2, 5.3, 0.4], + [0.1, 0.2, 6.3, 0.4], + ], + dtype="f", + ) + # fmt: on + + layer = ArcFace(emb_size, n_classes, s, m) + layer.weight.data = torch.from_numpy(weight) + loss_fn = nn.CrossEntropyLoss(reduction="none") + + normalized_features = normalize(features) # 2x4 + normalized_projection = normalize(weight) # 3x4 + + cosine = normalized_features @ normalized_projection.T # 2x4 * 4x3 = 2x3 + sine = np.sqrt(1 - np.power(cosine, 2)) # 2x3 + phi = cosine * np.cos(m) - sine * np.sin(m) # 2x3 + phi = np.where( + cosine > np.cos(np.pi - m), phi, cosine - np.sin(np.pi - m) * m + ) # 2x3 + + mask = np.array([[1, 0, 0], [0, 0, 1]], dtype="l") # one_hot(target) + feats = (mask * phi + (1.0 - mask) * cosine) * s # 2x3 + + expected_loss = cross_entropy(feats, mask, 1) + actual = ( + loss_fn( + layer(torch.from_numpy(features), torch.LongTensor(target)), + torch.LongTensor(target), + ) + .detach() + .numpy() + ) + assert 
np.allclose(expected_loss, actual) + + loss_fn = nn.CrossEntropyLoss(reduction="mean") + + expected_loss = cross_entropy(feats, mask, 1) + actual = ( + loss_fn( + layer(torch.from_numpy(features), torch.LongTensor(target)), + torch.LongTensor(target), + ) + .detach() + .numpy() + ) + assert np.isclose(expected_loss.mean(), actual) + + loss_fn = nn.CrossEntropyLoss(reduction="sum") + + expected_loss = cross_entropy(feats, mask, 1) + actual = ( + loss_fn( + layer(torch.from_numpy(features), torch.LongTensor(target)), + torch.LongTensor(target), + ) + .detach() + .numpy() + ) + assert np.isclose(expected_loss.sum(), actual) + + +def test_cosface_with_cross_entropy_loss(): + emb_size = 4 + n_classes = 3 + s = 3.0 + m = 0.1 + + # fmt: off + features = np.array( + [ + [1, 2, 3, 4], + [5, 6, 7, 8], + ], + dtype="f", + ) + target = np.array([0, 2], dtype="l") + weight = np.array( + [ + [0.1, 0.2, 0.3, 0.4], + [1.1, 3.2, 5.3, 0.4], + [0.1, 0.2, 6.3, 0.4], + ], + dtype="f", + ) + # fmt: on + + layer = CosFace(emb_size, n_classes, s, m) + layer.weight.data = torch.from_numpy(weight) + loss_fn = nn.CrossEntropyLoss(reduction="none") + + normalized_features = normalize(features) # 2x4 + normalized_projection = normalize(weight) # 3x4 + + cosine = normalized_features @ normalized_projection.T # 2x4 * 4x3 = 2x3 + phi = cosine - m # 2x3 + + mask = np.array([[1, 0, 0], [0, 0, 1]], dtype="l") # one_hot(target) + feats = (mask * phi + (1.0 - mask) * cosine) * s # 2x3 + + expected_loss = cross_entropy(feats, mask, 1) + actual = ( + loss_fn( + layer(torch.from_numpy(features), torch.LongTensor(target)), + torch.LongTensor(target), + ) + .detach() + .numpy() + ) + assert np.allclose(expected_loss, actual) + + loss_fn = nn.CrossEntropyLoss(reduction="mean") + + expected_loss = cross_entropy(feats, mask, 1) + actual = ( + loss_fn( + layer(torch.from_numpy(features), torch.LongTensor(target)), + torch.LongTensor(target), + ) + .detach() + .numpy() + ) + assert 
np.isclose(expected_loss.mean(), actual) + + loss_fn = nn.CrossEntropyLoss(reduction="sum") + + expected_loss = cross_entropy(feats, mask, 1) + actual = ( + loss_fn( + layer(torch.from_numpy(features), torch.LongTensor(target)), + torch.LongTensor(target), + ) + .detach() + .numpy() + ) + assert np.isclose(expected_loss.sum(), actual) From 2893c1bdc4986c91050b21e1ee2f72154cb1e17d Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Sun, 20 Sep 2020 16:19:55 +0300 Subject: [PATCH 07/23] docs --- docs/api/contrib.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/api/contrib.rst b/docs/api/contrib.rst index 7039a8a812..ebfd99dfd7 100644 --- a/docs/api/contrib.rst +++ b/docs/api/contrib.rst @@ -250,6 +250,13 @@ Wing Modules ~~~~~~~~~~~~~~~~ +ArcFace: Additive Angular Margin Loss for Deep Face Recognition +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.. automodule:: catalyst.contrib.nn.modules.arcface + :members: + :undoc-members: + :show-inheritance: + Common modules """""""""""""""""""""""""""""""""""""""""" .. automodule:: catalyst.contrib.nn.modules.common @@ -257,6 +264,13 @@ Common modules :undoc-members: :show-inheritance: +CosFace: Large Margin Cosine Loss for Deep Face Recognition +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.. automodule:: catalyst.contrib.nn.modules.cosface + :members: + :undoc-members: + :show-inheritance: + Last-Mean-Average-Attention (LAMA)-Pooling """""""""""""""""""""""""""""""""""""""""" .. automodule:: catalyst.contrib.nn.modules.lama @@ -285,6 +299,12 @@ SqueezeAndExcitation :undoc-members: :show-inheritance: +SoftMax +"""""""""""""""""""""""""""""""""""""""""" +.. 
automodule:: catalyst.contrib.nn.modules.softmax + :members: + :undoc-members: + :show-inheritance: Optimizers From e8ae3f75c658df9ca33c9ed9750845e179344fc7 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Sun, 20 Sep 2020 16:21:34 +0300 Subject: [PATCH 08/23] softmax, cosface, arcface layers --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c225eb0f9..cf0782374b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added - Runner registry support for Config API ([#936](https://github.com/catalyst-team/catalyst/pull/936)) +- SoftMax, CosFace, ArcFace layers to contrib ([#939](https://github.com/catalyst-team/catalyst/pull/939)) ### Changed From 2f401d2034e840c0313a8c99666ebb69eb1732b1 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Sun, 20 Sep 2020 20:16:08 +0300 Subject: [PATCH 09/23] docs for __repr__ --- catalyst/contrib/nn/modules/arcface.py | 1 + catalyst/contrib/nn/modules/cosface.py | 1 + catalyst/contrib/nn/modules/softmax.py | 1 + 3 files changed, 3 insertions(+) diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py index 3a86d1674b..afa023ad89 100644 --- a/catalyst/contrib/nn/modules/arcface.py +++ b/catalyst/contrib/nn/modules/arcface.py @@ -55,6 +55,7 @@ def __init__( nn.init.xavier_uniform_(self.weight) def __repr__(self) -> str: + """ArcFace representation.""" return "ArcFace(in_features={},out_features={},s={},m={})".format( self.in_features, self.out_features, self.s, self.m ) diff --git a/catalyst/contrib/nn/modules/cosface.py b/catalyst/contrib/nn/modules/cosface.py index 5c1b4aa720..59337b2a9e 100644 --- a/catalyst/contrib/nn/modules/cosface.py +++ b/catalyst/contrib/nn/modules/cosface.py @@ -48,6 +48,7 @@ def __init__( nn.init.xavier_uniform_(self.weight) def __repr__(self) -> str: + """CosFace representation.""" return 
"CosFace(in_features={},out_features={},s={},m={})".format( self.in_features, self.out_features, self.s, self.m ) diff --git a/catalyst/contrib/nn/modules/softmax.py b/catalyst/contrib/nn/modules/softmax.py index 1f094cd50e..26ec19e487 100644 --- a/catalyst/contrib/nn/modules/softmax.py +++ b/catalyst/contrib/nn/modules/softmax.py @@ -33,6 +33,7 @@ def __init__(self, in_features: int, num_classes: int): nn.init.zeros_(self.bias) def __repr__(self) -> str: + """SoftMax representation.""" return "SoftMax(in_features={},out_features={})".format( self.in_features, self.out_features ) From ee26d7a1980795aa386265ca11ca0d856d0c9b53 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Sun, 20 Sep 2020 20:40:44 +0300 Subject: [PATCH 10/23] another docs fix --- catalyst/contrib/nn/modules/arcface.py | 2 +- catalyst/contrib/nn/modules/cosface.py | 2 +- catalyst/contrib/nn/modules/softmax.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py index afa023ad89..88cbaac625 100644 --- a/catalyst/contrib/nn/modules/arcface.py +++ b/catalyst/contrib/nn/modules/arcface.py @@ -55,7 +55,7 @@ def __init__( nn.init.xavier_uniform_(self.weight) def __repr__(self) -> str: - """ArcFace representation.""" + """Object representation.""" return "ArcFace(in_features={},out_features={},s={},m={})".format( self.in_features, self.out_features, self.s, self.m ) diff --git a/catalyst/contrib/nn/modules/cosface.py b/catalyst/contrib/nn/modules/cosface.py index 59337b2a9e..cb51ec5fd7 100644 --- a/catalyst/contrib/nn/modules/cosface.py +++ b/catalyst/contrib/nn/modules/cosface.py @@ -48,7 +48,7 @@ def __init__( nn.init.xavier_uniform_(self.weight) def __repr__(self) -> str: - """CosFace representation.""" + """Object representation.""" return "CosFace(in_features={},out_features={},s={},m={})".format( self.in_features, self.out_features, self.s, self.m ) diff --git 
a/catalyst/contrib/nn/modules/softmax.py b/catalyst/contrib/nn/modules/softmax.py index 26ec19e487..49d84e716d 100644 --- a/catalyst/contrib/nn/modules/softmax.py +++ b/catalyst/contrib/nn/modules/softmax.py @@ -33,7 +33,7 @@ def __init__(self, in_features: int, num_classes: int): nn.init.zeros_(self.bias) def __repr__(self) -> str: - """SoftMax representation.""" + """"Object representation.""" return "SoftMax(in_features={},out_features={})".format( self.in_features, self.out_features ) From 0887d13181a026de7d99ad15ca268014b8bd48bb Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Sun, 20 Sep 2020 20:58:43 +0300 Subject: [PATCH 11/23] and another docs fix --- catalyst/contrib/nn/modules/softmax.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalyst/contrib/nn/modules/softmax.py b/catalyst/contrib/nn/modules/softmax.py index 49d84e716d..f17c5aa3ef 100644 --- a/catalyst/contrib/nn/modules/softmax.py +++ b/catalyst/contrib/nn/modules/softmax.py @@ -33,7 +33,7 @@ def __init__(self, in_features: int, num_classes: int): nn.init.zeros_(self.bias) def __repr__(self) -> str: - """"Object representation.""" + """Object representation.""" return "SoftMax(in_features={},out_features={})".format( self.in_features, self.out_features ) From e95ac6fa9df15f64e420a8766b03f92c22f1e061 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Fri, 25 Sep 2020 22:17:24 +0300 Subject: [PATCH 12/23] fixed arcface --- catalyst/contrib/nn/modules/arcface.py | 41 +++++++++++++---------- catalyst/contrib/nn/modules/cosface.py | 9 +++-- catalyst/contrib/nn/modules/softmax.py | 9 +++-- catalyst/contrib/nn/tests/test_modules.py | 15 ++++----- 4 files changed, 44 insertions(+), 30 deletions(-) diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py index 88cbaac625..f98d3df51a 100644 --- a/catalyst/contrib/nn/modules/arcface.py +++ b/catalyst/contrib/nn/modules/arcface.py @@ -28,6 +28,7 @@ def __init__( out_features: int, s: float = 
64.0, m: float = 0.5, + eps: float = 1e-6, ): """ Args: @@ -37,17 +38,16 @@ def __init__( Default: ``64.0``. m (float, optional): margin. Default: ``0.5``. + eps (float, optional): operation accuracy. + Default: ``1e-6``. """ super(ArcFace, self).__init__() self.in_features = in_features self.out_features = out_features self.s = s self.m = m - - self.cos_m = math.cos(m) - self.sin_m = math.sin(m) - self.th = math.cos(math.pi - m) - self.mm = math.sin(math.pi - m) * m + self.threshold = math.pi - m + self.eps = eps self.weight = nn.Parameter( torch.FloatTensor(out_features, in_features) @@ -56,8 +56,8 @@ def __init__( def __repr__(self) -> str: """Object representation.""" - return "ArcFace(in_features={},out_features={},s={},m={})".format( - self.in_features, self.out_features, self.s, self.m + return "ArcFace(in_features={},out_features={},s={},m={},eps={})".format( + self.in_features, self.out_features, self.s, self.m, self.eps ) def forward( @@ -66,22 +66,29 @@ def forward( """ Args: input (torch.Tensor): input features, - expected shapes BxF. + expected shapes ``BxF`` where ``B`` + is batch dimension and ``F`` is an + input feature dimension. target (torch.Tensor): target classes, - expected shapes B. + expected shapes ``B`` where + ``B`` is batch dimension. Returns: - torch.Tensor with loss value. + logits tensor with shapes ``BxC`` where C is a number of classes. 
""" - cosine = F.linear(F.normalize(input), F.normalize(self.weight)) - sine = torch.sqrt(1.0 - torch.pow(cosine, 2)) - phi = cosine * self.cos_m - sine * self.sin_m - - phi = torch.where(cosine > self.th, phi, cosine - self.mm) + cos_theta = F.linear(F.normalize(input), F.normalize(self.weight)) + theta = torch.acos( + torch.clamp(cos_theta, -1.0 + self.eps, 1.0 - self.eps) + ) - one_hot = torch.zeros(cosine.size()).to(input.device) + one_hot = torch.zeros_like(cos_theta, device=input.device) one_hot.scatter_(1, target.view(-1, 1).long(), 1) - logits = (one_hot * phi) + ((1.0 - one_hot) * cosine) + + mask = torch.where( + theta > self.threshold, torch.zeros_like(one_hot), one_hot + ) + + logits = torch.cos(torch.where(mask.bool(), theta + self.m, theta)) logits *= self.s return logits diff --git a/catalyst/contrib/nn/modules/cosface.py b/catalyst/contrib/nn/modules/cosface.py index cb51ec5fd7..5a9847dafa 100644 --- a/catalyst/contrib/nn/modules/cosface.py +++ b/catalyst/contrib/nn/modules/cosface.py @@ -59,12 +59,15 @@ def forward( """ Args: input (torch.Tensor): input features, - expected shapes BxF. + expected shapes ``BxF`` where ``B`` + is batch dimension and ``F`` is an + input feature dimension. target (torch.Tensor): target classes, - expected shapes B. + expected shapes ``B`` where + ``B`` is batch dimension. Returns: - torch.Tensor with loss value. + logits tensor with shapes ``BxC`` where C is a number of classes. """ cosine = F.linear(F.normalize(input), F.normalize(self.weight)) phi = cosine - self.m diff --git a/catalyst/contrib/nn/modules/softmax.py b/catalyst/contrib/nn/modules/softmax.py index f17c5aa3ef..4a815afe25 100644 --- a/catalyst/contrib/nn/modules/softmax.py +++ b/catalyst/contrib/nn/modules/softmax.py @@ -42,9 +42,14 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: """ Args: input (torch.Tensor): input features, - expected shapes BxF. 
+ expected shapes ``BxF`` where ``B`` + is batch dimension and ``F`` is an + input feature dimension. + target (torch.Tensor): target classes, + expected shapes ``B`` where + ``B`` is batch dimension. Returns: - torch.Tensor with loss value. + logits tensor with shapes ``BxC`` where C is a number of classes. """ return F.linear(input, self.weight, self.bias) diff --git a/catalyst/contrib/nn/tests/test_modules.py b/catalyst/contrib/nn/tests/test_modules.py index e971f1bfbe..a99ecd8140 100644 --- a/catalyst/contrib/nn/tests/test_modules.py +++ b/catalyst/contrib/nn/tests/test_modules.py @@ -61,6 +61,7 @@ def test_arcface_with_cross_entropy_loss(): n_classes = 3 s = 3.0 m = 0.5 + eps = 1e-8 # fmt: off features = np.array( @@ -81,7 +82,7 @@ def test_arcface_with_cross_entropy_loss(): ) # fmt: on - layer = ArcFace(emb_size, n_classes, s, m) + layer = ArcFace(emb_size, n_classes, s, m, eps) layer.weight.data = torch.from_numpy(weight) loss_fn = nn.CrossEntropyLoss(reduction="none") @@ -89,14 +90,12 @@ def test_arcface_with_cross_entropy_loss(): normalized_projection = normalize(weight) # 3x4 cosine = normalized_features @ normalized_projection.T # 2x4 * 4x3 = 2x3 - sine = np.sqrt(1 - np.power(cosine, 2)) # 2x3 - phi = cosine * np.cos(m) - sine * np.sin(m) # 2x3 - phi = np.where( - cosine > np.cos(np.pi - m), phi, cosine - np.sin(np.pi - m) * m - ) # 2x3 + theta = np.arccos(np.clip(cosine, -1 + eps, 1 - eps)) # 2x3 - mask = np.array([[1, 0, 0], [0, 0, 1]], dtype="l") # one_hot(target) - feats = (mask * phi + (1.0 - mask) * cosine) * s # 2x3 + # one_hot(target) + mask = np.array([[1, 0, 0], [0, 0, 1]], dtype="l") + mask = np.where(theta > (np.pi - m), np.zeros_like(mask), mask) # 2x3 + feats = np.cos(np.where(mask > 0, theta + m, theta)) * s # 2x3 expected_loss = cross_entropy(feats, mask, 1) actual = ( From 6c5c5d9cdf8e33d45183abec6c308651df8e43c4 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Sat, 26 Sep 2020 00:21:38 +0300 Subject: [PATCH 13/23] fix: docs --- 
catalyst/contrib/nn/modules/arcface.py | 3 ++- catalyst/contrib/nn/modules/cosface.py | 3 ++- catalyst/contrib/nn/modules/softmax.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py index f98d3df51a..57af46db9a 100644 --- a/catalyst/contrib/nn/modules/arcface.py +++ b/catalyst/contrib/nn/modules/arcface.py @@ -74,7 +74,8 @@ def forward( ``B`` is batch dimension. Returns: - logits tensor with shapes ``BxC`` where C is a number of classes. + logits tensor with shapes ``BxC`` + where C is a number of classes. """ cos_theta = F.linear(F.normalize(input), F.normalize(self.weight)) theta = torch.acos( diff --git a/catalyst/contrib/nn/modules/cosface.py b/catalyst/contrib/nn/modules/cosface.py index 5a9847dafa..2dcb68a38e 100644 --- a/catalyst/contrib/nn/modules/cosface.py +++ b/catalyst/contrib/nn/modules/cosface.py @@ -67,7 +67,8 @@ def forward( ``B`` is batch dimension. Returns: - logits tensor with shapes ``BxC`` where C is a number of classes. + logits tensor with shapes ``BxC`` + where C is a number of classes. """ cosine = F.linear(F.normalize(input), F.normalize(self.weight)) phi = cosine - self.m diff --git a/catalyst/contrib/nn/modules/softmax.py b/catalyst/contrib/nn/modules/softmax.py index 4a815afe25..e05d44f6f1 100644 --- a/catalyst/contrib/nn/modules/softmax.py +++ b/catalyst/contrib/nn/modules/softmax.py @@ -50,6 +50,7 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: ``B`` is batch dimension. Returns: - logits tensor with shapes ``BxC`` where C is a number of classes. + logits tensor with shapes ``BxC`` + where C is a number of classes. 
""" return F.linear(input, self.weight, self.bias) From e27db82f35733e6e439c146ffb1ca74bf4f0669d Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Sat, 26 Sep 2020 02:31:25 +0300 Subject: [PATCH 14/23] fixed docs --- catalyst/contrib/nn/modules/arcface.py | 18 ++++++++++++++---- catalyst/contrib/nn/modules/cosface.py | 4 ++-- catalyst/contrib/nn/modules/softmax.py | 7 ++----- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py index 57af46db9a..4966ec653b 100644 --- a/catalyst/contrib/nn/modules/arcface.py +++ b/catalyst/contrib/nn/modules/arcface.py @@ -56,8 +56,18 @@ def __init__( def __repr__(self) -> str: """Object representation.""" - return "ArcFace(in_features={},out_features={},s={},m={},eps={})".format( - self.in_features, self.out_features, self.s, self.m, self.eps + return ( + "ArcFace(" + + ",".join( + [ + f"in_features={self.in_features}", + f"out_features={self.out_features}", + f"s={self.s}", + f"m={self.m}", + f"eps={self.eps}", + ] + ) + + ")" ) def forward( @@ -74,8 +84,8 @@ def forward( ``B`` is batch dimension. Returns: - logits tensor with shapes ``BxC`` - where C is a number of classes. + tensor (logits) with shapes ``BxC`` + where ``C`` is a number of classes. """ cos_theta = F.linear(F.normalize(input), F.normalize(self.weight)) theta = torch.acos( diff --git a/catalyst/contrib/nn/modules/cosface.py b/catalyst/contrib/nn/modules/cosface.py index 2dcb68a38e..82ee6b2bdb 100644 --- a/catalyst/contrib/nn/modules/cosface.py +++ b/catalyst/contrib/nn/modules/cosface.py @@ -67,8 +67,8 @@ def forward( ``B`` is batch dimension. Returns: - logits tensor with shapes ``BxC`` - where C is a number of classes. + tensor (logits) with shapes ``BxC`` + where ``C`` is a number of classes. 
""" cosine = F.linear(F.normalize(input), F.normalize(self.weight)) phi = cosine - self.m diff --git a/catalyst/contrib/nn/modules/softmax.py b/catalyst/contrib/nn/modules/softmax.py index e05d44f6f1..72d07dbb34 100644 --- a/catalyst/contrib/nn/modules/softmax.py +++ b/catalyst/contrib/nn/modules/softmax.py @@ -45,12 +45,9 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: expected shapes ``BxF`` where ``B`` is batch dimension and ``F`` is an input feature dimension. - target (torch.Tensor): target classes, - expected shapes ``B`` where - ``B`` is batch dimension. Returns: - logits tensor with shapes ``BxC`` - where C is a number of classes. + tensor (logits) with shapes ``BxC`` + where ``C`` is a number of classes. """ return F.linear(input, self.weight, self.bias) From ef0d14712923958c1b774fcd85b1d2bba430d047 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Wed, 30 Sep 2020 23:19:54 +0300 Subject: [PATCH 15/23] new docs format & SubCenterArcFace --- catalyst/contrib/nn/modules/__init__.py | 2 +- catalyst/contrib/nn/modules/arcface.py | 122 +++++++++++++++++++++--- catalyst/contrib/nn/modules/cosface.py | 21 ++-- catalyst/contrib/nn/modules/softmax.py | 9 +- 4 files changed, 124 insertions(+), 30 deletions(-) diff --git a/catalyst/contrib/nn/modules/__init__.py b/catalyst/contrib/nn/modules/__init__.py index 96401825eb..8ec4d226c7 100644 --- a/catalyst/contrib/nn/modules/__init__.py +++ b/catalyst/contrib/nn/modules/__init__.py @@ -33,5 +33,5 @@ ) from catalyst.contrib.nn.modules.softmax import SoftMax -from catalyst.contrib.nn.modules.arcface import ArcFace +from catalyst.contrib.nn.modules.arcface import ArcFace, SubCenterArcFace from catalyst.contrib.nn.modules.cosface import CosFace diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py index 4966ec653b..1a8e7c2908 100644 --- a/catalyst/contrib/nn/modules/arcface.py +++ b/catalyst/contrib/nn/modules/arcface.py @@ -6,11 +6,21 @@ class ArcFace(nn.Module): - 
"""Implementation of ArcFace loss for metric learning. + """Implementation of `ArcFace: Additive Angular Margin Loss for Deep Face Recognition`_. - .. _ArcFace: Additive Angular Margin Loss for Deep Face Recognition: + .. _ArcFace\: Additive Angular Margin Loss for Deep Face Recognition: https://arxiv.org/abs/1801.07698v1 + Args: + in_features: size of each input sample. + out_features: size of each output sample. + s: norm of input feature. + Default: ``64.0``. + m: margin. + Default: ``0.5``. + eps: operation accuracy. + Default: ``1e-6``. + Example: >>> layer = ArcFace(5, 10, s=1.31, m=0.5) >>> loss_fn = nn.CrosEntropyLoss() @@ -30,17 +40,6 @@ def __init__( m: float = 0.5, eps: float = 1e-6, ): - """ - Args: - in_features (int): size of each input sample. - out_features (int): size of each output sample. - s (float, optional): norm of input feature, - Default: ``64.0``. - m (float, optional): margin. - Default: ``0.5``. - eps (float, optional): operation accuracy. - Default: ``1e-6``. - """ super(ArcFace, self).__init__() self.in_features = in_features self.out_features = out_features @@ -103,3 +102,100 @@ def forward( logits *= self.s return logits + + +class SubCenterArcFace(nn.Module): + """Implementation of `Sub-center ArcFace: Boosting Face Recognition by Large-scale Noisy Web Faces`_. + + .. _Sub-center ArcFace\: Boosting Face Recognition by Large-scale Noisy Web Faces: + https://ibug.doc.ic.ac.uk/media/uploads/documents/eccv_1445.pdf + + Args: + in_features: size of each input sample. + out_features: size of each output sample. + s: norm of input feature, + Default: ``64.0``. + m: margin. + Default: ``0.5``. + k: number of possible class centroids. + Default: ``3``. + eps (float, optional): operation accuracy. + Default: ``1e-6``. 
+ + Example: + >>> layer = SubCenterArcFace(5, 10, s=1.31, m=0.35, k=2) + >>> loss_fn = nn.CrosEntropyLoss() + >>> embedding = torch.randn(3, 5, requires_grad=True) + >>> target = torch.empty(3, dtype=torch.long).random_(5) + >>> output = layer(embedding, target) + >>> loss = loss_fn(output, target) + >>> loss.backward() + + """ + + def __init__( + self, + in_features: int, + out_features: int, + s: float = 64.0, + m: float = 0.50, + k: int = 3, + eps: float = 1e-6, + ): + super(SubCenterArcFace, self).__init__() + self.in_features = in_features + self.out_features = out_features + + self.s = s + self.m = m + self.k = k + self.eps = eps + + self.weight = nn.Parameter( + torch.FloatTensor(k, in_features, out_features) + ) + nn.init.xavier_uniform_(self.weight) + + self.threshold = math.pi - self.m + + def __repr__(self) -> str: + """Object representation.""" + return ( + "SubCenterArcFace(" + + ",".join( + [ + f"in_features={self.in_features}", + f"out_features={self.out_features}", + f"s={self.s}", + f"m={self.m}", + f"k={self.k}", + f"eps={self.eps}", + ] + ) + + ")" + ) + + def forward(self, input, label): + cos_theta = torch.bmm( + F.normalize(input) + .unsqueeze(0) + .expand(self.k, *input.shape), # k*b*f + F.normalize( + self.weight, dim=1 + ), # normalize in_features dim # k*f*c + ) # k*b*f + cos_theta = torch.max(cos_theta, dim=0)[0] # b*f + theta = torch.acos( + torch.clamp(cos_theta, -1.0 + self.eps, 1.0 - self.eps) + ) + + one_hot = torch.zeros(cos_theta.size()).to(input.device) + one_hot.scatter_(1, label.view(-1, 1).long(), 1) + + selected = torch.where( + theta > self.threshold, torch.zeros_like(one_hot), one_hot + ) + + output = torch.cos(torch.where(selected.bool(), theta + self.m, theta)) + output *= self.s + return output diff --git a/catalyst/contrib/nn/modules/cosface.py b/catalyst/contrib/nn/modules/cosface.py index 82ee6b2bdb..6c159029b1 100644 --- a/catalyst/contrib/nn/modules/cosface.py +++ b/catalyst/contrib/nn/modules/cosface.py @@ -4,11 
+4,19 @@ class CosFace(nn.Module): - """Implementation of CosFace loss for metric learning. + """Implementation of `CosFace\: Large Margin Cosine Loss for Deep Face Recognition`_. - .. _CosFace: Large Margin Cosine Loss for Deep Face Recognition: + .. _CosFace\: Large Margin Cosine Loss for Deep Face Recognition: https://arxiv.org/abs/1801.09414 + Args: + in_features: size of each input sample. + out_features: size of each output sample. + s: norm of input feature. + Default: ``64.0``. + m: margin. + Default: ``0.35``. + Example: >>> layer = CosFaceLoss(5, 10, s=1.31, m=0.1) >>> loss_fn = nn.CrosEntropyLoss() @@ -27,15 +35,6 @@ def __init__( s: float = 64.0, m: float = 0.35, ): - """ - Args: - in_features (int): size of each input sample. - out_features (int): size of each output sample. - s (float, optional): norm of input feature, - Default: ``64.0``. - m (float, optional): margin. - Default: ``0.35``. - """ super(CosFace, self).__init__() self.in_features = in_features self.out_features = out_features diff --git a/catalyst/contrib/nn/modules/softmax.py b/catalyst/contrib/nn/modules/softmax.py index 72d07dbb34..6d881b4039 100644 --- a/catalyst/contrib/nn/modules/softmax.py +++ b/catalyst/contrib/nn/modules/softmax.py @@ -6,6 +6,10 @@ class SoftMax(nn.Module): """Implementation of SoftMax head for metric learning. + Args: + in_features (int): size of each input sample. + num_classes (int): size of each output sample. + Example: >>> layer = SoftMax() >>> loss_fn = nn.CrosEntropyLoss() @@ -18,11 +22,6 @@ class SoftMax(nn.Module): """ def __init__(self, in_features: int, num_classes: int): - """ - Args: - in_features (int): size of each input sample. - num_classes (int): size of each output sample. 
- """ super(SoftMax, self).__init__() self.in_features = in_features self.out_features = num_classes From 46508aad9203a46430638507b4e32f2aa9ed69a7 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Wed, 30 Sep 2020 23:21:00 +0300 Subject: [PATCH 16/23] arcface title --- docs/api/contrib.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/contrib.rst b/docs/api/contrib.rst index c238f7c07f..d48bbbb9a7 100644 --- a/docs/api/contrib.rst +++ b/docs/api/contrib.rst @@ -250,7 +250,7 @@ Wing Modules ~~~~~~~~~~~~~~~~ -ArcFace: Additive Angular Margin Loss for Deep Face Recognition +ArcFace and SubCenterArcFace """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" .. automodule:: catalyst.contrib.nn.modules.arcface :members: From ed6a1c51060ba803d9c90c618073c67625d0f11f Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Wed, 30 Sep 2020 23:54:13 +0300 Subject: [PATCH 17/23] docs --- catalyst/contrib/nn/modules/arcface.py | 22 ++++++++++++++++++---- catalyst/contrib/nn/modules/cosface.py | 11 +++++++++-- catalyst/contrib/nn/modules/softmax.py | 8 +++++++- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py index 1a8e7c2908..0a4b539c06 100644 --- a/catalyst/contrib/nn/modules/arcface.py +++ b/catalyst/contrib/nn/modules/arcface.py @@ -6,7 +6,8 @@ class ArcFace(nn.Module): - """Implementation of `ArcFace: Additive Angular Margin Loss for Deep Face Recognition`_. + """Implementation of + `ArcFace: Additive Angular Margin Loss for Deep Face Recognition`_. .. _ArcFace\: Additive Angular Margin Loss for Deep Face Recognition: https://arxiv.org/abs/1801.07698v1 @@ -21,6 +22,12 @@ class ArcFace(nn.Module): eps: operation accuracy. Default: ``1e-6``. + Shape: + - Input: :math:`(batch, H_{in})` where + :math:`H_{in} = in\_features`. + - Output: :math:`(batch, H_{out})` where + :math:`H_{out} = out\_features`. 
+ Example: >>> layer = ArcFace(5, 10, s=1.31, m=0.5) >>> loss_fn = nn.CrosEntropyLoss() @@ -39,7 +46,7 @@ def __init__( s: float = 64.0, m: float = 0.5, eps: float = 1e-6, - ): + ): # noqa: D107 super(ArcFace, self).__init__() self.in_features = in_features self.out_features = out_features @@ -105,7 +112,8 @@ def forward( class SubCenterArcFace(nn.Module): - """Implementation of `Sub-center ArcFace: Boosting Face Recognition by Large-scale Noisy Web Faces`_. + """Implementation of + `Sub-center ArcFace: Boosting Face Recognition by Large-scale Noisy Web Faces`_. .. _Sub-center ArcFace\: Boosting Face Recognition by Large-scale Noisy Web Faces: https://ibug.doc.ic.ac.uk/media/uploads/documents/eccv_1445.pdf @@ -122,6 +130,12 @@ class SubCenterArcFace(nn.Module): eps (float, optional): operation accuracy. Default: ``1e-6``. + Shape: + - Input: :math:`(batch, H_{in})` where + :math:`H_{in} = in\_features`. + - Output: :math:`(batch, H_{out})` where + :math:`H_{out} = out\_features`. + Example: >>> layer = SubCenterArcFace(5, 10, s=1.31, m=0.35, k=2) >>> loss_fn = nn.CrosEntropyLoss() @@ -141,7 +155,7 @@ def __init__( m: float = 0.50, k: int = 3, eps: float = 1e-6, - ): + ): # noqa: D107 super(SubCenterArcFace, self).__init__() self.in_features = in_features self.out_features = out_features diff --git a/catalyst/contrib/nn/modules/cosface.py b/catalyst/contrib/nn/modules/cosface.py index 6c159029b1..66b63bd4d3 100644 --- a/catalyst/contrib/nn/modules/cosface.py +++ b/catalyst/contrib/nn/modules/cosface.py @@ -4,7 +4,8 @@ class CosFace(nn.Module): - """Implementation of `CosFace\: Large Margin Cosine Loss for Deep Face Recognition`_. + """Implementation of + `CosFace\: Large Margin Cosine Loss for Deep Face Recognition`_. .. _CosFace\: Large Margin Cosine Loss for Deep Face Recognition: https://arxiv.org/abs/1801.09414 @@ -17,6 +18,12 @@ class CosFace(nn.Module): m: margin. Default: ``0.35``. 
+ Shape: + - Input: :math:`(batch, H_{in})` where + :math:`H_{in} = in\_features`. + - Output: :math:`(batch, H_{out})` where + :math:`H_{out} = out\_features`. + Example: >>> layer = CosFaceLoss(5, 10, s=1.31, m=0.1) >>> loss_fn = nn.CrosEntropyLoss() @@ -34,7 +41,7 @@ def __init__( out_features: int, s: float = 64.0, m: float = 0.35, - ): + ): # noqa: D107 super(CosFace, self).__init__() self.in_features = in_features self.out_features = out_features diff --git a/catalyst/contrib/nn/modules/softmax.py b/catalyst/contrib/nn/modules/softmax.py index 6d881b4039..bcf2644181 100644 --- a/catalyst/contrib/nn/modules/softmax.py +++ b/catalyst/contrib/nn/modules/softmax.py @@ -10,6 +10,12 @@ class SoftMax(nn.Module): in_features (int): size of each input sample. num_classes (int): size of each output sample. + Shape: + - Input: :math:`(batch, H_{in})` where + :math:`H_{in} = in\_features`. + - Output: :math:`(batch, H_{out})` where + :math:`H_{out} = out\_features`. + Example: >>> layer = SoftMax() >>> loss_fn = nn.CrosEntropyLoss() @@ -21,7 +27,7 @@ class SoftMax(nn.Module): """ - def __init__(self, in_features: int, num_classes: int): + def __init__(self, in_features: int, num_classes: int): # noqa: D107 super(SoftMax, self).__init__() self.in_features = in_features self.out_features = num_classes From c621648d5cc27975533e357b8eb81536e18c73b4 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Wed, 30 Sep 2020 23:55:28 +0300 Subject: [PATCH 18/23] docs for forward method --- catalyst/contrib/nn/modules/arcface.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py index 0a4b539c06..1f5319d6ef 100644 --- a/catalyst/contrib/nn/modules/arcface.py +++ b/catalyst/contrib/nn/modules/arcface.py @@ -190,6 +190,20 @@ def __repr__(self) -> str: ) def forward(self, input, label): + """ + Args: + input (torch.Tensor): input features, + expected shapes ``BxF`` where ``B`` + is batch 
dimension and ``F`` is an + input feature dimension. + target (torch.Tensor): target classes, + expected shapes ``B`` where + ``B`` is batch dimension. + + Returns: + tensor (logits) with shapes ``BxC`` + where ``C`` is a number of classes. + """ cos_theta = torch.bmm( F.normalize(input) .unsqueeze(0) From 90418ca2011cd4f10ab9269c4f7be5c624ef1c0e Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Wed, 30 Sep 2020 23:58:19 +0300 Subject: [PATCH 19/23] typings & docs --- catalyst/contrib/nn/modules/arcface.py | 6 ++---- catalyst/contrib/nn/modules/cosface.py | 4 +--- catalyst/contrib/nn/modules/softmax.py | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py index 1f5319d6ef..8e93da081e 100644 --- a/catalyst/contrib/nn/modules/arcface.py +++ b/catalyst/contrib/nn/modules/arcface.py @@ -76,9 +76,7 @@ def __repr__(self) -> str: + ")" ) - def forward( - self, input: torch.Tensor, target: torch.Tensor - ) -> torch.Tensor: + def forward(self, input, target): """ Args: input (torch.Tensor): input features, @@ -196,7 +194,7 @@ def forward(self, input, label): expected shapes ``BxF`` where ``B`` is batch dimension and ``F`` is an input feature dimension. - target (torch.Tensor): target classes, + label (torch.Tensor): target classes, expected shapes ``B`` where ``B`` is batch dimension. 
diff --git a/catalyst/contrib/nn/modules/cosface.py b/catalyst/contrib/nn/modules/cosface.py index 66b63bd4d3..72112cc363 100644 --- a/catalyst/contrib/nn/modules/cosface.py +++ b/catalyst/contrib/nn/modules/cosface.py @@ -59,9 +59,7 @@ def __repr__(self) -> str: self.in_features, self.out_features, self.s, self.m ) - def forward( - self, input: torch.Tensor, target: torch.Tensor - ) -> torch.Tensor: + def forward(self, input, target): """ Args: input (torch.Tensor): input features, diff --git a/catalyst/contrib/nn/modules/softmax.py b/catalyst/contrib/nn/modules/softmax.py index bcf2644181..bb3bc03677 100644 --- a/catalyst/contrib/nn/modules/softmax.py +++ b/catalyst/contrib/nn/modules/softmax.py @@ -43,7 +43,7 @@ def __repr__(self) -> str: self.in_features, self.out_features ) - def forward(self, input: torch.Tensor) -> torch.Tensor: + def forward(self, input): """ Args: input (torch.Tensor): input features, From f4752df88f11ecd7e0bdc6567084253427a968f1 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Thu, 1 Oct 2020 00:00:09 +0300 Subject: [PATCH 20/23] moved noqa comment --- catalyst/contrib/nn/modules/arcface.py | 4 ++-- catalyst/contrib/nn/modules/cosface.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py index 8e93da081e..4be50a54ae 100644 --- a/catalyst/contrib/nn/modules/arcface.py +++ b/catalyst/contrib/nn/modules/arcface.py @@ -39,14 +39,14 @@ class ArcFace(nn.Module): """ - def __init__( + def __init__( # noqa: D107 self, in_features: int, out_features: int, s: float = 64.0, m: float = 0.5, eps: float = 1e-6, - ): # noqa: D107 + ): super(ArcFace, self).__init__() self.in_features = in_features self.out_features = out_features diff --git a/catalyst/contrib/nn/modules/cosface.py b/catalyst/contrib/nn/modules/cosface.py index 72112cc363..7d46181ace 100644 --- a/catalyst/contrib/nn/modules/cosface.py +++ b/catalyst/contrib/nn/modules/cosface.py @@ 
-35,13 +35,13 @@ class CosFace(nn.Module): """ - def __init__( + def __init__( # noqa: D107 self, in_features: int, out_features: int, s: float = 64.0, m: float = 0.35, - ): # noqa: D107 + ): super(CosFace, self).__init__() self.in_features = in_features self.out_features = out_features From 63797404fb975c814300da3500ee20508250a3fb Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Thu, 1 Oct 2020 00:05:59 +0300 Subject: [PATCH 21/23] fixed docs --- catalyst/contrib/nn/modules/arcface.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py index 4be50a54ae..72536b61b4 100644 --- a/catalyst/contrib/nn/modules/arcface.py +++ b/catalyst/contrib/nn/modules/arcface.py @@ -111,9 +111,11 @@ def forward(self, input, target): class SubCenterArcFace(nn.Module): """Implementation of - `Sub-center ArcFace: Boosting Face Recognition by Large-scale Noisy Web Faces`_. + `Sub-center ArcFace: Boosting Face Recognition + by Large-scale Noisy Web Faces`_. - .. _Sub-center ArcFace\: Boosting Face Recognition by Large-scale Noisy Web Faces: + .. 
_Sub-center ArcFace\: Boosting Face Recognition \ + by Large-scale Noisy Web Faces: https://ibug.doc.ic.ac.uk/media/uploads/documents/eccv_1445.pdf Args: @@ -150,7 +152,7 @@ def __init__( in_features: int, out_features: int, s: float = 64.0, - m: float = 0.50, + m: float = 0.5, k: int = 3, eps: float = 1e-6, ): # noqa: D107 From 1fdbc8917095caf79162e02a1610dfa1e32bfcab Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Thu, 1 Oct 2020 00:07:02 +0300 Subject: [PATCH 22/23] fixed init docs --- catalyst/contrib/nn/modules/arcface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py index 72536b61b4..4c7bc0e107 100644 --- a/catalyst/contrib/nn/modules/arcface.py +++ b/catalyst/contrib/nn/modules/arcface.py @@ -147,7 +147,7 @@ class SubCenterArcFace(nn.Module): """ - def __init__( + def __init__( # noqa: D107 self, in_features: int, out_features: int, @@ -155,7 +155,7 @@ def __init__( m: float = 0.5, k: int = 3, eps: float = 1e-6, - ): # noqa: D107 + ): super(SubCenterArcFace, self).__init__() self.in_features = in_features self.out_features = out_features From 800ae52c826ffdd55c6fe661d595d93f7ab509f0 Mon Sep 17 00:00:00 2001 From: Dmytro Doroshenko Date: Fri, 2 Oct 2020 00:23:01 +0300 Subject: [PATCH 23/23] fixed examples --- catalyst/contrib/nn/modules/arcface.py | 11 ++++++----- catalyst/contrib/nn/modules/cosface.py | 2 +- catalyst/contrib/nn/modules/softmax.py | 16 +++++++++++----- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/catalyst/contrib/nn/modules/arcface.py b/catalyst/contrib/nn/modules/arcface.py index 4c7bc0e107..74132f4be1 100644 --- a/catalyst/contrib/nn/modules/arcface.py +++ b/catalyst/contrib/nn/modules/arcface.py @@ -32,7 +32,7 @@ class ArcFace(nn.Module): >>> layer = ArcFace(5, 10, s=1.31, m=0.5) >>> loss_fn = nn.CrosEntropyLoss() >>> embedding = torch.randn(3, 5, requires_grad=True) - >>> target = torch.empty(3, 
dtype=torch.long).random_(5) + >>> target = torch.empty(3, dtype=torch.long).random_(10) >>> output = layer(embedding, target) >>> loss = loss_fn(output, target) >>> loss.backward() @@ -140,7 +140,7 @@ class SubCenterArcFace(nn.Module): >>> layer = SubCenterArcFace(5, 10, s=1.31, m=0.35, k=2) >>> loss_fn = nn.CrosEntropyLoss() >>> embedding = torch.randn(3, 5, requires_grad=True) - >>> target = torch.empty(3, dtype=torch.long).random_(5) + >>> target = torch.empty(3, dtype=torch.long).random_(10) >>> output = layer(embedding, target) >>> loss = loss_fn(output, target) >>> loss.backward() @@ -224,6 +224,7 @@ def forward(self, input, label): theta > self.threshold, torch.zeros_like(one_hot), one_hot ) - output = torch.cos(torch.where(selected.bool(), theta + self.m, theta)) - output *= self.s - return output + logits = torch.cos(torch.where(selected.bool(), theta + self.m, theta)) + logits *= self.s + + return logits diff --git a/catalyst/contrib/nn/modules/cosface.py b/catalyst/contrib/nn/modules/cosface.py index 7d46181ace..7e2ceb02f9 100644 --- a/catalyst/contrib/nn/modules/cosface.py +++ b/catalyst/contrib/nn/modules/cosface.py @@ -28,7 +28,7 @@ class CosFace(nn.Module): >>> layer = CosFaceLoss(5, 10, s=1.31, m=0.1) >>> loss_fn = nn.CrosEntropyLoss() >>> embedding = torch.randn(3, 5, requires_grad=True) - >>> target = torch.empty(3, dtype=torch.long).random_(5) + >>> target = torch.empty(3, dtype=torch.long).random_(10) >>> output = layer(embedding, target) >>> loss = loss_fn(output, target) >>> loss.backward() diff --git a/catalyst/contrib/nn/modules/softmax.py b/catalyst/contrib/nn/modules/softmax.py index bb3bc03677..7404b7f230 100644 --- a/catalyst/contrib/nn/modules/softmax.py +++ b/catalyst/contrib/nn/modules/softmax.py @@ -4,11 +4,17 @@ class SoftMax(nn.Module): - """Implementation of SoftMax head for metric learning. + """Implementation of + `Significance of Softmax-based Features in Comparison to + Distance Metric Learning-based Features`_. + + .. 
_Significance of Softmax-based Features in Comparison to \ + Distance Metric Learning-based Features: + https://arxiv.org/abs/1712.10151 Args: - in_features (int): size of each input sample. - num_classes (int): size of each output sample. + in_features: size of each input sample. + out_features: size of each output sample. Shape: - Input: :math:`(batch, H_{in})` where @@ -17,10 +23,10 @@ class SoftMax(nn.Module): :math:`H_{out} = out\_features`. Example: - >>> layer = SoftMax() + >>> layer = SoftMax(5, 10) >>> loss_fn = nn.CrosEntropyLoss() >>> embedding = torch.randn(3, 5, requires_grad=True) - >>> target = torch.empty(3, dtype=torch.long).random_(5) + >>> target = torch.empty(3, dtype=torch.long).random_(10) >>> output = layer(embedding, target) >>> loss = loss_fn(output, target) >>> loss.backward()