Feature: AdaCos (#958)

* adacos * adacos * fixed repr & zeros_like * removed redundant comma
catalyst-team · Oct 10, 2020 · cdad455 · cdad455
1 parent f9b68b6
commit cdad455
Show file tree

Hide file tree

Showing 4 changed files with 117 additions and 2 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - docs for MetricCallbacks ([#947](https://github.com/catalyst-team/catalyst/pull/947)) 
 - SoftMax, CosFace, ArcFace layers to contrib ([#939](https://github.com/catalyst-team/catalyst/pull/939))
+- AdaCos to contrib ([#958](https://github.com/catalyst-team/catalyst/pull/958))
 
 ### Changed
 

diff --git a/catalyst/contrib/nn/modules/__init__.py b/catalyst/contrib/nn/modules/__init__.py
@@ -34,4 +34,4 @@
 
 from catalyst.contrib.nn.modules.softmax import SoftMax
 from catalyst.contrib.nn.modules.arcface import ArcFace, SubCenterArcFace
-from catalyst.contrib.nn.modules.cosface import CosFace
+from catalyst.contrib.nn.modules.cosface import CosFace, AdaCos
diff --git a/catalyst/contrib/nn/modules/cosface.py b/catalyst/contrib/nn/modules/cosface.py
@@ -1,3 +1,5 @@
+import math
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -82,3 +84,115 @@ def forward(self, input, target):
         logits *= self.s
 
         return logits
+
+
+class AdaCos(nn.Module):
+    """Implementation of
+    `AdaCos: Adaptively Scaling Cosine Logits for \
+        Effectively Learning Deep Face Representations`_.
+
+    The layer returns cosine similarities between the L2-normalized input
+    and the L2-normalized class weights, multiplied by a scale ``s``.
+    ``s`` starts at ``sqrt(2) * log(out_features - 1)`` and, when
+    ``dynamical_s`` is enabled, is re-estimated from every batch seen in
+    training mode. ``s`` is kept as a plain Python ``float`` attribute
+    (it is neither a parameter nor a buffer).
+
+    .. _AdaCos\\: Adaptively Scaling Cosine Logits for \
+        Effectively Learning Deep Face Representations:
+        https://arxiv.org/abs/1905.00292
+
+    Args:
+        in_features: size of each input sample.
+        out_features: size of each output sample (number of classes),
+            must be at least ``2``.
+        dynamical_s: option to use dynamical scale parameter.
+            If ``False`` then will be used initial scale.
+            Default: ``True``.
+        eps: operation accuracy, used to keep cosines strictly inside
+            ``(-1, 1)`` before ``acos``.
+            Default: ``1e-6``.
+
+    Raises:
+        ValueError: if ``out_features`` is less than ``2``
+            (the initial scale ``log(out_features - 1)`` is undefined).
+
+    Shape:
+        - Input: :math:`(batch, H_{in})` where
+          :math:`H_{in} = in\\_features`.
+        - Output: :math:`(batch, H_{out})` where
+          :math:`H_{out} = out\\_features`.
+
+    Example:
+        >>> layer = AdaCos(5, 10)
+        >>> loss_fn = nn.CrossEntropyLoss()
+        >>> embedding = torch.randn(3, 5, requires_grad=True)
+        >>> target = torch.empty(3, dtype=torch.long).random_(10)
+        >>> output = layer(embedding, target)
+        >>> loss = loss_fn(output, target)
+        >>> loss.backward()
+
+    """
+
+    def __init__(  # noqa: D107
+        self,
+        in_features: int,
+        out_features: int,
+        dynamical_s: bool = True,
+        eps: float = 1e-6,
+    ):
+        super().__init__()
+        if out_features < 2:
+            # math.log(out_features - 1) would otherwise fail with an
+            # uninformative "math domain error".
+            raise ValueError(
+                f"out_features must be >= 2, got {out_features}"
+            )
+        self.in_features = in_features
+        self.out_features = out_features
+        self.dynamical_s = dynamical_s
+        # Initial scale from the paper; overwritten per batch in training
+        # mode when ``dynamical_s`` is enabled.
+        self.s = math.sqrt(2) * math.log(out_features - 1)
+        self.eps = eps
+
+        self.weight = nn.Parameter(
+            torch.FloatTensor(out_features, in_features)
+        )
+        nn.init.xavier_uniform_(self.weight)
+
+    def __repr__(self) -> str:
+        """Object representation."""
+        rep = (
+            "AdaCos("
+            f"in_features={self.in_features},"
+            f"out_features={self.out_features},"
+            f"s={self.s},"
+            f"eps={self.eps}"
+            ")"
+        )
+        return rep
+
+    def forward(
+        self, input: torch.Tensor, target: torch.LongTensor
+    ) -> torch.Tensor:
+        """
+        Args:
+            input: input features,
+                expected shapes ``BxF`` where ``B``
+                is batch dimension and ``F`` is an
+                input feature dimension.
+            target: target classes,
+                expected shapes ``B`` where
+                ``B`` is batch dimension.
+                Only read when the scale is being updated
+                (training mode with ``dynamical_s=True``).
+
+        Returns:
+            tensor (logits) with shapes ``BxC``
+            where ``C`` is a number of classes
+            (out_features).
+        """
+        cos_theta = F.linear(F.normalize(input), F.normalize(self.weight))
+
+        # ``self.training`` is the mode flag toggled by train()/eval();
+        # ``self.train`` is a bound method and therefore always truthy.
+        if self.training and self.dynamical_s:
+            # The scale update is pure statistics of the current batch and
+            # must not contribute to the autograd graph.
+            with torch.no_grad():
+                one_hot = torch.zeros_like(cos_theta)
+                one_hot.scatter_(1, target.view(-1, 1).long(), 1)
+
+                # Clamp away from +-1 so acos stays finite.
+                theta = torch.acos(
+                    torch.clamp(cos_theta, -1.0 + self.eps, 1.0 - self.eps)
+                )
+
+                # Mean over the batch of the summed exp-logits of all
+                # non-target classes, computed with the current scale.
+                b_avg = (
+                    torch.where(
+                        one_hot < 1,
+                        torch.exp(self.s * cos_theta),
+                        torch.zeros_like(cos_theta),
+                    )
+                    .sum(1)
+                    .mean()
+                )
+                # Median angle to the target class, capped at pi / 4.
+                theta_median = theta[one_hot > 0].median()
+                theta_median = theta_median.clamp(max=math.pi / 4)
+                self.s = (torch.log(b_avg) / torch.cos(theta_median)).item()
+
+        logits = self.s * cos_theta
+        return logits
diff --git a/docs/api/contrib.rst b/docs/api/contrib.rst
@@ -264,7 +264,7 @@ Common modules
     :undoc-members:
     :show-inheritance:
 
-CosFace: Large Margin Cosine Loss for Deep Face Recognition
+CosFace and AdaCos
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 .. automodule:: catalyst.contrib.nn.modules.cosface
     :members: