Add TFConvNextModel (#15750)

* feat: initial implementation of convnext in tensorflow. * fix: sample code for the classification model. * chore: added checked for from the classification model. * chore: set bias initializer in the classification head. * chore: updated license terms. * chore: removed unused imports * feat: enabled argument during using drop_path. * chore: replaced tf.identity with layers.Activation(linear). * chore: edited default checkpoint. * fix: minor bugs in the initializations. * partial-fix: tf model errors for loading pretrained pt weights. * partial-fix: call method updated * partial-fix: cross loading of weights (4x3 variables to be matched) * chore: removed unneeded comment. * removed playground.py * rebasing * rebasing and removing playground.py. * fix: renaming TFConvNextStage conv and layer norm layers * chore: added initializers and other minor additions. * chore: added initializers and other minor additions. * add: tests for convnext. * fix: integration tester class. * fix: issues mentioned in pr feedback (round 1). * fix: how output_hidden_states arg is propagated inside the network. * feat: handling of arg for pure cnn models. * chore: added a note on equal contribution in model docs. * rebasing * rebasing and removing playground.py. * feat: encapsulation for the convnext trunk. * Fix variable naming; Test-related corrections; Run make fixup * chore: added Joao as a contributor to convnext. * rebasing * rebasing and removing playground.py. * rebasing * rebasing and removing playground.py. * chore: corrected copyright year and added comment on NHWC. * chore: fixed the black version and ran formatting. * chore: ran make style. * chore: removed from_pt argument from test, ran make style. * rebasing * rebasing and removing playground.py. * rebasing * rebasing and removing playground.py. * fix: tests in the convnext subclass, ran make style. * rebasing * rebasing and removing playground.py. * rebasing * rebasing and removing playground.py. 
* chore: moved convnext test to the correct location * fix: locations for the test file of convnext. * fix: convnext tests. * chore: applied sgugger's suggestion for dealing w/ output_attentions. * chore: added comments. * chore: applied updated quality environment style. * chore: applied formatting with quality environment. * chore: revert to the previous tests/test_modeling_common.py. * chore: revert to the original test_modeling_common.py * chore: revert to previous states for test_modeling_tf_common.py and modeling_tf_utils.py * fix: tests for convnext. * chore: removed output_attentions argument from convnext config. * chore: revert to the earlier tf utils. * fix: output shapes of the hidden states * chore: removed unnecessary comment * chore: reverting to the right test_modeling_tf_common.py. * Styling nits Co-authored-by: ariG23498 <aritra.born2fly@gmail.com> Co-authored-by: Joao Gante <joao@huggingface.co> Co-authored-by: Sylvain Gugger <Sylvain.gugger@gmail.com>
huggingface · Feb 25, 2022 · 84eaa6a · 84eaa6a
1 parent 0b5bf6a
commit 84eaa6a
Show file tree

Hide file tree

Showing 11 changed files with 964 additions and 9 deletions.
diff --git a/docs/source/index.mdx b/docs/source/index.mdx
@@ -179,7 +179,7 @@ Flax), PyTorch, and/or TensorFlow.
 |           Canine            |       ✅       |       ❌       |       ✅        |         ❌         |      ❌      |
 |            CLIP             |       ✅       |       ✅       |       ✅        |         ✅         |      ✅      |
 |          ConvBERT           |       ✅       |       ✅       |       ✅        |         ✅         |      ❌      |
-|          ConvNext           |       ❌       |       ❌       |       ✅        |         ❌         |      ❌      |
+|          ConvNext           |       ❌       |       ❌       |       ✅        |         ✅         |      ❌      |
 |            CTRL             |       ✅       |       ❌       |       ✅        |         ✅         |      ❌      |
 |           DeBERTa           |       ✅       |       ✅       |       ✅        |         ✅         |      ❌      |
 |         DeBERTa-v2          |       ✅       |       ❌       |       ✅        |         ✅         |      ❌      |

diff --git a/docs/source/model_doc/convnext.mdx b/docs/source/model_doc/convnext.mdx
@@ -37,7 +37,8 @@ alt="drawing" width="600"/>
 
 <small> ConvNeXT architecture. Taken from the <a href="https://arxiv.org/abs/2201.03545">original paper</a>.</small>
 
-This model was contributed by [nielsr](https://huggingface.co/nielsr). The original code can be found [here](https://github.com/facebookresearch/ConvNeXt).
+This model was contributed by [nielsr](https://huggingface.co/nielsr). The TensorFlow version of the model was contributed by [ariG23498](https://github.com/ariG23498),
+[gante](https://github.com/gante), and [sayakpaul](https://github.com/sayakpaul) (equal contribution). The original code can be found [here](https://github.com/facebookresearch/ConvNeXt).
 
 ## ConvNeXT specific outputs
 
@@ -63,4 +64,16 @@ This model was contributed by [nielsr](https://huggingface.co/nielsr). The origi
 ## ConvNextForImageClassification
 
 [[autodoc]] ConvNextForImageClassification
-    - forward
+    - forward
+
+
+## TFConvNextModel
+
+[[autodoc]] TFConvNextModel
+    - call
+
+
+## TFConvNextForImageClassification
+
+[[autodoc]] TFConvNextForImageClassification
+    - call
diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
@@ -1743,6 +1743,13 @@
             "TFConvBertPreTrainedModel",
         ]
     )
+    _import_structure["models.convnext"].extend(
+        [
+            "TFConvNextForImageClassification",
+            "TFConvNextModel",
+            "TFConvNextPreTrainedModel",
+        ]
+    )
     _import_structure["models.ctrl"].extend(
         [
             "TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST",
@@ -3751,6 +3758,7 @@
             TFConvBertModel,
             TFConvBertPreTrainedModel,
         )
+        from .models.convnext import TFConvNextForImageClassification, TFConvNextModel, TFConvNextPreTrainedModel
         from .models.ctrl import (
             TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST,
             TFCTRLForSequenceClassification,

diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py
@@ -311,9 +311,10 @@ def booleans_processing(config, **kwargs):
     final_booleans = {}
 
     if tf.executing_eagerly():
-        final_booleans["output_attentions"] = (
-            kwargs["output_attentions"] if kwargs["output_attentions"] is not None else config.output_attentions
-        )
+        # Pure conv models (such as ConvNext) do not have `output_attentions`
+        final_booleans["output_attentions"] = kwargs.get("output_attentions", None)
+        if final_booleans["output_attentions"] is None:
+            final_booleans["output_attentions"] = config.output_attentions
         final_booleans["output_hidden_states"] = (
             kwargs["output_hidden_states"]
             if kwargs["output_hidden_states"] is not None

diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py
@@ -36,6 +36,7 @@
         ("rembert", "TFRemBertModel"),
         ("roformer", "TFRoFormerModel"),
         ("convbert", "TFConvBertModel"),
+        ("convnext", "TFConvNextModel"),
         ("led", "TFLEDModel"),
         ("lxmert", "TFLxmertModel"),
         ("mt5", "TFMT5Model"),
@@ -155,6 +156,7 @@
     [
         # Model for Image-classsification
         ("vit", "TFViTForImageClassification"),
+        ("convnext", "TFConvNextForImageClassification"),
     ]
 )
 

diff --git a/src/transformers/models/convnext/__init__.py b/src/transformers/models/convnext/__init__.py
@@ -18,7 +18,7 @@
 from typing import TYPE_CHECKING
 
 # rely on isort to merge the imports
-from ...file_utils import _LazyModule, is_torch_available, is_vision_available
+from ...file_utils import _LazyModule, is_tf_available, is_torch_available, is_vision_available
 
 
 _import_structure = {
@@ -36,6 +36,12 @@
         "ConvNextPreTrainedModel",
     ]
 
+if is_tf_available():
+    _import_structure["modeling_tf_convnext"] = [
+        "TFConvNextForImageClassification",
+        "TFConvNextModel",
+        "TFConvNextPreTrainedModel",
+    ]
 
 if TYPE_CHECKING:
     from .configuration_convnext import CONVNEXT_PRETRAINED_CONFIG_ARCHIVE_MAP, ConvNextConfig
@@ -51,6 +57,9 @@
             ConvNextPreTrainedModel,
         )
 
+    if is_tf_available():  # TF classes are registered under "modeling_tf_convnext" in _import_structure
+        from .modeling_tf_convnext import TFConvNextForImageClassification, TFConvNextModel, TFConvNextPreTrainedModel
+
 
 else:
     import sys

diff --git a/src/transformers/models/convnext/configuration_convnext.py b/src/transformers/models/convnext/configuration_convnext.py
@@ -85,6 +85,7 @@ def __init__(
         is_encoder_decoder=False,
         layer_scale_init_value=1e-6,
         drop_path_rate=0.0,
+        image_size=224,
         **kwargs
     ):
         super().__init__(**kwargs)
@@ -99,3 +100,4 @@ def __init__(
         self.layer_norm_eps = layer_norm_eps
         self.layer_scale_init_value = layer_scale_init_value
         self.drop_path_rate = drop_path_rate
+        self.image_size = image_size