Small fixes for TF-ESM1b and ESM-1b weight conversions (#19683)
Rocketknight1 committed Oct 18, 2022
1 parent 90071fe commit fd5eac5
Showing 2 changed files with 2 additions and 2 deletions.
src/transformers/models/esm/convert_esm.py (1 addition, 1 deletion)
@@ -149,7 +149,7 @@ def convert_esm_checkpoint_to_pytorch(
self_attn.value.weight.data = esm_layer.self_attn.v_proj.weight
self_attn.value.bias.data = esm_layer.self_attn.v_proj.bias

-if hasattr(esm_layer.self_attn, "rot_emb"):
+if getattr(esm_layer.self_attn, "rot_emb", None) is not None:
# Matt: Although inv_freq is not a trainable weight, it is computed at model init and cached.
# During the training of ESM-2 the model was converted to float16 precision, which also converts
# the inv_freq tensor, and the loss of precision remains even if the model is loaded later as float32.
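For context on the convert_esm.py change: in the original ESM codebase the attention module appears to define rot_emb even when rotary embeddings are disabled (the ESM-1b case), leaving it set to None, so a plain hasattr check passes for ESM-1b as well. A minimal sketch of the difference, using a hypothetical DummyAttention class purely for illustration:

    class DummyAttention:
        def __init__(self, use_rotary):
            # Stand-in for the ESM attention module: the attribute always
            # exists, but only holds a real module when rotary embeddings are used.
            self.rot_emb = object() if use_rotary else None

    esm1b_attn = DummyAttention(use_rotary=False)

    # hasattr passes even though there is no rotary embedding to convert.
    print(hasattr(esm1b_attn, "rot_emb"))                    # True
    # The new check only passes when a rotary embedding actually exists.
    print(getattr(esm1b_attn, "rot_emb", None) is not None)  # False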
src/transformers/models/esm/modeling_tf_esm.py (1 addition, 1 deletion)
@@ -136,7 +136,7 @@ def __init__(self, config, name=None):
)

if config.emb_layer_norm_before:
-self.layer_norm = LayerNormalization(epsilon=config.layer_norm_eps)
+self.layer_norm = LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm")
else:
self.layer_norm = None
# Matt: I think this line was copied incorrectly from BERT, disabling for now
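For context on the modeling_tf_esm.py change: Keras derives TF variable names from layer names, and the TF weight-loading code in transformers matches checkpoint weights by those names, so an auto-generated name such as layer_normalization would not line up with the intended layer_norm prefix. A minimal standalone sketch (not the transformers cross-loading machinery; exact name strings may vary by TF/Keras version):

    import tensorflow as tf
    from tensorflow.keras.layers import LayerNormalization

    inputs = tf.zeros((1, 4, 8))

    # Without an explicit name, Keras auto-generates one, and that string
    # ends up in the variable names used when matching checkpoint weights.
    unnamed = LayerNormalization(epsilon=1e-12)
    unnamed(inputs)
    print([w.name for w in unnamed.weights])  # e.g. ['layer_normalization/gamma:0', ...]

    # With an explicit name, the variables carry the intended prefix.
    named = LayerNormalization(epsilon=1e-12, name="layer_norm")
    named(inputs)
    print([w.name for w in named.weights])    # e.g. ['layer_norm/gamma:0', 'layer_norm/beta:0']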
