Small improvements
NielsRogge committed Jul 9, 2021
1 parent 386eead, commit 85e16bd
Showing 3 changed files with 14 additions and 13 deletions.
docs/source/model_doc/layoutlmv2.rst (4 changes: 2 additions & 2 deletions)
@@ -1,5 +1,5 @@
..
-    Copyright 2020 The HuggingFace Team. All rights reserved.
+    Copyright 2021 The HuggingFace Team. All rights reserved.

     Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
     the License. You may obtain a copy of the License at
@@ -76,7 +76,7 @@ Tips:
]
Here, :obj:`width` and :obj:`height` correspond to the width and height of the original document in which the token
-occurs. Those can be obtained using the Python Image Library (PIL) library for example, as follows:
+occurs (before resizing the image). Those can be obtained using the Python Image Library (PIL) library for example, as follows:

.. code-block::
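For reference, a minimal sketch of the PIL lookup this doc change refers to; the file name below is a placeholder, not part of the commit:

    from PIL import Image

    # open the original, unresized document image (placeholder file name)
    image = Image.open("document.png")

    # width and height of the original image, used to normalize token bounding boxes
    width, height = image.size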
src/transformers/models/layoutlmv2/configuration_layoutlmv2.py (20 changes: 10 additions & 10 deletions)
@@ -68,30 +68,30 @@ class LayoutLMv2Config(PretrainedConfig):
    layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-12):
        The epsilon used by the layer normalization layers.
    max_2d_position_embeddings (:obj:`int`, `optional`, defaults to 1024):
-       The maximum value that the 2D position embedding might ever used. Typically set this to something large
+       The maximum value that the 2D position embedding might ever be used with. Typically set this to something large
        just in case (e.g., 1024).
    max_rel_pos (:obj:`int`, `optional`, defaults to 128):
-       ...
+       The maximum number of relative positions to be used in the self-attention mechanism.
    rel_pos_bins (:obj:`int`, `optional`, defaults to 32):
-       The number of relative position bins.
+       The number of relative position bins to be used in the self-attention mechanism.
    fast_qkv (:obj:`bool`, `optional`, defaults to :obj:`True`):
        Whether or not to use a single matrix for the queries, keys, values in the self-attention layers.
    max_rel_2d_pos (:obj:`int`, `optional`, defaults to 256):
-       The maximum number of relative 2D positions.
+       The maximum number of relative 2D positions in the self-attention mechanism.
    rel_2d_pos_bins (:obj:`int`, `optional`, defaults to 64):
-       The number of 2D relative position bins.
+       The number of 2D relative position bins in the self-attention mechanism.
    convert_sync_batchnorm (:obj:`bool`, `optional`, defaults to :obj:`True`):
-       ...
+       Whether or not to convert BatchNorm layers to SyncNorm layers before wrapping the visual backbone with DDP.
    image_feature_pool_shape (:obj:`List[int]`, `optional`, defaults to [7, 7, 256]):
        The shape of the average-pooled feature map.
    coordinate_size (:obj:`int`, `optional`, defaults to 128):
-       Coordinate size to use.
+       Dimension of the coordinate embeddings.
    shape_size (:obj:`int`, `optional`, defaults to 128):
-       Shape size to use.
+       Dimension of the width and height embeddings.
    has_relative_attention_bias (:obj:`bool`, `optional`, defaults to :obj:`True`):
-       Whether or not to use a relative attention bias.
+       Whether or not to use a relative attention bias in the self-attention mechanism.
    has_spatial_attention_bias (:obj:`bool`, `optional`, defaults to :obj:`True`):
-       Whether or not to use a spatial attention bias.
+       Whether or not to use a spatial attention bias in the self-attention mechanism.
    has_visual_segment_embedding (:obj:`bool`, `optional`, defaults to :obj:`False`):
        Whether or not to add visual segment embeddings.
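For context, a minimal sketch of how the documented parameters above map onto keyword arguments of LayoutLMv2Config, assuming a transformers version that includes LayoutLMv2; the values shown are simply the defaults listed in the docstring, spelled out for clarity (not part of this commit):

    from transformers import LayoutLMv2Config

    # every value below is the documented default
    config = LayoutLMv2Config(
        max_2d_position_embeddings=1024,
        max_rel_pos=128,
        rel_pos_bins=32,
        fast_qkv=True,
        max_rel_2d_pos=256,
        rel_2d_pos_bins=64,
        convert_sync_batchnorm=True,
        image_feature_pool_shape=[7, 7, 256],
        coordinate_size=128,
        shape_size=128,
        has_relative_attention_bias=True,
        has_spatial_attention_bias=True,
        has_visual_segment_embedding=False,
    )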
src/transformers/models/layoutlmv2/modeling_layoutlmv2.py (3 changes: 2 additions & 1 deletion)
@@ -87,7 +87,7 @@ def _cal_spatial_position_embeddings(self, bbox):
            right_position_embeddings = self.x_position_embeddings(bbox[:, :, 2])
            lower_position_embeddings = self.y_position_embeddings(bbox[:, :, 3])
        except IndexError as e:
-            raise IndexError("The :obj:`bbox`coordinate values should be within 0-1000 range.") from e
+            raise IndexError("The :obj:`bbox` coordinate values should be within 0-1000 range.") from e

        h_position_embeddings = self.h_position_embeddings(bbox[:, :, 3] - bbox[:, :, 1])
        w_position_embeddings = self.w_position_embeddings(bbox[:, :, 2] - bbox[:, :, 0])
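The 0-1000 requirement comes from normalizing pixel coordinates against the original image size before passing them to the position embedding tables. A sketch of such a helper, in the spirit of the normalize_bbox example in the docs above (the helper itself is illustrative, not part of this commit):

    # hypothetical helper: scales a pixel-space box (x0, y0, x1, y1) to the
    # 0-1000 range the embedding tables expect, avoiding the IndexError above
    def normalize_bbox(bbox, width, height):
        x0, y0, x1, y1 = bbox
        return [
            int(1000 * x0 / width),
            int(1000 * y0 / height),
            int(1000 * x1 / width),
            int(1000 * y1 / height),
        ]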
@@ -1254,6 +1254,7 @@ class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
+        config.has_visual_segment_embedding = True
        self.layoutlmv2 = LayoutLMv2Model(config)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

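The added line turns visual segment embeddings on whenever the question-answering head is built, overriding whatever the incoming config says. A usage sketch; the checkpoint name is an assumption for illustration, not part of this commit:

    from transformers import LayoutLMv2ForQuestionAnswering

    # has_visual_segment_embedding is forced to True by __init__,
    # regardless of the value stored in the loaded config
    model = LayoutLMv2ForQuestionAnswering.from_pretrained("microsoft/layoutlmv2-base-uncased")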
