Skip to content

Commit

Permalink
Fix failing tests and add suggestion from code review
Browse files Browse the repository at this point in the history
  • Loading branch information
NielsRogge committed Jul 28, 2021
1 parent 8c82e3f commit 381b235
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
4 changes: 2 additions & 2 deletions src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
Expand Up @@ -1240,10 +1240,10 @@ def forward(
LAYOUTLMV2_START_DOCSTRING,
)
class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
def __init__(self, config):
def __init__(self, config, has_visual_segment_embedding=True):
super().__init__(config)
self.num_labels = config.num_labels
config.has_visual_segment_embedding = True
config.has_visual_segment_embedding = has_visual_segment_embedding
self.layoutlmv2 = LayoutLMv2Model(config)
self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

Expand Down
3 changes: 2 additions & 1 deletion tests/test_processor_layoutlmv2.py
Expand Up @@ -30,6 +30,7 @@
from transformers import LayoutLMv2FeatureExtractor, LayoutLMv2Processor


@require_pytesseract
class LayoutLMv2ProcessorTest(unittest.TestCase):
def setUp(self):
vocab_tokens = [
Expand Down Expand Up @@ -106,7 +107,7 @@ def test_save_load_pretrained_additional_features(self):
self.assertIsInstance(processor.feature_extractor, LayoutLMv2FeatureExtractor)


# integration tests
# tests for different use cases
@require_torch
@require_pytesseract
class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
Expand Down
14 changes: 7 additions & 7 deletions tests/test_tokenization_layoutlmv2.py
Expand Up @@ -1055,32 +1055,32 @@ def test_batch_encode_plus_tensors(self):
words, boxes = self.get_words_and_boxes()

# A Tensor cannot be built from sequences which are not the same size
self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes, return_tensors="pt")
self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes, return_tensors="tf")
self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes=boxes, return_tensors="pt")
self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes=boxes, return_tensors="tf")

if tokenizer.pad_token_id is None:
self.assertRaises(
ValueError,
tokenizer.batch_encode_plus,
words,
boxes,
boxes=boxes,
padding=True,
return_tensors="pt",
)
self.assertRaises(
ValueError,
tokenizer.batch_encode_plus,
words,
boxes,
boxes=boxes,
padding="longest",
return_tensors="tf",
)
else:
pytorch_tensor = tokenizer.batch_encode_plus(words, boxes, padding=True, return_tensors="pt")
pytorch_tensor = tokenizer.batch_encode_plus(words, boxes=boxes, padding=True, return_tensors="pt")
tensorflow_tensor = tokenizer.batch_encode_plus(
words, boxes, padding="longest", return_tensors="tf"
words, boxes=boxes, padding="longest", return_tensors="tf"
)
encoded_sequences = tokenizer.batch_encode_plus(words, boxes, padding=True)
encoded_sequences = tokenizer.batch_encode_plus(words, boxes=boxes, padding=True)

for key in encoded_sequences.keys():
pytorch_value = pytorch_tensor[key].tolist()
Expand Down

0 comments on commit 381b235

Please sign in to comment.