Skip to content

Commit

Permalink
Fix failing tests and add suggestion from code review
Browse files Browse the repository at this point in the history
  • Loading branch information
NielsRogge committed Jul 28, 2021
1 parent 8c82e3f commit 381b235
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
4 changes: 2 additions & 2 deletions src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
Expand Up @@ -1240,10 +1240,10 @@ def forward(
LAYOUTLMV2_START_DOCSTRING,
)
class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
def __init__(self, config):
def __init__(self, config, has_visual_segment_embedding=True):
super().__init__(config)
self.num_labels = config.num_labels
config.has_visual_segment_embedding = True
config.has_visual_segment_embedding = has_visual_segment_embedding
self.layoutlmv2 = LayoutLMv2Model(config)
self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

Expand Down
3 changes: 2 additions & 1 deletion tests/test_processor_layoutlmv2.py
Expand Up @@ -30,6 +30,7 @@
from transformers import LayoutLMv2FeatureExtractor, LayoutLMv2Processor


@require_pytesseract
class LayoutLMv2ProcessorTest(unittest.TestCase):
def setUp(self):
vocab_tokens = [
Expand Down Expand Up @@ -106,7 +107,7 @@ def test_save_load_pretrained_additional_features(self):
self.assertIsInstance(processor.feature_extractor, LayoutLMv2FeatureExtractor)


# integration tests
# tests for different use cases
@require_torch
@require_pytesseract
class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
Expand Down
14 changes: 7 additions & 7 deletions tests/test_tokenization_layoutlmv2.py
Expand Up @@ -1055,32 +1055,32 @@ def test_batch_encode_plus_tensors(self):
words, boxes = self.get_words_and_boxes()

# A Tensor cannot be built from sequences which are not the same size
self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes, return_tensors="pt")
self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes, return_tensors="tf")
self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes=boxes, return_tensors="pt")
self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes=boxes, return_tensors="tf")

if tokenizer.pad_token_id is None:
self.assertRaises(
ValueError,
tokenizer.batch_encode_plus,
words,
boxes,
boxes=boxes,
padding=True,
return_tensors="pt",
)
self.assertRaises(
ValueError,
tokenizer.batch_encode_plus,
words,
boxes,
boxes=boxes,
padding="longest",
return_tensors="tf",
)
else:
pytorch_tensor = tokenizer.batch_encode_plus(words, boxes, padding=True, return_tensors="pt")
pytorch_tensor = tokenizer.batch_encode_plus(words, boxes=boxes, padding=True, return_tensors="pt")
tensorflow_tensor = tokenizer.batch_encode_plus(
words, boxes, padding="longest", return_tensors="tf"
words, boxes=boxes, padding="longest", return_tensors="tf"
)
encoded_sequences = tokenizer.batch_encode_plus(words, boxes, padding=True)
encoded_sequences = tokenizer.batch_encode_plus(words, boxes=boxes, padding=True)

for key in encoded_sequences.keys():
pytorch_value = pytorch_tensor[key].tolist()
Expand Down

0 comments on commit 381b235

Please sign in to comment.