Creates unit test for multiple ngram BLEU score function

nltk#2320: the corpus_bleu function is inefficient when used with several different weightings, because it recalculates the underlying values on every call instead of reusing them. * Creates a unit test describing the expected behavior of a more general function that takes multiple weightings and returns multiple BLEU scores.
agannon · Oct 7, 2019 · bbde4b9 · bbde4b9
1 parent 1d7f0d5
commit bbde4b9
Show file tree

Hide file tree

Showing 2 changed files with 46 additions and 1 deletion.
diff --git a/nltk/test/unit/translate/test_bleu.py b/nltk/test/unit/translate/test_bleu.py
@@ -12,7 +12,7 @@
     modified_precision,
     brevity_penalty,
     closest_ref_length,
-)
+    corpus_bleu_multiple_weights)
 from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, SmoothingFunction
 
 
@@ -269,3 +269,38 @@ def test_corpus_bleu_with_bad_sentence(self):
                 )
         except AttributeError:  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
             self.assertAlmostEqual(corpus_bleu(references, hypotheses), 0.0, places=4)
+
+
+class TestBLEUWithMultipleWeights(unittest.TestCase):
+    """Checks corpus_bleu_multiple_weights against per-weighting corpus_bleu calls."""
+
+    def test_corpus_bleu_with_multiple_weights(self):
+        """
+        Every score returned for a weighting must match what corpus_bleu
+        returns when called on the same corpus with that single weighting.
+        """
+        hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
+                'ensures', 'that', 'the', 'military', 'always',
+                'obeys', 'the', 'commands', 'of', 'the', 'party']
+        ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
+                 'ensures', 'that', 'the', 'military', 'will', 'forever',
+                 'heed', 'Party', 'commands']
+        ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
+                 'guarantees', 'the', 'military', 'forces', 'always',
+                 'being', 'under', 'the', 'command', 'of', 'the', 'Party']
+        ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
+                 'army', 'always', 'to', 'heed', 'the', 'directions',
+                 'of', 'the', 'party']
+        hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
+                'interested', 'in', 'world', 'history']
+        ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
+                 'because', 'he', 'read', 'the', 'book']
+
+        # Build the corpus once so both functions are fed identical input.
+        list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
+        hypotheses = [hyp1, hyp2]
+        # Weightings that put all the mass on unigrams, bigrams and trigrams.
+        weightings = [(1, 0, 0, 0), (0, 1, 0, 0), (0, 0, 1, 0)]
+
+        bleu_scores = corpus_bleu_multiple_weights(
+            list_of_references=list_of_references,
+            hypotheses=hypotheses,
+            weights=weightings,
+        )
+
+        for weighting in weightings:
+            expected = corpus_bleu(list_of_references, hypotheses, weighting)
+            # Scores are floats that the two functions may accumulate in a
+            # different order, so compare with a tolerance (7 places by
+            # default) rather than with exact equality.
+            self.assertAlmostEqual(
+                bleu_scores[weighting],
+                expected,
+                msg="BLEU mismatch for weights {}".format(weighting),
+            )
diff --git a/nltk/translate/bleu_score.py b/nltk/translate/bleu_score.py
@@ -106,6 +106,16 @@ def sentence_bleu(
     )
 
 
+def corpus_bleu_multiple_weights(
+        list_of_references,
+        hypotheses,
+        weights,
+        smoothing_function=None,
+        auto_reweigh=False
+):
+    """
+    Calculate one corpus-level BLEU score for each weighting in ``weights``.
+
+    This is a baseline implementation: it calls ``corpus_bleu`` once per
+    weighting, so the values are correct but nothing is shared between the
+    calls yet. It exists so that callers and tests get a real result instead
+    of ``None`` while the shared-statistics version is written.
+
+    Because the corpus is passed to ``corpus_bleu`` once per weighting,
+    ``list_of_references`` and ``hypotheses`` must be re-iterable (e.g.
+    lists), not one-shot generators.
+
+    :param list_of_references: the references for every hypothesis, passed
+        through unchanged to ``corpus_bleu``
+    :param hypotheses: the hypothesis sentences, passed through unchanged to
+        ``corpus_bleu``
+    :param weights: an iterable of weightings, each a sequence of n-gram
+        weights such as ``(1, 0, 0, 0)``
+    :param smoothing_function: forwarded to ``corpus_bleu``
+    :param auto_reweigh: forwarded to ``corpus_bleu``
+    :return: a dict mapping ``tuple(weighting)`` to its BLEU score; empty
+        when ``weights`` is empty
+    :rtype: dict
+    """
+    # NOTE(review): assumes corpus_bleu accepts ``smoothing_function`` and
+    # ``auto_reweigh`` as keyword arguments, mirroring this signature --
+    # confirm against its definition below.
+    # Keys are normalised to tuples so that list weightings stay hashable.
+    return {
+        tuple(weighting): corpus_bleu(
+            list_of_references,
+            hypotheses,
+            weighting,
+            smoothing_function=smoothing_function,
+            auto_reweigh=auto_reweigh,
+        )
+        for weighting in weights
+    }
+
+
 def corpus_bleu(
     list_of_references,
     hypotheses,