From bbde4b9dbb6948e9dbc1e4ab2ad5baa1d91d998a Mon Sep 17 00:00:00 2001 From: andrewgannon Date: Mon, 7 Oct 2019 19:10:20 -0400 Subject: [PATCH] Creates unit test for multiple ngram BLEU score function https://github.com/nltk/nltk/issues/2320 The corpus_bleu function runs inefficiently when used with several different weightings, because it recalculates the underlying values each time it is called instead of reusing them. * Creates a unit test with the expected behavior of a more general function that can take multiple weightings and return multiple BLEU scores. --- nltk/test/unit/translate/test_bleu.py | 37 ++++++++++++++++++++++++++- nltk/translate/bleu_score.py | 10 ++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/nltk/test/unit/translate/test_bleu.py b/nltk/test/unit/translate/test_bleu.py index a97d4dec1c..ed03b8e63b 100644 --- a/nltk/test/unit/translate/test_bleu.py +++ b/nltk/test/unit/translate/test_bleu.py @@ -12,7 +12,7 @@ modified_precision, brevity_penalty, closest_ref_length, -) + corpus_bleu_multiple_weights) from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, SmoothingFunction @@ -269,3 +269,38 @@ def test_corpus_bleu_with_bad_sentence(self): ) except AttributeError: # unittest.TestCase.assertWarns is only supported in Python >= 3.2. 
self.assertAlmostEqual(corpus_bleu(references, hypotheses), 0.0, places=4) + + +class TestBLEUWithMultipleWeights(unittest.TestCase): + def test_corpus_bleu_with_multiple_weights(self): + hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', + 'ensures', 'that', 'the', 'military', 'always', + 'obeys', 'the', 'commands', 'of', 'the', 'party'] + ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', + 'ensures', 'that', 'the', 'military', 'will', 'forever', + 'heed', 'Party', 'commands'] + ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which', + 'guarantees', 'the', 'military', 'forces', 'always', + 'being', 'under', 'the', 'command', 'of', 'the', 'Party'] + ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', + 'army', 'always', 'to', 'heed', 'the', 'directions', + 'of', 'the', 'party'] + hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was', + 'interested', 'in', 'world', 'history'] + ref2a = ['he', 'was', 'interested', 'in', 'world', 'history', + 'because', 'he', 'read', 'the', 'book'] + weight_1 = (1, 0, 0, 0) + weight_2 = (0, 1, 0, 0) + weight_3 = (0, 0, 1, 0) + + bleu_scores = corpus_bleu_multiple_weights( + list_of_references=[[ref1a, ref1b, ref1c], [ref2a]], + hypotheses=[hyp1, hyp2], + weights=[weight_1, weight_2, weight_3], + ) + assert bleu_scores[weight_1] == corpus_bleu([[ref1a, ref1b, ref1c], [ref2a]], + [hyp1, hyp2], weight_1) + assert bleu_scores[weight_2] == corpus_bleu([[ref1a, ref1b, ref1c], [ref2a]], + [hyp1, hyp2], weight_2) + assert bleu_scores[weight_3] == corpus_bleu([[ref1a, ref1b, ref1c], [ref2a]], + [hyp1, hyp2], weight_3) diff --git a/nltk/translate/bleu_score.py b/nltk/translate/bleu_score.py index a78a364a26..aa5222970e 100644 --- a/nltk/translate/bleu_score.py +++ b/nltk/translate/bleu_score.py @@ -106,6 +106,16 @@ def sentence_bleu( ) +def corpus_bleu_multiple_weights( + list_of_references, + hypotheses, + weights, + smoothing_function=None, + auto_reweigh=False +): + pass + + def corpus_bleu( 
list_of_references, hypotheses,