Skip to content

Commit

Permalink
Creates unit test for multiple ngram BLEU score function
Browse files Browse the repository at this point in the history
nltk#2320

The corpus_bleu function is inefficient when used with several
different weightings: it recalculates the underlying values on
every call instead of reusing them.

* Creates a unit test with the expected behavior of a more general
  function that can take multiple weightings and return multiple
  BLEU scores
  • Loading branch information
agannon committed Oct 7, 2019
1 parent 1d7f0d5 commit bbde4b9
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 1 deletion.
37 changes: 36 additions & 1 deletion nltk/test/unit/translate/test_bleu.py
Expand Up @@ -12,7 +12,7 @@
modified_precision,
brevity_penalty,
closest_ref_length,
)
corpus_bleu_multiple_weights)
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, SmoothingFunction


Expand Down Expand Up @@ -269,3 +269,38 @@ def test_corpus_bleu_with_bad_sentence(self):
)
except AttributeError: # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
self.assertAlmostEqual(corpus_bleu(references, hypotheses), 0.0, places=4)


class TestBLEUWithMultipleWeights(unittest.TestCase):
    """Tests for scoring one corpus under several n-gram weightings at once."""

    def test_corpus_bleu_with_multiple_weights(self):
        """
        Every score returned by ``corpus_bleu_multiple_weights`` must match
        the score ``corpus_bleu`` gives for the same weighting on its own.
        """
        hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
                'ensures', 'that', 'the', 'military', 'always',
                'obeys', 'the', 'commands', 'of', 'the', 'party']
        ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
                 'ensures', 'that', 'the', 'military', 'will', 'forever',
                 'heed', 'Party', 'commands']
        ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
                 'guarantees', 'the', 'military', 'forces', 'always',
                 'being', 'under', 'the', 'command', 'of', 'the', 'Party']
        ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
                 'army', 'always', 'to', 'heed', 'the', 'directions',
                 'of', 'the', 'party']
        hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
                'interested', 'in', 'world', 'history']
        ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
                 'because', 'he', 'read', 'the', 'book']

        # Name the corpus once so both functions are guaranteed to score
        # exactly the same references and hypotheses.
        list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
        hypotheses = [hyp1, hyp2]

        # One weighting per n-gram order (unigram-, bigram-, trigram-only).
        weight_1 = (1, 0, 0, 0)
        weight_2 = (0, 1, 0, 0)
        weight_3 = (0, 0, 1, 0)
        weights = [weight_1, weight_2, weight_3]

        bleu_scores = corpus_bleu_multiple_weights(
            list_of_references=list_of_references,
            hypotheses=hypotheses,
            weights=weights,
        )

        # assertAlmostEqual rather than a bare ``assert ... ==``: the two
        # functions may reach the same score through different floating-point
        # paths, and bare asserts vanish when Python runs with -O.
        for weight in weights:
            self.assertAlmostEqual(
                bleu_scores[weight],
                corpus_bleu(list_of_references, hypotheses, weight),
            )
10 changes: 10 additions & 0 deletions nltk/translate/bleu_score.py
Expand Up @@ -106,6 +106,16 @@ def sentence_bleu(
)


def corpus_bleu_multiple_weights(
    list_of_references,
    hypotheses,
    weights,
    smoothing_function=None,
    auto_reweigh=False
):
    """
    Calculate one corpus-level BLEU score for each of several weightings.

    This is a baseline implementation: it delegates to ``corpus_bleu`` once
    per weighting and does not share any intermediate work between those
    calls.

    :param list_of_references: forwarded unchanged to ``corpus_bleu``
    :param hypotheses: forwarded unchanged to ``corpus_bleu``
    :param weights: an iterable of weightings, each itself an iterable of
        per-n-gram-order weights, e.g. ``[(1, 0, 0, 0), (0, 1, 0, 0)]``
    :param smoothing_function: forwarded unchanged to ``corpus_bleu``
    :param auto_reweigh: forwarded unchanged to ``corpus_bleu``
    :return: a mapping from each weighting (as a tuple) to the score
        ``corpus_bleu`` returns for it; empty when ``weights`` is empty
    :rtype: dict
    """
    scores = {}
    for weighting in weights:
        # Dict keys must be hashable, so a weighting supplied as a list is
        # stored (and looked up by callers) as the equivalent tuple.
        key = tuple(weighting)
        scores[key] = corpus_bleu(
            list_of_references,
            hypotheses,
            weights=key,
            smoothing_function=smoothing_function,
            auto_reweigh=auto_reweigh,
        )
    return scores


def corpus_bleu(
list_of_references,
hypotheses,
Expand Down

0 comments on commit bbde4b9

Please sign in to comment.