From c692b0d6433cd298d93360c04236355b3f107edc Mon Sep 17 00:00:00 2001 From: Robby Horvath Date: Thu, 14 Oct 2021 06:35:14 -0400 Subject: [PATCH] Fix Bleu Score smoothing function from taking log(0) (#2839) * Add conditional to prevent log(0) * Add name to AUTHORS.md * Add smoothing 4 test Co-authored-by: Steven Bird --- AUTHORS.md | 1 + nltk/test/unit/translate/test_bleu.py | 10 ++++++++++ nltk/translate/bleu_score.py | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/AUTHORS.md b/AUTHORS.md index 49e99d05d1..2b4a88f98f 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -280,6 +280,7 @@ - Hiroki Teranishi - Ruben Cartuyvels - Dalton Pearson +- Robby Horvath - Gavish Poddar - Saibo Geng - Ahmet Yildirim diff --git a/nltk/test/unit/translate/test_bleu.py b/nltk/test/unit/translate/test_bleu.py index fa26df5823..e6afbd7dc0 100644 --- a/nltk/test/unit/translate/test_bleu.py +++ b/nltk/test/unit/translate/test_bleu.py @@ -181,6 +181,16 @@ def test_empty_hypothesis(self): hypothesis = [] assert sentence_bleu(references, hypothesis) == 0 + def test_length_one_hypothesis(self): + # Test case where the hypothesis is of length 1 in smoothing method 4. + references = ["The candidate has no alignment to any of the references".split()] + hypothesis = ["Foo"] + method4 = SmoothingFunction().method4 + try: + sentence_bleu(references, hypothesis, smoothing_function=method4) + except ValueError: + pass # unittest.TestCase.assertWarns is only supported in Python >= 3.2. + def test_empty_references(self): # Test case where there's reference is empty. 
references = [[]] diff --git a/nltk/translate/bleu_score.py b/nltk/translate/bleu_score.py index 29e93a731e..b0d50e57d9 100644 --- a/nltk/translate/bleu_score.py +++ b/nltk/translate/bleu_score.py @@ -215,7 +215,7 @@ def corpus_bleu( p_n = smoothing_function( p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths ) - s = (w_i * math.log(p_i) for w_i, p_i in zip(weights, p_n)) + s = (w_i * math.log(p_i) for w_i, p_i in zip(weights, p_n) if p_i > 0) s = bp * math.exp(math.fsum(s)) return s