From bd756be730ef83fcb0e351bbf87716ad03889b62 Mon Sep 17 00:00:00 2001 From: Devashish Lal Date: Fri, 14 Jun 2019 02:13:33 +0530 Subject: [PATCH 1/5] Plot function bugs fixed - fixed matplot lib import - fixed samples query (v definition_ - fixed frequency and labels logic --- nltk/probability.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/nltk/probability.py b/nltk/probability.py index 80ee1180de..a77d071e2d 100755 --- a/nltk/probability.py +++ b/nltk/probability.py @@ -1904,7 +1904,7 @@ def plot(self, *args, **kwargs): :type conditions: list """ try: - from matplotlib import plt + import matplotlib.pyplot as plt #import statment fix except ImportError: raise ValueError( 'The plot function requires matplotlib to be installed.' @@ -1917,32 +1917,37 @@ def plot(self, *args, **kwargs): title = _get_kwarg(kwargs, 'title', '') samples = _get_kwarg( kwargs, 'samples', sorted(set(v for c in conditions - if v in self for v in self[c])) ) # this computation could be wasted if "linewidth" not in kwargs: kwargs["linewidth"] = 2 - + freqs = [] for condition in conditions: if cumulative: - freqs = list(self[condition]._cumulative_frequencies(samples)) + # freqs should be a list of list where each sub list will be a frequency of a condition + freqs.append(list(self[condition]._cumulative_frequencies(samples))) ylabel = "Cumulative Counts" legend_loc = 'lower right' if percents: - freqs = [f / freqs[len(freqs) - 1] * 100 for f in freqs] + freqs[-1] = [f / freqs[len(freqs) - 1] * 100 for f in freqs] ylabel = "Cumulative Percents" else: - freqs = [self[condition][sample] for sample in samples] + freqs.append([self[condition][sample] for sample in samples]) ylabel = "Counts" legend_loc = 'upper right' # percents = [f * 100 for f in freqs] only in ConditionalProbDist? - kwargs['label'] = "%s" % condition - ax.plot(freqs, *args, **kwargs) - + + ax = plt.gca() + i = 0 + for freq in freqs: + kwargs['label'] = conditions[i] #label for each condition + i += 1 + ax.plot(freq, *args, **kwargs) ax.legend(loc=legend_loc) ax.grid(True, color="silver") - ax.set_xticks(range(len(samples)), [text_type(s) for s in samples], rotation=90) + ax.set_xticks(range(len(samples))) + ax.set_xticklabels([text_type(s) for s in samples], rotation=90) if title: ax.set_title(title) ax.set_xlabel("Samples") From 86a7d88c5f32b50c583c2dcca46669eb4a8d4876 Mon Sep 17 00:00:00 2001 From: Devashish Lal Date: Fri, 14 Jun 2019 10:04:34 +0530 Subject: [PATCH 2/5] nonexistent keys fixed if added to samples query to check if c in self --- nltk/probability.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nltk/probability.py b/nltk/probability.py index a77d071e2d..d098963c17 100755 --- a/nltk/probability.py +++ b/nltk/probability.py @@ -1917,6 +1917,7 @@ def plot(self, *args, **kwargs): title = _get_kwarg(kwargs, 'title', '') samples = _get_kwarg( kwargs, 'samples', sorted(set(v for c in conditions + if c in self for v in self[c])) ) # this computation could be wasted if "linewidth" not in kwargs: From 3c4bfa0cd4ca8cd5d75c5f961dde64dfab01a42d Mon Sep 17 00:00:00 2001 From: Devashish Lal Date: Fri, 14 Jun 2019 10:13:41 +0530 Subject: [PATCH 3/5] name added --- AUTHORS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS.md b/AUTHORS.md index eef7f03b25..23e177a8a0 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -252,7 +252,8 @@ - Osman Zubair - Viresh Gupta - Ondřej Cífka -- Iris X. Zhou +- Iris X. Zhou +- Devashish Lal ## Others whose work we've taken and included in NLTK, but who didn't directly contribute it: From 7d8bef3a64ab6ec68c3c15cb3b9ccde791ad1efd Mon Sep 17 00:00:00 2001 From: Devashish Lal Date: Fri, 14 Jun 2019 10:41:31 +0530 Subject: [PATCH 4/5] test_polt inputs fixed conditions input for the test plot should be a list for strings rather than a string --- nltk/test/unit/test_cfd_mutation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nltk/test/unit/test_cfd_mutation.py b/nltk/test/unit/test_cfd_mutation.py index befa1fc1df..7e21d7e88a 100644 --- a/nltk/test/unit/test_cfd_mutation.py +++ b/nltk/test/unit/test_cfd_mutation.py @@ -16,7 +16,7 @@ def test_plot(self): empty = ConditionalFreqDist() self.assertEqual(empty.conditions(),[]) try: - empty.plot(conditions="BUG") # nonexistent keys shouldn't be added + empty.plot(conditions=["BUG"]) # nonexistent keys shouldn't be added except: pass self.assertEqual(empty.conditions(),[]) From ecdcc57b56ea9109872cf442f6445f68f0acdbb9 Mon Sep 17 00:00:00 2001 From: Devashish Lal Date: Fri, 14 Jun 2019 10:53:12 +0530 Subject: [PATCH 5/5] fixed retrieval of conditions from kwargs previously samples used to check if c in self and that would pass the unit test The output would be a graph with no plots but a legend with non existent conditions The correct output should have no conditions in the legend for non existent keys Hence in the case of conditions being supplied through kwargs it need to be checked with self(conditions exist in self or not) --- nltk/probability.py | 68 ++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/nltk/probability.py b/nltk/probability.py index d098963c17..9dc110c0a2 100755 --- a/nltk/probability.py +++ b/nltk/probability.py @@ -1913,46 +1913,46 @@ def plot(self, *args, **kwargs): cumulative = _get_kwarg(kwargs, 'cumulative', False) percents = _get_kwarg(kwargs, 'percents', False) - conditions = _get_kwarg(kwargs, 'conditions', sorted(self.conditions())) + conditions = [c for c in _get_kwarg(kwargs, 'conditions', self.conditions()) if c in self] # conditions should be in self title = _get_kwarg(kwargs, 'title', '') samples = _get_kwarg( - kwargs, 'samples', sorted(set(v for c in conditions - if c in self - for v in self[c])) + kwargs, 'samples', sorted(set(v + for c in conditions + for v in self[c])) ) # this computation could be wasted if "linewidth" not in kwargs: kwargs["linewidth"] = 2 - freqs = [] - for condition in conditions: - if cumulative: - # freqs should be a list of list where each sub list will be a frequency of a condition - freqs.append(list(self[condition]._cumulative_frequencies(samples))) - ylabel = "Cumulative Counts" - legend_loc = 'lower right' - if percents: - freqs[-1] = [f / freqs[len(freqs) - 1] * 100 for f in freqs] - ylabel = "Cumulative Percents" - else: - freqs.append([self[condition][sample] for sample in samples]) - ylabel = "Counts" - legend_loc = 'upper right' - # percents = [f * 100 for f in freqs] only in ConditionalProbDist? - - ax = plt.gca() - i = 0 - for freq in freqs: - kwargs['label'] = conditions[i] #label for each condition - i += 1 - ax.plot(freq, *args, **kwargs) - ax.legend(loc=legend_loc) - ax.grid(True, color="silver") - ax.set_xticks(range(len(samples))) - ax.set_xticklabels([text_type(s) for s in samples], rotation=90) - if title: - ax.set_title(title) - ax.set_xlabel("Samples") - ax.set_ylabel(ylabel) + if (len(conditions) != 0): + freqs = [] + for condition in conditions: + if cumulative: + # freqs should be a list of list where each sub list will be a frequency of a condition + freqs.append(list(self[condition]._cumulative_frequencies(samples))) + ylabel = "Cumulative Counts" + legend_loc = 'lower right' + if percents: + freqs[-1] = [f / freqs[len(freqs) - 1] * 100 for f in freqs] + ylabel = "Cumulative Percents" + else: + freqs.append([self[condition][sample] for sample in samples]) + ylabel = "Counts" + legend_loc = 'upper right' + # percents = [f * 100 for f in freqs] only in ConditionalProbDist? + + i = 0 + for freq in freqs: + kwargs['label'] = conditions[i] #label for each condition + i += 1 + ax.plot(freq, *args, **kwargs) + ax.legend(loc=legend_loc) + ax.grid(True, color="silver") + ax.set_xticks(range(len(samples))) + ax.set_xticklabels([text_type(s) for s in samples], rotation=90) + if title: + ax.set_title(title) + ax.set_xlabel("Samples") + ax.set_ylabel(ylabel) plt.show() return ax