diff --git a/nltk/grammar.py b/nltk/grammar.py index 49c38300ac..630d5c6f78 100644 --- a/nltk/grammar.py +++ b/nltk/grammar.py @@ -5,6 +5,7 @@ # Edward Loper # Jason Narad # Peter Ljunglöf +# Tom Aarsen <> # URL: # For license information, see LICENSE.TXT # @@ -1317,7 +1318,7 @@ def _read_fcfg_production(input, fstruct_reader): _ARROW_RE = re.compile(r"\s* -> \s*", re.VERBOSE) _PROBABILITY_RE = re.compile(r"( \[ [\d\.]+ \] ) \s*", re.VERBOSE) -_TERMINAL_RE = re.compile(r'( "[^"]+" | \'[^\']+\' ) \s*', re.VERBOSE) +_TERMINAL_RE = re.compile(r'( "[^"]*" | \'[^\']*\' ) \s*', re.VERBOSE) _DISJUNCTION_RE = re.compile(r"\| \s*", re.VERBOSE) @@ -1531,48 +1532,6 @@ def cfg_demo(): print() -toy_pcfg1 = PCFG.fromstring( - """ - S -> NP VP [1.0] - NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15] - Det -> 'the' [0.8] | 'my' [0.2] - N -> 'man' [0.5] | 'telescope' [0.5] - VP -> VP PP [0.1] | V NP [0.7] | V [0.2] - V -> 'ate' [0.35] | 'saw' [0.65] - PP -> P NP [1.0] - P -> 'with' [0.61] | 'under' [0.39] - """ -) - -toy_pcfg2 = PCFG.fromstring( - """ - S -> NP VP [1.0] - VP -> V NP [.59] - VP -> V [.40] - VP -> VP PP [.01] - NP -> Det N [.41] - NP -> Name [.28] - NP -> NP PP [.31] - PP -> P NP [1.0] - V -> 'saw' [.21] - V -> 'ate' [.51] - V -> 'ran' [.28] - N -> 'boy' [.11] - N -> 'cookie' [.12] - N -> 'table' [.13] - N -> 'telescope' [.14] - N -> 'hill' [.5] - Name -> 'Jack' [.52] - Name -> 'Bob' [.48] - P -> 'with' [.61] - P -> 'under' [.39] - Det -> 'the' [.41] - Det -> 'a' [.31] - Det -> 'my' [.28] - """ -) - - def pcfg_demo(): """ A demonstration showing how a ``PCFG`` can be created and used. 
@@ -1582,6 +1541,47 @@ def pcfg_demo(): from nltk.corpus import treebank from nltk.parse import pchart + toy_pcfg1 = PCFG.fromstring( + """ + S -> NP VP [1.0] + NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15] + Det -> 'the' [0.8] | 'my' [0.2] + N -> 'man' [0.5] | 'telescope' [0.5] + VP -> VP PP [0.1] | V NP [0.7] | V [0.2] + V -> 'ate' [0.35] | 'saw' [0.65] + PP -> P NP [1.0] + P -> 'with' [0.61] | 'under' [0.39] + """ + ) + + toy_pcfg2 = PCFG.fromstring( + """ + S -> NP VP [1.0] + VP -> V NP [.59] + VP -> V [.40] + VP -> VP PP [.01] + NP -> Det N [.41] + NP -> Name [.28] + NP -> NP PP [.31] + PP -> P NP [1.0] + V -> 'saw' [.21] + V -> 'ate' [.51] + V -> 'ran' [.28] + N -> 'boy' [.11] + N -> 'cookie' [.12] + N -> 'table' [.13] + N -> 'telescope' [.14] + N -> 'hill' [.5] + Name -> 'Jack' [.52] + Name -> 'Bob' [.48] + P -> 'with' [.61] + P -> 'under' [.39] + Det -> 'the' [.41] + Det -> 'a' [.31] + Det -> 'my' [.28] + """ + ) + pcfg_prods = toy_pcfg1.productions() pcfg_prod = pcfg_prods[2] diff --git a/nltk/parse/generate.py b/nltk/parse/generate.py index 951020afa9..9eaa925848 100644 --- a/nltk/parse/generate.py +++ b/nltk/parse/generate.py @@ -42,14 +42,11 @@ def _generate_all(grammar, items, depth): for frag1 in _generate_one(grammar, items[0], depth): for frag2 in _generate_all(grammar, items[1:], depth): yield frag1 + frag2 - except RuntimeError as _error: - if _error.message == "maximum recursion depth exceeded": - # Helpful error message while still showing the recursion stack. - raise RuntimeError( - "The grammar has rule(s) that yield infinite recursion!!" - ) from _error - else: - raise + except RecursionError as error: + # Helpful error message while still showing the recursion stack. + raise RuntimeError( + "The grammar has rule(s) that yield infinite recursion!" 
+ ) from error else: yield [] diff --git a/nltk/parse/util.py b/nltk/parse/util.py index b2714e0eca..66e62d303f 100644 --- a/nltk/parse/util.py +++ b/nltk/parse/util.py @@ -1,6 +1,7 @@ # Natural Language Toolkit: Parser Utility Functions # # Author: Ewan Klein +# Tom Aarsen <> # # Copyright (C) 2001-2021 NLTK Project # URL: diff --git a/nltk/parse/viterbi.py b/nltk/parse/viterbi.py index 3629292efc..c027652856 100644 --- a/nltk/parse/viterbi.py +++ b/nltk/parse/viterbi.py @@ -337,9 +337,50 @@ def demo(): import time from nltk import tokenize - from nltk.grammar import toy_pcfg1, toy_pcfg2 + from nltk.grammar import PCFG from nltk.parse import ViterbiParser + toy_pcfg1 = PCFG.fromstring( + """ + S -> NP VP [1.0] + NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15] + Det -> 'the' [0.8] | 'my' [0.2] + N -> 'man' [0.5] | 'telescope' [0.5] + VP -> VP PP [0.1] | V NP [0.7] | V [0.2] + V -> 'ate' [0.35] | 'saw' [0.65] + PP -> P NP [1.0] + P -> 'with' [0.61] | 'under' [0.39] + """ + ) + + toy_pcfg2 = PCFG.fromstring( + """ + S -> NP VP [1.0] + VP -> V NP [.59] + VP -> V [.40] + VP -> VP PP [.01] + NP -> Det N [.41] + NP -> Name [.28] + NP -> NP PP [.31] + PP -> P NP [1.0] + V -> 'saw' [.21] + V -> 'ate' [.51] + V -> 'ran' [.28] + N -> 'boy' [.11] + N -> 'cookie' [.12] + N -> 'table' [.13] + N -> 'telescope' [.14] + N -> 'hill' [.5] + Name -> 'Jack' [.52] + Name -> 'Bob' [.48] + P -> 'with' [.61] + P -> 'under' [.39] + Det -> 'the' [.41] + Det -> 'a' [.31] + Det -> 'my' [.28] + """ + ) + # Define two demos. Each demo has a sentence and a grammar. 
demos = [ ("I saw the man with my telescope", toy_pcfg1), diff --git a/nltk/test/generate.doctest b/nltk/test/generate.doctest index a23f53a9cc..ff9b96ddb8 100644 --- a/nltk/test/generate.doctest +++ b/nltk/test/generate.doctest @@ -64,3 +64,15 @@ The number of sentences of different max depths: 114 >>> len(list(generate(grammar))) 114 + +Infinite grammars will throw a RuntimeError (raised from the underlying RecursionError) when not bounded by some ``depth``: + + >>> grammar = CFG.fromstring(""" + ... S -> A B + ... A -> B + ... B -> "b" | A + ... """) + >>> list(generate(grammar)) + Traceback (most recent call last): + ... + RuntimeError: The grammar has rule(s) that yield infinite recursion! diff --git a/nltk/test/grammar.doctest b/nltk/test/grammar.doctest index 91349bc81a..fce33264af 100644 --- a/nltk/test/grammar.doctest +++ b/nltk/test/grammar.doctest @@ -46,3 +46,24 @@ Chomsky Normal Form grammar (Test for bug 474) >>> g = CFG.fromstring("VP^ -> VBP NP^") >>> g.productions()[0].lhs() VP^ + +Grammars can contain both empty strings and empty productions: + + >>> from nltk.grammar import CFG + >>> from nltk.parse.generate import generate + >>> grammar = CFG.fromstring(""" + ... S -> A B + ... A -> 'a' + ... # An empty string: + ... B -> 'b' | '' + ... """) + >>> list(generate(grammar)) + [['a', 'b'], ['a', '']] + >>> grammar = CFG.fromstring(""" + ... S -> A B + ... A -> 'a' + ... # An empty production: + ... B -> 'b' | + ... """) + >>> list(generate(grammar)) + [['a', 'b'], ['a']] diff --git a/nltk/test/parse.doctest b/nltk/test/parse.doctest index 818783bde8..187a1ca143 100644 --- a/nltk/test/parse.doctest +++ b/nltk/test/parse.doctest @@ -545,7 +545,43 @@ Unit tests for the Probabilistic CFG class >>> from nltk.corpus import treebank >>> from itertools import islice - >>> from nltk.grammar import PCFG, induce_pcfg, toy_pcfg1, toy_pcfg2 + >>> from nltk.grammar import PCFG, induce_pcfg + >>> toy_pcfg1 = PCFG.fromstring(""" + ... S -> NP VP [1.0] + ...
NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15] + ... Det -> 'the' [0.8] | 'my' [0.2] + ... N -> 'man' [0.5] | 'telescope' [0.5] + ... VP -> VP PP [0.1] | V NP [0.7] | V [0.2] + ... V -> 'ate' [0.35] | 'saw' [0.65] + ... PP -> P NP [1.0] + ... P -> 'with' [0.61] | 'under' [0.39] + ... """) + + >>> toy_pcfg2 = PCFG.fromstring(""" + ... S -> NP VP [1.0] + ... VP -> V NP [.59] + ... VP -> V [.40] + ... VP -> VP PP [.01] + ... NP -> Det N [.41] + ... NP -> Name [.28] + ... NP -> NP PP [.31] + ... PP -> P NP [1.0] + ... V -> 'saw' [.21] + ... V -> 'ate' [.51] + ... V -> 'ran' [.28] + ... N -> 'boy' [.11] + ... N -> 'cookie' [.12] + ... N -> 'table' [.13] + ... N -> 'telescope' [.14] + ... N -> 'hill' [.5] + ... Name -> 'Jack' [.52] + ... Name -> 'Bob' [.48] + ... P -> 'with' [.61] + ... P -> 'under' [.39] + ... Det -> 'the' [.41] + ... Det -> 'a' [.31] + ... Det -> 'my' [.28] + ... """) Create a set of PCFG productions.