nltk · tomaarsen · Dec 16, 2021 · Nov 28, 2021 · Nov 28, 2021 · Dec 16, 2021
diff --git a/.gitignore b/.gitignore
@@ -13,6 +13,7 @@ web/_build
 # Test artifacts and coverage reports
 *.tox
 *.errs
+.hypothesis
 .noseids
 .coverage*
 nltk/test/*.html

diff --git a/nltk/parse/bllip.py b/nltk/parse/bllip.py
@@ -100,11 +100,11 @@ def _ensure_bllip_import_or_error(ie=ie):
 def _ensure_ascii(words):
     try:
         for i, word in enumerate(words):
-            word.decode("ascii")
-    except UnicodeDecodeError as e:
+            word.encode("ascii")
+    except UnicodeEncodeError as e:
         raise ValueError(
-            "Token %d (%r) is non-ASCII. BLLIP Parser "
-            "currently doesn't support non-ASCII inputs." % (i, word)
+            f"Token {i} ({word!r}) is non-ASCII. BLLIP Parser "
+            "currently doesn't support non-ASCII inputs."
         ) from e
 
 
@@ -163,7 +163,7 @@ def __init__(
             self.rrp.load_reranker_model(
                 features_filename=reranker_features,
                 weights_filename=reranker_weights,
-                **reranker_options
+                **reranker_options,
             )
 
     def parse(self, sentence):

diff --git a/nltk/test/unit/test_bllip.py b/nltk/test/unit/test_bllip.py
@@ -0,0 +1,49 @@
+"""Unit tests for the BLLIP parser wrapper in ``nltk.parse.bllip``."""
+
+import pytest
+
+from nltk.data import find
+from nltk.parse.bllip import BllipParser
+from nltk.tree import Tree
+
+
+@pytest.fixture(scope="module")
+def parser():
+    """Load a ``BllipParser`` once per module from ``models/bllip_wsj_no_aux``."""
+    model_dir = find("models/bllip_wsj_no_aux").path
+    return BllipParser.from_unified_model_dir(model_dir)
+
+
+def setup_module():
+    """Skip every test in this module when ``bllipparser`` cannot be imported."""
+    pytest.importorskip("bllipparser")
+
+
+class TestBllipParser:
+    """Checks on the trees produced by ``parse`` and ``tagged_parse``."""
+
+    def test_parser_loads_a_valid_tree(self, parser):
+        """The first parse is a ``Tree`` with the expected bracketing."""
+        parsed = parser.parse("I saw the man with the telescope")
+        tree = next(parsed)
+
+        assert isinstance(tree, Tree)
+        assert (
+            tree.pformat()
+            == """
+(S1
+  (S
+    (NP (PRP I))
+    (VP
+      (VBD saw)
+      (NP (DT the) (NN man))
+      (PP (IN with) (NP (DT the) (NN telescope))))))
+""".strip()
+        )
+
+    def test_tagged_parse_finds_matching_element(self, parser):
+        """``tagged_parse`` on one (word, tag) pair yields a one-leaf ``Tree``."""
+        tagged_tree = next(parser.tagged_parse([("telescope", "NN")]))
+
+        assert isinstance(tagged_tree, Tree)
+        assert tagged_tree.pformat() == "(S1 (NP (NN telescope)))"