Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes some spurious typos and checks #2336

Merged
Merged 4 commits on Jul 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
132 changes: 67 additions & 65 deletions nltk/parse/malt.py
Expand Up @@ -328,72 +328,74 @@ def train_from_file(self, conll_file, verbose=False):


if __name__ == '__main__':
'''
"""
A demonstration function to show how NLTK users can use the malt parser API.

>>> from nltk import pos_tag
>>> assert 'MALT_PARSER' in os.environ, str(
... "Please set MALT_PARSER in your global environment, e.g.:\n"
... "$ export MALT_PARSER='/home/user/maltparser-1.7.2/'")
>>>
>>> assert 'MALT_MODEL' in os.environ, str(
... "Please set MALT_MODEL in your global environment, e.g.:\n"
... "$ export MALT_MODEL='/home/user/engmalt.linear-1.7.mco'")
>>>
>>> _dg1_str = str("1 John _ NNP _ _ 2 SUBJ _ _\n"
... "2 sees _ VB _ _ 0 ROOT _ _\n"
... "3 a _ DT _ _ 4 SPEC _ _\n"
... "4 dog _ NN _ _ 2 OBJ _ _\n"
... "5 . _ . _ _ 2 PUNCT _ _\n")
>>>
>>>
>>> _dg2_str = str("1 John _ NNP _ _ 2 SUBJ _ _\n"
... "2 walks _ VB _ _ 0 ROOT _ _\n"
... "3 . _ . _ _ 2 PUNCT _ _\n")
>>> dg1 = DependencyGraph(_dg1_str)
>>> dg2 = DependencyGraph(_dg2_str)
>>> # Initialize a MaltParser object
>>> parser_dirname = 'maltparser-1.7.2'
>>> mp = MaltParser(parser_dirname=parser_dirname)
>>>
>>> # Trains a model.
>>> mp.train([dg1,dg2], verbose=False)
>>> sent1 = ['John','sees','Mary', '.']
>>> sent2 = ['John', 'walks', 'a', 'dog', '.']
>>>
>>> # Parse a single sentence.
>>> parsed_sent1 = mp.parse_one(sent1)
>>> parsed_sent2 = mp.parse_one(sent2)
>>> print (parsed_sent1.tree())
(sees John Mary .)
>>> print (parsed_sent2.tree())
(walks John (dog a) .)
>>>
>>> # Parsing multiple sentences.
>>> sentences = [sent1,sent2]
>>> parsed_sents = mp.parse_sents(sentences)
>>> print(next(next(parsed_sents)).tree())
(sees John Mary .)
>>> print(next(next(parsed_sents)).tree())
(walks John (dog a) .)
>>>
>>> # Initialize a MaltParser object with an English pre-trained model.
>>> parser_dirname = 'maltparser-1.7.2'
>>> model_name = 'engmalt.linear-1.7.mco'
>>> mp = MaltParser(parser_dirname=parser_dirname, model_filename=model_name, tagger=pos_tag)
>>> sent1 = 'I shot an elephant in my pajamas .'.split()
>>> sent2 = 'Time flies like banana .'.split()
>>> # Parse a single sentence.
>>> print(mp.parse_one(sent1).tree())
(shot I (elephant an) (in (pajamas my)) .)
# Parsing multiple sentences
>>> sentences = [sent1,sent2]
>>> parsed_sents = mp.parse_sents(sentences)
>>> print(next(next(parsed_sents)).tree())
(shot I (elephant an) (in (pajamas my)) .)
>>> print(next(next(parsed_sents)).tree())
(flies Time (like banana) .)
'''
>>> from nltk import pos_tag
>>> assert 'MALT_PARSER' in os.environ, str(
... "Please set MALT_PARSER in your global environment, e.g.:\n"
... "$ export MALT_PARSER='/home/user/maltparser-1.7.2/'")
>>>
>>> assert 'MALT_MODEL' in os.environ, str(
... "Please set MALT_MODEL in your global environment, e.g.:\n"
... "$ export MALT_MODEL='/home/user/engmalt.linear-1.7.mco'")
>>>
>>> _dg1_str = str("1 John _ NNP _ _ 2 SUBJ _ _\n"
... "2 sees _ VB _ _ 0 ROOT _ _\n"
... "3 a _ DT _ _ 4 SPEC _ _\n"
... "4 dog _ NN _ _ 2 OBJ _ _\n"
... "5 . _ . _ _ 2 PUNCT _ _\n")
>>>
>>>
>>> _dg2_str = str("1 John _ NNP _ _ 2 SUBJ _ _\n"
... "2 walks _ VB _ _ 0 ROOT _ _\n"
... "3 . _ . _ _ 2 PUNCT _ _\n")
>>> dg1 = DependencyGraph(_dg1_str)
>>> dg2 = DependencyGraph(_dg2_str)
>>> # Initialize a MaltParser object
>>> parser_dirname = 'maltparser-1.7.2'
>>> mp = MaltParser(parser_dirname=parser_dirname)
>>>
>>> # Trains a model.
>>> mp.train([dg1,dg2], verbose=False)
>>> sent1 = ['John','sees','Mary', '.']
>>> sent2 = ['John', 'walks', 'a', 'dog', '.']
>>>
>>> # Parse a single sentence.
>>> parsed_sent1 = mp.parse_one(sent1)
>>> parsed_sent2 = mp.parse_one(sent2)
>>> print (parsed_sent1.tree())
(sees John Mary .)
>>> print (parsed_sent2.tree())
(walks John (dog a) .)
>>>
>>> # Parsing multiple sentences.
>>> sentences = [sent1,sent2]
>>> parsed_sents = mp.parse_sents(sentences)
>>> print(next(next(parsed_sents)).tree())
(sees John Mary .)
>>> print(next(next(parsed_sents)).tree())
(walks John (dog a) .)
>>>
>>> # Initialize a MaltParser object with an English pre-trained model.
>>> parser_dirname = 'maltparser-1.7.2'
>>> model_name = 'engmalt.linear-1.7.mco'
>>> mp = MaltParser(parser_dirname=parser_dirname, model_filename=model_name, tagger=pos_tag)
>>> sent1 = 'I shot an elephant in my pajamas .'.split()
>>> sent2 = 'Time flies like banana .'.split()
>>>
>>> # Parse a single sentence.
>>> print(mp.parse_one(sent1).tree())
(shot I (elephant an) (in (pajamas my)) .)
>>>
# Parsing multiple sentences
>>> sentences = [sent1,sent2]
>>> parsed_sents = mp.parse_sents(sentences)
>>> print(next(next(parsed_sents)).tree())
(shot I (elephant an) (in (pajamas my)) .)
>>> print(next(next(parsed_sents)).tree())
(flies Time (like banana) .)
"""

import doctest

doctest.testmod()
6 changes: 3 additions & 3 deletions nltk/test/corpus.doctest
Expand Up @@ -94,7 +94,7 @@ If the reader methods are called without any arguments, they will
typically load all documents in the corpus.

>>> len(inaugural.words())
145735
149797

If a corpus contains a README file, it can be accessed with a ``readme()`` method:

Expand Down Expand Up @@ -387,8 +387,8 @@ examples illustrate the use of the wordlist corpora:

>>> stopwords.fileids() # doctest: +ELLIPSIS
['arabic', 'azerbaijani', 'danish', 'dutch', 'english', 'finnish', 'french', ...]
>>> stopwords.words('portuguese') # doctest: +ELLIPSIS
['de', 'a', 'o', 'que', 'e', 'do', 'da', 'em', 'um', 'para', ...]
>>> sorted(stopwords.words('portuguese')) # doctest: +ELLIPSIS
['a', 'ao', 'aos', 'aquela', 'aquelas', 'aquele', 'aqueles', ...]
>>> names.fileids()
['female.txt', 'male.txt']
>>> names.words('male.txt') # doctest: +ELLIPSIS
Expand Down
6 changes: 3 additions & 3 deletions nltk/test/unit/test_wordnet.py
Expand Up @@ -198,13 +198,13 @@ def test_wordnet_similarities(self):
)

def test_omw_lemma_no_trailing_underscore(self):
expected = [
expected = sorted([
u'popolna_sprememba_v_mišljenju',
u'popoln_obrat',
u'preobrat',
u'preobrat_v_mišljenju'
]
self.assertEqual(S('about-face.n.02').lemma_names(lang='slv'), expected)
])
self.assertEqual(sorted(S('about-face.n.02').lemma_names(lang='slv')), expected)

def test_iterable_type_for_all_lemma_names(self):
# Duck-test for iterables.
Expand Down