diff --git a/nltk/corpus/__init__.py b/nltk/corpus/__init__.py index e951c6028c..d348810a4f 100644 --- a/nltk/corpus/__init__.py +++ b/nltk/corpus/__init__.py @@ -361,6 +361,9 @@ WordNetCorpusReader, LazyCorpusLoader("omw", CorpusReader, r".*/wn-data-.*\.tab", encoding="utf8"), ) +wordnet31 = LazyCorpusLoader("wordnet31", WordNetCorpusReader, None) +# wordnet2021 is scheduled for release in 2021 :) +# wordnet2021 = LazyCorpusLoader("wordnet2021", WordNetCorpusReader, None) wordnet_ic = LazyCorpusLoader("wordnet_ic", WordNetICCorpusReader, r".*\.dat") words = LazyCorpusLoader( "words", WordListCorpusReader, r"(?!README|\.).*", encoding="ascii" diff --git a/nltk/corpus/reader/wordnet.py b/nltk/corpus/reader/wordnet.py index 580ea3a853..fda24a4360 100644 --- a/nltk/corpus/reader/wordnet.py +++ b/nltk/corpus/reader/wordnet.py @@ -32,6 +32,7 @@ import math import re +import warnings from collections import defaultdict, deque from functools import total_ordering from itertools import chain, islice @@ -1130,6 +1131,11 @@ def __init__(self, root, omw_reader): Construct a new wordnet corpus reader, with the given root directory. """ + if omw_reader is None: + warnings.warn( + "The multilingual functions are not available with this Wordnet version" + ) + super().__init__(root, self._FILES, encoding=self._ENCODING) # A index that provides the file offset @@ -1287,8 +1293,9 @@ def _compute_max_depth(self, pos, simulate_root): def get_version(self): fh = self._data_file(ADJ) + fh.seek(0) for line in fh: - match = re.search(r"WordNet (\d+\.\d+) Copyright", line) + match = re.search(r"Word[nN]et (\d+|\d+\.\d+) Copyright", line) if match is not None: version = match.group(1) fh.seek(0) diff --git a/nltk/test/wordnet.doctest b/nltk/test/wordnet.doctest index e1ed56657b..642f19ab5b 100644 --- a/nltk/test/wordnet.doctest +++ b/nltk/test/wordnet.doctest @@ -758,6 +758,27 @@ classified.a.02): [Synset('restricted.a.01'), [Synset('classified.a.02')]]]] +---------------------------------------------------------------- +Loading alternative Wordnet versions +---------------------------------------------------------------- + + >>> print("Wordnet {}".format(wn.get_version())) + Wordnet 3.0 + + >>> from nltk.corpus import wordnet31 as wn31 + >>> print("Wordnet {}".format(wn31.get_version())) + Wordnet 3.1 + + >>> print(wn.synset('restrain.v.01').hyponyms()) + [Synset('confine.v.03'), Synset('control.v.02'), Synset('hold.v.36'), Synset('inhibit.v.04')] + + >>> print(wn31.synset('restrain.v.01').hyponyms()) + [Synset('enchain.v.01'), Synset('fetter.v.01'), Synset('ground.v.02'), Synset('impound.v.02'), Synset('pen_up.v.01'), Synset('pinion.v.01'), Synset('pound.v.06'), Synset('tie_down.v.01')] + + >>> print(wn31.synset('restrain.v.04').hyponyms()) + [Synset('baffle.v.03'), Synset('confine.v.02'), Synset('control.v.02'), Synset('hold.v.36'), Synset('rule.v.07'), Synset('swallow.v.06'), Synset('wink.v.04')] + + ------------- Teardown test -------------