Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support alternative Wordnet versions #2860

Merged
7 commits merged on Oct 26, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions nltk/corpus/__init__.py
Expand Up @@ -361,6 +361,9 @@
WordNetCorpusReader,
LazyCorpusLoader("omw", CorpusReader, r".*/wn-data-.*\.tab", encoding="utf8"),
)
# Alternative WordNet version (3.1). The loader call just above passes an
# "omw" LazyCorpusLoader in the third position; here None is passed instead,
# so no multilingual (OMW) reader is attached.
# NOTE(review): presumably this None reaches WordNetCorpusReader.__init__ as
# `omw_reader`, which emits a warning in that case -- confirm.
wordnet31 = LazyCorpusLoader("wordnet31", WordNetCorpusReader, None)
# wordnet2021 is scheduled for release in 2021 :)
# wordnet2021 = LazyCorpusLoader("wordnet2021", WordNetCorpusReader, None)
wordnet_ic = LazyCorpusLoader("wordnet_ic", WordNetICCorpusReader, r".*\.dat")
words = LazyCorpusLoader(
"words", WordListCorpusReader, r"(?!README|\.).*", encoding="ascii"
Expand Down
8 changes: 7 additions & 1 deletion nltk/corpus/reader/wordnet.py
Expand Up @@ -32,6 +32,7 @@

import math
import re
import warnings
from collections import defaultdict, deque
from functools import total_ordering
from itertools import chain, islice
Expand Down Expand Up @@ -1130,6 +1131,11 @@ def __init__(self, root, omw_reader):
Construct a new wordnet corpus reader, with the given root
directory.
"""
if omw_reader is None:
warnings.warn(
"The multilingual functions are not available with this Wordnet version"
)

super().__init__(root, self._FILES, encoding=self._ENCODING)

# An index that provides the file offset
Expand Down Expand Up @@ -1288,7 +1294,7 @@ def _compute_max_depth(self, pos, simulate_root):
def get_version(self):
fh = self._data_file(ADJ)
for line in fh:
match = re.search(r"WordNet (\d+\.\d+) Copyright", line)
match = re.search(r"Word[nN]et (\d+|\d+\.\d+) Copyright", line)
if match is not None:
version = match.group(1)
fh.seek(0)
Expand Down
21 changes: 21 additions & 0 deletions nltk/test/wordnet.doctest
Expand Up @@ -758,6 +758,27 @@ classified.a.02):
[Synset('restricted.a.01'), [Synset('classified.a.02')]]]]


----------------------------------------------------------------
Loading alternative Wordnet versions
----------------------------------------------------------------

>>> print("Wordnet {}".format(wn.get_version()))
Wordnet 3.0

>>> from nltk.corpus import wordnet31 as wn31
>>> print("Wordnet {}".format(wn31.get_version()))
Wordnet 3.1

>>> print(wn.synset('restrain.v.01').hyponyms())
[Synset('confine.v.03'), Synset('control.v.02'), Synset('hold.v.36'), Synset('inhibit.v.04')]

>>> print(wn31.synset('restrain.v.01').hyponyms())
[Synset('enchain.v.01'), Synset('fetter.v.01'), Synset('ground.v.02'), Synset('impound.v.02'), Synset('pen_up.v.01'), Synset('pinion.v.01'), Synset('pound.v.06'), Synset('tie_down.v.01')]

>>> print(wn31.synset('restrain.v.04').hyponyms())
[Synset('baffle.v.03'), Synset('confine.v.02'), Synset('control.v.02'), Synset('hold.v.36'), Synset('rule.v.07'), Synset('swallow.v.06'), Synset('wink.v.04')]


-------------
Teardown test
-------------
Expand Down