diff --git a/nltk/internals.py b/nltk/internals.py index 0aa36b7ccf..ba92adb168 100644 --- a/nltk/internals.py +++ b/nltk/internals.py @@ -238,11 +238,11 @@ def read_str(s, start_position): break # Process it, using eval. Strings with invalid escape sequences - # might raise ValueEerror. + # might raise ValueError. try: return eval(s[start_position : match.end()]), match.end() except ValueError as e: - raise ReadError("invalid string (%s)" % e) from e + raise ReadError("valid escape sequence", start_position) from e _READ_INT_RE = re.compile(r"-?\d+") diff --git a/nltk/test/internals.doctest b/nltk/test/internals.doctest index 742db1c287..38688c3272 100644 --- a/nltk/test/internals.doctest +++ b/nltk/test/internals.doctest @@ -138,3 +138,23 @@ It works for classic classes, too: False >>> overridden(D().f) True + + +read_str() +~~~~~~~~~~~~ + >>> from nltk.internals import read_str + +Test valid scenarios + + >>> read_str("'valid string'", 0) + ('valid string', 14) + +Now test invalid scenarios + >>> read_str("should error", 0) + Traceback (most recent call last): + ... + nltk.internals.ReadError: Expected open quote at 0 + >>> read_str("'should error", 0) + Traceback (most recent call last): + ... + nltk.internals.ReadError: Expected close quote at 1 diff --git a/nltk/util.py b/nltk/util.py index da843c3dc4..2b41871fa3 100644 --- a/nltk/util.py +++ b/nltk/util.py @@ -1013,7 +1013,7 @@ def skipgrams(sequence, n, k, **kwargs): ###################################################################### # inherited from pywordnet, by Oliver Steele -def binary_search_file(file, key, cache={}, cacheDepth=-1): +def binary_search_file(file, key, cache=None, cacheDepth=-1): """ Return the line from the file with first word key. Searches through a sorted file using the binary search algorithm. @@ -1036,6 +1036,9 @@ def binary_search_file(file, key, cache={}, cacheDepth=-1): end = file.tell() - 1 file.seek(0) + if cache is None: + cache = {} + while start < end: lastState = start, end middle = (start + end) // 2