Skip to content

Commit

Permalink
Lazily compile regular expressions to speed up load time
Browse files (browse the repository at this point in the history)
  • Loading branch information
ofek committed Nov 14, 2021
1 parent c151139 commit 1e905a7
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 27 deletions.
15 changes: 4 additions & 11 deletions tomli/_parser.py
Expand Up @@ -3,14 +3,7 @@
from typing import Any, BinaryIO, Dict, FrozenSet, Iterable, NamedTuple, Optional, Tuple
import warnings

from tomli._re import (
RE_DATETIME,
RE_LOCALTIME,
RE_NUMBER,
match_to_datetime,
match_to_localtime,
match_to_number,
)
from tomli._re import Patterns, match_to_datetime, match_to_localtime, match_to_number
from tomli._types import Key, ParseFloat, Pos

ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
Expand Down Expand Up @@ -605,21 +598,21 @@ def parse_value( # noqa: C901
return pos + 5, False

# Dates and times
datetime_match = RE_DATETIME.match(src, pos)
datetime_match = Patterns.datetime.match(src, pos)
if datetime_match:
try:
datetime_obj = match_to_datetime(datetime_match)
except ValueError as e:
raise suffixed_err(src, pos, "Invalid date or datetime") from e
return datetime_match.end(), datetime_obj
localtime_match = RE_LOCALTIME.match(src, pos)
localtime_match = Patterns.localtime.match(src, pos)
if localtime_match:
return localtime_match.end(), match_to_localtime(localtime_match)

# Integers and "normal" floats.
# The regex will greedily match any type starting with a decimal
# char, so needs to be located after handling of dates and times.
number_match = RE_NUMBER.match(src, pos)
number_match = Patterns.number.match(src, pos)
if number_match:
return number_match.end(), match_to_number(number_match, parse_float)

Expand Down
45 changes: 29 additions & 16 deletions tomli/_re.py
Expand Up @@ -10,8 +10,26 @@
# - 00:32:00
_TIME_RE_STR = r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?"

RE_NUMBER = re.compile(
r"""

class _LazyPatternCompiler:
def __getattr__(self, name: str) -> "re.Pattern":
if name == "datetime":
pattern = re.compile(
fr"""
([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
(?:
[T ]
{_TIME_RE_STR}
(?:(Z)|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
)?
""",
flags=re.VERBOSE,
)
elif name == "localtime":
pattern = re.compile(_TIME_RE_STR)
elif name == "number":
pattern = re.compile(
r"""
0
(?:
x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
Expand All @@ -27,20 +45,15 @@
(?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
)
""",
flags=re.VERBOSE,
)
RE_LOCALTIME = re.compile(_TIME_RE_STR)
RE_DATETIME = re.compile(
fr"""
([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
(?:
[T ]
{_TIME_RE_STR}
(?:(Z)|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
)?
""",
flags=re.VERBOSE,
)
flags=re.VERBOSE,
)
else: # pragma: no cover
raise AttributeError(f"Unknown pattern: {name}")

setattr(self, name, pattern)
return pattern

Patterns = _LazyPatternCompiler()


def match_to_datetime(match: "re.Match") -> Union[datetime, date]:
Expand Down

0 comments on commit 1e905a7

Please sign in to comment.