From 1e905a71f176da17a26c0db75bb3c570e6499c2b Mon Sep 17 00:00:00 2001 From: Ofek Lev Date: Sun, 14 Nov 2021 18:01:27 -0500 Subject: [PATCH] Lazily compile regular expressions to speed up load time --- tomli/_parser.py | 15 ++++----------- tomli/_re.py | 45 +++++++++++++++++++++++++++++---------------- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/tomli/_parser.py b/tomli/_parser.py index 89e81c3..4d326b5 100644 --- a/tomli/_parser.py +++ b/tomli/_parser.py @@ -3,14 +3,7 @@ from typing import Any, BinaryIO, Dict, FrozenSet, Iterable, NamedTuple, Optional, Tuple import warnings -from tomli._re import ( - RE_DATETIME, - RE_LOCALTIME, - RE_NUMBER, - match_to_datetime, - match_to_localtime, - match_to_number, -) +from tomli._re import Patterns, match_to_datetime, match_to_localtime, match_to_number from tomli._types import Key, ParseFloat, Pos ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127)) @@ -605,21 +598,21 @@ def parse_value( # noqa: C901 return pos + 5, False # Dates and times - datetime_match = RE_DATETIME.match(src, pos) + datetime_match = Patterns.datetime.match(src, pos) if datetime_match: try: datetime_obj = match_to_datetime(datetime_match) except ValueError as e: raise suffixed_err(src, pos, "Invalid date or datetime") from e return datetime_match.end(), datetime_obj - localtime_match = RE_LOCALTIME.match(src, pos) + localtime_match = Patterns.localtime.match(src, pos) if localtime_match: return localtime_match.end(), match_to_localtime(localtime_match) # Integers and "normal" floats. # The regex will greedily match any type starting with a decimal # char, so needs to be located after handling of dates and times. - number_match = RE_NUMBER.match(src, pos) + number_match = Patterns.number.match(src, pos) if number_match: return number_match.end(), match_to_number(number_match, parse_float) diff --git a/tomli/_re.py b/tomli/_re.py index 9126829..dd47526 100644 --- a/tomli/_re.py +++ b/tomli/_re.py @@ -10,8 +10,26 @@ # - 00:32:00 _TIME_RE_STR = r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?" -RE_NUMBER = re.compile( - r""" + +class _LazyPatternCompiler: + def __getattr__(self, name: str) -> "re.Pattern": + if name == "datetime": + pattern = re.compile( + fr""" +([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27 +(?: + [T ] + {_TIME_RE_STR} + (?:(Z)|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset +)? +""", + flags=re.VERBOSE, + ) + elif name == "localtime": + pattern = re.compile(_TIME_RE_STR) + elif name == "number": + pattern = re.compile( + r""" 0 (?: x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex @@ -27,20 +45,15 @@ (?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part ) """, - flags=re.VERBOSE, -) -RE_LOCALTIME = re.compile(_TIME_RE_STR) -RE_DATETIME = re.compile( - fr""" -([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27 -(?: - [T ] - {_TIME_RE_STR} - (?:(Z)|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset -)? -""", - flags=re.VERBOSE, -) + flags=re.VERBOSE, + ) + else: # pragma: no cover + raise AttributeError(f"Unknown pattern: {name}") + + setattr(self, name, pattern) + return pattern + +Patterns = _LazyPatternCompiler() def match_to_datetime(match: "re.Match") -> Union[datetime, date]: