Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimize pyparsing import time by deferring regex compile #363

Merged
merged 1 commit into from
Feb 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
50 changes: 36 additions & 14 deletions pyparsing/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,18 @@
#


if sys.version_info >= (3, 8):
    from functools import cached_property
else:

    class cached_property:
        """Minimal stand-in for ``functools.cached_property`` on Python < 3.8.

        Wraps a zero-argument method. The first attribute access on an
        instance calls the method and stores the result in the instance
        ``__dict__`` under the method's name; because this is a non-data
        descriptor, later lookups find the stored value directly and never
        reach ``__get__`` again.
        """

        def __init__(self, func):
            self._func = func

        def __get__(self, instance, owner=None):
            # Accessed on the class itself (introspection, help(), doc tools):
            # there is no instance to compute or cache a value for, so hand
            # back the descriptor, as functools.cached_property does.
            if instance is None:
                return self
            ret = instance.__dict__[self._func.__name__] = self._func(instance)
            return ret


class __compat__(__config_flags):
"""
A cross-version compatibility configuration for pyparsing features that will be
Expand Down Expand Up @@ -467,7 +479,6 @@ def __init__(self, savelist: bool = False):
self.modalResults = True
# custom debug actions
self.debugActions = self.DebugActions(None, None, None)
self.re = None
# avoid redundant calls to preParse
self.callPreparse = True
self.callDuringTry = False
Expand Down Expand Up @@ -2926,19 +2937,12 @@ def __init__(
if not pattern:
raise ValueError("null string passed to Regex; use Empty() instead")

self.pattern = pattern
self._re = None
self.reString = self.pattern = pattern
self.flags = flags

try:
self.re = re.compile(self.pattern, self.flags)
self.reString = self.pattern
except sre_constants.error:
raise ValueError(
"invalid pattern ({!r}) passed to Regex".format(pattern)
)

elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
self.re = pattern
self._re = pattern
self.pattern = self.reString = pattern.pattern
self.flags = flags

Expand All @@ -2947,18 +2951,36 @@ def __init__(
"Regex may only be constructed with a string or a compiled RE object"
)

self.re_match = self.re.match

self.errmsg = "Expected " + self.name
self.mayIndexError = False
self.mayReturnEmpty = self.re_match("") is not None
self.asGroupList = asGroupList
self.asMatch = asMatch
if self.asGroupList:
self.parseImpl = self.parseImplAsGroupList
if self.asMatch:
self.parseImpl = self.parseImplAsMatch


@cached_property
def re(self):
    """Compiled regular expression for this element, built on first access.

    If a pre-compiled pattern object is held in ``self._re`` it is returned
    as-is; otherwise ``self.pattern`` is compiled with ``self.flags``. The
    ``cached_property`` decorator stores the result, so compilation happens
    at most once per instance.

    Raises:
        ValueError: if ``self.pattern`` is not a valid regular expression.
    """
    # Design note from the PR review thread: ``_re`` is kept as its own
    # attribute rather than assigning straight into this cached slot from
    # ``__init__``, because ``self._re`` is used separately.
    if self._re:
        return self._re
    try:
        return re.compile(self.pattern, self.flags)
    except re.error:
        # Report ``self.pattern``: no local named ``pattern`` exists in this
        # property, so formatting it would raise NameError and mask the
        # intended ValueError.
        raise ValueError(
            "invalid pattern ({!r}) passed to Regex".format(self.pattern)
        )

@cached_property
def re_match(self):
    """Bound ``match`` method of ``self.re``, looked up once and then cached."""
    compiled = self.re
    return compiled.match

@cached_property
def mayReturnEmpty(self):
    """Whether the pattern matches the empty string; evaluated lazily and cached."""
    empty_match = self.re_match("")
    return empty_match is not None

def _generateDefaultName(self):
return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\"))

Expand Down
2 changes: 1 addition & 1 deletion tests/test_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -3486,7 +3486,7 @@ def testMatch(expression, instring, shouldPass, expectedString=None):

try:
print("lets try an invalid RE")
invRe = pp.Regex("(\"[^\"]*\")|('[^']*'")
invRe = pp.Regex("(\"[^\"]*\")|('[^']*'").re
except Exception as e:
print("successfully rejected an invalid RE:", end=" ")
print(e)
Expand Down