From ce3915b83fa5af139f051a8eeb3e3e96ab78838a Mon Sep 17 00:00:00 2001 From: facelessuser Date: Fri, 10 Sep 2021 15:10:29 -0600 Subject: [PATCH] Add pretty print debug --- docs/src/markdown/about/changelog.md | 1 + docs/src/markdown/about/development.md | 57 +++++++++++ soupsieve/css_types.py | 8 +- soupsieve/pretty.py | 136 +++++++++++++++++++++++++ 4 files changed, 201 insertions(+), 1 deletion(-) create mode 100644 soupsieve/pretty.py diff --git a/docs/src/markdown/about/changelog.md b/docs/src/markdown/about/changelog.md index 3d84535d..2717b15e 100644 --- a/docs/src/markdown/about/changelog.md +++ b/docs/src/markdown/about/changelog.md @@ -8,6 +8,7 @@ ignore empty ones. As the scraping environment is different that a browser environment, it was chosen not to aggressively forgive bad syntax and invalid features to ensure the user is alerted that their program may not perform as expected. +- **NEW**: Add support to output a pretty print format of a compiled `SelectorList` for debug purposes. ## 2.2.1 diff --git a/docs/src/markdown/about/development.md b/docs/src/markdown/about/development.md index 5af0ed6d..7609dc24 100644 --- a/docs/src/markdown/about/development.md +++ b/docs/src/markdown/about/development.md @@ -193,6 +193,63 @@ object that may chain other `SelectorLists` objects depending on the complexity a selector list, then you will get multiple `Selector` objects (one for each compound selector in the list) which in turn may chain other `Selector` objects. 
+To view the selector list in a compiled object for debugging purposes, one can access it via `SoupSieve.selectors`, +though it is recommended to pretty print them: + +```pycon3 +>>> import soupsieve as sv +>>> sv.compile('this > that.class[name=value]').selectors.pretty() +SelectorList( + selectors=( + Selector( + tag=SelectorTag( + name='that', + prefix=None), + ids=(), + classes=( + 'class', + ), + attributes=( + SelectorAttribute( + attribute='name', + prefix='', + pattern=re.compile( + '^value$'), + xml_type_pattern=None), + ), + nth=(), + selectors=(), + relation=SelectorList( + selectors=( + Selector( + tag=SelectorTag( + name='this', + prefix=None), + ids=(), + classes=(), + attributes=(), + nth=(), + selectors=(), + relation=SelectorList( + selectors=(), + is_not=False, + is_html=False), + rel_type='>', + contains=(), + lang=(), + flags=0), + ), + is_not=False, + is_html=False), + rel_type=None, + contains=(), + lang=(), + flags=0), + ), + is_not=False, + is_html=False) +``` + ### `SelectorList` ```py3 diff --git a/soupsieve/css_types.py b/soupsieve/css_types.py index c2b9f30d..c56cc959 100644 --- a/soupsieve/css_types.py +++ b/soupsieve/css_types.py @@ -1,6 +1,7 @@ """CSS selector structure items.""" import copyreg from collections.abc import Hashable, Mapping +from .pretty import pretty __all__ = ( 'Selector', @@ -80,11 +81,16 @@ def __repr__(self): # pragma: no cover """Representation.""" return "{}({})".format( - self.__base__(), ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]]) + self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]]) ) __str__ = __repr__ + def pretty(self): # pragma: no cover + """Pretty print.""" + + print(pretty(self)) + class ImmutableDict(Mapping): """Hashable, immutable dictionary.""" diff --git a/soupsieve/pretty.py b/soupsieve/pretty.py new file mode 100644 index 00000000..3dba79db --- /dev/null +++ b/soupsieve/pretty.py @@ -0,0 +1,136 @@ +"""
+Format a pretty string of a `SoupSieve` object for easy debugging.
+
+This won't necessarily support all types and such, and definitely
+won't support custom outputs.
+
+It is mainly geared towards our types as the `SelectorList`
+object is a beast to look at without some indentation and newlines.
+The format and various output types are fairly well known (though this
+hasn't been tested extensively to make sure we aren't missing corners).
+
+Example:
+
+```
+>>> import soupsieve as sv
+>>> sv.compile('this > that.class[name=value]').selectors.pretty()
+SelectorList(
+    selectors=(
+        Selector(
+            tag=SelectorTag(
+                name='that',
+                prefix=None),
+            ids=(),
+            classes=(
+                'class',
+                ),
+            attributes=(
+                SelectorAttribute(
+                    attribute='name',
+                    prefix='',
+                    pattern=re.compile(
+                        '^value$'),
+                    xml_type_pattern=None),
+                ),
+            nth=(),
+            selectors=(),
+            relation=SelectorList(
+                selectors=(
+                    Selector(
+                        tag=SelectorTag(
+                            name='this',
+                            prefix=None),
+                        ids=(),
+                        classes=(),
+                        attributes=(),
+                        nth=(),
+                        selectors=(),
+                        relation=SelectorList(
+                            selectors=(),
+                            is_not=False,
+                            is_html=False),
+                        rel_type='>',
+                        contains=(),
+                        lang=(),
+                        flags=0),
+                    ),
+                is_not=False,
+                is_html=False),
+            rel_type=None,
+            contains=(),
+            lang=(),
+            flags=0),
+        ),
+    is_not=False,
+    is_html=False)
+```
+"""
+import re
+
+RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')
+RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
+RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')
+RE_LSTRT = re.compile(r'\[')
+RE_DSTRT = re.compile(r'\{')
+RE_TSTRT = re.compile(r'\(')
+RE_LEND = re.compile(r'\]')
+RE_DEND = re.compile(r'\}')
+RE_TEND = re.compile(r'\)')
+RE_INT = re.compile(r'\d+')
+RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')
+RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')
+RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
+RE_SEP = re.compile(r'\s*(,)\s*')
+RE_DSEP = re.compile(r'\s*(:)\s*')
+
+TOKENS = {
+    'class': RE_CLASS,
+    'param': RE_PARAM,
+    'empty': RE_EMPTY,
+    'lstrt': RE_LSTRT,
+    'dstrt': RE_DSTRT,
+    'tstrt': RE_TSTRT,
+    'lend': RE_LEND,
+    'dend': RE_DEND,
+    'tend': RE_TEND,
+    'sqstr': RE_SQSTR,
+    'sep': RE_SEP,
+    'dsep': RE_DSEP,
+    'int': RE_INT,
+    'kword': RE_KWORD,
+    'dqstr': RE_DQSTR
+}
+
+
+def pretty(obj):  # pragma: no cover
+    """Return `str(obj)` re-flowed onto multiple lines, indenting four spaces per nesting level."""
+
+    sel = str(obj)
+    index = indent = 0  # Scan position in `sel` and current indentation width.
+    end = len(sel) - 1
+    output = []
+
+    while index <= end:
+        for name, pattern in TOKENS.items():  # First matching token wins, so `TOKENS` order matters.
+            m = pattern.match(sel, index)
+            if m:
+                index = m.end(0)
+                if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
+                    indent += 4
+                    output.append('{}\n{}'.format(m.group(0), " " * indent))
+                elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
+                    output.append(m.group(0))
+                elif name in ('lend', 'dend', 'tend'):
+                    indent -= 4
+                    output.append(m.group(0))
+                elif name in ('sep',):
+                    output.append('{}\n{}'.format(m.group(1), " " * indent))
+                elif name in ('dsep',):
+                    output.append('{} '.format(m.group(1)))
+                break
+        else:
+            # Nothing matched (e.g. the `.` in `re.IGNORECASE`): copy it through so the scan always advances.
+            output.append(sel[index])
+            index += 1
+
+    return ''.join(output)