Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pretty print debug #227

Merged
merged 1 commit into from Sep 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/src/markdown/about/changelog.md
Expand Up @@ -8,6 +8,7 @@
ignore empty ones. As the scraping environment is different that a browser environment, it was chosen not to
aggressively forgive bad syntax and invalid features to ensure the user is alerted that their program may not perform
as expected.
- **NEW**: Add support to output a pretty print format of a compiled `SelectorList` for debug purposes.

## 2.2.1

Expand Down
57 changes: 57 additions & 0 deletions docs/src/markdown/about/development.md
Expand Up @@ -193,6 +193,63 @@ object that may chain other `SelectorLists` objects depending on the complexity
a selector list, then you will get multiple `Selector` objects (one for each compound selector in the list) which in
turn may chain other `Selector` objects.

To view the selector list in in a compiled object for debugging purposes, one can access it via `SoupSieve.selectors`,
though it is recommended to pretty print them:

```pycon3
>>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='that',
prefix=None),
ids=(),
classes=(
'class',
),
attributes=(
SelectorAttribute(
attribute='name',
prefix='',
pattern=re.compile(
'^value$'),
xml_type_pattern=None),
),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='this',
prefix=None),
ids=(),
classes=(),
attributes=(),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(),
is_not=False,
is_html=False),
rel_type='>',
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False),
rel_type=None,
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False)
```

### `SelectorList`

```py3
Expand Down
8 changes: 7 additions & 1 deletion soupsieve/css_types.py
@@ -1,6 +1,7 @@
"""CSS selector structure items."""
import copyreg
from collections.abc import Hashable, Mapping
from .pretty import pretty

__all__ = (
'Selector',
Expand Down Expand Up @@ -80,11 +81,16 @@ def __repr__(self): # pragma: no cover
"""Representation."""

return "{}({})".format(
self.__base__(), ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
)

__str__ = __repr__

def pretty(self): # pragma: no cover
"""Pretty print."""

print(pretty(self))


class ImmutableDict(Mapping):
"""Hashable, immutable dictionary."""
Expand Down
136 changes: 136 additions & 0 deletions soupsieve/pretty.py
@@ -0,0 +1,136 @@
"""
Format a pretty string of a `SoupSieve` object for easy debugging.

This won't necessarily support all types and such, and definitely
not support custom outputs.

It is mainly geared towards our types as the `SelectorList`
object is a beast to look at without some indentation and newlines.
The format and various output types is fairly known (though it
hasn't been tested extensively to make sure we aren't missing corners).

Example:

```
>>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='that',
prefix=None),
ids=(),
classes=(
'class',
),
attributes=(
SelectorAttribute(
attribute='name',
prefix='',
pattern=re.compile(
'^value$'),
xml_type_pattern=None),
),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='this',
prefix=None),
ids=(),
classes=(),
attributes=(),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(),
is_not=False,
is_html=False),
rel_type='>',
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False),
rel_type=None,
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False)
```
"""
import re

RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')
RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')
RE_LSTRT = re.compile(r'\[')
RE_DSTRT = re.compile(r'\{')
RE_TSTRT = re.compile(r'\(')
RE_LEND = re.compile(r'\]')
RE_DEND = re.compile(r'\}')
RE_TEND = re.compile(r'\)')
RE_INT = re.compile(r'\d+')
RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')
RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')
RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
RE_SEP = re.compile(r'\s*(,)\s*')
RE_DSEP = re.compile(r'\s*(:)\s*')

TOKENS = {
'class': RE_CLASS,
'param': RE_PARAM,
'empty': RE_EMPTY,
'lstrt': RE_LSTRT,
'dstrt': RE_DSTRT,
'tstrt': RE_TSTRT,
'lend': RE_LEND,
'dend': RE_DEND,
'tend': RE_TEND,
'sqstr': RE_SQSTR,
'sep': RE_SEP,
'dsep': RE_DSEP,
'int': RE_INT,
'kword': RE_KWORD,
'dqstr': RE_DQSTR
}


def pretty(obj): # pragma: no cover
"""Make the object output string pretty."""

sel = str(obj)
index = 0
end = len(sel) - 1
indent = 0
output = []

while index <= end:
m = None
for k, v in TOKENS.items():
m = v.match(sel, index)

if m:
name = k
index = m.end(0)
if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
indent += 4
output.append('{}\n{}'.format(m.group(0), " " * indent))
elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
output.append(m.group(0))
elif name in ('lend', 'dend', 'tend'):
indent -= 4
output.append(m.group(0))
elif name in ('sep',):
output.append('{}\n{}'.format(m.group(1), " " * indent))
elif name in ('dsep',):
output.append('{} '.format(m.group(1)))
break

return ''.join(output)