Skip to content

Commit

Permalink
Merge pull request scrapy#141 from Gallaecio/selector-data-preview
Browse files (browse the repository at this point in the history)
Shorten selector representations using an ellipsis
  • Loading branch information
kmike committed Jul 11, 2019
2 parents 1327e0d + 63fe189 commit fafba41
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 5 deletions.
4 changes: 2 additions & 2 deletions parsel/selector.py
Expand Up @@ -7,7 +7,7 @@
import six
from lxml import etree, html

from .utils import flatten, iflatten, extract_regex
from .utils import flatten, iflatten, extract_regex, shorten
from .csstranslator import HTMLTranslator, GenericTranslator


Expand Down Expand Up @@ -358,6 +358,6 @@ def __bool__(self):
__nonzero__ = __bool__

def __str__(self):
    """Return a compact, human-readable representation of this object.

    The result has the form ``<TypeName xpath=... data=...>``, where the
    data part is the ``repr`` of ``self.get()`` after it has been passed
    through ``shorten`` with a width of 40, so long values are truncated
    instead of being printed in full.
    """
    # Truncate before calling repr() so the quotes (and any u'' prefix)
    # added by repr() are not counted against the 40-character budget.
    data = repr(shorten(self.get(), width=40))
    return "<%s xpath=%r data=%s>" % (type(self).__name__, self._expr, data)
__repr__ = __str__
13 changes: 12 additions & 1 deletion parsel/utils.py
Expand Up @@ -80,4 +80,15 @@ def extract_regex(regex, text, replace_entities=True):
strings = flatten(strings)
if not replace_entities:
return strings
return [w3lib_replace_entities(s, keep=['lt', 'amp']) for s in strings]
return [w3lib_replace_entities(s, keep=['lt', 'amp']) for s in strings]


def shorten(text, width, suffix='...'):
    """Truncate the given text to fit in the given width.

    :param text: the string to shorten.
    :param width: maximum length of the returned string; must be zero or
        greater whenever ``text`` does not already fit.
    :param suffix: marker appended to signal that ``text`` was cut
        (three dots by default).
    :returns: ``text`` unchanged when it already fits in ``width``.
        Otherwise a string of exactly ``width`` characters: a prefix of
        ``text`` followed by ``suffix`` when there is room for at least one
        character of ``text``, or else the last ``width`` characters of
        ``suffix`` alone.
    :raises ValueError: if ``text`` does not fit and ``width`` is negative.
    """
    if len(text) <= width:
        # Nothing to cut. A negative width can never reach this branch
        # because len() is never negative.
        return text

    suffix_len = len(suffix)
    if width > suffix_len:
        # Room for some of the text plus the whole suffix.
        keep = width - suffix_len
        return text[:keep] + suffix

    if width >= 0:
        # Too narrow for any text: emit as much of the suffix tail as fits
        # (the empty string for width 0, the full suffix when they match).
        start = suffix_len - width
        return suffix[start:]

    raise ValueError('width must be equal or greater than 0')
4 changes: 2 additions & 2 deletions tests/test_selector.py
Expand Up @@ -133,9 +133,9 @@ def test_representation_slice(self):
body = u"<p><input name='{}' value='\xa9'/></p>".format(50 * 'b')
sel = self.sscls(text=body)

representation = "<Selector xpath='//input/@name' data='{}'>".format(40 * 'b')
representation = "<Selector xpath='//input/@name' data='{}...'>".format(37 * 'b')
if six.PY2:
representation = "<Selector xpath='//input/@name' data=u'{}'>".format(40 * 'b')
representation = "<Selector xpath='//input/@name' data=u'{}...'>".format(37 * 'b')

self.assertEqual(
[repr(it) for it in sel.xpath('//input/@name')],
Expand Down
26 changes: 26 additions & 0 deletions tests/test_utils.py
@@ -0,0 +1,26 @@
from parsel.utils import shorten

from pytest import mark, raises
import six


@mark.parametrize(
    'width,expected',
    (
        # A negative width cannot be satisfied.
        (-1, ValueError),
        # Widths up to the suffix length yield only the tail of the suffix.
        (0, u''),
        (1, u'.'),
        (2, u'..'),
        (3, u'...'),
        # Wider than the suffix: a prefix of the text plus the suffix.
        (4, u'f...'),
        (5, u'fo...'),
        # The text already fits, so it is returned unchanged.
        (6, u'foobar'),
        (7, u'foobar'),
    )
)
def test_shorten(width, expected):
    """Check ``shorten(u'foobar', width)`` against each parametrized case.

    ``expected`` is either the exact string the call must return or the
    exception class the call must raise.
    """
    if isinstance(expected, six.string_types):
        assert shorten(u'foobar', width) == expected
    else:
        with raises(expected):
            shorten(u'foobar', width)

0 comments on commit fafba41

Please sign in to comment.