diff --git a/parsel/selector.py b/parsel/selector.py
index bbd4289a..666a3036 100644
--- a/parsel/selector.py
+++ b/parsel/selector.py
@@ -7,7 +7,7 @@
 import six
 from lxml import etree, html
 
-from .utils import flatten, iflatten, extract_regex
+from .utils import flatten, iflatten, extract_regex, shorten
 from .csstranslator import HTMLTranslator, GenericTranslator
 
 
@@ -358,6 +358,6 @@ def __bool__(self):
     __nonzero__ = __bool__
 
     def __str__(self):
-        data = repr(self.get()[:40])
+        data = repr(shorten(self.get(), width=40))
         return "<%s xpath=%r data=%s>" % (type(self).__name__, self._expr, data)
     __repr__ = __str__
diff --git a/parsel/utils.py b/parsel/utils.py
index 56bb105d..458bc6cc 100644
--- a/parsel/utils.py
+++ b/parsel/utils.py
@@ -80,4 +80,15 @@ def extract_regex(regex, text, replace_entities=True):
     strings = flatten(strings)
     if not replace_entities:
         return strings
-    return [w3lib_replace_entities(s, keep=['lt', 'amp']) for s in strings]
\ No newline at end of file
+    return [w3lib_replace_entities(s, keep=['lt', 'amp']) for s in strings]
+
+
+def shorten(text, width, suffix='...'):
+    """Truncate the given text to fit in the given width."""
+    if len(text) <= width:
+        return text
+    if width > len(suffix):
+        return text[:width-len(suffix)] + suffix
+    if width >= 0:
+        return suffix[len(suffix)-width:]
+    raise ValueError('width must be equal or greater than 0')
diff --git a/tests/test_selector.py b/tests/test_selector.py
index e504166a..c8845a5f 100644
--- a/tests/test_selector.py
+++ b/tests/test_selector.py
@@ -133,9 +133,9 @@ def test_representation_slice(self):
         body = u"

".format(50 * 'b')
         sel = self.sscls(text=body)
 
-        representation = "".format(40 * 'b')
+        representation = "".format(37 * 'b')
         if six.PY2:
-            representation = "".format(40 * 'b')
+            representation = "".format(37 * 'b')
 
         self.assertEqual(
             [repr(it) for it in sel.xpath('//input/@name')],
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 00000000..da20ec2f
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,26 @@
+from parsel.utils import shorten
+
+from pytest import mark, raises
+import six
+
+
+@mark.parametrize(
+    'width,expected',
+    (
+        (-1, ValueError),
+        (0, u''),
+        (1, u'.'),
+        (2, u'..'),
+        (3, u'...'),
+        (4, u'f...'),
+        (5, u'fo...'),
+        (6, u'foobar'),
+        (7, u'foobar'),
+    )
+)
+def test_shorten(width, expected):
+    if isinstance(expected, six.string_types):
+        assert shorten(u'foobar', width) == expected
+    else:
+        with raises(expected):
+            shorten(u'foobar', width)