Skip to content
This repository has been archived by the owner on Aug 2, 2023. It is now read-only.

Commit

Permalink
Improve object repr slicing on Python 2. Fixes #1407 (#1429)
Browse files Browse the repository at this point in the history
  • Loading branch information
fabioz committed May 17, 2019
1 parent 343ad5f commit f115036
Show file tree
Hide file tree
Showing 4 changed files with 235 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import sys
import time


#=========================================================================
# Load filters with tests we should skip
#=========================================================================
Expand All @@ -35,6 +34,8 @@ def is_in_xdist_node():


connected = False


def connect_to_server_for_communication_to_xml_rpc_on_xdist():
global connected
if connected:
Expand Down Expand Up @@ -68,7 +69,9 @@ def start_redirect():


def get_curr_output():
    '''
    :return tuple:
        A tuple with (stdout contents, stderr contents) obtained from `State.buf_out`
        and `State.buf_err`. An empty string is used for a buffer which is None.
    '''
    # Read each attribute only once so that the None check and the getvalue() call
    # are done on the same object.
    buf_out = State.buf_out
    buf_err = State.buf_err
    out_contents = buf_out.getvalue() if buf_out is not None else ''
    err_contents = buf_err.getvalue() if buf_err is not None else ''
    return out_contents, err_contents


def pytest_unconfigure():
Expand Down Expand Up @@ -136,6 +139,7 @@ def pytest_collection_modifyitems(session, config, items):

from py.io import TerminalWriter


def _get_error_contents_from_report(report):
if report.longrepr is not None:
tw = TerminalWriter(stringio=True)
Expand All @@ -148,11 +152,13 @@ def _get_error_contents_from_report(report):

return ''


def pytest_collectreport(report):
    '''
    Reports a failure named '<collect errors>' when the given collect report has
    error contents.

    :param report:
        The report passed to `_get_error_contents_from_report`.
    '''
    contents = _get_error_contents_from_report(report)
    if not contents:
        # Nothing to report.
        return

    report_test('fail', '<collect errors>', '<collect errors>', '', contents, 0.0)


def append_strings(s1, s2):
if s1.__class__ == s2.__class__:
return s1 + s2
Expand Down Expand Up @@ -183,7 +189,6 @@ def append_strings(s1, s2):
return s1 + s2



def pytest_runtest_logreport(report):
if is_in_xdist_node():
# When running with xdist, we don't want the report to be called from the node, only
Expand Down Expand Up @@ -254,9 +259,11 @@ def report_test(status, filename, test, captured_output, error_contents, duratio
pydev_runfiles_xml_rpc.notifyTest(
status, captured_output, error_contents, filename, test, time_str)


# Fail early (at import time) if the pytest in use does not provide `pytest.hookimpl`.
if not hasattr(pytest, 'hookimpl'):
    raise AssertionError(
        'Please upgrade pytest (the current version of pytest: %s is unsupported)' % (
            pytest.__version__,
        )
    )


@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item, call):
outcome = yield
Expand Down
103 changes: 100 additions & 3 deletions src/ptvsd/_vendored/pydevd/_pydevd_bundle/pydevd_safe_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

# Gotten from ptvsd for supporting the format expected there.
import sys
from _pydevd_bundle.pydevd_constants import IS_PY2
import locale
import json

# Py3 compat - alias unicode to str, and xrange to range
try:
Expand All @@ -17,6 +20,12 @@


class SafeRepr(object):
# Can be used to override the encoding from locale.getpreferredencoding()
locale_preferred_encoding = None

# Can be used to override the encoding used for sys.stdout.encoding
sys_stdout_encoding = None

# String types are truncated to maxstring_outer when at the outer-
# most level, and truncated to maxstring_inner characters inside
# collections.
Expand Down Expand Up @@ -74,8 +83,18 @@ class SafeRepr(object):
raw_value = False

def __call__(self, obj):
'''
:param object obj:
The object for which we want a representation.
:return str:
Returns bytes encoded as utf-8 on py2 and str on py3.
'''
try:
return ''.join(self._repr(obj, 0))
if IS_PY2:
return ''.join((x.encode('utf-8') if isinstance(x, unicode) else x) for x in self._repr(obj, 0))
else:
return ''.join(self._repr(obj, 0))
except Exception:
try:
return 'An exception was raised: %r' % sys.exc_info()[1]
Expand Down Expand Up @@ -271,10 +290,14 @@ def _repr_obj(self, obj, level, limit_inner, limit_outer):
try:
if self.raw_value:
# For raw value retrieval, ignore all limits.
if isinstance(obj, bytes):
yield obj.decode('latin-1')
return

try:
mv = memoryview(obj)
except Exception:
yield unicode(obj)
yield self._convert_to_unicode_or_bytes_repr(repr(obj))
return
else:
# Map bytes to Unicode codepoints with same values.
Expand All @@ -296,14 +319,88 @@ def _repr_obj(self, obj, level, limit_inner, limit_outer):
limit = limit_inner if level > 0 else limit_outer

if limit >= len(obj_repr):
yield obj_repr
yield self._convert_to_unicode_or_bytes_repr(obj_repr)
return

# Slightly imprecise calculations - we may end up with a string that is
# up to 3 characters longer than limit. If you need precise formatting,
# you are using the wrong class.
left_count, right_count = max(1, int(2 * limit / 3)), max(1, int(limit / 3)) # noqa

if IS_PY2 and isinstance(obj_repr, bytes):
# If we can convert to unicode before slicing, that's better (but don't do
# it if it's not possible as we may be dealing with actual binary data).

obj_repr = self._bytes_as_unicode_if_possible(obj_repr)
if isinstance(obj_repr, unicode):
# Deal with high-surrogate leftovers on Python 2.
try:
if left_count > 0 and unichr(0xD800) <= obj_repr[left_count - 1] <= unichr(0xDBFF):
left_count -= 1
except ValueError:
# On Jython unichr(0xD800) will throw an error:
# ValueError: unichr() arg is a lone surrogate in range (0xD800, 0xDFFF) (Jython UTF-16 encoding)
# Just ignore it in this case.
pass

start = obj_repr[:left_count]

# Note: yielding unicode is fine (it'll be properly converted to utf-8 if needed).
yield start
yield '...'

# Deal with high-surrogate leftovers on Python 2.
try:
if right_count > 0 and unichr(0xD800) <= obj_repr[-right_count - 1] <= unichr(0xDBFF):
right_count -= 1
except ValueError:
# On Jython unichr(0xD800) will throw an error:
# ValueError: unichr() arg is a lone surrogate in range (0xD800, 0xDFFF) (Jython UTF-16 encoding)
# Just ignore it in this case.
pass

yield obj_repr[-right_count:]
return
else:
# We can't decode it (binary string). Use repr() of bytes.
obj_repr = repr(obj_repr)

yield obj_repr[:left_count]
yield '...'
yield obj_repr[-right_count:]

def _convert_to_unicode_or_bytes_repr(self, obj_repr):
    '''
    :param obj_repr:
        The representation to convert.
    :return:
        On Python 2, when bytes are given: the decoded unicode if decoding was
        possible, otherwise the repr() of those bytes. In any other case the input
        is returned unchanged.
    '''
    if not (IS_PY2 and isinstance(obj_repr, bytes)):
        return obj_repr

    converted = self._bytes_as_unicode_if_possible(obj_repr)
    if isinstance(converted, bytes):
        # Still bytes: no encoding could decode it, so it's some binary data we
        # can't make sense of. Use its repr() -- otherwise json encoding may break
        # later on.
        return repr(converted)
    return converted

def _bytes_as_unicode_if_possible(self, obj_repr):
    '''
    Tries to decode the given bytes into unicode.

    :param bytes obj_repr:
        The bytes which should be decoded.
    :return:
        The decoded unicode if one of the candidate encodings was able to decode
        the input, otherwise the original bytes (unchanged).
    '''
    # We try to decode with 3 possible encodings, in this order:
    # sys.stdout.encoding, locale.getpreferredencoding() and 'utf-8'
    # (the first two may be overridden through `sys_stdout_encoding` and
    # `locale_preferred_encoding`).
    try_encodings = []
    encoding = self.sys_stdout_encoding or getattr(sys.stdout, 'encoding', '')
    if encoding:
        try_encodings.append(encoding.lower())

    preferred_encoding = self.locale_preferred_encoding or locale.getpreferredencoding()
    if preferred_encoding:
        preferred_encoding = preferred_encoding.lower()
        if preferred_encoding not in try_encodings:
            try_encodings.append(preferred_encoding)

    if 'utf-8' not in try_encodings:
        try_encodings.append('utf-8')

    for encoding in try_encodings:
        try:
            return obj_repr.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # UnicodeDecodeError: the bytes are not valid in this encoding.
            # LookupError: the encoding name is not known to Python (the name comes
            # from the environment or from an override, so it can't be trusted).
            # In both cases, just go on to the next candidate.
            pass

    return obj_repr  # Return the original version (in bytes)
7 changes: 4 additions & 3 deletions src/ptvsd/_vendored/pydevd/tests_python/test_debugger_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,10 +270,11 @@ def write_step_in(self, thread_id):
arguments = pydevd_schema.StepInArguments(threadId=thread_id)
self.wait_for_response(self.write_request(pydevd_schema.StepInRequest(arguments)))

def write_step_next(self, thread_id, wait_for_response=True):
    '''
    Writes a `NextRequest` for the given thread.

    :param thread_id:
        The id of the thread, passed on to `NextArguments`.
    :param bool wait_for_response:
        If True (default) waits for the response of the written request before
        returning, otherwise returns right after writing the request.
    '''
    next_request = self.write_request(
        pydevd_schema.NextRequest(pydevd_schema.NextArguments(thread_id)))
    if wait_for_response:
        self.wait_for_response(next_request)

def write_step_out(self, thread_id):
stepout_request = self.write_request(
Expand Down Expand Up @@ -628,7 +629,7 @@ def test_case_skipping_filters(case_setup, custom_setup):
if IS_JYTHON:
json_facade.write_continue(wait_for_response=False)
else:
json_facade.write_step_next(json_hit.thread_id)
json_facade.write_step_next(json_hit.thread_id, wait_for_response=False)

writer.finished_ok = True

Expand Down
121 changes: 121 additions & 0 deletions src/ptvsd/_vendored/pydevd/tests_python/test_safe_repr.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# coding: utf-8
import collections
import sys
import re
import pytest
from _pydevd_bundle.pydevd_safe_repr import SafeRepr
import json
from _pydevd_bundle.pydevd_constants import IS_JYTHON, IS_PY2

try:
import numpy as np
Expand Down Expand Up @@ -593,3 +596,121 @@ def test_zeros(self):
value = np.zeros(SafeRepr.maxcollection[0] + 1)

self.assert_unchanged(value, repr(value))


@pytest.mark.parametrize('params', [
# In python 2, unicode slicing may or may not work well depending on whether it's a ucs-2 or
# ucs-4 build (so, we have to strip the high-surrogate if it's ucs-2 and the number of chars
# will be different).
{'maxother_outer': 20, 'input': u"😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄F😄FF😄F", 'output': (u"😄😄😄😄😄😄...FF😄F", u"😄😄😄😄😄😄😄😄😄😄😄😄😄...F😄FF😄F")},
{'maxother_outer': 20, 'input': u"😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄FFFFFFFF", 'output': (u"😄😄😄😄😄😄...FFFFFF", u"😄😄😄😄😄😄😄😄😄😄😄😄😄...FFFFFF")},
{'maxother_outer': 20, 'input': u"🌐🌐🌐🌐🌐🌐🌐🌐🌐🌐🌐🌐🌐🌐🌐🌐🌐🌐FFFFFFFF", 'output': (u"🌐🌐🌐🌐🌐🌐...FFFFFF", u"🌐🌐🌐🌐🌐🌐🌐🌐🌐🌐🌐🌐🌐...FFFFFF")},
{'maxother_outer': 10, 'input': u"😄😄😄😄😄😄😄😄😄FFFFFFFF", 'output': (u"😄😄😄...FFF", u"😄😄😄😄😄😄...FFF")},
{'maxother_outer': 10, 'input': u"🌐🌐🌐🌐🌐🌐🌐🌐🌐FFFFFFFF", 'output': (u"🌐🌐🌐...FFF", u"🌐🌐🌐🌐🌐🌐...FFF")},
# Regular unicode
{'maxother_outer': 20, 'input': u"ωωωωωωωωωωωωωωωωωωωωωωωFFFFFFFF", 'output': u"ωωωωωωωωωωωωω...FFFFFF"},
{'maxother_outer': 20, 'input': u"������������FFFFFFFF", 'output': u"������������F...FFFFFF"},
{'maxother_outer': 10, 'input': u"������������FFFFFFFF", 'output': u"������...FFF"},
# Note that we actually get the repr() in this case as we can't decode it with any of the available encodings.
{'maxother_outer': 10, 'input': b'\xed\xbd\xbf\xff\xfe\xfa\xfd' * 10, 'output': b"'\\xed\\...fd'"},
{'maxother_outer': 20, 'input': b'\xed\xbd\xbf\xff\xfe\xfa\xfd' * 10, 'output': b"'\\xed\\xbd\\xbf...a\\xfd'"},
# Check that we use repr() even if it fits the maxother_outer limit.
{'maxother_outer': 100, 'input': b'\xed\xbd\xbf\xff\xfe\xfa\xfd', 'output': "'\\xed\\xbd\\xbf\\xff\\xfe\\xfa\\xfd'"},
# Note that with latin1 encoding we can actually decode the string but when encoding back to utf-8 we have garbage
# (couldn't find a good approach to know what to do here as we've actually been able to decode it as
# latin-1 because it's a very permissive encoding).
{
'maxother_outer': 10,
'sys_stdout_encoding': 'latin1',
'input': b'\xed\xbd\xbf\xff\xfe\xfa\xfd' * 10,
'output': b'\xc3\xad\xc2\xbd\xc2\xbf\xc3\xbf\xc3\xbe\xc3\xba...\xc3\xbe\xc3\xba\xc3\xbd'
},
])
@pytest.mark.skipif(not IS_PY2, reason='Py2 specific test.')
def test_py2_bytes_slicing(params):
    '''
    Checks the (sliced) representation computed by SafeRepr on Python 2 for an
    object whose __repr__ returns bytes.

    :param dict params:
        Has the 'maxother_outer' limit, the 'input' returned by __repr__ (unicode
        inputs are encoded as utf-8 first), the expected 'output' (a tuple means
        that any of its items is accepted) and optionally 'sys_stdout_encoding'.
    '''
    # This is the encoding that we expect back (because json needs to be able to
    # encode it later on, so, the return from SafeRepr must always be utf-8
    # regardless of the input).
    encoding = 'utf-8'

    class MyObj(object):

        def __repr__(self):
            value = params['input']
            return value.encode(encoding) if isinstance(value, unicode) else value

    safe_repr = SafeRepr()
    safe_repr.maxother_outer = params['maxother_outer']
    safe_repr.sys_stdout_encoding = params.get('sys_stdout_encoding', 'ascii')
    safe_repr.locale_preferred_encoding = 'ascii'

    expected_output = params['output']
    computed = safe_repr(MyObj())

    many_accepted = isinstance(expected_output, tuple)
    expect_unicode = isinstance(expected_output, unicode) or (
        many_accepted and isinstance(expected_output[0], unicode))

    if not expect_unicode:
        assert repr(computed) == repr(expected_output)
    else:
        computed = computed.decode(encoding)
        if many_accepted:
            assert computed in expected_output
        else:
            assert computed == expected_output

    # Check that we can json-encode the return.
    assert json.dumps(computed)


@pytest.mark.parametrize('params', [
{'maxother_outer': 20, 'input': "😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄FFFFFFFF", 'output': '😄😄😄😄😄😄😄😄😄😄😄😄😄...FFFFFF'},
{'maxother_outer': 10, 'input': "😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄😄FFFFFFFF", 'output': '😄😄😄😄😄😄...FFF'},
{'maxother_outer': 10, 'input': u"������������FFFFFFFF", 'output': u"������...FFF"},
# Because we can't return bytes, byte-related tests aren't needed (and str works as it should).
])
@pytest.mark.skipif(IS_PY2, reason='Py3 specific test')
def test_py3_str_slicing(params):
    '''
    Checks the (sliced) representation computed by SafeRepr on Python 3.

    Much simpler than the Python 2 counterpart because __repr__ is required to
    return str (which is actually unicode).

    :param dict params:
        Has the 'maxother_outer' limit, the 'input' returned by __repr__, the
        expected 'output' and optionally 'sys_stdout_encoding'.
    '''

    class MyObj(object):

        def __repr__(self):
            return params['input']

    safe_repr = SafeRepr()
    safe_repr.maxother_outer = params['maxother_outer']
    safe_repr.sys_stdout_encoding = params.get('sys_stdout_encoding', 'ascii')
    safe_repr.locale_preferred_encoding = 'ascii'

    computed = safe_repr(MyObj())
    assert repr(computed) == repr(params['output'])

    # Check that we can json-encode the return.
    assert json.dumps(computed)


def test_raw():
    '''
    Checks that with `raw_value` set, bytes are mapped through latin1 (and
    additionally encoded as utf-8 on Python 2).
    '''
    raw_bytes = b'\xed\xbd\xbf\xff\xfe\xfa\xfd'

    safe_repr = SafeRepr()
    safe_repr.raw_value = True
    raw_value_repr = safe_repr(raw_bytes)

    assert isinstance(raw_value_repr, str)  # bytes on py2, str on py3

    expected = raw_bytes.decode('latin1')
    if IS_PY2:
        expected = expected.encode('utf-8')
    assert raw_value_repr == expected

0 comments on commit f115036

Please sign in to comment.