Skip to content

Commit

Permalink
fix(utils): strip_string() checks text length counting bytes not chars (
Browse files Browse the repository at this point in the history
#1711)

The truncation and the indexes in the AnnotatedValue are done by number of bytes,
not by number of characters.

Fixes GH-1691
  • Loading branch information
mgaligniana committed Oct 27, 2022
1 parent 1240743 commit e2674d4
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
2 changes: 1 addition & 1 deletion sentry_sdk/utils.py
Expand Up @@ -841,7 +841,7 @@ def strip_string(value, max_length=None):
# This is intentionally not just the default such that one can patch `MAX_STRING_LENGTH` and affect `strip_string`.
max_length = MAX_STRING_LENGTH

length = len(value)
length = len(value.encode("utf-8"))

if length > max_length:
return AnnotatedValue(
Expand Down
21 changes: 21 additions & 0 deletions tests/utils/test_general.py
Expand Up @@ -15,6 +15,8 @@
iter_event_stacktraces,
to_base64,
from_base64,
strip_string,
AnnotatedValue,
)
from sentry_sdk._compat import text_type, string_types

Expand Down Expand Up @@ -217,3 +219,22 @@ def test_failed_base64_conversion(input):
# failures
if type(input) not in string_types:
assert to_base64(input) is None


def test_strip_string():
    """Check strip_string() on None, at-limit, over-limit and non-ASCII input."""
    # A None input is handed back unchanged.
    assert strip_string(None) is None

    # Without an explicit max_length, a 1024-character ASCII text comes back
    # with all of its characters intact.
    at_limit = "a" * 1024
    assert strip_string(at_limit).count("a") == 1024

    # One character past that limit produces an AnnotatedValue instead.
    over_limit = "a" * 1025
    result = strip_string(over_limit)
    assert isinstance(result, AnnotatedValue)
    # 1021 kept characters plus the three-character "..." marker make 1024.
    assert result.value.count("a") == 1021

    # The length check counts encoded bytes rather than characters: this
    # two-character text is already over a max_length of 2.
    # NOTE(review): the expected value "é..." is itself longer than
    # max_length=2 -- confirm this is the intended result for max_length < 3.
    non_ascii_text = "éê"
    truncated = strip_string(non_ascii_text, max_length=2)
    assert truncated.value == "é..."

0 comments on commit e2674d4

Please sign in to comment.