Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(envelope) Add support for implicitly sized envelope items #1229

Merged
merged 5 commits into from
Nov 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 9 additions & 4 deletions sentry_sdk/envelope.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,13 +295,18 @@ def deserialize_from(
if not line:
return None
headers = parse_json(line)
length = headers["length"]
payload = f.read(length)
if headers.get("type") in ("event", "transaction"):
length = headers.get("length")
if length is not None:
payload = f.read(length)
f.readline()
else:
# if no length was specified we need to read up to the end of line
# and remove it (if it is present, i.e. not the very last char in an eof terminated envelope)
payload = f.readline().rstrip(b"\n")
if headers.get("type") in ("event", "transaction", "metric_buckets"):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"metric_buckets" is this addition intentional? If so, please let's document it at least in the PR description as it seems unrelated to the title.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, sorry sneaked it in since I bumped into the problem while testing metrics.
"metric_buckets" is the new item type that contains metrics... will detail in description.

rv = cls(headers=headers, payload=PayloadRef(json=parse_json(payload)))
else:
rv = cls(headers=headers, payload=payload)
f.readline()
return rv

@classmethod
Expand Down
132 changes: 132 additions & 0 deletions tests/test_envelope.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,135 @@ def test_envelope_headers(
"event_id": "15210411201320122115110420122013",
"sent_at": "2012-11-21T12:31:12.415908Z",
}


def test_envelope_with_sized_items():
"""
Tests that it successfully parses envelopes with
the item size specified in the header
"""
envelope_raw = (
b'{"event_id":"9ec79c33ec9942ab8353589fcb2e04dc"}\n'
+ b'{"type":"type1","length":4 }\n1234\n'
+ b'{"type":"type2","length":4 }\nabcd\n'
+ b'{"type":"type3","length":0}\n\n'
+ b'{"type":"type4","length":4 }\nab12\n'
)
envelope_raw_eof_terminated = envelope_raw[:-1]

for envelope_raw in (envelope_raw, envelope_raw_eof_terminated):
actual = Envelope.deserialize(envelope_raw)

items = [item for item in actual]

assert len(items) == 4

assert items[0].type == "type1"
assert items[0].get_bytes() == b"1234"

assert items[1].type == "type2"
assert items[1].get_bytes() == b"abcd"

assert items[2].type == "type3"
assert items[2].get_bytes() == b""

assert items[3].type == "type4"
assert items[3].get_bytes() == b"ab12"

assert actual.headers["event_id"] == "9ec79c33ec9942ab8353589fcb2e04dc"


def test_envelope_with_implicitly_sized_items():
"""
Tests that it successfully parses envelopes with
the item size not specified in the header
"""
envelope_raw = (
b'{"event_id":"9ec79c33ec9942ab8353589fcb2e04dc"}\n'
+ b'{"type":"type1"}\n1234\n'
+ b'{"type":"type2"}\nabcd\n'
+ b'{"type":"type3"}\n\n'
+ b'{"type":"type4"}\nab12\n'
)
envelope_raw_eof_terminated = envelope_raw[:-1]

for envelope_raw in (envelope_raw, envelope_raw_eof_terminated):
actual = Envelope.deserialize(envelope_raw)
assert actual.headers["event_id"] == "9ec79c33ec9942ab8353589fcb2e04dc"

items = [item for item in actual]

assert len(items) == 4

assert items[0].type == "type1"
assert items[0].get_bytes() == b"1234"

assert items[1].type == "type2"
assert items[1].get_bytes() == b"abcd"

assert items[2].type == "type3"
assert items[2].get_bytes() == b""

assert items[3].type == "type4"
assert items[3].get_bytes() == b"ab12"


def test_envelope_with_two_attachments():
"""
Test that items are correctly parsed in an envelope with to size specified items
"""
two_attachments = (
b'{"event_id":"9ec79c33ec9942ab8353589fcb2e04dc","dsn":"https://e12d836b15bb49d7bbf99e64295d995b:@sentry.io/42"}\n'
+ b'{"type":"attachment","length":10,"content_type":"text/plain","filename":"hello.txt"}\n'
+ b"\xef\xbb\xbfHello\r\n\n"
+ b'{"type":"event","length":41,"content_type":"application/json","filename":"application.log"}\n'
+ b'{"message":"hello world","level":"error"}\n'
)
two_attachments_eof_terminated = two_attachments[
:-1
] # last \n is optional, without it should still be a valid envelope

for envelope_raw in (two_attachments, two_attachments_eof_terminated):
actual = Envelope.deserialize(envelope_raw)
items = [item for item in actual]

assert len(items) == 2
assert items[0].get_bytes() == b"\xef\xbb\xbfHello\r\n"
assert items[1].payload.json == {"message": "hello world", "level": "error"}


def test_envelope_with_empty_attachments():
"""
Test that items are correctly parsed in an envelope with two 0 length items (with size specified in the header
"""
two_empty_attachments = (
b'{"event_id":"9ec79c33ec9942ab8353589fcb2e04dc"}\n'
+ b'{"type":"attachment","length":0}\n\n'
+ b'{"type":"attachment","length":0}\n\n'
)

two_empty_attachments_eof_terminated = two_empty_attachments[
:-1
] # last \n is optional, without it should still be a valid envelope

for envelope_raw in (two_empty_attachments, two_empty_attachments_eof_terminated):
actual = Envelope.deserialize(envelope_raw)
items = [item for item in actual]

assert len(items) == 2
assert items[0].get_bytes() == b""
assert items[1].get_bytes() == b""


def test_envelope_without_headers():
"""
Test that an envelope without headers is parsed successfully
"""
envelope_without_headers = (
b"{}\n" + b'{"type":"session"}\n' + b'{"started": "2020-02-07T14:16:00Z"}'
)
actual = Envelope.deserialize(envelope_without_headers)
items = [item for item in actual]

assert len(items) == 1
assert items[0].payload.get_bytes() == b'{"started": "2020-02-07T14:16:00Z"}'