From da4a21c8396511d1afee5fa3674a110d28821515 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Thu, 19 Sep 2019 09:24:18 +0200 Subject: [PATCH 1/5] fix: Do not capture SQL params for now --- sentry_sdk/tracing.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 1fced63001..03f09207fd 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -2,6 +2,7 @@ import uuid import contextlib +from collections import deque from datetime import datetime import sentry_sdk @@ -23,6 +24,7 @@ from typing import Dict from typing import List from typing import Tuple + from typing import Deque _traceparent_header_format_re = re.compile( "^[ \t]*" # whitespace @@ -107,16 +109,16 @@ def __init__( self.hub = hub self._tags = {} # type: Dict[str, str] self._data = {} # type: Dict[str, Any] - self._finished_spans = None # type: Optional[List[Span]] + self._finished_spans = None # type: Optional[Deque[Span]] self.start_timestamp = datetime.now() #: End timestamp of span self.timestamp = None # type: Optional[datetime] - def init_finished_spans(self): + def init_finished_spans(self, maxlen): # type: () -> None if self._finished_spans is None: - self._finished_spans = [] + self._finished_spans = deque(maxlen=maxlen) def __repr__(self): # type: () -> str @@ -354,15 +356,10 @@ def record_sql_queries( executemany, # type: bool ): # type: (...) -> Generator[Span, None, None] - if not params_list or params_list == [None]: - params_list = None - - if paramstyle == "pyformat": - paramstyle = "format" - + # TODO: Bring back capturing of params query = _format_sql(cursor, query) - data = {"db.params": params_list, "db.paramstyle": paramstyle} + data = {} if executemany: data["db.executemany"] = True From 3a27d87f293cb086050258888a7fd02ef58763f7 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Thu, 19 Sep 2019 09:47:04 +0200 Subject: [PATCH 2/5] fix: Discard entire span tree if too large --- sentry_sdk/consts.py | 1 + sentry_sdk/hub.py | 5 ++++- sentry_sdk/tracing.py | 48 +++++++++++++++++++++++++++++++------------ 3 files changed, 40 insertions(+), 14 deletions(-) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 9d52c3573d..b6d345a608 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -49,6 +49,7 @@ def __init__( # DO NOT ENABLE THIS RIGHT NOW UNLESS YOU WANT TO EXCEED YOUR EVENT QUOTA IMMEDIATELY traces_sample_rate=0.0, # type: float traceparent_v2=False, # type: bool + _experiments={}, # type: Dict[str, Any] ): # type: (...) -> None pass diff --git a/sentry_sdk/hub.py b/sentry_sdk/hub.py index fa72a42113..f804747fbc 100644 --- a/sentry_sdk/hub.py +++ b/sentry_sdk/hub.py @@ -450,7 +450,10 @@ def start_span( span.sampled = random.random() < sample_rate if span.sampled: - span.init_finished_spans() + max_spans = ( + client and client.options["_experiments"].get("max_spans") or 1000 + ) + span.init_finished_spans(maxlen=max_spans) return span diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 03f09207fd..ed34142c84 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -2,7 +2,6 @@ import uuid import contextlib -from collections import deque from datetime import datetime import sentry_sdk @@ -24,7 +23,7 @@ from typing import Dict from typing import List from typing import Tuple - from typing import Deque + from typing import Iterator _traceparent_header_format_re = re.compile( "^[ \t]*" # whitespace @@ -66,6 +65,30 @@ def __iter__(self): yield k[len(self.prefix) :] +class _SpanRecorder(object): + __slots__ = ("maxlen", "finished_spans") + + def __init__(self): + # type: () -> None + self.maxlen = 1000 + self.finished_spans = None # type: Optional[List[Span]] + + def append(self, span): + # type: (Span) -> None + if self.finished_spans is not None: + if len(self.finished_spans) >= self.maxlen: + # If the span tree grows too large we decided it's better to + # discard all spans instead of trying to apply some expensive + # trimming. + self.finished_spans = None + else: + self.finished_spans.append(span) + + def __iter__(self): + # type: () -> Iterator[Span] + return iter(self.finished_spans or ()) + + class Span(object): __slots__ = ( "trace_id", @@ -80,7 +103,7 @@ class Span(object): "timestamp", "_tags", "_data", - "_finished_spans", + "_span_recorder", "hub", "_context_manager_state", ) @@ -109,16 +132,18 @@ def __init__( self.hub = hub self._tags = {} # type: Dict[str, str] self._data = {} # type: Dict[str, Any] - self._finished_spans = None # type: Optional[Deque[Span]] + self._span_recorder = _SpanRecorder() # type: _SpanRecorder self.start_timestamp = datetime.now() #: End timestamp of span self.timestamp = None # type: Optional[datetime] def init_finished_spans(self, maxlen): - # type: () -> None - if self._finished_spans is None: - self._finished_spans = deque(maxlen=maxlen) + # type: (int) -> None + if self._span_recorder.finished_spans is None: + self._span_recorder.finished_spans = [] + + self._span_recorder.maxlen = maxlen def __repr__(self): # type: () -> str @@ -164,7 +189,7 @@ def new_span(self, **kwargs): sampled=self.sampled, **kwargs ) - rv._finished_spans = self._finished_spans + rv._span_recorder = self._span_recorder return rv @classmethod @@ -254,8 +279,7 @@ def finish(self, hub=None): self.timestamp = datetime.now() - if self._finished_spans is not None: - self._finished_spans.append(self) + self._span_recorder.append(self) _maybe_create_breadcrumbs_from_span(hub, self) @@ -286,9 +310,7 @@ def finish(self, hub=None): "contexts": {"trace": self.get_trace_context()}, "timestamp": self.timestamp, "start_timestamp": self.start_timestamp, - "spans": [ - s.to_json() for s in (self._finished_spans or ()) if s is not self - ], + "spans": [s.to_json() for s in self._span_recorder if s is not self], } ) From 57c18dcd9f3cd420d227be9b9d92f2c4bd50fc69 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Thu, 19 Sep 2019 12:09:03 +0200 Subject: [PATCH 3/5] ref: Send partial span tree instead of clearing it --- sentry_sdk/tracing.py | 60 ++++++++++++++++++++++++------------------- tests/test_tracing.py | 15 +++++++++++ 2 files changed, 49 insertions(+), 26 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index ed34142c84..7258e8dedb 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -23,7 +23,6 @@ from typing import Dict from typing import List from typing import Tuple - from typing import Iterator _traceparent_header_format_re = re.compile( "^[ \t]*" # whitespace @@ -66,27 +65,28 @@ def __iter__(self): class _SpanRecorder(object): - __slots__ = ("maxlen", "finished_spans") + __slots__ = ("maxlen", "finished_spans", "open_span_count") - def __init__(self): - # type: () -> None - self.maxlen = 1000 - self.finished_spans = None # type: Optional[List[Span]] + def __init__(self, maxlen): + # type: (int) -> None + self.maxlen = maxlen + self.open_span_count = 0 # type: int + self.finished_spans = [] # type: List[Span] - def append(self, span): + def start_span(self, span): # type: (Span) -> None - if self.finished_spans is not None: - if len(self.finished_spans) >= self.maxlen: - # If the span tree grows too large we decided it's better to - # discard all spans instead of trying to apply some expensive - # trimming. - self.finished_spans = None - else: - self.finished_spans.append(span) - def __iter__(self): - # type: () -> Iterator[Span] - return iter(self.finished_spans or ()) + # This is just so that we don't run out of memory while recording a lot + # of spans. At some point we just stop and flush out the start of the + # trace tree (i.e. the first n spans with the smallest + # start_timestamp). + self.open_span_count += 1 + if self.open_span_count > self.maxlen: + span._span_recorder = None + + def finish_span(self, span): + # type: (Span) -> None + self.finished_spans.append(span) class Span(object): @@ -132,18 +132,18 @@ def __init__( self.hub = hub self._tags = {} # type: Dict[str, str] self._data = {} # type: Dict[str, Any] - self._span_recorder = _SpanRecorder() # type: _SpanRecorder self.start_timestamp = datetime.now() #: End timestamp of span self.timestamp = None # type: Optional[datetime] + self._span_recorder = None # type: Optional[_SpanRecorder] + def init_finished_spans(self, maxlen): # type: (int) -> None - if self._span_recorder.finished_spans is None: - self._span_recorder.finished_spans = [] - - self._span_recorder.maxlen = maxlen + if self._span_recorder is None: + self._span_recorder = _SpanRecorder(maxlen) + self._span_recorder.start_span(self) def __repr__(self): # type: () -> str @@ -189,6 +189,7 @@ def new_span(self, **kwargs): sampled=self.sampled, **kwargs ) + rv._span_recorder = self._span_recorder return rv @@ -279,10 +280,13 @@ def finish(self, hub=None): self.timestamp = datetime.now() - self._span_recorder.append(self) - _maybe_create_breadcrumbs_from_span(hub, self) + if self._span_recorder is None: + return None + + self._span_recorder.finish_span(self) + if self.transaction is None: # If this has no transaction set we assume there's a parent # transaction for this span that would be flushed out eventually. @@ -310,7 +314,11 @@ def finish(self, hub=None): "contexts": {"trace": self.get_trace_context()}, "timestamp": self.timestamp, "start_timestamp": self.start_timestamp, - "spans": [s.to_json() for s in self._span_recorder if s is not self], + "spans": [ + s.to_json() + for s in self._span_recorder.finished_spans + if s is not self + ], } ) diff --git a/tests/test_tracing.py b/tests/test_tracing.py index 2e118cd2d9..0bb3e1c972 100644 --- a/tests/test_tracing.py +++ b/tests/test_tracing.py @@ -127,3 +127,18 @@ def foo(): gc.collect() assert len(references) == expected_refcount + + +def test_span_trimming(sentry_init, capture_events): + sentry_init(traces_sample_rate=1.0, _experiments={"max_spans": 3}) + events = capture_events() + + with Hub.current.start_span(transaction="hi"): + for i in range(10): + with Hub.current.start_span(op="foo{}".format(i)): + pass + + event, = events + span1, span2 = event["spans"] + assert span1["op"] == "foo0" + assert span2["op"] == "foo1" From cdb71e5bf4eb76e4f96b08757d0cb209c93b61fa Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Thu, 19 Sep 2019 13:20:15 +0200 Subject: [PATCH 4/5] fix: Update tests --- tests/integrations/django/test_basic.py | 10 ++-------- tests/integrations/sqlalchemy/test_sqlalchemy.py | 4 ++-- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/tests/integrations/django/test_basic.py b/tests/integrations/django/test_basic.py index 0504307a78..3a2038e2cf 100644 --- a/tests/integrations/django/test_basic.py +++ b/tests/integrations/django/test_basic.py @@ -188,7 +188,6 @@ def test_sql_queries(sentry_init, capture_events, with_integration): crumb = event["breadcrumbs"][-1] assert crumb["message"] == "SELECT count(*) FROM people_person WHERE foo = %s" - assert crumb["data"]["db.params"] == [123] @pytest.mark.django_db @@ -216,7 +215,6 @@ def test_sql_dict_query_params(sentry_init, capture_events): assert crumb["message"] == ( "SELECT count(*) FROM people_person WHERE foo = %(my_foo)s" ) - assert crumb["data"]["db.params"] == {"my_foo": 10} @pytest.mark.parametrize( @@ -249,7 +247,6 @@ def test_sql_psycopg2_string_composition(sentry_init, capture_events, query): event, = events crumb = event["breadcrumbs"][-1] assert crumb["message"] == ('SELECT %(my_param)s FROM "foobar"') - assert crumb["data"]["db.params"] == {"my_param": 10} @pytest.mark.django_db @@ -288,16 +285,13 @@ def test_sql_psycopg2_placeholders(sentry_init, capture_events): assert event["breadcrumbs"][-2:] == [ { "category": "query", - "data": {"db.paramstyle": "format"}, + "data": {}, "message": "create table my_test_table (foo text, bar date)", "type": "default", }, { "category": "query", - "data": { - "db.params": {"first_var": "fizz", "second_var": "not a date"}, - "db.paramstyle": "format", - }, + "data": {}, "message": 'insert into my_test_table ("foo", "bar") values (%(first_var)s, ' "%(second_var)s)", "type": "default", diff --git a/tests/integrations/sqlalchemy/test_sqlalchemy.py b/tests/integrations/sqlalchemy/test_sqlalchemy.py index b5cb47804a..86c215a00a 100644 --- a/tests/integrations/sqlalchemy/test_sqlalchemy.py +++ b/tests/integrations/sqlalchemy/test_sqlalchemy.py @@ -48,13 +48,13 @@ class Address(Base): assert event["breadcrumbs"][-2:] == [ { "category": "query", - "data": {"db.params": ["Bob"], "db.paramstyle": "qmark"}, + "data": {}, "message": "INSERT INTO person (name) VALUES (?)", "type": "default", }, { "category": "query", - "data": {"db.params": [1, 0], "db.paramstyle": "qmark"}, + "data": {}, "message": "SELECT person.id AS person_id, person.name AS person_name \n" "FROM person\n" " LIMIT ? OFFSET ?", From a2b0a42998eee7a70f4e4076655d5010de0e8e15 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Thu, 19 Sep 2019 15:44:22 +0200 Subject: [PATCH 5/5] feat: Add experiment to get back params --- sentry_sdk/tracing.py | 17 +++++++-- tests/integrations/django/test_basic.py | 36 +++++++++++++++---- .../sqlalchemy/test_sqlalchemy.py | 8 +++-- 3 files changed, 50 insertions(+), 11 deletions(-) diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 7258e8dedb..748c00a9b4 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -386,10 +386,23 @@ def record_sql_queries( executemany, # type: bool ): # type: (...) -> Generator[Span, None, None] - # TODO: Bring back capturing of params + + # TODO: Bring back capturing of params by default + if hub.client and hub.client.options["_experiments"].get( + "record_sql_params", False + ): + if not params_list or params_list == [None]: + params_list = None + + if paramstyle == "pyformat": + paramstyle = "format" + else: + params_list = None + paramstyle = None + query = _format_sql(cursor, query) - data = {} + data = {"db.params": params_list, "db.paramstyle": paramstyle} if executemany: data["db.executemany"] = True diff --git a/tests/integrations/django/test_basic.py b/tests/integrations/django/test_basic.py index 3a2038e2cf..40160a2c55 100644 --- a/tests/integrations/django/test_basic.py +++ b/tests/integrations/django/test_basic.py @@ -170,7 +170,9 @@ def test_sql_queries(sentry_init, capture_events, with_integration): sentry_init( integrations=[DjangoIntegration()] if with_integration else [], send_default_pii=True, + _experiments={"record_sql_params": True}, ) + from django.db import connection sql = connection.cursor() @@ -188,11 +190,16 @@ def test_sql_queries(sentry_init, capture_events, with_integration): crumb = event["breadcrumbs"][-1] assert crumb["message"] == "SELECT count(*) FROM people_person WHERE foo = %s" + assert crumb["data"]["db.params"] == [123] @pytest.mark.django_db def test_sql_dict_query_params(sentry_init, capture_events): - sentry_init(integrations=[DjangoIntegration()], send_default_pii=True) + sentry_init( + integrations=[DjangoIntegration()], + send_default_pii=True, + _experiments={"record_sql_params": True}, + ) from django.db import connections @@ -215,6 +222,7 @@ def test_sql_dict_query_params(sentry_init, capture_events): assert crumb["message"] == ( "SELECT count(*) FROM people_person WHERE foo = %(my_foo)s" ) + assert crumb["data"]["db.params"] == {"my_foo": 10} @pytest.mark.parametrize( @@ -228,7 +236,11 @@ def test_sql_dict_query_params(sentry_init, capture_events): ) @pytest.mark.django_db def test_sql_psycopg2_string_composition(sentry_init, capture_events, query): - sentry_init(integrations=[DjangoIntegration()], send_default_pii=True) + sentry_init( + integrations=[DjangoIntegration()], + send_default_pii=True, + _experiments={"record_sql_params": True}, + ) from django.db import connections if "postgres" not in connections: @@ -247,11 +259,16 @@ def test_sql_psycopg2_string_composition(sentry_init, capture_events, query): event, = events crumb = event["breadcrumbs"][-1] assert crumb["message"] == ('SELECT %(my_param)s FROM "foobar"') + assert crumb["data"]["db.params"] == {"my_param": 10} @pytest.mark.django_db def test_sql_psycopg2_placeholders(sentry_init, capture_events): - sentry_init(integrations=[DjangoIntegration()], send_default_pii=True) + sentry_init( + integrations=[DjangoIntegration()], + send_default_pii=True, + _experiments={"record_sql_params": True}, + ) from django.db import connections if "postgres" not in connections: @@ -285,13 +302,16 @@ def test_sql_psycopg2_placeholders(sentry_init, capture_events): assert event["breadcrumbs"][-2:] == [ { "category": "query", - "data": {}, + "data": {"db.paramstyle": "format"}, "message": "create table my_test_table (foo text, bar date)", "type": "default", }, { "category": "query", - "data": {}, + "data": { + "db.params": {"first_var": "fizz", "second_var": "not a date"}, + "db.paramstyle": "format", + }, "message": 'insert into my_test_table ("foo", "bar") values (%(first_var)s, ' "%(second_var)s)", "type": "default", @@ -493,7 +513,11 @@ def test_does_not_capture_403(sentry_init, client, capture_events, endpoint): def test_middleware_spans(sentry_init, client, capture_events): - sentry_init(integrations=[DjangoIntegration()], traces_sample_rate=1.0) + sentry_init( + integrations=[DjangoIntegration()], + traces_sample_rate=1.0, + _experiments={"record_sql_params": True}, + ) events = capture_events() _content, status, _headers = client.get(reverse("message")) diff --git a/tests/integrations/sqlalchemy/test_sqlalchemy.py b/tests/integrations/sqlalchemy/test_sqlalchemy.py index 86c215a00a..e918f954f4 100644 --- a/tests/integrations/sqlalchemy/test_sqlalchemy.py +++ b/tests/integrations/sqlalchemy/test_sqlalchemy.py @@ -8,7 +8,9 @@ def test_orm_queries(sentry_init, capture_events): - sentry_init(integrations=[SqlalchemyIntegration()]) + sentry_init( + integrations=[SqlalchemyIntegration()], _experiments={"record_sql_params": True} + ) events = capture_events() Base = declarative_base() @@ -48,13 +50,13 @@ class Address(Base): assert event["breadcrumbs"][-2:] == [ { "category": "query", - "data": {}, + "data": {"db.params": ["Bob"], "db.paramstyle": "qmark"}, "message": "INSERT INTO person (name) VALUES (?)", "type": "default", }, { "category": "query", - "data": {}, + "data": {"db.params": [1, 0], "db.paramstyle": "qmark"}, "message": "SELECT person.id AS person_id, person.name AS person_name \n" "FROM person\n" " LIMIT ? OFFSET ?",