From 7ec87fc5597b727e492d7345edabc73c1c1d7592 Mon Sep 17 00:00:00 2001
From: Tony Xiao
Date: Thu, 24 Nov 2022 17:50:38 -0500
Subject: [PATCH 1/2] ref(profiling): Eagerly hash stack for profiles

Hashing the stack is an expensive operation and the same stack is used
for parallel transactions happening on various threads. Instead of
hashing it each time it's used, hash the stack once when the sample is
written and reuse the hash.
---
 sentry_sdk/profiler.py | 59 ++++++++++++++++++++++--------------
 tests/test_profiler.py |  8 +++---
 2 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py
index 28e96016ca..586de0cfbe 100644
--- a/sentry_sdk/profiler.py
+++ b/sentry_sdk/profiler.py
@@ -53,7 +53,9 @@
     from typing_extensions import TypedDict
 
     import sentry_sdk.tracing
 
-    RawSampleData = Tuple[int, Sequence[Tuple[str, Sequence[RawFrameData]]]]
+    RawStack = Tuple[RawFrameData, ...]
+    RawSample = Sequence[Tuple[str, RawStack]]
+    RawSampleWithId = Sequence[Tuple[str, int, RawStack]]
 
     ProcessedStack = Tuple[int, ...]
@@ -153,7 +155,7 @@ def teardown_profiler():
 
 
 def extract_stack(frame, max_stack_depth=MAX_STACK_DEPTH):
-    # type: (Optional[FrameType], int) -> Sequence[RawFrameData]
+    # type: (Optional[FrameType], int) -> Tuple[RawFrameData, ...]
     """
     Extracts the stack starting the specified frame. The extracted stack
     assumes the specified frame is the top of the stack, and works back
@@ -324,12 +326,12 @@ class SampleBuffer(object):
 
     def __init__(self, capacity):
         # type: (int) -> None
-        self.buffer = [None] * capacity  # type: List[Optional[RawSampleData]]
+        self.buffer = [None] * capacity  # type: List[Optional[Tuple[int, RawSampleWithId]]]
         self.capacity = capacity  # type: int
         self.idx = 0  # type: int
 
-    def write(self, sample):
-        # type: (RawSampleData) -> None
+    def write(self, ts, raw_sample):
+        # type: (int, RawSample) -> None
         """
         Writing to the buffer is not thread safe. There is the possibility
         that parallel writes will overwrite one another.
@@ -342,7 +344,24 @@ def write(self, sample):
         any synchronization mechanisms here like locks.
         """
         idx = self.idx
-        self.buffer[idx] = sample
+
+        sample = [
+            (
+                thread_id,
+                # Instead of mapping the stack into frame ids and hashing
+                # that as a tuple, we can directly hash the stack.
+                # This saves us from having to generate yet another list.
+                # Additionally, using the stack as the key directly is
+                # costly because the stack can be large, so we pre-hash
+                # the stack, and use the hash as the key as this will be
+                # needed a few times to improve performance.
+                hash(stack),
+                stack,
+            )
+            for thread_id, stack in raw_sample
+        ]
+
+        self.buffer[idx] = (ts, sample)
         self.idx = (idx + 1) % self.capacity
 
     def slice_profile(self, start_ns, stop_ns):
@@ -353,27 +372,13 @@
         frames = dict()  # type: Dict[RawFrameData, int]
         frames_list = list()  # type: List[ProcessedFrame]
 
-        # TODO: This is doing an naive iteration over the
-        # buffer and extracting the appropriate samples.
-        #
-        # Is it safe to assume that the samples are always in
-        # chronological order and binary search the buffer?
         for ts, sample in filter(None, self.buffer):
             if start_ns > ts or ts > stop_ns:
                 continue
 
             elapsed_since_start_ns = str(ts - start_ns)
 
-            for tid, stack in sample:
-                # Instead of mapping the stack into frame ids and hashing
-                # that as a tuple, we can directly hash the stack.
-                # This saves us from having to generate yet another list.
-                # Additionally, using the stack as the key directly is
-                # costly because the stack can be large, so we pre-hash
-                # the stack, and use the hash as the key as this will be
-                # needed a few times to improve performance.
-                hashed_stack = hash(stack)
-
+            for tid, hashed_stack, stack in sample:
                 # Check if the stack is indexed first, this lets us skip
                 # indexing frames if it's not necessary
                 if hashed_stack not in stacks:
@@ -429,13 +434,11 @@ def _sample_stack(*args, **kwargs):
             """
 
             self.write(
-                (
-                    nanosecond_time(),
-                    [
-                        (str(tid), extract_stack(frame))
-                        for tid, frame in sys._current_frames().items()
-                    ],
-                )
+                nanosecond_time(),
+                [
+                    (str(tid), extract_stack(frame))
+                    for tid, frame in sys._current_frames().items()
+                ]
             )
 
         return _sample_stack
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index 11e92630cf..819187a200 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -195,8 +195,8 @@ def __init__(self, capacity, sample_data=None):
 
     def make_sampler(self):
         def _sample_stack(*args, **kwargs):
-            print("writing", self.sample_data[0])
-            self.write(self.sample_data.pop(0))
+            ts, sample = self.sample_data.pop(0)
+            self.write(ts, sample)
 
         return _sample_stack
 
@@ -706,7 +706,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
 )
 def test_sample_buffer(capacity, start_ns, stop_ns, samples, profile):
     buffer = SampleBuffer(capacity)
-    for sample in samples:
-        buffer.write(sample)
+    for ts, sample in samples:
+        buffer.write(ts, sample)
     result = buffer.slice_profile(start_ns, stop_ns)
     assert result == profile

From 26f2cfe627c32be5a021de6eda964700205e4c43 Mon Sep 17 00:00:00 2001
From: Tony Xiao
Date: Fri, 25 Nov 2022 10:57:53 -0500
Subject: [PATCH 2/2] run black

---
 sentry_sdk/profiler.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py
index 586de0cfbe..dac7f14c80 100644
--- a/sentry_sdk/profiler.py
+++ b/sentry_sdk/profiler.py
@@ -326,7 +326,9 @@ class SampleBuffer(object):
 
     def __init__(self, capacity):
         # type: (int) -> None
-        self.buffer = [None] * capacity  # type: List[Optional[Tuple[int, RawSampleWithId]]]
+        self.buffer = [
+            None
+        ] * capacity  # type: List[Optional[Tuple[int, RawSampleWithId]]]
         self.capacity = capacity  # type: int
         self.idx = 0  # type: int
 
@@ -438,7 +440,7 @@ def _sample_stack(*args, **kwargs):
                 [
                     (str(tid), extract_stack(frame))
                     for tid, frame in sys._current_frames().items()
-                ]
+                ],
            )
 
         return _sample_stack
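
Note for reviewers: the change computes hash(stack) once in write() and stores it
next to the stack, so slice_profile() can reuse the precomputed hash as its
dictionary key instead of rehashing the same large tuple for every transaction
that slices the buffer. The following is a minimal standalone sketch of that
pattern, not the SDK code; ToyBuffer, its method names, and the tuple layout are
illustrative only.

from time import time_ns


class ToyBuffer(object):
    """Ring buffer that pre-hashes stacks on write, mirroring the patch's idea."""

    def __init__(self, capacity):
        self.buffer = [None] * capacity
        self.capacity = capacity
        self.idx = 0

    def write(self, ts, raw_sample):
        # Hash each stack exactly once, at write time, and carry the hash
        # alongside the stack so readers never have to rehash large tuples.
        sample = [
            (thread_id, hash(stack), stack) for thread_id, stack in raw_sample
        ]
        self.buffer[self.idx] = (ts, sample)
        self.idx = (self.idx + 1) % self.capacity

    def slice(self, start_ns, stop_ns):
        # Reuse the precomputed hash as the dict key; the stack itself is
        # only stored once per distinct hash.
        stacks = {}
        for ts, sample in filter(None, self.buffer):
            if start_ns <= ts <= stop_ns:
                for thread_id, hashed_stack, stack in sample:
                    stacks.setdefault(hashed_stack, stack)
        return stacks


buf = ToyBuffer(capacity=4)
buf.write(time_ns(), [("140001", ("main", "run", "handler"))])
print(buf.slice(0, time_ns()))

With this layout each distinct stack is hashed once per write instead of once per
transaction that slices the buffer, which is the saving the commit message
describes.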