From f40da644142bd89092e81ef72beaf66687fe9a15 Mon Sep 17 00:00:00 2001 From: Carl Friedrich Bolz-Tereick Date: Mon, 30 May 2022 18:35:05 +0200 Subject: [PATCH] perf: PyTracer improvements (#1388) * cache the bound method of _trace on self this speeds up pure python tracing because we don't have to re-create a bound method object all the time * optimize checking whether a file should be traced the optimization works based on the following heuristic: in a majority of cases, functions call other functions in the same file. In that situation we don't have to re-check whether we should trace the file * fix optimization in the presence of contexts * fix too long line --- coverage/pytracer.py | 65 ++++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/coverage/pytracer.py b/coverage/pytracer.py index 08050b586..4389c9ed7 100644 --- a/coverage/pytracer.py +++ b/coverage/pytracer.py @@ -67,6 +67,11 @@ def __init__(self): # On exit, self.in_atexit = True atexit.register(setattr, self, 'in_atexit', True) + # cache a bound method on the instance, so that we don't have to + # re-create a bound method object all the time + self._cached_bound_method_trace = self._trace + + def __repr__(self): return "".format( id(self), @@ -105,7 +110,7 @@ def _trace(self, frame, event, arg_unused): #self.log(":", frame.f_code.co_filename, frame.f_lineno, frame.f_code.co_name + "()", event) - if (self.stopped and sys.gettrace() == self._trace): # pylint: disable=comparison-with-callable + if (self.stopped and sys.gettrace() == self._cached_bound_method_trace): # pylint: disable=comparison-with-callable # The PyTrace.stop() method has been called, possibly by another # thread, let's deactivate ourselves now. 
if 0: @@ -129,12 +134,13 @@ def _trace(self, frame, event, arg_unused): context_maybe = self.should_start_context(frame) if context_maybe is not None: self.context = context_maybe - self.started_context = True + started_context = True self.switch_context(self.context) else: - self.started_context = False + started_context = False else: - self.started_context = False + started_context = False + self.started_context = started_context # Entering a new frame. Decide if we should trace in this file. self._activity = True @@ -143,23 +149,33 @@ def _trace(self, frame, event, arg_unused): self.cur_file_data, self.cur_file_name, self.last_line, - self.started_context, + started_context, ) ) + + # Improve tracing performance: when calling a function, both caller + # and callee are often within the same file. If that's the case, we + # don't have to re-check whether to trace the corresponding + # function (which is a little bit expensive since it involves + # dictionary lookups). This optimization is only correct if we + # didn't start a context. 
filename = frame.f_code.co_filename - self.cur_file_name = filename - disp = self.should_trace_cache.get(filename) - if disp is None: - disp = self.should_trace(filename, frame) - self.should_trace_cache[filename] = disp - - self.cur_file_data = None - if disp.trace: - tracename = disp.source_filename - if tracename not in self.data: - self.data[tracename] = set() - self.cur_file_data = self.data[tracename] - else: + if filename != self.cur_file_name or started_context: + self.cur_file_name = filename + disp = self.should_trace_cache.get(filename) + if disp is None: + disp = self.should_trace(filename, frame) + self.should_trace_cache[filename] = disp + + self.cur_file_data = None + if disp.trace: + tracename = disp.source_filename + if tracename not in self.data: + self.data[tracename] = set() + self.cur_file_data = self.data[tracename] + else: + frame.f_trace_lines = False + elif not self.cur_file_data: frame.f_trace_lines = False # The call event is really a "start frame" event, and happens for @@ -225,7 +241,7 @@ def _trace(self, frame, event, arg_unused): if self.started_context: self.context = None self.switch_context(None) - return self._trace + return self._cached_bound_method_trace def start(self): """Start this Tracer. @@ -243,10 +259,10 @@ def start(self): # function, but we are marked as running again, so maybe it # will be ok? #self.log("~", "starting on different threads") - return self._trace + return self._cached_bound_method_trace - sys.settrace(self._trace) - return self._trace + sys.settrace(self._cached_bound_method_trace) + return self._cached_bound_method_trace def stop(self): """Stop this Tracer.""" @@ -271,9 +287,10 @@ def stop(self): # so don't warn if we are in atexit on PyPy and the trace function # has changed to None. 
dont_warn = (env.PYPY and env.PYPYVERSION >= (5, 4) and self.in_atexit and tf is None) - if (not dont_warn) and tf != self._trace: # pylint: disable=comparison-with-callable + if (not dont_warn) and tf != self._cached_bound_method_trace: # pylint: disable=comparison-with-callable self.warn( - f"Trace function changed, data is likely wrong: {tf!r} != {self._trace!r}", + f"Trace function changed, data is likely wrong: " + f"{tf!r} != {self._cached_bound_method_trace!r}", slug="trace-changed", )