From aa6f2515b2fefced69be933952bd1f3319d0de58 Mon Sep 17 00:00:00 2001 From: Jason Madden Date: Sun, 6 Nov 2022 12:30:41 -0600 Subject: [PATCH] Python 3.11: Fix a memory leak switching to greenlets. This leak was present in the original implementation of Python 3.11 support (#306) Fixes #328 and fixes https://github.com/gevent/gevent/issues/1924 ; I have run gevent's test suite locally on 3.11 with this fix without seeing any regressions. --- CHANGES.rst | 9 +- setup.py | 3 +- src/greenlet/greenlet.cpp | 8 +- src/greenlet/greenlet_greenlet.hpp | 98 ++++++++++++++++++- src/greenlet/tests/__init__.py | 1 + src/greenlet/tests/test_cpp.py | 6 ++ src/greenlet/tests/test_leaks.py | 151 +++++++++++++++++++++++++++++ 7 files changed, 268 insertions(+), 8 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index a47521fc..aa1621c3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,7 +5,9 @@ 2.0.1 (unreleased) ================== -- Nothing changed yet. +- Python 3.11: Fix a memory leak. See `issue 328 + `_ and + `gevent issue 1924 `_. 2.0.0.post0 (2022-11-03) @@ -154,6 +156,8 @@ Changes - Add musllinux (Alpine) binary wheels. +.. important:: This preliminary support for Python 3.11 leaks memory. + Please upgrade to greenlet 2 if you're using Python 3.11. 1.1.3 (2022-08-25) ================== @@ -161,6 +165,9 @@ Changes - Add support for Python 3.11. Please note that Windows binary wheels are not available at this time. +.. important:: This preliminary support for Python 3.11 leaks memory. + Please upgrade to greenlet 2 if you're using Python 3.11. + 1.1.2 (2021-09-29) ================== diff --git a/setup.py b/setup.py index 6c3984c0..f1931fd5 100755 --- a/setup.py +++ b/setup.py @@ -226,7 +226,8 @@ def get_greenlet_version(): ], 'test': [ 'objgraph', - 'faulthandler; python_version == "2.7" and platform_python_implementation == "CPython"', + 'faulthandler; python_version == "2.7" and platform_python_implementation == "CPython"', + 'psutil', ], }, python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*", diff --git a/src/greenlet/greenlet.cpp b/src/greenlet/greenlet.cpp index 5990323a..b761cab3 100644 --- a/src/greenlet/greenlet.cpp +++ b/src/greenlet/greenlet.cpp @@ -1438,12 +1438,15 @@ UserGreenlet::inner_bootstrap(OwnedGreenlet& origin_greenlet, OwnedObject& _run) result = single_result(result); } this->release_args(); + this->python_state.did_finish(PyThreadState_GET()); result = g_handle_exit(result); assert(this->thread_state()->borrow_current() == this->_self); + /* jump back to parent */ this->stack_state.set_inactive(); /* dead */ + // TODO: Can we decref some things here? Release our main greenlet // and maybe parent? for (Greenlet* parent = this->_parent; @@ -2029,7 +2032,6 @@ UserGreenlet::tp_clear() this->_parent.CLEAR(); this->_main_greenlet.CLEAR(); this->_run_callable.CLEAR(); - return 0; } @@ -2140,6 +2142,10 @@ Greenlet::~Greenlet() UserGreenlet::~UserGreenlet() { + // Python 3.11: If we don't clear out the raw frame datastack + // when deleting an unfinished greenlet, + // TestLeaks.test_untracked_memory_doesnt_increase_unfinished_thread_dealloc_in_main fails. + this->python_state.did_finish(nullptr); this->tp_clear(); } diff --git a/src/greenlet/greenlet_greenlet.hpp b/src/greenlet/greenlet_greenlet.hpp index 292fce4d..cc02c5c5 100644 --- a/src/greenlet/greenlet_greenlet.hpp +++ b/src/greenlet/greenlet_greenlet.hpp @@ -146,10 +146,10 @@ namespace greenlet int recursion_depth; int trash_delete_nesting; #if GREENLET_PY311 - _PyInterpreterFrame *current_frame; - _PyStackChunk *datastack_chunk; - PyObject **datastack_top; - PyObject **datastack_limit; + _PyInterpreterFrame* current_frame; + _PyStackChunk* datastack_chunk; + PyObject** datastack_top; + PyObject** datastack_limit; #endif public: @@ -170,6 +170,7 @@ namespace greenlet void set_new_cframe(_PyCFrame& frame) G_NOEXCEPT; #endif void will_switch_from(PyThreadState *const origin_tstate) G_NOEXCEPT; + void did_finish(PyThreadState* tstate) G_NOEXCEPT; }; class StackState @@ -213,9 +214,13 @@ namespace greenlet inline intptr_t stack_saved() const G_NOEXCEPT; inline char* stack_start() const G_NOEXCEPT; static inline StackState make_main() G_NOEXCEPT; +#ifdef GREENLET_USE_STDIO friend std::ostream& operator<<(std::ostream& os, const StackState& s); +#endif }; +#ifdef GREENLET_USE_STDIO std::ostream& operator<<(std::ostream& os, const StackState& s); +#endif class SwitchingArgs { @@ -933,14 +938,97 @@ void PythonState::set_new_cframe(_PyCFrame& frame) G_NOEXCEPT } #endif - const PythonState::OwnedFrame& PythonState::top_frame() const G_NOEXCEPT { return this->_top_frame; } +void PythonState::did_finish(PyThreadState* tstate) G_NOEXCEPT +{ +#if GREENLET_PY311 + // See https://github.com/gevent/gevent/issues/1924 and + // https://github.com/python-greenlet/greenlet/issues/328. In + // short, Python 3.11 allocates memory for frames as a sort of + // linked list that's kept as part of PyThreadState in the + // ``datastack_chunk`` member and friends. These are saved and + // restored as part of switching greenlets. + // + // When we initially switch to a greenlet, we set those to NULL. + // That causes the frame management code to treat this like a + // brand new thread and start a fresh list of chunks, beginning + // with a new "root" chunk. As we make calls in this greenlet, + // those chunks get added, and as calls return, they get popped. + // But the frame code (pystate.c) is careful to make sure that the + // root chunk never gets popped. + // + // Thus, when a greenlet exits for the last time, there will be at + // least a single root chunk that we must be responsible for + // deallocating. + // + // The complex part is that these chunks are allocated and freed + // using ``_PyObject_VirtualAlloc``/``Free``. Those aren't public + // functions, and they aren't exported for linking. It so happens + // that we know they are just thin wrappers around the Arena + // allocator, so we can use that directly to deallocate in a + // compatible way. + // + // CAUTION: Check this implementation detail on every major version. + // + // It might be nice to be able to do this in our destructor, but + // can we be sure that no one else is using that memory? Plus, as + // described below, our pointers may not even be valid anymore. As + // a special case, there is one time that we know we can do this, + // and that's from the destructor of the associated UserGreenlet + // (NOT main greenlet) + PyObjectArenaAllocator alloc; + _PyStackChunk* chunk = nullptr; + if (tstate) { + // We really did finish, we can never be switched to again. + chunk = tstate->datastack_chunk; + // Unfortunately, we can't do much sanity checking. Our + // this->datastack_chunk pointer is out of date (evaluation may + // have popped down through it already) so we can't verify that + // we deallocate it. I don't think we can even check datastack_top + // for the same reason. + + PyObject_GetArenaAllocator(&alloc); + tstate->datastack_chunk = nullptr; + tstate->datastack_limit = nullptr; + tstate->datastack_top = nullptr; + + } + else if (this->datastack_chunk) { + // The UserGreenlet (NOT the main greenlet!) is being deallocated. If we're + // still holding a stack chunk, it's garbage because we know + // we can never switch back to let cPython clean it up. + // Because the last time we got switched away from, and we + // haven't run since then, we know our chain is valid and can + // be dealloced. + chunk = this->datastack_chunk; + PyObject_GetArenaAllocator(&alloc); + } + + if (alloc.free && chunk) { + // In case the arena mechanism has been torn down already. + while (chunk) { + _PyStackChunk *prev = chunk->previous; + chunk->previous = nullptr; + alloc.free(alloc.ctx, chunk, chunk->size); + chunk = prev; + } + } + + this->datastack_chunk = nullptr; + this->datastack_limit = nullptr; + this->datastack_top = nullptr; +#endif +} + + + using greenlet::StackState; + #ifdef GREENLET_USE_STDIO #include using std::cerr; diff --git a/src/greenlet/tests/__init__.py b/src/greenlet/tests/__init__.py index d0b14fed..7ff5afb9 100644 --- a/src/greenlet/tests/__init__.py +++ b/src/greenlet/tests/__init__.py @@ -45,6 +45,7 @@ def __new__(cls, classname, bases, classDict): classDict[key] = value return type.__new__(cls, classname, bases, classDict) + class TestCase(TestCaseMetaClass( "NewBase", (unittest.TestCase,), diff --git a/src/greenlet/tests/test_cpp.py b/src/greenlet/tests/test_cpp.py index f835c6e1..955566b4 100644 --- a/src/greenlet/tests/test_cpp.py +++ b/src/greenlet/tests/test_cpp.py @@ -11,6 +11,8 @@ def run_unhandled_exception_in_greenlet_aborts(): # This is used in multiprocessing.Process and must be picklable # so it needs to be global. + + def _(): _test_extension_cpp.test_exception_switch_and_do_in_g2( _test_extension_cpp.test_exception_throw @@ -63,3 +65,7 @@ def test_unhandled_exception_aborts(self): def test_unhandled_exception_in_greenlet_aborts(self): # verify that unhandled throw called in greenlet aborts too self._do_test_unhandled_exception(run_unhandled_exception_in_greenlet_aborts) + + +if __name__ == '__main__': + __import__('unittest').main() diff --git a/src/greenlet/tests/test_leaks.py b/src/greenlet/tests/test_leaks.py index 7ff4e595..0ed43b05 100644 --- a/src/greenlet/tests/test_leaks.py +++ b/src/greenlet/tests/test_leaks.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """ Testing scenarios that may have leaked. """ @@ -10,9 +11,14 @@ import weakref import threading +import psutil + import greenlet from . import TestCase from .leakcheck import fails_leakcheck +from .leakcheck import ignores_leakcheck +from .leakcheck import RUNNING_ON_GITHUB_ACTIONS +from .leakcheck import RUNNING_ON_MANYLINUX try: from sys import intern @@ -295,3 +301,148 @@ def test_issue251_issue252_explicit_reference_not_collectable(self): self._check_issue251( manually_collect_background=False, explicit_reference_to_switch=True) + + UNTRACK_ATTEMPTS = 100 + + def _only_test_some_versions(self): + # We're only looking for this problem specifically on 3.11, + # and this set of tests is relatively fragile, depending on + # OS and memory management details. So we want to run it on 3.11+ + # (obviously) but not every older 3.x version in order to reduce + # false negatives. + if sys.version_info[0] >= 3 and sys.version_info[:2] < (3, 8): + self.skipTest('Only observed on 3.11') + if sys.version_info[0] == 2 and RUNNING_ON_GITHUB_ACTIONS: + self.skipTest('Hard to get a stable pattern here') + if RUNNING_ON_MANYLINUX: + self.skipTest("Slow and not worth repeating here") + + @ignores_leakcheck + # Because we're just trying to track raw memory, not objects, and running + # the leakcheck makes an already slow test slower. + def test_untracked_memory_doesnt_increase(self): + # See https://github.com/gevent/gevent/issues/1924 + # and https://github.com/python-greenlet/greenlet/issues/328 + self._only_test_some_versions() + def f(): + return 1 + + ITER = 10000 + def run_it(): + for _ in range(ITER): + greenlet.greenlet(f).switch() + + # Establish baseline + for _ in range(3): + run_it() + + # uss: (Linux, macOS, Windows): aka "Unique Set Size", this is + # the memory which is unique to a process and which would be + # freed if the process was terminated right now. + uss_before = psutil.Process().memory_full_info().uss + + for count in range(self.UNTRACK_ATTEMPTS): + uss_before = max(uss_before, psutil.Process().memory_full_info().uss) + run_it() + + uss_after = psutil.Process().memory_full_info().uss + if uss_after <= uss_before and count > 1: + break + + self.assertLessEqual(uss_after, uss_before) + + def _check_untracked_memory_thread(self, deallocate_in_thread=True): + self._only_test_some_versions() + # Like the above test, but what if there are a bunch of + # unfinished greenlets in a thread that dies? + # Does it matter if we deallocate in the thread or not? + EXIT_COUNT = [0] + + def f(): + try: + greenlet.getcurrent().parent.switch() + except greenlet.GreenletExit: + EXIT_COUNT[0] += 1 + raise + return 1 + + ITER = 10000 + def run_it(): + glets = [] + for _ in range(ITER): + # Greenlet starts, switches back to us. + # We keep a strong reference to the greenlet though so it doesn't + # get a GreenletExit exception. + g = greenlet.greenlet(f) + glets.append(g) + g.switch() + + return glets + + test = self + + class ThreadFunc: + uss_before = uss_after = 0 + glets = () + ITER = 2 + def __call__(self): + self.uss_before = psutil.Process().memory_full_info().uss + + for _ in range(self.ITER): + self.glets += tuple(run_it()) + + for g in self.glets: + test.assertIn('suspended active', str(g)) + # Drop them. + if deallocate_in_thread: + self.glets = () + self.uss_after = psutil.Process().memory_full_info().uss + + # Establish baseline + uss_before = uss_after = None + for count in range(self.UNTRACK_ATTEMPTS): + EXIT_COUNT[0] = 0 + thread_func = ThreadFunc() + t = threading.Thread(target=thread_func) + t.start() + t.join(30) + self.assertFalse(t.is_alive()) + + if uss_before is None: + uss_before = thread_func.uss_before + + uss_before = max(uss_before, thread_func.uss_before) + if deallocate_in_thread: + self.assertEqual(thread_func.glets, ()) + self.assertEqual(EXIT_COUNT[0], ITER * thread_func.ITER) + + del thread_func # Deallocate the greenlets; but this won't raise into them + del t + if not deallocate_in_thread: + self.assertEqual(EXIT_COUNT[0], 0) + if deallocate_in_thread: + self.wait_for_pending_cleanups() + + uss_after = psutil.Process().memory_full_info().uss + # See if we achieve a non-growth state at some point. Break when we do. + if uss_after <= uss_before and count > 1: + break + + self.wait_for_pending_cleanups() + uss_after = psutil.Process().memory_full_info().uss + self.assertLessEqual(uss_after, uss_before, "after attempts %d" % (count,)) + + @ignores_leakcheck + # Because we're just trying to track raw memory, not objects, and running + # the leakcheck makes an already slow test slower. + def test_untracked_memory_doesnt_increase_unfinished_thread_dealloc_in_thread(self): + self._check_untracked_memory_thread(deallocate_in_thread=True) + + @ignores_leakcheck + # Because the main greenlets from the background threads do not exit in a timely fashion, + # we fail the object-based leakchecks. + def test_untracked_memory_doesnt_increase_unfinished_thread_dealloc_in_main(self): + self._check_untracked_memory_thread(deallocate_in_thread=False) + +if __name__ == '__main__': + __import__('unittest').main()