From e68655c37eb6cf17ad2da353f2c294fb2d2237c9 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 15 Sep 2022 15:42:41 +0200 Subject: [PATCH 01/11] CI test the future loky-3.3.0 branch --- joblib/externals/loky/__init__.py | 2 +- joblib/externals/loky/process_executor.py | 39 ++++++++++++++++------- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/joblib/externals/loky/__init__.py b/joblib/externals/loky/__init__.py index cd56e26c2..b6bb48ab8 100644 --- a/joblib/externals/loky/__init__.py +++ b/joblib/externals/loky/__init__.py @@ -29,4 +29,4 @@ "wrap_non_picklable_objects", "set_loky_pickler"] -__version__ = '3.2.0' +__version__ = '3.3.0.dev0' diff --git a/joblib/externals/loky/process_executor.py b/joblib/externals/loky/process_executor.py index 62b0a2ea8..4e3e819ce 100644 --- a/joblib/externals/loky/process_executor.py +++ b/joblib/externals/loky/process_executor.py @@ -678,9 +678,12 @@ def process_result_item(self, result_item): with self.processes_management_lock: p = self.processes.pop(result_item, None) - # p can be None is the executor is concurrently shutting down. + # p can be None if the executor is concurrently shutting down. if p is not None: p._worker_exit_lock.release() + mp.util.debug( + f"joining {p.name} when processing {p.pid} as result_item" + ) p.join() del p @@ -699,7 +702,8 @@ def process_result_item(self, result_item): "executor. This can be caused by a too short worker " "timeout or by a memory leak.", UserWarning ) - executor._adjust_process_count() + with executor._processes_management_lock: + executor._adjust_process_count() executor = None else: # Received a _ResultItem so mark the future as completed. @@ -837,15 +841,23 @@ def join_executor_internals(self): self.thread_wakeup.close() # If .join() is not called on the created processes then - # some ctx.Queue methods may deadlock on Mac OS X. - active_processes = list(self.processes.values()) - mp.util.debug(f"joining {len(active_processes)} processes") - for p in active_processes: - mp.util.debug(f"joining process {p.name}") - p.join() + # some ctx.Queue methods may deadlock on macOS. + with self.processes_management_lock: + mp.util.debug(f"joining {len(self.processes)} processes") + n_joined_processes = 0 + while True: + try: + pid, p = self.processes.popitem() + mp.util.debug(f"joining process {p.name} with pid {pid}") + p.join() + n_joined_processes += 1 + except KeyError: + break - mp.util.debug("executor management thread clean shutdown of worker " - f"processes: {active_processes}") + mp.util.debug( + "executor management thread clean shutdown of " + f"{n_joined_processes} workers" + ) def get_n_children_alive(self): # This is an upper bound on the number of children alive. @@ -1082,7 +1094,7 @@ def _start_executor_manager_thread(self): _python_exit) def _adjust_process_count(self): - for _ in range(len(self._processes), self._max_workers): + while len(self._processes) < self._max_workers: worker_exit_lock = self._context.BoundedSemaphore(1) args = (self._call_queue, self._result_queue, self._initializer, self._initargs, self._processes_management_lock, @@ -1098,7 +1110,10 @@ def _adjust_process_count(self): p._worker_exit_lock = worker_exit_lock p.start() self._processes[p.pid] = p - mp.util.debug(f'Adjust process count : {self._processes}') + mp.util.debug( + f"Adjusted process count to {self._max_workers}: " + f"{[(p.name, pid) for pid, p in self._processes.items()]}" + ) def _ensure_executor_running(self): """ensures all workers and management thread are running From 666c0a5c489c7268124aee030c0b28c39710b55e Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 15 Sep 2022 16:13:28 +0200 Subject: [PATCH 02/11] [DEBUG] More intensive macos CI and disable sklearn tests --- azure-pipelines.yml | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index a3c1e40bc..6c880b387 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -45,17 +45,17 @@ jobs: PYTHON_VERSION: "pypy3" LOKY_MAX_CPU_COUNT: "2" - linux_py39_sklearn_tests: - imageName: 'ubuntu-latest' - PYTHON_VERSION: "3.9" - # SKIP_TESTS: "true" - SKLEARN_TESTS: "true" + # linux_py39_sklearn_tests: + # imageName: 'ubuntu-latest' + # PYTHON_VERSION: "3.9" + # # SKIP_TESTS: "true" + # SKLEARN_TESTS: "true" linux_py310_distributed: # To be updated regularly to use the most recent versions of the # dependencies. imageName: 'ubuntu-latest' PYTHON_VERSION: "3.10" - EXTRA_CONDA_PACKAGES: "numpy=1.22 distributed=2022.2.0" + EXTRA_CONDA_PACKAGES: "numpy=1.23 distributed=2022.2.0" linux_py37_distributed: imageName: 'ubuntu-latest' PYTHON_VERSION: "3.7" @@ -63,7 +63,7 @@ jobs: linux_py310_cython: imageName: 'ubuntu-latest' PYTHON_VERSION: "3.10" - EXTRA_CONDA_PACKAGES: "numpy=1.22" + EXTRA_CONDA_PACKAGES: "numpy=1.23" CYTHON: "true" linux_py37_no_multiprocessing_no_lzma: imageName: 'ubuntu-latest' @@ -78,12 +78,25 @@ jobs: windows_py310: imageName: "windows-latest" PYTHON_VERSION: "3.10" - EXTRA_CONDA_PACKAGES: "numpy=1.22" + EXTRA_CONDA_PACKAGES: "numpy=1.23" macos_py310: imageName: "macos-latest" PYTHON_VERSION: "3.10" - EXTRA_CONDA_PACKAGES: "numpy=1.22" + EXTRA_CONDA_PACKAGES: "numpy=1.23" + macos_py310_2: + imageName: "macos-latest" + PYTHON_VERSION: "3.10" + EXTRA_CONDA_PACKAGES: "numpy=1.23" + macos_py310_3: + imageName: "macos-latest" + PYTHON_VERSION: "3.10" + EXTRA_CONDA_PACKAGES: "numpy=1.23" + macos_py310_4: + imageName: "macos-latest" + PYTHON_VERSION: "3.10" + EXTRA_CONDA_PACKAGES: "numpy=1.23" + macos_py37_no_numpy: imageName: "macos-latest" PYTHON_VERSION: "3.7" From 9508a189402c95d096c2ab72e26d91e401efb48d Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 15 Sep 2022 16:46:40 +0200 Subject: [PATCH 03/11] Trigger CI From c8f3f325194b359b23b998e93acd2d8743538d3c Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 15 Sep 2022 17:05:06 +0200 Subject: [PATCH 04/11] Trigger CI From 0bfd3a6f47c09d534b1e8805c6bccdf6902ac1c1 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 15 Sep 2022 21:48:03 +0200 Subject: [PATCH 05/11] Loky 3.3.0 --- CHANGES.rst | 5 +++-- joblib/externals/loky/__init__.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index bded2b4dd..1c6c90c2b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -34,8 +34,9 @@ Development version - Vendor cloudpickle 2.2.0 which adds support for PyPy 3.8+. -- Vendor loky 3.2.0 which fixes a bug with leaking processes in case of - nested loky parallel calls. +- Vendor loky 3.3.0 which fixes a bug with leaking processes in case of + nested loky parallel calls and more reliability spawn the correct + number of reusable workers. Release 1.1.0 -------------- diff --git a/joblib/externals/loky/__init__.py b/joblib/externals/loky/__init__.py index b6bb48ab8..fd2008d78 100644 --- a/joblib/externals/loky/__init__.py +++ b/joblib/externals/loky/__init__.py @@ -29,4 +29,4 @@ "wrap_non_picklable_objects", "set_loky_pickler"] -__version__ = '3.3.0.dev0' +__version__ = '3.3.0' From 9bdd0b9b7b5de2292b0ab45c6acd45b6f8c934af Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 15 Sep 2022 22:05:16 +0200 Subject: [PATCH 06/11] Trigger CI From 1ae97c82ac4523038627d142f57e434b82743d1c Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Fri, 16 Sep 2022 07:29:56 +0200 Subject: [PATCH 07/11] Trigger CI From 43ab3d4d1372924c5360d0634f943dc0b34d7180 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Fri, 16 Sep 2022 08:38:34 +0200 Subject: [PATCH 08/11] Trigger CI From e9ead52cbcce96f66ed8799aa386a0ead26027bd Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Fri, 16 Sep 2022 09:15:58 +0200 Subject: [PATCH 09/11] Trigger CI From 5fbeff37c760dfee6f85c42b09089d41b7f80a63 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Fri, 16 Sep 2022 09:56:18 +0200 Subject: [PATCH 10/11] Restore regular CI config --- azure-pipelines.yml | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 6c880b387..3b58a4d3c 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -45,11 +45,11 @@ jobs: PYTHON_VERSION: "pypy3" LOKY_MAX_CPU_COUNT: "2" - # linux_py39_sklearn_tests: - # imageName: 'ubuntu-latest' - # PYTHON_VERSION: "3.9" - # # SKIP_TESTS: "true" - # SKLEARN_TESTS: "true" + linux_py39_sklearn_tests: + imageName: 'ubuntu-latest' + PYTHON_VERSION: "3.9" + # SKIP_TESTS: "true" + SKLEARN_TESTS: "true" linux_py310_distributed: # To be updated regularly to use the most recent versions of the # dependencies. @@ -84,19 +84,6 @@ jobs: imageName: "macos-latest" PYTHON_VERSION: "3.10" EXTRA_CONDA_PACKAGES: "numpy=1.23" - macos_py310_2: - imageName: "macos-latest" - PYTHON_VERSION: "3.10" - EXTRA_CONDA_PACKAGES: "numpy=1.23" - macos_py310_3: - imageName: "macos-latest" - PYTHON_VERSION: "3.10" - EXTRA_CONDA_PACKAGES: "numpy=1.23" - macos_py310_4: - imageName: "macos-latest" - PYTHON_VERSION: "3.10" - EXTRA_CONDA_PACKAGES: "numpy=1.23" - macos_py37_no_numpy: imageName: "macos-latest" PYTHON_VERSION: "3.7" From eaafe22bb05f5c2258f85f206002318567e3b3ed Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Fri, 16 Sep 2022 10:50:06 +0200 Subject: [PATCH 11/11] Make test_many_parallel_calls_on_same_object run faster to reduce likelihood to get killed on slow macos --- joblib/test/test_memmapping.py | 40 +++++++++++++++------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/joblib/test/test_memmapping.py b/joblib/test/test_memmapping.py index 9c495ef2f..bdc825f06 100644 --- a/joblib/test/test_memmapping.py +++ b/joblib/test/test_memmapping.py @@ -613,29 +613,25 @@ def test_many_parallel_calls_on_same_object(backend): delayed(return_slice_of_data)(data, 0, 20) for _ in range(10) ) - slice_of_data = Parallel( - n_jobs=2, max_nbytes=1, backend='{b}')( - delayed(return_slice_of_data)(data, 0, 20) - for _ in range(10) - ) '''.format(b=backend) - - for _ in range(3): - env = os.environ.copy() - env['PYTHONPATH'] = os.path.dirname(__file__) - p = subprocess.Popen([sys.executable, '-c', cmd], - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, env=env) - p.wait() - out, err = p.communicate() - assert p.returncode == 0, err - assert out == b'' - if sys.version_info[:3] not in [(3, 8, 0), (3, 8, 1)]: - # In early versions of Python 3.8, a reference leak - # https://github.com/cloudpipe/cloudpickle/issues/327, holds - # references to pickled objects, generating race condition during - # cleanup finalizers of joblib and noisy resource_tracker outputs. - assert b'resource_tracker' not in err + env = os.environ.copy() + env['PYTHONPATH'] = os.path.dirname(__file__) + p = subprocess.Popen( + [sys.executable, '-c', cmd], + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + env=env, + ) + p.wait() + out, err = p.communicate() + assert p.returncode == 0, err + assert out == b'' + if sys.version_info[:3] not in [(3, 8, 0), (3, 8, 1)]: + # In early versions of Python 3.8, a reference leak + # https://github.com/cloudpipe/cloudpickle/issues/327, holds + # references to pickled objects, generating race condition during + # cleanup finalizers of joblib and noisy resource_tracker outputs. + assert b'resource_tracker' not in err @with_numpy