From 978bd903939222d43bbf7865338ef7329210bb21 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 7 Feb 2022 19:34:51 +0100 Subject: [PATCH] Fix remaining resource_tracker-related test warning failures (#1263) --- CHANGES.rst | 4 ++++ joblib/_memmapping_reducer.py | 4 ++++ joblib/disk.py | 2 +- joblib/test/test_memmapping.py | 35 +++++++++++++++++++++++++++------- 4 files changed, 37 insertions(+), 8 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index bff605c94..8b3d7e257 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,6 +4,10 @@ Latest changes Development version ------------------- +- Avoid unnecessary warnings when workers and main process delete + the temporary memmap folder contents concurrently. + https://github.com/joblib/joblib/pull/1263 + Release 1.1.0 -------------- diff --git a/joblib/_memmapping_reducer.py b/joblib/_memmapping_reducer.py index d58382222..9d350c032 100644 --- a/joblib/_memmapping_reducer.py +++ b/joblib/_memmapping_reducer.py @@ -99,6 +99,10 @@ def unlink_file(filename): raise else: time.sleep(.2) + except FileNotFoundError: + # In case of a race condition when deleting the temporary folder, + # avoid noisy FileNotFoundError exception in the resource tracker. + pass resource_tracker._CLEANUP_FUNCS['file'] = unlink_file diff --git a/joblib/disk.py b/joblib/disk.py index 1bf0756d0..32fbb89f6 100644 --- a/joblib/disk.py +++ b/joblib/disk.py @@ -69,7 +69,7 @@ def mkdirp(d): # exception. this mechanism ensures that the sub-process gc have the time to # collect and close the memmaps before we fail. RM_SUBDIRS_RETRY_TIME = 0.1 -RM_SUBDIRS_N_RETRY = 5 +RM_SUBDIRS_N_RETRY = 10 def rm_subdirs(path, onerror=None): diff --git a/joblib/test/test_memmapping.py b/joblib/test/test_memmapping.py index f110751fb..d3ed46825 100644 --- a/joblib/test/test_memmapping.py +++ b/joblib/test/test_memmapping.py @@ -9,6 +9,8 @@ import subprocess import threading +import pytest + from joblib.test.common import with_numpy, np from joblib.test.common import setup_autokill from joblib.test.common import teardown_autokill @@ -679,7 +681,7 @@ def test_memmap_returned_as_regular_array(backend): @with_numpy @with_multiprocessing -@parametrize("backend", ["multiprocessing", param("loky", marks=xfail)]) +@parametrize("backend", ["multiprocessing", "loky"]) def test_resource_tracker_silent_when_reference_cycles(backend): # There is a variety of reasons that can make joblib with loky backend # output noisy warnings when a reference cycle is preventing a memmap from @@ -687,10 +689,22 @@ def test_resource_tracker_silent_when_reference_cycles(backend): # deletes the temporary folder if it was not done before, which can # interact badly with the resource_tracker. We don't risk leaking any # resources, but this will likely make joblib output a lot of low-level - # confusing messages. This test is marked as xfail for now: but a next PR - # should fix this behavior. + # confusing messages. + # + # This test makes sure that the resource_tracker is silent when a reference + # has been collected concurrently on non-Windows platforms. + # # Note that the script in ``cmd`` is the exact same script as in # test_permission_error_windows_reference_cycle. + if backend == "loky" and sys.platform.startswith('win'): + # XXX: on Windows, reference cycles can delay timely garbage collection + # and make it impossible to properly delete the temporary folder in the + # main process because of permission errors. + pytest.xfail( + "The temporary folder cannot be deleted on Windows in the " + "presence of a reference cycle" + ) + cmd = """if 1: import numpy as np from joblib import Parallel, delayed @@ -714,8 +728,10 @@ def test_resource_tracker_silent_when_reference_cycles(backend): stdout=subprocess.PIPE) p.wait() out, err = p.communicate() - assert p.returncode == 0, out.decode() - assert b"resource_tracker" not in err, err.decode() + out = out.decode() + err = err.decode() + assert p.returncode == 0, out + "\n\n" + err + assert "resource_tracker" not in err, err @with_numpy @@ -817,8 +833,13 @@ def get_temp_folder(parallel_obj, backend): for i in range(1)) except ValueError as e: # the temporary folder should be deleted by the end of this - # call - if os.path.exists(temp_folder): + # call but apparently on some file systems, this takes + # some time to be visible. + for i in range(10): + if not os.path.exists(temp_folder): + break + sleep(.1) + else: raise AssertionError( temp_folder + " was not deleted" ) from e