
Commit

Merge branch 'master' into no_multiprocessing
ogrisel committed Feb 4, 2022
2 parents 22c9688 + ea8562f commit d6e6c1f
Showing 33 changed files with 88 additions and 149 deletions.
8 changes: 4 additions & 4 deletions CHANGES.rst
@@ -272,7 +272,7 @@ Maxime Weyl
Maxime Weyl

Loading a corrupted cached file with mmap mode enabled would
recompute the results and return them without memmory mapping.
recompute the results and return them without memory mapping.


Release 0.12.3
@@ -883,7 +883,7 @@ Release 0.6.5
2012-09-15
Yannick Schwartz

BUG: make sure that sets and dictionnaries give reproducible hashes
BUG: make sure that sets and dictionaries give reproducible hashes


2012-07-18
@@ -914,7 +914,7 @@ GaelVaroquaux

BUG: non-reproducible hashing: order of kwargs

The ordering of a dictionnary is random. As a result the function hashing
The ordering of a dictionary is random. As a result the function hashing
was not reproducible. Pretty hard to test

Release 0.6.3
@@ -1224,7 +1224,7 @@ Gael varoquaux
Gael varoquaux
2010-07-29

MISC: Silence tests (and hopefuly Yaroslav :P)
MISC: Silence tests (and hopefully Yaroslav :P)

Release 0.4.3
----------------
2 changes: 1 addition & 1 deletion README.rst
@@ -48,7 +48,7 @@ Dependencies
============

- Joblib has no mandatory dependencies besides Python (supported versions are
3.6+).
3.7+).
- Joblib has an optional dependency on Numpy (at least version 1.6.1) for array
manipulation.
- Joblib includes its own vendored copy of
2 changes: 1 addition & 1 deletion TODO.rst
@@ -40,7 +40,7 @@ Tasks at hand on joblib, in increasing order of difficulty.

* add a 'argument_hash' keyword argument to Memory.cache, to be able to
replace the hashing logic of memory for the input arguments. It should
accept as an input the dictionnary of arguments, as returned in
accept as an input the dictionary of arguments, as returned in
func_inspect, and return a string.

* add a sqlite db for provenance tracking. Store computation time and usage
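The 'argument_hash' keyword described in the TODO item above does not exist in joblib; the following is a purely hypothetical sketch of the kind of callable such a parameter could accept, taking the dictionary of arguments returned by func_inspect and returning a string::

    import joblib

    def my_argument_hash(arguments):
        # Hypothetical: 'arguments' would be the dict of input arguments as
        # returned by func_inspect. Here only the 'data' entry contributes to
        # the cache key; all other arguments are ignored.
        return joblib.hash(arguments.get('data'))

    # Hypothetical call, if Memory.cache ever grew such a parameter:
    # cached = memory.cache(my_function, argument_hash=my_argument_hash)
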
26 changes: 13 additions & 13 deletions azure-pipelines.yml
@@ -54,33 +54,33 @@ jobs:
imageName: 'ubuntu-latest'
PYTHON_VERSION: "3.7"
EXTRA_CONDA_PACKAGES: "numpy=1.15 distributed=2.13"
linux_py36_cython:
linux_py37_cython:
imageName: 'ubuntu-latest'
PYTHON_VERSION: "3.6"
EXTRA_CONDA_PACKAGES: "numpy=1.14"
PYTHON_VERSION: "3.7"
EXTRA_CONDA_PACKAGES: "numpy=1.15"
CYTHON: "true"
linux_py36_no_multiprocessing_no_lzma:
linux_py37_no_multiprocessing_no_lzma:
imageName: 'ubuntu-latest'
PYTHON_VERSION: "3.6"
EXTRA_CONDA_PACKAGES: "numpy=1.14"
PYTHON_VERSION: "3.7"
EXTRA_CONDA_PACKAGES: "numpy=1.15"
JOBLIB_MULTIPROCESSING: "0"
NO_LZMA: "1"
linux_py36_no_numpy:
linux_py37_no_numpy:
imageName: 'ubuntu-latest'
PYTHON_VERSION: "3.6"
PYTHON_VERSION: "3.7"

windows_py38:
imageName: "vs2017-win2016"
imageName: "windows-latest"
PYTHON_VERSION: "3.8"
EXTRA_CONDA_PACKAGES: "numpy=1.18"

macos_py38:
imageName: "macos-10.14"
imageName: "macos-latest"
PYTHON_VERSION: "3.8"
EXTRA_CONDA_PACKAGES: "numpy=1.18"
macos_py36_no_numpy:
imageName: "macos-10.14"
PYTHON_VERSION: "3.6"
macos_py37_no_numpy:
imageName: "macos-latest"
PYTHON_VERSION: "3.7"

variables:
JUNITXML: 'test-data.xml'
2 changes: 1 addition & 1 deletion benchmarks/bench_pickle.py
@@ -1,7 +1,7 @@
"""
Benching joblib pickle I/O.
Warning: this is slow, and the benchs are easily offset by other disk
Warning: this is slow, and the benches are easily offset by other disk
activity.
"""
import os
2 changes: 1 addition & 1 deletion continuous_integration/install.sh
@@ -18,7 +18,7 @@ create_new_conda_env() {
}

create_new_pypy3_env() {
PYPY_FOLDER="pypy3.6-v7.3.1-linux64"
PYPY_FOLDER="pypy3.7-v7.3.7-linux64"
wget https://downloads.python.org/pypy/$PYPY_FOLDER.tar.bz2
tar xvf $PYPY_FOLDER.tar.bz2
$PYPY_FOLDER/bin/pypy3 -m venv pypy3
10 changes: 8 additions & 2 deletions continuous_integration/run_tests.sh
@@ -28,14 +28,20 @@ if [[ "$SKLEARN_TESTS" == "true" ]]; then
# Install scikit-learn from conda and test against the installed
# development version of joblib.
conda remove -y numpy
conda install -y -c conda-forge cython pillow scikit-learn
conda install -y -c conda-forge cython pillow pip numpy scipy
pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn
python -c "import sklearn; print('Testing scikit-learn', sklearn.__version__)"

# Move to a dedicated folder to avoid being polluted by joblib specific conftest.py
# and disable the doctest plugin to avoid issues with doctests in scikit-learn
# docstrings that require setting print_changed_only=True temporarily.
cd "/tmp"
pytest -vl --maxfail=5 -p no:doctest -k "not test_import_is_deprecated" --pyargs sklearn
pytest -vl --maxfail=5 -p no:doctest \
# Don't worry about deprecated imports: this is tested for real
# in upstream scikit-learn and this is not joblib's responsibility.
# Let's skip this test to avoid false positives in joblib's CI.
-k "not test_import_is_deprecated" \
--pyargs sklearn
fi

if [[ "$SKIP_TESTS" != "true" && "$COVERAGE" == "true" ]]; then
4 changes: 2 additions & 2 deletions doc/memory.rst
@@ -391,8 +391,8 @@ Gotchas
``joblib.Memory`` cache can get invalidated when upgrading ``joblib``.
Invalidation can also happen when upgrading a third party library (such as
``numpy``): in such a case, only the cached function calls with parameters
that are constructs (or contain references to contructs) defined in the
upgraded library should potentially be invalidated after the uprade.
that are constructs (or contain references to constructs) defined in the
upgraded library should potentially be invalidated after the upgrade.


Ignoring some arguments
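A minimal sketch of the caching pattern this gotcha applies to, assuming numpy is installed and a writable ./joblib_cache directory; results cached before a numpy upgrade may be recomputed afterwards if the cached call involves constructs from the upgraded library::

    import numpy as np
    from joblib import Memory

    memory = Memory(location="./joblib_cache", verbose=0)

    @memory.cache
    def column_means(data):
        return data.mean(axis=0)

    X = np.random.RandomState(0).rand(1000, 10)
    column_means(X)   # computed and written to ./joblib_cache
    column_means(X)   # served from the on-disk cache, until it is invalidated
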
2 changes: 1 addition & 1 deletion examples/compressors_comparison.py
@@ -8,7 +8,7 @@
GZip compression methods.
For each compared compression method, this example dumps and reloads a
dataset fetched from an online machine-learning database. This gives 3
informations: the size on disk of the compressed data, the time spent to dump
information: the size on disk of the compressed data, the time spent to dump
and the time spent to reload the data from disk.
"""

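A toy version of the comparison the example performs, restricted to the zlib and gzip codecs on a small illustrative object; real measurements need a sizeable dataset and repeated runs::

    import os
    import time
    from joblib import dump, load

    data = {"values": list(range(100_000))}

    for method in ("zlib", "gzip"):
        filename = "data.pkl." + method
        start = time.time()
        dump(data, filename, compress=(method, 3))   # codec name and level
        dump_time = time.time() - start
        size_kb = os.path.getsize(filename) / 1024
        start = time.time()
        load(filename)
        load_time = time.time() - start
        print(f"{method}: {size_kb:.1f} kB, "
              f"dump {dump_time:.3f}s, load {load_time:.3f}s")
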
2 changes: 1 addition & 1 deletion examples/serialization_and_wrappers.py
@@ -36,7 +36,7 @@ def func_async(i, *args):


###############################################################################
# For most use-cases, using ``cloudpickle``` is efficient enough. However, this
# For most use-cases, using ``cloudpickle`` is efficient enough. However, this
# solution can be very slow to serialize large python objects, such as dict or
# list, compared to the standard ``pickle`` serialization.
#
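A rough illustration of the remark about large built-in containers, using the copy of cloudpickle vendored in joblib (the standalone cloudpickle package behaves the same); absolute timings are machine-dependent::

    import pickle
    import time
    from joblib.externals import cloudpickle

    big = {i: str(i) for i in range(1_000_000)}

    start = time.time()
    pickle.dumps(big)
    pickle_time = time.time() - start

    start = time.time()
    cloudpickle.dumps(big)
    cloudpickle_time = time.time() - start

    print(f"pickle: {pickle_time:.2f}s  cloudpickle: {cloudpickle_time:.2f}s")
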
2 changes: 1 addition & 1 deletion joblib/_store_backends.py
@@ -130,7 +130,7 @@ def configure(self, location, verbose=0, backend_options=dict()):
verbose: int
The level of verbosity of the store
backend_options: dict
Contains a dictionnary of named paremeters used to configure the
Contains a dictionary of named parameters used to configure the
store backend.
"""

2 changes: 1 addition & 1 deletion joblib/compressor.py
@@ -89,7 +89,7 @@ class CompressorWrapper():
prefix: bytestring
A bytestring corresponding to the magic number that identifies the
file format associated to the compressor.
extention: str
extension: str
The file extension used to automatically select this compressor during
a dump to a file.
"""
4 changes: 2 additions & 2 deletions joblib/disk.py
@@ -66,7 +66,7 @@ def mkdirp(d):

# if a rmtree operation fails in rm_subdirs, wait for this much time (in secs),
# then retry up to RM_SUBDIRS_N_RETRY times. If it still fails, raise the
# exception. this mecanism ensures that the sub-process gc have the time to
# exception. this mechanism ensures that the sub-process gc have the time to
# collect and close the memmaps before we fail.
RM_SUBDIRS_RETRY_TIME = 0.1
RM_SUBDIRS_N_RETRY = 5
@@ -119,7 +119,7 @@ def delete_folder(folder_path, onerror=None, allow_non_empty=True):
folder_path, ignore_errors=False, onerror=None
)
util.debug(
"Sucessfully deleted {}".format(folder_path))
"Successfully deleted {}".format(folder_path))
break
else:
raise OSError(
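A generic sketch of the retry scheme the comment above describes, not joblib's own implementation: retry the removal a few times with a short pause so that child-process garbage collection has time to close memory-mapped files::

    import shutil
    import time

    RETRY_TIME = 0.1   # seconds to wait between attempts
    N_RETRY = 5        # give up and re-raise after this many failures

    def rmtree_with_retry(path):
        for attempt in range(N_RETRY):
            try:
                shutil.rmtree(path, ignore_errors=False)
                return
            except OSError:
                if attempt == N_RETRY - 1:
                    raise
                time.sleep(RETRY_TIME)
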
2 changes: 1 addition & 1 deletion joblib/externals/cloudpickle/cloudpickle.py
@@ -594,7 +594,7 @@ def _create_parametrized_type_hint(origin, args):


def parametrized_type_hint_getinitargs(obj):
# The distorted type check sematic for typing construct becomes:
# The distorted type check semantic for typing construct becomes:
# ``type(obj) is type(TypeHint)``, which means "obj is a
# parametrized TypeHint"
if type(obj) is type(Literal): # pragma: no branch
2 changes: 1 addition & 1 deletion joblib/externals/loky/backend/queues.py
@@ -209,7 +209,7 @@ def __init__(self, reducers=None, ctx=None):
else:
self._wlock = ctx.Lock()

# Add possiblity to use custom reducers
# Add possibility to use custom reducers
self._reducers = reducers

def close(self):
4 changes: 2 additions & 2 deletions joblib/externals/loky/backend/resource_tracker.py
@@ -36,7 +36,7 @@
# Note that this behavior differs from CPython's resource_tracker, which only
# implements list of shared resources, and not a proper refcounting scheme.
# Also, CPython's resource tracker will only attempt to cleanup those shared
# resources once all procsses connected to the resouce tracker have exited.
# resources once all procsses connected to the resource tracker have exited.


import os
@@ -118,7 +118,7 @@ def ensure_running(self):
self._pid = None

warnings.warn('resource_tracker: process died unexpectedly, '
'relaunching. Some folders/sempahores might '
'relaunching. Some folders/semaphores might '
'leak.')

fds_to_pass = []
2 changes: 1 addition & 1 deletion joblib/externals/loky/backend/spawn.py
@@ -105,7 +105,7 @@ def get_preparation_data(name, init_main_module=True):
_resource_tracker as mp_resource_tracker
)
# multiprocessing's resource_tracker must be running before loky
# process is created (othewise the child won't be able to use it if it
# process is created (otherwise the child won't be able to use it if it
# is created later on)
mp_resource_tracker.ensure_running()
d["mp_tracker_args"] = {
2 changes: 1 addition & 1 deletion joblib/externals/loky/backend/utils.py
@@ -117,7 +117,7 @@ def _recursive_terminate(pid):


def get_exitcodes_terminated_worker(processes):
"""Return a formated string with the exitcodes of terminated workers.
"""Return a formatted string with the exitcodes of terminated workers.
If necessary, wait (up to .25s) for the system to correctly set the
exitcode of one terminated worker.
2 changes: 1 addition & 1 deletion joblib/externals/loky/cloudpickle_wrapper.py
@@ -24,7 +24,7 @@ def __reduce__(self):
return _reconstruct_wrapper, (_pickled_object, self._keep_wrapper)

def __getattr__(self, attr):
# Ensure that the wrapped object can be used seemlessly as the
# Ensure that the wrapped object can be used seamlessly as the
# previous object.
if attr not in ['_obj', '_keep_wrapper']:
return getattr(self._obj, attr)
2 changes: 1 addition & 1 deletion joblib/externals/loky/process_executor.py
@@ -1048,7 +1048,7 @@ def _start_executor_manager_thread(self):
if self._executor_manager_thread is None:
mp.util.debug('_start_executor_manager_thread called')

# When the executor gets garbarge collected, the weakref callback
# When the executor gets garbage collected, the weakref callback
# will wake up the queue management thread so that it can terminate
# if there is no pending work item.
def weakref_cb(
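A small standalone sketch of the weakref-callback mechanism the comment above relies on, with a stand-in class rather than loky's executor: the callback runs once the object is garbage collected, which is how the manager thread gets woken up::

    import weakref

    class Executor:
        """Stand-in object; not loky's executor."""
        pass

    def weakref_cb(_ref):
        # In loky this would wake the executor manager thread so it can
        # terminate when no work items are pending.
        print("executor was garbage collected")

    executor = Executor()
    ref = weakref.ref(executor, weakref_cb)
    del executor   # in CPython the callback typically fires right away
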
2 changes: 1 addition & 1 deletion joblib/hashing.py
@@ -193,7 +193,7 @@ def save(self, obj):
obj_c_contiguous = obj.T
else:
# Cater for non-single-segment arrays: this creates a
# copy, and thus aleviates this issue.
# copy, and thus alleviates this issue.
# XXX: There might be a more efficient way of doing this
obj_c_contiguous = obj.flatten()

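A small illustration of the comment above, assuming numpy is installed: non-contiguous (multi-segment) arrays are hashed through a flattened copy internally, so hashing such views works and is deterministic across calls::

    import numpy as np
    from joblib import hash as joblib_hash

    a = np.arange(12).reshape(3, 4)
    view = a[:, ::2]                      # non-contiguous view
    print(view.flags["C_CONTIGUOUS"])     # False
    print(joblib_hash(view) == joblib_hash(a[:, ::2]))   # deterministic: True
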
48 changes: 7 additions & 41 deletions joblib/memory.py
@@ -130,7 +130,7 @@ def _store_backend_factory(backend, location, verbose=0, backend_options=None):
return obj
elif location is not None:
warnings.warn(
"Instanciating a backend using a {} as a location is not "
"Instantiating a backend using a {} as a location is not "
"supported by joblib. Returning None instead.".format(
location.__class__.__name__), UserWarning)

@@ -196,7 +196,7 @@ class MemorizedResult(Logger):
func: function or str
function whose output is cached. The string case is intended only for
instanciation based on the output of repr() on another instance.
instantiation based on the output of repr() on another instance.
(namely eval(repr(memorized_instance)) works).
argument_hash: str
@@ -487,7 +487,7 @@ def _cached_call(self, args, kwargs, shelving=False):
metadata = None
msg = None

# Wether or not the memorized function must be called
# Whether or not the memorized function must be called
must_call = False

# FIXME: The statements below should be try/excepted
@@ -563,8 +563,8 @@ def func_code_info(self):
# (which should be called once on self) gets called in the process
# in which self.func was defined, this caching mechanism prevents
# undesired cache clearing when the cached function is called in
# an environement where the introspection utilities get_func_code
# relies on do not work (typicially, in joblib child processes).
# an environment where the introspection utilities get_func_code
# relies on do not work (typically, in joblib child processes).
# See #1035 for more info
# TODO (pierreglaser): do the same with get_func_name?
self._func_code_info = get_func_code(self.func)
@@ -876,12 +876,6 @@ class Memory(Logger):
The 'local' backend is using regular filesystem operations to
manipulate data (open, mv, etc) in the backend.
cachedir: str or None, optional
.. deprecated: 0.12
'cachedir' has been deprecated in 0.12 and will be
removed in 0.14. Use the 'location' parameter instead.
mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
The memmapping mode used when loading from cache
numpy arrays. See numpy.load for the meaning of the
@@ -906,17 +900,16 @@ class Memory(Logger):
actually reduce the cache size to be less than ``bytes_limit``.
backend_options: dict, optional
Contains a dictionnary of named parameters used to configure
Contains a dictionary of named parameters used to configure
the store backend.
"""
# ------------------------------------------------------------------------
# Public interface
# ------------------------------------------------------------------------

def __init__(self, location=None, backend='local', cachedir=None,
def __init__(self, location=None, backend='local',
mmap_mode=None, compress=False, verbose=1, bytes_limit=None,
backend_options=None):
# XXX: Bad explanation of the None value of cachedir
Logger.__init__(self)
self._verbose = verbose
self.mmap_mode = mmap_mode
@@ -931,22 +924,6 @@ def __init__(self, location=None, backend='local', cachedir=None,
if compress and mmap_mode is not None:
warnings.warn('Compressed results cannot be memmapped',
stacklevel=2)
if cachedir is not None:
if location is not None:
raise ValueError(
'You set both "location={0!r} and "cachedir={1!r}". '
"'cachedir' has been deprecated in version "
"0.12 and will be removed in version 0.14.\n"
'Please only set "location={0!r}"'.format(
location, cachedir))

warnings.warn(
"The 'cachedir' parameter has been deprecated in version "
"0.12 and will be removed in version 0.14.\n"
'You provided "cachedir={0!r}", '
'use "location={0!r}" instead.'.format(cachedir),
DeprecationWarning, stacklevel=2)
location = cachedir

self.location = location
if isinstance(location, str):
@@ -957,17 +934,6 @@ def __init__(self, location=None, backend='local', cachedir=None,
backend_options=dict(compress=compress, mmap_mode=mmap_mode,
**backend_options))

@property
def cachedir(self):
warnings.warn(
"The 'cachedir' attribute has been deprecated in version 0.12 "
"and will be removed in version 0.14.\n"
"Use os.path.join(memory.location, 'joblib') attribute instead.",
DeprecationWarning, stacklevel=2)
if self.location is None:
return None
return os.path.join(self.location, 'joblib')

def cache(self, func=None, ignore=None, verbose=None, mmap_mode=False):
""" Decorates the given function func to only compute its return
value for input arguments not cached on disk.
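With the deprecated 'cachedir' parameter and property removed by this diff, the cache directory is passed through 'location' only; a brief sketch of the remaining constructor arguments, assuming a writable ./joblib_cache directory::

    from joblib import Memory

    memory = Memory(location="./joblib_cache", backend="local",
                    mmap_mode=None, compress=False, verbose=0,
                    bytes_limit=10 * 1024 ** 2)

    @memory.cache(ignore=["verbose"])
    def multiply(a, b, verbose=False):
        if verbose:
            print("computing", a, "*", b)
        return a * b

    print(multiply(3, 4))
    memory.reduce_size()   # enforce bytes_limit by dropping least recently used items
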
