
Commit

Merge branch 'master' into no_multiprocessing
ogrisel committed Feb 4, 2022
2 parents 22c9688 + ea8562f commit d6e6c1f
Showing 33 changed files with 88 additions and 149 deletions.
8 changes: 4 additions & 4 deletions CHANGES.rst
@@ -272,7 +272,7 @@ Maxime Weyl
Maxime Weyl

Loading a corrupted cached file with mmap mode enabled would
recompute the results and return them without memmory mapping.
recompute the results and return them without memory mapping.


Release 0.12.3
@@ -883,7 +883,7 @@ Release 0.6.5
2012-09-15
Yannick Schwartz

BUG: make sure that sets and dictionnaries give reproducible hashes
BUG: make sure that sets and dictionaries give reproducible hashes


2012-07-18
@@ -914,7 +914,7 @@ GaelVaroquaux

BUG: non-reproducible hashing: order of kwargs

The ordering of a dictionnary is random. As a result the function hashing
The ordering of a dictionary is random. As a result the function hashing
was not reproducible. Pretty hard to test

Release 0.6.3
@@ -1224,7 +1224,7 @@ Gael varoquaux
Gael varoquaux
2010-07-29

MISC: Silence tests (and hopefuly Yaroslav :P)
MISC: Silence tests (and hopefully Yaroslav :P)

Release 0.4.3
----------------
2 changes: 1 addition & 1 deletion README.rst
@@ -48,7 +48,7 @@ Dependencies
============

- Joblib has no mandatory dependencies besides Python (supported versions are
3.6+).
3.7+).
- Joblib has an optional dependency on Numpy (at least version 1.6.1) for array
manipulation.
- Joblib includes its own vendored copy of
2 changes: 1 addition & 1 deletion TODO.rst
@@ -40,7 +40,7 @@ Tasks at hand on joblib, in increasing order of difficulty.

* add a 'argument_hash' keyword argument to Memory.cache, to be able to
replace the hashing logic of memory for the input arguments. It should
accept as an input the dictionnary of arguments, as returned in
accept as an input the dictionary of arguments, as returned in
func_inspect, and return a string.

* add a sqlite db for provenance tracking. Store computation time and usage
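The 'argument_hash' keyword described in the TODO item above does not exist in joblib; the following is a purely hypothetical sketch of the kind of callable such a parameter could accept, taking the dictionary of arguments returned by func_inspect and returning a string::

    import joblib

    def my_argument_hash(arguments):
        # Hypothetical: 'arguments' would be the dict of input arguments as
        # returned by func_inspect. Here only the 'data' entry contributes to
        # the cache key; all other arguments are ignored.
        return joblib.hash(arguments.get('data'))

    # Hypothetical call, if Memory.cache ever grew such a parameter:
    # cached = memory.cache(my_function, argument_hash=my_argument_hash)
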
26 changes: 13 additions & 13 deletions azure-pipelines.yml
@@ -54,33 +54,33 @@ jobs:
imageName: 'ubuntu-latest'
PYTHON_VERSION: "3.7"
EXTRA_CONDA_PACKAGES: "numpy=1.15 distributed=2.13"
linux_py36_cython:
linux_py37_cython:
imageName: 'ubuntu-latest'
PYTHON_VERSION: "3.6"
EXTRA_CONDA_PACKAGES: "numpy=1.14"
PYTHON_VERSION: "3.7"
EXTRA_CONDA_PACKAGES: "numpy=1.15"
CYTHON: "true"
linux_py36_no_multiprocessing_no_lzma:
linux_py37_no_multiprocessing_no_lzma:
imageName: 'ubuntu-latest'
PYTHON_VERSION: "3.6"
EXTRA_CONDA_PACKAGES: "numpy=1.14"
PYTHON_VERSION: "3.7"
EXTRA_CONDA_PACKAGES: "numpy=1.15"
JOBLIB_MULTIPROCESSING: "0"
NO_LZMA: "1"
linux_py36_no_numpy:
linux_py37_no_numpy:
imageName: 'ubuntu-latest'
PYTHON_VERSION: "3.6"
PYTHON_VERSION: "3.7"

windows_py38:
imageName: "vs2017-win2016"
imageName: "windows-latest"
PYTHON_VERSION: "3.8"
EXTRA_CONDA_PACKAGES: "numpy=1.18"

macos_py38:
imageName: "macos-10.14"
imageName: "macos-latest"
PYTHON_VERSION: "3.8"
EXTRA_CONDA_PACKAGES: "numpy=1.18"
macos_py36_no_numpy:
imageName: "macos-10.14"
PYTHON_VERSION: "3.6"
macos_py37_no_numpy:
imageName: "macos-latest"
PYTHON_VERSION: "3.7"

variables:
JUNITXML: 'test-data.xml'
2 changes: 1 addition & 1 deletion benchmarks/bench_pickle.py
@@ -1,7 +1,7 @@
"""
Benching joblib pickle I/O.
Warning: this is slow, and the benchs are easily offset by other disk
Warning: this is slow, and the benches are easily offset by other disk
activity.
"""
import os
2 changes: 1 addition & 1 deletion continuous_integration/install.sh
@@ -18,7 +18,7 @@ create_new_conda_env() {
}

create_new_pypy3_env() {
PYPY_FOLDER="pypy3.6-v7.3.1-linux64"
PYPY_FOLDER="pypy3.7-v7.3.7-linux64"
wget https://downloads.python.org/pypy/$PYPY_FOLDER.tar.bz2
tar xvf $PYPY_FOLDER.tar.bz2
$PYPY_FOLDER/bin/pypy3 -m venv pypy3
10 changes: 8 additions & 2 deletions continuous_integration/run_tests.sh
@@ -28,14 +28,20 @@ if [[ "$SKLEARN_TESTS" == "true" ]]; then
# Install scikit-learn from conda and test against the installed
# development version of joblib.
conda remove -y numpy
conda install -y -c conda-forge cython pillow scikit-learn
conda install -y -c conda-forge cython pillow pip numpy scipy
pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn
python -c "import sklearn; print('Testing scikit-learn', sklearn.__version__)"

# Move to a dedicated folder to avoid being polluted by joblib specific conftest.py
# and disable the doctest plugin to avoid issues with doctests in scikit-learn
# docstrings that require setting print_changed_only=True temporarily.
cd "/tmp"
pytest -vl --maxfail=5 -p no:doctest -k "not test_import_is_deprecated" --pyargs sklearn
pytest -vl --maxfail=5 -p no:doctest \
# Don't worry about deprecated imports: this is tested for real
# in upstream scikit-learn and this is not joblib's responsibility.
# Let's skip this test to avoid false positives in joblib's CI.
-k "not test_import_is_deprecated" \
--pyargs sklearn
fi

if [[ "$SKIP_TESTS" != "true" && "$COVERAGE" == "true" ]]; then
4 changes: 2 additions & 2 deletions doc/memory.rst
@@ -391,8 +391,8 @@ Gotchas
``joblib.Memory`` cache can get invalidated when upgrading ``joblib``.
Invalidation can also happen when upgrading a third party library (such as
``numpy``): in such a case, only the cached function calls with parameters
that are constructs (or contain references to contructs) defined in the
upgraded library should potentially be invalidated after the uprade.
that are constructs (or contain references to constructs) defined in the
upgraded library should potentially be invalidated after the upgrade.


Ignoring some arguments
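A minimal sketch of the caching pattern this gotcha applies to, assuming numpy is installed and a writable ./joblib_cache directory; results cached before a numpy upgrade may be recomputed afterwards if the cached call involves constructs from the upgraded library::

    import numpy as np
    from joblib import Memory

    memory = Memory(location="./joblib_cache", verbose=0)

    @memory.cache
    def column_means(data):
        return data.mean(axis=0)

    X = np.random.RandomState(0).rand(1000, 10)
    column_means(X)   # computed and written to ./joblib_cache
    column_means(X)   # served from the on-disk cache, until it is invalidated
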
2 changes: 1 addition & 1 deletion examples/compressors_comparison.py
@@ -8,7 +8,7 @@
GZip compression methods.
For each compared compression method, this example dumps and reloads a
dataset fetched from an online machine-learning database. This gives 3
informations: the size on disk of the compressed data, the time spent to dump
information: the size on disk of the compressed data, the time spent to dump
and the time spent to reload the data from disk.
"""

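A toy version of the comparison the example performs, restricted to the zlib and gzip codecs on a small illustrative object; real measurements need a sizeable dataset and repeated runs::

    import os
    import time
    from joblib import dump, load

    data = {"values": list(range(100_000))}

    for method in ("zlib", "gzip"):
        filename = "data.pkl." + method
        start = time.time()
        dump(data, filename, compress=(method, 3))   # codec name and level
        dump_time = time.time() - start
        size_kb = os.path.getsize(filename) / 1024
        start = time.time()
        load(filename)
        load_time = time.time() - start
        print(f"{method}: {size_kb:.1f} kB, "
              f"dump {dump_time:.3f}s, load {load_time:.3f}s")
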
2 changes: 1 addition & 1 deletion examples/serialization_and_wrappers.py
@@ -36,7 +36,7 @@ def func_async(i, *args):


###############################################################################
# For most use-cases, using ``cloudpickle``` is efficient enough. However, this
# For most use-cases, using ``cloudpickle`` is efficient enough. However, this
# solution can be very slow to serialize large python objects, such as dict or
# list, compared to the standard ``pickle`` serialization.
#
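A rough illustration of the remark about large built-in containers, using the copy of cloudpickle vendored in joblib (the standalone cloudpickle package behaves the same); absolute timings are machine-dependent::

    import pickle
    import time
    from joblib.externals import cloudpickle

    big = {i: str(i) for i in range(1_000_000)}

    start = time.time()
    pickle.dumps(big)
    pickle_time = time.time() - start

    start = time.time()
    cloudpickle.dumps(big)
    cloudpickle_time = time.time() - start

    print(f"pickle: {pickle_time:.2f}s  cloudpickle: {cloudpickle_time:.2f}s")
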
2 changes: 1 addition & 1 deletion joblib/_store_backends.py
@@ -130,7 +130,7 @@ def configure(self, location, verbose=0, backend_options=dict()):
verbose: int
The level of verbosity of the store
backend_options: dict
Contains a dictionnary of named paremeters used to configure the
Contains a dictionary of named parameters used to configure the
store backend.
"""

2 changes: 1 addition & 1 deletion joblib/compressor.py
@@ -89,7 +89,7 @@ class CompressorWrapper():
prefix: bytestring
A bytestring corresponding to the magic number that identifies the
file format associated to the compressor.
extention: str
extension: str
The file extension used to automatically select this compressor during
a dump to a file.
"""
4 changes: 2 additions & 2 deletions joblib/disk.py
@@ -66,7 +66,7 @@ def mkdirp(d):

# if a rmtree operation fails in rm_subdirs, wait for this much time (in secs),
# then retry up to RM_SUBDIRS_N_RETRY times. If it still fails, raise the
# exception. this mecanism ensures that the sub-process gc have the time to
# exception. this mechanism ensures that the sub-process gc have the time to
# collect and close the memmaps before we fail.
RM_SUBDIRS_RETRY_TIME = 0.1
RM_SUBDIRS_N_RETRY = 5
@@ -119,7 +119,7 @@ def delete_folder(folder_path, onerror=None, allow_non_empty=True):
folder_path, ignore_errors=False, onerror=None
)
util.debug(
"Sucessfully deleted {}".format(folder_path))
"Successfully deleted {}".format(folder_path))
break
else:
raise OSError(
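A generic sketch of the retry scheme the comment above describes, not joblib's own implementation: retry the removal a few times with a short pause so that child-process garbage collection has time to close memory-mapped files::

    import shutil
    import time

    RETRY_TIME = 0.1   # seconds to wait between attempts
    N_RETRY = 5        # give up and re-raise after this many failures

    def rmtree_with_retry(path):
        for attempt in range(N_RETRY):
            try:
                shutil.rmtree(path, ignore_errors=False)
                return
            except OSError:
                if attempt == N_RETRY - 1:
                    raise
                time.sleep(RETRY_TIME)
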
2 changes: 1 addition & 1 deletion joblib/externals/cloudpickle/cloudpickle.py
@@ -594,7 +594,7 @@ def _create_parametrized_type_hint(origin, args):


def parametrized_type_hint_getinitargs(obj):
# The distorted type check sematic for typing construct becomes:
# The distorted type check semantic for typing construct becomes:
# ``type(obj) is type(TypeHint)``, which means "obj is a
# parametrized TypeHint"
if type(obj) is type(Literal): # pragma: no branch
2 changes: 1 addition & 1 deletion joblib/externals/loky/backend/queues.py
@@ -209,7 +209,7 @@ def __init__(self, reducers=None, ctx=None):
else:
self._wlock = ctx.Lock()

# Add possiblity to use custom reducers
# Add possibility to use custom reducers
self._reducers = reducers

def close(self):
4 changes: 2 additions & 2 deletions joblib/externals/loky/backend/resource_tracker.py
@@ -36,7 +36,7 @@
# Note that this behavior differs from CPython's resource_tracker, which only
# implements list of shared resources, and not a proper refcounting scheme.
# Also, CPython's resource tracker will only attempt to cleanup those shared
# resources once all procsses connected to the resouce tracker have exited.
# resources once all procsses connected to the resource tracker have exited.


import os
@@ -118,7 +118,7 @@ def ensure_running(self):
self._pid = None

warnings.warn('resource_tracker: process died unexpectedly, '
'relaunching. Some folders/sempahores might '
'relaunching. Some folders/semaphores might '
'leak.')

fds_to_pass = []
2 changes: 1 addition & 1 deletion joblib/externals/loky/backend/spawn.py
@@ -105,7 +105,7 @@ def get_preparation_data(name, init_main_module=True):
_resource_tracker as mp_resource_tracker
)
# multiprocessing's resource_tracker must be running before loky
# process is created (othewise the child won't be able to use it if it
# process is created (otherwise the child won't be able to use it if it
# is created later on)
mp_resource_tracker.ensure_running()
d["mp_tracker_args"] = {
2 changes: 1 addition & 1 deletion joblib/externals/loky/backend/utils.py
@@ -117,7 +117,7 @@ def _recursive_terminate(pid):


def get_exitcodes_terminated_worker(processes):
"""Return a formated string with the exitcodes of terminated workers.
"""Return a formatted string with the exitcodes of terminated workers.
If necessary, wait (up to .25s) for the system to correctly set the
exitcode of one terminated worker.
2 changes: 1 addition & 1 deletion joblib/externals/loky/cloudpickle_wrapper.py
@@ -24,7 +24,7 @@ def __reduce__(self):
return _reconstruct_wrapper, (_pickled_object, self._keep_wrapper)

def __getattr__(self, attr):
# Ensure that the wrapped object can be used seemlessly as the
# Ensure that the wrapped object can be used seamlessly as the
# previous object.
if attr not in ['_obj', '_keep_wrapper']:
return getattr(self._obj, attr)
2 changes: 1 addition & 1 deletion joblib/externals/loky/process_executor.py
@@ -1048,7 +1048,7 @@ def _start_executor_manager_thread(self):
if self._executor_manager_thread is None:
mp.util.debug('_start_executor_manager_thread called')

# When the executor gets garbarge collected, the weakref callback
# When the executor gets garbage collected, the weakref callback
# will wake up the queue management thread so that it can terminate
# if there is no pending work item.
def weakref_cb(
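A small standalone sketch of the weakref-callback mechanism the comment above relies on, with a stand-in class rather than loky's executor: the callback runs once the object is garbage collected, which is how the manager thread gets woken up::

    import weakref

    class Executor:
        """Stand-in object; not loky's executor."""
        pass

    def weakref_cb(_ref):
        # In loky this would wake the executor manager thread so it can
        # terminate when no work items are pending.
        print("executor was garbage collected")

    executor = Executor()
    ref = weakref.ref(executor, weakref_cb)
    del executor   # in CPython the callback typically fires right away
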
2 changes: 1 addition & 1 deletion joblib/hashing.py
@@ -193,7 +193,7 @@ def save(self, obj):
obj_c_contiguous = obj.T
else:
# Cater for non-single-segment arrays: this creates a
# copy, and thus aleviates this issue.
# copy, and thus alleviates this issue.
# XXX: There might be a more efficient way of doing this
obj_c_contiguous = obj.flatten()

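A small illustration of the comment above, assuming numpy is installed: non-contiguous (multi-segment) arrays are hashed through a flattened copy internally, so hashing such views works and is deterministic across calls::

    import numpy as np
    from joblib import hash as joblib_hash

    a = np.arange(12).reshape(3, 4)
    view = a[:, ::2]                      # non-contiguous view
    print(view.flags["C_CONTIGUOUS"])     # False
    print(joblib_hash(view) == joblib_hash(a[:, ::2]))   # deterministic: True
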
48 changes: 7 additions & 41 deletions joblib/memory.py
@@ -130,7 +130,7 @@ def _store_backend_factory(backend, location, verbose=0, backend_options=None):
return obj
elif location is not None:
warnings.warn(
"Instanciating a backend using a {} as a location is not "
"Instantiating a backend using a {} as a location is not "
"supported by joblib. Returning None instead.".format(
location.__class__.__name__), UserWarning)

@@ -196,7 +196,7 @@ class MemorizedResult(Logger):
func: function or str
function whose output is cached. The string case is intended only for
instanciation based on the output of repr() on another instance.
instantiation based on the output of repr() on another instance.
(namely eval(repr(memorized_instance)) works).
argument_hash: str
@@ -487,7 +487,7 @@ def _cached_call(self, args, kwargs, shelving=False):
metadata = None
msg = None

# Wether or not the memorized function must be called
# Whether or not the memorized function must be called
must_call = False

# FIXME: The statements below should be try/excepted
@@ -563,8 +563,8 @@ def func_code_info(self):
# (which should be called once on self) gets called in the process
# in which self.func was defined, this caching mechanism prevents
# undesired cache clearing when the cached function is called in
# an environement where the introspection utilities get_func_code
# relies on do not work (typicially, in joblib child processes).
# an environment where the introspection utilities get_func_code
# relies on do not work (typically, in joblib child processes).
# See #1035 for more info
# TODO (pierreglaser): do the same with get_func_name?
self._func_code_info = get_func_code(self.func)
@@ -876,12 +876,6 @@ class Memory(Logger):
The 'local' backend is using regular filesystem operations to
manipulate data (open, mv, etc) in the backend.
cachedir: str or None, optional
.. deprecated: 0.12
'cachedir' has been deprecated in 0.12 and will be
removed in 0.14. Use the 'location' parameter instead.
mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
The memmapping mode used when loading from cache
numpy arrays. See numpy.load for the meaning of the
@@ -906,17 +900,16 @@ class Memory(Logger):
actually reduce the cache size to be less than ``bytes_limit``.
backend_options: dict, optional
Contains a dictionnary of named parameters used to configure
Contains a dictionary of named parameters used to configure
the store backend.
"""
# ------------------------------------------------------------------------
# Public interface
# ------------------------------------------------------------------------

def __init__(self, location=None, backend='local', cachedir=None,
def __init__(self, location=None, backend='local',
mmap_mode=None, compress=False, verbose=1, bytes_limit=None,
backend_options=None):
# XXX: Bad explanation of the None value of cachedir
Logger.__init__(self)
self._verbose = verbose
self.mmap_mode = mmap_mode
@@ -931,22 +924,6 @@ def __init__(self, location=None, backend='local', cachedir=None,
if compress and mmap_mode is not None:
warnings.warn('Compressed results cannot be memmapped',
stacklevel=2)
if cachedir is not None:
if location is not None:
raise ValueError(
'You set both "location={0!r} and "cachedir={1!r}". '
"'cachedir' has been deprecated in version "
"0.12 and will be removed in version 0.14.\n"
'Please only set "location={0!r}"'.format(
location, cachedir))

warnings.warn(
"The 'cachedir' parameter has been deprecated in version "
"0.12 and will be removed in version 0.14.\n"
'You provided "cachedir={0!r}", '
'use "location={0!r}" instead.'.format(cachedir),
DeprecationWarning, stacklevel=2)
location = cachedir

self.location = location
if isinstance(location, str):
@@ -957,17 +934,6 @@ def __init__(self, location=None, backend='local', cachedir=None,
backend_options=dict(compress=compress, mmap_mode=mmap_mode,
**backend_options))

@property
def cachedir(self):
warnings.warn(
"The 'cachedir' attribute has been deprecated in version 0.12 "
"and will be removed in version 0.14.\n"
"Use os.path.join(memory.location, 'joblib') attribute instead.",
DeprecationWarning, stacklevel=2)
if self.location is None:
return None
return os.path.join(self.location, 'joblib')

def cache(self, func=None, ignore=None, verbose=None, mmap_mode=False):
""" Decorates the given function func to only compute its return
value for input arguments not cached on disk.
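With the deprecated 'cachedir' parameter and property removed by this diff, the cache directory is passed through 'location' only; a brief sketch of the remaining constructor arguments, assuming a writable ./joblib_cache directory::

    from joblib import Memory

    memory = Memory(location="./joblib_cache", backend="local",
                    mmap_mode=None, compress=False, verbose=0,
                    bytes_limit=10 * 1024 ** 2)

    @memory.cache(ignore=["verbose"])
    def multiply(a, b, verbose=False):
        if verbose:
            print("computing", a, "*", b)
        return a * b

    print(multiply(3, 4))
    memory.reduce_size()   # enforce bytes_limit by dropping least recently used items
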
