dask · milesgranger · Feb 6, 2024 · Feb 7, 2024 · Feb 7, 2024 · Apr 4, 2024
diff --git a/continuous_integration/environment-3.10.yaml b/continuous_integration/environment-3.10.yaml
@@ -74,4 +74,4 @@ dependencies:
   - jinja2
   - pip
   - pip:
-    - git+https://github.com/dask/distributed
+    - git+https://github.com/milesgranger/distributed@milesgranger/rename-future-to-task
diff --git a/continuous_integration/environment-3.11.yaml b/continuous_integration/environment-3.11.yaml
@@ -75,5 +75,5 @@ dependencies:
   - jinja2
   - pip
   - pip:
-    - git+https://github.com/dask/distributed
-    - git+https://github.com/dask-contrib/dask-expr
+    - git+https://github.com/milesgranger/distributed@milesgranger/rename-future-to-task
+    - git+https://github.com/milesgranger/dask-expr@milesgranger/rename-future-to-task
diff --git a/continuous_integration/environment-3.12.yaml b/continuous_integration/environment-3.12.yaml
@@ -74,5 +74,5 @@ dependencies:
   - jinja2
   - pip
   - pip:
-    - git+https://github.com/dask/distributed
-    - git+https://github.com/dask-contrib/dask-expr
+    - git+https://github.com/milesgranger/distributed@milesgranger/rename-future-to-task
+    - git+https://github.com/milesgranger/dask-expr@milesgranger/rename-future-to-task
diff --git a/continuous_integration/environment-3.9.yaml b/continuous_integration/environment-3.9.yaml
@@ -74,4 +74,4 @@ dependencies:
   - jinja2
   - pip
   - pip:
-    - git+https://github.com/dask/distributed
+    - git+https://github.com/milesgranger/distributed@milesgranger/rename-future-to-task
diff --git a/continuous_integration/environment-mindeps-distributed.yaml b/continuous_integration/environment-mindeps-distributed.yaml
@@ -15,7 +15,7 @@ dependencies:
   # optional dependencies pulled in by pip install dask[distributed]
   - pip
   - pip:
-      - git+https://github.com/dask/distributed
+      - git+https://github.com/milesgranger/distributed@milesgranger/rename-future-to-task
   # test dependencies
   - pre-commit
   - pytest

diff --git a/continuous_integration/environment-mindeps-optional.yaml b/continuous_integration/environment-mindeps-optional.yaml
@@ -50,7 +50,7 @@ dependencies:
   - pip
   - pip:
       # optional dependencies pulled in by pip install dask[distributed]
-      - git+https://github.com/dask/distributed
+      - git+https://github.com/milesgranger/distributed@milesgranger/rename-future-to-task
   # test dependencies
   - pre-commit
   - pytest

diff --git a/dask/array/utils.py b/dask/array/utils.py
@@ -238,7 +238,7 @@ def _check_chunks(x, check_ndim=True, scheduler=None):
     x = x.persist(scheduler=scheduler)
     for idx in itertools.product(*(range(len(c)) for c in x.chunks)):
         chunk = x.dask[(x.name,) + idx]
-        if hasattr(chunk, "result"):  # it's a future
+        if hasattr(chunk, "result"):  # it's a task
             chunk = chunk.result()
         if not hasattr(chunk, "dtype"):
             chunk = np.array(chunk, dtype="O")

diff --git a/dask/base.py b/dask/base.py
@@ -317,7 +317,7 @@ def persist(self, **kwargs):
         task scheduler.  If the task scheduler supports asynchronous computing,
         such as is the case of the dask.distributed scheduler, then persist
         will return *immediately* and the return value's task graph will
-        contain Dask Future objects.  However if the task scheduler only
+        contain Dask Task objects.  However if the task scheduler only
         supports blocking computation then the call to persist will *block*
         and the return value's task graph will contain concrete Python results.
 
@@ -933,7 +933,7 @@ def persist(*args, traverse=True, optimize_graph=True, scheduler=None, **kwargs)
     >>> df['in-debt'] = df.balance < 0  # doctest: +SKIP
     >>> df = df.persist()  # triggers computation  # doctest: +SKIP
 
-    >>> df.value().min()  # future computations are now fast  # doctest: +SKIP
+    >>> df.value().min()  # future computations are now fast  # doctest: +SKIP
     -10
     >>> df.value().max()  # doctest: +SKIP
     100
@@ -1525,7 +1525,7 @@ def wait(x, timeout=None, return_when="ALL_COMPLETED"):
     """Wait until computation has finished
 
     This is a compatibility alias for ``dask.distributed.wait``.
-    If it is applied onto Dask collections without Dask Futures or if Dask
+    If it is applied onto Dask collections without Dask Tasks or if Dask
     distributed is not installed then it is a no-op
     """
     try:

diff --git a/dask/dataframe/core.py b/dask/dataframe/core.py
@@ -5377,7 +5377,7 @@ def set_index(
 
         With ``sort=True``, this function is much more expensive. Under normal
         operation this function does an initial pass over the index column to
-        compute approximate quantiles to serve as future divisions. It then passes
+        compute approximate quantiles to serve as future divisions. It then passes
         over the data a second time, splitting up each input partition into several
         pieces and sharing those pieces to all of the output partitions now in
         sorted order.

diff --git a/dask/dataframe/io/io.py b/dask/dataframe/io/io.py
@@ -584,7 +584,7 @@ def to_records(df):
 
 @insert_meta_param_description
 def from_delayed(
-    dfs: Delayed | distributed.Future | Iterable[Delayed | distributed.Future],
+    dfs: Delayed | distributed.Task | Iterable[Delayed | distributed.Task],
     meta=None,
     divisions: tuple | Literal["sorted"] | None = None,
     prefix: str = "from-delayed",
@@ -595,7 +595,7 @@ def from_delayed(
     Parameters
     ----------
     dfs :
-        A ``dask.delayed.Delayed``, a ``distributed.Future``, or an iterable of either
+        A ``dask.delayed.Delayed``, a ``distributed.Task``, or an iterable of either
         of these objects, e.g. returned by ``client.submit``. These comprise the
         individual partitions of the resulting dataframe.
         If a single object is provided (not an iterable), then the resulting dataframe

diff --git a/dask/dataframe/io/orc/core.py b/dask/dataframe/io/orc/core.py
@@ -178,7 +178,7 @@ def to_orc(
         Key/value pairs to be passed on to the file-system backend, if any.
     compute : bool, default True
         If True (default) then the result is computed immediately. If False
-        then a ``dask.delayed`` object is returned for future computation.
+        then a ``dask.delayed`` object is returned for future computation.
     compute_kwargs : dict, default True
         Options to be passed in to the compute method
 
@@ -235,7 +235,7 @@ def to_orc(
     dsk[(final_name, 0)] = (lambda x: None, part_tasks)
     graph = HighLevelGraph.from_collections((final_name, 0), dsk, dependencies=[df])
 
-    # Compute or return future
+    # Compute or return a lazy object for future computation
     if compute:
         if compute_kwargs is None:
             compute_kwargs = dict()

diff --git a/dask/dataframe/io/parquet/core.py b/dask/dataframe/io/parquet/core.py
@@ -760,7 +760,7 @@ def to_parquet(
     compute : bool, default True
         If ``True`` (default) then the result is computed immediately. If
         ``False`` then a ``dask.dataframe.Scalar`` object is returned for
-        future computation.
+        future computation.
     compute_kwargs : dict, default True
         Options to be passed in to the compute method
     schema : pyarrow.Schema, dict, "infer", or None, default "infer"
@@ -1100,7 +1100,7 @@ def create_metadata_file(
         inputs to be handled by any one task in the tree. Defaults to 32.
     compute : bool, optional
         If True (default) then the result is computed immediately. If False
-        then a ``dask.delayed`` object is returned for future computation.
+        then a ``dask.delayed`` object is returned for future computation.
     compute_kwargs : dict, optional
         Options to be passed in to the compute method
     fs : fsspec object, optional

diff --git a/dask/layers.py b/dask/layers.py
@@ -995,7 +995,7 @@ class DataFrameIOLayer(Blockwise):
         Whether one or more elements of `inputs` is expected to
         contain a nested task. This argument in only used for
         serialization purposes, and will be deprecated in the
-        future. Default is False.
+        future. Default is False.
     creation_info: dict (optional)
         Dictionary containing the callable function ('func'),
         positional arguments ('args'), and key-word arguments

diff --git a/dask/local.py b/dask/local.py
@@ -110,7 +110,8 @@
 
 import os
 from collections.abc import Mapping, Sequence
-from concurrent.futures import Executor, Future
+from concurrent.futures import Executor
+from concurrent.futures import Future as ConcurrentFuture
 from functools import partial
 from queue import Empty, Queue
 
@@ -538,7 +539,7 @@ class SynchronousExecutor(Executor):
     _max_workers = 1
 
     def submit(self, fn, *args, **kwargs):
-        fut = Future()
+        fut = ConcurrentFuture()
         try:
             fut.set_result(fn(*args, **kwargs))
         except BaseException as e:
@@ -585,7 +586,7 @@ def submit(self, fn, *args, **kwargs):
 
 
 def submit_apply_async(apply_async, fn, *args, **kwargs):
-    fut = Future()
+    fut = ConcurrentFuture()
     apply_async(fn, args, kwargs, fut.set_result, fut.set_exception)
     return fut
 

diff --git a/dask/tests/test_distributed.py b/dask/tests/test_distributed.py
@@ -107,8 +107,8 @@ def test_futures_to_delayed_dataframe(c):
 
     df = pd.DataFrame({"x": [1, 2, 3]})
 
-    futures = c.scatter([df, df])
-    ddf = dd.from_delayed(futures)
+    tasks = c.scatter([df, df])
+    ddf = dd.from_delayed(tasks)
     dd.utils.assert_eq(ddf.compute(), pd.concat([df, df], axis=0))
 
     # Make sure from_delayed is Blockwise
@@ -242,8 +242,8 @@ def foo():
 def test_futures_to_delayed_bag(c):
     L = [1, 2, 3]
 
-    futures = c.scatter([L, L])
-    b = db.from_delayed(futures)
+    tasks = c.scatter([L, L])
+    b = db.from_delayed(tasks)
     assert list(b) == L + L
 
 
@@ -254,9 +254,9 @@ def test_futures_to_delayed_array(c):
     np = pytest.importorskip("numpy")
     x = np.arange(5)
 
-    futures = c.scatter([x, x])
+    tasks = c.scatter([x, x])
     A = da.concatenate(
-        [da.from_delayed(f, shape=x.shape, dtype=x.dtype) for f in futures], axis=0
+        [da.from_delayed(f, shape=x.shape, dtype=x.dtype) for f in tasks], axis=0
     )
     assert_eq(A.compute(), np.concatenate([x, x], axis=0))
 
@@ -469,16 +469,16 @@ def test_blockwise_array_creation(c, io, fuse):
     ],
 )
 @pytest.mark.parametrize("fuse", [True, False, None])
-@pytest.mark.parametrize("from_futures", [True, False])
-def test_blockwise_dataframe_io(c, tmpdir, io, fuse, from_futures):
+@pytest.mark.parametrize("from_tasks", [True, False])
+def test_blockwise_dataframe_io(c, tmpdir, io, fuse, from_tasks):
     pd = pytest.importorskip("pandas")
     dd = pytest.importorskip("dask.dataframe")
     if dd._dask_expr_enabled():
         pytest.xfail("doesn't work yet")
 
     df = pd.DataFrame({"x": [1, 2, 3] * 5, "y": range(15)})
 
-    if from_futures:
+    if from_tasks:
         parts = [df.iloc[:5], df.iloc[5:10], df.iloc[10:15]]
         futs = c.scatter(parts)
         ddf0 = dd.from_delayed(futs, meta=parts[0])

diff --git a/dask/typing.py b/dask/typing.py
@@ -309,7 +309,7 @@ def persist(self: CollType, **kwargs: Any) -> CollType:
         asynchronous computing, such as is the case of the
         dask.distributed scheduler, then persist will return
         *immediately* and the return value's task graph will contain
-        Dask Future objects. However if the task scheduler only
+        Dask Task objects. However if the task scheduler only
         supports blocking computation then the call to persist will
         *block* and the return value's task graph will contain
         concrete Python results.

diff --git a/docs/source/10-minutes-to-dask.rst b/docs/source/10-minutes-to-dask.rst
@@ -471,10 +471,10 @@ run into code that is parallelizable, but isn't just a big DataFrame or array.
 
          c = c.compute()  # This triggers all of the above computations
 
-   .. tab-item:: Futures: Immediate
+   .. tab-item:: Tasks: Immediate
 
-      Unlike the interfaces described so far, Futures are eager. Computation starts as soon
-      as the function is submitted (see :doc:`futures`).
+      Unlike the interfaces described so far, Tasks are eager. Computation starts as soon
+      as the function is submitted (see :doc:`tasks`).
 
       .. code-block:: python
 
@@ -496,7 +496,7 @@ run into code that is parallelizable, but isn't just a big DataFrame or array.
 
       .. note::
 
-         Futures can only be used with distributed cluster. See the section below for more
+         Tasks can only be used with a distributed cluster. See the section below for more
          information.
 
 

diff --git a/docs/source/api.rst b/docs/source/api.rst
@@ -7,7 +7,7 @@ Dask APIs generally follow from upstream APIs:
 -  :doc:`DataFrames <dataframe-api>` follows Pandas
 -  :doc:`Bag <bag-api>` follows map/filter/groupby/reduce common in Spark and Python iterators
 -  :doc:`Delayed <delayed-api>` wraps general Python code
--  :doc:`Futures <futures>` follows `concurrent.futures <https://docs.python.org/3/library/concurrent.futures.html>`_ from the standard library for real-time computation.
+-  :doc:`Tasks <tasks>` follows `concurrent.futures <https://docs.python.org/3/library/concurrent.futures.html>`_ from the standard library for real-time computation.
 
 .. toctree::
    :maxdepth: 1
@@ -17,7 +17,7 @@ Dask APIs generally follow from upstream APIs:
    DataFrame <dataframe-api.rst>
    Bag <bag-api.rst>
    Delayed <delayed-api.rst>
-   Futures <futures>
+   Tasks <tasks>
 
 
 Additionally, Dask has its own functions to start computations, persist data in

diff --git a/docs/source/caching.rst b/docs/source/caching.rst
@@ -4,16 +4,16 @@ Opportunistic Caching
 Dask usually removes intermediate values as quickly as possible in order to
 make space for more data to flow through your computation.  However, in some
 cases, we may want to hold onto intermediate values, because they might be
-useful for future computations in an interactive session.
+useful for future computations in an interactive session.
 
 We need to balance the following concerns:
 
-1.  Intermediate results might be useful in future unknown computations
+1.  Intermediate results might be useful in future unknown computations
 2.  Intermediate results also fill up memory, reducing space for the rest of our
     current computation
 
 Negotiating between these two concerns helps us to leverage the memory that we
-have available to speed up future, unanticipated computations.  Which intermediate results
+have available to speed up future, unanticipated computations.  Which intermediate results
 should we keep?
 
 This document explains an experimental, opportunistic caching mechanism that automatically
@@ -81,7 +81,7 @@ Automatic Opportunistic Caching
 -------------------------------
 
 Another approach is to watch *all* intermediate computations, and *guess* which
-ones might be valuable to keep for the future.  Dask has an *opportunistic
+ones might be valuable to keep for the future.  Dask has an *opportunistic
 caching mechanism* that stores intermediate tasks that show the following
 characteristics:
 

diff --git a/docs/source/custom-collections.rst b/docs/source/custom-collections.rst
@@ -210,7 +210,7 @@ created. It too has three stages:
 2. **Computation**
 
    Same as in ``compute``, except in the case of the distributed scheduler,
-   where the values in ``results`` are futures instead of values.
+   where the values in ``results`` are tasks instead of values.
 
 3. **Postpersist**
 
@@ -221,7 +221,7 @@ created. It too has three stages:
    - A ``rebuild`` function, which takes in a persisted graph.  The keys of
      this graph are the same as ``__dask_keys__`` for the corresponding
      collection, and the values are computed results (for the single-machine
-     scheduler) or futures (for the distributed scheduler).
+     scheduler) or tasks (for the distributed scheduler).
    - A tuple of extra arguments to pass to ``rebuild`` after the graph
 
    To build the outputs of ``persist``, the list of collections and results is

diff --git a/docs/source/delayed.rst b/docs/source/delayed.rst
@@ -161,7 +161,7 @@ Real time
 ---------
 
 Sometimes you want to create and destroy work during execution, launch tasks
-from other tasks, etc.  For this, see the :doc:`Futures <futures>` interface.
+from other tasks, etc.  For this, see the :doc:`Tasks <tasks>` interface.
 
 
 Best Practices

diff --git a/docs/source/deploying-python-advanced.rst b/docs/source/deploying-python-advanced.rst
@@ -37,8 +37,8 @@ cleaning everything up.
        async with Scheduler() as s:
            async with Worker(s.address) as w1, Worker(s.address) as w2:
                async with Client(s.address, asynchronous=True) as client:
-                   future = client.submit(lambda x: x + 1, 10)
-                   result = await future
+                   task = client.submit(lambda x: x + 1, 10)
+                   result = await task
                    print(result)
 
    asyncio.get_event_loop().run_until_complete(f())
@@ -154,8 +154,8 @@ computation, and then allow things to clean up after that computation..
        async with Scheduler() as s:
            async with Worker(s.address) as w1, Worker(s.address) as w2:
                async with Client(s.address, asynchronous=True) as client:
-                   future = client.submit(lambda x: x + 1, 10)
-                   result = await future
+                   task = client.submit(lambda x: x + 1, 10)
+                   result = await task
                    print(result)
 
    asyncio.get_event_loop().run_until_complete(f())

diff --git a/docs/source/develop.rst b/docs/source/develop.rst
@@ -24,7 +24,7 @@ Dask conversation happens in the following places:
 
 For usage questions and bug reports we prefer the use of Discourse, Stack Overflow
 and GitHub issues over Slack chat.  Discourse, GitHub and Stack Overflow are more easily
-searchable by future users, so conversations had there can be useful to many more people
+searchable by future users, so conversations had there can be useful to many more people
 than just those directly involved.
 
 .. _`Dask Discourse forum`: https://dask.discourse.group
@@ -142,7 +142,7 @@ Test
 ~~~~
 
 Dask employs extensive unit tests to ensure correctness of code both for today
-and for the future.  Test coverage is expected for all code contributions.
+and for the future.  Test coverage is expected for all code contributions.
 
 Tests are written in a py.test style with bare functions:
 
@@ -375,7 +375,7 @@ Distributed submit PRs.  In this case, the gpuCI bot will comment on the PR:
 Dask Maintainers can then approve gpuCI builds for these PRs with following choices:
 
 - To only approve the PR contributor for the current PR, leave a comment which states ``ok to test``
-- To approve the current PR and all future PRs from the contributor, leave a comment which states ``add to allowlist``
+- To approve the current PR and all future PRs from the contributor, leave a comment which states ``add to allowlist``
 
 
 .. _Sphinx: https://www.sphinx-doc.org/