diff --git a/CHANGELOG.md b/CHANGELOG.md index 596bd9977..d3967faec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## 12.1.0 + +### Added + +- Progress.open and Progress.wrap_file methods to track the progress while reading from a file or file-like object https://github.com/willmcgugan/rich/pull/1759 ### Added diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 3dc31bf80..aca64423d 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -18,6 +18,7 @@ The following people have contributed to the development of Rich: - [Finn Hughes](https://github.com/finnhughes) - [Josh Karpel](https://github.com/JoshKarpel) - [Andrew Kettmann](https://github.com/akettmann) +- [Martin Larralde](https://github.com/althonos) - [Hedy Li](https://github.com/hedythedev) - [Luka Mamukashvili](https://github.com/UltraStudioLTD) - [Alexander Mancevice](https://github.com/amancevice) diff --git a/docs/source/progress.rst b/docs/source/progress.rst index 4ce358836..f905b255e 100644 --- a/docs/source/progress.rst +++ b/docs/source/progress.rst @@ -26,6 +26,16 @@ For basic usage call the :func:`~rich.progress.track` function, which accepts a for n in track(range(n), description="Processing..."): do_work(n) + +To get a progress bar while reading from a file, you may consider using the :func:`~rich.progress.open` function, which accepts a path and a mode like the builtin :func:`open`, or the :func:`~rich.progress.wrap_file` function, which accepts a *file-like* object in *binary mode*. Both return a context manager yielding a *file-like* object that updates the progress information as it is being read from. 
Here's an example, tracking the progress made by :func:`json.load` while loading a file:: + + import json + import rich.progress + + with rich.progress.open("data.json", "rb", description="Loading data...") as f: + data = json.load(f) + + Advanced usage -------------- @@ -34,9 +44,9 @@ If you require multiple tasks in the display, or wish to configure the columns i The Progress class is designed to be used as a *context manager* which will start and stop the progress display automatically. Here's a simple example:: - + import time - + from rich.progress import Progress with Progress() as progress: @@ -179,7 +189,7 @@ If you have another Console object you want to use, pass it in to the :class:`~r with Progress(console=my_console) as progress: my_console.print("[bold blue]Starting work!") do_work(progress) - + Redirecting stdout / stderr ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -199,6 +209,42 @@ If the :class:`~rich.progress.Progress` class doesn't offer exactly what you nee def get_renderables(self): yield Panel(self.make_tasks_table(self.tasks)) +Reading from a file +~~~~~~~~~~~~~~~~~~~ + +You can obtain a progress-tracking reader using the :meth:`~rich.progress.Progress.open` method by giving it a path. You can specify the number of bytes to be read, but by default :meth:`~rich.progress.Progress.open` will query the size of the file with :func:`os.stat`. You are responsible for closing the file, and you should consider using a *context manager* to make sure it is closed :: + + import json + from rich.progress import Progress + + with Progress() as progress: + with progress.open("data.json", "rb") as file: + json.load(file) + + +Note that in the above snippet we use the ``"rb"`` mode, because we needed the file to be opened in binary mode to pass it to :func:`json.load`. 
If the API consuming the file is expecting an object in *text mode* (for instance, :func:`csv.reader`), you can open the file with the `"r"` mode, which happens to be the default :: + + from rich.progress import Progress + + with Progress() as progress: + with progress.open("README.md") as file: + for line in file: + print(line) + + +Reading from a file-like object +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can obtain a progress-tracking reader wrapping a file-like object using the :meth:`~rich.progress.Progress.wrap_file` method. The file-like object must be in *binary mode*, and a total must be provided, unless it was provided to a :class:`~rich.progress.Task` created beforehand. The returned reader may be used in a context, but will not take care of closing the wrapped file :: + + import json + from rich.progress import Progress + + with Progress() as progress: + with open("data.json", "rb") as file: + json.load(progress.wrap_file(file, total=2048)) + + Multiple Progress ----------------- @@ -208,4 +254,3 @@ Example ------- See `downloader.py `_ for a realistic application of a progress display. This script can download multiple concurrent files with a progress bar, transfer speed and file size. - diff --git a/examples/cp_progress.py b/examples/cp_progress.py new file mode 100644 index 000000000..0f4059d02 --- /dev/null +++ b/examples/cp_progress.py @@ -0,0 +1,39 @@ +""" +A very minimal `cp` clone that displays a progress bar. 
+""" +import os +import shutil +import sys + +from rich.progress import ( + BarColumn, + DownloadColumn, + Progress, + TaskID, + TextColumn, + TimeRemainingColumn, + TransferSpeedColumn, +) + +progress = Progress( + TextColumn("[bold blue]{task.description}", justify="right"), + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>3.1f}%", + "•", + DownloadColumn(), + "•", + TransferSpeedColumn(), + "•", + TimeRemainingColumn(), +) + +if __name__ == "__main__": + if len(sys.argv) == 3: + + with progress: + desc = os.path.basename(sys.argv[1]) + with progress.open(sys.argv[1], "rb", description=desc) as src:  # binary mode: the bytes are copied verbatim into the "wb" destination + with open(sys.argv[2], "wb") as dst: + shutil.copyfileobj(src, dst) + else: + print("Usage:\n\tpython cp_progress.py SRC DST") diff --git a/rich/progress.py b/rich/progress.py index e4abbdb66..a3b30a85b 100644 --- a/rich/progress.py +++ b/rich/progress.py @@ -1,28 +1,44 @@ +import io +import sys +import typing +import warnings from abc import ABC, abstractmethod from collections import deque from collections.abc import Sized from dataclasses import dataclass, field from datetime import timedelta +from io import RawIOBase, UnsupportedOperation from math import ceil +from mmap import mmap +from os import PathLike, stat from threading import Event, RLock, Thread from types import TracebackType from typing import ( Any, + BinaryIO, Callable, + ContextManager, Deque, Dict, + Generic, Iterable, List, NamedTuple, NewType, Optional, Sequence, + TextIO, Tuple, Type, TypeVar, Union, ) +if sys.version_info >= (3, 8): + from typing import Literal +else: + from typing_extensions import Literal # pragma: no cover + from . 
import filesize, get_console from .console import Console, JustifyMethod, RenderableType, Group from .highlighter import Highlighter @@ -41,6 +57,9 @@ GetTimeCallable = Callable[[], float] +_I = typing.TypeVar("_I", TextIO, BinaryIO) + + class _TrackThread(Thread): """A thread to periodically update progress.""" @@ -149,6 +168,320 @@ def track( ) +class _Reader(RawIOBase, BinaryIO): + """A reader that tracks progress while it's being read from.""" + + def __init__( + self, + handle: BinaryIO, + progress: "Progress", + task: TaskID, + close_handle: bool = True, + ) -> None: + self.handle = handle + self.progress = progress + self.task = task + self.close_handle = close_handle + self._closed = False + + def __enter__(self) -> "_Reader": + self.handle.__enter__() + return self + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: + self.close() + + def __iter__(self) -> BinaryIO: + return self + + def __next__(self) -> bytes: + line = next(self.handle) + self.progress.advance(self.task, advance=len(line)) + return line + + @property + def closed(self) -> bool: + return self._closed + + def fileno(self) -> int: + return self.handle.fileno() + + def isatty(self) -> bool: + return self.handle.isatty() + + def readable(self) -> bool: + return self.handle.readable() + + def seekable(self) -> bool: + return self.handle.seekable() + + def writable(self) -> bool: + return False + + def read(self, size: int = -1) -> bytes: + block = self.handle.read(size) + self.progress.advance(self.task, advance=len(block)) + return block + + def readinto(self, b: Union[bytearray, memoryview, mmap]): # type: ignore[no-untyped-def, override] + n = self.handle.readinto(b) # type: ignore[attr-defined] + self.progress.advance(self.task, advance=n) + return n + + def readline(self, size: int = -1) -> bytes: # type: ignore[override] + line = self.handle.readline(size) + 
self.progress.advance(self.task, advance=len(line)) + return line + + def readlines(self, hint: int = -1) -> List[bytes]: + lines = self.handle.readlines(hint) + self.progress.advance(self.task, advance=sum(map(len, lines))) + return lines + + def close(self) -> None: + if self.close_handle: + self.handle.close() + self._closed = True + + def seek(self, offset: int, whence: int = 0) -> int: + pos = self.handle.seek(offset, whence) + self.progress.update(self.task, completed=pos) + return pos + + def tell(self) -> int: + return self.handle.tell() + + def write(self, s: Any) -> int: + raise UnsupportedOperation("write") + + +class _ReadContext(ContextManager[_I], Generic[_I]): + """A utility class to handle a context for both a reader and a progress.""" + + def __init__(self, progress: "Progress", reader: _I) -> None: + self.progress = progress + self.reader: _I = reader + + def __enter__(self) -> _I: + self.progress.start() + return self.reader.__enter__() + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: + self.progress.stop() + self.reader.__exit__(exc_type, exc_val, exc_tb) + + +def wrap_file( + file: BinaryIO, + total: int, + *, + description: str = "Reading...", + auto_refresh: bool = True, + console: Optional[Console] = None, + transient: bool = False, + get_time: Optional[Callable[[], float]] = None, + refresh_per_second: float = 10, + style: StyleType = "bar.back", + complete_style: StyleType = "bar.complete", + finished_style: StyleType = "bar.finished", + pulse_style: StyleType = "bar.pulse", + disable: bool = False, +) -> ContextManager[BinaryIO]: + """Read bytes from a file while tracking progress. + + Args: + file (Union[str, PathLike[str], BinaryIO]): The path to the file to read, or a file-like object in binary mode. + total (int): Total number of bytes to read. + description (str, optional): Description of task show next to progress bar. 
Defaults to "Reading". + auto_refresh (bool, optional): Automatic refresh, disable to force a refresh after each iteration. Default is True. + transient: (bool, optional): Clear the progress on exit. Defaults to False. + console (Console, optional): Console to write to. Default creates internal Console instance. + refresh_per_second (float): Number of times per second to refresh the progress information. Defaults to 10. + style (StyleType, optional): Style for the bar background. Defaults to "bar.back". + complete_style (StyleType, optional): Style for the completed bar. Defaults to "bar.complete". + finished_style (StyleType, optional): Style for a finished bar. Defaults to "bar.done". + pulse_style (StyleType, optional): Style for pulsing bars. Defaults to "bar.pulse". + disable (bool, optional): Disable display of progress. + Returns: + ContextManager[BinaryIO]: A context manager yielding a progress reader. + + """ + + columns: List["ProgressColumn"] = ( + [TextColumn("[progress.description]{task.description}")] if description else [] + ) + columns.extend( + ( + BarColumn( + style=style, + complete_style=complete_style, + finished_style=finished_style, + pulse_style=pulse_style, + ), + DownloadColumn(), + TimeRemainingColumn(), + ) + ) + progress = Progress( + *columns, + auto_refresh=auto_refresh, + console=console, + transient=transient, + get_time=get_time, + refresh_per_second=refresh_per_second or 10, + disable=disable, + ) + + reader = progress.wrap_file(file, total=total, description=description) + return _ReadContext(progress, reader) + + +@typing.overload +def open( + file: Union[str, "PathLike[str]", bytes], + mode: Union[Literal["rt"], Literal["r"]], + buffering: int = -1, + encoding: Optional[str] = None, + errors: Optional[str] = None, + newline: Optional[str] = None, + *, + total: Optional[int] = None, + description: str = "Reading...", + auto_refresh: bool = True, + console: Optional[Console] = None, + transient: bool = False, + get_time: 
Optional[Callable[[], float]] = None, + refresh_per_second: float = 10, + style: StyleType = "bar.back", + complete_style: StyleType = "bar.complete", + finished_style: StyleType = "bar.finished", + pulse_style: StyleType = "bar.pulse", + disable: bool = False, +) -> ContextManager[TextIO]: + pass + + +@typing.overload +def open( + file: Union[str, "PathLike[str]", bytes], + mode: Literal["rb"], + buffering: int = -1, + encoding: Optional[str] = None, + errors: Optional[str] = None, + newline: Optional[str] = None, + *, + total: Optional[int] = None, + description: str = "Reading...", + auto_refresh: bool = True, + console: Optional[Console] = None, + transient: bool = False, + get_time: Optional[Callable[[], float]] = None, + refresh_per_second: float = 10, + style: StyleType = "bar.back", + complete_style: StyleType = "bar.complete", + finished_style: StyleType = "bar.finished", + pulse_style: StyleType = "bar.pulse", + disable: bool = False, +) -> ContextManager[BinaryIO]: + pass + + +def open( + file: Union[str, "PathLike[str]", bytes], + mode: Union[Literal["rb"], Literal["rt"], Literal["r"]] = "r", + buffering: int = -1, + encoding: Optional[str] = None, + errors: Optional[str] = None, + newline: Optional[str] = None, + *, + total: Optional[int] = None, + description: str = "Reading...", + auto_refresh: bool = True, + console: Optional[Console] = None, + transient: bool = False, + get_time: Optional[Callable[[], float]] = None, + refresh_per_second: float = 10, + style: StyleType = "bar.back", + complete_style: StyleType = "bar.complete", + finished_style: StyleType = "bar.finished", + pulse_style: StyleType = "bar.pulse", + disable: bool = False, +) -> Union[ContextManager[BinaryIO], ContextManager[TextIO]]: + """Read bytes from a file while tracking progress. + + Args: + path (Union[str, PathLike[str], BinaryIO]): The path to the file to read, or a file-like object in binary mode. + mode (str): The mode to use to open the file. 
Only supports "r", "rb" or "rt". + buffering (int): The buffering strategy to use, see :func:`io.open`. + encoding (str, optional): The encoding to use when reading in text mode, see :func:`io.open`. + errors (str, optional): The error handling strategy for decoding errors, see :func:`io.open`. + newline (str, optional): The strategy for handling newlines in text mode, see :func:`io.open` + total: (int, optional): Total number of bytes to read. Must be provided if reading from a file handle. Default for a path is os.stat(file).st_size. + description (str, optional): Description of task show next to progress bar. Defaults to "Reading". + auto_refresh (bool, optional): Automatic refresh, disable to force a refresh after each iteration. Default is True. + transient: (bool, optional): Clear the progress on exit. Defaults to False. + console (Console, optional): Console to write to. Default creates internal Console instance. + refresh_per_second (float): Number of times per second to refresh the progress information. Defaults to 10. + style (StyleType, optional): Style for the bar background. Defaults to "bar.back". + complete_style (StyleType, optional): Style for the completed bar. Defaults to "bar.complete". + finished_style (StyleType, optional): Style for a finished bar. Defaults to "bar.done". + pulse_style (StyleType, optional): Style for pulsing bars. Defaults to "bar.pulse". + disable (bool, optional): Disable display of progress. + encoding (str, optional): The encoding to use when reading in text mode. + + Returns: + ContextManager[BinaryIO]: A context manager yielding a progress reader. 
+ + """ + + columns: List["ProgressColumn"] = ( + [TextColumn("[progress.description]{task.description}")] if description else [] + ) + columns.extend( + ( + BarColumn( + style=style, + complete_style=complete_style, + finished_style=finished_style, + pulse_style=pulse_style, + ), + DownloadColumn(), + TimeRemainingColumn(), + ) + ) + progress = Progress( + *columns, + auto_refresh=auto_refresh, + console=console, + transient=transient, + get_time=get_time, + refresh_per_second=refresh_per_second or 10, + disable=disable, + ) + + reader = progress.open( + file, + mode=mode, + buffering=buffering, + encoding=encoding, + errors=errors, + newline=newline, + total=total, + description=description, + ) + return _ReadContext(progress, reader) # type: ignore[return-value, type-var] + + class ProgressColumn(ABC): """Base class for a widget to use in progress display.""" @@ -794,6 +1127,157 @@ def track( advance(task_id, 1) refresh() + def wrap_file( + self, + file: BinaryIO, + total: Optional[int] = None, + *, + task_id: Optional[TaskID] = None, + description: str = "Reading...", + ) -> BinaryIO: + """Track progress file reading from a binary file. + + Args: + file (BinaryIO): A file-like object opened in binary mode. + total (int, optional): Total number of bytes to read. This must be provided unless a task with a total is also given. + task_id (TaskID): Task to track. Default is new task. + description (str, optional): Description of task, if new task is created. + + Returns: + BinaryIO: A readable file-like object in binary mode. + + Raises: + ValueError: When no total value can be extracted from the arguments or the task. 
+ """ + # attempt to recover the total from the task + total_bytes: Optional[float] = None + if total is not None: + total_bytes = total + elif task_id is not None: + with self._lock: + total_bytes = self._tasks[task_id].total + if total_bytes is None: + raise ValueError( + "unable to get the total number of bytes, please specify 'total'" + ) + + # update total of task or create new task + if task_id is None: + task_id = self.add_task(description, total=total_bytes) + else: + self.update(task_id, total=total_bytes) + + return _Reader(file, self, task_id, close_handle=False) + + @typing.overload + def open( + self, + file: Union[str, "PathLike[str]", bytes], + mode: Literal["rb"], + buffering: int = -1, + encoding: Optional[str] = None, + errors: Optional[str] = None, + newline: Optional[str] = None, + *, + total: Optional[int] = None, + task_id: Optional[TaskID] = None, + description: str = "Reading...", + ) -> BinaryIO: + pass + + @typing.overload + def open( + self, + file: Union[str, "PathLike[str]", bytes], + mode: Union[Literal["r"], Literal["rt"]] = "r", + buffering: int = -1, + encoding: Optional[str] = None, + errors: Optional[str] = None, + newline: Optional[str] = None, + *, + total: Optional[int] = None, + task_id: Optional[TaskID] = None, + description: str = "Reading...", + ) -> TextIO: + pass + + def open( + self, + file: Union[str, "PathLike[str]", bytes], + mode: Union[Literal["rb"], Literal["rt"], Literal["r"]] = "r", + buffering: int = -1, + encoding: Optional[str] = None, + errors: Optional[str] = None, + newline: Optional[str] = None, + *, + total: Optional[int] = None, + task_id: Optional[TaskID] = None, + description: str = "Reading...", + ) -> Union[BinaryIO, TextIO]: + """Track progress while reading from a file opened in binary or text mode. + + Args: + file (Union[str, PathLike[str], bytes]): The path to the file to read. + mode (str): The mode to use to open the file. Only supports "r", "rb" or "rt". 
+ buffering (int): The buffering strategy to use, see :func:`io.open`. 
+ encoding (str, optional): The encoding to use when reading in text mode, see :func:`io.open`. + errors (str, optional): The error handling strategy for decoding errors, see :func:`io.open`. + newline (str, optional): The strategy for handling newlines in text mode, see :func:`io.open`. + total (int, optional): Total number of bytes to read. If none given, os.stat(file).st_size is used. + task_id (TaskID): Task to track. Default is new task. + description (str, optional): Description of task, if new task is created. + + Returns: + Union[BinaryIO, TextIO]: A readable file-like object, in binary mode for "rb" and in text mode for "r" or "rt". + + Raises: + ValueError: When an invalid mode is given, or when unbuffered I/O (buffering=0) is requested in text mode. + """ + # normalize the mode by sorting its characters ("rb"/"br" -> "br", "rt"/"tr" -> "rt") + _mode = "".join(sorted(mode, reverse=False)) + if _mode not in ("br", "rt", "r"): + raise ValueError("invalid mode {!r}".format(mode)) + + # patch buffering to provide the same behaviour as the builtin `open` + line_buffering = buffering == 1 + if _mode == "br" and buffering == 1: + warnings.warn( + "line buffering (buffering=1) isn't supported in binary mode, the default buffer size will be used", + RuntimeWarning, + ) + buffering = -1 + elif _mode == "rt" or _mode == "r": + if buffering == 0: + raise ValueError("can't have unbuffered text I/O") + elif buffering == 1: + buffering = -1 + + # attempt to get the total with `os.stat` + if total is None: + total = stat(file).st_size + + # update total of task or create new task + if task_id is None: + task_id = self.add_task(description, total=total) + else: + self.update(task_id, total=total) + + # open the file in binary mode, + handle = io.open(file, "rb", buffering=buffering) + reader = _Reader(handle, self, task_id, close_handle=True) + + # wrap the reader in a `TextIOWrapper` if text mode (test the normalized mode, the same value that was validated above) + if _mode in ("r", "rt"): + return io.TextIOWrapper( + reader, + encoding=encoding, + errors=errors, + newline=newline, + line_buffering=line_buffering, + ) + + return reader + def start_task(self, task_id: TaskID) -> None: """Start a 
task. diff --git a/tests/test_progress.py b/tests/test_progress.py index db2e825d3..d3c6171c9 100644 --- a/tests/test_progress.py +++ b/tests/test_progress.py @@ -1,11 +1,14 @@ # encoding=utf-8 import io +import os +import tempfile from time import sleep from types import SimpleNamespace import pytest +import rich.progress from rich.progress_bar import ProgressBar from rich.console import Console from rich.highlighter import NullHighlighter @@ -549,6 +552,84 @@ def test_no_output_if_progress_is_disabled() -> None: assert result == expected +def test_open() -> None: + console = Console( + file=io.StringIO(), + force_terminal=True, + width=60, + color_system="truecolor", + legacy_windows=False, + _environ={}, + ) + progress = Progress( + console=console, + ) + + fd, filename = tempfile.mkstemp() + with os.fdopen(fd, "wb") as f: + f.write(b"Hello, World!") + try: + with rich.progress.open(filename) as f: + assert f.read() == "Hello, World!" + assert f.closed + finally: + os.remove(filename) + + +def test_open_text_mode() -> None: + fd, filename = tempfile.mkstemp() + with os.fdopen(fd, "wb") as f: + f.write(b"Hello, World!") + try: + with rich.progress.open(filename, "r") as f: + assert f.read() == "Hello, World!" + assert f.closed + finally: + os.remove(filename) + + +def test_wrap_file() -> None: + fd, filename = tempfile.mkstemp() + with os.fdopen(fd, "wb") as f: + total = f.write(b"Hello, World!") + try: + with open(filename, "rb") as file: + with rich.progress.wrap_file(file, total=total) as f: + assert f.read() == b"Hello, World!" 
+ assert f.closed + assert not f.handle.closed + assert not file.closed + assert file.closed + finally: + os.remove(filename) + + +def test_wrap_file_task_total() -> None: + console = Console( + file=io.StringIO(), + force_terminal=True, + width=60, + color_system="truecolor", + legacy_windows=False, + _environ={}, + ) + progress = Progress( + console=console, + ) + + fd, filename = tempfile.mkstemp() + with os.fdopen(fd, "wb") as f: + total = f.write(b"Hello, World!") + try: + with progress: + with open(filename, "rb") as file: + task_id = progress.add_task("Reading", total=total) + with progress.wrap_file(file, task_id=task_id) as f: + assert f.read() == b"Hello, World!" + finally: + os.remove(filename) + + if __name__ == "__main__": _render = render_progress() print(_render)