Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

progressbar cleanup #2436

Merged
merged 15 commits into from
Aug 29, 2019
56 changes: 46 additions & 10 deletions dvc/progress.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from copy import deepcopy
from concurrent.futures import ThreadPoolExecutor

logger = logging.getLogger(__name__)


class TqdmThreadPoolExecutor(ThreadPoolExecutor):
"""
Expand Down Expand Up @@ -33,43 +35,72 @@ class Tqdm(tqdm):
maximum-compatibility tqdm-based progressbars
"""

BAR_FMT_DEFAULT = (
"{percentage:3.0f}%|{bar:10}|{desc} {bar:-10b}{n}/{total}"
" [{elapsed}<{remaining}, {rate_fmt:>11}{postfix}]"
)
BAR_FMT_NOTOTAL = (
"{desc} {bar:b}{n} [{elapsed}<??:??, {rate_fmt:>11}{postfix}]"
)

def __init__(
self,
iterable=None,
disable=None,
bytes=False, # pylint: disable=W0622
level=logging.ERROR,
desc=None,
desc_truncate=None,
leave=None,
level_leave=logging.DEBUG,
casperdcl marked this conversation as resolved.
Show resolved Hide resolved
bar_format=None,
bytes=False, # pylint: disable=W0622
**kwargs
):
"""
bytes : shortcut for
`unit='B', unit_scale=True, unit_divisor=1024, miniters=1`
desc_truncate : like `desc` but will truncate to 10 chars
desc : persists after `close()`
desc_truncate : like `desc` but will `truncate()` and not persist
level : effective logging level for determining `disable`;
used only if `disable` is unspecified
level_leave : effective logging level for determining `leave`;
used only if `leave` is unspecified
kwargs : anything accepted by `tqdm.tqdm()`
"""
kwargs = deepcopy(kwargs)
casperdcl marked this conversation as resolved.
Show resolved Hide resolved
kwargs.setdefault("unit_scale", True)
if bytes:
for k, v in dict(
unit="B", unit_scale=True, unit_divisor=1024, miniters=1
).items():
kwargs.setdefault(k, v)
casperdcl marked this conversation as resolved.
Show resolved Hide resolved
if desc is not None:
self.desc_persist = desc
casperdcl marked this conversation as resolved.
Show resolved Hide resolved
if desc_truncate is not None:
kwargs.setdefault("desc", self.truncate(desc_truncate))
desc = self.truncate(desc_truncate)
if disable is None:
disable = (
logging.getLogger(__name__).getEffectiveLevel()
>= logging.CRITICAL
)
disable = logger.getEffectiveLevel() > level
if leave is None:
leave = logger.getEffectiveLevel() <= level_leave
if bar_format is None:
if kwargs.get("total", hasattr(iterable, "__len__")):
bar_format = self.BAR_FMT_DEFAULT
else:
bar_format = self.BAR_FMT_NOTOTAL
super(Tqdm, self).__init__(
iterable=iterable, disable=disable, leave=leave, **kwargs
iterable=iterable,
disable=disable,
leave=leave,
desc=desc,
bar_format=bar_format,
**kwargs
)

def update_desc(self, desc, n=1, truncate=True):
"""
Calls `set_description(truncate(desc))` and `update(n)`
Calls `set_description_str(truncate(desc))` and `update(n)`
"""
self.set_description(
self.set_description_str(
self.truncate(desc) if truncate else desc, refresh=False
)
self.update(n)
Expand All @@ -79,6 +110,11 @@ def update_to(self, current, total=None):
self.total = total # pylint: disable=W0613,W0201
self.update(current - self.n)

def close(self):
if hasattr(self, "desc_persist"):
self.set_description_str(self.desc_persist, refresh=False)
casperdcl marked this conversation as resolved.
Show resolved Hide resolved
super(Tqdm, self).close()

@classmethod
def truncate(cls, s, max_len=25, end=True, fill="..."):
"""
Expand Down
4 changes: 3 additions & 1 deletion dvc/remote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,9 @@ def cache_exists(self, checksums, jobs=None):
if not self.no_traverse:
return list(set(checksums) & set(self.all()))

with Tqdm(total=len(checksums), unit="md5") as pbar:
with Tqdm(
desc="Querying remote cache", total=len(checksums), unit="md5"
) as pbar:

def exists_with_progress(path_info):
ret = self.exists(path_info)
Expand Down
12 changes: 7 additions & 5 deletions dvc/remote/local/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,9 @@ def move(self, from_info, to_info):
def cache_exists(self, checksums, jobs=None):
return [
checksum
for checksum in Tqdm(checksums, unit="md5")
for checksum in Tqdm(
checksums, unit="md5", desc="Querying local cache"
)
if not self.changed_cache_file(checksum)
]

Expand Down Expand Up @@ -313,13 +315,13 @@ def status(
show_checksums=False,
download=False,
):
logger.info(
logger.debug(
casperdcl marked this conversation as resolved.
Show resolved Hide resolved
"Preparing to collect status from {}".format(remote.path_info)
casperdcl marked this conversation as resolved.
Show resolved Hide resolved
)
ret = self._group(checksum_infos, show_checksums=show_checksums) or {}
md5s = list(ret)

logger.info("Collecting information from local cache...")
logger.debug("Collecting information from local cache...")
local_exists = self.cache_exists(md5s, jobs=jobs)

# This is a performance optimization. We can safely assume that,
Expand All @@ -329,7 +331,7 @@ def status(
if download and sorted(local_exists) == sorted(md5s):
remote_exists = local_exists
else:
logger.info("Collecting information from remote cache...")
logger.debug("Collecting information from remote cache...")
remote_exists = list(remote.cache_exists(md5s, jobs=jobs))

self._fill_statuses(ret, local_exists, remote_exists)
Expand Down Expand Up @@ -377,7 +379,7 @@ def _process(
show_checksums=False,
download=False,
):
logger.info(
logger.debug(
"Preparing to {} '{}'".format(
"download data from" if download else "upload data to",
remote.path_info,
Expand Down
5 changes: 3 additions & 2 deletions dvc/remote/ssh/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,9 @@ def cache_exists(self, checksums, jobs=None):
if not self.no_traverse:
return list(set(checksums) & set(self.all()))

with Tqdm(total=len(checksums), unit="md5") as pbar:
with Tqdm(
desc="Querying remote cache", total=len(checksums), unit="md5"
) as pbar:

def exists_with_progress(chunks):
return self.batch_exists(chunks, callback=pbar.update_desc)
Expand All @@ -278,5 +280,4 @@ def exists_with_progress(chunks):
results = executor.map(exists_with_progress, chunks)
in_remote = itertools.chain.from_iterable(results)
ret = list(itertools.compress(checksums, in_remote))
pbar.update_desc("", 0) # clear path name description
return ret
9 changes: 7 additions & 2 deletions dvc/repo/checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,14 @@ def checkout(self, target=None, with_deps=False, force=False, recursive=False):
with self.state:
_cleanup_unused_links(self, all_stages)
total = get_all_files_numbers(stages)
if total == 0:
logger.info("Nothing to do")
with Tqdm(
total=total, unit="file", desc="Checkout", disable=total == 0
total=total,
unit="file",
desc="Checkout",
disable=total == 0,
level_leave=logging.INFO,
) as pbar:
for stage in stages:
if stage.locked:
Expand All @@ -51,4 +57,3 @@ def checkout(self, target=None, with_deps=False, force=False, recursive=False):
)

stage.checkout(force=force, progress_callback=pbar.update_desc)
pbar.update_desc("Checkout", 0) # clear path name description
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def run(self):
"funcy>=1.12",
"pathspec>=0.5.9",
"shortuuid>=0.5.0",
"tqdm>=4.34.0",
"tqdm>=4.35.0",
"win-unicode-console>=0.5; sys_platform == 'win32'",
]

Expand Down