Skip to content

Commit

Permalink
speed up cache by approximately 42x by avoiding pathlib (#1953)
Browse files Browse the repository at this point in the history
  • Loading branch information
asottile committed Feb 4, 2021
1 parent df4dd38 commit 3fca540
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 23 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Expand Up @@ -26,6 +26,8 @@

- use lowercase hex strings (#1692)

- speed up caching by avoiding pathlib (#1950)

#### _Packaging_

- Self-contained native _Black_ binaries are now provided for releases via GitHub
Expand Down
14 changes: 9 additions & 5 deletions src/black/__init__.py
Expand Up @@ -93,7 +93,7 @@
Timestamp = float
FileSize = int
CacheInfo = Tuple[Timestamp, FileSize]
Cache = Dict[Path, CacheInfo]
Cache = Dict[str, CacheInfo]
out = partial(click.secho, bold=True, err=True)
err = partial(click.secho, fg="red", err=True)

Expand Down Expand Up @@ -724,7 +724,8 @@ def reformat_one(
if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
cache = read_cache(mode)
res_src = src.resolve()
if res_src in cache and cache[res_src] == get_cache_info(res_src):
res_src_s = str(res_src)
if res_src_s in cache and cache[res_src_s] == get_cache_info(res_src):
changed = Changed.CACHED
if changed is not Changed.CACHED and format_file_in_place(
src, fast=fast, write_back=write_back, mode=mode
Expand Down Expand Up @@ -6781,8 +6782,8 @@ def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set
"""
todo, done = set(), set()
for src in sources:
src = src.resolve()
if cache.get(src) != get_cache_info(src):
res_src = src.resolve()
if cache.get(str(res_src)) != get_cache_info(res_src):
todo.add(src)
else:
done.add(src)
Expand All @@ -6794,7 +6795,10 @@ def write_cache(cache: Cache, sources: Iterable[Path], mode: Mode) -> None:
cache_file = get_cache_file(mode)
try:
CACHE_DIR.mkdir(parents=True, exist_ok=True)
new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}}
new_cache = {
**cache,
**{str(src.resolve()): get_cache_info(src) for src in sources},
}
with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f:
pickle.dump(new_cache, f, protocol=4)
os.replace(f.name, cache_file)
Expand Down
39 changes: 21 additions & 18 deletions tests/test_black.py
Expand Up @@ -1003,7 +1003,7 @@ def test_cache_broken_file(self) -> None:
fobj.write("print('hello')")
self.invokeBlack([str(src)])
cache = black.read_cache(mode)
self.assertIn(src, cache)
self.assertIn(str(src), cache)

def test_cache_single_file_already_cached(self) -> None:
mode = DEFAULT_MODE
Expand Down Expand Up @@ -1035,8 +1035,8 @@ def test_cache_multiple_files(self) -> None:
with two.open("r") as fobj:
self.assertEqual(fobj.read(), 'print("hello")\n')
cache = black.read_cache(mode)
self.assertIn(one, cache)
self.assertIn(two, cache)
self.assertIn(str(one), cache)
self.assertIn(str(two), cache)

def test_no_cache_when_writeback_diff(self) -> None:
mode = DEFAULT_MODE
Expand Down Expand Up @@ -1116,8 +1116,8 @@ def test_write_cache_read_cache(self) -> None:
src.touch()
black.write_cache({}, [src], mode)
cache = black.read_cache(mode)
self.assertIn(src, cache)
self.assertEqual(cache[src], black.get_cache_info(src))
self.assertIn(str(src), cache)
self.assertEqual(cache[str(src)], black.get_cache_info(src))

def test_filter_cached(self) -> None:
with TemporaryDirectory() as workspace:
Expand All @@ -1128,7 +1128,10 @@ def test_filter_cached(self) -> None:
uncached.touch()
cached.touch()
cached_but_changed.touch()
cache = {cached: black.get_cache_info(cached), cached_but_changed: (0.0, 0)}
cache = {
str(cached): black.get_cache_info(cached),
str(cached_but_changed): (0.0, 0),
}
todo, done = black.filter_cached(
cache, {uncached, cached, cached_but_changed}
)
Expand Down Expand Up @@ -1156,8 +1159,8 @@ def test_failed_formatting_does_not_get_cached(self) -> None:
fobj.write('print("hello")\n')
self.invokeBlack([str(workspace)], exit_code=123)
cache = black.read_cache(mode)
self.assertNotIn(failing, cache)
self.assertIn(clean, cache)
self.assertNotIn(str(failing), cache)
self.assertIn(str(clean), cache)

def test_write_cache_write_fail(self) -> None:
mode = DEFAULT_MODE
Expand Down Expand Up @@ -1210,9 +1213,9 @@ def test_read_cache_line_lengths(self) -> None:
path.touch()
black.write_cache({}, [path], mode)
one = black.read_cache(mode)
self.assertIn(path, one)
self.assertIn(str(path), one)
two = black.read_cache(short_mode)
self.assertNotIn(path, two)
self.assertNotIn(str(path), two)

def test_single_file_force_pyi(self) -> None:
pyi_mode = replace(DEFAULT_MODE, is_pyi=True)
Expand All @@ -1226,9 +1229,9 @@ def test_single_file_force_pyi(self) -> None:
actual = fh.read()
# verify cache with --pyi is separate
pyi_cache = black.read_cache(pyi_mode)
self.assertIn(path, pyi_cache)
self.assertIn(str(path), pyi_cache)
normal_cache = black.read_cache(DEFAULT_MODE)
self.assertNotIn(path, normal_cache)
self.assertNotIn(str(path), normal_cache)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(contents, actual)
black.assert_stable(contents, actual, pyi_mode)
Expand All @@ -1255,8 +1258,8 @@ def test_multi_file_force_pyi(self) -> None:
pyi_cache = black.read_cache(pyi_mode)
normal_cache = black.read_cache(reg_mode)
for path in paths:
self.assertIn(path, pyi_cache)
self.assertNotIn(path, normal_cache)
self.assertIn(str(path), pyi_cache)
self.assertNotIn(str(path), normal_cache)

def test_pipe_force_pyi(self) -> None:
source, expected = read_data("force_pyi")
Expand All @@ -1280,9 +1283,9 @@ def test_single_file_force_py36(self) -> None:
actual = fh.read()
# verify cache with --target-version is separate
py36_cache = black.read_cache(py36_mode)
self.assertIn(path, py36_cache)
self.assertIn(str(path), py36_cache)
normal_cache = black.read_cache(reg_mode)
self.assertNotIn(path, normal_cache)
self.assertNotIn(str(path), normal_cache)
self.assertEqual(actual, expected)

@event_loop()
Expand All @@ -1307,8 +1310,8 @@ def test_multi_file_force_py36(self) -> None:
pyi_cache = black.read_cache(py36_mode)
normal_cache = black.read_cache(reg_mode)
for path in paths:
self.assertIn(path, pyi_cache)
self.assertNotIn(path, normal_cache)
self.assertIn(str(path), pyi_cache)
self.assertNotIn(str(path), normal_cache)

def test_pipe_force_py36(self) -> None:
source, expected = read_data("force_py36")
Expand Down

0 comments on commit 3fca540

Please sign in to comment.