Skip to content

Commit

Permalink
Compress each file in a ThreadPool
Browse files Browse the repository at this point in the history
  • Loading branch information
rik committed Apr 2, 2023
1 parent 08a176a commit 32e2611
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 24 deletions.
21 changes: 14 additions & 7 deletions src/whitenoise/compress.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import argparse
import concurrent.futures
import gzip
import os
import re
Expand Down Expand Up @@ -77,7 +78,7 @@ def should_compress(self, filename):
def log(self, message):
pass

def compress(self, path):
def _lazy_compress(self, path):
with open(path, "rb") as f:
stat_result = os.fstat(f.fileno())
data = f.read()
Expand All @@ -94,6 +95,9 @@ def compress(self, path):
if self.is_compressed_effectively("Gzip", path, size, compressed):
yield self.write_data(path, compressed, ".gz", stat_result)

def compress(self, path):
return list(self._lazy_compress(path))

@staticmethod
def compress_gzip(data):
output = BytesIO()
Expand Down Expand Up @@ -133,6 +137,12 @@ def write_data(self, path, data, suffix, stat_result):
os.utime(filename, (stat_result.st_atime, stat_result.st_mtime))
return filename

def files_to_compress(self, root):
for dirpath, _dirs, files in os.walk(root):
for filename in files:
if self.should_compress(filename):
yield os.path.join(dirpath, filename)


def main(argv=None):
parser = argparse.ArgumentParser(
Expand Down Expand Up @@ -175,12 +185,9 @@ def main(argv=None):
use_brotli=args.use_brotli,
quiet=args.quiet,
)
for dirpath, _dirs, files in os.walk(args.root):
for filename in files:
if compressor.should_compress(filename):
path = os.path.join(dirpath, filename)
for _compressed in compressor.compress(path):
pass

with concurrent.futures.ThreadPoolExecutor() as executor:
executor.map(compressor.compress, compressor.files_to_compress(args.root))

return 0

Expand Down
54 changes: 37 additions & 17 deletions src/whitenoise/storage.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import concurrent.futures
import errno
import os
import re
Expand Down Expand Up @@ -30,15 +31,24 @@ def post_process(
return

extensions = getattr(settings, "WHITENOISE_SKIP_COMPRESS_EXTENSIONS", None)
compressor = self.create_compressor(extensions=extensions, quiet=True)

for path in paths:
if compressor.should_compress(path):
full_path = self.path(path)
prefix_len = len(full_path) - len(path)
for compressed_path in compressor.compress(full_path):
compressed_name = compressed_path[prefix_len:]
yield path, compressed_name, True
self.compressor = self.create_compressor(extensions=extensions, quiet=True)

to_compress = (path for path in paths if self.compressor.should_compress(path))
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = (
executor.submit(self._compress_one, path) for path in to_compress
)
for compressed_paths in concurrent.futures.as_completed(futures):
yield from compressed_paths.result()

def _compress_one(self, path: str) -> list[tuple[str, str, bool]]:
compressed: list[tuple[str, str, bool]] = []
full_path = self.path(path)
prefix_len = len(full_path) - len(path)
for compressed_path in self.compressor.compress(full_path):
compressed_name = compressed_path[prefix_len:]
compressed.append((path, compressed_name, True))
return compressed

def create_compressor(self, **kwargs: Any) -> Compressor:
return Compressor(**kwargs)
Expand Down Expand Up @@ -130,14 +140,24 @@ def create_compressor(self, **kwargs):

def compress_files(self, names):
extensions = getattr(settings, "WHITENOISE_SKIP_COMPRESS_EXTENSIONS", None)
compressor = self.create_compressor(extensions=extensions, quiet=True)
for name in names:
if compressor.should_compress(name):
path = self.path(name)
prefix_len = len(path) - len(name)
for compressed_path in compressor.compress(path):
compressed_name = compressed_path[prefix_len:]
yield name, compressed_name
self.compressor = self.create_compressor(extensions=extensions, quiet=True)

to_compress = (name for name in names if self.compressor.should_compress(name))
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = (
executor.submit(self._compress_one, name) for name in to_compress
)
for compressed_paths in concurrent.futures.as_completed(futures):
yield from compressed_paths.result()

def _compress_one(self, name: str) -> list[tuple[str, str]]:
compressed: list[tuple[str, str]] = []
path = self.path(name)
prefix_len = len(path) - len(name)
for compressed_path in self.compressor.compress(path):
compressed_name = compressed_path[prefix_len:]
compressed.append((name, compressed_name))
return compressed

def make_helpful_exception(self, exception, name):
"""
Expand Down

0 comments on commit 32e2611

Please sign in to comment.