Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compress each file in a ThreadPool #484

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 14 additions & 7 deletions src/whitenoise/compress.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import argparse
import concurrent.futures
import gzip
import os
import re
Expand Down Expand Up @@ -77,7 +78,7 @@ def should_compress(self, filename):
def log(self, message):
    """Discard *message*: silent default logging hook (quiet mode)."""
    pass

def compress(self, path):
def _lazy_compress(self, path):
with open(path, "rb") as f:
stat_result = os.fstat(f.fileno())
data = f.read()
Expand All @@ -94,6 +95,9 @@ def compress(self, path):
if self.is_compressed_effectively("Gzip", path, size, compressed):
yield self.write_data(path, compressed, ".gz", stat_result)

def compress(self, path):
    """Eagerly run ``_lazy_compress`` for *path* and return its results as a list."""
    return [*self._lazy_compress(path)]

@staticmethod
def compress_gzip(data):
output = BytesIO()
Expand Down Expand Up @@ -133,6 +137,12 @@ def write_data(self, path, data, suffix, stat_result):
os.utime(filename, (stat_result.st_atime, stat_result.st_mtime))
return filename

def files_to_compress(self, root):
    """Yield the full path of every file under *root* that should be compressed."""
    for dirpath, _dirs, filenames in os.walk(root):
        # Filter by name first; join only the survivors.
        candidates = (name for name in filenames if self.should_compress(name))
        for name in candidates:
            yield os.path.join(dirpath, name)


def main(argv=None):
parser = argparse.ArgumentParser(
Expand Down Expand Up @@ -175,12 +185,9 @@ def main(argv=None):
use_brotli=args.use_brotli,
quiet=args.quiet,
)
for dirpath, _dirs, files in os.walk(args.root):
for filename in files:
if compressor.should_compress(filename):
path = os.path.join(dirpath, filename)
for _compressed in compressor.compress(path):
pass

with concurrent.futures.ThreadPoolExecutor() as executor:
executor.map(compressor.compress, compressor.files_to_compress(args.root))

return 0

Expand Down
54 changes: 37 additions & 17 deletions src/whitenoise/storage.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import concurrent.futures
import errno
import os
import re
Expand Down Expand Up @@ -30,15 +31,24 @@ def post_process(
return

extensions = getattr(settings, "WHITENOISE_SKIP_COMPRESS_EXTENSIONS", None)
compressor = self.create_compressor(extensions=extensions, quiet=True)

for path in paths:
if compressor.should_compress(path):
full_path = self.path(path)
prefix_len = len(full_path) - len(path)
for compressed_path in compressor.compress(full_path):
compressed_name = compressed_path[prefix_len:]
yield path, compressed_name, True
self.compressor = self.create_compressor(extensions=extensions, quiet=True)

to_compress = (path for path in paths if self.compressor.should_compress(path))
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = (
executor.submit(self._compress_one, path) for path in to_compress
)
for compressed_paths in concurrent.futures.as_completed(futures):
yield from compressed_paths.result()

def _compress_one(self, path: str) -> list[tuple[str, str, bool]]:
compressed: list[tuple[str, str, bool]] = []
full_path = self.path(path)
prefix_len = len(full_path) - len(path)
for compressed_path in self.compressor.compress(full_path):
compressed_name = compressed_path[prefix_len:]
compressed.append((path, compressed_name, True))
return compressed

def create_compressor(self, **kwargs: Any) -> Compressor:
    """Return a new ``Compressor`` built from *kwargs*."""
    compressor = Compressor(**kwargs)
    return compressor
Expand Down Expand Up @@ -130,14 +140,24 @@ def create_compressor(self, **kwargs):

def compress_files(self, names):
extensions = getattr(settings, "WHITENOISE_SKIP_COMPRESS_EXTENSIONS", None)
compressor = self.create_compressor(extensions=extensions, quiet=True)
for name in names:
if compressor.should_compress(name):
path = self.path(name)
prefix_len = len(path) - len(name)
for compressed_path in compressor.compress(path):
compressed_name = compressed_path[prefix_len:]
yield name, compressed_name
self.compressor = self.create_compressor(extensions=extensions, quiet=True)

to_compress = (name for name in names if self.compressor.should_compress(name))
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = (
executor.submit(self._compress_one, name) for name in to_compress
)
for compressed_paths in concurrent.futures.as_completed(futures):
yield from compressed_paths.result()

def _compress_one(self, name: str) -> list[tuple[str, str]]:
compressed: list[tuple[str, str]] = []
path = self.path(name)
prefix_len = len(path) - len(name)
for compressed_path in self.compressor.compress(path):
compressed_name = compressed_path[prefix_len:]
compressed.append((name, compressed_name))
return compressed

def make_helpful_exception(self, exception, name):
"""
Expand Down