From 730d4f3221edc1bbc7b9aec3a8553b32a46083e1 Mon Sep 17 00:00:00 2001 From: aschmitz <29508+aschmitz@users.noreply.github.com> Date: Fri, 29 Oct 2021 00:36:04 -0500 Subject: [PATCH] change: encode non-JPEG images as PNGs instead of JPEG2000 images This uses Pillow to re-encode any non-JPEG image as a PNG, then inline that image's IDAT chunks as a FlateDecode value, which allows us to reuse the work from the PNG encoder. This means we'll reencode some PNGs we could have passed through directly, but that could be changed later. Alpha layers continue to be handled separately, as appears to be required by the PDF spec. --- weasyprint/document.py | 66 ++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/weasyprint/document.py b/weasyprint/document.py index 4fa5329854..e5b890fafb 100644 --- a/weasyprint/document.py +++ b/weasyprint/document.py @@ -10,6 +10,7 @@ import io import math import shutil +import struct import zlib from os.path import basename from urllib.parse import unquote, urlsplit @@ -255,19 +256,34 @@ def add_group(self, bounding_box): self._x_objects[group.id] = group return group - def _save_jpeg2000(self, pillow_image, optimize): + def _save_png(self, pillow_image, optimize): image_file = io.BytesIO() - try: - pillow_image.save(image_file, format='JPEG2000', optimize=optimize) - except OSError: - # Set number of resolutions to 1 because of - # https://github.com/uclouvain/openjpeg/issues/215 - image_file.seek(0) - pillow_image.save( - image_file, format='JPEG2000', optimize=optimize, - num_resolutions=1) + pillow_image.save(image_file, format='PNG', optimize=optimize) return image_file + def _get_png_data(self, image_file): + image_file.seek(0) + # Read the PNG header, then discard it because we know it's a PNG. If + # this weren't just output from Pillow, we should actually check it. + res=image_file.read(8) + + png_data = b'' + raw_chunk_len = image_file.read(4) + # PNG files consist of a series of chunks. + while len(raw_chunk_len) > 0: + # Each chunk begins with its data length (four bytes, may be zero), + # then its type (four ASCII characters), then the data, then four + # bytes of a CRC. + chunk_len, = struct.unpack('!I', raw_chunk_len) + chunk_type = image_file.read(4) + chunk_data = image_file.read(chunk_len) + if chunk_type == b'IDAT': + png_data += chunk_data + # We aren't checking the CRC, we assume this is a valid PNG. + _chunk_crc = image_file.read(4) + raw_chunk_len = image_file.read(4) + return png_data + def add_image(self, pillow_image, image_rendering, optimize_size): image_name = f'i{pillow_image.id}' self._x_objects[image_name] = None # Set by write_pdf @@ -306,24 +322,42 @@ def add_image(self, pillow_image, image_rendering, optimize_size): extra['Filter'] = '/DCTDecode' image_file = io.BytesIO() pillow_image.save(image_file, format='JPEG', optimize=optimize) + stream = [image_file.getvalue()] else: - extra['Filter'] = '/JPXDecode' + extra['Filter'] = '/FlateDecode' + extra['DecodeParms'] = pydyf.Dictionary({ + # Predictor 15 specifies that we're providing PNG data, + # ostensibly using an "optimum predictor", but doesn't actually + # matter as long as the predictor value is 10+ according to the + # spec. (Other PNG predictor values assert that we're using + # specific predictors that we don't want to commit to, but + # "optimum" can vary.) + 'Predictor': 15, + 'Columns': pillow_image.width, + }) + if pillow_image.mode in ('RGB', 'RGBA'): + # Defaults to 1. + extra['DecodeParms']['Colors'] = 3 if pillow_image.mode in ('RGBA', 'LA'): alpha = pillow_image.getchannel('A') pillow_image = pillow_image.convert(pillow_image.mode[:-1]) - alpha_file = self._save_jpeg2000(alpha, optimize) - extra['SMask'] = pydyf.Stream([alpha_file.getvalue()], extra={ - 'Filter': '/JPXDecode', + alpha_file = self._save_png(alpha, optimize) + extra['SMask'] = pydyf.Stream([self._get_png_data(alpha_file)], extra={ + 'Filter': '/FlateDecode', 'Type': '/XObject', 'Subtype': '/Image', + 'DecodeParms': pydyf.Dictionary({ + 'Predictor': 15, + 'Columns': pillow_image.width, + }), 'Width': pillow_image.width, 'Height': pillow_image.height, 'ColorSpace': '/DeviceGray', 'BitsPerComponent': 8, 'Interpolate': interpolate, }) - image_file = self._save_jpeg2000(pillow_image, optimize) - stream = [image_file.getvalue()] + image_file = self._save_png(pillow_image, optimize) + stream = [self._get_png_data(image_file)] xobject = pydyf.Stream(stream, extra=extra) self._images[image_name] = xobject