Skip to content

Commit

Permalink
change: encode non-JPEG images as PNGs instead of JPEG2000 images
Browse files Browse the repository at this point in the history
This uses Pillow to re-encode any non-JPEG image as a PNG, then inline that
image's IDAT chunks as a FlateDecode value, which allows us to reuse the work
from the PNG encoder. This means we'll re-encode some PNGs we could have passed
through directly, but that could be changed later. Alpha layers continue to be
handled separately, as appears to be required by the PDF spec.
  • Loading branch information
aschmitz committed Oct 29, 2021
1 parent a149af9 commit 730d4f3
Showing 1 changed file with 50 additions and 16 deletions.
66 changes: 50 additions & 16 deletions weasyprint/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import io
import math
import shutil
import struct
import zlib
from os.path import basename
from urllib.parse import unquote, urlsplit
Expand Down Expand Up @@ -255,19 +256,34 @@ def add_group(self, bounding_box):
self._x_objects[group.id] = group
return group

def _save_jpeg2000(self, pillow_image, optimize):
def _save_png(self, pillow_image, optimize):
image_file = io.BytesIO()
try:
pillow_image.save(image_file, format='JPEG2000', optimize=optimize)
except OSError:
# Set number of resolutions to 1 because of
# https://github.com/uclouvain/openjpeg/issues/215
image_file.seek(0)
pillow_image.save(
image_file, format='JPEG2000', optimize=optimize,
num_resolutions=1)
pillow_image.save(image_file, format='PNG', optimize=optimize)
return image_file

def _get_png_data(self, image_file):
    """Return the concatenated data of every IDAT chunk in a PNG stream.

    ``image_file`` is a seekable binary file-like object holding a PNG
    image. It is rewound to its start before parsing, so the current
    position on entry does not matter. The 8-byte signature and the
    per-chunk CRCs are skipped without validation: the input is assumed to
    be a valid PNG (the original code expects fresh output from Pillow).

    Returns a ``bytes`` object made of the payloads of all ``IDAT`` chunks,
    in file order; ``b''`` when no such chunk is present.

    Raises ``struct.error`` if the stream ends with a partial (1-3 byte)
    chunk length field.
    """
    image_file.seek(0)
    # Skip the PNG header without binding it: we know it's a PNG. If this
    # weren't just output from Pillow, we should actually check it.
    image_file.read(8)

    # Collect the payloads and join them once at the end instead of
    # growing a bytes object with ``+=`` on every IDAT chunk.
    idat_parts = []
    raw_chunk_len = image_file.read(4)
    # PNG files consist of a series of chunks; an empty read means EOF.
    while raw_chunk_len:
        # Each chunk begins with its data length (four bytes, may be zero),
        # then its type (four ASCII characters), then the data, then four
        # bytes of a CRC.
        chunk_len, = struct.unpack('!I', raw_chunk_len)
        chunk_type = image_file.read(4)
        chunk_data = image_file.read(chunk_len)
        if chunk_type == b'IDAT':
            idat_parts.append(chunk_data)
        # We aren't checking the CRC, we assume this is a valid PNG; the
        # read only advances past it to the next chunk.
        image_file.read(4)
        raw_chunk_len = image_file.read(4)
    return b''.join(idat_parts)

def add_image(self, pillow_image, image_rendering, optimize_size):
image_name = f'i{pillow_image.id}'
self._x_objects[image_name] = None # Set by write_pdf
Expand Down Expand Up @@ -306,24 +322,42 @@ def add_image(self, pillow_image, image_rendering, optimize_size):
extra['Filter'] = '/DCTDecode'
image_file = io.BytesIO()
pillow_image.save(image_file, format='JPEG', optimize=optimize)
stream = [image_file.getvalue()]
else:
extra['Filter'] = '/JPXDecode'
extra['Filter'] = '/FlateDecode'
extra['DecodeParms'] = pydyf.Dictionary({
# Predictor 15 specifies that we're providing PNG data,
# ostensibly using an "optimum predictor", but doesn't actually
# matter as long as the predictor value is 10+ according to the
# spec. (Other PNG predictor values assert that we're using
# specific predictors that we don't want to commit to, but
# "optimum" can vary.)
'Predictor': 15,
'Columns': pillow_image.width,
})
if pillow_image.mode in ('RGB', 'RGBA'):
# Defaults to 1.
extra['DecodeParms']['Colors'] = 3
if pillow_image.mode in ('RGBA', 'LA'):
alpha = pillow_image.getchannel('A')
pillow_image = pillow_image.convert(pillow_image.mode[:-1])
alpha_file = self._save_jpeg2000(alpha, optimize)
extra['SMask'] = pydyf.Stream([alpha_file.getvalue()], extra={
'Filter': '/JPXDecode',
alpha_file = self._save_png(alpha, optimize)
extra['SMask'] = pydyf.Stream([self._get_png_data(alpha_file)], extra={
'Filter': '/FlateDecode',
'Type': '/XObject',
'Subtype': '/Image',
'DecodeParms': pydyf.Dictionary({
'Predictor': 15,
'Columns': pillow_image.width,
}),
'Width': pillow_image.width,
'Height': pillow_image.height,
'ColorSpace': '/DeviceGray',
'BitsPerComponent': 8,
'Interpolate': interpolate,
})
image_file = self._save_jpeg2000(pillow_image, optimize)
stream = [image_file.getvalue()]
image_file = self._save_png(pillow_image, optimize)
stream = [self._get_png_data(image_file)]

xobject = pydyf.Stream(stream, extra=extra)
self._images[image_name] = xobject
Expand Down

0 comments on commit 730d4f3

Please sign in to comment.