Skip to content

Commit

Permalink
DDPB-2847: Use a maintained PDF generator (#197)
Browse files Browse the repository at this point in the history
* Port wkhtmltopdf to maintained image

Copied from opg-docker repo, but now using Alpine and Python 3.x

* Switch from Alpine to Debian

The Alpine version doesn't include the Qt patch, which is essential for rendering custom fonts.

Debian is the default OS used by `python:3`

* Build wkhtmltopdf image in build process

* Specify version of wkhtmltopdf to use

* Use apt-get rather than apt

* Add cronable script to clean files from /tmp

* Install and run cron on container

We need `service cron restart` to ensure that the cron process starts and acknowledges our new file in `cron.hourly`

* Increase cleanup delay to five minutes

Just to give some extra breathing room for currently-running processes

* Don't expose wkhtmltopdf port

It's not necessary and wasn't the case before
  • Loading branch information
gregtyler committed Jan 13, 2020
1 parent 99aa27f commit 93049fa
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 5 deletions.
3 changes: 3 additions & 0 deletions docker-compose.ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@ services:
test:
image: ${AWS_REGISTRY}/digideps/test:${VERSION}
build: ./behat
wkhtmltopdf:
image: ${AWS_REGISTRY}/digideps/wkhtmltopdf:${VERSION}
build: ./wkhtmltopdf
10 changes: 6 additions & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ services:
- ./api/docker/env/api.env
restart: always

wkhtmltopdf:
build: ./wkhtmltopdf
volumes:
- ./wkhtmltopdf/app.py:/app.py
restart: always

postgres:
image: postgres:9.6
ports:
Expand All @@ -144,10 +150,6 @@ services:
image: redis:2.8.21
restart: always

wkhtmltopdf:
image: registry.service.opg.digital/opguk/wkhtmlpdf:0.1.209
restart: always

file-scanner-server:
image: mkodockx/docker-clamav

Expand Down
2 changes: 1 addition & 1 deletion environment/ecr.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ locals {
file_scanner = "${data.aws_ecr_repository.images["file-scanner"].repository_url}:${var.OPG_DOCKER_TAG}"
sync = "${data.aws_ecr_repository.images["sync"].repository_url}:${var.OPG_DOCKER_TAG}"
test = "${data.aws_ecr_repository.images["test"].repository_url}:${var.OPG_DOCKER_TAG}"
wkhtmltopdf = "${data.aws_ecr_repository.images["wkhtmltopdf"].repository_url}:latest"
wkhtmltopdf = "${data.aws_ecr_repository.images["wkhtmltopdf"].repository_url}:${var.OPG_DOCKER_TAG}"
}

repositories = [
Expand Down
23 changes: 23 additions & 0 deletions wkhtmltopdf/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
FROM python:3

# Install cron (runs the hourly /tmp cleanup), wget, the X font packages and
# the pinned wkhtmltopdf 0.12.5 release package.
# The downloaded .deb and the apt package lists are deleted in the same RUN
# step so they do not end up stored in the image layer.
RUN apt-get update \
    && apt-get -y install cron wget xfonts-base xfonts-75dpi \
    && wget https://github.com/wkhtmltopdf/wkhtmltopdf/releases/download/0.12.5/wkhtmltox_0.12.5-1.stretch_amd64.deb \
    && dpkg -i wkhtmltox_0.12.5-1.stretch_amd64.deb \
    && rm wkhtmltox_0.12.5-1.stretch_amd64.deb \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies for running web service
RUN pip install werkzeug executor gunicorn

# COPY rather than ADD: these are plain local files, so ADD's archive
# extraction and URL handling are not wanted.
COPY docker-entrypoint.sh /docker-entrypoint.sh
COPY app.py /app.py
COPY clean-tmp /etc/cron.hourly/clean-tmp
EXPOSE 80

# Make commands executable
RUN ["chmod", "+x", "/etc/cron.hourly/clean-tmp"]
RUN ["chmod", "+x", "/docker-entrypoint.sh"]

# The entrypoint starts cron and then runs gunicorn with the CMD arguments.
ENTRYPOINT ["/docker-entrypoint.sh"]

CMD ["-b", "0.0.0.0:80", "--log-file", "-", "app:application"]
73 changes: 73 additions & 0 deletions wkhtmltopdf/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#! /usr/bin/env python
"""
WSGI app that exposes wkhtmltopdf as a web service
:copyright: (c) 2013 by Openlabs Technologies & Consulting (P) Limited
:license: BSD, see LICENSE for more details.
"""
import base64
import json
import tempfile

from werkzeug.wsgi import wrap_file
from werkzeug.wrappers import Request, Response
from executor import execute


@Request.application
def application(request):
    """
    Convert posted HTML to a PDF with wkhtmltopdf and return the PDF.

    The request must be a POST in one of two shapes:

    * JSON body (content type ending in ``json``): the ``contents`` key
      holds the base64 encoded HTML and the optional ``options`` key holds
      an object of wkhtmltopdf options.
    * Form upload: the HTML is uploaded as the ``file`` field and the
      optional ``options`` form field holds a JSON object of wkhtmltopdf
      options.

    Every option ``name: value`` is passed to wkhtmltopdf as
    ``--name value``; an option with a falsy value is passed as the bare
    ``--name`` flag.

    Returns a response with the PDF file (``application/pdf``), a 405
    response for non-POST requests, or a 400 response when the request
    carries neither a JSON payload nor an uploaded file.
    """
    if request.method != 'POST':
        # A response object must always be returned; a bare ``return``
        # would hand None back to the WSGI layer.
        return Response(
            'Method Not Allowed', status=405, headers={'Allow': 'POST'}
        )

    # Guard against a request that carries no Content-Type header at all.
    request_is_json = (request.content_type or '').endswith('json')

    with tempfile.NamedTemporaryFile(suffix='.html') as source_file:

        if request_is_json:
            # If a JSON payload is there, all data is in the payload
            payload = json.loads(request.data)
            source_file.write(base64.b64decode(payload['contents']))
            options = payload.get('options', {})
        elif request.files:
            # Otherwise the HTML comes from the uploaded file
            source_file.write(request.files['file'].read())
            # Load any options that may have been provided in options
            options = json.loads(request.form.get('options', '{}'))
        else:
            # Neither input shape matched: there is nothing to convert.
            return Response('No HTML content supplied', status=400)

        # Make sure the HTML is on disk before wkhtmltopdf reads it.
        source_file.flush()

        # Build the command as a list of separate arguments
        args = ['wkhtmltopdf']

        # Add Global Options
        if options:
            for option, value in options.items():
                args.append('--%s' % option)
                if value:
                    args.append(str(value))

        # Add source file name and output file name
        file_name = source_file.name
        pdf_name = file_name + '.pdf'
        args += [file_name, pdf_name]

        # Pass the arguments separately instead of as one joined string so
        # that option names and values taken from the request are never
        # interpreted by a shell.
        execute(*args)

        # The PDF is binary data, so it has to be opened in binary mode.
        # The output file is not removed here.
        return Response(
            wrap_file(request.environ, open(pdf_name, 'rb')),
            mimetype='application/pdf',
            direct_passthrough=True,
        )


if __name__ == '__main__':
    # Development entry point: serve the application on 127.0.0.1:5000
    # with the debugger and the reloader switched on.
    from werkzeug.serving import run_simple

    dev_server_options = {'use_debugger': True, 'use_reloader': True}
    run_simple('127.0.0.1', 5000, application, **dev_server_options)
3 changes: 3 additions & 0 deletions wkhtmltopdf/clean-tmp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/sh

# Delete regular files under /tmp that were last modified more than five
# minutes ago.
find /tmp -type f -mmin +5 -delete
4 changes: 4 additions & 0 deletions wkhtmltopdf/docker-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/sh
# Container entrypoint: start cron, then run gunicorn with the arguments
# passed to the container.

# Restart cron so that the cron process is running inside the container.
service cron restart

# Use the absolute path so the script does not depend on the working
# directory, quote "$@" so each argument is passed through unchanged, and
# exec so gunicorn replaces this shell and receives signals directly.
exec /usr/local/bin/gunicorn "$@"

0 comments on commit 93049fa

Please sign in to comment.