/
tests.py
135 lines (105 loc) · 4.34 KB
/
tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
import sys
import unittest
import binascii
from PyPDF2 import PdfFileReader, PdfFileWriter
# Locate the directories this module depends on, all derived from this file's
# own location: the directory holding the tests, its parent (the project
# root), and the "Resources" directory under the project root.
TESTS_ROOT = os.path.dirname(os.path.abspath(__file__))
PROJECT_ROOT = os.path.split(TESTS_ROOT)[0]
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")
# Put the project root on the import search path.
sys.path.append(PROJECT_ROOT)
# ``u_`` is defined once, according to the major version of the running
# interpreter.
if sys.version_info[0] >= 3:

    def u_(s):
        """Return ``s`` unchanged (Python 3 and later)."""
        return s

else:

    def u_(s):
        """Return ``s`` decoded as UTF-8 (Python 2)."""
        return s.decode("utf-8")
class PdfReaderTestCases(unittest.TestCase):
    """Checks that read sample PDFs from ``RESOURCE_ROOT`` with ``PdfFileReader``."""

    def test_PdfReaderFileLoad(self):
        """
        Load ``crazyones.pdf``, extract the text of its first page and compare it
        with the reference bytes stored in ``crazyones.txt``.
        Expected outcome: file loads, text matches expected.
        """
        pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
        reference_path = os.path.join(RESOURCE_ROOT, "crazyones.txt")

        with open(pdf_path, "rb") as pdf_stream:
            reader = PdfFileReader(pdf_stream)
            first_page = reader.getPage(0)

            # Reference text is read as raw bytes.
            with open(reference_path, "rb") as reference_stream:
                expected = reference_stream.read()

            # Newlines are dropped from the extracted text, which is then
            # encoded as UTF-8 so it can be compared with the reference bytes.
            extracted = first_page.extractText()
            extracted = extracted.replace("\n", "").encode("utf-8")

            failure_msg = (
                "PDF extracted text differs from expected value.\n\n"
                "Expected:\n\n%r\n\nExtracted:\n\n%r\n\n"
            ) % (expected, extracted)
            self.assertEqual(extracted, expected, msg=failure_msg)

    def test_PdfReaderJpegImage(self):
        """
        Load ``jpeg.pdf``, fetch the ``/Im4`` entry from the first page's
        ``/XObject`` resources and compare the hex encoding of its data with the
        reference string stored in ``jpeg.txt``.
        Expected outcome: file loads, image matches expected.
        """
        pdf_path = os.path.join(RESOURCE_ROOT, "jpeg.pdf")
        reference_path = os.path.join(RESOURCE_ROOT, "jpeg.txt")

        with open(pdf_path, "rb") as pdf_stream:
            reader = PdfFileReader(pdf_stream)

            # The reference file is read in text mode and compared as a string.
            with open(reference_path, "r") as reference_stream:
                expected_hex = reference_stream.read()

            page_resources = reader.getPage(0)["/Resources"]
            x_objects = page_resources["/XObject"].getObject()
            image_data = x_objects["/Im4"].getData()
            actual_hex = binascii.hexlify(image_data).decode()

            failure_msg = (
                "PDF extracted image differs from expected value.\n\n"
                "Expected:\n\n%r\n\nExtracted:\n\n%r\n\n"
            ) % (expected_hex, actual_hex)
            self.assertEqual(actual_hex, expected_hex, msg=failure_msg)
class AddJsTestCase(unittest.TestCase):
    """Checks what ``PdfFileWriter.addJS`` leaves in the writer's root object."""

    # Script passed to addJS by every test in this case.
    _PRINT_JS = "this.print({bUI:true,bSilent:false,bShrinkToFit:true});"

    def setUp(self):
        """Create a writer and feed it ``crazyones.pdf`` via appendPagesFromReader."""
        reader = PdfFileReader(os.path.join(RESOURCE_ROOT, "crazyones.pdf"))
        writer = PdfFileWriter()
        self.pdf_file_writer = writer
        writer.appendPagesFromReader(reader)

    def test_add(self):
        """After one addJS call the root object has the expected entries."""
        self.pdf_file_writer.addJS(self._PRINT_JS)
        root = self.pdf_file_writer._root_object

        self.assertIn(
            "/Names", root, "addJS should add a name catalog in the root object."
        )
        # Looked up only after the check above, so a missing "/Names" is
        # reported as an assertion failure.
        name_catalog = root["/Names"]
        self.assertIn(
            "/JavaScript",
            name_catalog,
            "addJS should add a JavaScript name tree under the name catalog.",
        )
        self.assertIn(
            "/OpenAction", root, "addJS should add an OpenAction to the catalog."
        )

    def test_overwrite(self):
        """
        Call addJS twice with the same script and check that the first entry of
        the JavaScript name array differs between the two calls.
        """
        writer = self.pdf_file_writer

        writer.addJS(self._PRINT_JS)
        name_after_first = self.get_javascript_name()

        writer.addJS(self._PRINT_JS)
        name_after_second = self.get_javascript_name()

        self.assertNotEqual(
            name_after_first,
            name_after_second,
            "addJS should overwrite the previous script in the catalog.",
        )

    def get_javascript_name(self):
        """
        Return element 0 of ``root["/Names"]["/JavaScript"]["/Names"]``, asserting
        that each key along that path is present before descending into it.
        """
        root = self.pdf_file_writer._root_object
        self.assertIn("/Names", root)

        name_catalog = root["/Names"]
        self.assertIn("/JavaScript", name_catalog)

        javascript_tree = name_catalog["/JavaScript"]
        self.assertIn("/Names", javascript_tree)

        return javascript_tree["/Names"][0]