Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

STY: Fix style issues #743

Merged
merged 4 commits on Apr 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/unit-tests.yaml
Expand Up @@ -43,7 +43,7 @@ jobs:

- name: Test with flake8
run: |
flake8 . --ignore=E203,W503,E501,F405,E226,E128,E225,F403,E201,E202,E231,W504,E241,F401,E261,E302,E211,E701,E228,E111,F841,E117,E127,E251,E266,E
flake8 . --ignore=E203,W503,W504,E,F403,F405
if: matrix.python-version != '2.7'

- name: Test with pytest
Expand Down
10 changes: 9 additions & 1 deletion PyPDF2/__init__.py
Expand Up @@ -3,4 +3,12 @@
from .pagerange import PageRange, parse_filename_page_ranges
from ._version import __version__

__all__ = ["pdf", "PdfFileMerger"]
__all__ = [
"__version__",
"PageRange",
"parse_filename_page_ranges",
"pdf",
"PdfFileMerger",
"PdfFileReader",
"PdfFileWriter",
]
6 changes: 3 additions & 3 deletions PyPDF2/filters.py
Expand Up @@ -55,7 +55,7 @@ def compress(data):
# Unable to import zlib. Attempt to use the System.IO.Compression
# library from the .NET framework. (IronPython only)
import System
from System import IO, Collections, Array
from System import IO, Array

def _string_to_bytearr(buf):
retval = Array.CreateInstance(System.Byte, len(buf))
Expand Down Expand Up @@ -275,7 +275,7 @@ def decode(data, decodeParms=None):
x = 0
hitEod = False
# remove all whitespace from data
data = [y for y in data if not (y in ' \n\r\t')]
data = [y for y in data if y not in ' \n\r\t']
while not hitEod:
c = data[x]
if len(retval) == 0 and c == "<" and data[x+1] == "~":
Expand Down Expand Up @@ -363,7 +363,7 @@ def decode(data, decodeParms=None, height=0):

width = decodeParms["/Columns"]
imgSize = len(data)
tiff_header_struct = '<' + '2s' + 'h' + 'l' + 'h' + 'hhll' * 8 + 'h'
tiff_header_struct = '<2shlh' + 'hhll' * 8 + 'h'
tiffHeader = struct.pack(tiff_header_struct,
b'II', # Byte order indication: Little endian
42, # Version number (always 42)
Expand Down
8 changes: 4 additions & 4 deletions PyPDF2/generic.py
Expand Up @@ -381,7 +381,7 @@ def readStringFromStream(stream):
# break occurs. If it's a multi-char EOL, consume the
# second character:
tok = stream.read(1)
if not tok in b_("\n\r"):
if tok not in b_("\n\r"):
stream.seek(-1, 1)
# Then don't add anything to the actual string, since this
# line break was escaped:
Expand Down Expand Up @@ -483,10 +483,10 @@ def readFromStream(stream, pdf):
try:
try:
ret=name.decode('utf-8')
except (UnicodeEncodeError, UnicodeDecodeError) as e:
except (UnicodeEncodeError, UnicodeDecodeError):
ret=name.decode('gbk')
return NameObject(ret)
except (UnicodeEncodeError, UnicodeDecodeError) as e:
except (UnicodeEncodeError, UnicodeDecodeError):
# Name objects should represent irregular characters
# with a '#' followed by the symbol's hex number
if not pdf.strict:
Expand Down Expand Up @@ -843,7 +843,7 @@ def getData(self):

decoded._data = filters.decodeStreamData(self)
for key, value in list(self.items()):
if not key in ("/Length", "/Filter", "/DecodeParms"):
if key not in ("/Length", "/Filter", "/DecodeParms"):
decoded[key] = value
self.decodedSelf = decoded
return decoded._data
Expand Down
5 changes: 2 additions & 3 deletions PyPDF2/merger.py
Expand Up @@ -311,7 +311,6 @@ def _trim_dests(self, pdf, dests, pages):
page set.
"""
new_dests = []
prev_header_added = True
for k, o in list(dests.items()):
for j in range(*pages):
if pdf.getPage(j).getObject() == o['/Page'].getObject():
Expand Down Expand Up @@ -356,7 +355,7 @@ def _write_dests(self):
if p.id == v['/Page']:
v[NameObject('/Page')] = p.out_pagedata
pageno = i
pdf = p.src
pdf = p.src # noqa: F841
break
if pageno is not None:
self.output.addNamedDestinationObject(v)
Expand Down Expand Up @@ -429,7 +428,7 @@ def _write_bookmarks(self, bookmarks=None, parent=None):
b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})

pageno = i
pdf = p.src
pdf = p.src # noqa: F841
break
if pageno is not None:
del b['/Page'], b['/Type']
Expand Down
5 changes: 2 additions & 3 deletions PyPDF2/pagerange.py
Expand Up @@ -89,8 +89,7 @@ def __init__(self, arg):
@staticmethod
def valid(input):
""" True if input is a valid initializer for a PageRange. """
return isinstance(input, slice) or \
isinstance(input, PageRange) or \
return isinstance(input, (slice, PageRange)) or \
(isString(input)
and bool(re.match(PAGE_RANGE_RE, input)))

Expand Down Expand Up @@ -144,7 +143,7 @@ def parse_filename_page_ranges(args):
for arg in args + [None]:
if PageRange.valid(arg):
if not pdf_filename:
raise ValueError("The first argument must be a filename, " \
raise ValueError("The first argument must be a filename, "
"not a page range.")

pairs.append( (pdf_filename, PageRange(arg)) )
Expand Down
71 changes: 28 additions & 43 deletions PyPDF2/pdf.py
Expand Up @@ -41,7 +41,6 @@
__maintainer__ = "Phaseit, Inc."
__maintainer_email = "PyPDF2@phaseit.net"

import string
import math
import struct
import sys
Expand All @@ -57,7 +56,6 @@
else:
from io import BytesIO

from . import filters
from . import utils
import warnings
import codecs
Expand Down Expand Up @@ -543,7 +541,6 @@ def _sweepIndirectReferences(self, externMap, data):
if debug: print((data, "TYPE", data.__class__.__name__))
if isinstance(data, DictionaryObject):
for key, value in list(data.items()):
origvalue = value
value = self._sweepIndirectReferences(externMap, value)
if isinstance(value, StreamObject):
# a dictionary value is a stream. streams must be indirect
Expand Down Expand Up @@ -794,6 +791,11 @@ def removeImages(self, ignoreByteStringObject=False):
to ignore ByteString Objects.
"""
pages = self.getObject(self._pages)['/Kids']
jump_operators = [
b_('cm'), b_('w'), b_('J'), b_('j'), b_('M'), b_('d'), b_('ri'), b_('i'),
b_('gs'), b_('W'), b_('b'), b_('s'), b_('S'), b_('f'), b_('F'), b_('n'), b_('m'), b_('l'),
b_('c'), b_('v'), b_('y'), b_('h'), b_('B'), b_('Do'), b_('sh')
]
for j in range(len(pages)):
page = pages[j]
pageRef = self.getObject(page)
Expand All @@ -804,36 +806,29 @@ def removeImages(self, ignoreByteStringObject=False):
_operations = []
seq_graphics = False
for operands, operator in content.operations:
if operator == b_('Tj'):
text = operands[0]
if ignoreByteStringObject:
if not isinstance(text, TextStringObject):
operands[0] = TextStringObject()
elif operator == b_("'"):
if operator in [b_('Tj'), b_("'")]:
text = operands[0]
if ignoreByteStringObject:
if not isinstance(text, TextStringObject):
operands[0] = TextStringObject()
elif operator == b_('"'):
text = operands[2]
if ignoreByteStringObject:
if not isinstance(text, TextStringObject):
operands[2] = TextStringObject()
if ignoreByteStringObject and not isinstance(text, TextStringObject):
operands[2] = TextStringObject()
elif operator == b_("TJ"):
for i in range(len(operands[0])):
if ignoreByteStringObject:
if not isinstance(operands[0][i], TextStringObject):
operands[0][i] = TextStringObject()
if (
ignoreByteStringObject
and not isinstance(operands[0][i], TextStringObject)
):
operands[0][i] = TextStringObject()

if operator == b_('q'):
seq_graphics = True
if operator == b_('Q'):
seq_graphics = False
if seq_graphics:
if operator in [b_('cm'), b_('w'), b_('J'), b_('j'), b_('M'), b_('d'), b_('ri'), b_('i'),
b_('gs'), b_('W'), b_('b'), b_('s'), b_('S'), b_('f'), b_('F'), b_('n'), b_('m'), b_('l'),
b_('c'), b_('v'), b_('y'), b_('h'), b_('B'), b_('Do'), b_('sh')]:
continue
if seq_graphics and operator in jump_operators:
continue
if operator == b_('re'):
continue
_operations.append((operands, operator))
Expand All @@ -856,41 +851,29 @@ def removeText(self, ignoreByteStringObject=False):
if not isinstance(content, ContentStream):
content = ContentStream(content, pageRef)
for operands,operator in content.operations:
if operator == b_('Tj'):
if operator in [b_('Tj'), b_("'")]:
text = operands[0]
if not ignoreByteStringObject:
if isinstance(text, TextStringObject):
operands[0] = TextStringObject()
else:
if isinstance(text, TextStringObject) or \
isinstance(text, ByteStringObject):
operands[0] = TextStringObject()
elif operator == b_("'"):
text = operands[0]
if not ignoreByteStringObject:
if isinstance(text, TextStringObject):
operands[0] = TextStringObject()
else:
if isinstance(text, TextStringObject) or \
isinstance(text, ByteStringObject):
if isinstance(text, (TextStringObject, ByteStringObject)):
operands[0] = TextStringObject()
elif operator == b_('"'):
text = operands[2]
if not ignoreByteStringObject:
if isinstance(text, TextStringObject):
operands[2] = TextStringObject()
else:
if isinstance(text, TextStringObject) or \
isinstance(text, ByteStringObject):
if isinstance(text, (TextStringObject, ByteStringObject)):
operands[2] = TextStringObject()
elif operator == b_("TJ"):
for i in range(len(operands[0])):
if not ignoreByteStringObject:
if isinstance(operands[0][i], TextStringObject):
operands[0][i] = TextStringObject()
else:
if isinstance(operands[0][i], TextStringObject) or \
isinstance(operands[0][i], ByteStringObject):
if isinstance(operands[0][i], (TextStringObject, ByteStringObject)):
operands[0][i] = TextStringObject()

pageRef.__setitem__(NameObject('/Contents'), content)
Expand Down Expand Up @@ -1172,9 +1155,8 @@ def _showwarning(message, category, filename, lineno, file=warndest, line=None):
if hasattr(stream, 'mode') and 'b' not in stream.mode:
warnings.warn("PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", utils.PdfReadWarning)
if isString(stream):
fileobj = open(stream, 'rb')
stream = BytesIO(b_(fileobj.read()))
fileobj.close()
with open(stream, 'rb') as fileobj:
stream = BytesIO(b_(fileobj.read()))
self.read(stream)
self.stream = stream

Expand Down Expand Up @@ -1729,7 +1711,7 @@ def getObject(self, indirectReference):
return retval

def _decryptObject(self, obj, key):
if isinstance(obj, ByteStringObject) or isinstance(obj, TextStringObject):
if isinstance(obj, (ByteStringObject, TextStringObject)):
obj = createStringObject(utils.RC4_encrypt(key, obj.original_bytes))
elif isinstance(obj, StreamObject):
obj._data = utils.RC4_encrypt(key, obj._data)
Expand All @@ -1752,7 +1734,10 @@ def readObjectHeader(self, stream):
idnum = readUntilWhitespace(stream)
extra |= utils.skipOverWhitespace(stream); stream.seek(-1, 1)
generation = readUntilWhitespace(stream)
obj = stream.read(3)

# The value read is not used, but the read itself must stay: it advances
# the stream position past these 3 bytes before the next token is parsed.
_obj = stream.read(3) # noqa: F841

readNonWhitespace(stream)
stream.seek(-1, 1)
if (extra and self.strict):
Expand Down Expand Up @@ -1938,8 +1923,8 @@ def used_before(num, generation):
# The rest of the elements depend on the xref_type
if xref_type == 0:
# linked list of free objects
next_free_object = getEntry(1)
next_generation = getEntry(2)
next_free_object = getEntry(1) # noqa: F841
next_generation = getEntry(2) # noqa: F841
elif xref_type == 1:
# objects that are in use but are not compressed
byte_offset = getEntry(1)
Expand Down
11 changes: 5 additions & 6 deletions PyPDF2/utils.py
Expand Up @@ -196,11 +196,10 @@ def markLocation(stream):
# Mainly for debugging
RADIUS = 5000
stream.seek(-RADIUS, 1)
outputDoc = open('PyPDF2_pdfLocation.txt', 'wb')
outputDoc.write(stream.read(RADIUS))
outputDoc.write(b'HERE')
outputDoc.write(stream.read(RADIUS))
outputDoc.close()
with open('PyPDF2_pdfLocation.txt', 'wb') as outputDoc:
outputDoc.write(stream.read(RADIUS))
outputDoc.write(b'HERE')
outputDoc.write(stream.read(RADIUS))
stream.seek(-RADIUS, 1)


Expand Down Expand Up @@ -242,7 +241,7 @@ def b_(s):
if len(s) < 2:
bc[s] = r
return r
except Exception as e:
except Exception:
print(s)
r = s.encode('utf-8')
if len(s) < 2:
Expand Down
1 change: 0 additions & 1 deletion PyPDF2/xmp.py
Expand Up @@ -2,7 +2,6 @@
import datetime
import decimal
from .generic import PdfObject
from xml.dom import getDOMImplementation
from xml.dom.minidom import parseString
from .utils import u_

Expand Down
2 changes: 1 addition & 1 deletion Scripts/2-up.py
Expand Up @@ -16,7 +16,7 @@ def main():
print("usage: python 2-up.py input_file output_file")
sys.exit(1)
print("2-up input " + sys.argv[1])
reader = PdfFileReader(open(sys.argv[1], "rb"))
reader = PdfFileReader(sys.argv[1])
writer = PdfFileWriter()
for iter in range(0, reader.getNumPages() - 1, 2):
lhs = reader.getPage(iter)
Expand Down
13 changes: 7 additions & 6 deletions Scripts/booklet.py
@@ -1,12 +1,12 @@
#!/usr/bin/env python

"""
Layout the pages from a PDF file to print a booklet or brochure.
Lay out the pages from a PDF file to print a booklet or brochure.

The resulting media size is twice the size of the first page
of the source document. If you print the resulting PDF in duplex
(short edge), you get a center fold brochure that you can staple
together and read as a booklet.
The resulting media size is twice the size of the first page
of the source document. If you print the resulting PDF in duplex
(short edge), you get a center fold brochure that you can staple
together and read as a booklet.
"""

from __future__ import division, print_function
Expand Down Expand Up @@ -63,7 +63,8 @@ def mergePageByNumber(dstPage, pageNumber, xOffset):
mergePageByNumber(page, i, offsets[0])
mergePageByNumber(page, virtualPages - i - 1, offsets[1])

writer.write(open(args.output, "wb"))
with open(args.output, "wb") as fp:
writer.write(fp)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion Scripts/pdf-image-extractor.py
Expand Up @@ -11,7 +11,7 @@


def main(pdf: str):
reader = PyPDF2.PdfFileReader(open(pdf, "rb"))
reader = PyPDF2.PdfFileReader(pdf)
page = reader.pages[30]

if "/XObject" in page["/Resources"]:
Expand Down
4 changes: 2 additions & 2 deletions Tests/test_basic_features.py
Expand Up @@ -14,7 +14,7 @@
def test_basic_features():
output = PdfFileWriter()
document1 = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
input1 = PdfFileReader(open(document1, "rb"))
input1 = PdfFileReader(document1)

# print how many pages input1 has:
print("document1.pdf has %d pages." % input1.getNumPages())
Expand All @@ -32,7 +32,7 @@ def test_basic_features():
# add page 4 from input1, but first add a watermark from another PDF:
page4 = input1.getPage(0)
watermark_pdf = document1
watermark = PdfFileReader(open(watermark_pdf, "rb"))
watermark = PdfFileReader(watermark_pdf)
page4.mergePage(watermark.getPage(0))
output.addPage(page4)

Expand Down