Skip to content

Commit

Permalink
ENH: Make PdfReader.get_object accept integer arguments (#1459)
Browse files (browse the repository at this point in the history)
Also fix various type annotations
  • Loading branch information
pubpub-zz committed Dec 1, 2022
1 parent 940819f commit 17897d9
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 17 deletions.
45 changes: 29 additions & 16 deletions PyPDF2/_reader.py
Expand Up @@ -863,14 +863,18 @@ def getDestinationPageNumber(
def _build_destination(
self,
title: str,
array: List[Union[NumberObject, IndirectObject, NullObject, DictionaryObject]],
array: Optional[
List[
Union[NumberObject, IndirectObject, None, NullObject, DictionaryObject]
]
],
) -> Destination:
page, typ = None, None
# handle outline items with missing or invalid destination
if (
isinstance(array, (type(None), NullObject))
isinstance(array, (NullObject, str))
or (isinstance(array, ArrayObject) and len(array) == 0)
or (isinstance(array, str))
or array is None
):

page = NullObject()
Expand Down Expand Up @@ -898,7 +902,7 @@ def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
# title required for valid outline
# PDF Reference 1.7: TABLE 8.4 Entries in an outline item dictionary
try:
title = node["/Title"]
title = cast("str", node["/Title"])
except KeyError:
if self.strict:
raise PdfReadError(f"Outline Entry Missing /Title attribute: {node!r}")
Expand All @@ -918,23 +922,29 @@ def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
dest = dest["/D"]

if isinstance(dest, ArrayObject):
outline_item = self._build_destination(title, dest) # type: ignore
outline_item = self._build_destination(title, dest)
elif isinstance(dest, str):
# named destination, addresses NameObject Issue #193
# TODO : keep named destination instead of replacing it ?
try:
outline_item = self._build_destination(
title, self._namedDests[dest].dest_array
)
except KeyError:
# named destination not found in Name Dict
outline_item = self._build_destination(title, None)
elif isinstance(dest, type(None)):
elif dest is None:
# outline item not required to have destination or action
# PDFv1.7 Table 153
outline_item = self._build_destination(title, dest) # type: ignore
outline_item = self._build_destination(title, dest)
else:
if self.strict:
raise PdfReadError(f"Unexpected destination {dest!r}")
else:
logger_warning(
f"Removed unexpected destination {dest!r} from destination",
__name__,
)
outline_item = self._build_destination(title, None) # type: ignore

# if outline item created, add color, format, and child count if present
Expand All @@ -950,7 +960,6 @@ def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
# absolute value = num. visible children
# positive = open/unfolded, negative = closed/folded
outline_item[NameObject("/Count")] = node["/Count"]

return outline_item

@property
Expand Down Expand Up @@ -1154,7 +1163,18 @@ def _get_object_from_stream(
raise PdfReadError("This is a fatal error in strict mode.")
return NullObject()

def get_object(self, indirect_reference: IndirectObject) -> Optional[PdfObject]:
def _get_indirect_object(self, num: int, gen: int) -> Optional[PdfObject]:
    """
    Resolve an object of this reader from the pair ``(num, gen)``.

    Development convenience: wraps the two integers in
    ``IndirectObject(num, gen, self)`` and returns whatever that
    reference's ``get_object()`` yields.
    """
    reference = IndirectObject(num, gen, self)
    return reference.get_object()

def get_object(
self, indirect_reference: Union[int, IndirectObject]
) -> Optional[PdfObject]:
if isinstance(indirect_reference, int):
indirect_reference = IndirectObject(indirect_reference, 0, self)
retval = self.cache_get_indirect_object(
indirect_reference.generation, indirect_reference.idnum
)
Expand Down Expand Up @@ -1928,13 +1948,6 @@ def xfa(self) -> Optional[Dict[str, Any]]:
retval[tag] = es
return retval

def _get_indirect_object(self, num: int, gen: int) -> Optional[PdfObject]:
    """
    Resolve an object of this reader from the pair ``(num, gen)``.

    Development convenience: wraps the two integers in
    ``IndirectObject(num, gen, self)`` and returns whatever that
    reference's ``get_object()`` yields.
    """
    reference = IndirectObject(num, gen, self)
    return reference.get_object()


class PdfFileReader(PdfReader): # pragma: no cover
def __init__(self, *args: Any, **kwargs: Any) -> None:
Expand Down
41 changes: 40 additions & 1 deletion tests/test_reader.py
Expand Up @@ -17,7 +17,13 @@
PdfReadWarning,
WrongPasswordError,
)
from PyPDF2.generic import Destination
from PyPDF2.generic import (
Destination,
DictionaryObject,
NameObject,
NumberObject,
TextStringObject,
)

from . import get_pdf_from_url, normalize_warnings

Expand Down Expand Up @@ -755,6 +761,12 @@ def test_iss925():
annot.get_object()


def test_get_object():
    """
    ``get_object`` called with a bare integer and ``_get_indirect_object``
    called with ``(22, 0)`` must both return the ``/Catalog`` dictionary of
    hello-world.pdf.
    """
    pdf = PdfReader(RESOURCE_ROOT / "hello-world.pdf")

    # Integer shortcut: only the object number is supplied.
    via_int = pdf.get_object(22)
    assert via_int["/Type"] == "/Catalog"

    # Explicit (number, generation) helper.
    via_pair = pdf._get_indirect_object(22, 0)
    assert via_pair["/Type"] == "/Catalog"


@pytest.mark.xfail(reason="#591")
def test_extract_text_hello_world():
reader = PdfReader(RESOURCE_ROOT / "hello-world.pdf")
Expand Down Expand Up @@ -1179,3 +1191,30 @@ def test_zeroing_xref():
name = "UTA_OSHA.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
len(reader.pages)


def test_build_outline_item(caplog):
    """
    Feed ``_build_outline_item`` an outline node whose ``/Dest`` is a number.

    Before ``strict`` is switched on, a warning must be logged and an outline
    item carrying the title is still returned; once ``strict`` is True the
    same input must raise ``PdfReadError``.
    """
    pdf_bytes = get_pdf_from_url(
        "https://github.com/py-pdf/PyPDF2/files/9464742/shiv_resume.pdf",
        name="shiv_resume.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    def make_node():
        # Build a fresh dictionary for every call so each invocation
        # receives its own, untouched input object.
        return DictionaryObject(
            {
                NameObject("/Title"): TextStringObject("Toto"),
                NameObject("/Dest"): NumberObject(2),
            }
        )

    # Lenient pass: the bad destination is reported through the log.
    item = reader._build_outline_item(make_node())
    assert "Removed unexpected destination 2 from destination" in caplog.text
    assert item["/Title"] == "Toto"

    # Strict pass: the same node is rejected with an exception.
    reader.strict = True
    with pytest.raises(PdfReadError) as excinfo:
        reader._build_outline_item(make_node())
    assert "Unexpected destination 2" in excinfo.value.args[0]
1 change: 1 addition & 0 deletions tests/test_writer.py
Expand Up @@ -343,6 +343,7 @@ def test_write_metadata():
reader = PdfReader(pdf_path)
writer = PdfWriter()

writer.add_page(reader.pages[0])
for page in reader.pages:
writer.add_page(page)

Expand Down

0 comments on commit 17897d9

Please sign in to comment.