Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Make PdfReader.get_object accept integer arguments #1459

Merged
merged 1 commit into from Dec 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
45 changes: 29 additions & 16 deletions PyPDF2/_reader.py
Expand Up @@ -863,14 +863,18 @@ def getDestinationPageNumber(
def _build_destination(
self,
title: str,
array: List[Union[NumberObject, IndirectObject, NullObject, DictionaryObject]],
array: Optional[
List[
Union[NumberObject, IndirectObject, None, NullObject, DictionaryObject]
]
],
) -> Destination:
page, typ = None, None
# handle outline items with missing or invalid destination
if (
isinstance(array, (type(None), NullObject))
isinstance(array, (NullObject, str))
or (isinstance(array, ArrayObject) and len(array) == 0)
or (isinstance(array, str))
or array is None
):

page = NullObject()
Expand Down Expand Up @@ -898,7 +902,7 @@ def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
# title required for valid outline
# PDF Reference 1.7: TABLE 8.4 Entries in an outline item dictionary
try:
title = node["/Title"]
title = cast("str", node["/Title"])
except KeyError:
if self.strict:
raise PdfReadError(f"Outline Entry Missing /Title attribute: {node!r}")
Expand All @@ -918,23 +922,29 @@ def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
dest = dest["/D"]

if isinstance(dest, ArrayObject):
outline_item = self._build_destination(title, dest) # type: ignore
outline_item = self._build_destination(title, dest)
elif isinstance(dest, str):
# named destination, addresses NameObject Issue #193
# TODO : keep named destination instead of replacing it ?
try:
outline_item = self._build_destination(
title, self._namedDests[dest].dest_array
)
except KeyError:
# named destination not found in Name Dict
outline_item = self._build_destination(title, None)
elif isinstance(dest, type(None)):
elif dest is None:
# outline item not required to have destination or action
# PDFv1.7 Table 153
outline_item = self._build_destination(title, dest) # type: ignore
outline_item = self._build_destination(title, dest)
else:
if self.strict:
raise PdfReadError(f"Unexpected destination {dest!r}")
else:
logger_warning(
f"Removed unexpected destination {dest!r} from destination",
__name__,
)
outline_item = self._build_destination(title, None) # type: ignore

# if outline item created, add color, format, and child count if present
Expand All @@ -950,7 +960,6 @@ def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
# absolute value = num. visible children
# positive = open/unfolded, negative = closed/folded
outline_item[NameObject("/Count")] = node["/Count"]

return outline_item

@property
Expand Down Expand Up @@ -1154,7 +1163,18 @@ def _get_object_from_stream(
raise PdfReadError("This is a fatal error in strict mode.")
return NullObject()

def get_object(self, indirect_reference: IndirectObject) -> Optional[PdfObject]:
def _get_indirect_object(self, num: int, gen: int) -> Optional[PdfObject]:
    """
    Resolve the object identified by ``num`` and ``gen`` through this reader.

    Development convenience: builds ``IndirectObject(num, gen, self)`` and
    returns whatever its ``get_object()`` call yields.
    """
    reference = IndirectObject(num, gen, self)
    return reference.get_object()

def get_object(
self, indirect_reference: Union[int, IndirectObject]
) -> Optional[PdfObject]:
if isinstance(indirect_reference, int):
indirect_reference = IndirectObject(indirect_reference, 0, self)
retval = self.cache_get_indirect_object(
indirect_reference.generation, indirect_reference.idnum
)
Expand Down Expand Up @@ -1928,13 +1948,6 @@ def xfa(self) -> Optional[Dict[str, Any]]:
retval[tag] = es
return retval

def _get_indirect_object(self, num: int, gen: int) -> Optional[PdfObject]:
    """
    Resolve the object identified by ``num`` and ``gen`` through this reader.

    Development convenience: builds ``IndirectObject(num, gen, self)`` and
    returns whatever its ``get_object()`` call yields.
    """
    reference = IndirectObject(num, gen, self)
    return reference.get_object()


class PdfFileReader(PdfReader): # pragma: no cover
def __init__(self, *args: Any, **kwargs: Any) -> None:
Expand Down
41 changes: 40 additions & 1 deletion tests/test_reader.py
Expand Up @@ -17,7 +17,13 @@
PdfReadWarning,
WrongPasswordError,
)
from PyPDF2.generic import Destination
from PyPDF2.generic import (
Destination,
DictionaryObject,
NameObject,
NumberObject,
TextStringObject,
)

from . import get_pdf_from_url, normalize_warnings

Expand Down Expand Up @@ -755,6 +761,12 @@ def test_iss925():
annot.get_object()


def test_get_object():
    """Object 22 of hello-world.pdf is the catalog, by int or by (num, gen)."""
    pdf = PdfReader(RESOURCE_ROOT / "hello-world.pdf")

    # Lookup through the public API with a plain integer object number.
    by_number = pdf.get_object(22)
    assert by_number["/Type"] == "/Catalog"

    # Lookup through the private helper with an explicit generation of 0.
    by_num_and_gen = pdf._get_indirect_object(22, 0)
    assert by_num_and_gen["/Type"] == "/Catalog"


@pytest.mark.xfail(reason="#591")
def test_extract_text_hello_world():
reader = PdfReader(RESOURCE_ROOT / "hello-world.pdf")
Expand Down Expand Up @@ -1179,3 +1191,30 @@ def test_zeroing_xref():
name = "UTA_OSHA.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
len(reader.pages)


def test_build_outline_item(caplog):
    """
    Exercise ``_build_outline_item`` with a numeric ``/Dest`` entry.

    Before ``strict`` is enabled the call is expected to log a
    "Removed unexpected destination" warning and still return an item
    carrying the title; once ``strict`` is set, the same input is expected
    to raise ``PdfReadError``.
    """

    def make_node():
        # Build a fresh dictionary per call so each invocation gets its own node.
        return DictionaryObject(
            {
                NameObject("/Title"): TextStringObject("Toto"),
                NameObject("/Dest"): NumberObject(2),
            }
        )

    pdf_bytes = get_pdf_from_url(
        "https://github.com/py-pdf/PyPDF2/files/9464742/shiv_resume.pdf",
        name="shiv_resume.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    # First pass: warning is logged and the outline item is still produced.
    outline = reader._build_outline_item(make_node())
    assert "Removed unexpected destination 2 from destination" in caplog.text
    assert outline["/Title"] == "Toto"

    # Second pass: strict mode turns the same input into an error.
    reader.strict = True
    with pytest.raises(PdfReadError) as exc:
        reader._build_outline_item(make_node())
    assert "Unexpected destination 2" in exc.value.args[0]
1 change: 1 addition & 0 deletions tests/test_writer.py
Expand Up @@ -343,6 +343,7 @@ def test_write_metadata():
reader = PdfReader(pdf_path)
writer = PdfWriter()

writer.add_page(reader.pages[0])
for page in reader.pages:
writer.add_page(page)

Expand Down