Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add 'threads' property to PdfWriter #1458

Merged
merged 5 commits into from Dec 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 14 additions & 0 deletions PyPDF2/_reader.py
Expand Up @@ -800,6 +800,20 @@ def getOutlines(
deprecate_with_replacement("getOutlines", "outline")
return self._get_outline(node, outline)

@property
def threads(self) -> Optional[ArrayObject]:
    """
    Read-only property for the list of threads, see §8.3.2 from PDF 1.7 spec.

    The lookup is done on the catalog, i.e. the object stored under
    ``TK.ROOT`` in the trailer.

    :return: an Array of Dictionaries with "/F" and "/I" properties,
        or None if the catalog has no ``CO.THREADS`` entry (no articles).
    """
    root = cast(DictionaryObject, self.trailer[TK.ROOT])
    # Guard clause: without a threads entry there is nothing to return.
    if CO.THREADS not in root:
        return None
    return cast("ArrayObject", root[CO.THREADS])

def _get_page_number_by_indirect(
self, indirect_ref: Union[None, int, NullObject, IndirectObject]
) -> int:
Expand Down
23 changes: 23 additions & 0 deletions PyPDF2/_writer.py
Expand Up @@ -1098,6 +1098,29 @@ def get_outline_root(self) -> TreeObject:

return outline

def get_threads_root(self) -> ArrayObject:
    """
    Return the list of threads, see §8.3.2 from PDF 1.7 spec.

    When the root object has no ``CO.THREADS`` entry yet, an empty array is
    created, stored in the root object under that key, and returned.

    :return: an Array (possibly empty) of Dictionaries with "/F" and "/I" properties
    """
    if CO.THREADS not in self._root_object:
        # No entry yet: register a fresh, empty array so that later calls
        # find it in the root object.
        created = ArrayObject()
        self._root_object[NameObject(CO.THREADS)] = created
        return created
    # TABLE 3.25 Entries in the catalog dictionary
    return cast(ArrayObject, self._root_object[CO.THREADS])

@property
def threads(self) -> ArrayObject:
    """
    Read-only property for the list of threads, see §8.3.2 from PDF 1.7 spec.

    This is a thin accessor: the value comes from ``get_threads_root()``.

    :return: an Array (possibly empty) of Dictionaries with "/F" and "/I" properties
    """
    threads_root = self.get_threads_root()
    return threads_root

def getOutlineRoot(self) -> TreeObject: # pragma: no cover
"""
.. deprecated:: 1.28.0
Expand Down
1 change: 1 addition & 0 deletions PyPDF2/constants.py
Expand Up @@ -16,6 +16,7 @@ class Core:
"""Keywords that don't quite belong anywhere else."""

OUTLINES = "/Outlines"
THREADS = "/Threads"
PAGE = "/Page"
PAGES = "/Pages"
CATALOG = "/Catalog"
Expand Down
13 changes: 13 additions & 0 deletions tests/test_reader.py
Expand Up @@ -18,6 +18,7 @@
WrongPasswordError,
)
from PyPDF2.generic import (
ArrayObject,
Destination,
DictionaryObject,
NameObject,
Expand Down Expand Up @@ -1193,6 +1194,18 @@ def test_zeroing_xref():
len(reader.pages)


def test_thread():
    """Check ``PdfReader.threads`` on two downloaded sample PDFs."""
    # First sample: `threads` is expected to be None.
    plain_url = "https://github.com/py-pdf/PyPDF2/files/9066120/UTA_OSHA_3115_Fall_Protection_Training_09162021_.pdf"
    plain_name = "UTA_OSHA.pdf"
    plain_bytes = get_pdf_from_url(plain_url, name=plain_name)
    reader = PdfReader(BytesIO(plain_bytes))
    assert reader.threads is None

    # Second sample: `threads` is expected to be a non-empty ArrayObject.
    threaded_url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
    threaded_name = "tika-924666.pdf"
    threaded_bytes = get_pdf_from_url(threaded_url, name=threaded_name)
    reader = PdfReader(BytesIO(threaded_bytes))
    assert isinstance(reader.threads, ArrayObject)
    assert len(reader.threads) >= 1


def test_build_outline_item(caplog):
url = "https://github.com/py-pdf/PyPDF2/files/9464742/shiv_resume.pdf"
name = "shiv_resume.pdf"
Expand Down
10 changes: 10 additions & 0 deletions tests/test_writer.py
Expand Up @@ -7,6 +7,7 @@
from PyPDF2 import PageObject, PdfMerger, PdfReader, PdfWriter
from PyPDF2.errors import PageSizeNotDefinedError
from PyPDF2.generic import (
ArrayObject,
IndirectObject,
NameObject,
NumberObject,
Expand Down Expand Up @@ -856,3 +857,12 @@ def test_startup_dest():
pdf_file_writer.open_destination = None
assert "/OpenAction" not in pdf_file_writer._root_object
pdf_file_writer.open_destination = None


def test_threads_empty():
    """A new PdfWriter yields an empty ArrayObject for ``threads``; a second read compares equal."""
    writer = PdfWriter()
    first_read = writer.threads
    assert isinstance(first_read, ArrayObject)
    assert len(first_read) == 0
    # Reading the property again must give an equal value.
    second_read = writer.threads
    assert first_read == second_read