Skip to content

Commit

Permalink
ENH: Add 'threads' property to PdfWriter (#1458)
Browse files Browse the repository at this point in the history
This currently returns only an empty list, pending PR #1371.
  • Loading branch information
pubpub-zz committed Dec 2, 2022
1 parent 1a9f7d9 commit 3e250c5
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 0 deletions.
14 changes: 14 additions & 0 deletions PyPDF2/_reader.py
Expand Up @@ -800,6 +800,20 @@ def getOutlines(
deprecate_with_replacement("getOutlines", "outline")
return self._get_outline(node, outline)

@property
def threads(self) -> Optional[ArrayObject]:
    """
    Read-only property giving the document's article threads
    (see §8.3.2 of the PDF 1.7 specification).

    The value is looked up under the ``/Threads`` key of the catalog
    referenced by the trailer's root entry.

    :return: an Array of Dictionaries with "/F" and "/I" properties,
        or None when the catalog has no threads entry.
    """
    root = cast(DictionaryObject, self.trailer[TK.ROOT])
    # Guard clause: a catalog without the entry has no threads to report.
    if CO.THREADS not in root:
        return None
    return cast("ArrayObject", root[CO.THREADS])

def _get_page_number_by_indirect(
self, indirect_ref: Union[None, int, NullObject, IndirectObject]
) -> int:
Expand Down
23 changes: 23 additions & 0 deletions PyPDF2/_writer.py
Expand Up @@ -1098,6 +1098,29 @@ def get_outline_root(self) -> TreeObject:

return outline

def get_threads_root(self) -> ArrayObject:
    """
    Return the threads array of the document catalog, creating it on demand
    (see §8.3.2 of the PDF 1.7 specification).

    When the catalog has no threads entry yet, an empty array is stored in
    the catalog and that same array is returned.

    :return: an Array (possibly empty) of Dictionaries with "/F" and "/I" properties
    """
    catalog = self._root_object
    if CO.THREADS not in catalog:
        # No entry yet: register an empty array in the catalog and hand it back.
        fresh = ArrayObject()
        catalog[NameObject(CO.THREADS)] = fresh
        return fresh
    # TABLE 3.25 Entries in the catalog dictionary
    return cast(ArrayObject, catalog[CO.THREADS])

@property
def threads(self) -> ArrayObject:
    """
    Read-only property exposing the threads array
    (see §8.3.2 of the PDF 1.7 specification).

    This is a thin wrapper around :meth:`get_threads_root`.

    :return: an Array (possibly empty) of Dictionaries with "/F" and "/I" properties
    """
    threads_root = self.get_threads_root()
    return threads_root

def getOutlineRoot(self) -> TreeObject: # pragma: no cover
"""
.. deprecated:: 1.28.0
Expand Down
1 change: 1 addition & 0 deletions PyPDF2/constants.py
Expand Up @@ -16,6 +16,7 @@ class Core:
"""Keywords that don't quite belong anywhere else."""

OUTLINES = "/Outlines"
THREADS = "/Threads"
PAGE = "/Page"
PAGES = "/Pages"
CATALOG = "/Catalog"
Expand Down
13 changes: 13 additions & 0 deletions tests/test_reader.py
Expand Up @@ -18,6 +18,7 @@
WrongPasswordError,
)
from PyPDF2.generic import (
ArrayObject,
Destination,
DictionaryObject,
NameObject,
Expand Down Expand Up @@ -1193,6 +1194,18 @@ def test_zeroing_xref():
len(reader.pages)


def test_thread():
    """Check ``PdfReader.threads`` on one PDF expected to yield None and one expected to yield threads."""
    # First sample: the reader is expected to report no threads at all.
    plain_pdf = get_pdf_from_url(
        "https://github.com/py-pdf/PyPDF2/files/9066120/UTA_OSHA_3115_Fall_Protection_Training_09162021_.pdf",
        name="UTA_OSHA.pdf",
    )
    assert PdfReader(BytesIO(plain_pdf)).threads is None

    # Second sample: the reader is expected to return a non-empty ArrayObject.
    threaded_pdf = get_pdf_from_url(
        "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf",
        name="tika-924666.pdf",
    )
    found = PdfReader(BytesIO(threaded_pdf)).threads
    assert isinstance(found, ArrayObject)
    assert len(found) >= 1


def test_build_outline_item(caplog):
url = "https://github.com/py-pdf/PyPDF2/files/9464742/shiv_resume.pdf"
name = "shiv_resume.pdf"
Expand Down
10 changes: 10 additions & 0 deletions tests/test_writer.py
Expand Up @@ -7,6 +7,7 @@
from PyPDF2 import PageObject, PdfMerger, PdfReader, PdfWriter
from PyPDF2.errors import PageSizeNotDefinedError
from PyPDF2.generic import (
ArrayObject,
IndirectObject,
NameObject,
NumberObject,
Expand Down Expand Up @@ -856,3 +857,12 @@ def test_startup_dest():
pdf_file_writer.open_destination = None
assert "/OpenAction" not in pdf_file_writer._root_object
pdf_file_writer.open_destination = None


def test_threads_empty():
    """A fresh writer exposes an empty threads array that is created once and then reused."""
    writer = PdfWriter()
    thr = writer.threads
    assert isinstance(thr, ArrayObject)
    assert len(thr) == 0
    # The first access must have registered the array in the catalog.
    assert "/Threads" in writer._root_object
    thr2 = writer.threads
    assert thr == thr2
    # Equality alone is vacuous for two empty arrays; identity proves the
    # second access returned the stored array instead of building a new one.
    assert thr2 is thr

0 comments on commit 3e250c5

Please sign in to comment.