diff --git a/PyPDF2/_reader.py b/PyPDF2/_reader.py
index 65586d7d5..ab34c7a9e 100644
--- a/PyPDF2/_reader.py
+++ b/PyPDF2/_reader.py
@@ -800,6 +800,19 @@ def getOutlines(
         deprecate_with_replacement("getOutlines", "outline")
         return self._get_outline(node, outline)
 
+    @property
+    def threads(self) -> Optional[ArrayObject]:
+        """
+        Read-only property for the list of threads; see §8.3.2 of the PDF 1.7 spec.
+
+        :return: an Array of Dictionaries with "/F" and "/I" properties
+            or None if the catalog has no "/Threads" entry.
+        """
+        catalog = cast(DictionaryObject, self.trailer[TK.ROOT])
+        if CO.THREADS in catalog:
+            return cast("ArrayObject", catalog[CO.THREADS])
+        return None
+
     def _get_page_number_by_indirect(
         self, indirect_ref: Union[None, int, NullObject, IndirectObject]
     ) -> int:
diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py
index 12539900d..3ad538da9 100644
--- a/PyPDF2/_writer.py
+++ b/PyPDF2/_writer.py
@@ -1098,6 +1098,33 @@ def get_outline_root(self) -> TreeObject:
 
         return outline
 
+    def get_threads_root(self) -> ArrayObject:
+        """
+        Return the list of threads; see §8.3.2 of the PDF 1.7 spec.
+
+        An empty array is created and stored in the catalog if "/Threads" is missing.
+
+        :return: an Array (possibly empty) of Dictionaries with "/F" and "/I" properties
+        """
+        if CO.THREADS in self._root_object:
+            # TABLE 3.25 Entries in the catalog dictionary
+            threads = cast(ArrayObject, self._root_object[CO.THREADS])
+        else:
+            threads = ArrayObject()
+            self._root_object[NameObject(CO.THREADS)] = threads
+        return threads
+
+    @property
+    def threads(self) -> ArrayObject:
+        """
+        Read-only property for the list of threads; see §8.3.2 of the PDF 1.7 spec.
+
+        Accessing it adds an empty "/Threads" array to the catalog if none exists.
+
+        :return: an Array (possibly empty) of Dictionaries with "/F" and "/I" properties
+        """
+        return self.get_threads_root()
+
     def getOutlineRoot(self) -> TreeObject:  # pragma: no cover
         """
         .. deprecated:: 1.28.0
diff --git a/PyPDF2/constants.py b/PyPDF2/constants.py
index f8d3faf8f..a2f8c49ed 100644
--- a/PyPDF2/constants.py
+++ b/PyPDF2/constants.py
@@ -16,6 +16,7 @@ class Core:
     """Keywords that don't quite belong anywhere else."""
 
     OUTLINES = "/Outlines"
+    THREADS = "/Threads"
     PAGE = "/Page"
     PAGES = "/Pages"
     CATALOG = "/Catalog"
diff --git a/tests/test_reader.py b/tests/test_reader.py
index 0338d6eb2..8e109cfbd 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -18,6 +18,7 @@
     WrongPasswordError,
 )
 from PyPDF2.generic import (
+    ArrayObject,
     Destination,
     DictionaryObject,
     NameObject,
@@ -1193,6 +1194,20 @@ def test_zeroing_xref():
     len(reader.pages)
 
 
+def test_thread():
+    # A document expected to have no "/Threads" entry in its catalog.
+    url = "https://github.com/py-pdf/PyPDF2/files/9066120/UTA_OSHA_3115_Fall_Protection_Training_09162021_.pdf"
+    name = "UTA_OSHA.pdf"
+    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
+    assert reader.threads is None
+    # A document expected to define at least one article thread.
+    url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf"
+    name = "tika-924666.pdf"
+    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
+    assert isinstance(reader.threads, ArrayObject)
+    assert len(reader.threads) >= 1
+
+
 def test_build_outline_item(caplog):
     url = "https://github.com/py-pdf/PyPDF2/files/9464742/shiv_resume.pdf"
     name = "shiv_resume.pdf"
diff --git a/tests/test_writer.py b/tests/test_writer.py
index 70adf3ba2..fa8638b3d 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -7,6 +7,7 @@
 from PyPDF2 import PageObject, PdfMerger, PdfReader, PdfWriter
 from PyPDF2.errors import PageSizeNotDefinedError
 from PyPDF2.generic import (
+    ArrayObject,
     IndirectObject,
     NameObject,
     NumberObject,
@@ -856,3 +857,14 @@ def test_startup_dest():
     pdf_file_writer.open_destination = None
     assert "/OpenAction" not in pdf_file_writer._root_object
     pdf_file_writer.open_destination = None
+
+
+def test_threads_empty():
+    writer = PdfWriter()
+    thr = writer.threads
+    assert isinstance(thr, ArrayObject)
+    assert len(thr) == 0
+    # Equality alone is vacuous for two empty arrays; check identity to prove
+    # the root created on first access is reused rather than recreated.
+    thr2 = writer.threads
+    assert thr is thr2