From 3a1dcff0daf5dbf7d88826746d97c90aa612a15e Mon Sep 17 00:00:00 2001
From: Joseph Hale <me@jhale.dev>
Date: Mon, 12 Dec 2022 13:31:24 -0700
Subject: [PATCH 1/4] DEV: Add in-project virtual envs to .gitignore

Many developers (like myself) like to use virtual environments included within
the current project. These virtual environments are local development
constructs and should not be checked into source control.

This commit adds two common virtual environment directory names to the
.gitignore so that future developers do not accidentally commit them.
---
 .gitignore | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.gitignore b/.gitignore
index 6449fe86b..4dfd2ef51 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,10 @@ build
 dist/*
 __pycache__/
 
+# in-project virtual environments
+venv/
+.venv/
+
 #
 .mutmut-cache
 mutmut-results.*

From 6b7e055c21851d5114e9ac30c8bd458e8d8871be Mon Sep 17 00:00:00 2001
From: Joseph Hale <me@jhale.dev>
Date: Mon, 12 Dec 2022 13:44:57 -0700
Subject: [PATCH 2/4] DEV: Include `pillow` in `requirements/dev.in`

The current contribution instructions in `docs/dev/intro.md` direct new code
contributors to install the `dev` requirements. After following that
instruction and running the minimal test suite with the commands below, the
run fails with the errors listed after them:

```
python -m venv .venv
source .venv/bin/activate
pip install -r requirements/dev.txt
pytest -m "not external" -m "not samples" -m "not slow"
```

=================================================================================================== short test summary info ====================================================================================================
FAILED tests/test_reader.py::test_get_images[pdflatex-outline.pdf-expected_images0] - ModuleNotFoundError: No module named 'PIL'
FAILED tests/test_reader.py::test_get_images[crazyones.pdf-expected_images1] - ModuleNotFoundError: No module named 'PIL'
FAILED tests/test_reader.py::test_get_images[git.pdf-expected_images2] - ModuleNotFoundError: No module named 'PIL'
FAILED tests/test_reader.py::test_get_images[imagemagick-CCITTFaxDecode.pdf-expected_images5] - ModuleNotFoundError: No module named 'PIL'
FAILED tests/test_reader.py::test_get_images[src6-expected_images6] - ModuleNotFoundError: No module named 'PIL'
FAILED tests/test_workflows.py::test_image_extraction[https://corpora.tika.apache.org/base/docs/govdocs1/994/994636.pdf-tika-994636.pdf] - ImportError: pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'
FAILED tests/test_workflows.py::test_image_extraction[https://corpora.tika.apache.org/base/docs/govdocs1/952/952133.pdf-tika-952133.pdf] - ImportError: pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'
FAILED tests/test_workflows.py::test_image_extraction[https://corpora.tika.apache.org/base/docs/govdocs1/914/914568.pdf-tika-914568.pdf] - ImportError: pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'
FAILED tests/test_workflows.py::test_image_extraction[https://corpora.tika.apache.org/base/docs/govdocs1/952/952016.pdf-tika-952016.pdf] - ImportError: pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'
FAILED tests/test_workflows.py::test_image_extraction[https://corpora.tika.apache.org/base/docs/govdocs1/965/965118.pdf-tika-952016.pdf] - ImportError: pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'
FAILED tests/test_workflows.py::test_image_extraction[https://corpora.tika.apache.org/base/docs/govdocs1/959/959184.pdf-tika-959184.pdf] - ImportError: pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'
FAILED tests/test_workflows.py::test_image_extraction[https://corpora.tika.apache.org/base/docs/govdocs1/958/958496.pdf-tika-958496.pdf] - ImportError: pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'
FAILED tests/test_workflows.py::test_image_extraction[https://corpora.tika.apache.org/base/docs/govdocs1/972/972174.pdf-tika-972174.pdf] - ImportError: pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'
FAILED tests/test_workflows.py::test_image_extraction[https://corpora.tika.apache.org/base/docs/govdocs1/972/972243.pdf-tika-972243.pdf] - ImportError: pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'
FAILED tests/test_workflows.py::test_image_extraction[https://corpora.tika.apache.org/base/docs/govdocs1/969/969502.pdf-tika-969502.pdf] - ImportError: pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'
FAILED tests/test_workflows.py::test_image_extraction[https://arxiv.org/pdf/2201.00214.pdf-arxiv-2201.00214.pdf] - ImportError: pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'
FAILED tests/test_workflows.py::test_image_extraction_strict - ImportError: pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'
FAILED tests/test_workflows.py::test_image_extraction2[https://corpora.tika.apache.org/base/docs/govdocs1/977/977609.pdf-tika-977609.pdf] - ImportError: pillow is required to do image extraction. It can be installed via 'pip install PyPDF2[image]'
======================================================================= 18 failed, 536 passed, 5 skipped, 53 deselected, 5 xfailed in 146.94s (0:02:26) ========================================================================

This commit adds `pillow` to `requirements/dev.in` so that the minimal test
suite passes on the first try and new code contributors can start implementing
improvements with confidence.
---
 requirements/dev.in  |  1 +
 requirements/dev.txt | 21 +++++----------------
 2 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/requirements/dev.in b/requirements/dev.in
index 374d81236..6229bd533 100644
--- a/requirements/dev.in
+++ b/requirements/dev.in
@@ -1,4 +1,5 @@
 black
+pillow
 pip-tools
 pre-commit<2.18.0
 pytest-cov
diff --git a/requirements/dev.txt b/requirements/dev.txt
index 82b8d6fdd..0062323bc 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -1,6 +1,6 @@
 #
-# This file is autogenerated by pip-compile with python 3.7
-# To update, run:
+# This file is autogenerated by pip-compile with Python 3.8
+# by the following command:
 #
 #    pip-compile requirements/dev.in
 #
@@ -38,13 +38,6 @@ identify==2.5.9
     # via pre-commit
 idna==3.4
     # via requests
-importlib-metadata==5.1.0
-    # via
-    #   build
-    #   click
-    #   pre-commit
-    #   pytest
-    #   virtualenv
 iniconfig==1.1.1
     # via pytest
 mypy-extensions==0.4.3
@@ -59,6 +52,8 @@ pathspec==0.10.3
     # via black
 pep517==0.13.0
     # via build
+pillow==9.3.0
+    # via -r requirements/dev.in
 pip-tools==6.11.0
     # via -r requirements/dev.in
 platformdirs==2.6.0
@@ -87,12 +82,8 @@ tomli==2.0.1
     #   pytest
 tomli-w==1.0.0
     # via flit
-typed-ast==1.5.4
-    # via black
 typing-extensions==4.4.0
-    # via
-    #   black
-    #   importlib-metadata
+    # via black
 urllib3==1.26.13
     # via requests
 virtualenv==20.17.1
@@ -101,8 +92,6 @@ wheel==0.38.4
     # via
     #   -r requirements/dev.in
     #   pip-tools
-zipp==3.11.0
-    # via importlib-metadata
 
 # The following packages are considered to be unsafe in a requirements file:
 # pip

From f25e817f82b20934de0e3969fbafd57b6a6c7974 Mon Sep 17 00:00:00 2001
From: Joseph Hale <me@jhale.dev>
Date: Mon, 12 Dec 2022 14:23:17 -0700
Subject: [PATCH 3/4] STY: Use official `IO` type for file streams

The Python standard library provides the `IO` type for file streams. (Source:
https://docs.python.org/3/library/typing.html#typing.IO)

This commit replaces the complex Union type of the `IO` implementations with the
official `IO` type. This will improve the accuracy of type checking in users'
IDEs.
---
 PyPDF2/_utils.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/PyPDF2/_utils.py b/PyPDF2/_utils.py
index 2dd91e533..947233919 100644
--- a/PyPDF2/_utils.py
+++ b/PyPDF2/_utils.py
@@ -34,15 +34,10 @@
 import warnings
 from codecs import getencoder
 from dataclasses import dataclass
-from io import (
-    DEFAULT_BUFFER_SIZE,
-    BufferedReader,
-    BufferedWriter,
-    BytesIO,
-    FileIO,
-)
+from io import DEFAULT_BUFFER_SIZE
 from os import SEEK_CUR
 from typing import (
+    IO,
     Any,
     Callable,
     Dict,
@@ -68,7 +63,7 @@
     float, float, float, float, float, float
 ]
 
-StreamType = Union[BytesIO, BufferedReader, BufferedWriter, FileIO]
+StreamType = IO
 StrByteType = Union[str, StreamType]
 
 DEPR_MSG_NO_REPLACEMENT = "{} is deprecated and will be removed in PyPDF2 {}."

From c9e7ec37787a6933139fb51e8d603d74a87da8f0 Mon Sep 17 00:00:00 2001
From: Joseph Hale <me@jhale.dev>
Date: Mon, 12 Dec 2022 15:39:57 -0700
Subject: [PATCH 4/4] STY: Use standard `IO` type hint for writers

The CI system flagged some additional conflicts with the `IO` type in the writer
classes.

This commit changes the writer classes to use the standard `IO` type instead of
the union of IO implementations.
---
 PyPDF2/_protocols.py |  7 ++-----
 PyPDF2/_writer.py    | 18 +++++++++++-------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/PyPDF2/_protocols.py b/PyPDF2/_protocols.py
index b83db961b..89c80f9a5 100644
--- a/PyPDF2/_protocols.py
+++ b/PyPDF2/_protocols.py
@@ -1,8 +1,7 @@
 """Helpers for working with PDF types."""
 
-from io import BufferedReader, BufferedWriter, BytesIO, FileIO
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import IO, Any, Dict, List, Optional, Tuple, Union
 
 try:
     # Python 3.8+: https://peps.python.org/pep-0586
@@ -59,7 +58,5 @@ class PdfWriterProtocol(Protocol):  # pragma: no cover
     def get_object(self, indirect_reference: Any) -> Optional[PdfObjectProtocol]:
         ...
 
-    def write(
-        self, stream: Union[Path, StrByteType]
-    ) -> Tuple[bool, Union[FileIO, BytesIO, BufferedReader, BufferedWriter]]:
+    def write(self, stream: Union[Path, StrByteType]) -> Tuple[bool, IO]:
         ...
diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py
index db5e394ac..dcb5ffee0 100644
--- a/PyPDF2/_writer.py
+++ b/PyPDF2/_writer.py
@@ -38,10 +38,11 @@
 import uuid
 import warnings
 from hashlib import md5
-from io import BufferedReader, BufferedWriter, BytesIO, FileIO, IOBase
+from io import BytesIO, FileIO, IOBase
 from pathlib import Path
 from types import TracebackType
 from typing import (
+    IO,
     Any,
     Callable,
     Deque,
@@ -962,9 +963,7 @@ def write_stream(self, stream: StreamType) -> None:
         self._write_trailer(stream)
         stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n"))  # eof
 
-    def write(
-        self, stream: Union[Path, StrByteType]
-    ) -> Tuple[bool, Union[FileIO, BytesIO, BufferedReader, BufferedWriter]]:
+    def write(self, stream: Union[Path, StrByteType]) -> Tuple[bool, IO]:
         """
         Write the collection of pages added to this object out as a PDF file.
 
@@ -1289,7 +1288,7 @@ def add_outline_item_destination(
         page_destination: Union[None, PageObject, TreeObject] = None,
         parent: Union[None, TreeObject, IndirectObject] = None,
         before: Union[None, TreeObject, IndirectObject] = None,
-        dest: Union[None, PageObject, TreeObject] = None,   # deprecated
+        dest: Union[None, PageObject, TreeObject] = None,  # deprecated
     ) -> IndirectObject:
         if page_destination is not None and dest is not None:  # deprecated
             raise ValueError(
@@ -2483,7 +2482,9 @@ def _add_articles_thread(
                 pag_obj = cast("PageObject", pag.get_object())
                 if "/B" not in pag_obj:
                     pag_obj[NameObject("/B")] = ArrayObject()
-                cast("ArrayObject", pag_obj["/B"]).append(new_article.indirect_reference)
+                cast("ArrayObject", pag_obj["/B"]).append(
+                    new_article.indirect_reference
+                )
             current_article = cast("DictionaryObject", current_article["/N"])
             if current_article == first_article:
                 new_article[NameObject("/N")] = new_first.indirect_reference  # type: ignore
@@ -2674,7 +2675,10 @@ def find_outline_item(
 
         i = 0
         while o is not None:
-            if o.indirect_reference == outline_item or o.get("/Title", None) == outline_item:
+            if (
+                o.indirect_reference == outline_item
+                or o.get("/Title", None) == outline_item
+            ):
                 return [i]
             else:
                 if "/First" in o: