Skip to content

Commit

Permalink
Update writer class
Browse files Browse the repository at this point in the history
Added a correct_annotation_encodings method to the writer class to automatically handle output PDF forms whose fields/annotations have an undefined '/Encoding' in their "/DR" section.
  • Loading branch information
HannahNgx committed Apr 25, 2024
1 parent 1d8d3be commit 9f1b2f0
Showing 1 changed file with 21 additions and 0 deletions.
21 changes: 21 additions & 0 deletions pypdf/_writer.py
Expand Up @@ -1085,6 +1085,26 @@ def clone_reader_document_root(self, reader: PdfReader) -> None:
NameObject("/Kids")
] = ArrayObject([p.indirect_reference for p in self.flattened_pages])

def correct_annotation_encodings(self) -> None:
    """
    Replace unrecognised font encoding names in annotation resources.

    Walks the page tree rooted at ``self._pages``. For every annotation
    that carries its own ``/DR`` resource dictionary, each simple font
    whose ``/Encoding`` is a name other than ``/WinAnsiEncoding``,
    ``/MacRomanEncoding`` or ``/MacExpertEncoding`` gets that entry
    rewritten to ``/PDFDocEncoding``.

    Left untouched: fonts without ``/Encoding``, fonts whose ``/Encoding``
    is not a name (e.g. an encoding dictionary), and ``/Type0`` fonts.
    The objects are modified in place; nothing is returned.
    """
    # Names that are already valid base encodings and must be preserved.
    predefined = ("/WinAnsiEncoding", "/MacRomanEncoding", "/MacExpertEncoding")

    # Iterative walk so that intermediate /Pages nodes are descended into
    # instead of being mistaken for pages; ``seen`` guards against a
    # malformed tree whose /Kids refer back to an ancestor.
    seen = set()
    pending = [self._pages]
    while pending:
        node = pending.pop().get_object()
        if not isinstance(node, dict) or id(node) in seen:
            continue
        seen.add(id(node))

        if "/Kids" in node:
            pending.extend(node["/Kids"])
            continue
        if "/Annots" not in node:
            continue

        for annot_ref in node["/Annots"]:
            annot = annot_ref.get_object()
            if not isinstance(annot, dict) or "/DR" not in annot:
                continue
            resources = annot["/DR"]
            if not isinstance(resources, dict) or "/Font" not in resources:
                continue

            for font_ref in resources["/Font"].values():
                # Font entries are normally indirect references; resolve
                # them so the real font dictionary is inspected and edited.
                font = font_ref.get_object()
                if not isinstance(font, dict) or "/Encoding" not in font:
                    continue
                # For composite fonts /Encoding names a CMap (for example
                # /Identity-H), not a base encoding: never rewrite it.
                if "/Subtype" in font and font["/Subtype"] == "/Type0":
                    continue
                encoding = font["/Encoding"]
                if isinstance(encoding, NameObject) and encoding not in predefined:
                    font[NameObject("/Encoding")] = NameObject("/PDFDocEncoding")


def clone_document_from_reader(
self,
reader: PdfReader,
Expand All @@ -1106,6 +1126,7 @@ def clone_document_from_reader(
"""
self.clone_reader_document_root(reader)
self._info_obj = self._add_object(DictionaryObject())
self.correct_annotation_encodings()
if TK.INFO in reader.trailer:
self._info = reader._info # actually copy fields
try:
Expand Down

0 comments on commit 9f1b2f0

Please sign in to comment.