Skip to content

Commit

Permalink
reduce change
Browse files Browse the repository at this point in the history
  • Loading branch information
francois committed Apr 22, 2024
1 parent 1f3853f commit 393dc6e
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 14 deletions.
36 changes: 24 additions & 12 deletions pypdf/generic/_base.py
Expand Up @@ -580,7 +580,6 @@ def write_to_stream(


class NameObject(str, PdfObject): # noqa: SLOT000
encoding = "utf8"
delimiter_pattern = re.compile(rb"\s+|[\(\)<>\[\]{}/%]")
surfix = b"/"
renumber_table: ClassVar[Dict[int, bytes]] = {
Expand Down Expand Up @@ -618,8 +617,8 @@ def renumber(self) -> bytes:
f"Incorrect first char in NameObject, should start with '/': ({self})",
"6.0.0",
)
out = self[0].encode(self.encoding)
for c in self[1:].encode(self.encoding):
out = self[0].encode(self.CHARSETS[0])
for c in self[1:].encode(self.CHARSETS[0]):
out += self.renumber_table[c]
return out

Expand All @@ -636,21 +635,34 @@ def unnumber(sin: bytes) -> bytes:
i = i + 1
return sin

CHARSETS = ("utf-8", "latin1")

@staticmethod
def read_from_stream(stream: StreamType, pdf: Any) -> "NameObject":  # PdfReader
    """
    Read a name object from ``stream``.

    The first byte must equal ``NameObject.surfix``; the following bytes
    up to the next match of ``NameObject.delimiter_pattern`` are appended
    and the result is passed through ``NameObject.unnumber``. The bytes
    are then decoded with each entry of ``NameObject.CHARSETS`` in order,
    and the first encoding that succeeds wins.

    Args:
        stream: Binary stream positioned on the leading ``/`` of the name.
        pdf: Reader whose ``strict`` attribute decides what happens when
            no charset can decode the name. ``None`` is accepted and
            treated as non-strict.

    Returns:
        The decoded ``NameObject``. When one of the configured charsets
        succeeded, the instance's ``CHARSETS`` is set to that single
        encoding so that ``renumber`` encodes with the same one.

    Raises:
        PdfReadError: If the stream does not start with ``/``, or if no
            charset matches and the reader is strict.
    """
    raw = stream.read(1)
    if raw != NameObject.surfix:
        raise PdfReadError("name read error")
    raw += read_until_regex(stream, NameObject.delimiter_pattern)
    raw = NameObject.unnumber(raw)

    for enc in NameObject.CHARSETS:
        try:
            text = raw.decode(enc)
        except UnicodeDecodeError:
            continue
        name = NameObject(text)
        # Remember the winning encoding on the instance only; the class-level
        # CHARSETS stays untouched.
        name.CHARSETS = [enc]
        return name

    # None of the configured charsets could decode the bytes.
    # A missing reader (pdf=None) must not crash with AttributeError here.
    strict = pdf is not None and pdf.strict
    if strict:
        raise PdfReadError(
            f"Illegal character in NameObject ({raw!r}). "
            "You may need to adjust NameObject.CHARSETS.",
        )
    logger_warning(
        f"Illegal character in NameObject ({raw!r}), "
        "you may need to adjust NameObject.CHARSETS",
        __name__,
    )
    # Last-resort decoding so that non-strict parsing can continue.
    return NameObject(raw.decode("charmap"))


def encode_pdfdocencoding(unicode_string: str) -> bytes:
Expand Down
21 changes: 19 additions & 2 deletions tests/test_generic.py
@@ -1,5 +1,5 @@
"""Test the pypdf.generic module."""

from copy import deepcopy
from io import BytesIO
from pathlib import Path
from unittest.mock import patch
Expand Down Expand Up @@ -236,7 +236,7 @@ def test_name_object():
),
(b"/\x80\x02\x03", "\x80\x02\x03", b"/#80#02#03"),
):
name = NameObject.read_from_stream(BytesIO(test[0]))
name = NameObject.read_from_stream(BytesIO(test[0]), None)
assert name == f"/{test[1]}"
bio = BytesIO()
name.write_to_stream(bio)
Expand Down Expand Up @@ -1047,6 +1047,23 @@ def test_checkboxradiobuttonattributes_opt():
assert "/Opt" in CheckboxRadioButtonAttributes.attributes_dict()


def test_name_object_invalid_decode():
    """Check both outcomes when no configured charset can decode a name.

    With ``NameObject.CHARSETS`` restricted to UTF-8, the byte ``0x80`` is
    undecodable: a strict reader must raise ``PdfReadError``, a non-strict
    reader must still return a ``NameObject`` via the charmap fallback.
    """
    # CHARSETS is class-level state shared by every test; always restore it.
    charsets = deepcopy(NameObject.CHARSETS)
    try:
        NameObject.CHARSETS = ("utf-8",)
        stream = BytesIO(b"/\x80\x02\x03")

        # strict:
        with pytest.raises(PdfReadError) as exc:
            NameObject.read_from_stream(stream, ReaderDummy(strict=True))
        assert "Illegal character in NameObject " in exc.value.args[0]

        # non-strict:
        stream.seek(0)
        name = NameObject.read_from_stream(stream, ReaderDummy(strict=False))
        # The fallback has to yield a usable name, not merely avoid raising.
        assert isinstance(name, NameObject)
        assert name == "/\x80\x02\x03"
    finally:
        NameObject.CHARSETS = charsets


def test_indirect_object_invalid_read():
stream = BytesIO(b"0 1 s")
with pytest.raises(PdfReadError) as exc:
Expand Down

0 comments on commit 393dc6e

Please sign in to comment.