Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved handling of PPM header #5121

Merged
merged 23 commits into from Mar 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
59 changes: 57 additions & 2 deletions Tests/test_file_ppm.py
Expand Up @@ -3,7 +3,7 @@

import pytest

from PIL import Image
from PIL import Image, UnidentifiedImageError

from .helper import assert_image_equal_tofile, assert_image_similar, hopper

Expand Down Expand Up @@ -50,15 +50,70 @@ def test_pnm(tmp_path):
assert_image_equal_tofile(im, f)


def test_magic(tmp_path):
path = str(tmp_path / "temp.ppm")
with open(path, "wb") as f:
f.write(b"PyInvalid")

with pytest.raises(UnidentifiedImageError):
with Image.open(path):
pass
radarhere marked this conversation as resolved.
Show resolved Hide resolved


radarhere marked this conversation as resolved.
Show resolved Hide resolved
def test_header_with_comments(tmp_path):
path = str(tmp_path / "temp.ppm")
with open(path, "wb") as f:
f.write(b"P6 #comment\n#comment\r12#comment\r8\n128 #comment\n255\n")

with Image.open(path) as im:
assert im.size == (128, 128)


def test_non_integer_token(tmp_path):
path = str(tmp_path / "temp.ppm")
with open(path, "wb") as f:
f.write(b"P6\nTEST")

with pytest.raises(ValueError):
with Image.open(path):
pass
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not quite on board with this one. The idea here is that b"\x00" is not caught by

if c > b"\x79":
raise ValueError("Expected ASCII value, found binary")

even though \x00 is not greater than \x79, and only for it to be raised as a different ValueError later when it turns out that b"128\x00" is not an integer.

There isn't a valid scenario where the token/magic needs to be something other than a number or a letter. I've pushed a commit to change this test into a check that a ValueError is raised when a token is not an integer.



def test_token_too_long(tmp_path):
path = str(tmp_path / "temp.ppm")
with open(path, "wb") as f:
f.write(b"P6\n 01234567890")

with pytest.raises(ValueError) as e:
with Image.open(path):
pass

assert str(e.value) == "Token too long in file header: b'01234567890'"


def test_too_many_colors(tmp_path):
path = str(tmp_path / "temp.ppm")
with open(path, "wb") as f:
f.write(b"P6\n1 1\n1000\n")

with pytest.raises(ValueError) as e:
with Image.open(path):
pass

assert str(e.value) == "Too many colors for band: 1000"


def test_truncated_file(tmp_path):
path = str(tmp_path / "temp.pgm")
with open(path, "w") as f:
f.write("P6")

with pytest.raises(ValueError):
with pytest.raises(ValueError) as e:
with Image.open(path):
pass

assert str(e.value) == "Reached EOF while reading header"


def test_neg_ppm():
# Storage.c accepted negative values for xsize, ysize. the
Expand Down
78 changes: 44 additions & 34 deletions src/PIL/PpmImagePlugin.py
Expand Up @@ -49,26 +49,46 @@ class PpmImageFile(ImageFile.ImageFile):
format = "PPM"
format_description = "Pbmplus image"

def _token(self, s=b""):
while True: # read until next whitespace
def _read_magic(self):
magic = b""
# read until whitespace or longest available magic number
for _ in range(6):
c = self.fp.read(1)
if not c or c in b_whitespace:
break
if c > b"\x79":
raise ValueError("Expected ASCII value, found binary")
radarhere marked this conversation as resolved.
Show resolved Hide resolved
s = s + c
if len(s) > 9:
raise ValueError("Expected int, got > 9 digits")
return s
magic += c
return magic

def _open(self):
def _read_token(self):
token = b""
while len(token) <= 10: # read until next whitespace or limit of 10 characters
c = self.fp.read(1)
if not c:
break
elif c in b_whitespace: # token ended
if not token:
# skip whitespace at start
continue
break
elif c == b"#":
# ignores rest of the line; stops at CR, LF or EOF
while self.fp.read(1) not in b"\r\n":
pass
continue
token += c
if not token:
# Token was not even 1 byte
raise ValueError("Reached EOF while reading header")
elif len(token) > 10:
raise ValueError(f"Token too long in file header: {token}")
return token

# check magic
s = self.fp.read(1)
if s != b"P":
def _open(self):
magic_number = self._read_magic()
try:
mode = MODES[magic_number]
except KeyError:
raise SyntaxError("not a PPM file")
radarhere marked this conversation as resolved.
Show resolved Hide resolved
magic_number = self._token(s)
mode = MODES[magic_number]

self.custom_mimetype = {
b"P4": "image/x-portable-bitmap",
Expand All @@ -83,29 +103,19 @@ def _open(self):
self.mode = rawmode = mode

for ix in range(3):
while True:
while True:
s = self.fp.read(1)
if s not in b_whitespace:
break
if s == b"":
raise ValueError("File does not extend beyond magic number")
if s != b"#":
break
s = self.fp.readline()
s = int(self._token(s))
if ix == 0:
xsize = s
elif ix == 1:
ysize = s
token = int(self._read_token())
if ix == 0: # token is the x size
xsize = token
elif ix == 1: # token is the y size
ysize = token
if mode == "1":
break
elif ix == 2:
# maxgrey
if s > 255:
elif ix == 2: # token is maxval
maxval = token
if maxval > 255:
if not mode == "L":
raise ValueError(f"Too many colors for band: {s}")
if s < 2 ** 16:
raise ValueError(f"Too many colors for band: {token}")
radarhere marked this conversation as resolved.
Show resolved Hide resolved
if maxval < 2 ** 16:
self.mode = "I"
rawmode = "I;16B"
else:
Expand Down