Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stop flattening EXIF IFD into getexif() #4947

Merged
merged 7 commits into from Mar 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions Tests/test_file_jpeg.py
Expand Up @@ -264,11 +264,11 @@ def test_empty_exif_gps(self):
assert exif[0x0112] == Image.TRANSVERSE

# Assert that the GPS IFD is present and empty
assert exif[0x8825] == {}
assert exif.get_ifd(0x8825) == {}

transposed = ImageOps.exif_transpose(im)
exif = transposed.getexif()
assert exif[0x8825] == {}
assert exif.get_ifd(0x8825) == {}

# Assert that it was transposed
assert 0x0112 not in exif
Expand Down
37 changes: 25 additions & 12 deletions Tests/test_image.py
Expand Up @@ -667,43 +667,43 @@ def test_exif_jpeg(self, tmp_path):
exif = im.getexif()
assert 258 not in exif
assert 274 in exif
assert 40960 in exif
assert exif[40963] == 450
assert 282 in exif
assert exif[296] == 2
assert exif[11] == "gThumb 3.0.1"

out = str(tmp_path / "temp.jpg")
exif[258] = 8
del exif[274]
del exif[40960]
exif[40963] = 455
del exif[282]
exif[296] = 455
exif[11] = "Pillow test"
im.save(out, exif=exif)
with Image.open(out) as reloaded:
reloaded_exif = reloaded.getexif()
assert reloaded_exif[258] == 8
assert 274 not in reloaded_exif
assert 40960 not in reloaded_exif
assert reloaded_exif[40963] == 455
assert 282 not in reloaded_exif
assert reloaded_exif[296] == 455
assert reloaded_exif[11] == "Pillow test"

with Image.open("Tests/images/no-dpi-in-exif.jpg") as im: # Big endian
exif = im.getexif()
assert 258 not in exif
assert 40962 in exif
assert exif[40963] == 200
assert 306 in exif
assert exif[274] == 1
assert exif[305] == "Adobe Photoshop CC 2017 (Macintosh)"

out = str(tmp_path / "temp.jpg")
exif[258] = 8
del exif[34665]
exif[40963] = 455
del exif[306]
exif[274] = 455
exif[305] = "Pillow test"
im.save(out, exif=exif)
with Image.open(out) as reloaded:
reloaded_exif = reloaded.getexif()
assert reloaded_exif[258] == 8
assert 34665 not in reloaded_exif
assert reloaded_exif[40963] == 455
assert 306 not in reloaded_exif
assert reloaded_exif[274] == 455
assert reloaded_exif[305] == "Pillow test"

@skip_unless_feature("webp")
Expand Down Expand Up @@ -756,6 +756,19 @@ def test_exif_interop(self):
4098: 1704,
}

reloaded_exif = Image.Exif()
reloaded_exif.load(exif.tobytes())
assert reloaded_exif.get_ifd(0xA005) == exif.get_ifd(0xA005)

def test_exif_ifd(self):
with Image.open("Tests/images/flower.jpg") as im:
exif = im.getexif()
del exif.get_ifd(0x8769)[0xA005]

reloaded_exif = Image.Exif()
reloaded_exif.load(exif.tobytes())
assert reloaded_exif.get_ifd(0x8769) == exif.get_ifd(0x8769)

@pytest.mark.parametrize(
"test_module",
[PIL, Image],
Expand Down
198 changes: 111 additions & 87 deletions src/PIL/Image.py
Expand Up @@ -3307,11 +3307,11 @@ def _fixup_dict(self, src_dict):
# returns a dict with any single item tuples/lists as individual values
return {k: self._fixup(v) for k, v in src_dict.items()}

def _get_ifd_dict(self, tag):
def _get_ifd_dict(self, offset):
try:
# an offset pointer to the location of the nested embedded IFD.
# It should be a long, but may be corrupted.
self.fp.seek(self[tag])
self.fp.seek(offset)
except (KeyError, TypeError):
pass
else:
Expand Down Expand Up @@ -3349,11 +3349,20 @@ def load(self, data):
self.fp.seek(self._info.next)
self._info.load(self.fp)

def _get_merged_dict(self):
merged_dict = dict(self)

# get EXIF extension
ifd = self._get_ifd_dict(0x8769)
if ifd:
self._data.update(ifd)
self._ifds[0x8769] = ifd
if 0x8769 in self:
ifd = self._get_ifd_dict(self[0x8769])
if ifd:
merged_dict.update(ifd)

# GPS
if 0x8825 in self:
merged_dict[0x8825] = self._get_ifd_dict(self[0x8825])

return merged_dict

def tobytes(self, offset=8):
from . import TiffImagePlugin
Expand All @@ -3364,91 +3373,108 @@ def tobytes(self, offset=8):
head = b"MM\x00\x2A\x00\x00\x00\x08"
ifd = TiffImagePlugin.ImageFileDirectory_v2(ifh=head)
for tag, value in self.items():
if tag in [0x8769, 0x8225, 0x8825] and not isinstance(value, dict):
value = self.get_ifd(tag)
Copy link
Contributor

@kkopachev kkopachev Oct 6, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does not work for interop since it's inside of Exif IFD.

im = Image.open('10_years_of_Wikipedia_by_Guillaume_Paumier.jpg')
good_exif = im.getexif()

bad_exif = Exif()
bad_exif.load(good_exif.tobytes())

print("Good interop dictionary size: %d" % len(good_exif.get_ifd(0xA005)))
print(" Bad interop dictionary size: %d" % len(bad_exif.get_ifd(0xA005)))

I suspect same for makernote too, but since makernote is vendor specific, I think it would be safer to make it read only property.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I've added another commit to cover Interop.

Copy link
Contributor

@kkopachev kkopachev Oct 7, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've being thinking through this on and off for some time.
Thinking of consumer of this class, it would make sense when reading Exif IFD to get Interop value decoded into a dict at that time. Or shimmed with ImageFileDirectory_v2 instance. Otherwise caller must know Interop tag ID, must know to call get_ifd, etc. So that's a bit inconvenient and adds extra knowledge to consumer.
Same for Makernote, however saving it back we don't want to encode it back to vendor-specific format and probably worth just copying whatever bytes were there originally.
On the other hand Exif is quite complicated and some knowledge from consumer is expected anyway.
🤷
What do you think of this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another benefit of using ImageFileDirectory_v2 as a shim for sub-IFDs is that we get lazy loading.

if (
tag == 0x8769
and 0xA005 in value
and not isinstance(value[0xA005], dict)
):
value = value.copy()
value[0xA005] = self.get_ifd(0xA005)
ifd[tag] = value
return b"Exif\x00\x00" + head + ifd.tobytes(offset)

def get_ifd(self, tag):
if tag not in self._ifds and tag in self:
if tag in [0x8825, 0xA005]:
# gpsinfo, interop
self._ifds[tag] = self._get_ifd_dict(tag)
elif tag == 0x927C: # makernote
from .TiffImagePlugin import ImageFileDirectory_v2

if self[0x927C][:8] == b"FUJIFILM":
exif_data = self[0x927C]
ifd_offset = i32le(exif_data, 8)
ifd_data = exif_data[ifd_offset:]

makernote = {}
for i in range(0, struct.unpack("<H", ifd_data[:2])[0]):
ifd_tag, typ, count, data = struct.unpack(
"<HHL4s", ifd_data[i * 12 + 2 : (i + 1) * 12 + 2]
)
try:
unit_size, handler = ImageFileDirectory_v2._load_dispatch[
typ
]
except KeyError:
continue
size = count * unit_size
if size > 4:
(offset,) = struct.unpack("<L", data)
data = ifd_data[offset - 12 : offset + size - 12]
else:
data = data[:size]

if len(data) != size:
warnings.warn(
"Possibly corrupt EXIF MakerNote data. "
f"Expecting to read {size} bytes but only got "
f"{len(data)}. Skipping tag {ifd_tag}"
if tag not in self._ifds:
if tag in [0x8769, 0x8825]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think __getitem__ method should be updated as well to call get_ifd for Exif IFD. I can't comment on non-changed code, but currently it's line 3474 should be if tag in [0x8769, 0x8825]:

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While I can see that this would be more useful for most cases, if you wanted to actually read the IFD offset, you wouldn't be able to anymore (apart from looking directly into _data or _info)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see your point.
I think getting offset of Exif IFD is useless by itself, unless you want manually parse IFD structure yourself, in which case you could do so with raw exif bytes.
Additionally, I think code should be consistent: either decode both GPS and Exif IFDs into dictionaries on read in __getitem__, or return offset in both cases.

# exif, gpsinfo
if tag in self:
self._ifds[tag] = self._get_ifd_dict(self[tag])
elif tag in [0xA005, 0x927C]:
# interop, makernote
if 0x8769 not in self._ifds:
self.get_ifd(0x8769)
tag_data = self._ifds[0x8769][tag]
if tag == 0x927C:
# makernote
from .TiffImagePlugin import ImageFileDirectory_v2

if tag_data[:8] == b"FUJIFILM":
ifd_offset = i32le(tag_data, 8)
ifd_data = tag_data[ifd_offset:]

makernote = {}
for i in range(0, struct.unpack("<H", ifd_data[:2])[0]):
ifd_tag, typ, count, data = struct.unpack(
"<HHL4s", ifd_data[i * 12 + 2 : (i + 1) * 12 + 2]
)
continue

if not data:
continue

makernote[ifd_tag] = handler(
ImageFileDirectory_v2(), data, False
)
self._ifds[0x927C] = dict(self._fixup_dict(makernote))
elif self.get(0x010F) == "Nintendo":
ifd_data = self[0x927C]

makernote = {}
for i in range(0, struct.unpack(">H", ifd_data[:2])[0]):
ifd_tag, typ, count, data = struct.unpack(
">HHL4s", ifd_data[i * 12 + 2 : (i + 1) * 12 + 2]
)
if ifd_tag == 0x1101:
# CameraInfo
(offset,) = struct.unpack(">L", data)
self.fp.seek(offset)

camerainfo = {"ModelID": self.fp.read(4)}

self.fp.read(4)
# Seconds since 2000
camerainfo["TimeStamp"] = i32le(self.fp.read(12))

self.fp.read(4)
camerainfo["InternalSerialNumber"] = self.fp.read(4)

self.fp.read(12)
parallax = self.fp.read(4)
handler = ImageFileDirectory_v2._load_dispatch[
TiffTags.FLOAT
][1]
camerainfo["Parallax"] = handler(
ImageFileDirectory_v2(), parallax, False
try:
(
unit_size,
handler,
) = ImageFileDirectory_v2._load_dispatch[typ]
except KeyError:
continue
size = count * unit_size
if size > 4:
(offset,) = struct.unpack("<L", data)
data = ifd_data[offset - 12 : offset + size - 12]
else:
data = data[:size]

if len(data) != size:
warnings.warn(
"Possibly corrupt EXIF MakerNote data. "
f"Expecting to read {size} bytes but only got "
f"{len(data)}. Skipping tag {ifd_tag}"
)
continue

if not data:
continue

makernote[ifd_tag] = handler(
ImageFileDirectory_v2(), data, False
)

self.fp.read(4)
camerainfo["Category"] = self.fp.read(2)

makernote = {0x1101: dict(self._fixup_dict(camerainfo))}
self._ifds[0x927C] = makernote
self._ifds[tag] = dict(self._fixup_dict(makernote))
elif self.get(0x010F) == "Nintendo":
makernote = {}
for i in range(0, struct.unpack(">H", tag_data[:2])[0]):
ifd_tag, typ, count, data = struct.unpack(
">HHL4s", tag_data[i * 12 + 2 : (i + 1) * 12 + 2]
)
if ifd_tag == 0x1101:
# CameraInfo
(offset,) = struct.unpack(">L", data)
self.fp.seek(offset)

camerainfo = {"ModelID": self.fp.read(4)}

self.fp.read(4)
# Seconds since 2000
camerainfo["TimeStamp"] = i32le(self.fp.read(12))

self.fp.read(4)
camerainfo["InternalSerialNumber"] = self.fp.read(4)

self.fp.read(12)
parallax = self.fp.read(4)
handler = ImageFileDirectory_v2._load_dispatch[
TiffTags.FLOAT
][1]
camerainfo["Parallax"] = handler(
ImageFileDirectory_v2(), parallax, False
)

self.fp.read(4)
camerainfo["Category"] = self.fp.read(2)

makernote = {0x1101: dict(self._fixup_dict(camerainfo))}
self._ifds[tag] = makernote
else:
# interop
self._ifds[tag] = self._get_ifd_dict(tag_data)
return self._ifds.get(tag, {})

def __str__(self):
Expand All @@ -3468,8 +3494,6 @@ def __len__(self):
def __getitem__(self, tag):
if self._info is not None and tag not in self._data and tag in self._info:
self._data[tag] = self._fixup(self._info[tag])
if tag == 0x8825:
self._data[tag] = self.get_ifd(tag)
del self._info[tag]
return self._data[tag]

Expand Down
2 changes: 1 addition & 1 deletion src/PIL/JpegImagePlugin.py
Expand Up @@ -478,7 +478,7 @@ def _getmp(self):
def _getexif(self):
if "exif" not in self.info:
return None
return dict(self.getexif())
return self.getexif()._get_merged_dict()
radarhere marked this conversation as resolved.
Show resolved Hide resolved


def _getmp(self):
Expand Down
2 changes: 1 addition & 1 deletion src/PIL/MpoImagePlugin.py
Expand Up @@ -84,7 +84,7 @@ def seek(self, frame):

mptype = self.mpinfo[0xB002][frame]["Attribute"]["MPType"]
if mptype.startswith("Large Thumbnail"):
exif = self.getexif()
exif = self.getexif().get_ifd(0x8769)
if 40962 in exif and 40963 in exif:
self._size = (exif[40962], exif[40963])
elif "exif" in self.info:
Expand Down
2 changes: 1 addition & 1 deletion src/PIL/PngImagePlugin.py
Expand Up @@ -968,7 +968,7 @@ def _getexif(self):
self.load()
if "exif" not in self.info and "Raw profile type exif" not in self.info:
return None
return dict(self.getexif())
return self.getexif()._get_merged_dict()

def getexif(self):
if "exif" not in self.info:
Expand Down
1 change: 1 addition & 0 deletions src/PIL/TiffTags.py
Expand Up @@ -184,6 +184,7 @@ def lookup(tag):
34665: ("ExifIFD", LONG, 1),
34675: ("ICCProfile", UNDEFINED, 1),
34853: ("GPSInfoIFD", LONG, 1),
40965: ("InteroperabilityIFD", LONG, 1),
# MPInfo
45056: ("MPFVersion", UNDEFINED, 1),
45057: ("NumberOfImages", LONG, 1),
Expand Down
2 changes: 1 addition & 1 deletion src/PIL/WebPImagePlugin.py
Expand Up @@ -96,7 +96,7 @@ def _open(self):
def _getexif(self):
if "exif" not in self.info:
return None
return dict(self.getexif())
return self.getexif()._get_merged_dict()

def seek(self, frame):
if not self._seek_check(frame):
Expand Down