From 49fc09d59038ca4cc219258aca2481111d33592a Mon Sep 17 00:00:00 2001 From: Glandos Date: Mon, 18 Feb 2019 16:05:24 +0100 Subject: [PATCH 1/6] use subclass for parsing EXIF data The old method (using dict() and _fixup_dict) creates clones from IFD_v1, but it reads all items when iterating, losing all benefits from the laziness of IFD. The new method uses a custom subclass that tries to prevent all iterations as much as possible, keeping all tags undecoded. The return value isn't a dict anymore, but a subclass of ImageFileDirectory_v1 that doesn't support to_v2(). All decoded tags are formatted as before. --- src/PIL/JpegImagePlugin.py | 85 +++++++++++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 20 deletions(-) diff --git a/src/PIL/JpegImagePlugin.py b/src/PIL/JpegImagePlugin.py index 541b84ee803..e092cee850f 100644 --- a/src/PIL/JpegImagePlugin.py +++ b/src/PIL/JpegImagePlugin.py @@ -469,18 +469,64 @@ def _getmp(self): return _getmp(self) -def _fixup_dict(src_dict): - # Helper function for _getexif() - # returns a dict with any single item tuples/lists as individual values - def _fixup(value): - try: - if len(value) == 1 and not isinstance(value, dict): - return value[0] - except Exception: - pass - return value - - return {k: _fixup(v) for k, v in src_dict.items()} +def _fixup(value): + try: + if len(value) == 1 and not isinstance(value, dict): + return value[0] + except Exception: + pass + return value + + +class ExifImageFileDirectory(TiffImagePlugin.ImageFileDirectory_v1): + ''' + Specialization for parsing EXIF data: + - Remove support for v2 to avoid useless computations + - custom __setitem__ to supports IFD values + - values are "fixed up" so that 1-size tuples are expanded + - custom update() to avoid iterating and expanding non parsed data + + The goal is to use the lazyness of ImageFileDirectory_v1 in _getexif(). 
+ ''' + + def to_v2(self): + raise NotImplementedError() + + def _setitem(self, tag, value, legacy_api): + super()._setitem(tag, value, legacy_api) + if legacy_api: + val = self._tags_v1[tag] + if not isinstance(val, (tuple, bytes)): + val = val, + self._tags_v1[tag] = _fixup(val) + + def __setitem__(self, tag, value): + if isinstance(value, TiffImagePlugin.ImageFileDirectory_v2): + self._tags_v1[tag] = value + else: + super().__setitem__(tag, value) + + def __getitem__(self, tag): + if tag not in self._tags_v1: # unpack on the fly + data = self._tagdata[tag] + typ = self.tagtype[tag] + size, handler = self._load_dispatch[typ] + # We don't support v2 + self._setitem(tag, handler(self, data, True), True) + val = self._tags_v1[tag] + # Don't try to convert as tuple, it is done in _setitem + return val + + def update(self, *args, **kwds): + if args and isinstance(args[0], TiffImagePlugin.ImageFileDirectory_v2): + other = args[0] + # custom update + self._tags_v1.update(other._tags_v1) + self._tags_v2.update(other._tags_v2) + self._tagdata.update(other._tagdata) + self.tagtype.update(other.tagtype) + else: + super().update(*args, **kwds) def _getexif(self): @@ -503,10 +549,9 @@ def _getexif(self): fp = io.BytesIO(data[6:]) head = fp.read(8) # process dictionary - info = TiffImagePlugin.ImageFileDirectory_v1(head) - fp.seek(info.next) - info.load(fp) - exif = dict(_fixup_dict(info)) + exif = ExifImageFileDirectory(head) + fp.seek(exif.next) + exif.load(fp) # get exif extension try: # exif field 0x8769 is an offset pointer to the location @@ -516,9 +561,9 @@ def _getexif(self): except (KeyError, TypeError): pass else: - info = TiffImagePlugin.ImageFileDirectory_v1(head) + info = ExifImageFileDirectory(head) info.load(fp) - exif.update(_fixup_dict(info)) + exif.update(info) # get gpsinfo extension try: # exif field 0x8825 is an offset pointer to the location @@ -528,9 +573,9 @@ def _getexif(self): except (KeyError, TypeError): pass else: - info = 
TiffImagePlugin.ImageFileDirectory_v1(head) + info = ExifImageFileDirectory(head) info.load(fp) - exif[0x8825] = _fixup_dict(info) + exif[0x8825] = info # Cache the result for future use self.info["parsed_exif"] = exif From 60c56140867185291b239f9d79e77a2c59f545f6 Mon Sep 17 00:00:00 2001 From: Glandos Date: Mon, 18 Feb 2019 21:21:47 +0100 Subject: [PATCH 2/6] py2 compatibility --- src/PIL/JpegImagePlugin.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/PIL/JpegImagePlugin.py b/src/PIL/JpegImagePlugin.py index e092cee850f..fed21ef758f 100644 --- a/src/PIL/JpegImagePlugin.py +++ b/src/PIL/JpegImagePlugin.py @@ -493,7 +493,7 @@ def to_v2(self): raise NotImplementedError() def _setitem(self, tag, value, legacy_api): - super()._setitem(tag, value, legacy_api) + TiffImagePlugin.ImageFileDirectory_v1._setitem(self, tag, value, legacy_api) if legacy_api: val = self._tags_v1[tag] if not isinstance(val, (tuple, bytes)): @@ -504,7 +504,7 @@ def __setitem__(self, tag, value): if isinstance(value, TiffImagePlugin.ImageFileDirectory_v2): self._tags_v1[tag] = value else: - super().__setitem__(tag, value) + TiffImagePlugin.ImageFileDirectory_v1.__setitem__(self, tag, value) def __getitem__(self, tag): if tag not in self._tags_v1: # unpack on the fly @@ -526,7 +526,7 @@ def update(self, *args, **kwds): self._tagdata.update(other._tagdata) self.tagtype.update(other.tagtype) else: - super().update(*args, **kwds) + TiffImagePlugin.ImageFileDirectory_v1.update(self, *args, **kwds) def _getexif(self): From 475b4172a9073e77813603481dc77703c4a5e8e5 Mon Sep 17 00:00:00 2001 From: Glandos Date: Wed, 20 Feb 2019 10:10:06 +0100 Subject: [PATCH 3/6] remove conversion to tuple The _fixup does exactly the opposite --- src/PIL/JpegImagePlugin.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/PIL/JpegImagePlugin.py b/src/PIL/JpegImagePlugin.py index fed21ef758f..8efa977406d 100644 --- a/src/PIL/JpegImagePlugin.py +++ 
b/src/PIL/JpegImagePlugin.py @@ -496,8 +496,6 @@ def _setitem(self, tag, value, legacy_api): TiffImagePlugin.ImageFileDirectory_v1._setitem(self, tag, value, legacy_api) if legacy_api: val = self._tags_v1[tag] - if not isinstance(val, (tuple, bytes)): - val = val, self._tags_v1[tag] = _fixup(val) def __setitem__(self, tag, value): @@ -514,7 +512,7 @@ def __getitem__(self, tag): # We don't support v2 self._setitem(tag, handler(self, data, True), True) val = self._tags_v1[tag] - # Don't try to convert as tuple, it is done in _setitem + # Don't try to convert as tuple, it is undone by the _fixup function return val def update(self, *args, **kwds): From 56dca018aadfc7a8739e63919e9184bde16f5602 Mon Sep 17 00:00:00 2001 From: Andrew Murray <3112309+radarhere@users.noreply.github.com> Date: Sat, 23 Mar 2019 22:12:03 +0100 Subject: [PATCH 4/6] Update src/PIL/JpegImagePlugin.py Co-Authored-By: Glandos --- src/PIL/JpegImagePlugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PIL/JpegImagePlugin.py b/src/PIL/JpegImagePlugin.py index 8efa977406d..9fcb08d646d 100644 --- a/src/PIL/JpegImagePlugin.py +++ b/src/PIL/JpegImagePlugin.py @@ -479,7 +479,7 @@ def _fixup(value): class ExifImageFileDirectory(TiffImagePlugin.ImageFileDirectory_v1): - ''' + """ Specialization for parsing EXIF data: - Remove support for v2 to avoid useless computations - custom __setitem__ to supports IFD values From d48b98a728b5d1ba12d456759e5135ad87d76269 Mon Sep 17 00:00:00 2001 From: Andrew Murray <3112309+radarhere@users.noreply.github.com> Date: Sat, 23 Mar 2019 22:12:10 +0100 Subject: [PATCH 5/6] Update src/PIL/JpegImagePlugin.py Co-Authored-By: Glandos --- src/PIL/JpegImagePlugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PIL/JpegImagePlugin.py b/src/PIL/JpegImagePlugin.py index 9fcb08d646d..2f012e622c9 100644 --- a/src/PIL/JpegImagePlugin.py +++ b/src/PIL/JpegImagePlugin.py @@ -487,7 +487,7 @@ class 
ExifImageFileDirectory(TiffImagePlugin.ImageFileDirectory_v1): - custom update() to avoid iterating and expanding non parsed data The goal is to use the lazyness of ImageFileDirectory_v1 in _getexif(). - ''' + """ def to_v2(self): raise NotImplementedError() From c20d30128b784cdbccb3aa727a6d08fc9c19693a Mon Sep 17 00:00:00 2001 From: Hugo Date: Sat, 30 Mar 2019 15:21:45 +0100 Subject: [PATCH 6/6] Fix typo Co-Authored-By: Glandos --- src/PIL/JpegImagePlugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PIL/JpegImagePlugin.py b/src/PIL/JpegImagePlugin.py index 2f012e622c9..4419d2d5a70 100644 --- a/src/PIL/JpegImagePlugin.py +++ b/src/PIL/JpegImagePlugin.py @@ -486,7 +486,7 @@ class ExifImageFileDirectory(TiffImagePlugin.ImageFileDirectory_v1): - values are "fixed up" so that 1-size tuples are expanded - custom update() to avoid iterating and expanding non parsed data - The goal is to use the lazyness of ImageFileDirectory_v1 in _getexif(). + The goal is to use the laziness of ImageFileDirectory_v1 in _getexif(). """ def to_v2(self):