Skip to content

Commit

Permalink
Fix 2+ GB reads --without-performance
Browse files Browse the repository at this point in the history
PIL.Image.frombuffer() raises OverflowError on buffers >= 2 GB when
mapping color channels (python-pillow/Pillow#1475).  Work around this by
loading large buffers in smaller chunks and pasting them into the result
image.

Fixes #17.
  • Loading branch information
bgilbert committed Jun 10, 2016
1 parent 6263375 commit 9f514ee
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 10 deletions.
54 changes: 44 additions & 10 deletions openslide/lowlevel.py
Expand Up @@ -29,6 +29,7 @@
rather than in the high-level interface.)
"""

from __future__ import division
from ctypes import *
from itertools import count
import PIL.Image
Expand Down Expand Up @@ -63,16 +64,49 @@ def _load_image(buf, size):
return PIL.Image.frombuffer('RGBA', size, buf, 'raw', 'RGBA', 0, 1)
except ImportError:
def _load_image(buf, size):
'''buf can be a string, but should be a ctypes buffer to avoid an
extra copy in the caller.'''
# First reorder the bytes in a pixel from native-endian aRGB to
# big-endian RGBa to work around limitations in RGBa loader
rawmode = (sys.byteorder == 'little') and 'BGRA' or 'ARGB'
buf = PIL.Image.frombuffer('RGBA', size, buf, 'raw', rawmode, 0, 1)
# Image.tobytes() is named tostring() in Pillow 1.x and PIL
buf = (getattr(buf, 'tobytes', None) or buf.tostring)()
# Now load the image as RGBA, undoing premultiplication
return PIL.Image.frombuffer('RGBA', size, buf, 'raw', 'RGBa', 0, 1)
'''buf must be a buffer.'''

# Load entire buffer at once if possible
MAX_PIXELS_PER_LOAD = (1 << 29) - 1
# Otherwise, use chunks smaller than the maximum to reduce memory
# requirements
PIXELS_PER_LOAD = 1 << 26

def do_load(buf, size):
'''buf can be a string, but should be a ctypes buffer to avoid an
extra copy in the caller.'''
# First reorder the bytes in a pixel from native-endian aRGB to
# big-endian RGBa to work around limitations in RGBa loader
rawmode = (sys.byteorder == 'little') and 'BGRA' or 'ARGB'
buf = PIL.Image.frombuffer('RGBA', size, buf, 'raw', rawmode, 0, 1)
# Image.tobytes() is named tostring() in Pillow 1.x and PIL
buf = (getattr(buf, 'tobytes', None) or buf.tostring)()
# Now load the image as RGBA, undoing premultiplication
return PIL.Image.frombuffer('RGBA', size, buf, 'raw', 'RGBa', 0, 1)

# Fast path for small buffers
w, h = size
if w * h <= MAX_PIXELS_PER_LOAD:
return do_load(buf, size)

# Load in chunks to avoid OverflowError in PIL.Image.frombuffer()
# https://github.com/python-pillow/Pillow/issues/1475
if w > PIXELS_PER_LOAD:
# We could support this, but it seems like overkill
raise ValueError('Width %d is too large (maximum %d)' %
(w, PIXELS_PER_LOAD))
rows_per_load = PIXELS_PER_LOAD // w
img = PIL.Image.new('RGBA', (w, h))
for y in range(0, h, rows_per_load):
rows = min(h - y, rows_per_load)
if sys.version[0] == '2':
chunk = buffer(buf, 4 * y * w, 4 * rows * w)
else:
# PIL.Image.frombuffer() won't take a memoryview or
# bytearray, so we can't avoid copying
chunk = memoryview(buf)[y * w:(y + rows) * w].tobytes()
img.paste(do_load(chunk, (w, rows)), (0, y))
return img

class OpenSlideError(Exception):
"""An error produced by the OpenSlide library.
Expand Down
14 changes: 14 additions & 0 deletions tests/test_openslide.py
Expand Up @@ -120,6 +120,20 @@ def test_read_region_bad_size(self):
self.assertRaises(OpenSlideError,
lambda: self.osr.read_region((0, 0), 1, (400, -5)))

def test_read_region_2GB(self):
    """Reading a 32768x16384 region (2 GB at 4 bytes per pixel) yields
    an image of exactly the requested size."""
    region = self.osr.read_region((1000, 1000), 0, (32768, 16384))
    self.assertEqual(region.size, (32768, 16384))

def test_read_region_2GB_width(self):
    """A region 1 << 29 pixels wide and one pixel high must raise
    ValueError when openslide._convert cannot be imported.

    If openslide._convert is importable the check is not performed
    (presumably the accelerated loader has no width limit -- confirm
    against openslide/lowlevel.py).
    """
    try:
        import openslide._convert
    except ImportError:
        pass
    else:
        # Optional module is present; nothing to verify here.
        return

    def read_too_wide():
        return self.osr.read_region((1000, 1000), 0, (1 << 29, 1))

    self.assertRaises(ValueError, read_too_wide)

def test_thumbnail(self):
    """A thumbnail requested with a (100, 100) size comes back as
    (100, 83) for the test slide."""
    thumbnail = self.osr.get_thumbnail((100, 100))
    self.assertEqual(thumbnail.size, (100, 83))

Expand Down

0 comments on commit 9f514ee

Please sign in to comment.