
Performance: Require BufRead instead of just Read for inputs. #427

Draft · wants to merge 3 commits into base: next
7 changes: 6 additions & 1 deletion CHANGES.md
@@ -1,4 +1,9 @@
## Unreleased
## Unreleased (TODO: breaking changes require increasing semver to 0.18.x)

* Performance improvements.
* Breaking API changes (required by the performance-related work):
- Changed the generic constraints of `png::Decoder<R>` and `png::Reader<R>`
to require `R: BufRead` (instead of just `R: Read`).

## 0.17.10

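Since the decoder no longer wraps its input in an internal `BufReader`, callers migrating from 0.17.x must supply their own buffering. A minimal migration sketch (not part of this diff; the file path is illustrative):

```rust
use std::fs::File;
use std::io::BufReader;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Before this change, `png::Decoder::new(File::open("in.png")?)` buffered internally.
    // Now the caller picks the buffering strategy explicitly.
    let decoder = png::Decoder::new(BufReader::new(File::open("in.png")?));
    let mut reader = decoder.read_info()?;
    let mut buf = vec![0; reader.output_buffer_size()];
    let info = reader.next_frame(&mut buf)?;
    println!("decoded a {}x{} frame", info.width, info.height);
    Ok(())
}
```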
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "png"
version = "0.17.10"
version = "0.17.10" # TODO: Increase semver to account for breaking API changes
license = "MIT OR Apache-2.0"

description = "PNG decoding and encoding library in pure Rust"
Expand Down
6 changes: 3 additions & 3 deletions examples/change-png-info.rs
@@ -1,17 +1,17 @@
/// Tests "editing"/re-encoding of an image:
/// decoding, editing, re-encoding
use std::fs::File;
use std::io::BufWriter;
use std::io::{BufReader, BufWriter};
use std::path::Path;
pub type BoxResult<T> = Result<T, Box<dyn std::error::Error + Send + Sync>>;

fn main() -> BoxResult<()> {
// # Decode
// Read test image from pngsuite
let path_in = Path::new(r"./tests/pngsuite/basi0g01.png");
// The decoder is a build for reader and can be used to set various decoding options
// The decoder is a builder for reader and can be used to set various decoding options
// via `Transformations`. The default output transformation is `Transformations::IDENTITY`.
let decoder = png::Decoder::new(File::open(path_in)?);
let decoder = png::Decoder::new(BufReader::new(File::open(path_in)?));
let mut reader = decoder.read_info()?;
// Allocate the output buffer.
let png_info = reader.info();
4 changes: 2 additions & 2 deletions examples/show.rs
@@ -8,12 +8,12 @@ use glium::{
texture::{ClientFormat, RawImage2d},
BlitTarget, Rect, Surface,
};
use std::{borrow::Cow, env, fs::File, io, path};
use std::{borrow::Cow, env, fs::File, io, io::BufReader, path};

/// Load the image using `png`
fn load_image(path: &path::PathBuf) -> io::Result<RawImage2d<'static, u8>> {
use png::ColorType::*;
let mut decoder = png::Decoder::new(File::open(path)?);
let mut decoder = png::Decoder::new(BufReader::new(File::open(path)?));
decoder.set_transformations(png::Transformations::normalize_to_color8());
let mut reader = decoder.read_info()?;
let mut img_data = vec![0; reader.output_buffer_size()];
35 changes: 20 additions & 15 deletions src/decoder/mod.rs
@@ -4,7 +4,7 @@ mod zlib;
pub use self::stream::{DecodeOptions, Decoded, DecodingError, StreamingDecoder};
use self::stream::{FormatErrorInner, CHUNCK_BUFFER_SIZE};

use std::io::{BufRead, BufReader, Read};
use std::io::BufRead;
use std::mem;
use std::ops::Range;

@@ -79,7 +79,7 @@ impl Default for Limits {
}

/// PNG Decoder
pub struct Decoder<R: Read> {
pub struct Decoder<R: BufRead> {
read_decoder: ReadDecoder<R>,
/// Output transformations
transform: Transformations,
@@ -133,20 +133,20 @@ impl<'data> Row<'data> {
}
}

impl<R: Read> Decoder<R> {
impl<R: BufRead> Decoder<R> {
/// Create a new decoder configuration with default limits.
pub fn new(r: R) -> Decoder<R> {
Decoder::new_with_limits(r, Limits::default())
}

/// Create a new decoder configuration with custom limits.
pub fn new_with_limits(r: R, limits: Limits) -> Decoder<R> {
pub fn new_with_limits(reader: R, limits: Limits) -> Decoder<R> {
let mut decoder = StreamingDecoder::new();
decoder.limits = limits;

Decoder {
read_decoder: ReadDecoder {
reader: BufReader::with_capacity(CHUNCK_BUFFER_SIZE, r),
reader,
decoder,
at_eof: false,
},
@@ -155,13 +155,13 @@ impl<R: Read> Decoder<R> {
}

/// Create a new decoder configuration with custom `DecodeOptions`.
pub fn new_with_options(r: R, decode_options: DecodeOptions) -> Decoder<R> {
pub fn new_with_options(reader: R, decode_options: DecodeOptions) -> Decoder<R> {
let mut decoder = StreamingDecoder::new_with_options(decode_options);
decoder.limits = Limits::default();

Decoder {
read_decoder: ReadDecoder {
reader: BufReader::with_capacity(CHUNCK_BUFFER_SIZE, r),
reader,
decoder,
at_eof: false,
},
@@ -179,17 +179,20 @@ impl<R: Read> Decoder<R> {
///
/// ```
/// use std::fs::File;
/// use std::io::BufReader;
/// use png::{Decoder, Limits};
/// // This image is 32×32, 1 bit per pixel. The reader buffers one row, which requires 4 bytes.
/// let mut limits = Limits::default();
/// limits.bytes = 3;
/// let mut decoder = Decoder::new_with_limits(File::open("tests/pngsuite/basi0g01.png").unwrap(), limits);
/// let reader = BufReader::new(File::open("tests/pngsuite/basi0g01.png").unwrap());
/// let mut decoder = Decoder::new_with_limits(reader, limits);
/// assert!(decoder.read_info().is_err());
///
/// // This image is 32×32 pixels, so the decoder will allocate less than 10 KiB
/// let mut limits = Limits::default();
/// limits.bytes = 10*1024;
/// let mut decoder = Decoder::new_with_limits(File::open("tests/pngsuite/basi0g01.png").unwrap(), limits);
/// let reader = BufReader::new(File::open("tests/pngsuite/basi0g01.png").unwrap());
/// let mut decoder = Decoder::new_with_limits(reader, limits);
/// assert!(decoder.read_info().is_ok());
/// ```
pub fn set_limits(&mut self, limits: Limits) {
@@ -265,8 +268,10 @@ impl<R: Read> Decoder<R> {
/// e.g.
/// ```
/// use std::fs::File;
/// use std::io::BufReader;
/// use png::Decoder;
/// let mut decoder = Decoder::new(File::open("tests/pngsuite/basi0g01.png").unwrap());
/// let reader = BufReader::new(File::open("tests/pngsuite/basi0g01.png").unwrap());
/// let mut decoder = Decoder::new(reader);
/// decoder.set_ignore_text_chunk(true);
/// assert!(decoder.read_info().is_ok());
/// ```
@@ -286,13 +291,13 @@ impl<R: Read> Decoder<R> {
}
}

struct ReadDecoder<R: Read> {
reader: BufReader<R>,
struct ReadDecoder<R: BufRead> {
reader: R,
decoder: StreamingDecoder,
at_eof: bool,
}

impl<R: Read> ReadDecoder<R> {
impl<R: BufRead> ReadDecoder<R> {
/// Returns the next decoded chunk. If the chunk is an ImageData chunk, its contents are written
/// into image_data.
fn decode_next(&mut self, image_data: &mut Vec<u8>) -> Result<Option<Decoded>, DecodingError> {
@@ -350,7 +355,7 @@ impl<R: Read> ReadDecoder<R> {
/// PNG reader (mostly high-level interface)
///
/// Provides a high-level interface that iterates over lines or whole images.
pub struct Reader<R: Read> {
pub struct Reader<R: BufRead> {
decoder: ReadDecoder<R>,
bpp: BytesPerPixel,
subframe: SubframeInfo,
@@ -406,7 +411,7 @@ enum SubframeIdx {
End,
}

impl<R: Read> Reader<R> {
impl<R: BufRead> Reader<R> {
/// Reads all meta data until the next frame data starts.
/// Requires IHDR before the IDAT and fcTL before fdAT.
fn read_until_image_data(&mut self) -> Result<(), DecodingError> {
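Note that the new `R: BufRead` bound does not force an extra wrapper for in-memory inputs, since `&[u8]` already implements `BufRead`. A small sketch under that assumption (the helper name is hypothetical):

```rust
/// Hypothetical helper: decode a PNG held entirely in memory.
fn decode_in_memory(bytes: &[u8]) -> Result<Vec<u8>, png::DecodingError> {
    let decoder = png::Decoder::new(bytes); // `&[u8]: BufRead`, so no `BufReader` needed
    let mut reader = decoder.read_info()?;
    let mut buf = vec![0; reader.output_buffer_size()];
    reader.next_frame(&mut buf)?;
    Ok(buf)
}
```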
11 changes: 7 additions & 4 deletions src/decoder/stream.rs
@@ -1487,12 +1487,13 @@ mod tests {
use crate::{Decoder, DecodingError};
use byteorder::WriteBytesExt;
use std::fs::File;
use std::io::Write;
use std::io::{BufReader, Write};

#[test]
fn image_gamma() -> Result<(), ()> {
fn trial(path: &str, expected: Option<ScaledFloat>) {
let decoder = crate::Decoder::new(File::open(path).unwrap());
let decoder = crate::Decoder::new(BufReader::new(File::open(path).unwrap()));
let reader = decoder.read_info().unwrap();
let actual: Option<ScaledFloat> = reader.info().source_gamma;
assert!(actual == expected);
@@ -1533,7 +1534,7 @@
#[test]
fn image_source_chromaticities() -> Result<(), ()> {
fn trial(path: &str, expected: Option<SourceChromaticities>) {
let decoder = crate::Decoder::new(File::open(path).unwrap());
let decoder = crate::Decoder::new(BufReader::new(File::open(path).unwrap()));
let reader = decoder.read_info().unwrap();
let actual: Option<SourceChromaticities> = reader.info().source_chromaticities;
assert!(actual == expected);
@@ -1725,7 +1726,9 @@
// https://github.com/image-rs/image/issues/1825#issuecomment-1321798639,
// but the 2nd iCCP chunk has been altered manually (see the 2nd comment below for more
// details).
let decoder = crate::Decoder::new(File::open("tests/bugfixes/issue#1825.png").unwrap());
let decoder = crate::Decoder::new(BufReader::new(
File::open("tests/bugfixes/issue#1825.png").unwrap(),
));
let reader = decoder.read_info().unwrap();
let icc_profile = reader.info().icc_profile.clone().unwrap().into_owned();

119 changes: 77 additions & 42 deletions src/decoder/zlib.rs
@@ -1,7 +1,40 @@
use super::{stream::FormatErrorInner, DecodingError, CHUNCK_BUFFER_SIZE};
use super::{stream::FormatErrorInner, DecodingError};

use fdeflate::Decompressor;

/// The [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#10Compression) says that PNG uses
/// "deflate/inflate compression with a sliding window (which is an upper bound on the
/// distances appearing in the deflate stream) of at most 32768 bytes".
///
/// `fdeflate` requires that we keep this many of the most recently decompressed bytes in the
/// `out_buffer` - this allows referring back to them when handling "length and distance
/// codes" in the deflate stream.
const LOOKBACK_SIZE: usize = 32768;

/// Threshold for postponing compaction of `ZlibStream::out_buffer`. See
/// `compact_out_buffer_if_needed` for more details.
///
/// Higher factors result in higher memory usage, but lower compaction cost - a factor of 4
/// means that 1 byte gets copied during compaction for every 3 decompressed bytes.
///
/// TODO: The factor of 4 is an ad-hoc heuristic. Consider measuring and using a different
/// factor. (Early experiments seem to indicate that a factor of 4 is faster than a factor of
/// 2, and that 4 * `LOOKBACK_SIZE` is an acceptable memory trade-off.)
const COMPACTION_THRESHOLD: usize = LOOKBACK_SIZE * 4;

/// Maximum size of the output slice passed to `fdeflate::Decompressor::read`.
///
/// This helps to ensure that the decompressed data fits into the L1 cache as the data
/// gets written into `ZlibStream::out_buffer` and then copied into the `image_data` provided
/// by the `ZlibStream`'s client.
///
/// TODO: Investigate lowering this constant. In theory a lower constant should help to keep
/// the entire working set within L1 cache (this incremental chunk of data also needs to
/// be "unfiltered" and then copied into the `png` crate client's output; we also need to
/// account for memory pressure from `fdeflate`'s state). Interestingly, early experiments
/// show that setting this constant to 8192 regresses the performance in some cases.
const MAX_INCREMENTAL_DECOMPRESSION_SIZE: usize = 16384;

/// Ergonomics wrapper around `miniz_oxide::inflate::stream` for zlib compressed data.
pub(super) struct ZlibStream {
/// Current decoding state.
@@ -54,6 +87,19 @@ impl ZlibStream {

pub(crate) fn set_max_total_output(&mut self, n: usize) {
self.max_total_output = n;

// Allocate `out_buffer` capacity once. `debug_assert_eq` in `prepare_vec_for_appending`
// double-checks that we don't reallocate.
const MAX_UNCOMPACTED_SIZE: usize =
COMPACTION_THRESHOLD + MAX_INCREMENTAL_DECOMPRESSION_SIZE;
let reserved_capacity = if n > MAX_UNCOMPACTED_SIZE {
MAX_UNCOMPACTED_SIZE
} else {
n
};
self.out_buffer
.reserve(reserved_capacity.saturating_sub(self.out_buffer.len()));
debug_assert!(self.out_buffer.capacity() >= reserved_capacity);
}

/// Set the `ignore_adler32` flag and return `true` if the flag was
@@ -90,9 +136,16 @@ impl ZlibStream {
self.state.ignore_adler32();
}

let incremental_out_buffer = {
let end = self
.out_pos
.saturating_add(MAX_INCREMENTAL_DECOMPRESSION_SIZE)
.min(self.out_buffer.len());
&mut self.out_buffer[..end]
};
let (in_consumed, out_consumed) = self
.state
.read(data, self.out_buffer.as_mut_slice(), self.out_pos, false)
.read(data, incremental_out_buffer, self.out_pos, false)
.map_err(|err| {
DecodingError::Format(FormatErrorInner::CorruptFlateStream { err }.into())
})?;
@@ -156,34 +209,31 @@ impl ZlibStream {
self.max_total_output = usize::MAX;
}

let current_len = self.out_buffer.len();
let desired_len = self
.out_pos
.saturating_add(CHUNCK_BUFFER_SIZE)
.min(self.max_total_output);
if current_len >= desired_len {
return;
}

let buffered_len = self.decoding_size(self.out_buffer.len());
debug_assert!(self.out_buffer.len() <= buffered_len);
self.out_buffer.resize(buffered_len, 0u8);
}

fn decoding_size(&self, len: usize) -> usize {
// Allocate one more chunk size than currently or double the length while ensuring that the
// allocation is valid and that any cursor within it will be valid.
len
// This keeps the buffer size a power-of-two, required by miniz_oxide.
.saturating_add(CHUNCK_BUFFER_SIZE.max(len))
// Ensure all buffer indices are valid cursor positions.
// Note: both cut off and zero extension give correct results.
.min(u64::max_value() as usize)
// Ensure the allocation request is valid.
// TODO: maximum allocation limits?
.min(isize::max_value() as usize)
// Don't unnecessarily allocate more than `max_total_output`.
.min(self.max_total_output)
}
let desired_len = self
.out_pos
.saturating_add(MAX_INCREMENTAL_DECOMPRESSION_SIZE);

// Allocate at most `self.max_total_output`. This avoids unnecessarily allocating and
// zeroing-out a bigger `out_buffer` than necessary.
let desired_len = desired_len.min(self.max_total_output);

// Ensure the allocation request is valid.
// TODO: maximum allocation limits?
const _: () = assert!((isize::max_value() as usize) < u64::max_value() as usize);
let desired_len = desired_len.min(isize::max_value() as usize);

if self.out_buffer.len() < desired_len {
#[cfg(debug_assertions)]
let old_capacity = self.out_buffer.capacity();

self.out_buffer.resize(desired_len, 0u8);

#[cfg(debug_assertions)]
if self.max_total_output != usize::MAX {
let new_capacity = self.out_buffer.capacity();
debug_assert_eq!(old_capacity, new_capacity);
}
}
}

fn transfer_finished_data(&mut self, image_data: &mut Vec<u8>) -> usize {
@@ -194,24 +244,9 @@ impl ZlibStream {
}

fn compact_out_buffer_if_needed(&mut self) {
// [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#10Compression) says that
// "deflate/inflate compression with a sliding window (which is an upper bound on the
// distances appearing in the deflate stream) of at most 32768 bytes".
//
// `fdeflate` requires that we keep this many most recently decompressed bytes in the
// `out_buffer` - this allows referring back to them when handling "length and distance
// codes" in the deflate stream).
const LOOKBACK_SIZE: usize = 32768;

// Compact `self.out_buffer` when "needed". Doing this conditionally helps to put an upper
// bound on the amortized cost of copying the data within `self.out_buffer`.
//
// TODO: The factor of 4 is an ad-hoc heuristic. Consider measuring and using a different
// factor. (Early experiments seem to indicate that factor of 4 is faster than a factor of
// 2 and 4 * `LOOKBACK_SIZE` seems like an acceptable memory trade-off. Higher factors
// result in higher memory usage, but the compaction cost is lower - factor of 4 means
// that 1 byte gets copied during compaction for 3 decompressed bytes.)
if self.out_pos > LOOKBACK_SIZE * 4 {
if self.out_pos > COMPACTION_THRESHOLD {
// Only preserve the `lookback_buffer` and "throw away" the earlier prefix.
let lookback_buffer = self.out_pos.saturating_sub(LOOKBACK_SIZE)..self.out_pos;
let preserved_len = lookback_buffer.len();
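For intuition, here is a standalone sketch (illustrative names, not code from this diff) of the compaction scheme that `LOOKBACK_SIZE` and `COMPACTION_THRESHOLD` describe: once more than `COMPACTION_THRESHOLD` bytes have accumulated, only the trailing `LOOKBACK_SIZE` bytes - the deflate sliding window - are kept:

```rust
const LOOKBACK_SIZE: usize = 32768;
const COMPACTION_THRESHOLD: usize = LOOKBACK_SIZE * 4;

/// Illustrative compaction: shift the most recent `LOOKBACK_SIZE` bytes to the
/// front of the buffer, dropping the older prefix that backward distances in the
/// deflate stream can no longer reference.
fn compact(out_buffer: &mut [u8], out_pos: &mut usize) {
    if *out_pos > COMPACTION_THRESHOLD {
        let start = *out_pos - LOOKBACK_SIZE;
        out_buffer.copy_within(start..*out_pos, 0);
        *out_pos = LOOKBACK_SIZE;
    }
}
```

Each compaction copies `LOOKBACK_SIZE` bytes after roughly `3 * LOOKBACK_SIZE` fresh bytes were decompressed, which is where the "1 byte copied for every 3 decompressed bytes" amortized cost quoted above comes from.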