From 8f71cc0a72d22d3ab9881af258a25d972dcb8ce8 Mon Sep 17 00:00:00 2001 From: Jonathan Behrens Date: Fri, 17 Mar 2023 16:17:46 -0700 Subject: [PATCH] Faster decompression of non-fdeflate encoded PNGs --- Cargo.toml | 2 +- src/decoder/stream.rs | 19 +----- src/decoder/zlib.rs | 152 ++++++++++-------------------------------- 3 files changed, 39 insertions(+), 134 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7895afd6..a54b55e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ include = [ [dependencies] bitflags = "1.0" crc32fast = "1.2.0" -fdeflate = "0.2.1" +fdeflate = "0.3.0" flate2 = "1.0" miniz_oxide = { version = "0.7.1", features = ["simd"] } diff --git a/src/decoder/stream.rs b/src/decoder/stream.rs index 9ef0750d..8e570b9c 100644 --- a/src/decoder/stream.rs +++ b/src/decoder/stream.rs @@ -196,7 +196,7 @@ pub(crate) enum FormatErrorInner { // Errors specific to the IDAT/fDAT chunks. /// The compression of the data stream was faulty. CorruptFlateStream { - err: miniz_oxide::inflate::TINFLStatus, + err: fdeflate::DecompressionError, }, /// The image data chunk was too short for the expected pixel count. NoMoreImageData, @@ -291,22 +291,7 @@ impl fmt::Display for FormatError { NoMoreImageData => write!(fmt, "IDAT or fDAT chunk is has not enough data for image."), CorruptFlateStream { err } => { write!(fmt, "Corrupt deflate stream. ")?; - use miniz_oxide::inflate::TINFLStatus; - match err { - TINFLStatus::Adler32Mismatch => write!(fmt, "Adler32 checksum failed."), - TINFLStatus::BadParam => write!(fmt, "Invalid input parameters."), - // The Done status should already have been handled. - TINFLStatus::Done => write!(fmt, "Unexpected done status."), - TINFLStatus::FailedCannotMakeProgress => { - write!(fmt, "Unexpected end of data.") - } - // The HasMoreOutput status should already have been handled. - TINFLStatus::HasMoreOutput => write!(fmt, "Has more output."), - // The HasMoreInput status should already have been handled. - TINFLStatus::NeedsMoreInput => write!(fmt, "Needs more input."), - // Write out the error number in case a new has been added. - _ => write!(fmt, "Error number {:?}.", err), - } + write!(fmt, "{:?}", err) } // TODO: Wrap more info in the enum variant BadTextEncoding(tde) => { diff --git a/src/decoder/zlib.rs b/src/decoder/zlib.rs index 982f3326..2953c951 100644 --- a/src/decoder/zlib.rs +++ b/src/decoder/zlib.rs @@ -1,77 +1,11 @@ use super::{stream::FormatErrorInner, DecodingError, CHUNCK_BUFFER_SIZE}; -use miniz_oxide::inflate::core::{decompress, inflate_flags, DecompressorOxide}; -use miniz_oxide::inflate::TINFLStatus; - -enum Compressor { - FullZlib(DecompressorOxide), - FDeflate(fdeflate::Decompressor), -} -impl Compressor { - fn decompress( - &mut self, - input: &[u8], - output: &mut [u8], - output_position: usize, - ignore_adler32: bool, - end_of_input: bool, - ) -> (TINFLStatus, usize, usize) { - match self { - Compressor::FullZlib(decompressor) => { - const BASE_FLAGS: u32 = inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER - | inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; - - let mut flags = BASE_FLAGS; - if !end_of_input { - flags |= inflate_flags::TINFL_FLAG_HAS_MORE_INPUT; - } - if ignore_adler32 { - flags |= inflate_flags::TINFL_FLAG_IGNORE_ADLER32; - } - - decompress(decompressor, input, output, output_position, flags) - } - Compressor::FDeflate(decompressor) => { - match decompressor.read(input, &mut output[output_position..], end_of_input) { - Ok((in_consumed, out_consumed)) if decompressor.done() => { - assert!(in_consumed <= input.len()); - (TINFLStatus::Done, in_consumed, out_consumed) - } - Ok((in_consumed, out_consumed)) => { - assert!(in_consumed <= input.len()); - (TINFLStatus::HasMoreOutput, in_consumed, out_consumed) - } - Err(fdeflate::DecompressionError::NotFDeflate) => { - // fdeflate guarantees that it will detect non-fdeflate streams before - // consuming any input. If that happens, sanity check that no output - // has been produced and feed the same input to a full zlib decoder. - assert_eq!(output_position, 0); - - *self = Compressor::FullZlib(DecompressorOxide::new()); - self.decompress( - input, - output, - output_position, - ignore_adler32, - end_of_input, - ) - } - Err(_) => (TINFLStatus::Failed, 0, 0), - } - } - } - } -} -impl Default for Compressor { - fn default() -> Self { - Compressor::FDeflate(fdeflate::Decompressor::new()) - } -} +use fdeflate::Decompressor; /// Ergonomics wrapper around `miniz_oxide::inflate::stream` for zlib compressed data. pub(super) struct ZlibStream { /// Current decoding state. - state: Box, + state: Box, /// If there has been a call to decompress already. started: bool, /// A buffer of compressed data. @@ -106,7 +40,7 @@ pub(super) struct ZlibStream { impl ZlibStream { pub(crate) fn new() -> Self { ZlibStream { - state: Default::default(), + state: Box::new(Decompressor::new()), started: false, in_buffer: Vec::with_capacity(CHUNCK_BUFFER_SIZE), in_pos: 0, @@ -122,7 +56,7 @@ impl ZlibStream { self.in_pos = 0; self.out_buffer.clear(); self.out_pos = 0; - *self.state = Default::default(); + *self.state = Decompressor::new(); } /// Set the `ignore_adler32` flag and return `true` if the flag was @@ -155,21 +89,23 @@ impl ZlibStream { ) -> Result { self.prepare_vec_for_appending(); - let (status, mut in_consumed, out_consumed) = { - let in_data = if self.in_buffer.is_empty() { - data - } else { - &self.in_buffer[self.in_pos..] - }; - self.state.decompress( - in_data, - self.out_buffer.as_mut_slice(), - self.out_pos, - self.ignore_adler32, - false, - ) + if !self.started && self.ignore_adler32 { + self.state.ignore_adler32(); + } + + let in_data = if self.in_buffer.is_empty() { + data + } else { + &self.in_buffer[self.in_pos..] }; + let (mut in_consumed, out_consumed) = self + .state + .read(in_data, self.out_buffer.as_mut_slice(), self.out_pos, false) + .map_err(|err| { + DecodingError::Format(FormatErrorInner::CorruptFlateStream { err }.into()) + })?; + if !self.in_buffer.is_empty() { self.in_pos += in_consumed; in_consumed = 0; @@ -189,14 +125,7 @@ impl ZlibStream { self.out_pos += out_consumed; self.transfer_finished_data(image_data); - match status { - TINFLStatus::Done | TINFLStatus::HasMoreOutput | TINFLStatus::NeedsMoreInput => { - Ok(in_consumed) - } - err => Err(DecodingError::Format( - FormatErrorInner::CorruptFlateStream { err }.into(), - )), - } + Ok(in_consumed) } /// Called after all consecutive IDAT chunks were handled. @@ -219,40 +148,31 @@ impl ZlibStream { loop { self.prepare_vec_for_appending(); - let (status, in_consumed, out_consumed) = { - // TODO: we may be able to avoid the indirection through the buffer here. - // First append all buffered data and then create a cursor on the image_data - // instead. - self.state.decompress( + let (in_consumed, out_consumed) = self + .state + .read( &tail[start..], self.out_buffer.as_mut_slice(), self.out_pos, - self.ignore_adler32, true, ) - }; + .map_err(|err| { + DecodingError::Format(FormatErrorInner::CorruptFlateStream { err }.into()) + })?; start += in_consumed; self.out_pos += out_consumed; - match status { - TINFLStatus::Done => { - self.out_buffer.truncate(self.out_pos); - image_data.append(&mut self.out_buffer); - return Ok(()); - } - TINFLStatus::HasMoreOutput => { - let transferred = self.transfer_finished_data(image_data); - assert!( - transferred > 0 || in_consumed > 0 || out_consumed > 0, - "No more forward progress made in stream decoding." - ); - } - err => { - return Err(DecodingError::Format( - FormatErrorInner::CorruptFlateStream { err }.into(), - )); - } + if self.state.is_done() { + self.out_buffer.truncate(self.out_pos); + image_data.append(&mut self.out_buffer); + return Ok(()); + } else { + let transferred = self.transfer_finished_data(image_data); + assert!( + transferred > 0 || in_consumed > 0 || out_consumed > 0, + "No more forward progress made in stream decoding." + ); } } }