Skip to content

Commit

Permalink
lzbuffer: add memlimit
Browse files Browse the repository at this point in the history
Adds a memlimit configuration option for decompression. If the dict
buffer's memory limit is exceeded, decompression will fail with an
LZMAError. Additional functions were added to reduce the number of
breaking changes in the library.
  • Loading branch information
cccs-sadugas committed Jul 9, 2020
1 parent acb788b commit fcbb11f
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 23 deletions.
51 changes: 39 additions & 12 deletions src/decode/lzbuffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pub trait LZBuffer {
// Retrieve the n-th last byte
fn last_n(&self, dist: usize) -> error::Result<u8>;
// Append a literal
fn append_literal(&mut self, lit: u8) -> io::Result<()>;
fn append_literal(&mut self, lit: u8) -> error::Result<()>;
// Fetch an LZ sequence (length, distance) from inside the buffer
fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()>;
// Flush the buffer to the output
Expand All @@ -22,6 +22,7 @@ where
{
stream: &'a mut W, // Output sink
buf: Vec<u8>, // Buffer
memlimit: usize, // Buffer memory limit
len: usize, // Total number of bytes sent through the buffer
}

Expand All @@ -30,9 +31,14 @@ where
W: io::Write,
{
// Build a buffer over the output sink `stream` without a memory limit
// (the limit is set to the largest `usize` value).
pub fn from_stream(stream: &'a mut W) -> Self {
    let unlimited = std::usize::MAX;
    Self::from_stream_with_memlimit(stream, unlimited)
}

// Build an empty buffer over the output sink `stream`, remembering
// `memlimit` as the buffer memory limit. No bytes have been sent yet.
pub fn from_stream_with_memlimit(stream: &'a mut W, memlimit: usize) -> Self {
    let buf = Vec::new();
    let len = 0;
    Self {
        stream,
        buf,
        memlimit,
        len,
    }
}
Expand Down Expand Up @@ -84,10 +90,19 @@ where
}

// Append a literal
fn append_literal(&mut self, lit: u8) -> io::Result<()> {
self.buf.push(lit);
self.len += 1;
Ok(())
fn append_literal(&mut self, lit: u8) -> error::Result<()> {
let new_len = self.len + 1;

if new_len > self.memlimit {
Err(error::Error::LZMAError(format!(
"exceeded memory limit of {}",
self.memlimit
)))
} else {
self.buf.push(lit);
self.len = new_len;
Ok(())
}
}

// Fetch an LZ sequence (length, distance) from inside the buffer
Expand Down Expand Up @@ -127,6 +142,7 @@ where
stream: &'a mut W, // Output sink
buf: Vec<u8>, // Circular buffer
dict_size: usize, // Length of the buffer
memlimit: usize, // Buffer memory limit
cursor: usize, // Current position
len: usize, // Total number of bytes sent through the buffer
}
Expand All @@ -135,12 +151,13 @@ impl<'a, W> LZCircularBuffer<'a, W>
where
W: io::Write,
{
pub fn from_stream(stream: &'a mut W, dict_size: usize) -> Self {
pub fn from_stream_with_memlimit(stream: &'a mut W, dict_size: usize, memlimit: usize) -> Self {
lzma_info!("Dict size in LZ buffer: {}", dict_size);
Self {
stream,
buf: Vec::new(),
dict_size,
memlimit,
cursor: 0,
len: 0,
}
Expand All @@ -150,11 +167,21 @@ where
*self.buf.get(index).unwrap_or(&0)
}

fn set(&mut self, index: usize, value: u8) {
if self.buf.len() < index + 1 {
self.buf.resize(index + 1, 0);
fn set(&mut self, index: usize, value: u8) -> error::Result<()> {
let new_len = index + 1;

if new_len > self.memlimit {
Err(error::Error::LZMAError(format!(
"exceeded memory limit of {}",
self.memlimit
)))
} else {
if self.buf.len() < new_len {
self.buf.resize(new_len, 0);
}
self.buf[index] = value;
Ok(())
}
self.buf[index] = value;
}
}

Expand Down Expand Up @@ -195,8 +222,8 @@ where
}

// Append a literal
fn append_literal(&mut self, lit: u8) -> io::Result<()> {
self.set(self.cursor, lit);
fn append_literal(&mut self, lit: u8) -> error::Result<()> {
self.set(self.cursor, lit)?;
self.cursor += 1;
self.len += 1;

Expand Down
18 changes: 17 additions & 1 deletion src/decode/lzma.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,28 @@ pub fn new_circular<'a, W>(
output: &'a mut W,
params: LZMAParams,
) -> error::Result<DecoderState<lzbuffer::LZCircularBuffer<'a, W>>>
where
W: io::Write,
{
new_circular_with_memlimit(output, params, std::usize::MAX)
}

// Initialize decoder with circular buffer
pub fn new_circular_with_memlimit<'a, W>(
output: &'a mut W,
params: LZMAParams,
memlimit: usize,
) -> error::Result<DecoderState<lzbuffer::LZCircularBuffer<'a, W>>>
where
W: io::Write,
{
// Decoder
let decoder = DecoderState {
output: lzbuffer::LZCircularBuffer::from_stream(output, params.dict_size as usize),
output: lzbuffer::LZCircularBuffer::from_stream_with_memlimit(
output,
params.dict_size as usize,
memlimit,
),
lc: params.lc,
lp: params.lp,
pb: params.pb,
Expand Down
4 changes: 4 additions & 0 deletions src/decode/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ pub struct Options {
/// The default is
/// [`UnpackedSize::ReadFromHeader`](enum.UnpackedSize.html#variant.ReadFromHeader).
pub unpacked_size: UnpackedSize,
/// Defines the maximum size, in bytes, that the dictionary buffer may grow to during
/// decompression. If the limit is exceeded, decompression fails with an `LZMAError`.
///
/// The default is unlimited.
pub memlimit: Option<usize>,
}

/// Alternatives for defining the unpacked size of the decoded data.
Expand Down
7 changes: 6 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@ pub fn lzma_decompress_with_options<R: io::BufRead, W: io::Write>(
options: &decompress::Options,
) -> error::Result<()> {
let params = decode::lzma::LZMAParams::read_header(input, options)?;
let mut decoder = decode::lzma::new_circular(output, params)?;
let mut decoder = if let Some(memlimit) = options.memlimit {
decode::lzma::new_circular_with_memlimit(output, params, memlimit)?
} else {
decode::lzma::new_circular(output, params)?
};

let mut rangecoder = decode::rangecoder::RangeDecoder::new(input).or_else(|e| {
Err(error::Error::LZMAError(format!(
"LZMA stream too short: {}",
Expand Down
49 changes: 40 additions & 9 deletions tests/lzma.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ fn round_trip(x: &[u8]) {
};
let decode_options = lzma_rs::decompress::Options {
unpacked_size: lzma_rs::decompress::UnpackedSize::ReadFromHeader,
..Default::default()
};
round_trip_with_options(x, &encode_options, &decode_options);
assert_round_trip_with_options(x, &encode_options, &decode_options);
}

fn round_trip_no_options(x: &[u8]) {
Expand All @@ -31,7 +32,7 @@ fn round_trip_with_options(
x: &[u8],
encode_options: &lzma_rs::compress::Options,
decode_options: &lzma_rs::decompress::Options,
) {
) -> lzma_rs::error::Result<Vec<u8>> {
let mut compressed: Vec<u8> = Vec::new();
lzma_rs::lzma_compress_with_options(
&mut std::io::BufReader::new(x),
Expand All @@ -45,8 +46,19 @@ fn round_trip_with_options(
debug!("Compressed content: {:?}", compressed);
let mut bf = std::io::BufReader::new(compressed.as_slice());
let mut decomp: Vec<u8> = Vec::new();
lzma_rs::lzma_decompress_with_options(&mut bf, &mut decomp, decode_options).unwrap();
assert_eq!(decomp, x)
lzma_rs::lzma_decompress_with_options(&mut bf, &mut decomp, decode_options)?;
Ok(decomp)
}

// Compress and decompress `x` with the given options, panicking if the
// round trip returns an error or if the decoded bytes differ from `x`.
fn assert_round_trip_with_options(
    x: &[u8],
    encode_options: &lzma_rs::compress::Options,
    decode_options: &lzma_rs::decompress::Options,
) {
    let decoded = round_trip_with_options(x, encode_options, decode_options).unwrap();
    assert_eq!(decoded, x);
}

fn round_trip_file(filename: &str) {
Expand Down Expand Up @@ -170,8 +182,9 @@ fn unpacked_size_write_to_header() {
};
let decode_options = lzma_rs::decompress::Options {
unpacked_size: lzma_rs::decompress::UnpackedSize::ReadFromHeader,
..Default::default()
};
round_trip_with_options(&data[..], &encode_options, &decode_options);
assert_round_trip_with_options(&data[..], &encode_options, &decode_options);
}

#[test]
Expand All @@ -182,8 +195,9 @@ fn unpacked_size_provided_outside() {
};
let decode_options = lzma_rs::decompress::Options {
unpacked_size: lzma_rs::decompress::UnpackedSize::UseProvided(Some(data.len() as u64)),
..Default::default()
};
round_trip_with_options(&data[..], &encode_options, &decode_options);
assert_round_trip_with_options(&data[..], &encode_options, &decode_options);
}

#[test]
Expand All @@ -196,8 +210,9 @@ fn unpacked_size_write_some_to_header_but_use_provided_on_read() {
unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(Some(
data.len() as u64,
)),
..Default::default()
};
round_trip_with_options(&data[..], &encode_options, &decode_options);
assert_round_trip_with_options(&data[..], &encode_options, &decode_options);
}

#[test]
Expand All @@ -210,8 +225,9 @@ fn unpacked_size_write_none_to_header_and_use_provided_on_read() {
unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(Some(
data.len() as u64,
)),
..Default::default()
};
round_trip_with_options(&data[..], &encode_options, &decode_options);
assert_round_trip_with_options(&data[..], &encode_options, &decode_options);
}

#[test]
Expand All @@ -222,6 +238,21 @@ fn unpacked_size_write_none_to_header_and_use_provided_none_on_read() {
};
let decode_options = lzma_rs::decompress::Options {
unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(None),
..Default::default()
};
assert_round_trip_with_options(&data[..], &encode_options, &decode_options);
}

#[test]
#[should_panic(expected = "exceeded memory limit of 0")]
fn memlimit() {
let data = b"Some data";
let encode_options = lzma_rs::compress::Options {
unpacked_size: lzma_rs::compress::UnpackedSize::WriteToHeader(None),
};
let decode_options = lzma_rs::decompress::Options {
unpacked_size: lzma_rs::decompress::UnpackedSize::ReadHeaderButUseProvided(None),
memlimit: Some(0),
};
round_trip_with_options(&data[..], &encode_options, &decode_options);
round_trip_with_options(&data[..], &encode_options, &decode_options).unwrap();
}

0 comments on commit fcbb11f

Please sign in to comment.