From e35d857b76c2352d95aa863095edccaeb3a60e44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20Kr=C3=BCger?= Date: Mon, 13 May 2024 10:30:33 +0200 Subject: [PATCH] Refactored huffman implementation a bit. --- src/legacy/huffman.rs | 83 ++++++++++++++++++++++--------------------- src/legacy/implode.rs | 4 +-- 2 files changed, 44 insertions(+), 43 deletions(-) diff --git a/src/legacy/huffman.rs b/src/legacy/huffman.rs index 4708d1e17..ff32d2150 100644 --- a/src/legacy/huffman.rs +++ b/src/legacy/huffman.rs @@ -23,9 +23,9 @@ pub struct HuffmanDecoder { /// Lookup table for fast decoding of short codewords. pub table: [TableEntry; 1 << HUFFMAN_LOOKUP_TABLE_BITS], /// "Sentinel bits" value for each codeword length. - pub sentinel_bits: [u32; MAX_HUFFMAN_BITS + 1], + pub sentinel_bits: [u32; MAX_HUFFMAN_BITS], /// First symbol index minus first codeword mod 2**16 for each length. - pub offset_first_sym_idx: [u16; MAX_HUFFMAN_BITS + 1], + pub offset_first_sym_idx: [u16; MAX_HUFFMAN_BITS], /// Map from symbol index to symbol. pub syms: [u16; MAX_HUFFMAN_SYMBOLS], // num_syms:usize @@ -48,58 +48,61 @@ impl Default for HuffmanDecoder { /// Returns false if the codeword lengths do not correspond to a valid prefix /// code. impl HuffmanDecoder { - pub fn init(&mut self, lengths: &[u8], n: usize) -> bool { - let mut count = [0; MAX_HUFFMAN_BITS + 1]; - let mut code = [0; MAX_HUFFMAN_BITS + 1]; - let mut sym_idx = [0; MAX_HUFFMAN_BITS + 1]; + pub fn init(&mut self, lengths: &[u8], n: usize) -> std::io::Result<()> { + let mut count = [0; MAX_HUFFMAN_BITS]; + let mut code = [0; MAX_HUFFMAN_BITS]; + let mut sym_idx: [u16; 16] = [0; MAX_HUFFMAN_BITS]; // Zero-initialize the lookup table. for t in &mut self.table { t.len = 0; } // Count the number of codewords of each length. - for i in 0..n { - debug_assert!(lengths[i] as usize <= MAX_HUFFMAN_BITS); - count[lengths[i] as usize] += 1; + for sym in 0..n { + let len = lengths[sym] as usize; + // Ignore zero-length codewords. + if len == 0 { + continue; + } + debug_assert!(len <= MAX_HUFFMAN_BITS); + count[len] += 1; } - count[0] = 0; // Ignore zero-length codewords. - // Compute sentinel_bits and offset_first_sym_idx for each length. - code[0] = 0; - sym_idx[0] = 0; - for l in 1..=MAX_HUFFMAN_BITS { + + for len in 1..MAX_HUFFMAN_BITS { // First canonical codeword of this length. - code[l] = ((code[l - 1] + count[l - 1]) << 1) as u16; + code[len] = (code[len - 1] + count[len - 1]) << 1; - if count[l] != 0 && code[l] as u32 + count[l] as u32 - 1 > (1u32 << l) - 1 { - // The last codeword is longer than l bits. - return false; + if count[len] != 0 && code[len] as u32 + count[len] as u32 - 1 > (1u32 << len) - 1 { + return Err(Error::new( + io::ErrorKind::InvalidData, + "The last codeword is longer than len bits", + )); } - let s = ((code[l] as u32 + count[l] as u32) << (MAX_HUFFMAN_BITS - l)) as u32; - self.sentinel_bits[l] = s; - debug_assert!(self.sentinel_bits[l] >= code[l] as u32, "No overflow!"); - - sym_idx[l] = sym_idx[l - 1] + count[l - 1]; - self.offset_first_sym_idx[l] = sym_idx[l].wrapping_sub(code[l]); + let s = ((code[len] as u32 + count[len] as u32) << (MAX_HUFFMAN_BITS - len)) as u32; + self.sentinel_bits[len] = s; + debug_assert!(self.sentinel_bits[len] >= code[len] as u32, "No overflow!"); + sym_idx[len] = sym_idx[len - 1] + count[len - 1]; + self.offset_first_sym_idx[len] = sym_idx[len].wrapping_sub(code[len]); } // Build mapping from index to symbol and populate the lookup table. - for i in 0..n { - let l = lengths[i] as usize; - if l == 0 { + for sym in 0..n { + let len = lengths[sym] as usize; + if len == 0 { continue; } - self.syms[sym_idx[l] as usize] = i as u16; - sym_idx[l] += 1; + self.syms[sym_idx[len] as usize] = sym as u16; + sym_idx[len] += 1; - if l <= HUFFMAN_LOOKUP_TABLE_BITS as usize { - self.table_insert(i, l, code[l]); - code[l] += 1; + if len <= HUFFMAN_LOOKUP_TABLE_BITS as usize { + self.table_insert(sym, len, code[len]); + code[len] += 1; } } - true + Ok(()) } pub fn table_insert(&mut self, sym: usize, len: usize, codeword: u16) { @@ -128,7 +131,7 @@ impl HuffmanDecoder { debug_assert!(lookup_bits < self.table.len()); if self.table[lookup_bits].len != 0 { - debug_assert!(self.table[lookup_bits].len <= HUFFMAN_LOOKUP_TABLE_BITS); + debug_assert!(self.table[lookup_bits].len < HUFFMAN_LOOKUP_TABLE_BITS); // debug_assert!(self.table[lookup_bits].sym < self.num_syms); *num_used_bits = self.table[lookup_bits].len; return Ok(self.table[lookup_bits].sym); @@ -136,15 +139,15 @@ impl HuffmanDecoder { // Then do canonical decoding with the bits in MSB-first order. let mut bits = reverse_lsb(bits, MAX_HUFFMAN_BITS); - for l in HUFFMAN_LOOKUP_TABLE_BITS as usize + 1..=MAX_HUFFMAN_BITS { - if (bits as u32) < self.sentinel_bits[l] { + for l in HUFFMAN_LOOKUP_TABLE_BITS as usize + 1..MAX_HUFFMAN_BITS { + if self.sentinel_bits[l] > bits as u32 { bits >>= MAX_HUFFMAN_BITS - l; - let sym_idx = (self.offset_first_sym_idx[l] as usize + bits as usize) & 0xFFFF; - //assert(sym_idx < self.num_syms); + let sym_idx = self.offset_first_sym_idx[l] + bits; + // debug_assert(sym_idx < self.num_syms); *num_used_bits = l as u8; - return Ok(self.syms[sym_idx]); + return Ok(self.syms[sym_idx as usize]); } } *num_used_bits = 0; @@ -184,7 +187,7 @@ mod tests { ]; let mut d = HuffmanDecoder::default(); - assert!(d.init(&lens, lens.len())); + d.init(&lens, lens.len()).unwrap(); let mut used = 0; // 000 (msb-first) -> 000 (lsb-first) diff --git a/src/legacy/implode.rs b/src/legacy/implode.rs index 2dd644612..bc30279dc 100644 --- a/src/legacy/implode.rs +++ b/src/legacy/implode.rs @@ -78,9 +78,7 @@ fn read_huffman_code( )); } - let ok = d.init(&lens, num_lens); - debug_assert!(ok, "The checks above mean the tree should be valid."); - Ok(()) + d.init(&lens, num_lens) } fn hwexplode(