Skip to content

Commit

Permalink
Improved docs
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Aug 14, 2022
1 parent 8a6024f commit b510853
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 106 deletions.
144 changes: 123 additions & 21 deletions src/encoding/bitpacked/decode.rs
Original file line number Diff line number Diff line change
@@ -1,44 +1,46 @@
use super::{Packed, Unpackable, Unpacked};

/// An [`Iterator`] of [`Unpackable`] unpacked from a bitpacked slice of bytes.
/// # Implementation
/// This iterator unpacks bytes in chunks and does not allocate.
#[derive(Debug, Clone)]
pub struct Decoder<'a, T: Unpackable> {
compressed_chunks: std::slice::Chunks<'a, u8>,
packed: std::slice::Chunks<'a, u8>,
num_bits: usize,
remaining: usize,
current_pack_index: usize, // invariant: < T::PACK_LENGTH
current_pack: T::Unpacked,
unpacked: T::Unpacked, // has the current unpacked values.
}

#[inline]
fn decode_pack<T: Unpackable>(compressed: &[u8], num_bits: usize, pack: &mut T::Unpacked) {
let compressed_block_size = T::Unpacked::LENGTH * num_bits / 8;

if compressed.len() < compressed_block_size {
fn decode_pack<T: Unpackable>(packed: &[u8], num_bits: usize, unpacked: &mut T::Unpacked) {
if packed.len() < T::Unpacked::LENGTH * num_bits / 8 {
let mut buf = T::Packed::zero();
buf.as_mut()[..compressed.len()].copy_from_slice(compressed);
T::unpack(buf.as_ref(), num_bits, pack)
buf.as_mut()[..packed.len()].copy_from_slice(packed);
T::unpack(buf.as_ref(), num_bits, unpacked)
} else {
T::unpack(compressed, num_bits, pack)
T::unpack(packed, num_bits, unpacked)
}
}

impl<'a, T: Unpackable> Decoder<'a, T> {
pub fn new(compressed: &'a [u8], num_bits: usize, mut length: usize) -> Self {
let compressed_block_size = 32 * num_bits / 8;
/// Returns a [`Decoder`] with `T` encoded in `packed` with `num_bits`.
pub fn new(packed: &'a [u8], num_bits: usize, mut length: usize) -> Self {
let block_size = std::mem::size_of::<T>() * num_bits;

let mut compressed_chunks = compressed.chunks(compressed_block_size);
let mut current_pack = T::Unpacked::zero();
if let Some(chunk) = compressed_chunks.next() {
decode_pack::<T>(chunk, num_bits, &mut current_pack);
let mut packed = packed.chunks(block_size);
let mut unpacked = T::Unpacked::zero();
if let Some(chunk) = packed.next() {
decode_pack::<T>(chunk, num_bits, &mut unpacked);
} else {
length = 0
};

Self {
remaining: length,
compressed_chunks,
packed,
num_bits,
current_pack,
unpacked,
current_pack_index: 0,
}
}
Expand All @@ -52,15 +54,15 @@ impl<'a, T: Unpackable> Iterator for Decoder<'a, T> {
if self.remaining == 0 {
return None;
}
let result = self.current_pack[self.current_pack_index];
let result = self.unpacked[self.current_pack_index];
self.current_pack_index += 1;
self.remaining -= 1;
if self.current_pack_index == T::Unpacked::LENGTH {
if let Some(chunk) = self.compressed_chunks.next() {
decode_pack::<T>(chunk, self.num_bits, &mut self.current_pack);
if let Some(packed) = self.packed.next() {
decode_pack::<T>(packed, self.num_bits, &mut self.unpacked);
self.current_pack_index = 0;
}
}
self.remaining -= 1;
Some(result)
}

Expand All @@ -69,3 +71,103 @@ impl<'a, T: Unpackable> Iterator for Decoder<'a, T> {
(self.remaining, Some(self.remaining))
}
}

#[cfg(test)]
mod tests {
    use super::super::tests::case1;
    use super::*;

    /// Decodes `length` `u32` values from `data`, bitpacked with `num_bits` bits per value.
    fn unpack_u32(data: &[u8], num_bits: usize, length: usize) -> Vec<u32> {
        Decoder::<u32>::new(data, num_bits, length).collect()
    }

    #[test]
    fn test_decode_rle() {
        // The values 0..=7 packed with a bit width of 3:
        //   value: 0   1   2   3   4   5   6   7
        //   bits:  000 001 010 011 100 101 110 111
        // The first value occupies the lowest 3 bits of the first byte.
        let packed = [0b10001000u8, 0b11000110, 0b11111010];

        let decoded = unpack_u32(&packed, 3, 8);

        assert_eq!(decoded, vec![0, 1, 2, 3, 4, 5, 6, 7]);
    }

    #[test]
    fn decode_large() {
        let (num_bits, expected, data) = case1();
        let length = expected.len();

        assert_eq!(unpack_u32(&data, num_bits, length), expected);
    }

    #[test]
    fn test_decode_bool() {
        // 1 bit per value: each bit of the byte is one value, lowest bit first.
        let packed = [0b10101010u8];

        let decoded = unpack_u32(&packed, 1, 8);

        assert_eq!(decoded, vec![0, 1, 0, 1, 0, 1, 0, 1]);
    }

    #[test]
    fn test_decode_u64() {
        // Same input as `test_decode_bool`, but decoded into `u64` values.
        let packed = [0b10101010u8];

        let decoded: Vec<u64> = Decoder::<u64>::new(&packed, 1, 8).collect();

        assert_eq!(decoded, vec![0, 1, 0, 1, 0, 1, 0, 1]);
    }

    #[test]
    fn even_case() {
        // [0, 1, 2, 3, 4, 5, 6, 0]x99
        let group = [0u32, 1, 2, 3, 4, 5, 6, 0];
        let packed_group = [0b10001000u8, 0b11000110, 0b00011010];
        let copies = 99; // 8 * 99 % 32 != 0

        let expected = group.repeat(copies);
        let data = packed_group.repeat(copies);

        assert_eq!(unpack_u32(&data, 3, expected.len()), expected);
    }

    #[test]
    fn odd_case() {
        // [0, 1, 2, 3, 4, 5, 6, 0]x4 + [2]
        let group = [0u32, 1, 2, 3, 4, 5, 6, 0];
        let packed_group = [0b10001000u8, 0b11000110, 0b00011010];
        let copies = 4;

        // A single trailing value whose packed form is one extra byte.
        let mut expected = group.repeat(copies);
        expected.push(2);
        let mut data = packed_group.repeat(copies);
        data.push(0b00000010u8);

        assert_eq!(unpack_u32(&data, 3, expected.len()), expected);
    }
}
89 changes: 4 additions & 85 deletions src/encoding/bitpacked/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod unpack;
pub use decode::Decoder;
pub use encode::{encode, encode_pack};

/// A fixed-size byte array (e.g. `[u8; 8]`) representing one complete pack of bitpacked values.
pub trait Packed:
Copy
+ Sized
Expand Down Expand Up @@ -50,6 +51,7 @@ impl Packed for [u8; 64 * 64] {
}
}

/// A fixed-size array of [`Unpackable`] values (e.g. `[u64; 64]`) representing one complete unpacked pack.
pub trait Unpacked<T>:
Copy
+ Sized
Expand Down Expand Up @@ -95,6 +97,7 @@ impl Unpacked<u64> for [u64; 64] {
}
}

/// A type that can be bitpacked and unpacked by this crate.
pub trait Unpackable: Copy + Sized + Default {
type Packed: Packed;
type Unpacked: Unpacked<Self>;
Expand Down Expand Up @@ -166,27 +169,7 @@ impl Unpackable for u64 {
mod tests {
use super::*;

#[test]
fn test_decode_rle() {
// Test data: 0-7 with bit width 3
// 0: 000
// 1: 001
// 2: 010
// 3: 011
// 4: 100
// 5: 101
// 6: 110
// 7: 111
let num_bits = 3;
let length = 8;
// encoded: 0b10001000u8, 0b11000110, 0b11111010
let data = vec![0b10001000u8, 0b11000110, 0b11111010];

let decoded = Decoder::<u32>::new(&data, num_bits, length).collect::<Vec<_>>();
assert_eq!(decoded, vec![0, 1, 2, 3, 4, 5, 6, 7]);
}

fn case1() -> (usize, Vec<u32>, Vec<u8>) {
pub fn case1() -> (usize, Vec<u32>, Vec<u8>) {
let num_bits = 3;
let compressed = vec![
0b10001000u8,
Expand All @@ -212,14 +195,6 @@ mod tests {
(num_bits, decompressed, compressed)
}

#[test]
fn decode_large() {
let (num_bits, expected, data) = case1();

let decoded = Decoder::<u32>::new(&data, num_bits, expected.len()).collect::<Vec<_>>();
assert_eq!(decoded, expected);
}

#[test]
fn encode_large() {
let (num_bits, unpacked, expected) = case1();
Expand All @@ -242,60 +217,4 @@ mod tests {

assert_eq!(&packed[..3], expected);
}

#[test]
fn test_decode_bool() {
let num_bits = 1;
let length = 8;
let data = vec![0b10101010];

let decoded = Decoder::<u32>::new(&data, num_bits, length).collect::<Vec<_>>();
assert_eq!(decoded, vec![0, 1, 0, 1, 0, 1, 0, 1]);
}

#[test]
fn even_case() {
// [0, 1, 2, 3, 4, 5, 6, 0]x99
let data = &[0b10001000u8, 0b11000110, 0b00011010];
let num_bits = 3;
let copies = 99; // 8 * 99 % 32 != 0
let expected = std::iter::repeat(&[0u32, 1, 2, 3, 4, 5, 6, 0])
.take(copies)
.flatten()
.copied()
.collect::<Vec<_>>();
let data = std::iter::repeat(data)
.take(copies)
.flatten()
.copied()
.collect::<Vec<_>>();
let length = expected.len();

let decoded = Decoder::<u32>::new(&data, num_bits, length).collect::<Vec<_>>();
assert_eq!(decoded, expected);
}

#[test]
fn odd_case() {
// [0, 1, 2, 3, 4, 5, 6, 0]x4 + [2]
let data = &[0b10001000u8, 0b11000110, 0b00011010];
let num_bits = 3;
let copies = 4;
let expected = std::iter::repeat(&[0u32, 1, 2, 3, 4, 5, 6, 0])
.take(copies)
.flatten()
.copied()
.chain(std::iter::once(2))
.collect::<Vec<_>>();
let data = std::iter::repeat(data)
.take(copies)
.flatten()
.copied()
.chain(std::iter::once(0b00000010u8))
.collect::<Vec<_>>();
let length = expected.len();

let decoded = Decoder::<u32>::new(&data, num_bits, length).collect::<Vec<_>>();
assert_eq!(decoded, expected);
}
}

0 comments on commit b510853

Please sign in to comment.