Skip to content

Commit

Permalink
Add lazy RLE+ decoding
Browse files Browse the repository at this point in the history
License: MIT
Signed-off-by: Jakub Sztandera <[email protected]>
  • Loading branch information
Kubuxu committed Sep 20, 2019
1 parent f3854a4 commit ca93473
Show file tree
Hide file tree
Showing 5 changed files with 486 additions and 0 deletions.
154 changes: 154 additions & 0 deletions lib/rlepluslazy/internal/bitvector.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package bitvector

import (
"errors"
"log"
)

var (
// ErrOutOfRange is the error returned by BitVector.Get when the requested
// index is greater than or equal to the vector's Len.
ErrOutOfRange = errors.New("index out of range")
)

// BitNumbering indicates the ordering of bits, either
// least-significant bit in position 0, or most-significant bit
// in position 0.
//
// It is used in 3 ways with BitVector:
// 1. Ordering of bits within the Buf []byte structure
// 2. What order to add bits when using Extend()
// 3. What order to read bits when using Take()
//
// https://en.wikipedia.org/wiki/Bit_numbering
type BitNumbering int

const (
// LSB0 - bit ordering starts with the low-order bit.
// It is the first constant of the iota sequence, so it is also the zero
// value of BitNumbering.
LSB0 BitNumbering = iota

// MSB0 - bit ordering starts with the high-order bit.
MSB0
)

// BitVector is used to manipulate ordered collections of bits.
//
// The zero value is an empty vector (Len == 0) whose BytePacking is LSB0.
type BitVector struct {
// Buf is the backing storage; bit i of the vector lives in Buf[i/8].
Buf []byte

// BytePacking is the bit ordering within bytes
BytePacking BitNumbering

// Len is the logical number of bits in the vector.
// The last byte in Buf may have undefined bits if Len is not a multiple of 8
Len uint
}

// NewBitVector wraps an existing byte slice in a BitVector.
//
// The slice is used as-is (it is not copied) and every bit of it is considered
// part of the vector, so Len is 8 times the slice length. bytePacking tells the
// vector how the bits are ordered inside each byte of buf.
func NewBitVector(buf []byte, bytePacking BitNumbering) *BitVector {
	bitCount := len(buf) * 8

	vec := new(BitVector)
	vec.Buf = buf
	vec.Len = uint(bitCount)
	vec.BytePacking = bytePacking
	return vec
}

// Push appends a single bit to the end of the BitVector and increments Len.
//
// Although it takes a byte, only the low-order bit is used, so just use 0 or 1.
//
// Buf grows by one zero byte only when the new bit falls past the bytes that
// are already present. The target bit is cleared before it is written, so
// stale ("undefined") bits beyond Len in an existing byte never leak into the
// vector when a 0 is pushed.
func (v *BitVector) Push(val byte) {
	byteIdx := v.Len / 8
	if byteIdx >= uint(len(v.Buf)) {
		v.Buf = append(v.Buf, 0)
	}

	// Position of the new bit inside its byte, following the packing order.
	shift := v.Len % 8
	if v.BytePacking != LSB0 {
		shift = 7 - shift
	}

	// Clear, then set: a plain OR would keep a stale 1 when pushing a 0.
	mask := byte(1) << shift
	v.Buf[byteIdx] = (v.Buf[byteIdx] &^ mask) | ((val & 1) << shift)

	v.Len++
}

// Get reads the bit stored at position idx and reports it as a byte that is
// either 0 or 1.
//
// It returns ErrOutOfRange (and a zero bit) when idx is not below Len.
func (v *BitVector) Get(idx uint) (byte, error) {
	if idx >= v.Len {
		return 0, ErrOutOfRange
	}

	// Offset of the bit inside its byte; MSB0 packing counts from the top bit.
	shift := idx % 8
	if v.BytePacking != LSB0 {
		shift = 7 - shift
	}

	bit := (v.Buf[idx/8] >> shift) & 1
	return bit, nil
}

// Extend pushes the first count bits of val onto the receiver, where order
// decides from which end of val the bits are taken.
//
// Given a byte b == 0b11010101
// v.Extend(b, 4, LSB0) would add < 1, 0, 1, 0 >
// v.Extend(b, 4, MSB0) would add < 1, 1, 0, 1 >
//
// Panics if count is greater than 8; nothing is pushed in that case.
func (v *BitVector) Extend(val byte, count uint, order BitNumbering) {
	if count > 8 {
		log.Panicf("invalid count")
	}

	for n := uint(0); n < count; n++ {
		// LSB0 walks val upwards from bit 0, anything else downwards from bit 7.
		shift := n
		if order != LSB0 {
			shift = 7 - n
		}
		v.Push((val >> shift) & 1)
	}
}

// Take reads up to 8 bits starting at the given index and packs them into a
// byte according to order.
//
// Given a BitVector < 1, 1, 0, 1, 0, 1, 0, 1 >
// v.Take(0, 4, LSB0) would return 0b00001011
// v.Take(0, 4, MSB0) would return 0b11010000
//
// Positions at or beyond Len read as 0. This also holds when index+count
// would overflow uint: the read stops instead of wrapping around to the
// start of the vector.
//
// Panics if count is greater than 8
func (v *BitVector) Take(index uint, count uint, order BitNumbering) (out byte) {
	if count > 8 {
		log.Panicf("invalid count")
	}

	for i := uint(0); i < count; i++ {
		pos := index + i
		if pos < index {
			// index+i wrapped around; every remaining position is out of range.
			break
		}

		val, err := v.Get(pos)
		if err != nil {
			// Past the end of the vector: this and all later bits stay 0.
			break
		}

		// LSB0 fills out upwards from bit 0, anything else downwards from bit 7.
		shift := i
		if order != LSB0 {
			shift = 7 - i
		}
		out |= val << shift
	}
	return out
}

// Iterator returns a reader function over the BitVector. Each call to that
// function takes the requested number of bits (at most 8), packed according
// to order, and advances a cursor private to the iterator by that amount.
//
// When the end of the BitVector is reached, it returns zeroes indefinitely
//
// The returned function panics if count is greater than 8; the cursor is
// left untouched in that case.
func (v *BitVector) Iterator(order BitNumbering) func(uint) byte {
	var pos uint

	return func(count uint) byte {
		if count > 8 {
			log.Panicf("invalid count")
		}

		bits := v.Take(pos, count, order)
		pos += count
		return bits
	}
}
136 changes: 136 additions & 0 deletions lib/rlepluslazy/internal/bitvector_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
package bitvector_test

import (
"testing"

"github.com/stretchr/testify/assert"

bitvector "github.com/filecoin-project/go-lotus/extern/rleplus/internal"
)

// TestBitVector exercises the exported BitVector API: the zero value, Push,
// Get, Extend, Take and Iterator, plus the panics raised for counts above 8.
func TestBitVector(t *testing.T) {
	t.Run("zero value", func(t *testing.T) {
		var v bitvector.BitVector

		assert.Equal(t, bitvector.LSB0, v.BytePacking)
	})

	t.Run("Push", func(t *testing.T) {
		// MSB0 bit numbering: < 1, 0, 1, 1 > fills the byte from the top (0b10110000)
		v := bitvector.BitVector{BytePacking: bitvector.MSB0}
		v.Push(1)
		v.Push(0)
		v.Push(1)
		v.Push(1)

		assert.Equal(t, byte(176), v.Buf[0])

		// LSB0 bit numbering: the same bits fill the byte from the bottom (0b00001101)
		v = bitvector.BitVector{BytePacking: bitvector.LSB0}
		v.Push(1)
		v.Push(0)
		v.Push(1)
		v.Push(1)

		assert.Equal(t, byte(13), v.Buf[0])
	})

	t.Run("Get", func(t *testing.T) {
		bits := []byte{1, 0, 1, 1, 0, 0, 1, 0}

		for _, numbering := range []bitvector.BitNumbering{bitvector.MSB0, bitvector.LSB0} {
			v := bitvector.BitVector{BytePacking: numbering}

			for _, bit := range bits {
				v.Push(bit)
			}

			for idx, expected := range bits {
				// A failed Get reports 0, which would silently match every
				// expected 0 bit, so the error is checked explicitly.
				actual, err := v.Get(uint(idx))
				assert.NoError(t, err)
				assert.Equal(t, expected, actual)
			}

			// One past the last pushed bit must be rejected.
			_, err := v.Get(uint(len(bits)))
			assert.Error(t, err)
		}
	})

	t.Run("Extend", func(t *testing.T) {
		val := byte(171) // 0b10101011

		// MSB0 bit numbering
		v := bitvector.BitVector{}
		v.Extend(val, 4, bitvector.MSB0)
		assertBitVector(t, []byte{1, 0, 1, 0}, v)
		v.Extend(val, 5, bitvector.MSB0)
		assertBitVector(t, []byte{1, 0, 1, 0, 1, 0, 1, 0, 1}, v)

		// LSB0 bit numbering
		v = bitvector.BitVector{}
		v.Extend(val, 4, bitvector.LSB0)
		assertBitVector(t, []byte{1, 1, 0, 1}, v)
		v.Extend(val, 5, bitvector.LSB0)
		assertBitVector(t, []byte{1, 1, 0, 1, 1, 1, 0, 1, 0}, v)
	})

	t.Run("invalid counts to Take/Extend/Iterator cause panics", func(t *testing.T) {
		v := bitvector.BitVector{BytePacking: bitvector.LSB0}

		assert.Panics(t, func() { v.Extend(0xff, 9, bitvector.LSB0) })

		assert.Panics(t, func() { v.Take(0, 9, bitvector.LSB0) })

		next := v.Iterator(bitvector.LSB0)
		assert.Panics(t, func() { next(9) })
	})

	t.Run("Take", func(t *testing.T) {
		var v bitvector.BitVector

		bits := []byte{1, 0, 1, 0, 1, 0, 1, 1}
		for _, bit := range bits {
			v.Push(bit)
		}

		// Bits 4..7 are < 1, 0, 1, 1 >.
		assert.Equal(t, byte(176), v.Take(4, 4, bitvector.MSB0))
		assert.Equal(t, byte(13), v.Take(4, 4, bitvector.LSB0))
	})

	t.Run("Iterator", func(t *testing.T) {
		var buf []byte

		// make a bitvector of 256 sample bits
		for i := 0; i < 32; i++ {
			buf = append(buf, 128+32)
		}

		v := bitvector.NewBitVector(buf, bitvector.LSB0)

		next := v.Iterator(bitvector.LSB0)

		// compare to Get()
		for i := uint(0); i < v.Len; i++ {
			expected, err := v.Get(i)
			assert.NoError(t, err)
			assert.Equal(t, expected, next(1))
		}

		// out of range should return zero
		assert.Equal(t, byte(0), next(1))
		assert.Equal(t, byte(0), next(8))

		// compare to Take(); Take is the reference, so it goes in the expected slot
		next = v.Iterator(bitvector.LSB0)
		assert.Equal(t, v.Take(0, 5, bitvector.LSB0), next(5))
		assert.Equal(t, v.Take(5, 8, bitvector.LSB0), next(8))
	})
}

// assertBitVector checks that actual holds exactly the bits in expectedBits:
// its Len must equal len(expectedBits) and Get must return each expected bit
// without error.
//
// Note: When using this helper assertion, expectedBits should *only* be 0s and 1s.
func assertBitVector(t *testing.T, expectedBits []byte, actual bitvector.BitVector) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	assert.Equal(t, uint(len(expectedBits)), actual.Len)

	for idx, bit := range expectedBits {
		actualBit, err := actual.Get(uint(idx))
		assert.NoError(t, err)
		assert.Equal(t, bit, actualBit)
	}
}
42 changes: 42 additions & 0 deletions lib/rlepluslazy/rleminus.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package rlepluslazy

/*
const version = 0
// uncompressed 1: ceil(7/8x + 1/8)
// uncompressed 2: ceil(log(x*2)/log(128)) + x
func Encode(first byte, runs []uint64) []byte {
varBuf := make([]byte, binary.MaxVarintLen64)
outBuf := make([]byte, 0, 1024)
n := binary.PutUvarint(varBuf, version)
outBuf = append(outBuf, varBuf[:n]...)
curBit := first
carryOver := uint64(0)
carryOverLen := uint(0)
for x, run := range runs {
if carryOverLen != 1 {
diff := carryOverLen % 7
if diff > run {
diff = run
}
run = run - diff
carryOver = carryOver>>diff | (math.MaxUint64 << (64 - diff))
carryOver = carryOver >> (64 - carryOverLen)
n = binary.PutUvarint(varBuf, carryOver)
outBuf = append(outBuf, varBuf[:n]...)
} else {
}
curBit = 1 - curBit
}
return nil
}
*/
Loading

0 comments on commit ca93473

Please sign in to comment.