Skip to content

Commit

Permalink
Simplify FixedLengthEncoding (#2812)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold authored Oct 3, 2022
1 parent 70054fd commit 931c6fc
Showing 1 changed file with 35 additions and 21 deletions.
56 changes: 35 additions & 21 deletions arrow/src/row/fixed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,27 @@ use half::f16;

/// Encodes a value of a particular fixed width type into bytes according to the rules
/// described on [`super::RowConverter`]
///
/// Implementors are `Copy` values whose encoding always occupies the same number of bytes.
pub trait FixedLengthEncoding<const N: usize>: Copy {
const ENCODED_LEN: usize = 1 + N;
pub trait FixedLengthEncoding: Copy {
/// Total number of bytes one encoded value occupies: a single leading byte marking
/// null / valid, followed by the bytes of [`Self::Encoded`]
const ENCODED_LEN: usize = 1 + std::mem::size_of::<Self::Encoded>();

fn encode(self) -> [u8; N];
/// The byte container produced by [`Self::encode`]
///
/// It must be viewable as both `&[u8]` and `&mut [u8]`
type Encoded: Sized + Copy + AsRef<[u8]> + AsMut<[u8]>;

/// Consumes `self` and returns its encoded bytes, without the leading null / valid byte
fn encode(self) -> Self::Encoded;
}

impl FixedLengthEncoding<1> for bool {
impl FixedLengthEncoding for bool {
// A boolean encodes to exactly one byte
type Encoded = [u8; 1];

fn encode(self) -> [u8; 1] {
// `false as u8 == 0` and `true as u8 == 1`, so `false` orders before `true` bytewise
[self as u8]
}
}

macro_rules! encode_signed {
($n:expr, $t:ty) => {
impl FixedLengthEncoding<$n> for $t {
impl FixedLengthEncoding for $t {
type Encoded = [u8; $n];

fn encode(self) -> [u8; $n] {
let mut b = self.to_be_bytes();
// Toggle top "sign" bit to ensure consistent sort order
Expand All @@ -57,7 +63,9 @@ encode_signed!(16, i128);

macro_rules! encode_unsigned {
($n:expr, $t:ty) => {
impl FixedLengthEncoding<$n> for $t {
impl FixedLengthEncoding for $t {
type Encoded = [u8; $n];

fn encode(self) -> [u8; $n] {
self.to_be_bytes()
}
Expand All @@ -70,7 +78,9 @@ encode_unsigned!(2, u16);
encode_unsigned!(4, u32);
encode_unsigned!(8, u64);

impl FixedLengthEncoding<2> for f16 {
impl FixedLengthEncoding for f16 {
type Encoded = [u8; 2];

fn encode(self) -> [u8; 2] {
// https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260
let s = self.to_bits() as i16;
Expand All @@ -79,7 +89,9 @@ impl FixedLengthEncoding<2> for f16 {
}
}

impl FixedLengthEncoding<4> for f32 {
impl FixedLengthEncoding for f32 {
type Encoded = [u8; 4];

fn encode(self) -> [u8; 4] {
// https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260
let s = self.to_bits() as i32;
Expand All @@ -88,7 +100,9 @@ impl FixedLengthEncoding<4> for f32 {
}
}

impl FixedLengthEncoding<8> for f64 {
impl FixedLengthEncoding for f64 {
type Encoded = [u8; 8];

fn encode(self) -> [u8; 8] {
// https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260
let s = self.to_bits() as i64;
Expand All @@ -97,7 +111,9 @@ impl FixedLengthEncoding<8> for f64 {
}
}

impl FixedLengthEncoding<16> for Decimal128 {
impl FixedLengthEncoding for Decimal128 {
type Encoded = [u8; 16];

fn encode(self) -> [u8; 16] {
let mut val = *self.raw_value();
// Convert to big endian representation
Expand All @@ -108,7 +124,9 @@ impl FixedLengthEncoding<16> for Decimal128 {
}
}

impl FixedLengthEncoding<32> for Decimal256 {
impl FixedLengthEncoding for Decimal256 {
type Encoded = [u8; 32];

fn encode(self) -> [u8; 32] {
let mut val = *self.raw_value();
// Convert to big endian representation
Expand All @@ -120,10 +138,10 @@ impl FixedLengthEncoding<32> for Decimal256 {
}

/// Returns the total encoded length (including null byte) for a value of type `T::Native`
///
/// The array argument is never read; it only lets the compiler infer `T` at the call site.
/// The result is the compile-time constant [`FixedLengthEncoding::ENCODED_LEN`] of `T::Native`.
pub const fn encoded_len<const N: usize, T>(_col: &PrimitiveArray<T>) -> usize
pub const fn encoded_len<T>(_col: &PrimitiveArray<T>) -> usize
where
T: ArrowPrimitiveType,
T::Native: FixedLengthEncoding<N>,
T::Native: FixedLengthEncoding,
{
T::Native::ENCODED_LEN
}
Expand All @@ -132,26 +150,22 @@ where
///
/// - 1 byte `0` if null or `1` if valid
/// - bytes of [`FixedLengthEncoding`]
pub fn encode<
const N: usize,
T: FixedLengthEncoding<N>,
I: IntoIterator<Item = Option<T>>,
>(
pub fn encode<T: FixedLengthEncoding, I: IntoIterator<Item = Option<T>>>(
out: &mut Rows,
i: I,
opts: SortOptions,
) {
for (offset, maybe_val) in out.offsets.iter_mut().skip(1).zip(i) {
let end_offset = *offset + N + 1;
let end_offset = *offset + T::ENCODED_LEN;
if let Some(val) = maybe_val {
let to_write = &mut out.buffer[*offset..end_offset];
to_write[0] = 1;
let mut encoded = val.encode();
if opts.descending {
// Flip bits to reverse order
encoded.iter_mut().for_each(|v| *v = !*v)
encoded.as_mut().iter_mut().for_each(|v| *v = !*v)
}
to_write[1..].copy_from_slice(&encoded)
to_write[1..].copy_from_slice(encoded.as_ref())
} else if !opts.nulls_first {
out.buffer[*offset] = 0xFF;
}
Expand Down

0 comments on commit 931c6fc

Please sign in to comment.