Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Simplify FixedLengthEncoding #2812

Merged
merged 1 commit into from
Oct 3, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 35 additions & 21 deletions arrow/src/row/fixed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,27 @@ use half::f16;

/// Encodes a value of a particular fixed width type into bytes according to the rules
/// described on [`super::RowConverter`]
pub trait FixedLengthEncoding<const N: usize>: Copy {
const ENCODED_LEN: usize = 1 + N;
pub trait FixedLengthEncoding: Copy {
const ENCODED_LEN: usize = 1 + std::mem::size_of::<Self::Encoded>();

fn encode(self) -> [u8; N];
type Encoded: Sized + Copy + AsRef<[u8]> + AsMut<[u8]>;

fn encode(self) -> Self::Encoded;
}

impl FixedLengthEncoding<1> for bool {
impl FixedLengthEncoding for bool {
type Encoded = [u8; 1];

fn encode(self) -> [u8; 1] {
[self as u8]
}
}

macro_rules! encode_signed {
($n:expr, $t:ty) => {
impl FixedLengthEncoding<$n> for $t {
impl FixedLengthEncoding for $t {
type Encoded = [u8; $n];

fn encode(self) -> [u8; $n] {
let mut b = self.to_be_bytes();
// Toggle top "sign" bit to ensure consistent sort order
Expand All @@ -57,7 +63,9 @@ encode_signed!(16, i128);

macro_rules! encode_unsigned {
($n:expr, $t:ty) => {
impl FixedLengthEncoding<$n> for $t {
impl FixedLengthEncoding for $t {
type Encoded = [u8; $n];

fn encode(self) -> [u8; $n] {
self.to_be_bytes()
}
Expand All @@ -70,7 +78,9 @@ encode_unsigned!(2, u16);
encode_unsigned!(4, u32);
encode_unsigned!(8, u64);

impl FixedLengthEncoding<2> for f16 {
impl FixedLengthEncoding for f16 {
type Encoded = [u8; 2];

fn encode(self) -> [u8; 2] {
// https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260
let s = self.to_bits() as i16;
Expand All @@ -79,7 +89,9 @@ impl FixedLengthEncoding<2> for f16 {
}
}

impl FixedLengthEncoding<4> for f32 {
impl FixedLengthEncoding for f32 {
type Encoded = [u8; 4];

fn encode(self) -> [u8; 4] {
// https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260
let s = self.to_bits() as i32;
Expand All @@ -88,7 +100,9 @@ impl FixedLengthEncoding<4> for f32 {
}
}

impl FixedLengthEncoding<8> for f64 {
impl FixedLengthEncoding for f64 {
type Encoded = [u8; 8];

fn encode(self) -> [u8; 8] {
// https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260
let s = self.to_bits() as i64;
Expand All @@ -97,7 +111,9 @@ impl FixedLengthEncoding<8> for f64 {
}
}

impl FixedLengthEncoding<16> for Decimal128 {
impl FixedLengthEncoding for Decimal128 {
type Encoded = [u8; 16];

fn encode(self) -> [u8; 16] {
let mut val = *self.raw_value();
// Convert to big endian representation
Expand All @@ -108,7 +124,9 @@ impl FixedLengthEncoding<16> for Decimal128 {
}
}

impl FixedLengthEncoding<32> for Decimal256 {
impl FixedLengthEncoding for Decimal256 {
type Encoded = [u8; 32];

fn encode(self) -> [u8; 32] {
let mut val = *self.raw_value();
// Convert to big endian representation
Expand All @@ -120,10 +138,10 @@ impl FixedLengthEncoding<32> for Decimal256 {
}

/// Returns the total encoded length (including null byte) for a value of type `T::Native`
pub const fn encoded_len<const N: usize, T>(_col: &PrimitiveArray<T>) -> usize
pub const fn encoded_len<T>(_col: &PrimitiveArray<T>) -> usize
where
T: ArrowPrimitiveType,
T::Native: FixedLengthEncoding<N>,
T::Native: FixedLengthEncoding,
{
T::Native::ENCODED_LEN
}
Expand All @@ -132,26 +150,22 @@ where
///
/// - 1 byte `0` if null or `1` if valid
/// - bytes of [`FixedLengthEncoding`]
pub fn encode<
const N: usize,
T: FixedLengthEncoding<N>,
I: IntoIterator<Item = Option<T>>,
>(
pub fn encode<T: FixedLengthEncoding, I: IntoIterator<Item = Option<T>>>(
out: &mut Rows,
i: I,
opts: SortOptions,
) {
for (offset, maybe_val) in out.offsets.iter_mut().skip(1).zip(i) {
let end_offset = *offset + N + 1;
let end_offset = *offset + T::ENCODED_LEN;
if let Some(val) = maybe_val {
let to_write = &mut out.buffer[*offset..end_offset];
to_write[0] = 1;
let mut encoded = val.encode();
if opts.descending {
// Flip bits to reverse order
encoded.iter_mut().for_each(|v| *v = !*v)
encoded.as_mut().iter_mut().for_each(|v| *v = !*v)
}
to_write[1..].copy_from_slice(&encoded)
to_write[1..].copy_from_slice(encoded.as_ref())
} else if !opts.nulls_first {
out.buffer[*offset] = 0xFF;
}
Expand Down