From 931c6fcc45f4aca441191f007cf31daa19eb66d2 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Mon, 3 Oct 2022 14:53:53 +0100 Subject: [PATCH] Simplify FixedLengthEncoding (#2812) --- arrow/src/row/fixed.rs | 56 ++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/arrow/src/row/fixed.rs b/arrow/src/row/fixed.rs index 78108274241b..04b9a30ecad8 100644 --- a/arrow/src/row/fixed.rs +++ b/arrow/src/row/fixed.rs @@ -24,13 +24,17 @@ use half::f16; /// Encodes a value of a particular fixed width type into bytes according to the rules /// described on [`super::RowConverter`] -pub trait FixedLengthEncoding<const N: usize>: Copy { - const ENCODED_LEN: usize = 1 + N; +pub trait FixedLengthEncoding: Copy { + const ENCODED_LEN: usize = 1 + std::mem::size_of::<Self::Encoded>(); - fn encode(self) -> [u8; N]; + type Encoded: Sized + Copy + AsRef<[u8]> + AsMut<[u8]>; + + fn encode(self) -> Self::Encoded; } -impl FixedLengthEncoding<1> for bool { +impl FixedLengthEncoding for bool { + type Encoded = [u8; 1]; + fn encode(self) -> [u8; 1] { [self as u8] } @@ -38,7 +42,9 @@ impl FixedLengthEncoding<1> for bool { macro_rules! encode_signed { ($n:expr, $t:ty) => { - impl FixedLengthEncoding<$n> for $t { + impl FixedLengthEncoding for $t { + type Encoded = [u8; $n]; + fn encode(self) -> [u8; $n] { let mut b = self.to_be_bytes(); // Toggle top "sign" bit to ensure consistent sort order @@ -57,7 +63,9 @@ encode_signed!(16, i128); macro_rules! 
encode_unsigned { ($n:expr, $t:ty) => { - impl FixedLengthEncoding<$n> for $t { + impl FixedLengthEncoding for $t { + type Encoded = [u8; $n]; + fn encode(self) -> [u8; $n] { self.to_be_bytes() } @@ -70,7 +78,9 @@ encode_unsigned!(2, u16); encode_unsigned!(4, u32); encode_unsigned!(8, u64); -impl FixedLengthEncoding<2> for f16 { +impl FixedLengthEncoding for f16 { + type Encoded = [u8; 2]; + fn encode(self) -> [u8; 2] { // https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260 let s = self.to_bits() as i16; @@ -79,7 +89,9 @@ impl FixedLengthEncoding<2> for f16 { } } -impl FixedLengthEncoding<4> for f32 { +impl FixedLengthEncoding for f32 { + type Encoded = [u8; 4]; + fn encode(self) -> [u8; 4] { // https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260 let s = self.to_bits() as i32; @@ -88,7 +100,9 @@ impl FixedLengthEncoding<4> for f32 { } } -impl FixedLengthEncoding<8> for f64 { +impl FixedLengthEncoding for f64 { + type Encoded = [u8; 8]; + fn encode(self) -> [u8; 8] { // https://github.com/rust-lang/rust/blob/9c20b2a8cc7588decb6de25ac6a7912dcef24d65/library/core/src/num/f32.rs#L1176-L1260 let s = self.to_bits() as i64; @@ -97,7 +111,9 @@ impl FixedLengthEncoding<8> for f64 { } } -impl FixedLengthEncoding<16> for Decimal128 { +impl FixedLengthEncoding for Decimal128 { + type Encoded = [u8; 16]; + fn encode(self) -> [u8; 16] { let mut val = *self.raw_value(); // Convert to big endian representation @@ -108,7 +124,9 @@ impl FixedLengthEncoding<16> for Decimal128 { } } -impl FixedLengthEncoding<32> for Decimal256 { +impl FixedLengthEncoding for Decimal256 { + type Encoded = [u8; 32]; + fn encode(self) -> [u8; 32] { let mut val = *self.raw_value(); // Convert to big endian representation @@ -120,10 +138,10 @@ impl FixedLengthEncoding<32> for Decimal256 { } /// Returns the total encoded length (including null byte) for a value of type 
`T::Native` -pub const fn encoded_len<const N: usize, T>(_col: &PrimitiveArray<T>) -> usize +pub const fn encoded_len<T>(_col: &PrimitiveArray<T>) -> usize where T: ArrowPrimitiveType, - T::Native: FixedLengthEncoding<N>, + T::Native: FixedLengthEncoding, { T::Native::ENCODED_LEN } @@ -132,26 +150,22 @@ where /// /// - 1 byte `0` if null or `1` if valid /// - bytes of [`FixedLengthEncoding`] -pub fn encode< - const N: usize, - T: FixedLengthEncoding<N>, - I: IntoIterator<Item = Option<T>>, ->( +pub fn encode<T: FixedLengthEncoding, I: IntoIterator<Item = Option<T>>>( out: &mut Rows, i: I, opts: SortOptions, ) { for (offset, maybe_val) in out.offsets.iter_mut().skip(1).zip(i) { - let end_offset = *offset + N + 1; + let end_offset = *offset + T::ENCODED_LEN; if let Some(val) = maybe_val { let to_write = &mut out.buffer[*offset..end_offset]; to_write[0] = 1; let mut encoded = val.encode(); if opts.descending { // Flip bits to reverse order - encoded.iter_mut().for_each(|v| *v = !*v) + encoded.as_mut().iter_mut().for_each(|v| *v = !*v) } - to_write[1..].copy_from_slice(&encoded) + to_write[1..].copy_from_slice(encoded.as_ref()) } else if !opts.nulls_first { out.buffer[*offset] = 0xFF; }