From 76eb0f89979285eddc7592a3ac74ca97321e8348 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Thu, 3 Mar 2022 05:25:37 +0000 Subject: [PATCH] WIP --- Cargo.toml | 3 +- src/io/odbc/read/deserialize.rs | 106 +++++++++++++------------------- 2 files changed, 43 insertions(+), 66 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a8afde46f51..c6a9e189f35 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -88,8 +88,7 @@ strength_reduce = { version = "0.2", optional = true } multiversion = { version = "0.6.1", optional = true } # For support for odbc -#odbc-api = { version = "0.34", optional = true } -odbc-api = { git = "https://github.com/jorgecarleitao/odbc-api", branch= "expose_indicators", optional = true } +odbc-api = { version = "0.35", optional = true } [dev-dependencies] criterion = "0.3" diff --git a/src/io/odbc/read/deserialize.rs b/src/io/odbc/read/deserialize.rs index 74db813d2c5..e55fc1a2598 100644 --- a/src/io/odbc/read/deserialize.rs +++ b/src/io/odbc/read/deserialize.rs @@ -1,3 +1,4 @@ +use odbc_api::buffers::{BinColumnIt, TextColumnIt}; use odbc_api::Bit; use crate::array::{Array, BinaryArray, BooleanArray, PrimitiveArray, Utf8Array}; @@ -12,19 +13,9 @@ use super::super::api::buffers::AnyColumnView; /// This is CPU-bounded pub fn deserialize(column: AnyColumnView, data_type: DataType) -> Box { match column { - AnyColumnView::Text(slice) => Box::new(utf8( - data_type, - slice.values(), - slice.lengths(), - slice.max_len(), - )) as _, + AnyColumnView::Text(iter) => Box::new(utf8(data_type, iter)) as _, AnyColumnView::WText(_) => todo!(), - AnyColumnView::Binary(slice) => Box::new(binary( - data_type, - slice.values(), - slice.lengths(), - slice.max_len(), - )) as _, + AnyColumnView::Binary(iter) => Box::new(binary(data_type, iter)) as _, AnyColumnView::Date(_) => todo!(), AnyColumnView::Time(_) => todo!(), AnyColumnView::Timestamp(_) => todo!(), @@ -41,42 +32,44 @@ pub fn deserialize(column: AnyColumnView, data_type: DataType) -> Box AnyColumnView::NullableTimestamp(_) => todo!(), AnyColumnView::NullableF64(slice) => Box::new(primitive_optional( data_type, - slice.values(), - slice.indicators(), + slice.raw_values().0, + slice.raw_values().1, )) as _, AnyColumnView::NullableF32(slice) => Box::new(primitive_optional( data_type, - slice.values(), - slice.indicators(), + slice.raw_values().0, + slice.raw_values().1, )) as _, AnyColumnView::NullableI8(slice) => Box::new(primitive_optional( data_type, - slice.values(), - slice.indicators(), + slice.raw_values().0, + slice.raw_values().1, )) as _, AnyColumnView::NullableI16(slice) => Box::new(primitive_optional( data_type, - slice.values(), - slice.indicators(), + slice.raw_values().0, + slice.raw_values().1, )) as _, AnyColumnView::NullableI32(slice) => Box::new(primitive_optional( data_type, - slice.values(), - slice.indicators(), + slice.raw_values().0, + slice.raw_values().1, )) as _, AnyColumnView::NullableI64(slice) => Box::new(primitive_optional( data_type, - slice.values(), - slice.indicators(), + slice.raw_values().0, + slice.raw_values().1, )) as _, AnyColumnView::NullableU8(slice) => Box::new(primitive_optional( data_type, - slice.values(), - slice.indicators(), + slice.raw_values().0, + slice.raw_values().1, + )) as _, + AnyColumnView::NullableBit(slice) => Box::new(bool_optional( + data_type, + slice.raw_values().0, + slice.raw_values().1, )) as _, - AnyColumnView::NullableBit(slice) => { - Box::new(bool_optional(data_type, slice.values(), slice.indicators())) as _ - } } } @@ -110,53 +103,38 @@ fn bool_optional(data_type: DataType, values: &[Bit], indicators: &[isize]) -> B BooleanArray::from_data(data_type, values, validity) } -fn binary_generic( - slice: &[u8], - lengths: &[isize], - max_length: usize, - null_terminator: usize, +fn binary_generic<'a>( + iter: impl Iterator>, ) -> (Buffer, Buffer, Option) { - let mut validity = MutableBitmap::with_capacity(lengths.len()); + let length = iter.size_hint().0; + let mut validity = MutableBitmap::with_capacity(length); + let mut values = Vec::::with_capacity(0); - println!("{:?}", lengths); - println!("{:?}", slice); - let mut offsets = Vec::with_capacity(lengths.len() + 1); + let mut offsets = Vec::with_capacity(length + 1); offsets.push(0i32); - let mut length = 0; - offsets.extend(lengths.iter().map(|&indicator| { - validity.push(indicator != -1); - length += if indicator > 0 { indicator as i32 } else { 0 }; - length - })); - // the loop above ensures monotonicity - // this proves boundness - assert!((length as usize) < slice.len()); - - let mut values = Vec::::with_capacity(length as usize); - offsets.windows(2).enumerate().for_each(|(index, x)| { - let len = (x[1] - x[0]) as usize; - let offset = index * (max_length + null_terminator); - // this bound check is not necessary - values.extend_from_slice(&slice[offset..offset + len]) - }); + + for item in iter { + if let Some(item) = item { + values.extend_from_slice(item); + validity.push(true); + } else { + validity.push(false); + } + offsets.push(values.len() as i32) + } (offsets.into(), values.into(), validity.into()) } -fn binary( - data_type: DataType, - slice: &[u8], - lengths: &[isize], - max_length: usize, -) -> BinaryArray { - let (offsets, values, validity) = binary_generic(slice, lengths, max_length, 0); +fn binary(data_type: DataType, iter: BinColumnIt) -> BinaryArray { + let (offsets, values, validity) = binary_generic(iter); // this O(N) check is not necessary BinaryArray::from_data(data_type, offsets, values, validity) } -fn utf8(data_type: DataType, slice: &[u8], lengths: &[isize], max_length: usize) -> Utf8Array { - let (offsets, values, validity) = binary_generic(slice, lengths, max_length, 1); +fn utf8(data_type: DataType, iter: TextColumnIt) -> Utf8Array { + let (offsets, values, validity) = binary_generic(iter); // this O(N) check is necessary for the utf8 validity Utf8Array::from_data(data_type, offsets, values, validity)