diff --git a/src/io/parquet/read/dictionary.rs b/src/io/parquet/read/dictionary.rs index b9d1e9f21a2..23aa8d28339 100644 --- a/src/io/parquet/read/dictionary.rs +++ b/src/io/parquet/read/dictionary.rs @@ -24,7 +24,7 @@ where K: DictionaryKey, { Optional(Optional<'a, K>), - //Required(Required<'a, T, P, F>), + Required(Required<'a, K>), } #[inline] @@ -44,6 +44,27 @@ where new_indices.map(Box::new(|x| K::from_u32(x).unwrap()) as _) } +#[derive(Debug)] +pub struct Required<'a, K> +where + K: DictionaryKey, +{ + values: std::iter::Map, Box K + 'a>>, +} + +impl<'a, K> Required<'a, K> +where + K: DictionaryKey, +{ + fn new(page: &'a DataPage) -> Self { + let (_, _, indices_buffer, _) = utils::split_buffer(page, page.descriptor()); + + let values = values_iter1(indices_buffer, page.num_values()); + + Self { values } + } +} + #[derive(Debug)] pub struct Optional<'a, K> where @@ -76,6 +97,7 @@ where fn len(&self) -> usize { match self { State::Optional(optional) => optional.validity.len(), + State::Required(required) => required.values.size_hint().0, } } } @@ -112,10 +134,7 @@ where match (page.encoding(), is_optional) { (Encoding::PlainDictionary | Encoding::RleDictionary, false) => { - todo!() - /*Ok(State::Required( - RequiredDictionaryPage::new(page, dict, op2), - ))*/ + Ok(State::Required(Required::new(page))) } (Encoding::PlainDictionary | Encoding::RleDictionary, true) => { Ok(State::Optional(Optional::new(page))) @@ -148,9 +167,9 @@ where values, &mut page.values, ), - /*State::Required(page) => { + State::Required(page) => { values.extend(page.values.by_ref().take(remaining)); - }*/ + } } } } @@ -170,7 +189,7 @@ impl Dict { } } -pub fn finish_key(values: Vec, validity: MutableBitmap) -> PrimitiveArray { +fn finish_key(values: Vec, validity: MutableBitmap) -> PrimitiveArray { PrimitiveArray::from_data(K::PRIMITIVE.into(), values.into(), validity.into()) } diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs index 515d1710747..550a8f924b6 100644 --- a/tests/it/io/parquet/mod.rs +++ b/tests/it/io/parquet/mod.rs @@ -745,12 +745,14 @@ fn arrow_type() -> Result<()> { Field::new("a3", array3.data_type().clone(), true), Field::new("a4", array4.data_type().clone(), true), Field::new("a5", array5.data_type().clone(), true), + Field::new("a6", array5.data_type().clone(), false), ]); let batch = Chunk::try_new(vec![ Arc::new(array) as Arc, Arc::new(array2), Arc::new(array3), Arc::new(array4), + Arc::new(array5.clone()), Arc::new(array5), ])?;