Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added support to read required dictionaries (#806)
Browse files: browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Feb 5, 2022
1 parent 694b895 commit 19c07ab
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 8 deletions.
35 changes: 27 additions & 8 deletions src/io/parquet/read/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ where
K: DictionaryKey,
{
Optional(Optional<'a, K>),
//Required(Required<'a, T, P, F>),
Required(Required<'a, K>),
}

#[inline]
Expand All @@ -44,6 +44,27 @@ where
new_indices.map(Box::new(|x| K::from_u32(x).unwrap()) as _)
}

/// Decoding state for a dictionary-encoded data page whose column is not
/// optional (it is the state built for the `is_optional == false` case).
///
/// Unlike its optional counterpart, it carries no validity information:
/// the only state is the iterator over the dictionary keys.
#[derive(Debug)]
pub struct Required<'a, K>
where
K: DictionaryKey,
{
/// Iterator over the page's keys: every `u32` yielded by the
/// `HybridRleDecoder` is converted to the key type `K` by the boxed closure.
values: std::iter::Map<HybridRleDecoder<'a>, Box<dyn Fn(u32) -> K + 'a>>,
}

impl<'a, K: DictionaryKey> Required<'a, K> {
    /// Builds the state for a required dictionary page.
    ///
    /// The page is split into its component buffers and only the third one
    /// (the encoded dictionary indices) is kept; the other three components
    /// returned by `utils::split_buffer` are discarded here.
    ///
    /// The resulting iterator is created by `values_iter1` from that buffer
    /// and the page's declared number of values.
    // NOTE(review): `values_iter1` is presumably the helper that converts each
    // decoded `u32` into `K` — confirm against its definition.
    fn new(page: &'a DataPage) -> Self {
        let (_, _, keys_buffer, _) = utils::split_buffer(page, page.descriptor());

        Self {
            values: values_iter1(keys_buffer, page.num_values()),
        }
    }
}

#[derive(Debug)]
pub struct Optional<'a, K>
where
Expand Down Expand Up @@ -76,6 +97,7 @@ where
fn len(&self) -> usize {
match self {
State::Optional(optional) => optional.validity.len(),
State::Required(required) => required.values.size_hint().0,
}
}
}
Expand Down Expand Up @@ -112,10 +134,7 @@ where

match (page.encoding(), is_optional) {
(Encoding::PlainDictionary | Encoding::RleDictionary, false) => {
todo!()
/*Ok(State::Required(
RequiredDictionaryPage::new(page, dict, op2),
))*/
Ok(State::Required(Required::new(page)))
}
(Encoding::PlainDictionary | Encoding::RleDictionary, true) => {
Ok(State::Optional(Optional::new(page)))
Expand Down Expand Up @@ -148,9 +167,9 @@ where
values,
&mut page.values,
),
/*State::Required(page) => {
State::Required(page) => {
values.extend(page.values.by_ref().take(remaining));
}*/
}
}
}
}
Expand All @@ -170,7 +189,7 @@ impl Dict {
}
}

pub fn finish_key<K: DictionaryKey>(values: Vec<K>, validity: MutableBitmap) -> PrimitiveArray<K> {
fn finish_key<K: DictionaryKey>(values: Vec<K>, validity: MutableBitmap) -> PrimitiveArray<K> {
PrimitiveArray::from_data(K::PRIMITIVE.into(), values.into(), validity.into())
}

Expand Down
2 changes: 2 additions & 0 deletions tests/it/io/parquet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -745,12 +745,14 @@ fn arrow_type() -> Result<()> {
Field::new("a3", array3.data_type().clone(), true),
Field::new("a4", array4.data_type().clone(), true),
Field::new("a5", array5.data_type().clone(), true),
Field::new("a6", array5.data_type().clone(), false),
]);
let batch = Chunk::try_new(vec![
Arc::new(array) as Arc<dyn Array>,
Arc::new(array2),
Arc::new(array3),
Arc::new(array4),
Arc::new(array5.clone()),
Arc::new(array5),
])?;

Expand Down

0 comments on commit 19c07ab

Please sign in to comment.