diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index 5bb0bd0f619..d15a7294565 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -170,11 +170,13 @@ fn slice_parquet_array<'a>( // the slice is a bit awkward because we always want the latest value to compute the next length; l_nested.offsets = &l_nested.offsets [offset..offset + std::cmp::min(length + 1, l_nested.offsets.len())]; + l_nested.validity_offset = offset; } Nested::List(l_nested) => { is_nested = true; l_nested.offsets = &l_nested.offsets [offset..offset + std::cmp::min(length + 1, l_nested.offsets.len())]; + l_nested.validity_offset = offset; } _ => {} } diff --git a/src/io/parquet/write/nested/def.rs b/src/io/parquet/write/nested/def.rs index 395f73b913e..0d731d115f1 100644 --- a/src/io/parquet/write/nested/def.rs +++ b/src/io/parquet/write/nested/def.rs @@ -37,6 +37,7 @@ fn single_list_iter<'a, O: Offset>(nested: &ListNested<'a, O>) -> Box Box::new( validity .iter() + .skip(nested.validity_offset) // lists have 2 groups, so // True => 2 // False => 1 @@ -221,6 +222,7 @@ mod tests { is_optional: false, offsets: &[0, 2, 2, 5, 8, 8, 11, 11, 12], validity: None, + validity_offset: 0, }), Nested::Primitive(None, false, 12), ]; @@ -246,6 +248,7 @@ mod tests { is_optional: true, offsets: &[0, 2, 2, 5, 8, 8, 11, 11, 12], validity: Some(&v0), + validity_offset: 0, }), Nested::Primitive(Some(&v1), true, 12), ]; @@ -261,11 +264,13 @@ mod tests { is_optional: false, offsets: &[0, 2, 4], validity: None, + validity_offset: 0, }), Nested::List(ListNested:: { is_optional: false, offsets: &[0, 3, 7, 8, 10], validity: None, + validity_offset: 0, }), Nested::Primitive(None, false, 12), ]; @@ -283,11 +288,13 @@ mod tests { is_optional: true, offsets: &[0, 2, 2, 2, 5], validity: Some(&a), + validity_offset: 0, }), Nested::List(ListNested:: { is_optional: false, offsets: &[0, 3, 7, 8, 8, 10], validity: None, + validity_offset: 0, }), Nested::Primitive(None, false, 12), ]; @@ 
-306,11 +313,13 @@ mod tests { is_optional: true, offsets: &[0, 2, 2, 5], validity: Some(&a), + validity_offset: 0, }), Nested::List(ListNested:: { is_optional: true, offsets: &[0, 3, 7, 8, 8, 8], validity: Some(&b), + validity_offset: 0, }), Nested::Primitive(None, false, 12), ]; @@ -330,11 +339,13 @@ mod tests { is_optional: true, offsets: &[0, 2, 2, 4], validity: Some(&a), + validity_offset: 0, }), Nested::List(ListNested:: { is_optional: true, offsets: &[0, 3, 7, 8, 8], validity: Some(&b), + validity_offset: 0, }), Nested::Primitive(Some(&c), true, 12), ]; @@ -342,4 +353,24 @@ mod tests { test(nested, expected) } + + #[test] + fn list_offset() { + let a = Bitmap::from([ + true, false, false, false, true, false, true, false, false, false, true, false, false, + false, false, + ]); + let nested = vec![ + Nested::List(ListNested { + is_optional: true, + offsets: &[4, 4, 7, 7], + validity: Some(&a), + validity_offset: 3, // skip bits 0..3; the lists use bits 3, 4, 5 + }), + Nested::Primitive(None, false, 12), + ]; + + let expected = vec![0, 2, 2, 2, 0]; // null list, 3 items, null list + test(nested, expected) + } } diff --git a/src/io/parquet/write/nested/rep.rs b/src/io/parquet/write/nested/rep.rs index c7c5d15eb8e..bfd0046e06b 100644 --- a/src/io/parquet/write/nested/rep.rs +++ b/src/io/parquet/write/nested/rep.rs @@ -181,6 +181,7 @@ mod tests { is_optional: false, offsets: &[0, 2, 2, 5, 8, 8, 11, 11, 12], validity: None, + validity_offset: 0, }), Nested::Primitive(None, false, 12), ]; @@ -197,11 +198,13 @@ mod tests { is_optional: false, offsets: &[0, 2, 2, 4], validity: None, + validity_offset: 0, }), Nested::List(ListNested:: { is_optional: false, offsets: &[0, 3, 7, 8, 10], validity: None, + validity_offset: 0, }), Nested::Primitive(None, false, 10), ]; @@ -222,6 +225,7 @@ mod tests { is_optional: true, offsets: &[0i32, 1, 2], validity: None, + validity_offset: 0, }), Nested::Struct(None, true, 2), Nested::Primitive(None, true, 2), diff --git a/src/io/parquet/write/pages.rs b/src/io/parquet/write/pages.rs index 
1e4bcbe9071..6ab979e3a50 100644 --- a/src/io/parquet/write/pages.rs +++ b/src/io/parquet/write/pages.rs @@ -19,6 +19,7 @@ pub struct ListNested<'a, O: Offset> { pub is_optional: bool, pub offsets: &'a [O], pub validity: Option<&'a Bitmap>, + pub validity_offset: usize, } impl<'a, O: Offset> ListNested<'a, O> { @@ -27,6 +28,7 @@ impl<'a, O: Offset> ListNested<'a, O> { is_optional, offsets, validity, + validity_offset: 0, } } } @@ -487,6 +489,7 @@ mod tests { is_optional: false, offsets: &[0, 2, 4], validity: None, + validity_offset: 0, }), Nested::Struct(Some(&Bitmap::from([true, true, false, true])), true, 4), Nested::Primitive(None, false, 4), @@ -496,6 +499,7 @@ mod tests { is_optional: false, offsets: &[0, 2, 4], validity: None, + validity_offset: 0, }), Nested::Struct(Some(&Bitmap::from([true, true, false, true])), true, 4), Nested::Primitive(None, false, 4),