diff --git a/Cargo.toml b/Cargo.toml index bd255befa43..ee47f3cac67 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,7 +69,7 @@ ahash = { version = "0.7", optional = true } # parquet support #parquet2 = { version = "0.10", optional = true, default_features = false, features = ["stream"] } -parquet2 = { path = "../parquet2", optional = true, default_features = false, features = ["stream"] } +parquet2 = { git = "https://github.com/jorgecarleitao/parquet2", branch = "write_indexes", optional = true, default_features = false, features = ["stream"] } # avro support avro-schema = { version = "0.2", optional = true } diff --git a/benches/write_parquet.rs b/benches/write_parquet.rs index 32b264bfe53..42cf8deec49 100644 --- a/benches/write_parquet.rs +++ b/benches/write_parquet.rs @@ -34,8 +34,7 @@ fn write(array: &dyn Array, encoding: Encoding) -> Result<()> { writer.start()?; for group in row_groups { - let (group, len) = group?; - writer.write(group, len)?; + writer.write(group?)?; } let _ = writer.end(None)?; Ok(()) diff --git a/examples/parquet_write.rs b/examples/parquet_write.rs index df7939563bb..f11e2ec4f29 100644 --- a/examples/parquet_write.rs +++ b/examples/parquet_write.rs @@ -30,8 +30,7 @@ fn write_batch(path: &str, schema: Schema, columns: Chunk>) -> Re writer.start()?; for group in row_groups { - let (group, len) = group?; - writer.write(group, len)?; + writer.write(group?)?; } let _size = writer.end(None)?; Ok(()) diff --git a/src/doc/lib.md b/src/doc/lib.md index 9638ff47480..08108b50932 100644 --- a/src/doc/lib.md +++ b/src/doc/lib.md @@ -62,8 +62,7 @@ fn main() -> Result<()> { // Write the file. writer.start()?; for group in row_groups { - let (group, len) = group?; - writer.write(group, len)?; + writer.write(group?)?; } let _ = writer.end(None)?; Ok(()) diff --git a/src/io/parquet/write/binary/basic.rs b/src/io/parquet/write/binary/basic.rs index d647c4cfcf2..5d67dbf90f8 100644 --- a/src/io/parquet/write/binary/basic.rs +++ b/src/io/parquet/write/binary/basic.rs @@ -87,6 +87,7 @@ pub fn array_to_page( utils::build_plain_page( buffer, array.len(), + array.len(), array.null_count(), 0, definition_levels_byte_length, @@ -97,7 +98,7 @@ pub fn array_to_page( ) } -pub(super) fn build_statistics( +pub(crate) fn build_statistics( array: &BinaryArray, primitive_type: PrimitiveType, ) -> ParquetStatistics { diff --git a/src/io/parquet/write/binary/mod.rs b/src/io/parquet/write/binary/mod.rs index 8d9e94cd0fb..e229572b14a 100644 --- a/src/io/parquet/write/binary/mod.rs +++ b/src/io/parquet/write/binary/mod.rs @@ -2,6 +2,7 @@ mod basic; mod nested; pub use basic::array_to_page; +pub(crate) use basic::build_statistics; pub(crate) use basic::encode_plain; pub(super) use basic::{encode_delta, ord_binary}; pub use nested::array_to_page as nested_array_to_page; diff --git a/src/io/parquet/write/binary/nested.rs b/src/io/parquet/write/binary/nested.rs index 5efbaece463..941a910ac3a 100644 --- a/src/io/parquet/write/binary/nested.rs +++ b/src/io/parquet/write/binary/nested.rs @@ -41,6 +41,7 @@ where utils::build_plain_page( buffer, levels::num_values(nested.offsets()), + nested.offsets().len().saturating_sub(1), array.null_count(), repetition_levels_byte_length, definition_levels_byte_length, diff --git a/src/io/parquet/write/boolean/basic.rs b/src/io/parquet/write/boolean/basic.rs index 91c9c307d0b..e70a0de769d 100644 --- a/src/io/parquet/write/boolean/basic.rs +++ b/src/io/parquet/write/boolean/basic.rs @@ -69,6 +69,7 @@ pub fn array_to_page( utils::build_plain_page( buffer, array.len(), + array.len(), array.null_count(), 0, definition_levels_byte_length, diff --git a/src/io/parquet/write/boolean/nested.rs b/src/io/parquet/write/boolean/nested.rs index 35b65c3f10b..d758bd5097e 100644 --- a/src/io/parquet/write/boolean/nested.rs +++ b/src/io/parquet/write/boolean/nested.rs @@ -39,6 +39,7 @@ where utils::build_plain_page( buffer, levels::num_values(nested.offsets()), + nested.offsets().len().saturating_sub(1), array.null_count(), repetition_levels_byte_length, definition_levels_byte_length, diff --git a/src/io/parquet/write/dictionary.rs b/src/io/parquet/write/dictionary.rs index 8502e7839c9..e41f006ca2a 100644 --- a/src/io/parquet/write/dictionary.rs +++ b/src/io/parquet/write/dictionary.rs @@ -2,12 +2,17 @@ use parquet2::{ encoding::{hybrid_rle::encode_u32, Encoding}, metadata::Descriptor, page::{EncodedDictPage, EncodedPage}, + statistics::ParquetStatistics, write::{DynIter, WriteOptions}, }; +use super::binary::build_statistics as binary_build_statistics; use super::binary::encode_plain as binary_encode_plain; +use super::fixed_len_bytes::build_statistics as fixed_binary_build_statistics; use super::fixed_len_bytes::encode_plain as fixed_binary_encode_plain; +use super::primitive::build_statistics as primitive_build_statistics; use super::primitive::encode_plain as primitive_encode_plain; +use super::utf8::build_statistics as utf8_build_statistics; use super::utf8::encode_plain as utf8_encode_plain; use crate::bitmap::Bitmap; use crate::datatypes::DataType; @@ -20,9 +25,9 @@ use crate::{ fn encode_keys( array: &PrimitiveArray, - // todo: merge this to not discard values' validity validity: Option<&Bitmap>, descriptor: Descriptor, + statistics: ParquetStatistics, options: WriteOptions, ) -> Result { let is_optional = is_nullable(&descriptor.primitive_type.field_info); @@ -96,10 +101,11 @@ fn encode_keys( utils::build_plain_page( buffer, array.len(), + array.len(), array.null_count(), 0, definition_levels_byte_length, - None, + Some(statistics), descriptor, options, Encoding::RleDictionary, @@ -108,12 +114,15 @@ fn encode_keys( } macro_rules! dyn_prim { - ($from:ty, $to:ty, $array:expr, $options:expr) => {{ + ($from:ty, $to:ty, $array:expr, $options:expr, $descriptor:expr) => {{ let values = $array.values().as_any().downcast_ref().unwrap(); let mut buffer = vec![]; primitive_encode_plain::<$from, $to>(values, false, &mut buffer); - EncodedDictPage::new(buffer, values.len()) + ( + EncodedDictPage::new(buffer, values.len()), + primitive_build_statistics::<$from, $to>(values, $descriptor.primitive_type.clone()), + ) }}; } @@ -123,59 +132,66 @@ pub fn array_to_pages( options: WriteOptions, encoding: Encoding, ) -> Result>> { + println!("{descriptor:#?}"); match encoding { Encoding::PlainDictionary | Encoding::RleDictionary => { // write DictPage - let dict_page = match array.values().data_type().to_logical_type() { - DataType::Int8 => dyn_prim!(i8, i32, array, options), - DataType::Int16 => dyn_prim!(i16, i32, array, options), + let (dict_page, statistics) = match array.values().data_type().to_logical_type() { + DataType::Int8 => dyn_prim!(i8, i32, array, options, descriptor), + DataType::Int16 => dyn_prim!(i16, i32, array, options, descriptor), DataType::Int32 | DataType::Date32 | DataType::Time32(_) => { - dyn_prim!(i32, i32, array, options) + dyn_prim!(i32, i32, array, options, descriptor) } DataType::Int64 | DataType::Date64 | DataType::Time64(_) | DataType::Timestamp(_, _) - | DataType::Duration(_) => dyn_prim!(i64, i64, array, options), - DataType::UInt8 => dyn_prim!(u8, i32, array, options), - DataType::UInt16 => dyn_prim!(u16, i32, array, options), - DataType::UInt32 => dyn_prim!(u32, i32, array, options), - DataType::UInt64 => dyn_prim!(i64, i64, array, options), - DataType::Float32 => dyn_prim!(f32, f32, array, options), - DataType::Float64 => dyn_prim!(f64, f64, array, options), + | DataType::Duration(_) => dyn_prim!(i64, i64, array, options, descriptor), + DataType::UInt8 => dyn_prim!(u8, i32, array, options, descriptor), + DataType::UInt16 => dyn_prim!(u16, i32, array, options, descriptor), + DataType::UInt32 => dyn_prim!(u32, i32, array, options, descriptor), + DataType::UInt64 => dyn_prim!(i64, i64, array, options, descriptor), + DataType::Float32 => dyn_prim!(f32, f32, array, options, descriptor), + DataType::Float64 => dyn_prim!(f64, f64, array, options, descriptor), DataType::Utf8 => { - let values = array.values().as_any().downcast_ref().unwrap(); + let array = array.values().as_any().downcast_ref().unwrap(); let mut buffer = vec![]; - utf8_encode_plain::(values, false, &mut buffer); - EncodedDictPage::new(buffer, values.len()) + utf8_encode_plain::(array, false, &mut buffer); + let stats = utf8_build_statistics(array, descriptor.primitive_type.clone()); + (EncodedDictPage::new(buffer, array.len()), stats) } DataType::LargeUtf8 => { - let values = array.values().as_any().downcast_ref().unwrap(); + let array = array.values().as_any().downcast_ref().unwrap(); let mut buffer = vec![]; - utf8_encode_plain::(values, false, &mut buffer); - EncodedDictPage::new(buffer, values.len()) + utf8_encode_plain::(array, false, &mut buffer); + let stats = utf8_build_statistics(array, descriptor.primitive_type.clone()); + (EncodedDictPage::new(buffer, array.len()), stats) } DataType::Binary => { - let values = array.values().as_any().downcast_ref().unwrap(); + let array = array.values().as_any().downcast_ref().unwrap(); let mut buffer = vec![]; - binary_encode_plain::(values, false, &mut buffer); - EncodedDictPage::new(buffer, values.len()) + binary_encode_plain::(array, false, &mut buffer); + let stats = binary_build_statistics(array, descriptor.primitive_type.clone()); + (EncodedDictPage::new(buffer, array.len()), stats) } DataType::LargeBinary => { - let values = array.values().as_any().downcast_ref().unwrap(); + let array = array.values().as_any().downcast_ref().unwrap(); let mut buffer = vec![]; - binary_encode_plain::(values, false, &mut buffer); - EncodedDictPage::new(buffer, values.len()) + binary_encode_plain::(array, false, &mut buffer); + let stats = binary_build_statistics(array, descriptor.primitive_type.clone()); + (EncodedDictPage::new(buffer, array.len()), stats) } DataType::FixedSizeBinary(_) => { let mut buffer = vec![]; let array = array.values().as_any().downcast_ref().unwrap(); fixed_binary_encode_plain(array, false, &mut buffer); - EncodedDictPage::new(buffer, array.len()) + let stats = + fixed_binary_build_statistics(array, descriptor.primitive_type.clone()); + (EncodedDictPage::new(buffer, array.len()), stats) } other => { return Err(ArrowError::NotYetImplemented(format!( @@ -187,8 +203,13 @@ pub fn array_to_pages( let dict_page = EncodedPage::Dict(dict_page); // write DataPage pointing to DictPage - let data_page = - encode_keys(array.keys(), array.values().validity(), descriptor, options)?; + let data_page = encode_keys( + array.keys(), + array.values().validity(), + descriptor, + statistics, + options, + )?; let iter = std::iter::once(Ok(dict_page)).chain(std::iter::once(Ok(data_page))); Ok(DynIter::new(Box::new(iter))) diff --git a/src/io/parquet/write/file.rs b/src/io/parquet/write/file.rs index 47f595a1717..2354fead5e7 100644 --- a/src/io/parquet/write/file.rs +++ b/src/io/parquet/write/file.rs @@ -67,12 +67,8 @@ impl FileWriter { } /// Writes a row group to the file. - pub fn write( - &mut self, - row_group: RowGroupIter<'_, ArrowError>, - num_rows: usize, - ) -> Result<()> { - Ok(self.writer.write(row_group, num_rows)?) + pub fn write(&mut self, row_group: RowGroupIter<'_, ArrowError>) -> Result<()> { + Ok(self.writer.write(row_group)?) } /// Writes the footer of the parquet file. Returns the total size of the file. diff --git a/src/io/parquet/write/fixed_len_bytes.rs b/src/io/parquet/write/fixed_len_bytes.rs index a4a5f336123..32bce74d1c8 100644 --- a/src/io/parquet/write/fixed_len_bytes.rs +++ b/src/io/parquet/write/fixed_len_bytes.rs @@ -2,7 +2,8 @@ use parquet2::{ encoding::Encoding, metadata::Descriptor, page::DataPage, - statistics::{deserialize_statistics, serialize_statistics, ParquetStatistics}, + schema::types::PrimitiveType, + statistics::{serialize_statistics, FixedLenStatistics, ParquetStatistics, Statistics}, write::WriteOptions, }; @@ -48,7 +49,7 @@ pub fn array_to_page( encode_plain(array, is_optional, &mut buffer); let statistics = if options.write_statistics { - build_statistics(array, descriptor.clone()) + Some(build_statistics(array, descriptor.primitive_type.clone())) } else { None }; @@ -56,6 +57,7 @@ pub fn array_to_page( utils::build_plain_page( buffer, array.len(), + array.len(), array.null_count(), 0, definition_levels_byte_length, @@ -68,11 +70,10 @@ pub fn array_to_page( pub(super) fn build_statistics( array: &FixedSizeBinaryArray, - descriptor: Descriptor, -) -> Option { - let pq_statistics = &ParquetStatistics { - max: None, - min: None, + primitive_type: PrimitiveType, +) -> ParquetStatistics { + let statistics = &FixedLenStatistics { + primitive_type, null_count: Some(array.null_count() as i64), distinct_count: None, max_value: array @@ -85,8 +86,6 @@ pub(super) fn build_statistics( .flatten() .min_by(|x, y| ord_binary(x, y)) .map(|x| x.to_vec()), - }; - deserialize_statistics(pq_statistics, descriptor.primitive_type) - .map(|e| serialize_statistics(&*e)) - .ok() + } as &dyn Statistics; + serialize_statistics(statistics) } diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index 7972a50661d..e46dfeb9e89 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -53,11 +53,13 @@ pub(self) fn decimal_length_from_precision(precision: usize) -> usize { /// Creates a parquet [`SchemaDescriptor`] from a [`Schema`]. pub fn to_parquet_schema(schema: &Schema) -> Result { + println!("{:#?}", schema); let parquet_types = schema .fields .iter() .map(to_parquet_type) .collect::>>()?; + println!("{:#?}", parquet_types); Ok(SchemaDescriptor::new("root".to_string(), parquet_types)) } diff --git a/src/io/parquet/write/primitive/basic.rs b/src/io/parquet/write/primitive/basic.rs index ede33765d32..2b7ea6e08b9 100644 --- a/src/io/parquet/write/primitive/basic.rs +++ b/src/io/parquet/write/primitive/basic.rs @@ -75,6 +75,7 @@ where utils::build_plain_page( buffer, array.len(), + array.len(), array.null_count(), 0, definition_levels_byte_length, diff --git a/src/io/parquet/write/primitive/mod.rs b/src/io/parquet/write/primitive/mod.rs index ddeb6541605..41a73015f56 100644 --- a/src/io/parquet/write/primitive/mod.rs +++ b/src/io/parquet/write/primitive/mod.rs @@ -1,6 +1,7 @@ mod basic; mod nested; +pub(crate) use basic::build_statistics; pub use basic::array_to_page; pub(crate) use basic::encode_plain; pub use nested::array_to_page as nested_array_to_page; diff --git a/src/io/parquet/write/primitive/nested.rs b/src/io/parquet/write/primitive/nested.rs index 9c8e9104eac..ff8d3f1a658 100644 --- a/src/io/parquet/write/primitive/nested.rs +++ b/src/io/parquet/write/primitive/nested.rs @@ -47,6 +47,7 @@ where utils::build_plain_page( buffer, levels::num_values(nested.offsets()), + nested.offsets().len().saturating_sub(1), array.null_count(), repetition_levels_byte_length, definition_levels_byte_length, diff --git a/src/io/parquet/write/row_group.rs b/src/io/parquet/write/row_group.rs index 5d4a72ee02a..5c419640395 100644 --- a/src/io/parquet/write/row_group.rs +++ b/src/io/parquet/write/row_group.rs @@ -80,23 +80,19 @@ impl + 'static, I: Iterator>>> RowGro impl + 'static + Send + Sync, I: Iterator>>> Iterator for RowGroupIterator { - type Item = Result<(RowGroupIter<'static, ArrowError>, usize)>; + type Item = Result>; fn next(&mut self) -> Option { let options = self.options; self.iter.next().map(|maybe_chunk| { let chunk = maybe_chunk?; - let len = chunk.len(); let encodings = self.encodings.clone(); - Ok(( - row_group_iter( - chunk, - encodings, - self.parquet_schema.columns().to_vec(), - options, - ), - len, + Ok(row_group_iter( + chunk, + encodings, + self.parquet_schema.columns().to_vec(), + options, )) }) } diff --git a/src/io/parquet/write/sink.rs b/src/io/parquet/write/sink.rs index 8906be9431b..47b994840d1 100644 --- a/src/io/parquet/write/sink.rs +++ b/src/io/parquet/write/sink.rs @@ -150,7 +150,6 @@ where fn start_send(self: Pin<&mut Self>, item: Chunk>) -> Result<(), Self::Error> { let this = self.get_mut(); if let Some(mut writer) = this.writer.take() { - let count = item.len(); let rows = crate::io::parquet::write::row_group_iter( item, this.encoding.clone(), @@ -158,7 +157,7 @@ where this.options, ); this.task = Some(Box::pin(async move { - writer.write(rows, count).await?; + writer.write(rows).await?; Ok(Some(writer)) })); Ok(()) diff --git a/src/io/parquet/write/utf8/basic.rs b/src/io/parquet/write/utf8/basic.rs index 5b2648f848c..cf45e4e9cd6 100644 --- a/src/io/parquet/write/utf8/basic.rs +++ b/src/io/parquet/write/utf8/basic.rs @@ -86,6 +86,7 @@ pub fn array_to_page( utils::build_plain_page( buffer, array.len(), + array.len(), array.null_count(), 0, definition_levels_byte_length, @@ -96,7 +97,7 @@ pub fn array_to_page( ) } -pub(super) fn build_statistics( +pub(crate) fn build_statistics( array: &Utf8Array, primitive_type: PrimitiveType, ) -> ParquetStatistics { diff --git a/src/io/parquet/write/utf8/mod.rs b/src/io/parquet/write/utf8/mod.rs index ddeb6541605..eec1d695d1d 100644 --- a/src/io/parquet/write/utf8/mod.rs +++ b/src/io/parquet/write/utf8/mod.rs @@ -2,5 +2,6 @@ mod basic; mod nested; pub use basic::array_to_page; +pub(crate) use basic::build_statistics; pub(crate) use basic::encode_plain; pub use nested::array_to_page as nested_array_to_page; diff --git a/src/io/parquet/write/utf8/nested.rs b/src/io/parquet/write/utf8/nested.rs index 9bd925a90e9..bf60ced491f 100644 --- a/src/io/parquet/write/utf8/nested.rs +++ b/src/io/parquet/write/utf8/nested.rs @@ -37,9 +37,12 @@ where None }; + println!("{nested:?}"); + utils::build_plain_page( buffer, levels::num_values(nested.offsets()), + nested.offsets().len().saturating_sub(1), array.null_count(), repetition_levels_byte_length, definition_levels_byte_length, diff --git a/src/io/parquet/write/utils.rs b/src/io/parquet/write/utils.rs index 89032593775..aaebbcbebae 100644 --- a/src/io/parquet/write/utils.rs +++ b/src/io/parquet/write/utils.rs @@ -60,7 +60,8 @@ pub fn write_def_levels( #[allow(clippy::too_many_arguments)] pub fn build_plain_page( buffer: Vec, - len: usize, + num_values: usize, + num_rows: usize, null_count: usize, repetition_levels_byte_length: usize, definition_levels_byte_length: usize, @@ -69,33 +70,32 @@ pub fn build_plain_page( options: WriteOptions, encoding: Encoding, ) -> Result { - match options.version { - Version::V1 => { - let header = DataPageHeader::V1(DataPageHeaderV1 { - num_values: len as i32, - encoding: encoding.into(), - definition_level_encoding: Encoding::Rle.into(), - repetition_level_encoding: Encoding::Rle.into(), - statistics, - }); - - Ok(DataPage::new(header, buffer, None, descriptor)) - } - Version::V2 => { - let header = DataPageHeader::V2(DataPageHeaderV2 { - num_values: len as i32, - encoding: encoding.into(), - num_nulls: null_count as i32, - num_rows: len as i32, - definition_levels_byte_length: definition_levels_byte_length as i32, - repetition_levels_byte_length: repetition_levels_byte_length as i32, - is_compressed: Some(options.compression != Compression::Uncompressed), - statistics, - }); - - Ok(DataPage::new(header, buffer, None, descriptor)) - } - } + let header = match options.version { + Version::V1 => DataPageHeader::V1(DataPageHeaderV1 { + num_values: num_values as i32, + encoding: encoding.into(), + definition_level_encoding: Encoding::Rle.into(), + repetition_level_encoding: Encoding::Rle.into(), + statistics, + }), + Version::V2 => DataPageHeader::V2(DataPageHeaderV2 { + num_values: num_values as i32, + encoding: encoding.into(), + num_nulls: null_count as i32, + num_rows: num_rows as i32, + definition_levels_byte_length: definition_levels_byte_length as i32, + repetition_levels_byte_length: repetition_levels_byte_length as i32, + is_compressed: Some(options.compression != Compression::Uncompressed), + statistics, + }), + }; + Ok(DataPage::new( + header, + buffer, + None, + descriptor, + Some((0, num_rows)), + )) } /// Auxiliary iterator adapter to declare the size hint of an iterator. diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs index 5e1b59c7488..718c5280793 100644 --- a/tests/it/io/parquet/mod.rs +++ b/tests/it/io/parquet/mod.rs @@ -329,7 +329,7 @@ pub fn pyarrow_nullable(column: &str) -> Box { .collect::>(); Box::new(PrimitiveArray::::from(values)) } - "string_large" => { + "int32_dict" => { let keys = PrimitiveArray::::from([Some(0), Some(1), None, Some(1)]); let values = Arc::new(PrimitiveArray::::from_slice([10, 200])); Box::new(DictionaryArray::::from_data(keys, values)) @@ -413,7 +413,13 @@ pub fn pyarrow_nullable_statistics(column: &str) -> Option> min_value: Some(0), max_value: Some(9), }), - "string_large" => return None, + "int32_dict" => Box::new(PrimitiveStatistics { + data_type: DataType::Dictionary(IntegerType::Int32, Box::new(DataType::Int32), false), + null_count: Some(0), + distinct_count: None, + min_value: Some(10), + max_value: Some(200), + }), "decimal_9" => Box::new(PrimitiveStatistics:: { distinct_count: None, null_count: Some(3), @@ -716,8 +722,7 @@ fn integration_write(schema: &Schema, batches: &[Chunk>]) -> Resu writer.start()?; for group in row_groups { - let (group, len) = group?; - writer.write(group, len)?; + writer.write(group?)?; } let (_size, writer) = writer.end(None)?; diff --git a/tests/it/io/parquet/write.rs b/tests/it/io/parquet/write.rs index c9141f4d515..a068a8335a1 100644 --- a/tests/it/io/parquet/write.rs +++ b/tests/it/io/parquet/write.rs @@ -49,8 +49,7 @@ fn round_trip( writer.start()?; for group in row_groups { - let (group, len) = group?; - writer.write(group, len)?; + writer.write(group?)?; } let (_size, writer) = writer.end(None)?; @@ -354,7 +353,7 @@ fn utf8_optional_v2_delta() -> Result<()> { #[test] fn i32_optional_v2_dict() -> Result<()> { round_trip( - "string_large", + "int32_dict", true, false, Version::V2, @@ -366,7 +365,7 @@ fn i32_optional_v2_dict() -> Result<()> { #[test] fn i32_optional_v2_dict_compressed() -> Result<()> { round_trip( - "string_large", + "int32_dict", true, false, Version::V2,