diff --git a/Cargo.toml b/Cargo.toml
index 93a56ea6a90..5d8d8bc61f6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -58,7 +58,7 @@ futures = { version = "0.3", optional = true }
 # for faster hashing
 ahash = { version = "0.7", optional = true }
 
-parquet2 = { version = "0.2", optional = true, default_features = false, features = ["stream"] }
+parquet2 = { version = "0.3", optional = true, default_features = false, features = ["stream"] }
 
 [dev-dependencies]
 rand = "0.8"
diff --git a/arrow-parquet-integration-testing/src/main.rs b/arrow-parquet-integration-testing/src/main.rs
index f41bca23015..6c8c6ea1054 100644
--- a/arrow-parquet-integration-testing/src/main.rs
+++ b/arrow-parquet-integration-testing/src/main.rs
@@ -2,14 +2,17 @@ use std::fs::File;
 use std::sync::Arc;
 use std::{collections::HashMap, convert::TryFrom, io::Read};
 
-use arrow2::datatypes::DataType;
-use arrow2::error::Result;
-use arrow2::io::parquet::write::{Encoding, RowGroupIterator};
-use arrow2::io::{
-    json_integration::ArrowJson,
-    parquet::write::{write_file, CompressionCodec, Version, WriteOptions},
+use arrow2::{
+    datatypes::{DataType, Schema},
+    error::Result,
+    io::{
+        json_integration::{to_record_batch, ArrowJson},
+        parquet::write::{
+            write_file, Compression, Encoding, RowGroupIterator, Version, WriteOptions,
+        },
+    },
+    record_batch::RecordBatch,
 };
-use arrow2::{datatypes::Schema, io::json_integration::to_record_batch, record_batch::RecordBatch};
 
 use clap::{App, Arg};
 
@@ -154,7 +157,7 @@ fn main() -> Result<()> {
 
     let options = WriteOptions {
         write_statistics: true,
-        compression: CompressionCodec::Uncompressed,
+        compression: Compression::Uncompressed,
         version,
     };
 
diff --git a/benches/write_parquet.rs b/benches/write_parquet.rs
index df3340a0d4e..7cff2c13052 100644
--- a/benches/write_parquet.rs
+++ b/benches/write_parquet.rs
@@ -14,7 +14,7 @@ fn write(array: &dyn Array, encoding: Encoding) -> Result<()> {
 
     let options = WriteOptions {
         write_statistics: false,
-        compression: CompressionCodec::Uncompressed,
+        compression: Compression::Uncompressed,
         version: Version::V1,
     };
 
diff --git a/examples/parquet_write.rs b/examples/parquet_write.rs
index 52e70f417d4..af00a6a25a4 100644
--- a/examples/parquet_write.rs
+++ b/examples/parquet_write.rs
@@ -7,7 +7,7 @@ use arrow2::{
     datatypes::{Field, Schema},
     error::Result,
     io::parquet::write::{
-        array_to_page, write_file, CompressionCodec, DynIter, Encoding, Version, WriteOptions,
+        array_to_page, write_file, Compression, DynIter, Encoding, Version, WriteOptions,
     },
 };
 
@@ -16,7 +16,7 @@ fn write_single_array(path: &str, array: &dyn Array, field: Field) -> Result<()> {
 
     let options = WriteOptions {
         write_statistics: true,
-        compression: CompressionCodec::Uncompressed,
+        compression: Compression::Uncompressed,
         version: Version::V2,
     };
     let encoding = Encoding::Plain;
diff --git a/examples/parquet_write_record.rs b/examples/parquet_write_record.rs
index 9b4deb68792..7528f85a33d 100644
--- a/examples/parquet_write_record.rs
+++ b/examples/parquet_write_record.rs
@@ -5,17 +5,18 @@ use arrow2::{
     array::{Array, Int32Array},
     datatypes::{Field, Schema},
     error::Result,
-    io::parquet::write::{write_file, CompressionCodec, RowGroupIterator, Version, WriteOptions},
+    io::parquet::write::{
+        write_file, Compression, Encoding, RowGroupIterator, Version, WriteOptions,
+    },
     record_batch::RecordBatch,
 };
-use parquet2::schema::Encoding;
 
 fn write_batch(path: &str, batch: RecordBatch) -> Result<()> {
     let schema = batch.schema().clone();
 
     let options = WriteOptions {
         write_statistics: true,
-        compression: CompressionCodec::Uncompressed,
+        compression: Compression::Uncompressed,
         version: Version::V2,
     };
 
diff --git a/src/io/parquet/mod.rs b/src/io/parquet/mod.rs
index b8d08cb672a..0a4e80f0b5a 100644
--- a/src/io/parquet/mod.rs
+++ b/src/io/parquet/mod.rs
@@ -405,7 +405,7 @@ mod tests {
 mod tests_integration {
     use std::sync::Arc;
 
-    use super::write::CompressionCodec;
+    use super::write::Compression;
     use crate::array::{Array, PrimitiveArray, Utf8Array};
     use crate::datatypes::DataType;
     use crate::datatypes::TimeUnit;
@@ -421,7 +421,7 @@ mod tests_integration {
     fn integration_write(schema: &Schema, batches: &[RecordBatch]) -> Result<Vec<u8>> {
         let options = WriteOptions {
             write_statistics: true,
-            compression: CompressionCodec::Uncompressed,
+            compression: Compression::Uncompressed,
             version: Version::V1,
         };
 
diff --git a/src/io/parquet/read/binary/basic.rs b/src/io/parquet/read/binary/basic.rs
index 469df2125a4..8ceca3ebbb3 100644
--- a/src/io/parquet/read/binary/basic.rs
+++ b/src/io/parquet/read/binary/basic.rs
@@ -1,7 +1,7 @@
 use parquet2::{
     encoding::{bitpacking, delta_length_byte_array, hybrid_rle, uleb128, Encoding},
     metadata::{ColumnChunkMetaData, ColumnDescriptor},
-    page::{BinaryPageDict, DataPage, DataPageHeader},
+    page::{BinaryPageDict, DataPage, DataPageHeader, DataPageHeaderExt},
     read::{levels, StreamingIterator},
 };
 
@@ -214,7 +214,7 @@ fn extend_from_page(
     let is_optional = descriptor.max_def_level() == 1;
     match page.header() {
         DataPageHeader::V1(header) => {
-            assert_eq!(header.definition_level_encoding, Encoding::Rle);
+            assert_eq!(header.definition_level_encoding(), Encoding::Rle);
 
             let (_, validity_buffer, values_buffer) =
                 levels::split_buffer_v1(page.buffer(), false, is_optional);
diff --git a/src/io/parquet/read/binary/nested.rs b/src/io/parquet/read/binary/nested.rs
index 4582f532ddf..e20af80426b 100644
--- a/src/io/parquet/read/binary/nested.rs
+++ b/src/io/parquet/read/binary/nested.rs
@@ -3,7 +3,7 @@ use std::sync::Arc;
 use parquet2::{
     encoding::Encoding,
     metadata::{ColumnChunkMetaData, ColumnDescriptor},
-    page::{DataPage, DataPageHeader},
+    page::{DataPage, DataPageHeader, DataPageHeaderExt},
     read::{
         levels::{get_bit_width, split_buffer_v1, split_buffer_v2, RLEDecoder},
         StreamingIterator,
@@ -121,8 +121,8 @@ fn extend_from_page(
 
     match page.header() {
         DataPageHeader::V1(header) => {
-            assert_eq!(header.definition_level_encoding, Encoding::Rle);
-            assert_eq!(header.repetition_level_encoding, Encoding::Rle);
+            assert_eq!(header.definition_level_encoding(), Encoding::Rle);
+            assert_eq!(header.repetition_level_encoding(), Encoding::Rle);
 
             match (&page.encoding(), page.dictionary_page()) {
                 (Encoding::Plain, None) => {
@@ -137,11 +137,11 @@ fn extend_from_page(
                         values_buffer,
                         additional,
                         (
-                            &header.repetition_level_encoding,
+                            &header.repetition_level_encoding(),
                             descriptor.max_rep_level(),
                         ),
                         (
-                            &header.definition_level_encoding,
+                            &header.definition_level_encoding(),
                             descriptor.max_def_level(),
                         ),
                         is_nullable,
diff --git a/src/io/parquet/read/boolean/basic.rs b/src/io/parquet/read/boolean/basic.rs
index 055d9061657..8e3e155d797 100644
--- a/src/io/parquet/read/boolean/basic.rs
+++ b/src/io/parquet/read/boolean/basic.rs
@@ -8,7 +8,7 @@ use super::super::utils;
 use parquet2::{
     encoding::{hybrid_rle, Encoding},
     metadata::{ColumnChunkMetaData, ColumnDescriptor},
-    page::{DataPage, DataPageHeader},
+    page::{DataPage, DataPageHeader, DataPageHeaderExt},
     read::{levels, StreamingIterator},
 };
 
@@ -97,7 +97,7 @@ fn extend_from_page(
     let is_optional = descriptor.max_def_level() == 1;
     match page.header() {
         DataPageHeader::V1(header) => {
-            assert_eq!(header.definition_level_encoding, Encoding::Rle);
+            assert_eq!(header.definition_level_encoding(), Encoding::Rle);
 
             match (&page.encoding(), page.dictionary_page(), is_optional) {
                 (Encoding::Plain, None, true) => {
diff --git a/src/io/parquet/read/boolean/nested.rs b/src/io/parquet/read/boolean/nested.rs
index 43bfd5c501c..2accb1e352b 100644
--- a/src/io/parquet/read/boolean/nested.rs
+++ b/src/io/parquet/read/boolean/nested.rs
@@ -3,7 +3,7 @@ use std::sync::Arc;
 use parquet2::{
     encoding::Encoding,
     metadata::{ColumnChunkMetaData, ColumnDescriptor},
-    page::{DataPage, DataPageHeader},
+    page::{DataPage, DataPageHeader, DataPageHeaderExt},
     read::{
         levels::{get_bit_width, split_buffer_v1, split_buffer_v2, RLEDecoder},
         StreamingIterator,
@@ -107,8 +107,8 @@ fn extend_from_page(
 
     match page.header() {
         DataPageHeader::V1(header) => {
-            assert_eq!(header.definition_level_encoding, Encoding::Rle);
-            assert_eq!(header.repetition_level_encoding, Encoding::Rle);
+            assert_eq!(header.definition_level_encoding(), Encoding::Rle);
+            assert_eq!(header.repetition_level_encoding(), Encoding::Rle);
 
             match (&page.encoding(), page.dictionary_page()) {
                 (Encoding::Plain, None) => {
@@ -123,11 +123,11 @@ fn extend_from_page(
                         values_buffer,
                         additional,
                         (
-                            &header.repetition_level_encoding,
+                            &header.repetition_level_encoding(),
                             descriptor.max_rep_level(),
                         ),
                         (
-                            &header.definition_level_encoding,
+                            &header.definition_level_encoding(),
                             descriptor.max_def_level(),
                         ),
                         is_nullable,
diff --git a/src/io/parquet/read/fixed_size_binary.rs b/src/io/parquet/read/fixed_size_binary.rs
index cb87661d2e7..e92dd0619be 100644
--- a/src/io/parquet/read/fixed_size_binary.rs
+++ b/src/io/parquet/read/fixed_size_binary.rs
@@ -1,6 +1,6 @@
 use parquet2::{
     encoding::{bitpacking, hybrid_rle, uleb128, Encoding},
-    page::{DataPage, DataPageHeader, FixedLenByteArrayPageDict},
+    page::{DataPage, DataPageHeader, DataPageHeaderExt, FixedLenByteArrayPageDict},
     read::{levels, StreamingIterator},
 };
 
@@ -171,7 +171,7 @@ pub(crate) fn extend_from_page(
     let is_optional = descriptor.max_def_level() == 1;
     match page.header() {
         DataPageHeader::V1(header) => {
-            assert_eq!(header.definition_level_encoding, Encoding::Rle);
+            assert_eq!(header.definition_level_encoding(), Encoding::Rle);
 
             let (_, validity_buffer, values_buffer) =
                 levels::split_buffer_v1(page.buffer(), false, is_optional);
diff --git a/src/io/parquet/read/mod.rs b/src/io/parquet/read/mod.rs
index 1a57fd1d30b..8cddeb220bf 100644
--- a/src/io/parquet/read/mod.rs
+++ b/src/io/parquet/read/mod.rs
@@ -27,8 +27,8 @@ pub use parquet2::{
         decompress, get_page_iterator as _get_page_iterator, read_metadata as _read_metadata,
         streaming_iterator, Decompressor, PageIterator, StreamingIterator,
     },
-    schema::{
-        types::{LogicalType, ParquetType, PhysicalType, PrimitiveConvertedType},
+    schema::types::{
+        LogicalType, ParquetType, PhysicalType, PrimitiveConvertedType,
         TimeUnit as ParquetTimeUnit, TimestampType,
     },
     types::int96_to_i64_ns,
@@ -43,7 +43,7 @@ pub fn get_page_iterator<'b, RR: Read + Seek>(
     buffer: Vec<u8>,
 ) -> Result<PageIterator<'b, RR>> {
     Ok(_get_page_iterator(
-        metadata, row_group, column, reader, buffer,
+        metadata, row_group, column, reader, None, buffer,
    )?)
 }
diff --git a/src/io/parquet/read/primitive/basic.rs b/src/io/parquet/read/primitive/basic.rs
index 102c958a3ee..6cf55868cf2 100644
--- a/src/io/parquet/read/primitive/basic.rs
+++ b/src/io/parquet/read/primitive/basic.rs
@@ -1,6 +1,6 @@
 use parquet2::{
     encoding::{bitpacking, hybrid_rle, uleb128, Encoding},
-    page::{DataPage, DataPageHeader, PrimitivePageDict},
+    page::{DataPage, DataPageHeader, DataPageHeaderExt, PrimitivePageDict},
     read::levels,
     types::NativeType,
 };
@@ -160,7 +160,7 @@ where
     let is_optional = descriptor.max_def_level() == 1;
     match page.header() {
         DataPageHeader::V1(header) => {
-            assert_eq!(header.definition_level_encoding, Encoding::Rle);
+            assert_eq!(header.definition_level_encoding(), Encoding::Rle);
 
             let (_, validity_buffer, values_buffer) =
                 levels::split_buffer_v1(page.buffer(), false, is_optional);
diff --git a/src/io/parquet/read/primitive/dictionary.rs b/src/io/parquet/read/primitive/dictionary.rs
index 8c864cfb133..640534b6198 100644
--- a/src/io/parquet/read/primitive/dictionary.rs
+++ b/src/io/parquet/read/primitive/dictionary.rs
@@ -1,10 +1,9 @@
 use std::sync::Arc;
 
 use parquet2::{
-    encoding::{bitpacking, hybrid_rle, uleb128},
-    page::{DataPage, DataPageHeader, PrimitivePageDict},
+    encoding::{bitpacking, hybrid_rle, uleb128, Encoding},
+    page::{DataPage, DataPageHeader, DataPageHeaderExt, PrimitivePageDict},
     read::{levels, StreamingIterator},
-    schema::Encoding,
     types::NativeType,
 };
 
@@ -102,7 +101,7 @@ where
     let is_optional = descriptor.max_def_level() == 1;
     match page.header() {
         DataPageHeader::V1(header) => {
-            assert_eq!(header.definition_level_encoding, Encoding::Rle);
+            assert_eq!(header.definition_level_encoding(), Encoding::Rle);
 
             let (_, validity_buffer, values_buffer) =
                 levels::split_buffer_v1(page.buffer(), false, is_optional);
diff --git a/src/io/parquet/read/primitive/nested.rs b/src/io/parquet/read/primitive/nested.rs
index 39f5ae0d8ef..da10354681d 100644
--- a/src/io/parquet/read/primitive/nested.rs
+++ b/src/io/parquet/read/primitive/nested.rs
@@ -1,6 +1,6 @@
 use parquet2::{
     encoding::Encoding,
-    page::{DataPage, DataPageHeader},
+    page::{DataPage, DataPageHeader, DataPageHeaderExt},
     read::levels::{get_bit_width, split_buffer_v1, split_buffer_v2, RLEDecoder},
     types::NativeType,
 };
@@ -127,8 +127,8 @@ where
 
     match page.header() {
         DataPageHeader::V1(header) => {
-            assert_eq!(header.definition_level_encoding, Encoding::Rle);
-            assert_eq!(header.repetition_level_encoding, Encoding::Rle);
+            assert_eq!(header.definition_level_encoding(), Encoding::Rle);
+            assert_eq!(header.repetition_level_encoding(), Encoding::Rle);
 
             match (&page.encoding(), page.dictionary_page()) {
                 (Encoding::Plain, None) => {
@@ -143,11 +143,11 @@ where
                         values_buffer,
                         additional,
                         (
-                            &header.repetition_level_encoding,
+                            &header.repetition_level_encoding(),
                             descriptor.max_rep_level(),
                         ),
                         (
-                            &header.definition_level_encoding,
+                            &header.definition_level_encoding(),
                             descriptor.max_def_level(),
                         ),
                         is_nullable,
diff --git a/src/io/parquet/read/schema/convert.rs b/src/io/parquet/read/schema/convert.rs
index 4fa9d4999a9..1c92683b161 100644
--- a/src/io/parquet/read/schema/convert.rs
+++ b/src/io/parquet/read/schema/convert.rs
@@ -6,9 +6,9 @@ use parquet2::{
     schema::{
         types::{
             BasicTypeInfo, GroupConvertedType, LogicalType, ParquetType, PhysicalType,
-            PrimitiveConvertedType, TimeUnit as ParquetTimeUnit,
+            PrimitiveConvertedType, TimeUnit as ParquetTimeUnit, TimestampType,
         },
-        Repetition, TimestampType,
+        Repetition,
     },
 };
 
diff --git a/src/io/parquet/read/utils.rs b/src/io/parquet/read/utils.rs
index 2ae9ab87933..e4c8417d7f4 100644
--- a/src/io/parquet/read/utils.rs
+++ b/src/io/parquet/read/utils.rs
@@ -1,4 +1,4 @@
-use parquet2::{encoding::get_length, schema::Encoding};
+use parquet2::encoding::{get_length, Encoding};
 
 use crate::error::ArrowError;
 
diff --git a/src/io/parquet/write/binary/nested.rs b/src/io/parquet/write/binary/nested.rs
index 52a1ee50674..cf92180a3c9 100644
--- a/src/io/parquet/write/binary/nested.rs
+++ b/src/io/parquet/write/binary/nested.rs
@@ -1,5 +1,6 @@
-use parquet2::schema::Encoding;
-use parquet2::{metadata::ColumnDescriptor, page::CompressedDataPage, write::WriteOptions};
+use parquet2::{
+    encoding::Encoding, metadata::ColumnDescriptor, page::CompressedDataPage, write::WriteOptions,
+};
 
 use super::super::{levels, utils};
 use super::basic::{build_statistics, encode_plain};
diff --git a/src/io/parquet/write/boolean/basic.rs b/src/io/parquet/write/boolean/basic.rs
index 927cc17044e..604aa49d3cf 100644
--- a/src/io/parquet/write/boolean/basic.rs
+++ b/src/io/parquet/write/boolean/basic.rs
@@ -1,8 +1,7 @@
 use parquet2::{
-    encoding::hybrid_rle::bitpacked_encode,
+    encoding::{hybrid_rle::bitpacked_encode, Encoding},
     metadata::ColumnDescriptor,
     page::CompressedDataPage,
-    schema::Encoding,
     statistics::{serialize_statistics, BooleanStatistics, ParquetStatistics, Statistics},
     write::WriteOptions,
 };
diff --git a/src/io/parquet/write/boolean/nested.rs b/src/io/parquet/write/boolean/nested.rs
index aa172533235..b9726f93b65 100644
--- a/src/io/parquet/write/boolean/nested.rs
+++ b/src/io/parquet/write/boolean/nested.rs
@@ -1,5 +1,6 @@
-use parquet2::schema::Encoding;
-use parquet2::{metadata::ColumnDescriptor, page::CompressedDataPage, write::WriteOptions};
+use parquet2::{
+    encoding::Encoding, metadata::ColumnDescriptor, page::CompressedDataPage, write::WriteOptions,
+};
 
 use super::super::{levels, utils};
 use super::basic::{build_statistics, encode_plain};
diff --git a/src/io/parquet/write/dictionary.rs b/src/io/parquet/write/dictionary.rs
index 450ef4f7af7..5a7e5ad16b2 100644
--- a/src/io/parquet/write/dictionary.rs
+++ b/src/io/parquet/write/dictionary.rs
@@ -1,8 +1,9 @@
-use parquet2::encoding::hybrid_rle::encode_u32;
-use parquet2::page::{CompressedDictPage, CompressedPage};
-use parquet2::schema::Encoding;
-use parquet2::write::DynIter;
-use parquet2::{metadata::ColumnDescriptor, write::WriteOptions};
+use parquet2::{
+    encoding::{hybrid_rle::encode_u32, Encoding},
+    metadata::ColumnDescriptor,
+    page::{CompressedDictPage, CompressedPage},
+    write::{DynIter, WriteOptions},
+};
 
 use super::binary::encode_plain as binary_encode_plain;
 use super::primitive::encode_plain as primitive_encode_plain;
diff --git a/src/io/parquet/write/fixed_len_bytes.rs b/src/io/parquet/write/fixed_len_bytes.rs
index 1fd5260b445..9d2cc2c188e 100644
--- a/src/io/parquet/write/fixed_len_bytes.rs
+++ b/src/io/parquet/write/fixed_len_bytes.rs
@@ -1,6 +1,6 @@
 use parquet2::{
-    compression::create_codec, metadata::ColumnDescriptor, page::CompressedDataPage,
-    schema::Encoding, write::WriteOptions,
+    compression::create_codec, encoding::Encoding, metadata::ColumnDescriptor,
+    page::CompressedDataPage, write::WriteOptions,
 };
 
 use super::utils;
diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs
index 9f7362ec459..2b4c0f906fa 100644
--- a/src/io/parquet/write/mod.rs
+++ b/src/io/parquet/write/mod.rs
@@ -23,17 +23,13 @@ use crate::io::parquet::write::levels::NestedInfo;
 use crate::types::days_ms;
 use crate::types::NativeType;
 
-use parquet2::metadata::ColumnDescriptor;
 pub use parquet2::{
-    compression::CompressionCodec,
+    compression::Compression,
+    encoding::Encoding,
+    metadata::{ColumnDescriptor, KeyValue, SchemaDescriptor},
     page::{CompressedDataPage, CompressedPage},
     schema::types::ParquetType,
-    schema::Encoding,
-    write::{DynIter, RowGroupIter},
-    write::{Version, WriteOptions},
-};
-use parquet2::{
-    metadata::SchemaDescriptor, schema::KeyValue, write::write_file as parquet_write_file,
+    write::{write_file as parquet_write_file, DynIter, RowGroupIter, Version, WriteOptions},
 };
 pub use record_batch::RowGroupIterator;
 use schema::schema_to_metadata_key;
@@ -497,7 +493,7 @@ mod tests {
         nullable: bool,
         nested: bool,
         version: Version,
-        compression: CompressionCodec,
+        compression: Compression,
         encoding: Encoding,
     ) -> Result<()> {
         let (array, statistics) = if nested {
@@ -562,7 +558,7 @@ mod tests {
             true,
             false,
             Version::V1,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
    }
@@ -574,7 +570,7 @@ mod tests {
             false,
             false,
             Version::V1,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -586,7 +582,7 @@ mod tests {
             true,
             false,
             Version::V2,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -598,7 +594,7 @@ mod tests {
             true,
             false,
             Version::V2,
-            CompressionCodec::Snappy,
+            Compression::Snappy,
             Encoding::Plain,
         )
     }
@@ -610,7 +606,7 @@ mod tests {
             true,
             false,
             Version::V1,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -622,7 +618,7 @@ mod tests {
             false,
             false,
             Version::V1,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -634,7 +630,7 @@ mod tests {
             true,
             false,
             Version::V2,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -646,7 +642,7 @@ mod tests {
             false,
             false,
             Version::V2,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -658,7 +654,7 @@ mod tests {
             true,
             false,
             Version::V2,
-            CompressionCodec::Snappy,
+            Compression::Snappy,
             Encoding::Plain,
         )
     }
@@ -670,7 +666,7 @@ mod tests {
             false,
             false,
             Version::V2,
-            CompressionCodec::Snappy,
+            Compression::Snappy,
             Encoding::Plain,
         )
     }
@@ -682,7 +678,7 @@ mod tests {
             true,
             false,
             Version::V1,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -694,7 +690,7 @@ mod tests {
             false,
             false,
             Version::V1,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -706,7 +702,7 @@ mod tests {
             true,
             false,
             Version::V2,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -718,7 +714,7 @@ mod tests {
             false,
             false,
             Version::V2,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -730,7 +726,7 @@ mod tests {
             false,
             false,
             Version::V2,
-            CompressionCodec::Snappy,
+            Compression::Snappy,
             Encoding::Plain,
         )
     }
@@ -742,7 +738,7 @@ mod tests {
             true,
             true,
             Version::V2,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -754,7 +750,7 @@ mod tests {
             true,
             true,
             Version::V1,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -766,7 +762,7 @@ mod tests {
             true,
             true,
             Version::V2,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -778,7 +774,7 @@ mod tests {
             true,
             true,
             Version::V1,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -790,7 +786,7 @@ mod tests {
             true,
             true,
             Version::V2,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -802,7 +798,7 @@ mod tests {
             true,
             true,
             Version::V1,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -814,7 +810,7 @@ mod tests {
             true,
             true,
             Version::V2,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -826,7 +822,7 @@ mod tests {
             true,
             true,
             Version::V1,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::Plain,
         )
     }
@@ -838,7 +834,7 @@ mod tests {
             true,
             false,
             Version::V2,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::DeltaLengthByteArray,
         )
     }
@@ -850,7 +846,7 @@ mod tests {
             true,
             false,
             Version::V2,
-            CompressionCodec::Uncompressed,
+            Compression::Uncompressed,
             Encoding::RleDictionary,
         )
     }
diff --git a/src/io/parquet/write/primitive/basic.rs b/src/io/parquet/write/primitive/basic.rs
index c13a8f9bee8..796df77c083 100644
--- a/src/io/parquet/write/primitive/basic.rs
+++ b/src/io/parquet/write/primitive/basic.rs
@@ -1,7 +1,7 @@
 use parquet2::{
+    encoding::Encoding,
     metadata::ColumnDescriptor,
     page::CompressedDataPage,
-    schema::Encoding,
     statistics::{serialize_statistics, ParquetStatistics, PrimitiveStatistics, Statistics},
     types::NativeType,
     write::WriteOptions,
diff --git a/src/io/parquet/write/primitive/nested.rs b/src/io/parquet/write/primitive/nested.rs
index 05bfb5858b6..36ff01a723d 100644
--- a/src/io/parquet/write/primitive/nested.rs
+++ b/src/io/parquet/write/primitive/nested.rs
@@ -1,6 +1,6 @@
-use parquet2::schema::Encoding;
 use parquet2::{
-    metadata::ColumnDescriptor, page::CompressedDataPage, types::NativeType, write::WriteOptions,
+    encoding::Encoding, metadata::ColumnDescriptor, page::CompressedDataPage, types::NativeType,
+    write::WriteOptions,
 };
 
 use super::super::levels;
diff --git a/src/io/parquet/write/schema.rs b/src/io/parquet/write/schema.rs
index b3169d8785e..efd03955cf4 100644
--- a/src/io/parquet/write/schema.rs
+++ b/src/io/parquet/write/schema.rs
@@ -1,6 +1,12 @@
-use parquet2::schema::{
-    types::{ParquetType, PhysicalType, PrimitiveConvertedType, TimeUnit as ParquetTimeUnit},
-    DecimalType, FieldRepetitionType, IntType, KeyValue, LogicalType, TimeType, TimestampType,
+use parquet2::{
+    metadata::KeyValue,
+    schema::{
+        types::{
+            DecimalType, IntType, LogicalType, ParquetType, PhysicalType, PrimitiveConvertedType,
+            TimeType, TimeUnit as ParquetTimeUnit, TimestampType,
+        },
+        Repetition,
+    },
 };
 
 use crate::{
@@ -34,9 +40,9 @@ pub fn schema_to_metadata_key(schema: &Schema) -> KeyValue {
 pub fn to_parquet_type(field: &Field) -> Result<ParquetType> {
     let name = field.name().clone();
     let repetition = if field.is_nullable() {
-        FieldRepetitionType::Optional
+        Repetition::Optional
     } else {
-        FieldRepetitionType::Required
+        Repetition::Required
     };
     // create type from field
     match field.data_type() {
@@ -326,7 +332,7 @@ pub fn to_parquet_type(field: &Field) -> Result<ParquetType> {
             Some(LogicalType::LIST(Default::default())),
             vec![ParquetType::try_from_group(
                 "list".to_string(),
-                FieldRepetitionType::Repeated,
+                Repetition::Repeated,
                 None,
                 None,
                 vec![to_parquet_type(f)?],
@@ -335,14 +341,6 @@ pub fn to_parquet_type(field: &Field) -> Result<ParquetType> {
                 None,
             )?)
         }
-        /*
-        DataType::FixedSizeBinary(length) => {
-            Type::primitive_type_builder(name, PhysicalType::FIXED_LEN_BYTE_ARRAY)
-                .with_repetition(repetition)
-                .with_length(*length)
-                .build()
-        }
-        */
         other => Err(ArrowError::NotYetImplemented(format!(
             "Writing the data type {:?} is not yet implemented",
             other
diff --git a/src/io/parquet/write/stream.rs b/src/io/parquet/write/stream.rs
index fd9ea69a04f..8383ffb1edc 100644
--- a/src/io/parquet/write/stream.rs
+++ b/src/io/parquet/write/stream.rs
@@ -2,7 +2,7 @@ use futures::stream::Stream;
 
 use parquet2::write::RowGroupIter;
 use parquet2::{
-    metadata::SchemaDescriptor, schema::KeyValue,
+    metadata::{KeyValue, SchemaDescriptor},
     write::stream::write_stream as parquet_write_stream,
 };
 
diff --git a/src/io/parquet/write/utf8/nested.rs b/src/io/parquet/write/utf8/nested.rs
index af213989acd..65e7d3d13bf 100644
--- a/src/io/parquet/write/utf8/nested.rs
+++ b/src/io/parquet/write/utf8/nested.rs
@@ -1,4 +1,4 @@
-use parquet2::schema::Encoding;
+use parquet2::encoding::Encoding;
 use parquet2::{metadata::ColumnDescriptor, page::CompressedDataPage, write::WriteOptions};
 
 use super::super::{levels, utils};
diff --git a/src/io/parquet/write/utils.rs b/src/io/parquet/write/utils.rs
index 33c8a89854d..7850ea724e6 100644
--- a/src/io/parquet/write/utils.rs
+++ b/src/io/parquet/write/utils.rs
@@ -1,11 +1,10 @@
 use crate::bitmap::Bitmap;
 
 use parquet2::{
-    compression::create_codec,
+    compression::{create_codec, Compression},
     encoding::{hybrid_rle::encode_bool, Encoding},
     metadata::ColumnDescriptor,
     page::{CompressedDataPage, DataPageHeader, DataPageHeaderV1, DataPageHeaderV2},
-    schema::CompressionCodec,
     statistics::ParquetStatistics,
     write::WriteOptions,
 };
@@ -75,9 +74,9 @@ pub fn build_plain_page(
         Version::V1 => {
             let header = DataPageHeader::V1(DataPageHeaderV1 {
                 num_values: len as i32,
-                encoding,
-                definition_level_encoding: Encoding::Rle,
-                repetition_level_encoding: Encoding::Rle,
+                encoding: encoding.into(),
+                definition_level_encoding: Encoding::Rle.into(),
+                repetition_level_encoding: Encoding::Rle.into(),
                 statistics,
             });
 
@@ -93,12 +92,12 @@ pub fn build_plain_page(
         Version::V2 => {
             let header = DataPageHeader::V2(DataPageHeaderV2 {
                 num_values: len as i32,
-                encoding,
+                encoding: encoding.into(),
                 num_nulls: null_count as i32,
                 num_rows: len as i32,
                 definition_levels_byte_length: definition_levels_byte_length as i32,
                 repetition_levels_byte_length: repetition_levels_byte_length as i32,
-                is_compressed: Some(options.compression != CompressionCodec::Uncompressed),
+                is_compressed: Some(options.compression != Compression::Uncompressed),
                 statistics,
             });
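
---

Migration sketch (addendum, not part of the patch): on the write side this change is mechanical — `CompressionCodec` becomes `Compression`, and `Encoding` is imported from `arrow2::io::parquet::write` instead of `parquet2::schema`. The updated `examples/parquet_write_record.rs` then reads roughly as below; the `RowGroupIterator::try_new`, `parquet_schema`, and `write_file` calls past the hunks shown are reconstructed from this revision's API and should be treated as a sketch, not the exact file contents.

```rust
use std::fs::File;

use arrow2::{
    error::Result,
    io::parquet::write::{
        write_file, Compression, Encoding, RowGroupIterator, Version, WriteOptions,
    },
    record_batch::RecordBatch,
};

/// Writes `batch` to `path`, following the updated `examples/parquet_write_record.rs`.
fn write_batch(path: &str, batch: RecordBatch) -> Result<()> {
    let schema = batch.schema().clone();

    let options = WriteOptions {
        write_statistics: true,
        compression: Compression::Uncompressed, // was `CompressionCodec::Uncompressed`
        version: Version::V2,
    };

    // One row group containing one batch; `encodings` carries one entry per
    // column of the schema (a single plain-encoded column is assumed here).
    let row_groups = RowGroupIterator::try_new(
        vec![Ok(batch)].into_iter(),
        &schema,
        options,
        vec![Encoding::Plain],
    )?;

    let parquet_schema = row_groups.parquet_schema().clone();
    let mut file = File::create(path)?;
    write_file(&mut file, row_groups, &schema, parquet_schema, options, None)?;
    Ok(())
}
```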
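On the page-header side, parquet2 0.3 exposes the raw thrift header structs: writers fill the enum fields via `.into()` (as `build_plain_page` above now does), and readers recover them through the new `DataPageHeaderExt` trait instead of direct field access. A minimal round-trip sketch, assuming only the `parquet2::page` and `parquet2::encoding` re-exports already used in this diff:

```rust
use parquet2::{
    encoding::Encoding,
    page::{DataPageHeaderExt, DataPageHeaderV1},
};

fn main() {
    // Writing: enum fields are stored in their thrift representation, hence `.into()`,
    // mirroring the struct literal in `build_plain_page`.
    let header = DataPageHeaderV1 {
        num_values: 0,
        encoding: Encoding::Plain.into(),
        definition_level_encoding: Encoding::Rle.into(),
        repetition_level_encoding: Encoding::Rle.into(),
        statistics: None,
    };

    // Reading: the `DataPageHeaderExt` accessors replace the old field reads,
    // which is why the `read/` modules now call `definition_level_encoding()`.
    assert_eq!(header.definition_level_encoding(), Encoding::Rle);
    assert_eq!(header.repetition_level_encoding(), Encoding::Rle);
}
```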