diff --git a/parquet/src/arrow/arrow_writer.rs b/parquet/src/arrow/arrow_writer.rs index 4726734475ba..7728cd4cb2f2 100644 --- a/parquet/src/arrow/arrow_writer.rs +++ b/parquet/src/arrow/arrow_writer.rs @@ -227,7 +227,7 @@ fn write_leaves( ArrowDataType::FixedSizeList(_, _) | ArrowDataType::Union(_) => { Err(ParquetError::NYI( format!( - "Attempting to write an Arrow type {:?} to parquet that is not yet implemented", + "Attempting to write an Arrow type {:?} to parquet that is not yet implemented", array.data_type() ) )) @@ -1199,6 +1199,32 @@ mod tests { ); } + #[test] + fn bool_large_single_column() { + let values = Arc::new( + [None, Some(true), Some(false)] + .iter() + .cycle() + .copied() + .take(200_000) + .collect::<BooleanArray>(), + ); + let schema = + Schema::new(vec![Field::new("col", values.data_type().clone(), true)]); + let expected_batch = + RecordBatch::try_new(Arc::new(schema), vec![values]).unwrap(); + let file = get_temp_file("bool_large_single_column", &[]); + + let mut writer = ArrowWriter::try_new( + file.try_clone().unwrap(), + expected_batch.schema(), + None, + ) + .expect("Unable to write file"); + writer.write(&expected_batch).unwrap(); + writer.close().unwrap(); + } + #[test] fn i8_single_column() { required_and_optional::<Int8Array, _>(0..SMALL_SIZE as i8, "i8_single_column"); diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index 127ba95387e3..3573362744fe 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -588,6 +588,7 @@ pub(crate) mod private { use crate::util::bit_util::{BitReader, BitWriter}; use crate::util::memory::ByteBufferPtr; + use arrow::util::bit_util::round_upto_power_of_2; use byteorder::ByteOrder; use std::convert::TryInto; @@ -669,7 +670,12 @@ pub(crate) mod private { bit_writer: &mut BitWriter, ) -> Result<()> { if bit_writer.bytes_written() + values.len() / 8 >= bit_writer.capacity() { - bit_writer.extend(256); + let bits_available = + (bit_writer.capacity() - bit_writer.bytes_written()) * 8; + let 
bits_needed = values.len() - bits_available; + let bytes_needed = (bits_needed + 7) / 8; + let bytes_needed = round_upto_power_of_2(bytes_needed, 256); + bit_writer.extend(bytes_needed); } for value in values { if !bit_writer.put_value(*value as u64, 1) {