Skip to content

Commit

Permalink
allocate enough bytes when writing booleans (#658)
Browse files Browse the repository at this point in the history
* allocate enough bytes when writing booleans

* round up to nearest multiple of 256
  • Loading branch information
bjchambers authored Aug 8, 2021
1 parent 4618ef5 commit 75432ed
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 2 deletions.
28 changes: 27 additions & 1 deletion parquet/src/arrow/arrow_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ fn write_leaves(
ArrowDataType::FixedSizeList(_, _) | ArrowDataType::Union(_) => {
Err(ParquetError::NYI(
format!(
"Attempting to write an Arrow type {:?} to parquet that is not yet implemented",
"Attempting to write an Arrow type {:?} to parquet that is not yet implemented",
array.data_type()
)
))
Expand Down Expand Up @@ -1199,6 +1199,32 @@ mod tests {
);
}

/// Writes a single nullable boolean column of 200,000 values (cycling through
/// null, `true`, `false`) with `ArrowWriter`.
///
/// This test only checks that writing the batch and closing the writer
/// succeed; it does not read the file back to compare contents.
#[test]
fn bool_large_single_column() {
    // Repeating null / true / false pattern so the column contains nulls as
    // well as both boolean values.
    let pattern = [None, Some(true), Some(false)];
    let column: BooleanArray = pattern.iter().cycle().copied().take(200_000).collect();
    let values = Arc::new(column);

    let field = Field::new("col", values.data_type().clone(), true);
    let schema = Schema::new(vec![field]);
    let expected_batch = RecordBatch::try_new(Arc::new(schema), vec![values]).unwrap();

    let file = get_temp_file("bool_large_single_column", &[]);
    let sink = file.try_clone().unwrap();

    let mut writer = ArrowWriter::try_new(sink, expected_batch.schema(), None)
        .expect("Unable to write file");
    writer.write(&expected_batch).unwrap();
    writer.close().unwrap();
}

#[test]
fn i8_single_column() {
required_and_optional::<Int8Array, _>(0..SMALL_SIZE as i8, "i8_single_column");
Expand Down
8 changes: 7 additions & 1 deletion parquet/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,7 @@ pub(crate) mod private {
use crate::util::bit_util::{BitReader, BitWriter};
use crate::util::memory::ByteBufferPtr;

use arrow::util::bit_util::round_upto_power_of_2;
use byteorder::ByteOrder;
use std::convert::TryInto;

Expand Down Expand Up @@ -669,7 +670,12 @@ pub(crate) mod private {
bit_writer: &mut BitWriter,
) -> Result<()> {
if bit_writer.bytes_written() + values.len() / 8 >= bit_writer.capacity() {
bit_writer.extend(256);
let bits_available =
(bit_writer.capacity() - bit_writer.bytes_written()) * 8;
let bits_needed = values.len() - bits_available;
let bytes_needed = (bits_needed + 7) / 8;
let bytes_needed = round_upto_power_of_2(bytes_needed, 256);
bit_writer.extend(bytes_needed);
}
for value in values {
if !bit_writer.put_value(*value as u64, 1) {
Expand Down

0 comments on commit 75432ed

Please sign in to comment.