Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Migrated to latest parquet2 (#752)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Jan 11, 2022
1 parent 2493f7d commit c7570bf
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ futures = { version = "0.3", optional = true }
ahash = { version = "0.7", optional = true }

# parquet support
parquet2 = { version = "0.8", optional = true, default_features = false, features = ["stream"] }
parquet2 = { version = "0.9", optional = true, default_features = false, features = ["stream"] }

# avro support
avro-schema = { version = "0.2", optional = true }
Expand Down
2 changes: 1 addition & 1 deletion src/io/parquet/write/record_batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ impl<A: AsRef<dyn Array> + 'static, I: Iterator<Item = Result<Chunk<A>>>> RowGro
}
}

impl<A: AsRef<dyn Array> + 'static, I: Iterator<Item = Result<Chunk<A>>>> Iterator
impl<A: AsRef<dyn Array> + 'static + Send + Sync, I: Iterator<Item = Result<Chunk<A>>>> Iterator
for RowGroupIterator<A, I>
{
type Item = Result<RowGroupIter<'static, ArrowError>>;
Expand Down
25 changes: 14 additions & 11 deletions src/io/parquet/write/stream.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Contains `async` APIs to write to parquet.
use futures::stream::Stream;
use futures::Future;

use parquet2::write::RowGroupIter;
use parquet2::{
Expand All @@ -15,28 +16,29 @@ use super::schema::schema_to_metadata_key;
use super::WriteOptions;

/// Writes
pub async fn write_stream<'a, W, I>(
writer: &mut W,
row_groups: I,
schema: &Schema,
pub async fn write_stream<'a, 'b, W, S, F>(
mut writer: W,
row_groups: S,
schema: Schema,
parquet_schema: SchemaDescriptor,
options: WriteOptions,
key_value_metadata: Option<Vec<KeyValue>>,
) -> Result<u64>
where
W: std::io::Write,
I: Stream<Item = Result<RowGroupIter<'a, ArrowError>>>,
F: Future<Output = std::result::Result<RowGroupIter<'a, ArrowError>, ArrowError>>,
S: Stream<Item = F>,
{
let key_value_metadata = key_value_metadata
.map(|mut x| {
x.push(schema_to_metadata_key(schema));
x.push(schema_to_metadata_key(&schema));
x
})
.or_else(|| Some(vec![schema_to_metadata_key(schema)]));
.or_else(|| Some(vec![schema_to_metadata_key(&schema)]));

let created_by = Some("Arrow2 - Native Rust implementation of Arrow".to_string());
Ok(parquet_write_stream(
writer,
&mut writer,
row_groups,
parquet_schema,
options,
Expand All @@ -47,17 +49,18 @@ where
}

/// Async writes
pub async fn write_stream_stream<'a, W, I>(
pub async fn write_stream_stream<'a, W, S, F>(
writer: &mut W,
row_groups: I,
row_groups: S,
schema: &Schema,
parquet_schema: SchemaDescriptor,
options: WriteOptions,
key_value_metadata: Option<Vec<KeyValue>>,
) -> Result<u64>
where
W: futures::io::AsyncWrite + Unpin + Send,
I: Stream<Item = Result<RowGroupIter<'a, ArrowError>>>,
F: Future<Output = std::result::Result<RowGroupIter<'a, ArrowError>, ArrowError>>,
S: Stream<Item = F>,
{
let key_value_metadata = key_value_metadata
.map(|mut x| {
Expand Down

0 comments on commit c7570bf

Please sign in to comment.