Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed missing re-export of FileMetaData to allow using side-car API #148

Merged
merged 1 commit into from
Jun 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/indexes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ mod index;
mod intervals;

pub use crate::parquet_bridge::BoundaryOrder;
pub use parquet_format_async_temp::PageLocation;
pub use crate::thrift_format::PageLocation;

pub use self::index::{BooleanIndex, ByteIndex, FixedLenByteIndex, Index, NativeIndex, PageIndex};
pub use intervals::{compute_rows, select_pages, FilteredPage, Interval};
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ pub mod statistics;
pub mod types;
pub mod write;

use parquet_format_async_temp as thrift_format;

pub use streaming_decompression::fallible_streaming_iterator;
pub use streaming_decompression::FallibleStreamingIterator;

Expand Down
4 changes: 2 additions & 2 deletions src/metadata/file_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::{error::Error, metadata::get_sort_order};
use super::{column_order::ColumnOrder, schema_descriptor::SchemaDescriptor, RowGroupMetaData};
use parquet_format_async_temp::ColumnOrder as TColumnOrder;

pub use parquet_format_async_temp::KeyValue;
pub use crate::thrift_format::KeyValue;

/// Metadata for a Parquet file.
// This is almost equal to [`crate::thrift_format::FileMetaData`] but contains the descriptors,
Expand Down Expand Up @@ -59,7 +59,7 @@ impl FileMetaData {
.unwrap_or(ColumnOrder::Undefined)
}

/// Deserializes [`parquet_format_async_temp::FileMetaData`] into this struct
/// Deserializes [`crate::thrift_format::FileMetaData`] into this struct
pub fn try_from_thrift(
metadata: parquet_format_async_temp::FileMetaData,
) -> Result<Self, Error> {
Expand Down
2 changes: 2 additions & 0 deletions src/metadata/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ pub use file_metadata::{FileMetaData, KeyValue};
pub use row_metadata::RowGroupMetaData;
pub use schema_descriptor::SchemaDescriptor;
pub use sort::*;

pub use crate::thrift_format::FileMetaData as ThriftFileMetaData;
2 changes: 1 addition & 1 deletion src/page/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ pub use page_dict::*;

use std::sync::Arc;

pub use parquet_format_async_temp::{
pub use crate::thrift_format::{
DataPageHeader as DataPageHeaderV1, DataPageHeaderV2, PageHeader as ParquetPageHeader,
};

Expand Down
19 changes: 6 additions & 13 deletions src/parquet_bridge.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,12 @@
// Bridges structs from thrift-generated code to rust enums.
use std::convert::TryFrom;

use parquet_format_async_temp::BoundaryOrder as ParquetBoundaryOrder;
use parquet_format_async_temp::CompressionCodec;
use parquet_format_async_temp::DataPageHeader;
use parquet_format_async_temp::DataPageHeaderV2;
use parquet_format_async_temp::DecimalType;
use parquet_format_async_temp::Encoding as ParquetEncoding;
use parquet_format_async_temp::FieldRepetitionType;
use parquet_format_async_temp::IntType;
use parquet_format_async_temp::LogicalType as ParquetLogicalType;
use parquet_format_async_temp::PageType as ParquetPageType;
use parquet_format_async_temp::TimeType;
use parquet_format_async_temp::TimeUnit as ParquetTimeUnit;
use parquet_format_async_temp::TimestampType;
use super::thrift_format::{
BoundaryOrder as ParquetBoundaryOrder, CompressionCodec, DataPageHeader, DataPageHeaderV2,
DecimalType, Encoding as ParquetEncoding, FieldRepetitionType, IntType,
LogicalType as ParquetLogicalType, PageType as ParquetPageType, TimeType,
TimeUnit as ParquetTimeUnit, TimestampType,
};

use crate::error::Error;

Expand Down
2 changes: 1 addition & 1 deletion src/schema/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pub use parquet_format_async_temp::SchemaElement;
pub use super::thrift_format::SchemaElement;

pub use crate::parquet_bridge::Repetition;

Expand Down
2 changes: 1 addition & 1 deletion src/statistics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mod primitive;

use std::{any::Any, sync::Arc};

pub use parquet_format_async_temp::Statistics as ParquetStatistics;
pub use crate::thrift_format::Statistics as ParquetStatistics;

use crate::error::Result;
use crate::schema::types::{PhysicalType, PrimitiveType};
Expand Down
21 changes: 12 additions & 9 deletions src/write/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ use std::io::Write;

use parquet_format_async_temp::thrift::protocol::TCompactOutputProtocol;
use parquet_format_async_temp::thrift::protocol::TOutputProtocol;
use parquet_format_async_temp::FileMetaData;
use parquet_format_async_temp::RowGroup;

use crate::metadata::ThriftFileMetaData;
use crate::{
error::{Error, Result},
metadata::SchemaDescriptor,
Expand All @@ -23,7 +23,7 @@ pub(super) fn start_file<W: Write>(writer: &mut W) -> Result<u64> {
Ok(PARQUET_MAGIC.len() as u64)
}

pub(super) fn end_file<W: Write>(mut writer: &mut W, metadata: &FileMetaData) -> Result<u64> {
pub(super) fn end_file<W: Write>(mut writer: &mut W, metadata: &ThriftFileMetaData) -> Result<u64> {
// Write metadata
let mut protocol = TCompactOutputProtocol::new(&mut writer);
let metadata_len = metadata.write_to_out_protocol(&mut protocol)? as i32;
Expand Down Expand Up @@ -56,7 +56,7 @@ pub struct FileWriter<W: Write> {
/// Used to store the current state for writing the file
state: State,
// when the file is written, metadata becomes available
metadata: Option<FileMetaData>,
metadata: Option<ThriftFileMetaData>,
}

/// Writes a parquet file containing only the header and footer
Expand All @@ -66,7 +66,10 @@ pub struct FileWriter<W: Write> {
///
/// Note: Recall that when combining row groups from [`ThriftFileMetaData`], the `file_path` on each
/// of their column chunks must be updated with their path relative to where they are written to.
pub fn write_metadata_sidecar<W: Write>(writer: &mut W, metadata: &FileMetaData) -> Result<u64> {
pub fn write_metadata_sidecar<W: Write>(
writer: &mut W,
metadata: &ThriftFileMetaData,
) -> Result<u64> {
let mut len = start_file(writer)?;
len += end_file(writer, metadata)?;
Ok(len)
Expand All @@ -84,11 +87,11 @@ impl<W: Write> FileWriter<W> {
&self.schema
}

/// Returns the [`FileMetaData`]. This is Some iff the [`Self::end`] has been called.
/// Returns the [`ThriftFileMetaData`]. This is Some iff the [`Self::end`] has been called.
///
/// This is used to write the metadata as a separate Parquet file, usually when data
/// is partitioned across multiple files
pub fn metadata(&self) -> Option<&FileMetaData> {
pub fn metadata(&self) -> Option<&ThriftFileMetaData> {
self.metadata.as_ref()
}
}
Expand Down Expand Up @@ -207,7 +210,7 @@ impl<W: Write> FileWriter<W> {
Result::Ok(())
})?;

let metadata = FileMetaData::new(
let metadata = ThriftFileMetaData::new(
self.options.version.into(),
self.schema.clone().into_thrift(),
num_rows,
Expand All @@ -230,10 +233,10 @@ impl<W: Write> FileWriter<W> {
self.writer
}

/// Returns the underlying writer and [`FileMetaData`]
/// Returns the underlying writer and [`ThriftFileMetaData`]
/// # Panics
/// This function panics if [`Self::end`] has not yet been called
pub fn into_inner_and_metadata(self) -> (W, FileMetaData) {
pub fn into_inner_and_metadata(self) -> (W, ThriftFileMetaData) {
(self.writer, self.metadata.expect("File to have ended"))
}
}
Expand Down