-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Added more content * Added toolkit to read files * Added examples * Added tests, docs and coverage
- Loading branch information
1 parent
b4fb45a
commit ff2fc8b
Showing
26 changed files
with
1,176 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Generates an lcov coverage report with cargo-llvm-cov and uploads it to Codecov.
name: Coverage

on: [pull_request, push]

jobs:
  coverage:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Install Rust
        run: rustup toolchain install stable --component llvm-tools-preview
      - name: Install cargo-llvm-cov
        uses: taiki-e/install-action@cargo-llvm-cov
      - uses: Swatinem/rust-cache@v1
      - name: Generate code coverage
        run: cargo llvm-cov --features full --lcov --output-path lcov.info
      - name: Upload coverage to Codecov
        # v1 of this action has been sunset upstream; with `fail_ci_if_error`
        # set, a failing upload would fail the whole job.
        uses: codecov/codecov-action@v3
        with:
          files: lcov.info
          fail_ci_if_error: true
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
name = "avro-schema" | ||
version = "0.2.2" | ||
license = "Apache-2.0" | ||
description = "Implementation of Apache Avro spec" | ||
description = "Apache Avro specification" | ||
homepage = "https://github.com/DataEngineeringLabs/avro-schema" | ||
repository = "https://github.com/DataEngineeringLabs/avro-schema" | ||
authors = ["Jorge C. Leitao <[email protected]>"] | ||
|
@@ -12,3 +12,23 @@ edition = "2018" | |
[dependencies] | ||
serde_json = { version = "1.0", default-features = false, features = ["std"] } | ||
serde = { version = "1.0", default-features = false } | ||
|
||
fallible-streaming-iterator = { version = "0.1" } | ||
|
||
libflate = { version = "1.1.1", optional = true } | ||
snap = { version = "1", optional = true } | ||
crc = { version = "2", optional = true } | ||
|
||
# for async | ||
futures = { version = "0.3", optional = true } | ||
async-stream = { version = "0.3.2", optional = true } | ||
|
||
[features] | ||
default = [] | ||
full = ["compression", "async"] | ||
compression = [ | ||
"libflate", | ||
"snap", | ||
"crc", | ||
] | ||
async = ["futures", "async-stream"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
//! Contains [`Error`] | ||
/// Error from this crate
#[derive(Debug, Clone, Copy)]
pub enum Error {
    /// Generic error when the file is out of spec
    OutOfSpec,
    /// When reading or writing with compression but the feature flag "compression" is not active.
    RequiresCompression,
}

impl std::fmt::Display for Error {
    /// Writes the variant's name, i.e. the same text as its [`Debug`](std::fmt::Debug) output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:?}", self)
    }
}

// Lets callers box this type as `dyn std::error::Error` and use `?` in
// functions returning `Box<dyn std::error::Error>`.
impl std::error::Error for Error {}

impl From<std::io::Error> for Error {
    /// Maps every I/O error to [`Error::OutOfSpec`]; the original error is discarded.
    fn from(_: std::io::Error) -> Self {
        Error::OutOfSpec
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
//! Contains structs found in Avro files | ||
use crate::schema::Record; | ||
|
||
/// Metadata of an Avro file: its schema record, optional compression and sync marker.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct FileMetadata {
    /// The Record represented in the file's Schema
    pub record: Record,
    /// The file's compression; `None` when no compression is declared
    pub compression: Option<Compression>,
    /// The file's 16-byte marker, present in every block
    pub marker: [u8; 16],
}
|
||
/// An Avro block whose `data` is still compressed.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CompressedBlock {
    /// How many rows the block contains
    pub number_of_rows: usize,
    /// The block's bytes, in compressed form
    pub data: Vec<u8>,
}

impl CompressedBlock {
    /// Builds a [`CompressedBlock`] of `number_of_rows` rows backed by `data`.
    pub fn new(number_of_rows: usize, data: Vec<u8>) -> Self {
        CompressedBlock {
            data,
            number_of_rows,
        }
    }
}
|
||
/// An Avro block whose `data` is uncompressed.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Block {
    /// How many rows the block contains
    pub number_of_rows: usize,
    /// The block's bytes, in uncompressed form
    pub data: Vec<u8>,
}

impl Block {
    /// Builds a [`Block`] of `number_of_rows` rows backed by `data`.
    pub fn new(number_of_rows: usize, data: Vec<u8>) -> Self {
        Block {
            data,
            number_of_rows,
        }
    }
}
|
||
/// The compressions that a file's metadata can declare (see `FileMetadata::compression`).
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum Compression {
    /// Deflate
    Deflate,
    /// Snappy
    Snappy,
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,17 @@ | ||
#![doc = include_str!("lib.md")] | ||
#![forbid(unsafe_code)] | ||
#![forbid(missing_docs)] | ||
|
||
mod de; | ||
mod schema; | ||
mod se; | ||
pub use schema::*; | ||
pub mod error; | ||
pub mod file; | ||
pub mod schema; | ||
|
||
pub mod read; | ||
#[cfg(feature = "async")] | ||
#[cfg_attr(docsrs, doc(cfg(feature = "async")))] | ||
pub mod read_async; | ||
|
||
pub mod write; | ||
#[cfg(feature = "async")] | ||
#[cfg_attr(docsrs, doc(cfg(feature = "async")))] | ||
pub mod write_async; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
//! APIs to read [`CompressedBlock`]s from an Avro file. | ||
use std::io::Read; | ||
|
||
use fallible_streaming_iterator::FallibleStreamingIterator; | ||
|
||
use crate::{error::Error, file::CompressedBlock}; | ||
|
||
use super::decode; | ||
|
||
fn read_size<R: Read>(reader: &mut R) -> Result<(usize, usize), Error> { | ||
let rows = match decode::internal_zigzag_i64(reader) { | ||
Ok(a) => a, | ||
Err(error) => match error { | ||
decode::DecodeError::EndOfFile => return Ok((0, 0)), | ||
decode::DecodeError::OutOfSpec => return Err(Error::OutOfSpec), | ||
}, | ||
}; | ||
let bytes = decode::zigzag_i64(reader)?; | ||
Ok((rows as usize, bytes as usize)) | ||
} | ||
|
||
/// Reads a [`CompressedBlock`] from the `reader`. | ||
/// # Error | ||
/// This function errors iff either the block cannot be read or the sync marker does not match | ||
fn read_block<R: Read>( | ||
reader: &mut R, | ||
block: &mut CompressedBlock, | ||
marker: [u8; 16], | ||
) -> Result<(), Error> { | ||
let (rows, bytes) = read_size(reader)?; | ||
block.number_of_rows = rows; | ||
if rows == 0 { | ||
return Ok(()); | ||
}; | ||
|
||
block.data.clear(); | ||
block | ||
.data | ||
.try_reserve(bytes) | ||
.map_err(|_| Error::OutOfSpec)?; | ||
reader.take(bytes as u64).read_to_end(&mut block.data)?; | ||
|
||
let mut block_marker = [0u8; 16]; | ||
reader.read_exact(&mut block_marker)?; | ||
|
||
if block_marker != marker { | ||
return Err(Error::OutOfSpec); | ||
} | ||
Ok(()) | ||
} | ||
|
||
/// [`FallibleStreamingIterator`] of [`CompressedBlock`]. | ||
pub struct CompressedBlockStreamingIterator<R: Read> { | ||
buf: CompressedBlock, | ||
reader: R, | ||
marker: [u8; 16], | ||
} | ||
|
||
impl<R: Read> CompressedBlockStreamingIterator<R> { | ||
/// Creates a new [`CompressedBlockStreamingIterator`]. | ||
pub fn new(reader: R, marker: [u8; 16], scratch: Vec<u8>) -> Self { | ||
Self { | ||
reader, | ||
marker, | ||
buf: CompressedBlock::new(0, scratch), | ||
} | ||
} | ||
|
||
/// The buffer of [`CompressedBlockStreamingIterator`]. | ||
pub fn buffer(&mut self) -> &mut CompressedBlock { | ||
&mut self.buf | ||
} | ||
|
||
/// Deconstructs itself | ||
pub fn into_inner(self) -> (R, Vec<u8>) { | ||
(self.reader, self.buf.data) | ||
} | ||
} | ||
|
||
impl<R: Read> FallibleStreamingIterator for CompressedBlockStreamingIterator<R> { | ||
type Error = Error; | ||
type Item = CompressedBlock; | ||
|
||
fn advance(&mut self) -> Result<(), Error> { | ||
read_block(&mut self.reader, &mut self.buf, self.marker)?; | ||
Ok(()) | ||
} | ||
|
||
fn get(&self) -> Option<&Self::Item> { | ||
if self.buf.number_of_rows > 0 { | ||
Some(&self.buf) | ||
} else { | ||
None | ||
} | ||
} | ||
} |
Oops, something went wrong.