Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
DRY code.
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Nov 19, 2021
1 parent 64965b6 commit 3903150
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 109 deletions.
75 changes: 75 additions & 0 deletions src/io/avro/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,78 @@ impl From<avro_rs::Error> for ArrowError {
ArrowError::External("".to_string(), Box::new(error))
}
}

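// Macros shared by the sync and async readers: invoke them as `m!(reader)` in
// blocking code and as `m!(reader.await)` in async code, so that the trailing
// tokens are appended to every read call inside the macro body.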
/// Decodes one variable-length (base-128, little-endian groups) unsigned integer
/// from `$reader`, one byte at a time.
///
/// Any tokens following the reader identifier (e.g. `.await`) are appended to the
/// `read_exact` call, which lets the same body serve blocking and async readers.
///
/// The expansion evaluates to `Ok(u64)`. It returns early from the enclosing
/// function with `ArrowError::ExternalFormat` when a tenth byte still has its
/// continuation bit set, and propagates read failures through `?`.
macro_rules! avro_decode {
    ($reader:ident $($_await:tt)*) => {{
        let mut byte = [0u8; 1];
        let mut value = 0u64;
        // Bit offset of the next 7-bit group: 0, 7, ..., 63.
        let mut shift = 0u32;

        loop {
            if shift > 63 {
                // Ten groups were read and the encoding still has not terminated.
                return Err(ArrowError::ExternalFormat(
                    "zigzag decoding failed - corrupt avro file".to_string(),
                ));
            }
            $reader.read_exact(&mut byte[..])$($_await)*?;
            value |= u64::from(byte[0] & 0x7F) << shift;
            // A cleared high bit marks the final byte of the encoding.
            if byte[0] & 0x80 == 0 {
                break Ok(value);
            }
            shift += 7;
        }
    }};
}

/// Reads the key/value metadata map of an Avro file header from `$reader`.
///
/// Any tokens following the reader identifier (e.g. `.await`) are appended to
/// every call to `zigzag_i64` and `_read_binary`, so the same body serves the
/// blocking and the async readers.
///
/// The map is encoded as a sequence of blocks, each starting with an entry count
/// and terminated by a block whose count is zero. Per the Avro specification a
/// negative count means "use the absolute value as the count; a long holding the
/// block size in bytes follows". That size is read and discarded, because the
/// entries are decoded one by one anyway.
///
/// The expansion evaluates to `Ok(HashMap<String, Vec<u8>>)`. It returns early
/// from the enclosing function with `ArrowError::ExternalFormat` when a key is
/// not valid UTF-8, and propagates read failures through `?`.
macro_rules! read_header {
    ($reader:ident $($_await:tt)*) => {{
        let mut items = HashMap::new();

        loop {
            let count = zigzag_i64($reader)$($_await)*?;
            if count == 0 {
                break Ok(items);
            }
            if count < 0 {
                // Negative count: the block's byte size follows. It is only useful
                // for skipping the block, which this reader never does.
                let _block_bytes = zigzag_i64($reader)$($_await)*?;
            }
            let entries = count.unsigned_abs();

            // The count comes from the file and cannot be trusted: cap the
            // up-front reservation so a corrupt header cannot request an enormous
            // allocation. The map still grows on demand while reading.
            items.reserve(entries.min(1024) as usize);
            for _ in 0..entries {
                let key = _read_binary($reader)$($_await)*?;
                let key = String::from_utf8(key)
                    .map_err(|_| ArrowError::ExternalFormat("Invalid Avro header".to_string()))?;
                let value = _read_binary($reader)$($_await)*?;
                items.insert(key, value);
            }
        }
    }};
}

/// Reads the leading part of an Avro object container file from `$reader`: the
/// 4-byte magic number, the header map and the 16-byte file marker.
///
/// Any tokens following the reader identifier (e.g. `.await`) are appended to
/// every read call (`read_exact`, `read_header`, `read_file_marker`), so the same
/// body serves the blocking and the async readers. `deserialize_header` is always
/// called synchronously.
///
/// The expansion evaluates to `Ok((schema, compression, marker))`. It returns
/// early from the enclosing function with `ArrowError::ExternalFormat` when the
/// magic number does not match, and propagates every other failure through `?`.
macro_rules! read_metadata {
    ($reader:ident $($_await:tt)*) => {{
        // see https://avro.apache.org/docs/current/spec.html#Object+Container+Files
        let mut magic = [0u8; 4];
        $reader.read_exact(&mut magic)$($_await)*?;
        if magic != *b"Obj\x01" {
            return Err(ArrowError::ExternalFormat(
                "Avro header does not contain a valid magic number".to_string(),
            ));
        }

        let raw_header = read_header($reader)$($_await)*?;
        let (schema, compression) = deserialize_header(raw_header)?;

        // The marker directly follows the header map.
        let marker = read_file_marker($reader)$($_await)*?;

        Ok((schema, compression, marker))
    }};
}

pub(crate) use {avro_decode, read_header, read_metadata};
57 changes: 4 additions & 53 deletions src/io/avro/read/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use avro_rs::Schema;

use crate::error::{ArrowError, Result};

use super::super::{avro_decode, read_header, read_metadata};
use super::{deserialize_header, Compression};

pub fn zigzag_i64<R: Read>(reader: &mut R) -> Result<i64> {
Expand All @@ -17,25 +18,7 @@ pub fn zigzag_i64<R: Read>(reader: &mut R) -> Result<i64> {
}

fn decode_variable<R: Read>(reader: &mut R) -> Result<u64> {
let mut i = 0u64;
let mut buf = [0u8; 1];

let mut j = 0;
loop {
if j > 9 {
// if j * 7 > 64
panic!()
}
reader.read_exact(&mut buf[..])?;
i |= (u64::from(buf[0] & 0x7F)) << (j * 7);
if (buf[0] >> 7) == 0 {
break;
} else {
j += 1;
}
}

Ok(i)
avro_decode!(reader)
}

fn _read_binary<R: Read>(reader: &mut R) -> Result<Vec<u8>> {
Expand All @@ -46,23 +29,7 @@ fn _read_binary<R: Read>(reader: &mut R) -> Result<Vec<u8>> {
}

fn read_header<R: Read>(reader: &mut R) -> Result<HashMap<String, Vec<u8>>> {
let mut items = HashMap::new();

loop {
let len = zigzag_i64(reader)? as usize;
if len == 0 {
break Ok(items);
}

items.reserve(len);
for _ in 0..len {
let key = _read_binary(reader)?;
let key = String::from_utf8(key)
.map_err(|_| ArrowError::ExternalFormat("Invalid Avro header".to_string()))?;
let value = _read_binary(reader)?;
items.insert(key, value);
}
}
read_header!(reader)
}

fn read_file_marker<R: Read>(reader: &mut R) -> Result<[u8; 16]> {
Expand All @@ -75,21 +42,5 @@ fn read_file_marker<R: Read>(reader: &mut R) -> Result<[u8; 16]> {
/// # Error
/// This function errors iff the header is not a valid avro file header.
pub fn read_schema<R: Read>(reader: &mut R) -> Result<(Schema, Option<Compression>, [u8; 16])> {
let mut magic_number = [0u8; 4];
reader.read_exact(&mut magic_number)?;

// see https://avro.apache.org/docs/current/spec.html#Object+Container+Files
if magic_number != [b'O', b'b', b'j', 1u8] {
return Err(ArrowError::ExternalFormat(
"Avro header does not contain a valid magic number".to_string(),
));
}

let header = read_header(reader)?;

let (schema, compression) = deserialize_header(header)?;

let marker = read_file_marker(reader)?;

Ok((schema, compression, marker))
read_metadata!(reader)
}
60 changes: 4 additions & 56 deletions src/io/avro/read_async/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,14 @@ use crate::error::{ArrowError, Result};

use super::read::deserialize_header;
use super::read::Compression;
use super::{avro_decode, read_header, read_metadata};

/// Reads Avro's metadata from `reader` into a [`Schema`], [`Compression`] and magic marker.
#[allow(clippy::type_complexity)]
pub async fn read_metadata_async<R: AsyncRead + Unpin + Send>(
reader: &mut R,
) -> Result<(Schema, Option<Compression>, [u8; 16])> {
let mut magic_number = [0u8; 4];
reader.read_exact(&mut magic_number).await?;

// see https://avro.apache.org/docs/current/spec.html#Object+Container+Files
if magic_number != [b'O', b'b', b'j', 1u8] {
return Err(ArrowError::ExternalFormat(
"Avro header does not contain a valid magic number".to_string(),
));
}

let header = read_header(reader).await?;

// this is blocking but we can't really live without it
let (schema, compression) = deserialize_header(header)?;

let marker = read_file_marker(reader).await?;

Ok((schema, compression, marker))
read_metadata!(reader.await)
}

/// Reads the file marker asynchronously
Expand All @@ -52,27 +36,7 @@ async fn zigzag_i64<R: AsyncRead + Unpin + Send>(reader: &mut R) -> Result<i64>
}

async fn decode_variable<R: AsyncRead + Unpin + Send>(reader: &mut R) -> Result<u64> {
let mut i = 0u64;
let mut buf = [0u8; 1];

let mut j = 0;
loop {
if j > 9 {
// if j * 7 > 64
return Err(ArrowError::ExternalFormat(
"zigzag decoding failed - corrupt avro file".to_string(),
));
}
reader.read_exact(&mut buf[..]).await?;
i |= (u64::from(buf[0] & 0x7F)) << (j * 7);
if (buf[0] >> 7) == 0 {
break;
} else {
j += 1;
}
}

Ok(i)
avro_decode!(reader.await)
}

async fn _read_binary<R: AsyncRead + Unpin + Send>(reader: &mut R) -> Result<Vec<u8>> {
Expand All @@ -85,21 +49,5 @@ async fn _read_binary<R: AsyncRead + Unpin + Send>(reader: &mut R) -> Result<Vec
async fn read_header<R: AsyncRead + Unpin + Send>(
reader: &mut R,
) -> Result<HashMap<String, Vec<u8>>> {
let mut items = HashMap::new();

loop {
let len = zigzag_i64(reader).await? as usize;
if len == 0 {
break Ok(items);
}

items.reserve(len);
for _ in 0..len {
let key = _read_binary(reader).await?;
let key = String::from_utf8(key)
.map_err(|_| ArrowError::ExternalFormat("Invalid Avro header".to_string()))?;
let value = _read_binary(reader).await?;
items.insert(key, value);
}
}
read_header!(reader.await)
}

0 comments on commit 3903150

Please sign in to comment.