Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added more docs. (#476)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Sep 30, 2021
1 parent 76e8992 commit 55c3f9c
Show file tree
Hide file tree
Showing 25 changed files with 96 additions and 36 deletions.
18 changes: 18 additions & 0 deletions src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ pub use iterator::*;
mod mutable;
pub use mutable::*;

/// An [`Array`] semantically equivalent to `Vec<Option<Vec<Option<T>>>>` with Arrow's in-memory layout.
#[derive(Debug, Clone)]
pub struct ListArray<O: Offset> {
data_type: DataType,
Expand All @@ -28,11 +29,13 @@ pub struct ListArray<O: Offset> {
}

impl<O: Offset> ListArray<O> {
/// Returns a new [`ListArray`] with zero slots (a single zero offset and no values).
pub fn new_empty(data_type: DataType) -> Self {
    // The values array is empty but must still carry the child's `DataType`.
    let child = Self::get_child_type(&data_type).clone();
    let values = new_empty_array(child).into();
    // Even an empty list array carries the initial offset `0`.
    Self::from_data(data_type, Buffer::from(&[O::zero()]), values, None)
}

/// Returns a new null [`ListArray`].
#[inline]
pub fn new_null(data_type: DataType, length: usize) -> Self {
let child = Self::get_child_type(&data_type).clone();
Expand All @@ -44,6 +47,12 @@ impl<O: Offset> ListArray<O> {
)
}

/// Returns a new [`ListArray`].
/// # Panics
/// This function panics iff:
/// * The `data_type`'s physical type is not consistent with the offset `O`.
/// * The `offsets` and `values` are inconsistent.
/// * The validity is not `None` and its length is different from `offsets.len() - 1`.
pub fn from_data(
data_type: DataType,
offsets: Buffer<O>,
Expand Down Expand Up @@ -153,18 +162,21 @@ impl<O: Offset> ListArray<O> {
self.validity.as_ref()
}

/// Returns a reference to the offsets [`Buffer`] of this [`ListArray`].
/// Slot `i` of the list spans `offsets[i]..offsets[i + 1]` in the values array.
#[inline]
pub fn offsets(&self) -> &Buffer<O> {
&self.offsets
}

/// Returns a reference to the inner values array shared by all slots of this [`ListArray`].
#[inline]
pub fn values(&self) -> &Arc<dyn Array> {
&self.values
}
}

impl<O: Offset> ListArray<O> {
/// Returns a default [`DataType`]: inner field is named "item" and is nullable
pub fn default_datatype(data_type: DataType) -> DataType {
let field = Box::new(Field::new("item", data_type, true));
if O::is_large() {
Expand All @@ -174,6 +186,9 @@ impl<O: Offset> ListArray<O> {
}
}

/// Returns the inner [`Field`].
/// # Panics
/// Panics iff the logical type is not consistent with this struct.
pub fn get_child_field(data_type: &DataType) -> &Field {
if O::is_large() {
match data_type.to_logical_type() {
Expand All @@ -188,6 +203,9 @@ impl<O: Offset> ListArray<O> {
}
}

/// Returns the inner [`DataType`], i.e. the [`DataType`] of this list's items.
/// # Panics
/// Panics iff the logical type of `data_type` is not consistent with this struct.
pub fn get_child_type(data_type: &DataType) -> &DataType {
Self::get_child_field(data_type).data_type()
}
Expand Down
24 changes: 18 additions & 6 deletions src/array/list/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ pub struct MutableListArray<O: Offset, M: MutableArray> {
}

impl<O: Offset, M: MutableArray + Default> MutableListArray<O, M> {
/// Creates a new, empty [`MutableListArray`], using `M::default()` as the inner
/// mutable array and the default list [`DataType`] derived from it.
pub fn new() -> Self {
    let inner = M::default();
    let data_type = ListArray::<O>::default_datatype(inner.data_type().clone());
    Self::new_from(inner, data_type, 0)
}

/// Creates a new [`MutableListArray`] with a capacity.
pub fn with_capacity(capacity: usize) -> Self {
let values = M::default();
let data_type = ListArray::<O>::default_datatype(values.data_type().clone());
Expand Down Expand Up @@ -79,6 +81,7 @@ where
M: MutableArray + TryExtend<Option<T>>,
I: IntoIterator<Item = Option<T>>,
{
#[inline]
fn try_push(&mut self, item: Option<I>) -> Result<()> {
if let Some(items) = item {
let values = self.mut_values();
Expand All @@ -92,6 +95,7 @@ where
}

impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
/// Creates a new [`MutableListArray`] from a [`MutableArray`] and capacity.
pub fn new_from(values: M, data_type: DataType, capacity: usize) -> Self {
let mut offsets = MutableBuffer::<O>::with_capacity(capacity + 1);
offsets.push(O::default());
Expand All @@ -105,6 +109,7 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
}
}

/// Creates a new [`MutableListArray`] from a [`MutableArray`].
pub fn new_with_field(values: M, name: &str, nullable: bool) -> Self {
let field = Box::new(Field::new(name, values.data_type().clone(), nullable));
let data_type = if O::is_large() {
Expand All @@ -115,12 +120,14 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
Self::new_from(values, data_type, 0)
}

/// Creates a new [`MutableListArray`] from a [`MutableArray`] and a capacity
/// (number of list slots), using the default list [`DataType`]
/// (inner field named "item", nullable).
pub fn new_with_capacity(values: M, capacity: usize) -> Self {
let data_type = ListArray::<O>::default_datatype(values.data_type().clone());
Self::new_from(values, data_type, capacity)
}

pub fn try_push_valid(&mut self) -> Result<()> {
#[inline]
fn try_push_valid(&mut self) -> Result<()> {
let size = self.values.len();
let size = O::from_usize(size).ok_or(ArrowError::KeyOverflowError)?; // todo: make this error
assert!(size >= *self.offsets.last().unwrap());
Expand All @@ -132,6 +139,7 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
Ok(())
}

#[inline]
fn push_null(&mut self) {
self.offsets.push(self.last_offset());
match &mut self.validity {
Expand All @@ -140,10 +148,12 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
}
}

/// Returns a mutable reference to the inner [`MutableArray`] holding the values.
pub fn mut_values(&mut self) -> &mut M {
&mut self.values
}

/// Returns a shared reference to the inner [`MutableArray`] holding the values.
pub fn values(&self) -> &M {
&self.values
}
Expand All @@ -154,11 +164,12 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
}

fn init_validity(&mut self) {
self.validity = Some(MutableBitmap::from_trusted_len_iter(
std::iter::repeat(true)
.take(self.offsets.len() - 1 - 1)
.chain(std::iter::once(false)),
))
let len = self.offsets.len() - 1;

let mut validity = MutableBitmap::new();
validity.extend_constant(len, true);
validity.set(len - 1, false);
self.validity = Some(validity)
}

/// Converts itself into an [`Array`].
Expand Down Expand Up @@ -207,6 +218,7 @@ impl<O: Offset, M: MutableArray + 'static> MutableArray for MutableListArray<O,
self
}

#[inline]
fn push_null(&mut self) {
self.push_null()
}
Expand Down
1 change: 1 addition & 0 deletions src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#![deny(missing_docs)]
//! Contains the [`Array`] and [`MutableArray`] trait objects declaring arrays,
//! as well as concrete arrays (such as [`Utf8Array`] and [`MutableUtf8Array`]).
//!
Expand Down
15 changes: 13 additions & 2 deletions src/array/union/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ pub struct UnionArray {
}

impl UnionArray {
/// Creates a new null [`UnionArray`].
pub fn new_null(data_type: DataType, length: usize) -> Self {
if let DataType::Union(f, _, is_sparse) = &data_type {
let fields = f
Expand All @@ -57,6 +58,7 @@ impl UnionArray {
}
}

/// Creates a new empty [`UnionArray`].
pub fn new_empty(data_type: DataType) -> Self {
if let DataType::Union(f, _, is_sparse) = &data_type {
let fields = f
Expand All @@ -83,6 +85,7 @@ impl UnionArray {
}
}

/// Creates a new [`UnionArray`].
pub fn from_data(
data_type: DataType,
types: Buffer<i8>,
Expand Down Expand Up @@ -126,14 +129,17 @@ impl UnionArray {
}
}

/// Returns the optional offsets [`Buffer`].
// NOTE(review): presumably `None` for sparse unions — confirm against `from_data`.
pub fn offsets(&self) -> &Option<Buffer<i32>> {
&self.offsets
}

/// Returns this union's child arrays, one per field of its [`DataType::Union`].
pub fn fields(&self) -> &Vec<Arc<dyn Array>> {
&self.fields
}

/// Returns the type ids [`Buffer`] of this [`UnionArray`].
pub fn types(&self) -> &Buffer<i8> {
&self.types
}
Expand Down Expand Up @@ -239,19 +245,24 @@ impl Array for UnionArray {

impl UnionArray {
fn get_all(data_type: &DataType) -> (&[Field], Option<&[i32]>, bool) {
match data_type {
match data_type.to_logical_type() {
DataType::Union(fields, ids, is_sparse) => {
(fields, ids.as_ref().map(|x| x.as_ref()), *is_sparse)
}
DataType::Extension(_, inner, _) => Self::get_all(inner),
_ => panic!("Wrong datatype passed to UnionArray."),
}
}

/// Returns all fields from [`DataType::Union`].
/// # Panics
/// Panics iff `data_type`'s logical type is not [`DataType::Union`].
pub fn get_fields(data_type: &DataType) -> &[Field] {
Self::get_all(data_type).0
}

/// Returns whether the [`DataType::Union`] is sparse or not.
/// # Panics
/// Panics iff `data_type`'s logical type is not [`DataType::Union`].
pub fn is_sparse(data_type: &DataType) -> bool {
Self::get_all(data_type).2
}
Expand Down
1 change: 1 addition & 0 deletions src/datatypes/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#![deny(missing_docs)]
//! Contains all metadata, such as [`PhysicalType`], [`DataType`], [`Field`] and [`Schema`].
mod field;
mod physical_type;
Expand Down
2 changes: 1 addition & 1 deletion src/ffi/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ impl Ffi_ArrowArray {
}
}

// create an empty `Ffi_ArrowArray`, which can be used to import data into
/// creates an empty [`Ffi_ArrowArray`], which can be used to import data into
pub fn empty() -> Self {
Self {
length: 0,
Expand Down
1 change: 1 addition & 0 deletions src/ffi/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#![deny(missing_docs)]
//! contains FFI bindings to import and export [`Array`](crate::array::Array) via
//! Arrow's [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html)
mod array;
Expand Down
1 change: 1 addition & 0 deletions src/io/avro/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#![deny(missing_docs)]
//! Read and write from and to Apache Avro
pub mod read;
Expand Down
7 changes: 7 additions & 0 deletions src/io/avro/read/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![deny(missing_docs)]
//! APIs to read from Avro format to arrow.
use std::io::Read;
use std::sync::Arc;

Expand All @@ -12,6 +14,7 @@ use crate::datatypes::Schema;
use crate::error::{ArrowError, Result};
use crate::record_batch::RecordBatch;

/// Reads the avro metadata from `reader` into a [`Schema`], [`Codec`] and magic marker.
pub fn read_metadata<R: std::io::Read>(reader: &mut R) -> Result<(Schema, Codec, [u8; 16])> {
let (schema, codec, marker) = util::read_schema(reader)?;
Ok((schema::convert_schema(&schema)?, codec, marker))
Expand Down Expand Up @@ -75,6 +78,7 @@ pub struct BlockStreamIterator<'a, R: Read> {
}

impl<'a, R: Read> BlockStreamIterator<'a, R> {
/// Creates a new [`BlockStreamIterator`].
pub fn new(reader: &'a mut R, file_marker: [u8; 16]) -> Self {
Self {
reader,
Expand All @@ -83,6 +87,7 @@ impl<'a, R: Read> BlockStreamIterator<'a, R> {
}
}

/// Returns a mutable reference to the internal read buffer of this [`BlockStreamIterator`].
// NOTE(review): this exposes iterator-internal state; callers can mutate it directly.
pub fn buffer(&mut self) -> &mut Vec<u8> {
&mut self.buf.0
}
Expand Down Expand Up @@ -115,6 +120,7 @@ pub struct Decompressor<'a, R: Read> {
}

impl<'a, R: Read> Decompressor<'a, R> {
/// Creates a new [`Decompressor`].
pub fn new(blocks: BlockStreamIterator<'a, R>, codec: Codec) -> Self {
Self {
blocks,
Expand Down Expand Up @@ -154,6 +160,7 @@ pub struct Reader<'a, R: Read> {
}

impl<'a, R: Read> Reader<'a, R> {
/// Creates a new [`Reader`].
pub fn new(iter: Decompressor<'a, R>, schema: Arc<Schema>) -> Self {
Self { iter, schema }
}
Expand Down
1 change: 1 addition & 0 deletions src/io/csv/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#![deny(missing_docs)]
//! Transfer data between the Arrow memory format and CSV (comma-separated values).
use crate::error::ArrowError;
Expand Down
1 change: 1 addition & 0 deletions src/io/csv/read/deserialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ fn deserialize_binary<O: Offset>(rows: &[ByteRecord], column: usize) -> Arc<dyn
Arc::new(BinaryArray::<O>::from_trusted_len_iter(iter))
}

/// Deserializes `column` of `rows` into an [`Array`] of [`DataType`] `datatype`.
pub fn deserialize_column(
rows: &[ByteRecord],
column: usize,
Expand Down
1 change: 1 addition & 0 deletions src/io/csv/read/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//! APIs to read from CSV
mod deserialize;
mod reader;

Expand Down
1 change: 1 addition & 0 deletions src/io/csv/read/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::{
error::{ArrowError, Result},
};

/// Returns a new [`Schema`] whereby the fields are selected based on `projection`.
pub fn projected_schema(schema: &Schema, projection: Option<&[usize]>) -> Schema {
match &projection {
Some(projection) => {
Expand Down
1 change: 1 addition & 0 deletions src/io/csv/write/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//! APIs to write to CSV
mod iterator;
mod serialize;

Expand Down
13 changes: 7 additions & 6 deletions src/io/csv/write/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,18 @@ use crate::{

use super::iterator::{BufStreamingIterator, StreamingIterator};

/// Options to serialize logical types to CSV
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct SerializeOptions {
/// used for date32
/// used for [`DataType::Date32`]
pub date32_format: String,
/// used for date64
/// used for [`DataType::Date64`]
pub date64_format: String,
/// used for time32
/// used for [`DataType::Time32`]
pub time32_format: String,
/// used for time64
/// used for [`DataType::Time64`]
pub time64_format: String,
/// used for timestamp
/// used for [`DataType::Timestamp`]
pub timestamp_format: String,
}

Expand Down Expand Up @@ -76,7 +77,7 @@ macro_rules! dyn_date {
}};
}

/// Returns an Iterator that returns items of `Array` as `Vec<u8>`, according to `options`.
/// Returns a [`StreamingIterator`] that yields `&[u8]` serialized from `array` according to `options`.
/// For numeric types, this serializes as usual. For dates, times and timestamps, it uses `options` to format them.
/// Supported types:
/// * boolean
Expand Down
Loading

0 comments on commit 55c3f9c

Please sign in to comment.