From 929339587a06f4da61db25b4569ee06ae9a7cc1b Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Mon, 30 Aug 2021 19:14:15 +0000 Subject: [PATCH] Added more docs. --- src/alloc/mod.rs | 2 +- src/array/binary/from.rs | 3 +- src/array/binary/mutable.rs | 12 ++++++- src/array/boolean/mutable.rs | 7 ++++ src/array/dictionary/mutable.rs | 5 +++ src/array/ffi.rs | 1 + src/array/fixed_size_binary/mod.rs | 21 +++++++---- src/array/fixed_size_binary/mutable.rs | 17 ++++++++- src/array/fixed_size_list/mod.rs | 12 ++++++- src/array/list/iterator.rs | 2 +- src/array/mod.rs | 6 ++++ src/array/null.rs | 5 ++- src/array/specification.rs | 3 ++ src/array/struct_.rs | 36 +++++++++++++++++-- src/array/utf8/iterator.rs | 2 +- src/bitmap/mod.rs | 1 + src/bitmap/mutable.rs | 1 + .../utils/chunk_iterator/chunks_exact.rs | 6 +++- src/bitmap/utils/chunk_iterator/mod.rs | 10 +++--- src/bitmap/utils/iterator.rs | 2 +- src/bitmap/utils/mod.rs | 1 + src/bitmap/utils/slice_iterator.rs | 3 +- src/bitmap/utils/zip_validity.rs | 1 + src/buffer/immutable.rs | 2 ++ src/buffer/mod.rs | 1 + src/buffer/mutable.rs | 1 + src/datatypes/field.rs | 5 +++ src/datatypes/mod.rs | 10 ++---- src/error.rs | 7 ++++ src/io/ipc/mod.rs | 18 +--------- src/io/ipc/read/mod.rs | 18 +--------- src/io/ipc/write/common.rs | 1 + src/io/ipc/write/mod.rs | 18 +--------- src/io/mod.rs | 18 +--------- src/io/parquet/mod.rs | 1 + src/io/parquet/read/mod.rs | 1 + src/io/parquet/write/mod.rs | 1 + src/io/print.rs | 17 +-------- src/lib.rs | 2 ++ src/scalar/mod.rs | 3 ++ src/trusted_len.rs | 1 + src/types/bit_chunk.rs | 6 ++++ src/types/index.rs | 6 +++- src/types/mod.rs | 14 ++++++-- src/types/simd/mod.rs | 9 ++++- src/util/bench_util.rs | 1 + src/util/mod.rs | 17 +-------- 47 files changed, 201 insertions(+), 136 deletions(-) diff --git a/src/alloc/mod.rs b/src/alloc/mod.rs index 36e8a78b8db..deda8a83124 100644 --- a/src/alloc/mod.rs +++ b/src/alloc/mod.rs @@ -32,7 +32,7 @@ mod alignment; pub use alignment::ALIGNMENT; // If 
this number is not zero after all objects have been `drop`, there is a memory leak -pub static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0); +static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0); /// Returns the total number of bytes allocated to buffers by the allocator. pub fn total_allocated_bytes() -> isize { diff --git a/src/array/binary/from.rs b/src/array/binary/from.rs index c8efdfe462a..ef9a287a9c6 100644 --- a/src/array/binary/from.rs +++ b/src/array/binary/from.rs @@ -10,11 +10,12 @@ use crate::{ use super::{BinaryArray, MutableBinaryArray}; impl BinaryArray { + /// Creates a new [`BinaryArray`] from slices of `&[u8]`. pub fn from_slice, P: AsRef<[T]>>(slice: P) -> Self { Self::from_iter(slice.as_ref().iter().map(Some)) } - /// Creates a new [`BinaryArray`] from a slice of `&[u8]`. + /// Creates a new [`BinaryArray`] from a slice of optional `&[u8]`. // Note: this can't be `impl From` because Rust does not allow double `AsRef` on it. pub fn from, P: AsRef<[Option]>>(slice: P) -> Self { Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref())) diff --git a/src/array/binary/mutable.rs b/src/array/binary/mutable.rs index fe4b3e050fa..3ae30d436d9 100644 --- a/src/array/binary/mutable.rs +++ b/src/array/binary/mutable.rs @@ -10,7 +10,10 @@ use crate::{ use super::BinaryArray; -/// The mutable version of [`BinaryArray`]. +/// The Arrow's equivalent to `Vec>>`. +/// Converting a [`MutableBinaryArray`] into a [`BinaryArray`] is `O(1)`. +/// # Implementation +/// This struct does not allocate a validity until one is required (i.e. push a null to it). #[derive(Debug)] pub struct MutableBinaryArray { offsets: MutableBuffer, @@ -35,10 +38,16 @@ impl Default for MutableBinaryArray { } impl MutableBinaryArray { + /// Creates a new empty [`MutableBinaryArray`]. 
+ /// # Implementation + /// This allocates a [`MutableBuffer`] of one element pub fn new() -> Self { Self::with_capacity(0) } + /// Creates a new [`MutableBinaryArray`] with capacity for `capacity` values. + /// # Implementation + /// This does not allocate the validity. pub fn with_capacity(capacity: usize) -> Self { let mut offsets = MutableBuffer::::with_capacity(capacity + 1); offsets.push(O::default()); @@ -49,6 +58,7 @@ impl MutableBinaryArray { } } + /// Reserves `additional` slots. pub fn reserve(&mut self, additional: usize) { self.offsets.reserve(additional); if let Some(x) = self.validity.as_mut() { diff --git a/src/array/boolean/mutable.rs b/src/array/boolean/mutable.rs index 32f3ba6d89b..c0ccf5938e3 100644 --- a/src/array/boolean/mutable.rs +++ b/src/array/boolean/mutable.rs @@ -13,6 +13,8 @@ use super::BooleanArray; /// The Arrow's equivalent to `Vec>`, but with `1/16` of its size. /// Converting a [`MutableBooleanArray`] into a [`BooleanArray`] is `O(1)`. +/// # Implementation +/// This struct does not allocate a validity until one is required (i.e. push a null to it). #[derive(Debug)] pub struct MutableBooleanArray { values: MutableBitmap, @@ -39,10 +41,12 @@ impl Default for MutableBooleanArray { } impl MutableBooleanArray { + /// Creates a new empty [`MutableBooleanArray`]. pub fn new() -> Self { Self::with_capacity(0) } + /// Creates a new [`MutableBooleanArray`] with a capacity of values. pub fn with_capacity(capacity: usize) -> Self { Self { values: MutableBitmap::with_capacity(capacity), @@ -50,6 +54,7 @@ impl MutableBooleanArray { } } + /// Reserves `additional` slots. pub fn reserve(&mut self, additional: usize) { self.values.reserve(additional); if let Some(x) = self.validity.as_mut() { @@ -57,10 +62,12 @@ impl MutableBooleanArray { } } + /// Canonical method to create a new [`MutableBooleanArray`].
pub fn from_data(values: MutableBitmap, validity: Option) -> Self { Self { values, validity } } + /// Pushes a new entry to [`MutableBooleanArray`]. pub fn push(&mut self, value: Option) { match value { Some(value) => { diff --git a/src/array/dictionary/mutable.rs b/src/array/dictionary/mutable.rs index 07dfd3b13fd..386857a4a78 100644 --- a/src/array/dictionary/mutable.rs +++ b/src/array/dictionary/mutable.rs @@ -43,6 +43,7 @@ impl From for MutableDictionaryArray } impl MutableDictionaryArray { + /// Creates an empty [`MutableDictionaryArray`]. pub fn new() -> Self { let values = M::default(); Self { @@ -83,18 +84,22 @@ impl MutableDictionaryArray { } } + /// pushes a null value pub fn push_null(&mut self) { self.keys.push(None) } + /// returns a mutable reference to the inner values. pub fn mut_values(&mut self) -> &mut M { &mut self.values } + /// returns a reference to the inner values. pub fn values(&self) -> &M { &self.values } + /// converts itself into `Arc` pub fn into_arc(self) -> Arc { let a: DictionaryArray = self.into(); Arc::new(a) diff --git a/src/array/ffi.rs b/src/array/ffi.rs index f1029129fae..ea03602336f 100644 --- a/src/array/ffi.rs +++ b/src/array/ffi.rs @@ -23,6 +23,7 @@ pub unsafe trait ToFfi { /// Trait describing how a struct imports into itself from the /// [C data interface](https://arrow.apache.org/docs/format/CDataInterface.html) (FFI). pub unsafe trait FromFfi: Sized { + /// Convert itself from FFI. fn try_from_ffi(array: T) -> Result; } diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index 714dccc8cc5..71a9a687513 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -6,6 +6,8 @@ mod iterator; mod mutable; pub use mutable::*; +/// The Arrow's equivalent to an immutable `Vec>`. +/// Cloning and slicing this struct is `O(1)`. 
#[derive(Debug, Clone)] pub struct FixedSizeBinaryArray { size: i32, // this is redundant with `data_type`, but useful to not have to deconstruct the data_type. @@ -16,12 +18,12 @@ pub struct FixedSizeBinaryArray { } impl FixedSizeBinaryArray { - #[inline] + /// Returns a new empty [`FixedSizeBinaryArray`]. pub fn new_empty(data_type: DataType) -> Self { Self::from_data(data_type, Buffer::new(), None) } - #[inline] + /// Returns a new null [`FixedSizeBinaryArray`]. pub fn new_null(data_type: DataType, length: usize) -> Self { Self::from_data( data_type, @@ -30,7 +32,7 @@ impl FixedSizeBinaryArray { ) } - #[inline] + /// Returns a new [`FixedSizeBinaryArray`]. pub fn from_data(data_type: DataType, values: Buffer, validity: Option) -> Self { let size = *Self::get_size(&data_type); @@ -45,7 +47,9 @@ impl FixedSizeBinaryArray { } } - #[inline] + /// Returns a slice of this [`FixedSizeBinaryArray`]. + /// # Implementation + /// This operation is `O(1)` as it amounts to increase 3 ref counts. pub fn slice(&self, offset: usize, length: usize) -> Self { let validity = self.validity.clone().map(|x| x.slice(offset, length)); let values = self @@ -61,11 +65,14 @@ impl FixedSizeBinaryArray { } } - #[inline] + /// Returns the values allocated on this [`FixedSizeBinaryArray`]. pub fn values(&self) -> &Buffer { &self.values } + /// Returns value at position `i`. + /// # Panics + /// Panics iff `i >= self.len()`. #[inline] pub fn value(&self, i: usize) -> &[u8] { &self.values()[i * self.size as usize..(i + 1) * self.size as usize] @@ -82,7 +89,7 @@ impl FixedSizeBinaryArray { ) } - #[inline] + /// Returns the size pub fn size(&self) -> usize { self.size as usize } @@ -145,6 +152,7 @@ unsafe impl ToFfi for FixedSizeBinaryArray { } impl FixedSizeBinaryArray { + /// Creates a [`FixedSizeBinaryArray`] from a fallible iterator of optional `[u8]`.
pub fn try_from_iter, I: IntoIterator>>( iter: I, size: usize, @@ -152,6 +160,7 @@ impl FixedSizeBinaryArray { MutableFixedSizeBinaryArray::try_from_iter(iter, size).map(|x| x.into()) } + /// Creates a [`FixedSizeBinaryArray`] from an iterator of optional `[u8]`. pub fn from_iter, I: IntoIterator>>( iter: I, size: usize, diff --git a/src/array/fixed_size_binary/mutable.rs b/src/array/fixed_size_binary/mutable.rs index 4f06eae3473..1376a56386b 100644 --- a/src/array/fixed_size_binary/mutable.rs +++ b/src/array/fixed_size_binary/mutable.rs @@ -10,7 +10,10 @@ use crate::{ use super::{FixedSizeBinaryArray, FixedSizeBinaryValues}; -/// Mutable version of [`FixedSizeBinaryArray`]. +/// The Arrow's equivalent to a mutable `Vec>`. +/// Converting a [`MutableFixedSizeBinaryArray`] into a [`FixedSizeBinaryArray`] is `O(1)`. +/// # Implementation +/// This struct does not allocate a validity until one is required (i.e. push a null to it). #[derive(Debug)] pub struct MutableFixedSizeBinaryArray { data_type: DataType, @@ -30,6 +33,7 @@ impl From for FixedSizeBinaryArray { } impl MutableFixedSizeBinaryArray { + /// Canonical method to create a new [`MutableFixedSizeBinaryArray`]. pub fn from_data( size: usize, values: MutableBuffer, @@ -55,10 +59,12 @@ impl MutableFixedSizeBinaryArray { } } + /// Creates a new empty [`MutableFixedSizeBinaryArray`]. pub fn new(size: usize) -> Self { Self::with_capacity(size, 0) } + /// Creates a new [`MutableFixedSizeBinaryArray`] with capacity for `capacity` entries. pub fn with_capacity(size: usize, capacity: usize) -> Self { Self::from_data( size, @@ -67,6 +73,9 @@ impl MutableFixedSizeBinaryArray { ) } + /// tries to push a new entry to [`MutableFixedSizeBinaryArray`]. + /// # Error + /// Errors iff the size of `value` is not equal to its own size. #[inline] pub fn try_push>(&mut self, value: Option

) -> Result<()> { match value { @@ -95,11 +104,17 @@ impl MutableFixedSizeBinaryArray { Ok(()) } + /// pushes a new entry to [`MutableFixedSizeBinaryArray`]. + /// # Panics + /// Panics iff the size of `value` is not equal to its own size. #[inline] pub fn push>(&mut self, value: Option

) { self.try_push(value).unwrap() } + /// Creates a new [`MutableFixedSizeBinaryArray`] from an iterator of values. + /// # Errors + /// Errors iff the size of any of the `value` is not equal to its own size. pub fn try_from_iter, I: IntoIterator>>( iter: I, size: usize, diff --git a/src/array/fixed_size_list/mod.rs b/src/array/fixed_size_list/mod.rs index df5877162db..4147de5b6da 100644 --- a/src/array/fixed_size_list/mod.rs +++ b/src/array/fixed_size_list/mod.rs @@ -12,6 +12,8 @@ pub use iterator::*; mod mutable; pub use mutable::*; +/// The Arrow's equivalent to an immutable `Vec>` where `T` is an Arrow type. +/// Cloning and slicing this struct is `O(1)`. #[derive(Debug, Clone)] pub struct FixedSizeListArray { size: i32, // this is redundant with `data_type`, but useful to not have to deconstruct the data_type. @@ -22,16 +24,19 @@ pub struct FixedSizeListArray { } impl FixedSizeListArray { + /// Returns a new empty [`FixedSizeListArray`]. pub fn new_empty(data_type: DataType) -> Self { let values = new_empty_array(Self::get_child_and_size(&data_type).0.clone()).into(); Self::from_data(data_type, values, None) } + /// Returns a new null [`FixedSizeListArray`]. pub fn new_null(data_type: DataType, length: usize) -> Self { let values = new_null_array(Self::get_child_and_size(&data_type).0.clone(), length).into(); Self::from_data(data_type, values, Some(Bitmap::new_zeroed(length))) } + /// Returns a [`FixedSizeListArray`]. pub fn from_data( data_type: DataType, values: Arc, @@ -50,6 +55,9 @@ impl FixedSizeListArray { } } + /// Returns a slice of this [`FixedSizeListArray`]. + /// # Implementation + /// This operation is `O(1)`. pub fn slice(&self, offset: usize, length: usize) -> Self { let validity = self.validity.clone().map(|x| x.slice(offset, length)); let values = self @@ -66,11 +74,12 @@ impl FixedSizeListArray { } } - #[inline] + /// Returns the inner array. pub fn values(&self) -> &Arc { &self.values } + /// Returns the `Vec` at position `i`. 
#[inline] pub fn value(&self, i: usize) -> Box { self.values @@ -87,6 +96,7 @@ impl FixedSizeListArray { } } + /// Returns a [`DataType`] consistent with this Array. #[inline] pub fn default_datatype(data_type: DataType, size: usize) -> DataType { let field = Box::new(Field::new("item", data_type, true)); diff --git a/src/array/list/iterator.rs b/src/array/list/iterator.rs index 7b55ff03135..9d6991df4f4 100644 --- a/src/array/list/iterator.rs +++ b/src/array/list/iterator.rs @@ -4,7 +4,7 @@ use crate::{array::Offset, trusted_len::TrustedLen}; use super::ListArray; -/// Iterator of values of an `ListArray`. +/// Iterator of values of an [`ListArray`]. pub struct ListValuesIter<'a, A: IterableListArray> { array: &'a A, index: usize, diff --git a/src/array/mod.rs b/src/array/mod.rs index ee459660099..46bc3730d8f 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -26,6 +26,7 @@ use crate::{ /// A trait representing an immutable Arrow array. Arrow arrays are trait objects /// that are infalibly downcasted to concrete types according to the [`Array::data_type`]. pub trait Array: std::fmt::Debug + Send + Sync { + /// Convert to trait object. fn as_any(&self) -> &dyn Any; /// The length of the [`Array`]. Every array has a length corresponding to the number of @@ -404,6 +405,7 @@ pub use self::ffi::ToFfi; /// A trait describing the ability of a struct to create itself from a iterator. /// This is similar to [`Extend`], but accepted the creation to error. pub trait TryExtend { + /// Fallible version of [`Extend::extend`]. fn try_extend>(&mut self, iter: I) -> Result<()>; } @@ -468,10 +470,14 @@ pub trait IterableListArray: Array { /// 2. `offsets[i] >= offsets[i-1] for all i` /// 3. `offsets[i] < values.len() for all i` pub unsafe trait GenericBinaryArray: Array { + /// The values of the array fn values(&self) -> &[u8]; + /// The offsets of the array fn offsets(&self) -> &[O]; } // backward compatibility use std::sync::Arc; + +/// A type def of [`Array`]. 
pub type ArrayRef = Arc; diff --git a/src/array/null.rs b/src/array/null.rs index d603d1e28d6..4587e6bb174 100644 --- a/src/array/null.rs +++ b/src/array/null.rs @@ -11,15 +11,17 @@ pub struct NullArray { } impl NullArray { + /// Returns a new empty [`NullArray`]. pub fn new_empty() -> Self { Self::from_data(0) } - /// Returns a new null array + /// Returns a new [`NullArray`]. pub fn new_null(length: usize) -> Self { Self::from_data(length) } + /// Returns a new [`NullArray`]. pub fn from_data(length: usize) -> Self { Self { data_type: DataType::Null, @@ -28,6 +30,7 @@ impl NullArray { } } + /// Returns a slice of the [`NullArray`]. pub fn slice(&self, offset: usize, length: usize) -> Self { Self { data_type: self.data_type.clone(), diff --git a/src/array/specification.rs b/src/array/specification.rs index 00c69cbd979..dd1e8b77ad6 100644 --- a/src/array/specification.rs +++ b/src/array/specification.rs @@ -9,10 +9,13 @@ use crate::types::Index; /// # Safety /// Do not implement. pub unsafe trait Offset: Index + Num + Ord + num_traits::CheckedAdd { + /// Whether it is `i32` or `i64` fn is_large() -> bool; + /// converts itself to `isize` fn to_isize(&self) -> isize; + /// converts from `isize` fn from_isize(value: isize) -> Option; } diff --git a/src/array/struct_.rs b/src/array/struct_.rs index 41763c464ee..88bb1057b66 100644 --- a/src/array/struct_.rs +++ b/src/array/struct_.rs @@ -9,6 +9,23 @@ use crate::{ use super::{ffi::ToFfi, new_empty_array, new_null_array, Array, FromFfi}; +/// A [`StructArray`] is a nested [`Array`] with an optional validity representing +/// multiple [`Array`] with the same number of rows. 
+/// # Example +/// ``` +/// use std::sync::Arc; +/// use arrow2::array::*; +/// use arrow2::datatypes::*; +/// let boolean = Arc::new(BooleanArray::from_slice(&[false, false, true, true])) as Arc; +/// let int = Arc::new(Int32Array::from_slice(&[42, 28, 19, 31])) as Arc; +/// +/// let fields = vec![ +/// Field::new("b", DataType::Boolean, false), +/// Field::new("c", DataType::Int32, false), +/// ]; +/// +/// let array = StructArray::from_data(fields, vec![boolean, int], None); +/// ``` #[derive(Debug, Clone)] pub struct StructArray { data_type: DataType, @@ -17,6 +34,7 @@ pub struct StructArray { } impl StructArray { + /// Creates an empty [`StructArray`]. pub fn new_empty(fields: &[Field]) -> Self { let values = fields .iter() @@ -25,7 +43,7 @@ impl StructArray { Self::from_data(fields.to_vec(), values, None) } - #[inline] + /// Creates a null [`StructArray`] of length `length`. pub fn new_null(fields: &[Field], length: usize) -> Self { let values = fields .iter() @@ -34,6 +52,11 @@ impl StructArray { Self::from_data(fields.to_vec(), values, Some(Bitmap::new_zeroed(length))) } + /// Canonical method to create a [`StructArray`]. + /// # Panics + /// * fields are empty + /// * values's len is different from Fields' length. + /// * any element of values has a different length than the first element. pub fn from_data( fields: Vec, values: Vec>, @@ -52,6 +75,7 @@ impl StructArray { } } + /// Deconstructs the [`StructArray`] into its individual components. pub fn into_data(self) -> (Vec, Vec>, Option) { let Self { data_type, @@ -66,6 +90,11 @@ impl StructArray { (fields, values, validity) } + /// Creates a new [`StructArray`] that is a slice of `self`. + /// # Panics + /// * `offset + length` must be smaller than `self.len()`. + /// # Implementation + /// This operation is `O(F)` where `F` is the number of fields. 
pub fn slice(&self, offset: usize, length: usize) -> Self { let validity = self.validity.clone().map(|x| x.slice(offset, length)); Self { @@ -79,18 +108,19 @@ impl StructArray { } } - #[inline] + /// Returns the values of this [`StructArray`]. pub fn values(&self) -> &[Arc] { &self.values } - #[inline] + /// Returns the fields of this [`StructArray`]. pub fn fields(&self) -> &[Field] { Self::get_fields(&self.data_type) } } impl StructArray { + /// Returns the fields of the `DataType::Struct`. pub fn get_fields(data_type: &DataType) -> &[Field] { if let DataType::Struct(fields) = data_type { fields diff --git a/src/array/utf8/iterator.rs b/src/array/utf8/iterator.rs index fd6f5d01dfa..fd9a01f14a5 100644 --- a/src/array/utf8/iterator.rs +++ b/src/array/utf8/iterator.rs @@ -15,7 +15,7 @@ pub struct Utf8ValuesIter<'a, O: Offset> { } impl<'a, O: Offset> Utf8ValuesIter<'a, O> { - #[inline] + /// Creates a new [`Utf8ValuesIter`] pub fn new(array: &'a Utf8Array) -> Self { Self { array, diff --git a/src/bitmap/mod.rs b/src/bitmap/mod.rs index 54d488c55b4..c8974b0a031 100644 --- a/src/bitmap/mod.rs +++ b/src/bitmap/mod.rs @@ -1,3 +1,4 @@ +#![deny(missing_docs)] //! Contains efficient containers of booleans: [`Bitmap`] and [`MutableBitmap`]. //! The memory backing these containers is cache-aligned and optimized for both vertical //! and horizontal operations over booleans. diff --git a/src/bitmap/mutable.rs b/src/bitmap/mutable.rs index 389fde3457c..d3eae6689e5 100644 --- a/src/bitmap/mutable.rs +++ b/src/bitmap/mutable.rs @@ -84,6 +84,7 @@ impl MutableBitmap { self.length += 1; } + /// Returns the capacity of [`MutableBitmap`] in number of bits.
#[inline] pub fn capacity(&self) -> usize { self.buffer.capacity() * 8 diff --git a/src/bitmap/utils/chunk_iterator/chunks_exact.rs b/src/bitmap/utils/chunk_iterator/chunks_exact.rs index 87d8936f060..5b7389d3e48 100644 --- a/src/bitmap/utils/chunk_iterator/chunks_exact.rs +++ b/src/bitmap/utils/chunk_iterator/chunks_exact.rs @@ -2,7 +2,7 @@ use std::{convert::TryInto, slice::ChunksExact}; use super::{BitChunk, BitChunkIterExact}; -/// An iterator over a [`BitChunk`] from a slice of bytes. +/// An iterator over a slice of bytes in [`BitChunk`]s. #[derive(Debug)] pub struct BitChunksExact<'a, T: BitChunk> { iter: ChunksExact<'a, u8>, @@ -11,6 +11,7 @@ pub struct BitChunksExact<'a, T: BitChunk> { } impl<'a, T: BitChunk> BitChunksExact<'a, T> { + /// Creates a new [`BitChunksExact`]. #[inline] pub fn new(slice: &'a [u8], len: usize) -> Self { let size_of = std::mem::size_of::(); @@ -32,16 +33,19 @@ impl<'a, T: BitChunk> BitChunksExact<'a, T> { } } + /// Returns the number of chunks of this iterator #[inline] pub fn len(&self) -> usize { self.iter.len() } + /// Returns whether there are no chunks left in this iterator #[inline] pub fn is_empty(&self) -> bool { self.len() == 0 } + /// Returns the remaining [`BitChunk`]. It is zero iff `len / 8 == 0`. #[inline] pub fn remainder(&self) -> T { let remainder_bytes = self.remainder; diff --git a/src/bitmap/utils/chunk_iterator/mod.rs b/src/bitmap/utils/chunk_iterator/mod.rs index 47390955fb9..3c1cb47b420 100644 --- a/src/bitmap/utils/chunk_iterator/mod.rs +++ b/src/bitmap/utils/chunk_iterator/mod.rs @@ -9,7 +9,9 @@ pub use chunks_exact::BitChunksExact; use crate::{trusted_len::TrustedLen, types::BitChunkIter}; pub(crate) use merge::merge_reversed; +/// Trait representing an exact iterator over bytes in [`BitChunk`]. pub trait BitChunkIterExact: Iterator { + /// The remainder of the iterator.
fn remainder(&self) -> B; } @@ -43,6 +45,7 @@ fn copy_with_merge(dst: &mut T::Bytes, bytes: &[u8], bit_offset: us } impl<'a, T: BitChunk> BitChunks<'a, T> { + /// Creates a [`BitChunks`]. pub fn new(slice: &'a [u8], offset: usize, len: usize) -> Self { assert!(offset + len <= slice.len() * 8); @@ -97,7 +100,7 @@ impl<'a, T: BitChunk> BitChunks<'a, T> { }; } - #[inline] + /// Returns the remainder [`BitChunk`]. pub fn remainder(&self) -> T { // remaining bytes may not fit in `size_of::()`. We complement // them to fit by allocating T and writing to it byte by byte @@ -124,13 +127,12 @@ impl<'a, T: BitChunk> BitChunks<'a, T> { T::from_ne_bytes(remainder) } - // in bits - #[inline] + /// Returns the remainder bits in [`BitChunks::remainder`]. pub fn remainder_len(&self) -> usize { self.len - (std::mem::size_of::() * ((self.len / 8) / std::mem::size_of::()) * 8) } - #[inline] + /// Returns an iterator over the remainder bits. pub fn remainder_iter(&self) -> BitChunkIter { BitChunkIter::new(self.remainder(), self.remainder_len()) } diff --git a/src/bitmap/utils/iterator.rs b/src/bitmap/utils/iterator.rs index 9211bbf730d..4e94fd21236 100644 --- a/src/bitmap/utils/iterator.rs +++ b/src/bitmap/utils/iterator.rs @@ -12,7 +12,7 @@ pub struct BitmapIter<'a> { } impl<'a> BitmapIter<'a> { - #[inline] + /// Creates a new [`BitmapIter`]. pub fn new(slice: &'a [u8], offset: usize, len: usize) -> Self { // example: // slice.len() = 4 diff --git a/src/bitmap/utils/mod.rs b/src/bitmap/utils/mod.rs index 212bb7ffb62..2daec27b031 100644 --- a/src/bitmap/utils/mod.rs +++ b/src/bitmap/utils/mod.rs @@ -1,3 +1,4 @@ +//! General utilities for bitmaps representing items where LSB is the first item. 
mod chunk_iterator; mod fmt; mod iterator; diff --git a/src/bitmap/utils/slice_iterator.rs b/src/bitmap/utils/slice_iterator.rs index c081a791988..09502dd079a 100644 --- a/src/bitmap/utils/slice_iterator.rs +++ b/src/bitmap/utils/slice_iterator.rs @@ -1,6 +1,6 @@ use crate::bitmap::Bitmap; -/// Internal state of [SlicesIterator] +/// Internal state of [`SlicesIterator`] #[derive(Debug, Clone, PartialEq)] enum State { // normal iteration @@ -27,6 +27,7 @@ pub struct SlicesIterator<'a> { } impl<'a> SlicesIterator<'a> { + /// Creates a new [`SlicesIterator`] pub fn new(values: &'a Bitmap) -> Self { let (buffer, offset, _) = values.as_slice(); let mut iter = buffer.iter(); diff --git a/src/bitmap/utils/zip_validity.rs b/src/bitmap/utils/zip_validity.rs index b23983aff17..aabacfef98d 100644 --- a/src/bitmap/utils/zip_validity.rs +++ b/src/bitmap/utils/zip_validity.rs @@ -21,6 +21,7 @@ impl<'a, T, I: Iterator + Clone> Clone for ZipValidity<'a, T, I> { } impl<'a, T, I: Iterator> ZipValidity<'a, T, I> { + /// Creates a new [`ZipValidity`]. pub fn new(values: I, validity: Option>) -> Self { let has_validity = validity.as_ref().is_some(); let validity_iter = validity.unwrap_or_else(|| BitmapIter::new(&[], 0, 0)); diff --git a/src/buffer/immutable.rs b/src/buffer/immutable.rs index 5989e30491c..07f0d68239b 100644 --- a/src/buffer/immutable.rs +++ b/src/buffer/immutable.rs @@ -40,11 +40,13 @@ impl Default for Buffer { } impl Buffer { + /// Creates an empty [`Buffer`]. #[inline] pub fn new() -> Self { Self::default() } + /// Creates a new [`Buffer`] filled with zeros. #[inline] pub fn new_zeroed(length: usize) -> Self { MutableBuffer::from_len_zeroed(length).into() diff --git a/src/buffer/mod.rs b/src/buffer/mod.rs index a146c6fae62..bdb13195a3a 100644 --- a/src/buffer/mod.rs +++ b/src/buffer/mod.rs @@ -1,3 +1,4 @@ +#![deny(missing_docs)] //! Contains containers for all Arrow sized types (e.g. `i32`), //! [`Buffer`] and [`MutableBuffer`]. 
diff --git a/src/buffer/mutable.rs b/src/buffer/mutable.rs index 0c5544875ee..7d6cc0b5c73 100644 --- a/src/buffer/mutable.rs +++ b/src/buffer/mutable.rs @@ -609,6 +609,7 @@ impl From> for MutableBuffer { } impl MutableBuffer { + /// Creates a [`MutableBuffer`] from an iterator of `u64`. #[inline] pub fn from_chunk_iter>(iter: I) -> Self { MutableBuffer::from_trusted_len_iter(iter).into() diff --git a/src/datatypes/field.rs b/src/datatypes/field.rs index 141bb1e189b..3927a878f21 100644 --- a/src/datatypes/field.rs +++ b/src/datatypes/field.rs @@ -25,10 +25,15 @@ use super::DataType; /// [Arrow specification](https://arrow.apache.org/docs/cpp/api/datatype.html) #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Field { + /// Its name pub name: String, + /// Its logical [`DataType`] pub data_type: DataType, + /// Whether its values can be null or not pub nullable: bool, + /// The dictionary id of this field (currently un-used) pub dict_id: i64, + /// Whether the dictionary's values are ordered pub dict_is_ordered: bool, /// A map of key-value pairs containing additional custom meta data. pub metadata: Option>, diff --git a/src/datatypes/mod.rs b/src/datatypes/mod.rs index 5b621982cf0..7aa4a886ed3 100644 --- a/src/datatypes/mod.rs +++ b/src/datatypes/mod.rs @@ -1,11 +1,4 @@ -//! This module contains logical types defined in the -//! [Arrow specification](https://arrow.apache.org/docs/cpp/api/datatype.html): -//! -//! * [`DataType`] -//! * [`Field`] -//! * [`Schema`] -//! * [`TimeUnit`] -//! * [`IntervalUnit`] +//! Metadata declarations such as [`DataType`], [`Field`] and [`Schema`]. mod field; mod schema; @@ -180,4 +173,5 @@ impl DataType { // backward compatibility use std::sync::Arc; +/// typedef for [`Arc`]. 
pub type SchemaRef = Arc; diff --git a/src/error.rs b/src/error.rs index 2b97afe5f29..06a3d963625 100644 --- a/src/error.rs +++ b/src/error.rs @@ -10,8 +10,11 @@ pub enum ArrowError { NotYetImplemented(String), /// Triggered by an external error, such as CSV, serde, chrono. External(String, Box), + /// Error associated with incompatible schemas. Schema(String), + /// Errors associated with IO Io(std::io::Error), + /// When an invalid argument is passed to a function. InvalidArgumentError(String), /// Error during import or export to/from C Data Interface Ffi(String), @@ -19,10 +22,13 @@ pub enum ArrowError { Ipc(String), /// Error during import or export to/from a format ExternalFormat(String), + /// Whenever pushing to a container fails because it does not support more entries. + /// (e.g. maximum size of the keys of a dictionary overflowed) KeyOverflowError, /// Error during arithmetic operation. Normally returned /// during checked operations ArithmeticError(String), + /// Any other error. Other(String), } @@ -83,4 +89,5 @@ impl Display for ArrowError { impl Error for ArrowError {} +/// Typedef for a [`std::result::Result`] of an [`ArrowError`]. pub type Result = std::result::Result; diff --git a/src/io/ipc/mod.rs b/src/io/ipc/mod.rs index d7246f41745..f6b347534c9 100644 --- a/src/io/ipc/mod.rs +++ b/src/io/ipc/mod.rs @@ -1,21 +1,5 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. +//! APIs to read from and write to Arrow's IPC format. -// TODO: (vcq): Protobuf codegen is not generating Debug impls. #![allow(missing_debug_implementations)] #![allow(non_camel_case_types)] #[allow(clippy::redundant_closure)] diff --git a/src/io/ipc/read/mod.rs b/src/io/ipc/read/mod.rs index 76b139a589f..aec51e1a8fd 100644 --- a/src/io/ipc/read/mod.rs +++ b/src/io/ipc/read/mod.rs @@ -1,20 +1,4 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - +//! APIs to read Arrow's IPC format. mod array; mod common; mod deserialize; diff --git a/src/io/ipc/write/common.rs b/src/io/ipc/write/common.rs index cc85b50b90d..2f346912b00 100644 --- a/src/io/ipc/write/common.rs +++ b/src/io/ipc/write/common.rs @@ -14,6 +14,7 @@ // KIND, either express or implied. 
See the License for the // specific language governing permissions and limitations // under the License. +//! Common utilities used to write to Arrow's IPC format. use std::io::Write; use std::{collections::HashMap, sync::Arc}; diff --git a/src/io/ipc/write/mod.rs b/src/io/ipc/write/mod.rs index 7bdf9089fe6..d24b09c377d 100644 --- a/src/io/ipc/write/mod.rs +++ b/src/io/ipc/write/mod.rs @@ -1,20 +1,4 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - +//! APIs to write to Arrow's IPC format. pub mod common; mod schema; mod serialize; diff --git a/src/io/mod.rs b/src/io/mod.rs index 8e459c592b8..9bef55f8ebd 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -1,20 +1,4 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - +//! Interact with different formats such as Arrow, CSV, parquet, etc. #[cfg(feature = "io_csv")] pub mod csv; diff --git a/src/io/parquet/mod.rs b/src/io/parquet/mod.rs index 404b4e38086..6c01e6e6bc4 100644 --- a/src/io/parquet/mod.rs +++ b/src/io/parquet/mod.rs @@ -1,3 +1,4 @@ +//! APIs to read from and write to Parquet format. use crate::error::ArrowError; pub mod read; diff --git a/src/io/parquet/read/mod.rs b/src/io/parquet/read/mod.rs index 6146787a440..e0c0aa1acf3 100644 --- a/src/io/parquet/read/mod.rs +++ b/src/io/parquet/read/mod.rs @@ -1,3 +1,4 @@ +//! APIs to read from Parquet format. use std::{ io::{Read, Seek}, sync::Arc, diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index febc803efc6..cb95a0c2e99 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -1,3 +1,4 @@ +//! APIs to write to Parquet format. mod binary; mod boolean; mod dictionary; diff --git a/src/io/print.rs b/src/io/print.rs index a96654249b6..4cc46f31e73 100644 --- a/src/io/print.rs +++ b/src/io/print.rs @@ -1,19 +1,4 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. +//! APIs to represent [`RecordBatch`] as a formatted table. use crate::{array::get_display, record_batch::RecordBatch}; diff --git a/src/lib.rs b/src/lib.rs index 6dcc54c0907..d2833904d4d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +//! Doc provided by README + pub mod alloc; #[macro_use] pub mod array; diff --git a/src/scalar/mod.rs b/src/scalar/mod.rs index 149703d15c2..39bb9a858ca 100644 --- a/src/scalar/mod.rs +++ b/src/scalar/mod.rs @@ -1,3 +1,5 @@ +//! Declares the [`Scalar`] API, an optional, trait object representing +//! the zero-dimension of an [`crate::array::Array`]. use std::any::Any; use crate::{array::*, datatypes::*, types::days_ms}; @@ -18,6 +20,7 @@ pub use null::*; mod struct_; pub use struct_::*; +/// Trait object declaring an optional value with a logical type. pub trait Scalar: std::fmt::Debug { fn as_any(&self) -> &dyn Any; diff --git a/src/trusted_len.rs b/src/trusted_len.rs index 8c52c0bf861..340d6cde828 100644 --- a/src/trusted_len.rs +++ b/src/trusted_len.rs @@ -1,3 +1,4 @@ +//! Declares [`TrustedLen`]. use std::slice::Iter; /// An iterator of known, fixed size. diff --git a/src/types/bit_chunk.rs b/src/types/bit_chunk.rs index 98a36b7b781..c3b1768454e 100644 --- a/src/types/bit_chunk.rs +++ b/src/types/bit_chunk.rs @@ -22,14 +22,19 @@ pub unsafe trait BitChunk: + BitAndAssign + BitOr { + /// The representation of this type on the stack. type Bytes: std::ops::Index + std::ops::IndexMut + for<'a> std::convert::TryFrom<&'a [u8]> + std::fmt::Debug; + /// A value with a single bit set at the rightmost position.
fn one() -> Self; + /// A value with no bits set. fn zero() -> Self; + /// Convert itself into bytes. fn to_ne_bytes(self) -> Self::Bytes; + /// Convert itself from bytes. fn from_ne_bytes(v: Self::Bytes) -> Self; } @@ -148,6 +153,7 @@ pub struct BitChunkIter { } impl BitChunkIter { + /// Creates a new [`BitChunkIter`] with `len` bits. #[inline] pub fn new(value: T, len: usize) -> Self { assert!(len <= std::mem::size_of::() * 8); diff --git a/src/types/index.rs b/src/types/index.rs index 110c000bc30..20146b0c595 100644 --- a/src/types/index.rs +++ b/src/types/index.rs @@ -13,6 +13,7 @@ pub struct IndexRange { } impl IndexRange { + /// Returns a new [`IndexRange`]. pub fn new(start: I, end: I) -> Self { assert!(end >= start); Self { start, end } @@ -42,7 +43,7 @@ impl Iterator for IndexRange { /// Safety: a range is always of known length unsafe impl TrustedLen for IndexRange {} -/// Trait describing any type that can be used to index a slot of an array. +/// Types that can be used to index a slot of an array. pub trait Index: NativeType + NaturalDataType @@ -51,9 +52,12 @@ pub trait Index: + num_traits::One + PartialOrd { + /// Convert itself to [`usize`]. fn to_usize(&self) -> usize; + /// Convert itself from [`usize`]. fn from_usize(index: usize) -> Option; + /// An iterator from (inclusive) `start` to (exclusive) `end`. fn range(start: usize, end: usize) -> Option> { let start = Self::from_usize(start); let end = Self::from_usize(end); diff --git a/src/types/mod.rs b/src/types/mod.rs index 46d12540887..3a30ee7801d 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -1,4 +1,4 @@ -//! This module contains traits to handle all _physical_ types used in this crate. +//! Traits to handle _all physical types_ used in this crate. //! Most physical types used in this crate are native Rust types, like `i32`. //! The most important trait is [`NativeType`], implemented for all Arrow types //! with a Rust correspondence (such as `i32` or `f64`).
@@ -20,10 +20,13 @@ use crate::datatypes::{DataType, IntervalUnit, TimeUnit}; /// Trait denoting anything that has a natural logical [`DataType`]. /// For example, [`DataType::Int32`] for `i32`. pub trait NaturalDataType { + /// The natural [`DataType`]. const DATA_TYPE: DataType; } +/// Describes whether a [`DataType`] is valid. pub unsafe trait Relation { + /// Whether `data_type` is a valid [`DataType`]. fn is_valid(data_type: &DataType) -> bool; } @@ -46,7 +49,7 @@ macro_rules! natural_type { }; } -/// Trait declaring any type that can be allocated, serialized and deserialized by this crate. +/// Declares any type that can be allocated, serialized and deserialized by this crate. /// All data-heavy memory operations are implemented for this trait alone. /// # Safety /// Do not implement. @@ -64,12 +67,16 @@ pub unsafe trait NativeType: + Sized + 'static { + /// Type denoting its representation as bytes type Bytes: AsRef<[u8]> + for<'a> TryFrom<&'a [u8]>; + /// To bytes in little endian fn to_le_bytes(&self) -> Self::Bytes; + /// To bytes in big endian fn to_be_bytes(&self) -> Self::Bytes; + /// From bytes in big endian fn from_be_bytes(bytes: Self::Bytes) -> Self; } @@ -213,16 +220,19 @@ unsafe impl NativeType for days_ms { create_relation!(days_ms, &DataType::Interval(IntervalUnit::DayTime)); impl days_ms { + /// A new [`days_ms`]. #[inline] pub fn new(days: i32, milliseconds: i32) -> Self { Self([days, milliseconds]) } + /// The number of days #[inline] pub fn days(&self) -> i32 { self.0[0] } + /// The number of milliseconds #[inline] pub fn milliseconds(&self) -> i32 { self.0[1] diff --git a/src/types/simd/mod.rs b/src/types/simd/mod.rs index 00ab6800f9c..a52c257d76a 100644 --- a/src/types/simd/mod.rs +++ b/src/types/simd/mod.rs @@ -1,19 +1,26 @@ +//! Contains traits and implementations of multi-data used in SIMD. +//! The actual representation is driven by the feature flag `"simd"`, which, if set, +//! uses `packed_simd2` to get the intrinsics.
use super::{BitChunk, NativeType}; +/// Describes the ability to convert itself from a [`BitChunk`]. pub trait FromMaskChunk { /// Convert itself from a slice. fn from_chunk(v: T) -> Self; } -/// A struct lends itself well to be compiled leveraging SIMD +/// A struct that lends itself well to be compiled leveraging SIMD pub trait NativeSimd: Default { + /// Number of lanes const LANES: usize; /// The [`NativeType`] of this struct. E.g. `f32` for a `NativeSimd = f32x16`. type Native: NativeType; /// The type holding bits for masks. type Chunk: BitChunk; + /// Type used for masking. type Mask: FromMaskChunk; + /// Sets values to `default` based on `mask`. fn select(self, mask: Self::Mask, default: Self) -> Self; /// Convert itself from a slice. diff --git a/src/util/bench_util.rs b/src/util/bench_util.rs index 14cd3bc11a3..0185384af92 100644 --- a/src/util/bench_util.rs +++ b/src/util/bench_util.rs @@ -52,6 +52,7 @@ where .to(data_type) } +/// Creates a new [`PrimitiveArray`] from random values with a pre-set seed. pub fn create_primitive_array_with_seed( size: usize, data_type: DataType, diff --git a/src/util/mod.rs b/src/util/mod.rs index 71e30dfea7f..4062ed2bdfb 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,19 +1,4 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. +//! Miscellaneous utilities used in different places in the crate. #[cfg(any(feature = "compute", feature = "io_csv"))] mod lexical;