Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved docs. (#430)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Sep 20, 2021
1 parent d0f3a62 commit 24f6194
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 72 deletions.
3 changes: 1 addition & 2 deletions src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// When the validity is [`None`], all slots are valid.
fn validity(&self) -> &Option<Bitmap>;

/// The number of null slots on this [`Array`]. This is usually used to branch
/// implementations to cases where optimizations can be made.
/// The number of null slots on this [`Array`].
/// # Implementation
/// This is `O(1)`.
#[inline]
Expand Down
16 changes: 11 additions & 5 deletions src/array/primitive/from_natural.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ impl<T: NativeType + NaturalDataType, Ptr: std::borrow::Borrow<Option<T>>> FromI
}

impl<T: NativeType + NaturalDataType> PrimitiveArray<T> {
/// Creates a new array out an iterator over values
/// Creates a (non-null) [`PrimitiveArray`] from an iterator of values.
/// # Implementation
/// This does not assume that the iterator has a known length.
pub fn from_values<I: IntoIterator<Item = T>>(iter: I) -> Self {
Self::from_data(
T::DATA_TYPE,
Expand All @@ -32,14 +34,18 @@ impl<T: NativeType + NaturalDataType> PrimitiveArray<T> {
)
}

/// Creates a new array out an iterator over values
/// Creates a (non-null) [`PrimitiveArray`] from a slice of values.
/// # Implementation
/// This is essentially a memcopy and is the fastest way to create a [`PrimitiveArray`].
pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {
Self::from_data(T::DATA_TYPE, Buffer::<T>::from(slice), None)
}
}

impl<T: NativeType + NaturalDataType> PrimitiveArray<T> {
/// Creates a new array out an iterator over values
/// Creates a (non-null) [`PrimitiveArray`] from a [`TrustedLen`] of values.
/// # Implementation
/// This relies on the iterator being [`TrustedLen`], i.e. on it correctly reporting its length.
pub fn from_trusted_len_values_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
MutablePrimitiveArray::<T>::from_trusted_len_values_iter(iter).into()
}
Expand All @@ -52,12 +58,12 @@ impl<T: NativeType + NaturalDataType> PrimitiveArray<T> {
MutablePrimitiveArray::<T>::from_trusted_len_values_iter_unchecked(iter).into()
}

/// Creates a new [`PrimitiveArray`] from an iterator over optional values
/// Creates a [`PrimitiveArray`] from a [`TrustedLen`] of optional values.
pub fn from_trusted_len_iter<I: TrustedLen<Item = Option<T>>>(iter: I) -> Self {
MutablePrimitiveArray::<T>::from_trusted_len_iter(iter).into()
}

/// Creates a new [`PrimitiveArray`] from an iterator over optional values
/// Creates a [`PrimitiveArray`] from an iterator of optional values.
/// # Safety
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
/// I.e. that `size_hint().1` correctly reports its length.
Expand Down
24 changes: 13 additions & 11 deletions src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,16 @@ pub use mutable::*;

/// A [`PrimitiveArray`] is arrow's equivalent to `Vec<Option<T: NativeType>>`, i.e.
/// an array designed for highly performant operations on optionally nullable slots,
/// backed by a physical type of a physical byte-width, such as `i32` or `f64`.
/// backed by a physical type of a fixed byte-width, such as `i32` or `f64`.
/// The size of this struct is `O(1)` as all data is stored behind an [`std::sync::Arc`].
/// # Example
/// ```
/// use arrow2::array::PrimitiveArray;
/// use arrow2::array::{PrimitiveArray, Array};
/// use arrow2::bitmap::Bitmap;
/// # fn main() {
/// let array = PrimitiveArray::<i32>::from([Some(1), None, Some(2)]);
/// assert_eq!(array.value(0), 1);
/// assert_eq!(array.values().as_slice(), &[1, 0, 2]);
/// let array = PrimitiveArray::from([Some(1), None, Some(10)]);
/// assert_eq!(array.values().as_slice(), &[1, 0, 10]);
/// assert_eq!(array.validity(), &Some(Bitmap::from([true, false, true])));
/// # }
/// ```
#[derive(Debug, Clone)]
Expand Down Expand Up @@ -95,7 +96,7 @@ impl<T: NativeType> PrimitiveArray<T> {
}

/// Sets the validity bitmap on this [`PrimitiveArray`].
/// # Panic
/// # Panics
/// This function panics iff `validity.len() != self.len()`.
pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
Expand All @@ -106,21 +107,22 @@ impl<T: NativeType> PrimitiveArray<T> {
arr
}

/// The values [`Buffer`].
/// The values.
/// Values on null slots are undetermined (they can be anything).
#[inline]
pub fn values(&self) -> &Buffer<T> {
&self.values
}

/// Safe method to retrieve the value at slot `i`.
/// Equivalent to `self.values()[i]`.
/// Returns the value at slot `i`. Equivalent to `self.values()[i]`.
/// The value on null slots is undetermined (it can be anything).
#[inline]
pub fn value(&self, i: usize) -> T {
self.values()[i]
}

/// Returns the element at index `i` as `T`
///
/// Returns the element at index `i` as `T`.
/// The value on null slots is undetermined (it can be anything).
/// # Safety
/// Caller must be sure that `i < self.len()`
#[inline]
Expand Down
6 changes: 3 additions & 3 deletions src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ pub(crate) use field::{get_extension, Extension, Metadata};

/// The set of supported logical types.
/// Each variant uniquely identifies a logical type, which defines specific semantics of the data (e.g. how it should be represented).
/// A [`DataType`] has an unique corresponding [`PhysicalType`], obtained via [`DataType::to_physical_type`],
/// which uniquely identifies an in-memory representation of data.
/// Each variant has a corresponding [`PhysicalType`], obtained via [`DataType::to_physical_type`],
/// which declares the in-memory representation of data.
/// The [`DataType::Extension`] is special in that it augments a [`DataType`] with metadata to support custom types.
/// Use `to_logical_type` to desugar such a type and return its corresponding logical type.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
Expand Down Expand Up @@ -119,7 +119,7 @@ impl std::fmt::Display for DataType {
}
}

/// Time units defined in Arrow.
/// The time units defined in Arrow.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TimeUnit {
/// Time in seconds.
Expand Down
99 changes: 51 additions & 48 deletions src/datatypes/physical_type.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,42 @@
/// the set of valid indices used to index a dictionary-encoded Array.
/// The set of physical types: unique in-memory representations of an Arrow array.
/// A physical type has a one-to-many relationship with a [`crate::datatypes::DataType`] and
/// a one-to-one mapping to each struct in this crate that implements [`crate::array::Array`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DictionaryIndexType {
pub enum PhysicalType {
/// A Null with no allocation.
Null,
/// A boolean represented as a single bit.
Boolean,
/// An array where each slot has a known compile-time size.
Primitive(PrimitiveType),
/// Opaque binary data of variable length.
Binary,
/// Opaque binary data of fixed size.
FixedSizeBinary,
/// Opaque binary data of variable length and 64-bit offsets.
LargeBinary,
/// A variable-length string in Unicode with UTF-8 encoding.
Utf8,
/// A variable-length string in Unicode with UTF-8 encoding and 64-bit offsets.
LargeUtf8,
/// A list of some data type with variable length.
List,
/// A list of some data type with fixed length.
FixedSizeList,
/// A list of some data type with variable length and 64-bit offsets.
LargeList,
/// A nested type that contains an arbitrary number of fields.
Struct,
/// A nested type that represents slots of differing types.
Union,
/// A dictionary encoded array by `DictionaryIndexType`.
Dictionary(DictionaryIndexType),
}

/// The set of all (physical) primitive types.
/// Each type corresponds to a variant of [`crate::array::PrimitiveArray`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PrimitiveType {
/// A signed 8-bit integer.
Int8,
/// A signed 16-bit integer.
Expand All @@ -9,6 +45,8 @@ pub enum DictionaryIndexType {
Int32,
/// A signed 64-bit integer.
Int64,
/// A signed 128-bit integer.
Int128,
/// An unsigned 8-bit integer.
UInt8,
/// An unsigned 16-bit integer.
Expand All @@ -17,10 +55,20 @@ pub enum DictionaryIndexType {
UInt32,
/// An unsigned 64-bit integer.
UInt64,
/// A 32-bit floating point number.
Float32,
/// A 64-bit floating point number.
Float64,
/// Two i32 representing days and ms
DaysMs,
/// months_days_ns(i32, i32, i64)
MonthDayNano,
}

/// The set of valid index types of a dictionary-encoded Array.
/// Each type corresponds to a variant of [`crate::array::DictionaryArray`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PrimitiveType {
pub enum DictionaryIndexType {
/// A signed 8-bit integer.
Int8,
/// A signed 16-bit integer.
Expand All @@ -29,8 +77,6 @@ pub enum PrimitiveType {
Int32,
/// A signed 64-bit integer.
Int64,
/// A signed 128-bit integer.
Int128,
/// An unsigned 8-bit integer.
UInt8,
/// An unsigned 16-bit integer.
Expand All @@ -39,47 +85,4 @@ pub enum PrimitiveType {
UInt32,
/// An unsigned 64-bit integer.
UInt64,
/// A 32-bit floating point number.
Float32,
/// A 64-bit floating point number.
Float64,
/// Two i32 representing days and ms
DaysMs,
/// months_days_ns(i32, i32, i64)
MonthDayNano,
}

/// The set of physical types: unique in-memory representations of an Arrow array.
/// A physical type has a one-to-many relationship with a [`crate::datatypes::DataType`] and
/// a one-to-one mapping with each struct in this crate that implements [`crate::array::Array`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PhysicalType {
/// A Null with no allocation.
Null,
/// A boolean represented as a single bit.
Boolean,
/// An array where each slot has a known compile-time size.
Primitive(PrimitiveType),
/// Opaque binary data of variable length.
Binary,
/// Opaque binary data of fixed size.
FixedSizeBinary,
/// Opaque binary data of variable length and 64-bit offsets.
LargeBinary,
/// A variable-length string in Unicode with UTF-8 encoding.
Utf8,
/// A variable-length string in Unicode with UTF-8 encoding and 64-bit offsets.
LargeUtf8,
/// A list of some data type with variable length.
List,
/// A list of some data type with fixed length.
FixedSizeList,
/// A list of some data type with variable length and 64-bit offsets.
LargeList,
/// A nested type that contains an arbitrary number of fields.
Struct,
/// A nested type that represents slots of differing types.
Union,
/// A dictionary encoded array by `DictionaryIndexType`.
Dictionary(DictionaryIndexType),
}
5 changes: 2 additions & 3 deletions src/types/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
//! traits to handle _all physical types_ used in this crate.
//! Traits to handle _all native types_ used in this crate.
//! Most physical types used in this crate are native Rust types, like `i32`.
//! The most important trait is [`NativeType`], implemented for all Arrow types
//! with a Rust correspondence (such as `i32` or `f64`).
//! The most important trait is [`NativeType`], the generic trait of [`crate::array::PrimitiveArray`].
//!
//! Another important trait is [`BitChunk`], describing types that can be used to
//! represent chunks of bits (e.g. `u8`, `u16`), and [`BitChunkIter`], that can be used to
Expand Down

0 comments on commit 24f6194

Please sign in to comment.