Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Improved documentation #430

Merged
merged 1 commit into from
Sep 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// When the validity is [`None`], all slots are valid.
fn validity(&self) -> &Option<Bitmap>;

/// The number of null slots on this [`Array`]. This is usually used to branch
/// implementations to cases where optimizations can be made.
/// The number of null slots on this [`Array`].
/// # Implementation
/// This is `O(1)`.
#[inline]
Expand Down
16 changes: 11 additions & 5 deletions src/array/primitive/from_natural.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ impl<T: NativeType + NaturalDataType, Ptr: std::borrow::Borrow<Option<T>>> FromI
}

impl<T: NativeType + NaturalDataType> PrimitiveArray<T> {
/// Creates a new array out an iterator over values
/// Creates a (non-null) [`PrimitiveArray`] from an iterator of values.
/// # Implementation
/// This does not assume that the iterator has a known length.
pub fn from_values<I: IntoIterator<Item = T>>(iter: I) -> Self {
Self::from_data(
T::DATA_TYPE,
Expand All @@ -32,14 +34,18 @@ impl<T: NativeType + NaturalDataType> PrimitiveArray<T> {
)
}

/// Creates a new array out an iterator over values
/// Creates a (non-null) [`PrimitiveArray`] from a slice of values.
/// # Implementation
/// This is essentially a memcopy and is the fastest way to create a [`PrimitiveArray`].
pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {
Self::from_data(T::DATA_TYPE, Buffer::<T>::from(slice), None)
}
}

impl<T: NativeType + NaturalDataType> PrimitiveArray<T> {
/// Creates a new array out an iterator over values
/// Creates a (non-null) [`PrimitiveArray`] from a [`TrustedLen`] of values.
/// # Implementation
/// This relies on the iterator being [`TrustedLen`], i.e. on `size_hint().1` correctly reporting its length.
pub fn from_trusted_len_values_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
MutablePrimitiveArray::<T>::from_trusted_len_values_iter(iter).into()
}
Expand All @@ -52,12 +58,12 @@ impl<T: NativeType + NaturalDataType> PrimitiveArray<T> {
MutablePrimitiveArray::<T>::from_trusted_len_values_iter_unchecked(iter).into()
}

/// Creates a new [`PrimitiveArray`] from an iterator over optional values
/// Creates a [`PrimitiveArray`] from a [`TrustedLen`] of optional values.
pub fn from_trusted_len_iter<I: TrustedLen<Item = Option<T>>>(iter: I) -> Self {
MutablePrimitiveArray::<T>::from_trusted_len_iter(iter).into()
}

/// Creates a new [`PrimitiveArray`] from an iterator over optional values
/// Creates a [`PrimitiveArray`] from an iterator of optional values.
/// # Safety
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
/// I.e. that `size_hint().1` correctly reports its length.
Expand Down
24 changes: 13 additions & 11 deletions src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,16 @@ pub use mutable::*;

/// A [`PrimitiveArray`] is arrow's equivalent to `Vec<Option<T: NativeType>>`, i.e.
/// an array designed for highly performant operations on optionally nullable slots,
/// backed by a physical type of a physical byte-width, such as `i32` or `f64`.
/// backed by a physical type of a fixed byte-width, such as `i32` or `f64`.
/// The size of this struct is `O(1)` as all data is stored behind an [`std::sync::Arc`].
/// # Example
/// ```
/// use arrow2::array::PrimitiveArray;
/// use arrow2::array::{PrimitiveArray, Array};
/// use arrow2::bitmap::Bitmap;
/// # fn main() {
/// let array = PrimitiveArray::<i32>::from([Some(1), None, Some(2)]);
/// assert_eq!(array.value(0), 1);
/// assert_eq!(array.values().as_slice(), &[1, 0, 2]);
/// let array = PrimitiveArray::from([Some(1), None, Some(10)]);
/// assert_eq!(array.values().as_slice(), &[1, 0, 10]);
/// assert_eq!(array.validity(), &Some(Bitmap::from([true, false, true])));
/// # }
/// ```
#[derive(Debug, Clone)]
Expand Down Expand Up @@ -95,7 +96,7 @@ impl<T: NativeType> PrimitiveArray<T> {
}

/// Sets the validity bitmap on this [`PrimitiveArray`].
/// # Panic
/// # Panics
/// This function panics iff `validity.len() != self.len()`.
pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
Expand All @@ -106,21 +107,22 @@ impl<T: NativeType> PrimitiveArray<T> {
arr
}

/// The values [`Buffer`].
/// The values.
/// Values on null slots are undetermined (they can be anything).
#[inline]
pub fn values(&self) -> &Buffer<T> {
&self.values
}

/// Safe method to retrieve the value at slot `i`.
/// Equivalent to `self.values()[i]`.
/// Returns the value at slot `i`. Equivalent to `self.values()[i]`.
/// The value on null slots is undetermined (it can be anything).
#[inline]
pub fn value(&self, i: usize) -> T {
self.values()[i]
}

/// Returns the element at index `i` as `T`
///
/// Returns the element at index `i` as `T`.
/// The value on null slots is undetermined (it can be anything).
/// # Safety
/// Caller must be sure that `i < self.len()`
#[inline]
Expand Down
6 changes: 3 additions & 3 deletions src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ pub(crate) use field::{get_extension, Extension, Metadata};

/// The set of supported logical types.
/// Each variant uniquely identifies a logical type, which defines specific semantics for the data (e.g. how it should be represented).
/// A [`DataType`] has an unique corresponding [`PhysicalType`], obtained via [`DataType::to_physical_type`],
/// which uniquely identifies an in-memory representation of data.
/// Each variant has a corresponding [`PhysicalType`], obtained via [`DataType::to_physical_type`],
/// which declares the in-memory representation of data.
/// The [`DataType::Extension`] is special in that it augments a [`DataType`] with metadata to support custom types.
/// Use `to_logical_type` to desugar such a type and return its corresponding logical type.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
Expand Down Expand Up @@ -119,7 +119,7 @@ impl std::fmt::Display for DataType {
}
}

/// Time units defined in Arrow.
/// The time units defined in Arrow.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TimeUnit {
/// Time in seconds.
Expand Down
99 changes: 51 additions & 48 deletions src/datatypes/physical_type.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,42 @@
/// the set of valid indices used to index a dictionary-encoded Array.
/// The set of physical types: unique in-memory representations of an Arrow array.
/// A physical type has a one-to-many relationship with a [`crate::datatypes::DataType`] and
/// a one-to-one mapping to each struct in this crate that implements [`crate::array::Array`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DictionaryIndexType {
pub enum PhysicalType {
/// A Null with no allocation.
Null,
/// A boolean represented as a single bit.
Boolean,
/// An array where each slot has a known compile-time size.
Primitive(PrimitiveType),
/// Opaque binary data of variable length.
Binary,
/// Opaque binary data of fixed size.
FixedSizeBinary,
/// Opaque binary data of variable length and 64-bit offsets.
LargeBinary,
/// A variable-length string in Unicode with UTF-8 encoding.
Utf8,
/// A variable-length string in Unicode with UTF-8 encoding and 64-bit offsets.
LargeUtf8,
/// A list of some data type with variable length.
List,
/// A list of some data type with fixed length.
FixedSizeList,
/// A list of some data type with variable length and 64-bit offsets.
LargeList,
/// A nested type that contains an arbitrary number of fields.
Struct,
/// A nested type that represents slots of differing types.
Union,
/// A dictionary-encoded array whose keys are of the given `DictionaryIndexType`.
Dictionary(DictionaryIndexType),
}

/// The set of all (physical) primitive types.
/// Each type corresponds to a variant of [`crate::array::PrimitiveArray`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PrimitiveType {
/// A signed 8-bit integer.
Int8,
/// A signed 16-bit integer.
Expand All @@ -9,6 +45,8 @@ pub enum DictionaryIndexType {
Int32,
/// A signed 64-bit integer.
Int64,
/// A signed 128-bit integer.
Int128,
/// An unsigned 8-bit integer.
UInt8,
/// An unsigned 16-bit integer.
Expand All @@ -17,10 +55,20 @@ pub enum DictionaryIndexType {
UInt32,
/// An unsigned 64-bit integer.
UInt64,
/// A 32-bit floating point number.
Float32,
/// A 64-bit floating point number.
Float64,
/// Two `i32`s representing days and milliseconds.
DaysMs,
/// Months, days and nanoseconds, represented as `(i32, i32, i64)`.
MonthDayNano,
}

/// The set of valid index types of a dictionary-encoded Array.
/// Each type corresponds to a variant of [`crate::array::DictionaryArray`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PrimitiveType {
pub enum DictionaryIndexType {
/// A signed 8-bit integer.
Int8,
/// A signed 16-bit integer.
Expand All @@ -29,8 +77,6 @@ pub enum PrimitiveType {
Int32,
/// A signed 64-bit integer.
Int64,
/// A signed 128-bit integer.
Int128,
/// An unsigned 8-bit integer.
UInt8,
/// An unsigned 16-bit integer.
Expand All @@ -39,47 +85,4 @@ pub enum PrimitiveType {
UInt32,
/// An unsigned 64-bit integer.
UInt64,
/// A 32-bit floating point number.
Float32,
/// A 64-bit floating point number.
Float64,
/// Two i32 representing days and ms
DaysMs,
/// months_days_ns(i32, i32, i64)
MonthDayNano,
}

/// The set of physical types: unique in-memory representations of an Arrow array.
/// A physical type has a one-to-many relationship with a [`crate::datatypes::DataType`] and
/// a one-to-one mapping with each struct in this crate that implements [`crate::array::Array`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PhysicalType {
/// A Null with no allocation.
Null,
/// A boolean represented as a single bit.
Boolean,
/// An array where each slot has a known compile-time size.
Primitive(PrimitiveType),
/// Opaque binary data of variable length.
Binary,
/// Opaque binary data of fixed size.
FixedSizeBinary,
/// Opaque binary data of variable length and 64-bit offsets.
LargeBinary,
/// A variable-length string in Unicode with UTF-8 encoding.
Utf8,
/// A variable-length string in Unicode with UTF-8 encoding and 64-bit offsets.
LargeUtf8,
/// A list of some data type with variable length.
List,
/// A list of some data type with fixed length.
FixedSizeList,
/// A list of some data type with variable length and 64-bit offsets.
LargeList,
/// A nested type that contains an arbitrary number of fields.
Struct,
/// A nested type that represents slots of differing types.
Union,
/// A dictionary encoded array by `DictionaryIndexType`.
Dictionary(DictionaryIndexType),
}
5 changes: 2 additions & 3 deletions src/types/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
//! traits to handle _all physical types_ used in this crate.
//! Traits to handle _all native types_ used in this crate.
//! Most physical types used in this crate are native Rust types, like `i32`.
//! The most important trait is [`NativeType`], implemented for all Arrow types
//! with a Rust correspondence (such as `i32` or `f64`).
//! The most important trait is [`NativeType`], the generic trait of [`crate::array::PrimitiveArray`].
//!
//! Another important trait is [`BitChunk`], describing types that can be used to
//! represent chunks of bits (e.g. `u8`, `u16`), and [`BitChunkIter`], that can be used to
Expand Down