Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Improved documentation #306

Merged
merged 2 commits into from
Aug 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/array/binary/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ use crate::{

use super::BinaryArray;

/// Iterator over the values of a [`BinaryArray`], yielding slices of `&[u8]`.
#[derive(Debug, Clone)]
pub struct BinaryValueIter<'a, O: Offset> {
// The array whose values are iterated over.
array: &'a BinaryArray<O>,
// Current position within `array`; `new` initializes it to 0.
index: usize,
}

impl<'a, O: Offset> BinaryValueIter<'a, O> {
/// Creates a [`BinaryValueIter`] over `array` with its index set to 0.
#[inline]
pub fn new(array: &'a BinaryArray<O>) -> Self {
    let index = 0;
    Self { array, index }
}
Expand Down
32 changes: 28 additions & 4 deletions src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ mod from;
mod mutable;
pub use mutable::*;

/// A [`BinaryArray`] is a nullable array of bytes - the Arrow equivalent of `Vec<Option<Vec<u8>>>`.
#[derive(Debug, Clone)]
pub struct BinaryArray<O: Offset> {
data_type: DataType,
Expand All @@ -21,11 +22,14 @@ pub struct BinaryArray<O: Offset> {
offset: usize,
}

// constructors
impl<O: Offset> BinaryArray<O> {
/// Creates an empty [`BinaryArray`], i.e. one whose `.len()` is zero.
///
/// The array is built from a single zero offset, an empty values buffer
/// and no validity bitmap.
pub fn new_empty() -> Self {
    let offsets = Buffer::from(&[O::zero()]);
    let values = Buffer::new();
    Self::from_data(offsets, values, None)
}

/// Creates a null [`BinaryArray`], i.e. whose `.null_count() == .len()`.
#[inline]
pub fn new_null(length: usize) -> Self {
Self::from_data(
Expand All @@ -35,9 +39,17 @@ impl<O: Offset> BinaryArray<O> {
)
}

/// Creates a new [`BinaryArray`] from lower-level parts
/// # Panics
/// * The length of the offset buffer must be at least 1
/// * The length of the values must be equal to the last offset value
pub fn from_data(offsets: Buffer<O>, values: Buffer<u8>, validity: Option<Bitmap>) -> Self {
check_offsets(&offsets, values.len());

if let Some(validity) = &validity {
assert_eq!(offsets.len() - 1, validity.len());
}

Self {
data_type: if O::is_large() {
DataType::LargeBinary
Expand All @@ -51,6 +63,11 @@ impl<O: Offset> BinaryArray<O> {
}
}

/// Creates a new [`BinaryArray`] by slicing this [`BinaryArray`].
/// # Implementation
/// This function is `O(1)`: all data will be shared between both arrays.
/// # Panics
/// iff `offset + length > self.len()`.
pub fn slice(&self, offset: usize, length: usize) -> Self {
let validity = self.validity.clone().map(|x| x.slice(offset, length));
let offsets = self.offsets.clone().slice(offset, length + 1);
Expand All @@ -62,19 +79,24 @@ impl<O: Offset> BinaryArray<O> {
offset: self.offset + offset,
}
}
}

/// Returns the element at index `i` as &str
// accessors
impl<O: Offset> BinaryArray<O> {
/// Returns the element at index `i`
/// # Panics
/// iff `i >= self.len()`
pub fn value(&self, i: usize) -> &[u8] {
let offsets = self.offsets.as_slice();
let offset = offsets[i];
let offset_1 = offsets[i + 1];
let length = (offset_1 - offset).to_usize();
let offset = offset.to_usize();

&self.values.as_slice()[offset..offset + length]
&self.values[offset..offset + length]
}

/// Returns the element at index `i` as &str
/// Returns the element at index `i`
/// # Safety
/// Assumes that `i < self.len()`.
pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
Expand All @@ -83,14 +105,16 @@ impl<O: Offset> BinaryArray<O> {
let length = (offset_1 - offset).to_usize();
let offset = offset.to_usize();

std::slice::from_raw_parts(self.values.as_ptr().add(offset), length)
&self.values[offset..offset + length]
}

/// Returns the offsets of this array: the bytes of element `i` span
/// `offsets[i]..offsets[i + 1]` within `.values()`.
#[inline]
pub fn offsets(&self) -> &Buffer<O> {
&self.offsets
}

/// Returns all values in this array. Use `.offsets()` to slice them.
#[inline]
pub fn values(&self) -> &Buffer<u8> {
&self.values
Expand Down
7 changes: 7 additions & 0 deletions src/array/boolean/from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ use crate::trusted_len::TrustedLen;

use super::{BooleanArray, MutableBooleanArray};

impl<P: AsRef<[Option<bool>]>> From<P> for BooleanArray {
    /// Builds a [`BooleanArray`] from anything viewable as a slice of `Option<bool>`,
    /// going through a [`MutableBooleanArray`].
    fn from(slice: P) -> Self {
        let mutable = MutableBooleanArray::from(slice);
        mutable.into()
    }
}

impl BooleanArray {
/// Creates a new [`BooleanArray`] from a [`TrustedLen`] of `bool`.
#[inline]
Expand Down
8 changes: 4 additions & 4 deletions src/array/boolean/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ impl<'a> IntoIterator for &'a BooleanArray {
}

impl<'a> BooleanArray {
/// constructs a new iterator
/// Returns an iterator over the optional values of this [`BooleanArray`].
#[inline]
pub fn iter(&'a self) -> ZipValidity<'a, bool, BitmapIter<'a>> {
zip_validity(
Expand All @@ -23,7 +23,7 @@ impl<'a> BooleanArray {
)
}

/// Returns an iterator of `bool`
/// Returns an iterator over the values of this [`BooleanArray`]
#[inline]
pub fn values_iter(&'a self) -> BitmapIter<'a> {
self.values().iter()
Expand All @@ -41,7 +41,7 @@ impl<'a> IntoIterator for &'a MutableBooleanArray {
}

impl<'a> MutableBooleanArray {
/// Returns an iterator over `Option<bool>`
/// Returns an iterator over the optional values of this [`MutableBooleanArray`].
#[inline]
pub fn iter(&'a self) -> ZipValidity<'a, bool, BitmapIter<'a>> {
zip_validity(
Expand All @@ -50,7 +50,7 @@ impl<'a> MutableBooleanArray {
)
}

/// Returns an iterator of `bool`
/// Returns an iterator over the values of this [`MutableBooleanArray`]
#[inline]
pub fn values_iter(&'a self) -> BitmapIter<'a> {
self.values().iter()
Expand Down
26 changes: 8 additions & 18 deletions src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@ mod mutable;
pub use iterator::*;
pub use mutable::*;

/// A [`BooleanArray`] is arrow's equivalent to `Vec<Option<bool>>`, i.e.
/// an array designed for highly performant operations on optionally nullable booleans.
/// The size of this struct is `O(1)` as all data is stored behind an `Arc`.
/// The Arrow's equivalent to an immutable `Vec<Option<bool>>`, but with `1/16` of its size.
/// Cloning and slicing this struct is `O(1)`.
#[derive(Debug, Clone)]
pub struct BooleanArray {
data_type: DataType,
values: Bitmap,
validity: Option<Bitmap>,
offset: usize,
Expand Down Expand Up @@ -45,7 +43,6 @@ impl BooleanArray {
assert_eq!(values.len(), validity.len());
}
Self {
data_type: DataType::Boolean,
values,
validity,
offset: 0,
Expand All @@ -54,36 +51,36 @@ impl BooleanArray {

/// Returns a slice of this [`BooleanArray`].
/// # Implementation
/// This operation is `O(1)` as it amounts to essentially increase two ref counts.
/// This operation is `O(1)` as it amounts to increasing two ref counts.
/// # Panic
/// This function panics iff `offset + length > self.len()`.
#[inline]
pub fn slice(&self, offset: usize, length: usize) -> Self {
let validity = self.validity.clone().map(|x| x.slice(offset, length));
Self {
data_type: self.data_type.clone(),
values: self.values.clone().slice(offset, length),
validity,
offset: self.offset + offset,
}
}

/// Returns the element at index `i` as bool
/// Returns the value at index `i`
/// # Panic
/// This function panics iff `i >= self.len()`.
#[inline]
pub fn value(&self, i: usize) -> bool {
self.values.get_bit(i)
}

/// Returns the value at index `i` by calling `get_bit_unchecked` on the values bitmap.
///
/// # Safety
/// The caller must ensure that `i < self.len()`.
#[inline]
pub unsafe fn value_unchecked(&self, i: usize) -> bool {
self.values.get_bit_unchecked(i)
}

/// Returns the values bitmap of this [`BooleanArray`].
/// Returns the values of this [`BooleanArray`].
#[inline]
pub fn values(&self) -> &Bitmap {
&self.values
Expand All @@ -103,7 +100,7 @@ impl Array for BooleanArray {

#[inline]
fn data_type(&self) -> &DataType {
&self.data_type
&DataType::Boolean
}

#[inline]
Expand All @@ -122,10 +119,3 @@ impl std::fmt::Display for BooleanArray {
display_fmt(self.iter(), "BooleanArray", f, false)
}
}

impl<P: AsRef<[Option<bool>]>> From<P> for BooleanArray {
/// Creates a new [`BooleanArray`] out of a slice of Optional `bool`.
fn from(slice: P) -> Self {
MutableBooleanArray::from(slice).into()
}
}
3 changes: 2 additions & 1 deletion src/array/boolean/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ use crate::{

use super::BooleanArray;

/// The mutable version of [`BooleanArray`]. See [`MutableArray`] for more details.
/// The Arrow's equivalent to `Vec<Option<bool>>`, but with `1/16` of its size.
/// Converting a [`MutableBooleanArray`] into a [`BooleanArray`] is `O(1)`.
#[derive(Debug)]
pub struct MutableBooleanArray {
values: MutableBitmap,
Expand Down
2 changes: 1 addition & 1 deletion src/array/growable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub trait Growable<'a> {
/// This function panics if the range is out of bounds, i.e. if `start + len >= array.len()`.
fn extend(&mut self, index: usize, start: usize, len: usize);

/// Extends this [`GrowableArray`] with null elements, disregarding the bound arrays
/// Extends this [`Growable`] with null elements, disregarding the bound arrays
fn extend_validity(&mut self, additional: usize);

/// Converts itself to an `Arc<dyn Array>`, thereby finishing the mutation.
Expand Down
12 changes: 4 additions & 8 deletions src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
//! This module contains arrays: fixed-length and immutable containers with optional values
//! Fixed-length and immutable containers with optional values
//! that are laid out in memory according to the Arrow specification.
//! Each array type has its own `struct`. The following are the main array types:
//!
//! * [`PrimitiveArray`], an array of values with a fixed length such as integers, floats, etc.
//! * [`BooleanArray`], an array of boolean values (stored as a bitmap)
//! * [`Utf8Array`], an array of utf8 values
//! * [`BinaryArray`], an array of binary values
//! * [`ListArray`], an array of arrays (e.g. `[[1, 2], None, [], [None]]`)
//! * [`StructArray`], an array of arrays identified by a string (e.g. `{"a": [1, 2], "b": [true, false]}`)
//!
//! This module contains constructors and accessors to operate on the arrays.
//! All the arrays implement the trait [`Array`] and are often trait objects.
//! Every array has a [`DataType`], which you can access with [`Array::data_type`].
//! This can be used to `downcast_ref` a `&dyn Array` to a concrete struct.
//! Arrays can share memory via [`crate::buffer::Buffer`] and thus cloning and slicing is `O(1)`.
//! All arrays implement the trait [`Array`] and are often trait objects that can be downcast
//! to a concrete struct based on the [`DataType`] available from [`Array::data_type`].
//! Arrays share memory via [`crate::buffer::Buffer`] and thus cloning and slicing them is `O(1)`.
//!
//! This module also contains the mutable counterparts of arrays, that are neither clonable nor slicable, but that
//! can be operated in-place, such as [`MutablePrimitiveArray`] and [`MutableUtf8Array`].
Expand Down
9 changes: 9 additions & 0 deletions src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ pub use mutable::*;
/// an array designed for highly performant operations on optionally nullable slots,
/// backed by a physical type of a physical byte-width, such as `i32` or `f64`.
/// The size of this struct is `O(1)` as all data is stored behind an [`std::sync::Arc`].
/// # Example
/// ```
/// use arrow2::array::PrimitiveArray;
/// # fn main() {
/// let array = PrimitiveArray::<i32>::from([Some(1), None, Some(2)]);
/// assert_eq!(array.value(0), 1);
/// assert_eq!(array.values().as_slice(), &[1, 0, 2]);
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct PrimitiveArray<T: NativeType> {
data_type: DataType,
Expand Down
3 changes: 2 additions & 1 deletion src/array/primitive/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ use crate::{

use super::PrimitiveArray;

/// The mutable version of [`PrimitiveArray`]. See [`MutableArray`] for more details.
/// The Arrow's equivalent to `Vec<Option<T>>` where `T` is byte-size (e.g. `i32`).
/// Converting a [`MutablePrimitiveArray`] into a [`PrimitiveArray`] is `O(1)`.
#[derive(Debug)]
pub struct MutablePrimitiveArray<T: NativeType> {
data_type: DataType,
Expand Down
13 changes: 6 additions & 7 deletions src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,16 @@ mod mutable;
pub use iterator::*;
pub use mutable::*;

/// A [`Utf8Array`] is arrow's equivalent of `Vec<Option<String>>`, i.e.
/// an array designed for highly performant operations on optionally nullable strings.
/// The size of this struct is `O(1)` as all data is stored behind an `Arc`.
/// A [`Utf8Array`] is arrow's equivalent of an immutable `Vec<Option<String>>`.
/// Cloning and slicing this struct is `O(1)`.
/// # Example
/// ```
/// use std::iter::FromIterator;
/// use arrow2::array::Utf8Array;
/// # fn main() {
/// let data = vec![Some("hello"), None, Some("hello2")];
/// let array = Utf8Array::<i32>::from_iter(data);
/// assert_eq!(array.value(0), "hello");
/// let array = Utf8Array::<i32>::from([Some("hi"), None, Some("there")]);
/// assert_eq!(array.value(0), "hi");
/// assert_eq!(array.values().as_slice(), b"hithere".as_ref());
/// assert_eq!(array.offsets().as_slice(), &[0, 2, 2, 2 + 5]);
/// # }
/// ```
#[derive(Debug, Clone)]
Expand Down
2 changes: 1 addition & 1 deletion src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ impl Bitmap {
Bitmap::from_bytes(buffer.into(), length)
}

/// Creates a new [`Bitmap`] from [`Bytes`] and a length.
/// Creates a new [`Bitmap`] from a slice and length.
/// # Panic
/// Panics iff `length > bytes.len() * 8`
#[inline]
Expand Down
2 changes: 1 addition & 1 deletion src/buffer/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ impl<T: NativeType> MutableBuffer<T> {
/// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length or errors
/// if any of the items of the iterator is an error.
/// Prefer this to `collect` whenever possible, as it is ~60% faster.
/// The only difference between this and [`try_from_trusted_len_iter`] is that this works
/// The only difference between this and [`Self::try_from_trusted_len_iter`] is that this works
/// on any iterator, while `try_from_trusted_len_iter` requires the iterator to implement the trait
/// [`TrustedLen`], which not every iterator currently implements due to limitations of the Rust compiler.
/// # Safety
Expand Down
2 changes: 1 addition & 1 deletion src/compute/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
use crate::array::{growable::make_growable, Array};
use crate::error::{ArrowError, Result};

/// Concatenate multiple [Array] of the same type into a single [ArrayRef].
/// Concatenates multiple [`Array`]s of the same type into a single [`Array`].
pub fn concatenate(arrays: &[&dyn Array]) -> Result<Box<dyn Array>> {
if arrays.is_empty() {
return Err(ArrowError::InvalidArgumentError(
Expand Down
Loading