Skip to content

Commit

Permalink
list namespace to polars-ops
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed May 17, 2022
1 parent 50f7bd9 commit 22b01e9
Show file tree
Hide file tree
Showing 10 changed files with 95 additions and 71 deletions.
4 changes: 2 additions & 2 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ lazy_regex = ["polars-lazy/regex"]
cum_agg = ["polars-core/cum_agg", "polars-core/cum_agg"]
rolling_window = ["polars-core/rolling_window", "polars-lazy/rolling_window"]
interpolate = ["polars-core/interpolate", "polars-lazy/interpolate"]
list = ["polars-core/list", "polars-lazy/list"]
list = ["polars-lazy/list", "polars-ops/list"]
rank = ["polars-core/rank", "polars-lazy/rank"]
diff = ["polars-core/diff", "polars-lazy/diff"]
diff = ["polars-core/diff", "polars-lazy/diff", "polars-ops/diff"]
pct_change = ["polars-core/pct_change", "polars-lazy/pct_change"]
moment = ["polars-core/moment", "polars-lazy/moment"]
arange = ["polars-lazy/arange"]
Expand Down
3 changes: 0 additions & 3 deletions polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ cum_agg = []
# rolling window functions
rolling_window = []
interpolate = []
# additional list utils
list = []
rank = []
diff = []
pct_change = ["diff"]
Expand Down Expand Up @@ -133,7 +131,6 @@ docs-selection = [
"moment",
"dtype-categorical",
"rank",
"list",
"diagonal_concat",
"horizontal_concat",
"abs",
Expand Down
3 changes: 0 additions & 3 deletions polars/polars-core/src/chunked_array/list/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
//! Special list utility methods
mod iterator;
#[cfg(feature = "list")]
#[cfg_attr(docsrs, doc(cfg(feature = "list")))]
pub mod namespace;

use crate::prelude::*;

Expand Down
6 changes: 1 addition & 5 deletions polars/polars-core/src/chunked_array/ops/full.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,7 @@ impl ChunkFullNull for ListChunked {
}

impl ListChunked {
pub(crate) fn full_null_with_dtype(
name: &str,
length: usize,
inner_dtype: &DataType,
) -> ListChunked {
pub fn full_null_with_dtype(name: &str, length: usize, inner_dtype: &DataType) -> ListChunked {
let arr = new_null_array(
ArrowDataType::LargeList(Box::new(ArrowField::new(
"item",
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ rank = ["polars-core/rank"]
diff = ["polars-core/diff"]
pct_change = ["polars-core/pct_change"]
moment = ["polars-core/moment"]
list = ["polars-core/list"]
list = ["polars-ops/list"]
abs = ["polars-core/abs"]
random = ["polars-core/random"]
dynamic_groupby = ["polars-core/dynamic_groupby"]
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-lazy/src/dsl/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use polars_core::export::arrow::temporal_conversions::NANOSECONDS;
use polars_core::prelude::*;
use polars_core::utils::arrow::temporal_conversions::SECONDS_IN_DAY;
use polars_core::utils::get_supertype;
#[cfg(feature = "list")]
use polars_ops::prelude::ListNameSpaceImpl;
use rayon::prelude::*;
use std::ops::{BitAnd, BitOr};

Expand Down
5 changes: 4 additions & 1 deletion polars/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ description = "More operations on polars data structures"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
polars-arrow = { version = "0.21.1", path = "../polars-arrow", default-features = false }
polars-core = { version = "0.21.1", path = "../polars-core", features = ["private"], default-features = false }

[features]
Expand All @@ -22,4 +23,6 @@ dtype-struct = ["polars-core/dtype-struct"]
dtype-u8 = ["polars-core/dtype-u8"]
object = ["polars-core/object"]
to_dummies = []
list_to_struct = ["polars-core/list", "polars-core/dtype-struct"]
list_to_struct = ["polars-core/dtype-struct"]
list = []
diff = []
5 changes: 5 additions & 0 deletions polars/polars-ops/src/chunked_array/list/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
use polars_core::prelude::*;

#[cfg(feature = "list")]
#[cfg_attr(docsrs, doc(cfg(feature = "list")))]
mod namespace;
#[cfg(feature = "list_to_struct")]
mod to_struct;

#[cfg(feature = "list")]
pub use namespace::*;
#[cfg(feature = "list_to_struct")]
pub use to_struct::*;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use crate::chunked_array::builder::get_list_builder;
use crate::prelude::*;
use crate::series::ops::NullBehavior;
use super::*;
use polars_arrow::kernels::list::sublist_get;
use polars_arrow::prelude::ValueSize;
use polars_core::chunked_array::builder::get_list_builder;
use polars_core::series::ops::NullBehavior;
use polars_core::utils::CustomIterTools;
use std::convert::TryFrom;
use std::fmt::Write;

Expand Down Expand Up @@ -52,22 +53,23 @@ fn cast_rhs(
Ok(())
}

impl ListChunked {
pub trait ListNameSpaceImpl: AsList {
/// In case the inner dtype is [`DataType::Utf8`], the individual items will be joined into a
/// single string separated by `separator`.
pub fn lst_join(&self, separator: &str) -> Result<Utf8Chunked> {
match self.inner_dtype() {
fn lst_join(&self, separator: &str) -> Result<Utf8Chunked> {
let ca = self.as_list();
match ca.inner_dtype() {
DataType::Utf8 => {
// used to amortize heap allocs
let mut buf = String::with_capacity(128);

let mut builder = Utf8ChunkedBuilder::new(
self.name(),
self.len(),
self.get_values_size() + separator.len() * self.len(),
ca.name(),
ca.len(),
ca.get_values_size() + separator.len() * ca.len(),
);

self.amortized_iter().for_each(|opt_s| {
ca.amortized_iter().for_each(|opt_s| {
let opt_val = opt_s.map(|s| {
// make sure that we don't write values of previous iteration
buf.clear();
Expand Down Expand Up @@ -97,114 +99,129 @@ impl ListChunked {
}
}

pub fn lst_max(&self) -> Series {
self.apply_amortized(|s| s.as_ref().max_as_series())
fn lst_max(&self) -> Series {
let ca = self.as_list();
ca.apply_amortized(|s| s.as_ref().max_as_series())
.explode()
.unwrap()
.into_series()
}

pub fn lst_min(&self) -> Series {
self.apply_amortized(|s| s.as_ref().min_as_series())
fn lst_min(&self) -> Series {
let ca = self.as_list();
ca.apply_amortized(|s| s.as_ref().min_as_series())
.explode()
.unwrap()
.into_series()
}

pub fn lst_sum(&self) -> Series {
self.apply_amortized(|s| s.as_ref().sum_as_series())
fn lst_sum(&self) -> Series {
let ca = self.as_list();
ca.apply_amortized(|s| s.as_ref().sum_as_series())
.explode()
.unwrap()
.into_series()
}

pub fn lst_mean(&self) -> Float64Chunked {
self.amortized_iter()
fn lst_mean(&self) -> Float64Chunked {
let ca = self.as_list();
ca.amortized_iter()
.map(|s| s.and_then(|s| s.as_ref().mean()))
.collect()
}

#[must_use]
pub fn lst_sort(&self, reverse: bool) -> ListChunked {
self.apply_amortized(|s| s.as_ref().sort(reverse))
fn lst_sort(&self, reverse: bool) -> ListChunked {
let ca = self.as_list();
ca.apply_amortized(|s| s.as_ref().sort(reverse))
}

#[must_use]
pub fn lst_reverse(&self) -> ListChunked {
self.apply_amortized(|s| s.as_ref().reverse())
fn lst_reverse(&self) -> ListChunked {
let ca = self.as_list();
ca.apply_amortized(|s| s.as_ref().reverse())
}

pub fn lst_unique(&self) -> Result<ListChunked> {
self.try_apply_amortized(|s| s.as_ref().unique())
fn lst_unique(&self) -> Result<ListChunked> {
let ca = self.as_list();
ca.try_apply_amortized(|s| s.as_ref().unique())
}

pub fn lst_arg_min(&self) -> IdxCa {
let mut out: IdxCa = self
fn lst_arg_min(&self) -> IdxCa {
let ca = self.as_list();
let mut out: IdxCa = ca
.amortized_iter()
.map(|opt_s| opt_s.and_then(|s| s.as_ref().arg_min().map(|idx| idx as IdxSize)))
.collect_trusted();
out.rename(self.name());
out.rename(ca.name());
out
}

pub fn lst_arg_max(&self) -> IdxCa {
let mut out: IdxCa = self
fn lst_arg_max(&self) -> IdxCa {
let ca = self.as_list();
let mut out: IdxCa = ca
.amortized_iter()
.map(|opt_s| opt_s.and_then(|s| s.as_ref().arg_max().map(|idx| idx as IdxSize)))
.collect_trusted();
out.rename(self.name());
out.rename(ca.name());
out
}

#[cfg(feature = "diff")]
#[cfg_attr(docsrs, doc(cfg(feature = "diff")))]
pub fn lst_diff(&self, n: usize, null_behavior: NullBehavior) -> ListChunked {
self.apply_amortized(|s| s.as_ref().diff(n, null_behavior))
fn lst_diff(&self, n: usize, null_behavior: NullBehavior) -> ListChunked {
let ca = self.as_list();
ca.apply_amortized(|s| s.as_ref().diff(n, null_behavior))
}

pub fn lst_shift(&self, periods: i64) -> ListChunked {
self.apply_amortized(|s| s.as_ref().shift(periods))
fn lst_shift(&self, periods: i64) -> ListChunked {
let ca = self.as_list();
ca.apply_amortized(|s| s.as_ref().shift(periods))
}

pub fn lst_slice(&self, offset: i64, length: usize) -> ListChunked {
self.apply_amortized(|s| s.as_ref().slice(offset, length))
fn lst_slice(&self, offset: i64, length: usize) -> ListChunked {
let ca = self.as_list();
ca.apply_amortized(|s| s.as_ref().slice(offset, length))
}

pub fn lst_lengths(&self) -> IdxCa {
let mut lengths = Vec::with_capacity(self.len());
self.downcast_iter().for_each(|arr| {
fn lst_lengths(&self) -> IdxCa {
let ca = self.as_list();
let mut lengths = Vec::with_capacity(ca.len());
ca.downcast_iter().for_each(|arr| {
let offsets = arr.offsets().as_slice();
let mut last = offsets[0];
for o in &offsets[1..] {
lengths.push((*o - last) as IdxSize);
last = *o;
}
});
IdxCa::from_vec(self.name(), lengths)
IdxCa::from_vec(ca.name(), lengths)
}

/// Get the value by index in the sublists.
/// So index `0` would return the first item of every sublist
/// and index `-1` would return the last item of every sublist.
/// If an index is out of bounds, it will return a `None`.
pub fn lst_get(&self, idx: i64) -> Result<Series> {
let chunks = self
fn lst_get(&self, idx: i64) -> Result<Series> {
let ca = self.as_list();
let chunks = ca
.downcast_iter()
.map(|arr| sublist_get(arr, idx))
.collect::<Vec<_>>();
Series::try_from((self.name(), chunks))
Series::try_from((ca.name(), chunks))
}

pub fn lst_concat(&self, other: &[Series]) -> Result<ListChunked> {
fn lst_concat(&self, other: &[Series]) -> Result<ListChunked> {
let ca = self.as_list();
let other_len = other.len();
let length = self.len();
let length = ca.len();
let mut other = other.to_vec();
let dtype = self.dtype();
let inner_type = self.inner_dtype();
let dtype = ca.dtype();
let inner_type = ca.inner_dtype();

// broadcasting path in case all series in `other` have unit length
// this path will not expand the series, so it saves memory
if other.iter().all(|s| s.len() == 1) && self.len() != 1 {
if other.iter().all(|s| s.len() == 1) && ca.len() != 1 {
cast_rhs(&mut other, &inner_type, dtype, length, false)?;
let to_append = other
.iter()
Expand All @@ -215,7 +232,11 @@ impl ListChunked {
.collect::<Vec<_>>();
// there was a None, so all values will be None
if to_append.len() != other_len {
return Ok(Self::full_null_with_dtype(self.name(), length, &inner_type));
return Ok(ListChunked::full_null_with_dtype(
ca.name(),
length,
&inner_type,
));
}

let vals_size_other = other
Expand All @@ -225,11 +246,11 @@ impl ListChunked {

let mut builder = get_list_builder(
&inner_type,
self.get_values_size() + vals_size_other + 1,
ca.get_values_size() + vals_size_other + 1,
length,
self.name(),
ca.name(),
)?;
self.into_iter().for_each(|opt_s| {
ca.into_iter().for_each(|opt_s| {
let opt_s = opt_s.map(|mut s| {
for append in &to_append {
s.append(append).unwrap();
Expand All @@ -252,15 +273,15 @@ impl ListChunked {
for s in other.iter_mut() {
iters.push(s.list()?.amortized_iter())
}
let mut first_iter = self.into_iter();
let mut first_iter = ca.into_iter();
let mut builder = get_list_builder(
&inner_type,
self.get_values_size() + vals_size_other + 1,
ca.get_values_size() + vals_size_other + 1,
length,
self.name(),
ca.name(),
)?;

for _ in 0..self.len() {
for _ in 0..ca.len() {
let mut acc = match first_iter.next().unwrap() {
Some(s) => s,
None => {
Expand Down Expand Up @@ -294,3 +315,5 @@ impl ListChunked {
}
}
}

// Empty impl: `ListChunked` overrides nothing and uses the provided (default)
// bodies of the `lst_*` methods declared on `ListNameSpaceImpl`.
impl ListNameSpaceImpl for ListChunked {}
1 change: 1 addition & 0 deletions py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 22b01e9

Please sign in to comment.