diff --git a/polars/Cargo.toml b/polars/Cargo.toml index 5cd7a0063962..e9d9677a5cd8 100644 --- a/polars/Cargo.toml +++ b/polars/Cargo.toml @@ -85,9 +85,9 @@ lazy_regex = ["polars-lazy/regex"] cum_agg = ["polars-core/cum_agg", "polars-core/cum_agg"] rolling_window = ["polars-core/rolling_window", "polars-lazy/rolling_window"] interpolate = ["polars-core/interpolate", "polars-lazy/interpolate"] -list = ["polars-core/list", "polars-lazy/list"] +list = ["polars-lazy/list", "polars-ops/list"] rank = ["polars-core/rank", "polars-lazy/rank"] -diff = ["polars-core/diff", "polars-lazy/diff"] +diff = ["polars-core/diff", "polars-lazy/diff", "polars-ops/diff"] pct_change = ["polars-core/pct_change", "polars-lazy/pct_change"] moment = ["polars-core/moment", "polars-lazy/moment"] arange = ["polars-lazy/arange"] diff --git a/polars/polars-core/Cargo.toml b/polars/polars-core/Cargo.toml index 3add486b003c..d18223950099 100644 --- a/polars/polars-core/Cargo.toml +++ b/polars/polars-core/Cargo.toml @@ -62,8 +62,6 @@ cum_agg = [] # rolling window functions rolling_window = [] interpolate = [] -# additional list utils -list = [] rank = [] diff = [] pct_change = ["diff"] @@ -133,7 +131,6 @@ docs-selection = [ "moment", "dtype-categorical", "rank", - "list", "diagonal_concat", "horizontal_concat", "abs", diff --git a/polars/polars-core/src/chunked_array/list/mod.rs b/polars/polars-core/src/chunked_array/list/mod.rs index 00b6bd1344dc..0d0d20cdec57 100644 --- a/polars/polars-core/src/chunked_array/list/mod.rs +++ b/polars/polars-core/src/chunked_array/list/mod.rs @@ -1,8 +1,5 @@ //! Special list utility methods mod iterator; -#[cfg(feature = "list")] -#[cfg_attr(docsrs, doc(cfg(feature = "list")))] -pub mod namespace; use crate::prelude::*; diff --git a/polars/polars-core/src/chunked_array/ops/full.rs b/polars/polars-core/src/chunked_array/ops/full.rs index 41f79abf447d..a8be315aca86 100644 --- a/polars/polars-core/src/chunked_array/ops/full.rs +++ b/polars/polars-core/src/chunked_array/ops/full.rs @@ -78,11 +78,7 @@ impl ChunkFullNull for ListChunked { } impl ListChunked { - pub(crate) fn full_null_with_dtype( - name: &str, - length: usize, - inner_dtype: &DataType, - ) -> ListChunked { + pub fn full_null_with_dtype(name: &str, length: usize, inner_dtype: &DataType) -> ListChunked { let arr = new_null_array( ArrowDataType::LargeList(Box::new(ArrowField::new( "item", diff --git a/polars/polars-lazy/Cargo.toml b/polars/polars-lazy/Cargo.toml index d9b4865dbc6f..a8d175ee7fbb 100644 --- a/polars/polars-lazy/Cargo.toml +++ b/polars/polars-lazy/Cargo.toml @@ -53,7 +53,7 @@ rank = ["polars-core/rank"] diff = ["polars-core/diff"] pct_change = ["polars-core/pct_change"] moment = ["polars-core/moment"] -list = ["polars-core/list"] +list = ["polars-ops/list"] abs = ["polars-core/abs"] random = ["polars-core/random"] dynamic_groupby = ["polars-core/dynamic_groupby"] diff --git a/polars/polars-lazy/src/dsl/functions.rs b/polars/polars-lazy/src/dsl/functions.rs index 7351714d902d..ba71a17587cd 100644 --- a/polars/polars-lazy/src/dsl/functions.rs +++ b/polars/polars-lazy/src/dsl/functions.rs @@ -8,6 +8,8 @@ use polars_core::export::arrow::temporal_conversions::NANOSECONDS; use polars_core::prelude::*; use polars_core::utils::arrow::temporal_conversions::SECONDS_IN_DAY; use polars_core::utils::get_supertype; +#[cfg(feature = "list")] +use polars_ops::prelude::ListNameSpaceImpl; use rayon::prelude::*; use std::ops::{BitAnd, BitOr}; diff --git a/polars/polars-ops/Cargo.toml b/polars/polars-ops/Cargo.toml index 2496b285bdaa..595fec97a789 100644 --- a/polars/polars-ops/Cargo.toml +++ b/polars/polars-ops/Cargo.toml @@ -10,6 +10,7 @@ description = "More operations on polars data structures" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +polars-arrow = { version = "0.21.1", path = "../polars-arrow", default-features = false } polars-core = { version = "0.21.1", path = "../polars-core", features = ["private"], default-features = false } [features] @@ -22,4 +23,6 @@ dtype-struct = ["polars-core/dtype-struct"] dtype-u8 = ["polars-core/dtype-u8"] object = ["polars-core/object"] to_dummies = [] -list_to_struct = ["polars-core/list", "polars-core/dtype-struct"] +list_to_struct = ["polars-core/dtype-struct"] +list = [] +diff = [] diff --git a/polars/polars-ops/src/chunked_array/list/mod.rs b/polars/polars-ops/src/chunked_array/list/mod.rs index 9b5219947506..0cc5ce8d4ccd 100644 --- a/polars/polars-ops/src/chunked_array/list/mod.rs +++ b/polars/polars-ops/src/chunked_array/list/mod.rs @@ -1,8 +1,13 @@ use polars_core::prelude::*; +#[cfg(feature = "list")] +#[cfg_attr(docsrs, doc(cfg(feature = "list")))] +mod namespace; #[cfg(feature = "list_to_struct")] mod to_struct; +#[cfg(feature = "list")] +pub use namespace::*; #[cfg(feature = "list_to_struct")] pub use to_struct::*; diff --git a/polars/polars-core/src/chunked_array/list/namespace.rs b/polars/polars-ops/src/chunked_array/list/namespace.rs similarity index 69% rename from polars/polars-core/src/chunked_array/list/namespace.rs rename to polars/polars-ops/src/chunked_array/list/namespace.rs index f68bb7c523a9..a5d3248e16bd 100644 --- a/polars/polars-core/src/chunked_array/list/namespace.rs +++ b/polars/polars-ops/src/chunked_array/list/namespace.rs @@ -1,8 +1,9 @@ -use crate::chunked_array::builder::get_list_builder; -use crate::prelude::*; -use crate::series::ops::NullBehavior; +use super::*; use polars_arrow::kernels::list::sublist_get; use polars_arrow::prelude::ValueSize; +use polars_core::chunked_array::builder::get_list_builder; +use polars_core::series::ops::NullBehavior; +use polars_core::utils::CustomIterTools; use std::convert::TryFrom; use std::fmt::Write; @@ -52,22 +53,23 @@ fn cast_rhs( Ok(()) } -impl ListChunked { +pub trait ListNameSpaceImpl: AsList { /// In case the inner dtype [`DataType::Utf8`], the individual items will be joined into a /// single string separated by `separator`. - pub fn lst_join(&self, separator: &str) -> Result { - match self.inner_dtype() { + fn lst_join(&self, separator: &str) -> Result { + let ca = self.as_list(); + match ca.inner_dtype() { DataType::Utf8 => { // used to amortize heap allocs let mut buf = String::with_capacity(128); let mut builder = Utf8ChunkedBuilder::new( - self.name(), - self.len(), - self.get_values_size() + separator.len() * self.len(), + ca.name(), + ca.len(), + ca.get_values_size() + separator.len() * ca.len(), ); - self.amortized_iter().for_each(|opt_s| { + ca.amortized_iter().for_each(|opt_s| { let opt_val = opt_s.map(|s| { // make sure that we don't write values of previous iteration buf.clear(); @@ -97,82 +99,95 @@ impl ListChunked { } } - pub fn lst_max(&self) -> Series { - self.apply_amortized(|s| s.as_ref().max_as_series()) + fn lst_max(&self) -> Series { + let ca = self.as_list(); + ca.apply_amortized(|s| s.as_ref().max_as_series()) .explode() .unwrap() .into_series() } - pub fn lst_min(&self) -> Series { - self.apply_amortized(|s| s.as_ref().min_as_series()) + fn lst_min(&self) -> Series { + let ca = self.as_list(); + ca.apply_amortized(|s| s.as_ref().min_as_series()) .explode() .unwrap() .into_series() } - pub fn lst_sum(&self) -> Series { - self.apply_amortized(|s| s.as_ref().sum_as_series()) + fn lst_sum(&self) -> Series { + let ca = self.as_list(); + ca.apply_amortized(|s| s.as_ref().sum_as_series()) .explode() .unwrap() .into_series() } - pub fn lst_mean(&self) -> Float64Chunked { - self.amortized_iter() + fn lst_mean(&self) -> Float64Chunked { + let ca = self.as_list(); + ca.amortized_iter() .map(|s| s.and_then(|s| s.as_ref().mean())) .collect() } #[must_use] - pub fn lst_sort(&self, reverse: bool) -> ListChunked { - self.apply_amortized(|s| s.as_ref().sort(reverse)) + fn lst_sort(&self, reverse: bool) -> ListChunked { + let ca = self.as_list(); + ca.apply_amortized(|s| s.as_ref().sort(reverse)) } #[must_use] - pub fn lst_reverse(&self) -> ListChunked { - self.apply_amortized(|s| s.as_ref().reverse()) + fn lst_reverse(&self) -> ListChunked { + let ca = self.as_list(); + ca.apply_amortized(|s| s.as_ref().reverse()) } - pub fn lst_unique(&self) -> Result { - self.try_apply_amortized(|s| s.as_ref().unique()) + fn lst_unique(&self) -> Result { + let ca = self.as_list(); + ca.try_apply_amortized(|s| s.as_ref().unique()) } - pub fn lst_arg_min(&self) -> IdxCa { - let mut out: IdxCa = self + fn lst_arg_min(&self) -> IdxCa { + let ca = self.as_list(); + let mut out: IdxCa = ca .amortized_iter() .map(|opt_s| opt_s.and_then(|s| s.as_ref().arg_min().map(|idx| idx as IdxSize))) .collect_trusted(); - out.rename(self.name()); + out.rename(ca.name()); out } - pub fn lst_arg_max(&self) -> IdxCa { - let mut out: IdxCa = self + fn lst_arg_max(&self) -> IdxCa { + let ca = self.as_list(); + let mut out: IdxCa = ca .amortized_iter() .map(|opt_s| opt_s.and_then(|s| s.as_ref().arg_max().map(|idx| idx as IdxSize))) .collect_trusted(); - out.rename(self.name()); + out.rename(ca.name()); out } #[cfg(feature = "diff")] #[cfg_attr(docsrs, doc(cfg(feature = "diff")))] - pub fn lst_diff(&self, n: usize, null_behavior: NullBehavior) -> ListChunked { - self.apply_amortized(|s| s.as_ref().diff(n, null_behavior)) + fn lst_diff(&self, n: usize, null_behavior: NullBehavior) -> ListChunked { + let ca = self.as_list(); + ca.apply_amortized(|s| s.as_ref().diff(n, null_behavior)) } - pub fn lst_shift(&self, periods: i64) -> ListChunked { - self.apply_amortized(|s| s.as_ref().shift(periods)) + fn lst_shift(&self, periods: i64) -> ListChunked { + let ca = self.as_list(); + ca.apply_amortized(|s| s.as_ref().shift(periods)) } - pub fn lst_slice(&self, offset: i64, length: usize) -> ListChunked { - self.apply_amortized(|s| s.as_ref().slice(offset, length)) + fn lst_slice(&self, offset: i64, length: usize) -> ListChunked { + let ca = self.as_list(); + ca.apply_amortized(|s| s.as_ref().slice(offset, length)) } - pub fn lst_lengths(&self) -> IdxCa { - let mut lengths = Vec::with_capacity(self.len()); - self.downcast_iter().for_each(|arr| { + fn lst_lengths(&self) -> IdxCa { + let ca = self.as_list(); + let mut lengths = Vec::with_capacity(ca.len()); + ca.downcast_iter().for_each(|arr| { let offsets = arr.offsets().as_slice(); let mut last = offsets[0]; for o in &offsets[1..] { @@ -180,31 +195,33 @@ impl ListChunked { last = *o; } }); - IdxCa::from_vec(self.name(), lengths) + IdxCa::from_vec(ca.name(), lengths) } /// Get the value by index in the sublists. /// So index `0` would return the first item of every sublist /// and index `-1` would return the last item of every sublist /// if an index is out of bounds, it will return a `None`. - pub fn lst_get(&self, idx: i64) -> Result { - let chunks = self + fn lst_get(&self, idx: i64) -> Result { + let ca = self.as_list(); + let chunks = ca .downcast_iter() .map(|arr| sublist_get(arr, idx)) .collect::>(); - Series::try_from((self.name(), chunks)) + Series::try_from((ca.name(), chunks)) } - pub fn lst_concat(&self, other: &[Series]) -> Result { + fn lst_concat(&self, other: &[Series]) -> Result { + let ca = self.as_list(); let other_len = other.len(); - let length = self.len(); + let length = ca.len(); let mut other = other.to_vec(); - let dtype = self.dtype(); - let inner_type = self.inner_dtype(); + let dtype = ca.dtype(); + let inner_type = ca.inner_dtype(); // broadcasting path in case all unit length // this path will not expand the series, so saves memory - if other.iter().all(|s| s.len() == 1) && self.len() != 1 { + if other.iter().all(|s| s.len() == 1) && ca.len() != 1 { cast_rhs(&mut other, &inner_type, dtype, length, false)?; let to_append = other .iter() @@ -215,7 +232,11 @@ impl ListChunked { .collect::>(); // there was a None, so all values will be None if to_append.len() != other_len { - return Ok(Self::full_null_with_dtype(self.name(), length, &inner_type)); + return Ok(ListChunked::full_null_with_dtype( + ca.name(), + length, + &inner_type, + )); } let vals_size_other = other @@ -225,11 +246,11 @@ impl ListChunked { let mut builder = get_list_builder( &inner_type, - self.get_values_size() + vals_size_other + 1, + ca.get_values_size() + vals_size_other + 1, length, - self.name(), + ca.name(), )?; - self.into_iter().for_each(|opt_s| { + ca.into_iter().for_each(|opt_s| { let opt_s = opt_s.map(|mut s| { for append in &to_append { s.append(append).unwrap(); @@ -252,15 +273,15 @@ impl ListChunked { for s in other.iter_mut() { iters.push(s.list()?.amortized_iter()) } - let mut first_iter = self.into_iter(); + let mut first_iter = ca.into_iter(); let mut builder = get_list_builder( &inner_type, - self.get_values_size() + vals_size_other + 1, + ca.get_values_size() + vals_size_other + 1, length, - self.name(), + ca.name(), )?; - for _ in 0..self.len() { + for _ in 0..ca.len() { let mut acc = match first_iter.next().unwrap() { Some(s) => s, None => { @@ -294,3 +315,5 @@ impl ListChunked { } } } + +impl ListNameSpaceImpl for ListChunked {} diff --git a/py-polars/Cargo.lock b/py-polars/Cargo.lock index 96b0648e3e63..414b91e344e1 100644 --- a/py-polars/Cargo.lock +++ b/py-polars/Cargo.lock @@ -1218,6 +1218,7 @@ dependencies = [ name = "polars-ops" version = "0.21.1" dependencies = [ + "polars-arrow", "polars-core", ]