Skip to content

Commit

Permalink
dispatch
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Feb 4, 2022
1 parent eea2710 commit 64c8bc6
Show file tree
Hide file tree
Showing 19 changed files with 211 additions and 52 deletions.
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/extend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ where
/// However if this does not cause a reallocation, the resulting data structure will not have any extra chunks
/// and thus will yield faster queries.
///
/// Prefer `extend` over `append` when you want do a query after a single append. For instance during
/// Prefer `extend` over `append` when you want to do a query after a single append. For instance during
/// online operations where you add `n` rows and rerun a query.
///
/// Prefer `append` over `extend` when you want to append many times before doing a query. For instance
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/frame/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,10 @@ impl DataFrame {
let mut r = r.cast(&st)?;

if diff_l > 0 {
l = l.extend(AnyValue::Null, diff_l)?;
l = l.extend_constant(AnyValue::Null, diff_l)?;
};
if diff_r > 0 {
r = r.extend(AnyValue::Null, diff_r)?;
r = r.extend_constant(AnyValue::Null, diff_r)?;
};

f(&l, &r)
Expand Down
70 changes: 35 additions & 35 deletions polars/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -775,36 +775,6 @@ impl DataFrame {
Ok(df)
}

/// Check whether `right` may be stacked below `left` (used by vstack/extend).
///
/// Returns `PolarsError::SchemaMisMatch` when the dtypes differ, or, when the
/// dtypes agree, when the column names differ. Returns `Ok(())` otherwise.
fn can_extend(&self, left: &Series, right: &Series) -> Result<()> {
    // A dtype mismatch is reported first, even if the names differ as well.
    if left.dtype() != right.dtype() {
        let msg = format!(
            "cannot vstack: because column datatypes (dtypes) in the two DataFrames do not match for \
             left.name='{}' with left.dtype={} != right.dtype={} with right.name='{}'",
            left.name(),
            left.dtype(),
            right.dtype(),
            right.name()
        );
        return Err(PolarsError::SchemaMisMatch(msg.into()));
    }

    // Dtypes agree at this point; only the names can still disagree.
    if left.name() != right.name() {
        let msg = format!(
            "cannot vstack: because column names in the two DataFrames do not match for \
             left.name='{}' != right.name='{}'",
            left.name(),
            right.name()
        );
        return Err(PolarsError::SchemaMisMatch(msg.into()));
    }

    Ok(())
}

/// Concatenate a DataFrame to this DataFrame
///
/// If many `vstack` operations are done, it is recommended to call [`DataFrame::rechunk`].
Expand Down Expand Up @@ -855,8 +825,8 @@ impl DataFrame {
self.columns
.iter_mut()
.zip(other.columns.iter())
.try_for_each(|(left, right)| {
self.can_extend(left, right)?;
.try_for_each::<_, Result<_>>(|(left, right)| {
can_extend(left, right)?;
left.append(right).expect("should not fail");
Ok(())
})?;
Expand All @@ -871,7 +841,7 @@ impl DataFrame {
/// If this does not cause a reallocation, the resulting data structure will not have any extra chunks
/// and thus will yield faster queries.
///
/// Prefer `extend` over `vstack` when you want do a query after a single append. For instance during
/// Prefer `extend` over `vstack` when you want to do a query after a single append. For instance during
/// online operations where you add `n` rows and rerun a query.
///
/// Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance
Expand All @@ -887,8 +857,8 @@ impl DataFrame {
self.columns
.iter_mut()
.zip(other.columns.iter())
.try_for_each(|(left, right)| {
self.can_extend(left, right)?;
.try_for_each::<_, Result<_>>(|(left, right)| {
can_extend(left, right)?;
left.extend(right).unwrap();
Ok(())
})?;
Expand Down Expand Up @@ -2887,6 +2857,36 @@ impl From<DataFrame> for Vec<Series> {
}
}

/// Check whether `right` may be stacked below `left` (used by vstack/extend).
///
/// Returns `PolarsError::SchemaMisMatch` when the dtypes differ, or, when the
/// dtypes agree, when the column names differ. Returns `Ok(())` otherwise.
fn can_extend(left: &Series, right: &Series) -> Result<()> {
    // A dtype mismatch is reported first, even if the names differ as well.
    if left.dtype() != right.dtype() {
        let msg = format!(
            "cannot vstack: because column datatypes (dtypes) in the two DataFrames do not match for \
             left.name='{}' with left.dtype={} != right.dtype={} with right.name='{}'",
            left.name(),
            left.dtype(),
            right.dtype(),
            right.name()
        );
        return Err(PolarsError::SchemaMisMatch(msg.into()));
    }

    // Dtypes agree at this point; only the names can still disagree.
    if left.name() != right.name() {
        let msg = format!(
            "cannot vstack: because column names in the two DataFrames do not match for \
             left.name='{}' != right.name='{}'",
            left.name(),
            right.name()
        );
        return Err(PolarsError::SchemaMisMatch(msg.into()));
    }

    Ok(())
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ pub fn hor_concat_df(dfs: &[DataFrame]) -> Result<DataFrame> {
let diff = max_len - df.height();
df.columns
.iter_mut()
.for_each(|s| *s = s.extend(AnyValue::Null, diff).unwrap());
.for_each(|s| *s = s.extend_constant(AnyValue::Null, diff).unwrap());
}
df
})
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/series/implementations/categorical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ impl SeriesTrait for SeriesWrap<CategoricalChunked> {
if self.0.dtype() == other.dtype() {
let other = other.categorical()?;
self.0.append(other);
self.0.merge_categorical_map(other);
self.0.categorical_map = Some(self.0.merge_categorical_map(other));
Ok(())
} else {
Err(PolarsError::SchemaMisMatch(
Expand All @@ -165,7 +165,7 @@ impl SeriesTrait for SeriesWrap<CategoricalChunked> {
if self.0.dtype() == other.dtype() {
let other = other.categorical()?;
self.0.extend(other);
self.0.merge_categorical_map(other);
self.0.categorical_map = Some(self.0.merge_categorical_map(other));
Ok(())
} else {
Err(PolarsError::SchemaMisMatch(
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/series/implementations/duration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ impl SeriesTrait for SeriesWrap<DurationChunked> {
fn extend(&mut self, other: &Series) -> Result<()> {
if self.0.dtype() == other.dtype() {
let other = other.to_physical_repr();
self.0.append(other.as_ref().as_ref().as_ref());
self.0.extend(other.as_ref().as_ref().as_ref());
Ok(())
} else {
Err(PolarsError::SchemaMisMatch(
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/series/implementations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ macro_rules! impl_dyn_series {

fn extend(&mut self, other: &Series) -> Result<()> {
if self.0.dtype() == other.dtype() {
self.0.append(other.as_ref().as_ref());
self.0.extend(other.as_ref().as_ref());
Ok(())
} else {
Err(PolarsError::SchemaMisMatch(
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/series/ops/extend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::prelude::*;

impl Series {
/// Extend with a constant value.
pub fn extend(&self, value: AnyValue, n: usize) -> Result<Self> {
pub fn extend_constant(&self, value: AnyValue, n: usize) -> Result<Self> {
use AnyValue::*;
let s = match value {
Float32(v) => Series::new("", vec![v]),
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/dataframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ Manipulation/ selection
DataFrame.with_column
DataFrame.hstack
DataFrame.vstack
DataFrame.extend
DataFrame.groupby
DataFrame.groupby_dynamic
DataFrame.groupby_rolling
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ Manipulation/ selection
Expr.reshape
Expr.to_physical
Expr.shuffle
Expr.extend_constant
Expr.extend

Column names
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ Manipulation/ selection
Series.reshape
Series.to_dummies
Series.shuffle
Series.extend_constant
Series.extend

Various
Expand Down
20 changes: 19 additions & 1 deletion py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2303,7 +2303,25 @@ def extend(self, value: Optional[Union[int, float, str, bool]], n: int) -> "Expr
n
The number of values to extend.
"""
return wrap_expr(self._pyexpr.extend(value, n))
return wrap_expr(self._pyexpr.extend_constant(value, n))

def extend_constant(
    self, value: Optional[Union[int, float, str, bool]], n: int
) -> "Expr":
    """
    Extend the Series with given number of values.

    Parameters
    ----------
    value
        The value to extend the Series with. This value may be None to fill with nulls.
    n
        The number of values to extend.
    """
    # Delegate to the native expression. Calling `self.extend_constant` here
    # would recurse without end.
    return wrap_expr(self._pyexpr.extend_constant(value, n))

# Below are the namespaces defined. Keep these at the end of the definition of Expr, as to not confuse mypy with
# the type annotation `str` with the namespace "str"
Expand Down
24 changes: 24 additions & 0 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3212,6 +3212,30 @@ def vstack(self, df: "DataFrame", in_place: bool = False) -> Optional["DataFrame
else:
return wrap_df(self._df.vstack(df._df))

def extend(self, other: "DataFrame") -> None:
    """
    Extend the memory backed by this `DataFrame` with the values from `other`.

    Different from `vstack`, which adds the chunks from `other` to the chunks of this `DataFrame`,
    `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation.

    If this does not cause a reallocation, the resulting data structure will not have any extra chunks
    and thus will yield faster queries.

    Prefer `extend` over `vstack` when you want to do a query after a single append. For instance during
    online operations where you add `n` rows and rerun a query.

    Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance
    when you read in multiple files and want to store them in a single `DataFrame`.
    In the latter case, finish the sequence of `vstack` operations with a `rechunk`.

    Parameters
    ----------
    other
        DataFrame to vertically add.
    """
    # Mutates this DataFrame in place; nothing is returned.
    self._df.extend(other._df)

def drop(self, name: Union[str, List[str]]) -> "DataFrame":
"""
Remove column from DataFrame and return as new.
Expand Down
46 changes: 43 additions & 3 deletions py-polars/polars/internals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1127,14 +1127,33 @@ def slice(self, offset: int, length: int) -> "Series":
"""
return wrap_s(self._s.slice(offset, length))

def append(self, other: "Series") -> None:
def append(self, other: "Series", append_chunks: bool = True) -> None:
"""
Append a Series to this one.
Parameters
----------
other
Series to append.
append_chunks
If set to `True` the append operation will add the chunks from `other` to self. This is super cheap.
If set to `False` the append operation will do the same as `DataFrame.extend`, which:
extends the memory backed by this `Series` with the values from `other`.
Different from `append chunks`, `extend` appends the data from `other` to the underlying memory locations and
thus may cause a reallocation (which are expensive).
If this does not cause a reallocation, the resulting data structure will not have any extra chunks
and thus will yield faster queries.
Prefer `extend` over `append_chunks` when you want to do a query after a single append. For instance during
online operations where you add `n` rows and rerun a query.
Prefer `append_chunks` over `extend` when you want to append many times before doing a query. For instance
when you read in multiple files and want to store them in a single `Series`.
In the latter case, finish the sequence of `append_chunks` operations with a `rechunk`.
Examples
--------
Expand All @@ -1154,7 +1173,10 @@ def append(self, other: "Series") -> None:
]
"""
self._s.append(other._s)
if append_chunks:
self._s.append(other._s)
else:
self._s.extend(other._s)

def filter(self, predicate: Union["Series", list]) -> "Series":
"""
Expand Down Expand Up @@ -3385,18 +3407,36 @@ def ewm_var(
.to_series()
)

def extend_constant(
    self, value: Optional[Union[int, float, str, bool]], n: int
) -> "Series":
    """
    Return a Series extended by `n` copies of `value`.

    Parameters
    ----------
    value
        The value to extend the Series with. This value may be None to fill with nulls.
    n
        The number of values to extend.
    """
    extended = self._s.extend_constant(value, n)
    return wrap_s(extended)

def extend(self, value: Optional[Union[int, float, str, bool]], n: int) -> "Series":
"""
Extend the Series with given number of values.
.. deprecated::0.12.21
use extend_constant
Parameters
----------
value
The value to extend the Series with. This value may be None to fill with nulls.
n
The number of values to extend.
"""
return wrap_s(self._s.extend(value, n))
return self.extend_constant(value, n)

@property
def time_unit(self) -> Optional[str]:
Expand Down
5 changes: 5 additions & 0 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,11 @@ impl PyDataFrame {
Ok(df.into())
}

/// Extend this frame in place with the rows of `df`.
///
/// Any error from the underlying `DataFrame::extend` is wrapped in
/// `PyPolarsEr` and returned as a Python error.
pub fn extend(&mut self, df: &PyDataFrame) -> PyResult<()> {
    match self.df.extend(&df.df) {
        Ok(_) => Ok(()),
        Err(err) => Err(PyPolarsEr::from(err).into()),
    }
}

pub fn vstack_mut(&mut self, df: &PyDataFrame) -> PyResult<()> {
self.df.vstack_mut(&df.df).map_err(PyPolarsEr::from)?;
Ok(())
Expand Down
4 changes: 2 additions & 2 deletions py-polars/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1246,7 +1246,7 @@ impl PyExpr {
};
self.inner.clone().ewm_var(options).into()
}
pub fn extend(&self, py: Python, value: Wrap<AnyValue>, n: usize) -> Self {
pub fn extend_constant(&self, py: Python, value: Wrap<AnyValue>, n: usize) -> Self {
let value = value.into_py(py);
self.inner
.clone()
Expand All @@ -1255,7 +1255,7 @@ impl PyExpr {
let gil = Python::acquire_gil();
let py = gil.python();
let value = value.extract::<Wrap<AnyValue>>(py).unwrap().0;
s.extend(value, n)
s.extend_constant(value, n)
},
GetOutput::same_type(),
)
Expand Down
Loading

0 comments on commit 64c8bc6

Please sign in to comment.