// NOTE(review): the line below is a git diff for src/write/statistics.rs that has
// been flattened onto a single line; it is not compilable Rust as-is. Generic
// arguments look stripped (e.g. `Iterator>`, `Vec`, `&[&Option>]`) — presumably
// lost during extraction; confirm against the original patch before applying.
//
// Hunk 1 (inside `reduce`): adds an arm that downcasts every entry of `stats`
// with `as_any().downcast_ref().unwrap()` and wraps the result of
// `reduce_fix_len_binary` in `Some(Arc::new(..))`. The arm's pattern is not
// visible in this text.
//
// Hunk 2: adds `reduce_fix_len_binary`. It clones the first item of `stats` as
// the accumulator (the `unwrap` panics when the iterator is empty) and folds the
// remaining items into it:
//   - `min_value`: kept as-is when only one side is `Some`; when both are,
//     chosen by `ord_binary(x, y.clone(), false)`.
//   - `max_value`: same shape, using `ord_binary(x, y.clone(), true)`.
//   - `null_count`: summed when both sides are `Some`; otherwise whichever side
//     is `Some` is kept, so a missing count contributes nothing to the total.
//   - `distinct_count`: set to `None` on every fold step. With a single input
//     item the fold closure never runs, so the cloned value is returned unchanged.
diff --git a/src/write/statistics.rs b/src/write/statistics.rs index 02374835e..3f8e366c8 100644 --- a/src/write/statistics.rs +++ b/src/write/statistics.rs @@ -53,6 +53,10 @@ pub fn reduce(stats: &[&Option>]) -> Result { + let stats = stats.iter().map(|x| x.as_any().downcast_ref().unwrap()); + Some(Arc::new(reduce_fix_len_binary(stats))) + } _ => todo!(), }) } @@ -83,6 +87,33 @@ fn reduce_binary<'a, I: Iterator>(mut stats: I) -> }) } +fn reduce_fix_len_binary<'a, I: Iterator>(mut stats: I) -> FixedLenStatistics { + let initial = stats.next().unwrap().clone(); + stats.fold(initial, |mut acc, new| { + acc.min_value = match (acc.min_value, &new.min_value) { + (None, None) => None, + (Some(x), None) => Some(x), + (None, Some(x)) => Some(x.clone()), + (Some(x), Some(y)) => Some(ord_binary(x, y.clone(), false)), + }; + acc.max_value = match (acc.max_value, &new.max_value) { + (None, None) => None, + (Some(x), None) => Some(x), + (None, Some(x)) => Some(x.clone()), + (Some(x), Some(y)) => Some(ord_binary(x, y.clone(), true)), + }; + acc.null_count = match (acc.null_count, &new.null_count) { + (None, None) => None, + (Some(x), None) => Some(x), + (None, Some(x)) => Some(*x), + (Some(x), Some(y)) => Some(x + *y), + }; + acc.distinct_count = None; + acc + }) +} + + fn ord_binary(a: Vec, b: Vec, max: bool) -> Vec { for (v1, v2) in a.iter().zip(b.iter()) { match v1.cmp(v2) {