diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs index 40a6a18bf543..d887527561f5 100644 --- a/datafusion/src/logical_plan/expr.rs +++ b/datafusion/src/logical_plan/expr.rs @@ -36,6 +36,7 @@ use functions::{ReturnTypeFunction, ScalarFunctionImplementation, Signature}; use std::collections::{HashMap, HashSet}; use std::convert::Infallible; use std::fmt; +use std::hash::Hash; use std::ops::Not; use std::str::FromStr; use std::sync::Arc; @@ -221,7 +222,7 @@ impl fmt::Display for Column { /// assert_eq!(op, Operator::Eq); /// } /// ``` -#[derive(Clone, PartialEq, PartialOrd)] +#[derive(Clone, PartialEq, Hash)] pub enum Expr { /// An expression with a specific name. Alias(Box, String), @@ -372,6 +373,22 @@ pub enum Expr { Wildcard, } +impl PartialOrd for Expr { + fn partial_cmp(&self, other: &Self) -> Option { + use std::hash::Hasher; + + let mut hasher = ahash::AHasher::default(); + self.hash(&mut hasher); + let s = hasher.finish(); + + let mut hasher = ahash::AHasher::default(); + other.hash(&mut hasher); + let o = hasher.finish(); + + Some(s.cmp(&o)) + } +} + impl Expr { /// Returns the [arrow::datatypes::DataType] of the expression based on [arrow::datatypes::Schema]. /// diff --git a/datafusion/src/logical_plan/operators.rs b/datafusion/src/logical_plan/operators.rs index 50bd682ae3f0..bf89c9391c28 100644 --- a/datafusion/src/logical_plan/operators.rs +++ b/datafusion/src/logical_plan/operators.rs @@ -20,7 +20,7 @@ use std::{fmt, ops}; use super::{binary_expr, Expr}; /// Operators applied to expressions -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Hash)] pub enum Operator { /// Expressions are equal Eq, diff --git a/datafusion/src/logical_plan/window_frames.rs b/datafusion/src/logical_plan/window_frames.rs index d65ed005231c..94536e70d8e4 100644 --- a/datafusion/src/logical_plan/window_frames.rs +++ b/datafusion/src/logical_plan/window_frames.rs @@ -34,7 +34,7 @@ use std::fmt; /// The ending frame boundary can be omitted (if the BETWEEN and AND keywords that surround the /// starting frame boundary are also omitted), in which case the ending frame boundary defaults to /// CURRENT ROW. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Hash)] pub struct WindowFrame { /// A frame type - either ROWS, RANGE or GROUPS pub units: WindowFrameUnits, @@ -126,7 +126,7 @@ impl Default for WindowFrame { /// 5. UNBOUNDED FOLLOWING /// /// in this implementation we'll only allow to be u64 (i.e. no dynamic boundary) -#[derive(Debug, Clone, Copy, Eq)] +#[derive(Debug, Clone, Copy, Eq, Hash)] pub enum WindowFrameBound { /// 1. UNBOUNDED PRECEDING /// The frame boundary is the first row in the partition. @@ -211,7 +211,7 @@ impl WindowFrameBound { /// There are three frame types: ROWS, GROUPS, and RANGE. The frame type determines how the /// starting and ending boundaries of the frame are measured. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Hash)] pub enum WindowFrameUnits { /// The ROWS frame type means that the starting and ending boundaries for the frame are /// determined by counting individual rows relative to the current row. diff --git a/datafusion/src/physical_plan/aggregates.rs b/datafusion/src/physical_plan/aggregates.rs index 50e1a82c74c2..228d304dcb84 100644 --- a/datafusion/src/physical_plan/aggregates.rs +++ b/datafusion/src/physical_plan/aggregates.rs @@ -48,7 +48,7 @@ pub type StateTypeFunction = Arc Result>> + Send + Sync>; /// Enum of all built-in aggregate functions -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd)] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] pub enum AggregateFunction { /// count Count, diff --git a/datafusion/src/physical_plan/functions.rs b/datafusion/src/physical_plan/functions.rs index b7102a9d5162..1ca9231a0bbb 100644 --- a/datafusion/src/physical_plan/functions.rs +++ b/datafusion/src/physical_plan/functions.rs @@ -57,7 +57,7 @@ use std::convert::From; use std::{any::Any, fmt, str::FromStr, sync::Arc}; /// A function's type signature, which defines the function's supported argument types. -#[derive(Debug, Clone, PartialEq, PartialOrd)] +#[derive(Debug, Clone, PartialEq, Hash)] pub enum TypeSignature { /// arbitrary number of arguments of an common type out of a list of valid types // A function such as `concat` is `Variadic(vec![DataType::Utf8, DataType::LargeUtf8])` @@ -79,7 +79,7 @@ pub enum TypeSignature { } ///The Signature of a function defines its supported input types as well as its volatility. -#[derive(Debug, Clone, PartialEq, PartialOrd)] +#[derive(Debug, Clone, PartialEq, Hash)] pub struct Signature { /// type_signature - The types that the function accepts. See [TypeSignature] for more information. pub type_signature: TypeSignature, @@ -144,7 +144,7 @@ impl Signature { } ///A function's volatility, which defines the functions eligibility for certain optimizations -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)] pub enum Volatility { /// Immutable - An immutable function will always return the same output when given the same input. An example of this is [BuiltinScalarFunction::Cos]. Immutable, @@ -170,7 +170,7 @@ pub type ReturnTypeFunction = Arc Result> + Send + Sync>; /// Enum of all built-in scalar functions -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd)] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] pub enum BuiltinScalarFunction { // math functions /// abs diff --git a/datafusion/src/physical_plan/udaf.rs b/datafusion/src/physical_plan/udaf.rs index 08ea5d30946e..33bc5b939b81 100644 --- a/datafusion/src/physical_plan/udaf.rs +++ b/datafusion/src/physical_plan/udaf.rs @@ -71,14 +71,10 @@ impl PartialEq for AggregateUDF { } } -impl PartialOrd for AggregateUDF { - fn partial_cmp(&self, other: &Self) -> Option { - let c = self.name.partial_cmp(&other.name); - if matches!(c, Some(std::cmp::Ordering::Equal)) { - self.signature.partial_cmp(&other.signature) - } else { - c - } +impl std::hash::Hash for AggregateUDF { + fn hash(&self, state: &mut H) { + self.name.hash(state); + self.signature.hash(state); } } diff --git a/datafusion/src/physical_plan/udf.rs b/datafusion/src/physical_plan/udf.rs index 0c5e80baea31..ae85a7feae4c 100644 --- a/datafusion/src/physical_plan/udf.rs +++ b/datafusion/src/physical_plan/udf.rs @@ -69,14 +69,10 @@ impl PartialEq for ScalarUDF { } } -impl PartialOrd for ScalarUDF { - fn partial_cmp(&self, other: &Self) -> Option { - let c = self.name.partial_cmp(&other.name); - if matches!(c, Some(std::cmp::Ordering::Equal)) { - self.signature.partial_cmp(&other.signature) - } else { - c - } +impl std::hash::Hash for ScalarUDF { + fn hash(&self, state: &mut H) { + self.name.hash(state); + self.signature.hash(state); } } diff --git a/datafusion/src/physical_plan/window_functions.rs b/datafusion/src/physical_plan/window_functions.rs index a8cb99172b24..5b34f672cbac 100644 --- a/datafusion/src/physical_plan/window_functions.rs +++ b/datafusion/src/physical_plan/window_functions.rs @@ -35,7 +35,7 @@ use std::sync::Arc; use std::{fmt, str::FromStr}; /// WindowFunction -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum WindowFunction { /// window function that leverages an aggregate function AggregateFunction(AggregateFunction), @@ -90,7 +90,7 @@ impl fmt::Display for WindowFunction { } /// An aggregate function that is part of a built-in window function -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum BuiltInWindowFunction { /// number of the current row within its partition, counting from 1 RowNumber,