From 1ef0f398afcdc1d65c4b6afda772fe8d870b6ae1 Mon Sep 17 00:00:00 2001 From: Jun Li Date: Thu, 9 Sep 2021 16:05:28 -0700 Subject: [PATCH] Add Interval type --- common/datavalues/src/types/data_type.rs | 73 ++++++----- .../src/types/data_type_coercion.rs | 36 +++++- .../src/types/serializations/interval.rs | 41 ------ .../src/types/serializations/mod.rs | 2 - .../src/scalars/arithmetics/arithmetic.rs | 13 +- common/functions/src/scalars/dates/date.rs | 2 +- .../functions/src/scalars/dates/interval.rs | 119 ------------------ common/functions/src/scalars/dates/mod.rs | 3 +- .../clickhouse/writers/query_writer.rs | 10 +- .../mysql/writers/query_result_writer.rs | 2 +- query/src/sql/plan_parser.rs | 62 ++++----- query/src/sql/plan_parser_test.rs | 11 +- 12 files changed, 132 insertions(+), 242 deletions(-) delete mode 100644 common/datavalues/src/types/serializations/interval.rs delete mode 100644 common/functions/src/scalars/dates/interval.rs diff --git a/common/datavalues/src/types/data_type.rs b/common/datavalues/src/types/data_type.rs index 11710123dc518..6d66357d28662 100644 --- a/common/datavalues/src/types/data_type.rs +++ b/common/datavalues/src/types/data_type.rs @@ -45,7 +45,6 @@ pub enum DataType { /// Option indicates the timezone, if it's None, it's UTC DateTime32(Option), - /// Representing the elapsed time or duration in seconds. You can use Interval-type values in arithmetical operations with DateTime, but not Date, which represents elapsed time in days. Interval(IntervalUnit), List(Box), @@ -53,6 +52,24 @@ pub enum DataType { String, } +#[derive( + serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, +)] +pub enum IntervalUnit { + YearMonth, + DayTime, +} + +impl fmt::Display for IntervalUnit { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let str = match self { + IntervalUnit::YearMonth => "YearMonth", + IntervalUnit::DayTime => "DayTime", + }; + write!(f, "{}", str) + } +} + impl DataType { pub fn to_physical_type(&self) -> PhysicalDataType { self.clone().into() @@ -83,7 +100,11 @@ impl DataType { ArrowDataType::Struct(arrows_fields) } String => ArrowDataType::LargeBinary, - Interval(_unit) => ArrowDataType::Int64, + Interval(unit) => ArrowDataType::Extension( + "Interval".to_string(), + Box::new(ArrowDataType::Int64), + Some(unit.to_string()), + ), } } } @@ -120,6 +141,25 @@ impl From<&ArrowDataType> for DataType { ArrowDataType::Timestamp(_, tz) => DataType::DateTime32(tz.clone()), ArrowDataType::Date32 => DataType::Date16, ArrowDataType::Date64 => DataType::Date32, + + ArrowDataType::Extension(name, _arrow_type, extra) => match name.as_str() { + "Date16" => DataType::Date16, + "Date32" => DataType::Date32, + "DateTime32" => DataType::DateTime32(extra.clone()), + "Interval" => { + if let Some(unit) = extra { + match unit.as_str() { + "YearMonth" => DataType::Interval(IntervalUnit::YearMonth), + "DayTime" => DataType::Interval(IntervalUnit::DayTime), + _ => unreachable!(), + } + } else { + unreachable!() + } + } + _ => unimplemented!("data_type: {}", dt), + }, + // this is safe, because we define the datatype firstly _ => { unimplemented!("data_type: {}", dt) @@ -163,6 +203,9 @@ impl fmt::Debug for DataType { Self::List(arg0) => f.debug_tuple("List").field(arg0).finish(), Self::Struct(arg0) => f.debug_tuple("Struct").field(arg0).finish(), Self::String => write!(f, "String"), + Self::Interval(unit) => { + write!(f, "Interval({})", unit.to_string()) + } } } } @@ -171,29 +214,3 @@ impl fmt::Display for DataType { write!(f, "{:?}", self) } } - -#[derive( - serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, -)] -pub enum IntervalUnit { - Year, - Month, - Day, - Hour, - Minute, - Second, -} - -impl IntervalUnit { - pub fn avg_seconds(unit: IntervalUnit) -> i32 { - // the average seconds number is from clickhouse: https://github.com/ClickHouse/ClickHouse/blob/9f5cd35a6963cc556a51218b46b0754dcac7306a/src/Common/IntervalKind.cpp - match unit { - IntervalUnit::Second => 1, - IntervalUnit::Minute => 60, - IntervalUnit::Hour => 3600, - IntervalUnit::Day => 86400, - IntervalUnit::Month => 2629746, // Exactly 1/12 of a year. - IntervalUnit::Year => 31556952, // The average length of a Gregorian year is equal to 365.2425 days, - } - } -} diff --git a/common/datavalues/src/types/data_type_coercion.rs b/common/datavalues/src/types/data_type_coercion.rs index 9045e737ecce0..2f5642f27797e 100644 --- a/common/datavalues/src/types/data_type_coercion.rs +++ b/common/datavalues/src/types/data_type_coercion.rs @@ -43,6 +43,10 @@ pub fn is_numeric(dt: &DataType) -> bool { ) } +pub fn is_interval(dt: &DataType) -> bool { + matches!(dt, DataType::Interval(_)) +} + fn next_size(size: usize) -> usize { if size < 8_usize { return size * 2; @@ -273,7 +277,7 @@ pub fn datetime_arithmetic_coercion( DataValueArithmeticOperator::Plus => Ok(a), DataValueArithmeticOperator::Minus => { - if is_numeric(&b) { + if is_numeric(&b) || is_interval(&b) { Ok(a) } else { // Date minus Date or DateTime minus DateTime @@ -284,6 +288,36 @@ pub fn datetime_arithmetic_coercion( } } +#[inline] +pub fn interval_arithmetic_coercion( + op: &DataValueArithmeticOperator, + lhs_type: &DataType, + rhs_type: &DataType, +) -> Result { + let e = Result::Err(ErrorCode::BadDataValueType(format!( + "DataValue Error: Unsupported date coercion ({:?}) {} ({:?})", + lhs_type, op, rhs_type + ))); + + // only allow date/datetime [+/-] interval + if !(is_date_or_date_time(lhs_type) && is_interval(rhs_type) + || is_date_or_date_time(rhs_type) && is_interval(lhs_type)) + { + return e; + } + + match op { + DataValueArithmeticOperator::Plus | DataValueArithmeticOperator::Minus => { + if is_date_or_date_time(lhs_type) { + Ok(lhs_type.clone()) + } else { + Ok(rhs_type.clone()) + } + } + _ => e, + } +} + #[inline] pub fn numerical_signed_coercion(val_type: &DataType) -> Result { // error on any non-numeric type diff --git a/common/datavalues/src/types/serializations/interval.rs b/common/datavalues/src/types/serializations/interval.rs deleted file mode 100644 index 100361dae5681..0000000000000 --- a/common/datavalues/src/types/serializations/interval.rs +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2020 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use common_exception::Result; - -use crate::prelude::DataColumn; -use crate::DataValue; -use crate::DataValueArithmeticOperator; -use crate::IntervalUnit; -use crate::NumberSerializer; -use crate::TypeSerializer; - -pub struct IntervalSerializer { - unit: IntervalUnit, -} - -impl IntervalSerializer { - pub fn new(unit: IntervalUnit) -> Self { - Self { unit } - } -} - -impl TypeSerializer for IntervalSerializer { - fn serialize_strings(&self, column: &DataColumn) -> Result> { - let seconds_per_unit = IntervalUnit::avg_seconds(self.unit.clone()); - let seconds = DataColumn::Constant(DataValue::Float64(Some(seconds_per_unit as f64)), 1); - let interval = column.arithmetic(DataValueArithmeticOperator::Div, &seconds)?; - NumberSerializer::::default().serialize_strings(&interval) - } -} diff --git a/common/datavalues/src/types/serializations/mod.rs b/common/datavalues/src/types/serializations/mod.rs index e6860d5cdcce6..01bf1ad3b655b 100644 --- a/common/datavalues/src/types/serializations/mod.rs +++ b/common/datavalues/src/types/serializations/mod.rs @@ -22,14 +22,12 @@ use crate::DataType; mod boolean; mod date; mod date_time; -mod null; mod number; mod string; pub use boolean::*; pub use date::*; pub use date_time::*; -pub use null::*; pub use number::*; pub use string::*; diff --git a/common/functions/src/scalars/arithmetics/arithmetic.rs b/common/functions/src/scalars/arithmetics/arithmetic.rs index 0b91da78c5f35..73803788d1dec 100644 --- a/common/functions/src/scalars/arithmetics/arithmetic.rs +++ b/common/functions/src/scalars/arithmetics/arithmetic.rs @@ -63,6 +63,10 @@ impl Function for ArithmeticFunction { if args.len() == 1 { return Ok(args[0].clone()); } + + if is_interval(&args[0]) || is_interval(&args[1]) { + return interval_arithmetic_coercion(&self.op, &args[0], &args[1]); + } if is_date_or_date_time(&args[0]) || is_date_or_date_time(&args[1]) { return datetime_arithmetic_coercion(&self.op, &args[0], &args[1]); } @@ -74,6 +78,14 @@ impl Function for ArithmeticFunction { } fn eval(&self, columns: &DataColumnsWithField, _input_rows: usize) -> Result { + let has_date_or_date_time = columns.iter().any(|c| is_date_or_date_time(c.data_type())); + let has_interval = columns.iter().any(|c| is_interval(c.data_type())); + + if has_date_or_date_time && has_interval { + //TODO(Jun): implement data/datatime +/- interval + unimplemented!() + } + let result = match columns.len() { 1 => std::ops::Neg::neg(columns[0].column()), _ => columns[0] @@ -81,7 +93,6 @@ impl Function for ArithmeticFunction { .arithmetic(self.op.clone(), columns[1].column()), }?; - let has_date_or_date_time = columns.iter().any(|c| is_date_or_date_time(c.data_type())); if has_date_or_date_time { let args = columns .iter() diff --git a/common/functions/src/scalars/dates/date.rs b/common/functions/src/scalars/dates/date.rs index 2fbda31da6c2e..59935db80bc6f 100644 --- a/common/functions/src/scalars/dates/date.rs +++ b/common/functions/src/scalars/dates/date.rs @@ -16,9 +16,9 @@ use common_exception::Result; use super::now::NowFunction; use super::RoundFunction; +use super::ToYYYYMMFunction; use super::ToYYYYMMDDFunction; use super::ToYYYYMMDDhhmmssFunction; -use super::ToYYYYMMFunction; use super::TodayFunction; use super::TomorrowFunction; use super::YesterdayFunction; diff --git a/common/functions/src/scalars/dates/interval.rs b/common/functions/src/scalars/dates/interval.rs deleted file mode 100644 index 0b9f8040dcbde..0000000000000 --- a/common/functions/src/scalars/dates/interval.rs +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright 2020 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::fmt; - -use common_datavalues::prelude::*; -use common_exception::Result; - -use crate::scalars::Function; - -macro_rules! define_interval_struct { - ($interval_type: ident) => { - #[derive(Clone)] - pub struct $interval_type { - display_name: String, - } - }; -} - -define_interval_struct!(IntervalYearFunction); -define_interval_struct!(IntervalMonthFunction); -define_interval_struct!(IntervalDayFunction); -define_interval_struct!(IntervalHourFunction); -define_interval_struct!(IntervalMinuteFunction); -define_interval_struct!(IntervalSecondFunction); - -macro_rules! impl_function { - ($interval_type: ident, $interval_unit: expr) => { - impl Function for $interval_type { - fn name(&self) -> &str { - self.display_name.as_str() - } - - fn return_type(&self, _args: &[DataType]) -> Result { - Ok(DataType::Interval($interval_unit)) - } - - fn nullable(&self, _input_schema: &DataSchema) -> Result { - Ok(false) - } - - fn num_arguments(&self) -> usize { - 1 - } - - fn eval( - &self, - columns: &DataColumnsWithField, - input_rows: usize, - ) -> Result { - let interval_num = columns[0].column(); - let seconds_per_unit = IntervalUnit::avg_seconds($interval_unit); - let seconds = - DataColumn::Constant(DataValue::Int32(Some(seconds_per_unit)), input_rows); - let total_seconds = - interval_num.arithmetic(DataValueArithmeticOperator::Mul, &seconds)?; - - // convert to Int64 type - let series = total_seconds.to_minimal_array()?; - let column: DataColumn = series.cast_with_type(&DataType::Int64)?.into(); - Ok(column.resize_constant(input_rows)) - } - } - }; -} - -impl_function!(IntervalYearFunction, IntervalUnit::Year); -impl_function!(IntervalMonthFunction, IntervalUnit::Month); -impl_function!(IntervalDayFunction, IntervalUnit::Day); -impl_function!(IntervalHourFunction, IntervalUnit::Hour); -impl_function!(IntervalMinuteFunction, IntervalUnit::Minute); -impl_function!(IntervalSecondFunction, IntervalUnit::Second); - -macro_rules! define_try_create { - ($interval_type: ident) => { - impl $interval_type { - pub fn try_create(display_name: &str) -> Result> { - Ok(Box::new($interval_type { - display_name: display_name.to_string(), - })) - } - } - }; -} - -define_try_create!(IntervalYearFunction); -define_try_create!(IntervalMonthFunction); -define_try_create!(IntervalDayFunction); -define_try_create!(IntervalHourFunction); -define_try_create!(IntervalMinuteFunction); -define_try_create!(IntervalSecondFunction); - -macro_rules! define_display { - ($interval_type: ident, $content: expr) => { - impl fmt::Display for $interval_type { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, $content) - } - } - }; -} - -define_display!(IntervalYearFunction, "toIntervalYear()"); -define_display!(IntervalMonthFunction, "toIntervalMonth()"); -define_display!(IntervalDayFunction, "toIntervalDay()"); -define_display!(IntervalHourFunction, "toIntervalHour()"); -define_display!(IntervalMinuteFunction, "toIntervalMinute()"); -define_display!(IntervalSecondFunction, "toIntervalSecond()"); diff --git a/common/functions/src/scalars/dates/mod.rs b/common/functions/src/scalars/dates/mod.rs index 2159f7765f329..93b33571ab687 100644 --- a/common/functions/src/scalars/dates/mod.rs +++ b/common/functions/src/scalars/dates/mod.rs @@ -18,16 +18,15 @@ mod date_function_test; #[cfg(test)] mod date_test; -mod interval; mod now; mod number_function; mod round_function; mod simple_date; pub use date::DateFunction; +pub use number_function::ToYYYYMMFunction; pub use number_function::ToYYYYMMDDFunction; pub use number_function::ToYYYYMMDDhhmmssFunction; -pub use number_function::ToYYYYMMFunction; pub use round_function::RoundFunction; pub use simple_date::TodayFunction; pub use simple_date::TomorrowFunction; diff --git a/query/src/servers/clickhouse/writers/query_writer.rs b/query/src/servers/clickhouse/writers/query_writer.rs index 6ad47626b26ec..1bb16b49d084c 100644 --- a/query/src/servers/clickhouse/writers/query_writer.rs +++ b/query/src/servers/clickhouse/writers/query_writer.rs @@ -307,14 +307,8 @@ pub fn to_clickhouse_block(block: DataBlock) -> Result { .collect(); result.column(name, vs) } - DataType::Interval(unit) => { - let seconds = IntervalUnit::avg_seconds(unit.clone()) as f64; - let vs: Vec = column - .i64()? - .into_no_null_iter() - .map(|c| (*c as f64) / seconds) - .collect(); - result.column(name, vs) + DataType::Interval(_) => { + result.column(name, column.i64()?.inner().values().as_slice().to_vec()) } _ => { return Err(ErrorCode::BadDataValueType(format!( diff --git a/query/src/servers/mysql/writers/query_result_writer.rs b/query/src/servers/mysql/writers/query_result_writer.rs index a00c696de31ca..6de8f22f3fbdd 100644 --- a/query/src/servers/mysql/writers/query_result_writer.rs +++ b/query/src/servers/mysql/writers/query_result_writer.rs @@ -68,7 +68,7 @@ impl<'a, W: std::io::Write> DFQueryResultWriter<'a, W> { DataType::Date16 | DataType::Date32 => Ok(ColumnType::MYSQL_TYPE_DATE), DataType::DateTime32(_) => Ok(ColumnType::MYSQL_TYPE_DATETIME), DataType::Null => Ok(ColumnType::MYSQL_TYPE_NULL), - DataType::Interval(_) => Ok(ColumnType::MYSQL_TYPE_DOUBLE), + DataType::Interval(_) => Ok(ColumnType::MYSQL_TYPE_LONG), _ => Err(ErrorCode::UnImplement(format!( "Unsupported column type:{:?}", field.data_type() diff --git a/query/src/sql/plan_parser.rs b/query/src/sql/plan_parser.rs index 2485f5783f949..1a048beeed3bb 100644 --- a/query/src/sql/plan_parser.rs +++ b/query/src/sql/plan_parser.rs @@ -889,38 +889,42 @@ impl PlanParser { } } + fn interval_to_day_time(days: i32, ms: i32) -> Result { + let milliseconds_per_day = 24 * 3600 * 1000; + let wrapped_days = days + ms / milliseconds_per_day; + let wrapped_ms = ms % milliseconds_per_day; + let num = (wrapped_days as i64) << 32 | (wrapped_ms as i64); + let data_type = DataType::Interval(IntervalUnit::DayTime); + + Ok(Expression::Literal { + value: DataValue::Int64(Some(num)), + column_name: Some(num.to_string()), + data_type, + }) + } + + fn interval_to_year_month(months: i32) -> Result { + let data_type = DataType::Interval(IntervalUnit::YearMonth); + + Ok(Expression::Literal { + value: DataValue::Int64(Some(months as i64)), + column_name: Some(months.to_string()), + data_type, + }) + } + fn interval_to_rex( value: &str, interval_kind: sqlparser::ast::DateTimeField, ) -> Result { - let mut args = Vec::with_capacity(1); - let num = DataValue::try_from_literal(value).map(Expression::create_literal)?; - args.push(num); + let num = value.parse::()?; // we only accept i32 for number in "interval [num] [year|month|day|hour|minute|second]" match interval_kind { - sqlparser::ast::DateTimeField::Year => Ok(Expression::ScalarFunction { - op: "toIntervalYear".into(), - args, - }), - sqlparser::ast::DateTimeField::Month => Ok(Expression::ScalarFunction { - op: "toIntervalMonth".into(), - args, - }), - sqlparser::ast::DateTimeField::Day => Ok(Expression::ScalarFunction { - op: "toIntervalDay".into(), - args, - }), - sqlparser::ast::DateTimeField::Hour => Ok(Expression::ScalarFunction { - op: "toIntervalHour".into(), - args, - }), - sqlparser::ast::DateTimeField::Minute => Ok(Expression::ScalarFunction { - op: "toIntervalMinute".into(), - args, - }), - sqlparser::ast::DateTimeField::Second => Ok(Expression::ScalarFunction { - op: "toIntervalSecond".into(), - args, - }), + sqlparser::ast::DateTimeField::Year => Self::interval_to_year_month(num * 12), + sqlparser::ast::DateTimeField::Month => Self::interval_to_year_month(num), + sqlparser::ast::DateTimeField::Day => Self::interval_to_day_time(num, 0), + sqlparser::ast::DateTimeField::Hour => Self::interval_to_day_time(0, num * 3600 * 1000), + sqlparser::ast::DateTimeField::Minute => Self::interval_to_day_time(0, num * 60 * 1000), + sqlparser::ast::DateTimeField::Second => Self::interval_to_day_time(0, num * 1000), } } @@ -953,8 +957,8 @@ impl PlanParser { ))); } - // when the input is Interval '1 hour', leading_field will be None and value_expr will be '1 hour'. - // we may want to support this pattern in native paser (sqlparser-rs). + // When the input is like "interval '1 hour'", leading_field will be None and value_expr will be '1 hour'. + // We may want to support this pattern in native paser (sqlparser-rs), to have a parsing result that leading_field is Some(Hour) and value_expr is number '1'. if leading_field.is_none() { //TODO: support parsing literal interval like '1 hour' return Result::Err(ErrorCode::SyntaxException(format!( diff --git a/query/src/sql/plan_parser_test.rs b/query/src/sql/plan_parser_test.rs index c9f63ca28e3cf..cff1e94555d3f 100644 --- a/query/src/sql/plan_parser_test.rs +++ b/query/src/sql/plan_parser_test.rs @@ -126,16 +126,9 @@ fn test_plan_parser() -> Result<()> { Test { name: "interval-passed", sql: "SELECT INTERVAL '1' year, INTERVAL '1' month, INTERVAL '1' day, INTERVAL '1' hour, INTERVAL '1' minute, INTERVAL '1' second", - expect: "Projection: toIntervalYear(1):Interval(Year), toIntervalMonth(1):Interval(Month), toIntervalDay(1):Interval(Day), toIntervalHour(1):Interval(Hour), toIntervalMinute(1):Interval(Minute), toIntervalSecond(1):Interval(Second)\n Expression: toIntervalYear(1):Interval(Year), toIntervalMonth(1):Interval(Month), toIntervalDay(1):Interval(Day), toIntervalHour(1):Interval(Hour), toIntervalMinute(1):Interval(Minute), toIntervalSecond(1):Interval(Second) (Before Projection)\n ReadDataSource: scan partitions: [1], scan schema: [dummy:UInt8], statistics: [read_rows: 1, read_bytes: 1]", - error: "" + expect: "Projection: 12:Interval(YearMonth), 1:Interval(YearMonth), 4294967296:Interval(DayTime), 3600000:Interval(DayTime), 60000:Interval(DayTime), 1000:Interval(DayTime)\n Expression: 12:Interval(YearMonth), 1:Interval(YearMonth), 4294967296:Interval(DayTime), 3600000:Interval(DayTime), 60000:Interval(DayTime), 1000:Interval(DayTime) (Before Projection)\n ReadDataSource: scan partitions: [1], scan schema: [dummy:UInt8], statistics: [read_rows: 1, read_bytes: 1]", + error: "", }, - // TODO: support interval type - // Test { - // name: "interval-passed", - // sql: "SELECT INTERVAL '1 year', INTERVAL '1 month', INTERVAL '1 day', INTERVAL '1 hour', INTERVAL '1 minute', INTERVAL '1 second'", - // expect: "Projection: 12:Interval(YearMonth), 1:Interval(YearMonth), 4294967296:Interval(DayTime), 3600000:Interval(DayTime), 60000:Interval(DayTime), 1000:Interval(DayTime)\n Expression: 12:Interval(YearMonth), 1:Interval(YearMonth), 4294967296:Interval(DayTime), 3600000:Interval(DayTime), 60000:Interval(DayTime), 1000:Interval(DayTime) (Before Projection)\n ReadDataSource: scan partitions: [1], scan schema: [dummy:UInt8], statistics: [read_rows: 1, read_bytes: 1]", - // error: "", - // }, // Test { // name: "interval-unsupported", // sql: "SELECT INTERVAL '1 year 1 day'",