diff --git a/src/common/vector/src/distance.rs b/src/common/vector/src/distance.rs index 0fe3a513b31fc..3d5a1f45d4d50 100644 --- a/src/common/vector/src/distance.rs +++ b/src/common/vector/src/distance.rs @@ -32,3 +32,20 @@ pub fn cosine_distance(from: &[f32], to: &[f32]) -> Result { Ok(1.0 - (&a * &b).sum() / ((aa_sum).sqrt() * (bb_sum).sqrt())) } + +pub fn l2_distance(from: &[f32], to: &[f32]) -> Result { + if from.len() != to.len() { + return Err(ErrorCode::InvalidArgument(format!( + "Vector length not equal: {:} != {:}", + from.len(), + to.len(), + ))); + } + + Ok(from + .iter() + .zip(to.iter()) + .map(|(a, b)| (a - b).powi(2)) + .sum::() + .sqrt()) +} diff --git a/src/common/vector/src/lib.rs b/src/common/vector/src/lib.rs index ea98276b10928..8ee8bc83e7cd3 100644 --- a/src/common/vector/src/lib.rs +++ b/src/common/vector/src/lib.rs @@ -15,3 +15,4 @@ mod distance; pub use distance::cosine_distance; +pub use distance::l2_distance; diff --git a/src/meta/process/src/examples.rs b/src/meta/process/src/examples.rs index b6c16247cde7e..9321a66093309 100644 --- a/src/meta/process/src/examples.rs +++ b/src/meta/process/src/examples.rs @@ -104,7 +104,6 @@ pub fn print_table_meta(config: &Config) -> anyhow::Result<()> { Ok(()) } -#[allow(dead_code)] fn pretty(v: &T) -> Result where T: Serialize { serde_json::to_string_pretty(v) diff --git a/src/query/ast/src/ast/expr.rs b/src/query/ast/src/ast/expr.rs index 71395ad5fae1b..974ffe54cde89 100644 --- a/src/query/ast/src/ast/expr.rs +++ b/src/query/ast/src/ast/expr.rs @@ -419,6 +419,8 @@ pub enum BinaryOperator { BitwiseXor, BitwiseShiftLeft, BitwiseShiftRight, + + L2Distance, } impl BinaryOperator { @@ -445,6 +447,7 @@ impl BinaryOperator { BinaryOperator::BitwiseShiftLeft => "bit_shift_left".to_string(), BinaryOperator::BitwiseShiftRight => "bit_shift_right".to_string(), BinaryOperator::Caret => "pow".to_string(), + BinaryOperator::L2Distance => "l2_distance".to_string(), _ => { let name = format!("{:?}", self); name.to_lowercase() @@ -664,6 +667,9 @@ impl Display for BinaryOperator { BinaryOperator::BitwiseShiftRight => { write!(f, ">>") } + BinaryOperator::L2Distance => { + write!(f, "<->") + } } } } diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index 52f96c423274e..825ef407d501a 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -396,6 +396,7 @@ impl<'a, I: Iterator>> PrattParser for ExprP BinaryOperator::BitwiseOr => Affix::Infix(Precedence(22), Associativity::Left), BinaryOperator::BitwiseAnd => Affix::Infix(Precedence(22), Associativity::Left), BinaryOperator::BitwiseXor => Affix::Infix(Precedence(22), Associativity::Left), + BinaryOperator::L2Distance => Affix::Infix(Precedence(22), Associativity::Left), BinaryOperator::BitwiseShiftLeft => { Affix::Infix(Precedence(23), Associativity::Left) @@ -1093,6 +1094,7 @@ pub fn binary_op(i: Input) -> IResult { value(BinaryOperator::Div, rule! { DIV }), value(BinaryOperator::Modulo, rule! { "%" }), value(BinaryOperator::StringConcat, rule! { "||" }), + value(BinaryOperator::L2Distance, rule! { "<->" }), value(BinaryOperator::Gt, rule! { ">" }), value(BinaryOperator::Lt, rule! { "<" }), value(BinaryOperator::Gte, rule! { ">=" }), diff --git a/src/query/ast/src/parser/token.rs b/src/query/ast/src/parser/token.rs index 998423b6b68c0..f13b4b0803b91 100644 --- a/src/query/ast/src/parser/token.rs +++ b/src/query/ast/src/parser/token.rs @@ -613,6 +613,9 @@ pub enum TokenKind { LOCATION_PREFIX, #[token("ROLES", ignore(ascii_case))] ROLES, + /// L2DISTANCE op, from https://github.com/pgvector/pgvector + #[token("<->")] + L2DISTANCE, #[token("LEADING", ignore(ascii_case))] LEADING, #[token("LEFT", ignore(ascii_case))] @@ -1048,6 +1051,7 @@ impl TokenKind { | Abs | SquareRoot | CubeRoot + | L2DISTANCE | Placeholder | EOI ) diff --git a/src/query/ast/tests/it/testdata/expr-error.txt b/src/query/ast/tests/it/testdata/expr-error.txt index 657a9712e1fda..2e7b377f64fa1 100644 --- a/src/query/ast/tests/it/testdata/expr-error.txt +++ b/src/query/ast/tests/it/testdata/expr-error.txt @@ -53,7 +53,7 @@ error: --> SQL:1:10 | 1 | CAST(col1) - | ---- ^ expected `AS`, `,`, `(`, `.`, `IS`, `NOT`, or 69 more ... + | ---- ^ expected `AS`, `,`, `(`, `.`, `IS`, `NOT`, or 70 more ... | | | while parsing `CAST(... AS ...)` | while parsing expression diff --git a/src/query/ast/tests/it/testdata/statement-error.txt b/src/query/ast/tests/it/testdata/statement-error.txt index 3126be4e8cde5..e13ed04e19622 100644 --- a/src/query/ast/tests/it/testdata/statement-error.txt +++ b/src/query/ast/tests/it/testdata/statement-error.txt @@ -394,7 +394,7 @@ error: --> SQL:1:41 | 1 | SELECT * FROM t GROUP BY GROUPING SETS () - | ------ ^ expected `(`, `IS`, `IN`, `EXISTS`, `BETWEEN`, `+`, or 67 more ... + | ------ ^ expected `(`, `IS`, `IN`, `EXISTS`, `BETWEEN`, `+`, or 68 more ... | | | while parsing `SELECT ...` diff --git a/src/query/functions/src/scalars/vector.rs b/src/query/functions/src/scalars/vector.rs index 32ebed746c44d..cf041d642256e 100644 --- a/src/query/functions/src/scalars/vector.rs +++ b/src/query/functions/src/scalars/vector.rs @@ -23,6 +23,7 @@ use common_expression::FunctionDomain; use common_expression::FunctionRegistry; use common_openai::OpenAI; use common_vector::cosine_distance; +use common_vector::l2_distance; pub fn register(registry: &mut FunctionRegistry) { // cosine_distance @@ -50,6 +51,32 @@ pub fn register(registry: &mut FunctionRegistry) { ), ); + // L2 distance + // cosine_distance + // This function takes two Float32 arrays as input and computes the l2 distance between them. + registry.register_passthrough_nullable_2_arg::, ArrayType, Float32Type, _, _>( + "l2_distance", + |_, _, _| FunctionDomain::MayThrow, + vectorize_with_builder_2_arg::, ArrayType, Float32Type>( + |lhs, rhs, output, ctx| { + let l_f32= + unsafe { std::mem::transmute::, Buffer>(lhs) }; + let r_f32= + unsafe { std::mem::transmute::, Buffer>(rhs) }; + + match l2_distance(l_f32.as_slice(), r_f32.as_slice()) { + Ok(dist) => { + output.push(F32::from(dist)); + } + Err(err) => { + ctx.set_error(output.len(), err.to_string()); + output.push(F32::from(0.0)); + } + } + } + ), + ); + // embedding_vector // This function takes two strings as input, sends an API request to OpenAI, and returns the Float32 array of embeddings. // The OpenAI API key is pre-configured during the binder phase, so we rewrite this function and set the API key. diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 600abdb9b2cba..cd6776277daa1 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -1805,6 +1805,8 @@ Functions overloads: 1 json_path_query_first(Variant NULL, String NULL) :: Variant NULL 0 json_to_string(Variant) :: String 1 json_to_string(Variant NULL) :: String NULL +0 l2_distance(Array(Float32), Array(Float32)) :: Float32 +1 l2_distance(Array(Float32) NULL, Array(Float32) NULL) :: Float32 NULL 0 left(String, UInt64) :: String 1 left(String NULL, UInt64 NULL) :: String NULL 0 length(Variant NULL) :: UInt32 NULL diff --git a/src/query/service/src/interpreters/interpreter_table_vacuum.rs b/src/query/service/src/interpreters/interpreter_table_vacuum.rs index e0f227fab7c4b..83f40617d4e7a 100644 --- a/src/query/service/src/interpreters/interpreter_table_vacuum.rs +++ b/src/query/service/src/interpreters/interpreter_table_vacuum.rs @@ -30,7 +30,6 @@ use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; use crate::sessions::TableContext; -#[allow(dead_code)] pub struct VacuumTableInterpreter { ctx: Arc, plan: VacuumTablePlan, diff --git a/src/query/service/src/interpreters/interpreter_vacuum_drop_tables.rs b/src/query/service/src/interpreters/interpreter_vacuum_drop_tables.rs index 3c38cc87c6ecf..2226faee4b4f3 100644 --- a/src/query/service/src/interpreters/interpreter_vacuum_drop_tables.rs +++ b/src/query/service/src/interpreters/interpreter_vacuum_drop_tables.rs @@ -38,7 +38,6 @@ use crate::sessions::TableContext; const DRY_RUN_LIMIT: usize = 1000; -#[allow(dead_code)] pub struct VacuumDropTablesInterpreter { ctx: Arc, plan: VacuumDropTablePlan, diff --git a/src/query/service/src/pipelines/executor/executor_graph.rs b/src/query/service/src/pipelines/executor/executor_graph.rs index 699ec4013b700..7fb162bec21c1 100644 --- a/src/query/service/src/pipelines/executor/executor_graph.rs +++ b/src/query/service/src/pipelines/executor/executor_graph.rs @@ -57,9 +57,7 @@ struct Node { processor: ProcessorPtr, updated_list: Arc, - #[allow(dead_code)] inputs_port: Vec>, - #[allow(dead_code)] outputs_port: Vec>, } diff --git a/src/query/service/src/pipelines/executor/pipeline_complete_executor.rs b/src/query/service/src/pipelines/executor/pipeline_complete_executor.rs index 9a673532a5b2a..c869b2f42a36a 100644 --- a/src/query/service/src/pipelines/executor/pipeline_complete_executor.rs +++ b/src/query/service/src/pipelines/executor/pipeline_complete_executor.rs @@ -27,7 +27,6 @@ pub struct PipelineCompleteExecutor { } // Use this executor when the pipeline is complete pipeline (has source and sink) -#[allow(dead_code)] impl PipelineCompleteExecutor { pub fn try_create( pipeline: Pipeline, diff --git a/src/query/service/src/pipelines/executor/pipeline_pushing_executor.rs b/src/query/service/src/pipelines/executor/pipeline_pushing_executor.rs index 1c937814f0335..d9c761f024482 100644 --- a/src/query/service/src/pipelines/executor/pipeline_pushing_executor.rs +++ b/src/query/service/src/pipelines/executor/pipeline_pushing_executor.rs @@ -52,14 +52,12 @@ impl State { } // Use this executor when the pipeline is pushing pipeline (exists sink but not exists source) -#[allow(dead_code)] pub struct PipelinePushingExecutor { state: Arc, executor: Arc, sender: SyncSender>, } -#[allow(dead_code)] impl PipelinePushingExecutor { fn wrap_pipeline( ctx: Arc, diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_partial.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_partial.rs index 6bb659aec5a5e..4cf479fb5acee 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_partial.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_partial.rs @@ -116,7 +116,6 @@ pub struct TransformPartialAggregate { } impl TransformPartialAggregate { - #[allow(dead_code)] pub fn try_create( ctx: Arc, method: Method, diff --git a/src/query/service/src/servers/http/formats/mod.rs b/src/query/service/src/servers/http/formats/mod.rs deleted file mode 100644 index ba27358d3c669..0000000000000 --- a/src/query/service/src/servers/http/formats/mod.rs +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -pub enum Format { - NDJson, -} diff --git a/src/query/service/src/servers/http/mod.rs b/src/query/service/src/servers/http/mod.rs index 4c5b0e84a0821..802dc4aa2b79b 100644 --- a/src/query/service/src/servers/http/mod.rs +++ b/src/query/service/src/servers/http/mod.rs @@ -14,7 +14,6 @@ mod clickhouse_federated; mod clickhouse_handler; -pub mod formats; mod http_services; pub mod middleware; pub mod v1; diff --git a/src/query/service/src/servers/mysql/mysql_federated.rs b/src/query/service/src/servers/mysql/mysql_federated.rs index a4fc4b67100d2..c2cfc33fae4ba 100644 --- a/src/query/service/src/servers/mysql/mysql_federated.rs +++ b/src/query/service/src/servers/mysql/mysql_federated.rs @@ -15,7 +15,6 @@ use std::collections::HashMap; use std::sync::Arc; -use common_config::DATABEND_COMMIT_VERSION; use common_expression::types::StringType; use common_expression::utils::FromData; use common_expression::DataBlock; @@ -30,36 +29,12 @@ use regex::Regex; use crate::servers::federated_helper::FederatedHelper; use crate::servers::federated_helper::LazyBlockFunc; -use crate::servers::mysql::MYSQL_VERSION; -#[allow(dead_code)] -pub struct MySQLFederated { - mysql_version: String, - databend_version: String, -} +pub struct MySQLFederated {} impl MySQLFederated { pub fn create() -> Self { - MySQLFederated { - mysql_version: MYSQL_VERSION.to_string(), - databend_version: DATABEND_COMMIT_VERSION.to_string(), - } - } - - // Build block for select @@variable. - // Format: - // |@@variable| - // |value| - #[allow(dead_code)] - fn select_variable_block(name: &str, value: &str) -> Option<(TableSchemaRef, DataBlock)> { - let schema = TableSchemaRefExt::create(vec![TableField::new( - &format!("@@{}", name), - TableDataType::String, - )]); - let block = DataBlock::new_from_columns(vec![StringType::from_data(vec![ - value.as_bytes().to_vec(), - ])]); - Some((schema, block)) + MySQLFederated {} } // Build block for select function. @@ -260,7 +235,7 @@ impl MySQLFederated { (Regex::new("(?i)^(/\\*!40103 SET(.*) \\*/)$").unwrap(), None), (Regex::new("(?i)^(/\\*!40111 SET(.*) \\*/)$").unwrap(), None), (Regex::new("(?i)^(/\\*!40101 SET(.*) \\*/)$").unwrap(), None), - (Regex::new("(?i)^(/\\*!40014 SET(.*) \\*/)$").unwrap(), None), + (Regex::new("(?i)^(/\\*!40014 SET(.*) \\*/)$").unwrap(), None), (Regex::new("(?i)^(/\\*!40000 SET(.*) \\*/)$").unwrap(), None), (Regex::new("(?i)^(/\\*!40000 ALTER(.*) \\*/)$").unwrap(), None), ]; diff --git a/src/query/service/src/test_kits/context.rs b/src/query/service/src/test_kits/context.rs index bff96b142c2ca..f9bf9fcf0ba6f 100644 --- a/src/query/service/src/test_kits/context.rs +++ b/src/query/service/src/test_kits/context.rs @@ -102,13 +102,11 @@ pub async fn create_query_context_with_config( Ok((guard, dummy_query_context)) } -#[allow(dead_code)] pub struct ClusterDescriptor { local_node_id: String, cluster_nodes_list: Vec>, } -#[allow(dead_code)] impl ClusterDescriptor { pub fn new() -> ClusterDescriptor { ClusterDescriptor { diff --git a/src/query/sql/src/planner/optimizer/heuristic/mod.rs b/src/query/sql/src/planner/optimizer/heuristic/mod.rs index c4fadbabafdde..359f66bb0af3f 100644 --- a/src/query/sql/src/planner/optimizer/heuristic/mod.rs +++ b/src/query/sql/src/planner/optimizer/heuristic/mod.rs @@ -16,11 +16,9 @@ mod decorrelate; #[allow(clippy::module_inception)] mod heuristic; mod prune_unused_columns; -mod rule_list; mod subquery_rewriter; pub use heuristic::HeuristicOptimizer; pub use heuristic::DEFAULT_REWRITE_RULES; pub use heuristic::RESIDUAL_RULES; -pub use rule_list::RuleList; pub use subquery_rewriter::SubqueryRewriter; diff --git a/src/query/sql/src/planner/optimizer/heuristic/rule_list.rs b/src/query/sql/src/planner/optimizer/heuristic/rule_list.rs deleted file mode 100644 index a2297bdaef391..0000000000000 --- a/src/query/sql/src/planner/optimizer/heuristic/rule_list.rs +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use crate::optimizer::rule::RulePtr; - -#[allow(dead_code)] -// Ordered list of rules, may contain duplicated rules. -pub struct RuleList { - rules: Vec, -} - -impl RuleList { - // pub fn create(ids: Vec, metadata: Option) -> Result { - // let factory = RuleFactory::create(); - // let mut rules = vec![]; - // for id in ids { - // rules.push(factory.create_rule(id, metadata.clone())?); - // } - // Ok(RuleList { rules }) - // } - - // pub fn iter(&self) -> impl Iterator { - // self.rules.iter() - // } -} diff --git a/src/query/storages/fuse/src/io/write/block_writer.rs b/src/query/storages/fuse/src/io/write/block_writer.rs index 78405c91bbbbe..b09c86a011da0 100644 --- a/src/query/storages/fuse/src/io/write/block_writer.rs +++ b/src/query/storages/fuse/src/io/write/block_writer.rs @@ -109,7 +109,6 @@ pub struct BloomIndexState { pub(crate) data: Vec, pub(crate) size: u64, pub(crate) location: Location, - #[allow(dead_code)] pub(crate) column_distinct_count: HashMap, } diff --git a/tests/sqllogictests/suites/query/02_function/02_0063_function_vector b/tests/sqllogictests/suites/query/02_function/02_0063_function_vector index 24dfbb38771c6..3baed013c009b 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0063_function_vector +++ b/tests/sqllogictests/suites/query/02_function/02_0063_function_vector @@ -3,3 +3,8 @@ query F select cosine_distance([3.0, 45.0, 7.0, 2.0, 5.0, 20.0, 13.0, 12.0], [2.0, 54.0, 13.0, 15.0, 22.0, 34.0, 50.0, 1.0]) as sim ---- 0.1264193 + +query F +select [1, 2] <-> [2, 3] as sim +---- +1.4142135