From c2c222d982e9ff1824794af62a533bed2a05b37b Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Thu, 16 Jan 2025 14:00:34 +0800 Subject: [PATCH] fix: set query_kind earlier to ensure it takes effect. maybe related to the performance regression reported in issue 17284. --- src/query/sql/src/planner/planner.rs | 13 ++++++++----- .../src/parquet_rs/parquet_table/table.rs | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/query/sql/src/planner/planner.rs b/src/query/sql/src/planner/planner.rs index 62126bca834b7..5eeb3c9455b35 100644 --- a/src/query/sql/src/planner/planner.rs +++ b/src/query/sql/src/planner/planner.rs @@ -223,6 +223,7 @@ impl Planner { #[fastrace::trace] pub async fn plan_stmt(&mut self, stmt: &Statement, attach_query: bool) -> Result { let start = Instant::now(); + let query_kind = get_query_kind(stmt); let settings = self.ctx.get_settings(); // Step 3: Bind AST with catalog, and generate a pure logical SExpr let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?; @@ -243,8 +244,7 @@ impl Planner { info!("logical plan from cache, time used: {:?}", start.elapsed()); if attach_query { // update for clickhouse handler - self.ctx - .attach_query_str(get_query_kind(stmt), stmt.to_mask_sql()); + self.ctx.attach_query_str(query_kind, stmt.to_mask_sql()); } return Ok(plan.plan); } @@ -260,11 +260,14 @@ impl Planner { ) .with_subquery_executor(self.query_executor.clone()); - // Indicate binder there is no need to collect column statistics for the binding table. + // must attach before bind, because ParquetRSTable::create used it. + if attach_query { + self.ctx.attach_query_str(query_kind, stmt.to_mask_sql()); + } let plan = binder.bind(stmt).await?; + // attach again to avoid the query kind is overwritten by the subquery if attach_query { - self.ctx - .attach_query_str(get_query_kind(stmt), stmt.to_mask_sql()); + self.ctx.attach_query_str(query_kind, stmt.to_mask_sql()); } // Step 4: Optimize the SExpr with optimizers, and generate optimized physical SExpr diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs index fd0cae9405d7b..f06a765f02b22 100644 --- a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs +++ b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs @@ -34,6 +34,7 @@ use databend_common_catalog::table::DummyColumnStatisticsProvider; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_context::TableContext; +use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::TableField; use databend_common_expression::TableSchema; @@ -141,10 +142,17 @@ impl ParquetRSTable { // If the query is `COPY`, we don't need to collect column statistics. // It's because the only transform could be contained in `COPY` command is projection. - let need_stats_provider = !matches!( - query_kind, - QueryKind::CopyIntoTable | QueryKind::CopyIntoLocation - ); + let need_stats_provider = match query_kind { + QueryKind::CopyIntoTable | QueryKind::CopyIntoLocation => true, + QueryKind::Unknown => { + // add this branch to ensure query_kind is set + return Err(ErrorCode::Internal( + "Unexpected QueryKind::Unknown: query_kind was not properly set before calling ParquetRSTable::create.", + )); + } + _ => false, + }; + let max_threads = settings.get_max_threads()? as usize; let max_memory_usage = settings.get_max_memory_usage()?;