diff --git a/Cargo.lock b/Cargo.lock index 4b25560d084b3..bd3a90c8cd505 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1294,9 +1294,13 @@ dependencies = [ name = "common-cache" version = "0.1.0" dependencies = [ + "crc32fast", "heapsize", + "hex", "ritelinked", - "tempfile", + "siphasher", + "tracing", + "walkdir", ] [[package]] @@ -2107,7 +2111,6 @@ dependencies = [ "chrono", "common-arrow", "common-base", - "common-cache", "common-catalog", "common-exception", "common-expression", @@ -6975,6 +6978,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "ringbuffer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1973f95452ec56a4b2c683f5eabc15617e38f4faf2776ed4a0011d5070ecb37e" + [[package]] name = "rio" version = "0.9.4" @@ -7808,11 +7817,19 @@ dependencies = [ "common-base", "common-cache", "common-exception", + "crc32fast", + "crossbeam-channel", + "hex", "metrics", "opendal", "parking_lot 0.12.1", + "ringbuffer", "serde", "serde_json", + "siphasher", + "tempfile", + "tracing", + "walkdir", ] [[package]] @@ -7821,6 +7838,7 @@ version = "0.1.0" dependencies = [ "common-arrow", "common-base", + "common-cache", "common-config", "common-exception", "storages-common-cache", diff --git a/docs/doc/13-sql-reference/70-system-tables/system-configs.md b/docs/doc/13-sql-reference/70-system-tables/system-configs.md index 24e01fc96a887..ab12f0a1d6a57 100644 --- a/docs/doc/13-sql-reference/70-system-tables/system-configs.md +++ b/docs/doc/13-sql-reference/70-system-tables/system-configs.md @@ -5,50 +5,131 @@ title: system.configs Contains information about Databend server configs. ```sql -SELECT * FROM system.configs; -+--------------------------------------+------------------+-------+-------------+ -| name | value | group | description | -+--------------------------------------+------------------+-------+-------------+ -| tenant_id | | query | | -| cluster_id | | query | | -| num_cpus | 16 | query | | -| mysql_handler_host | 127.0.0.1 | query | | -| mysql_handler_port | 3307 | query | | -| max_active_sessions | 256 | query | | -| max_memory_usage | 0 | query | | -| clickhouse_handler_host | 127.0.0.1 | query | | -| clickhouse_handler_port | 9000 | query | | -| http_handler_host | 127.0.0.1 | query | | -| http_handler_port | 8000 | query | | -| flight_api_address | 127.0.0.1:9090 | query | | -| admin_api_address | 127.0.0.1:8080 | query | | -| metric_api_address | 127.0.0.1:7070 | query | | -| http_handler_tls_server_cert | | query | | -| http_handler_tls_server_key | | query | | -| http_handler_tls_server_root_ca_cert | | query | | -| api_tls_server_cert | | query | | -| api_tls_server_key | | query | | -| api_tls_server_root_ca_cert | | query | | -| rpc_tls_server_cert | | query | | -| rpc_tls_server_key | | query | | -| rpc_tls_query_server_root_ca_cert | | query | | -| rpc_tls_query_service_domain_name | localhost | query | | -| table_engine_memory_enabled | true | query | | -| database_engine_github_enabled | true | query | | -| wait_timeout_mills | 5000 | query | | -| max_query_log_size | 10000 | query | | -| table_cache_enabled | false | query | | -| table_memory_cache_mb_size | 256 | query | | -| table_disk_cache_root | _cache | query | | -| table_disk_cache_mb_size | 1024 | query | | -| log_level | INFO | log | | -| log_dir | ./_logs | log | | -| meta_embedded_dir | | meta | | -| meta_address | | meta | | -| meta_username | | meta | | -| meta_password | | meta | | -| meta_client_timeout_in_second | 10 | meta | | -| 
rpc_tls_meta_server_root_ca_cert | | meta | | -| rpc_tls_meta_service_domain_name | localhost | meta | | -+--------------------------------------+------------------+-------+-------------+ +mysql> SELECT * FROM system.configs; ++---------+----------------------------------------+--------------------------------+-------------+ +| group | name | value | description | ++---------+----------------------------------------+--------------------------------+-------------+ +| query | tenant_id | admin | | +| query | cluster_id | | | +| query | num_cpus | 0 | | +| query | mysql_handler_host | 127.0.0.1 | | +| query | mysql_handler_port | 3307 | | +| query | max_active_sessions | 256 | | +| query | max_server_memory_usage | 0 | | +| query | max_memory_limit_enabled | false | | +| query | clickhouse_handler_host | 127.0.0.1 | | +| query | clickhouse_handler_port | 9000 | | +| query | clickhouse_http_handler_host | 127.0.0.1 | | +| query | clickhouse_http_handler_port | 8124 | | +| query | http_handler_host | 127.0.0.1 | | +| query | http_handler_port | 8000 | | +| query | http_handler_result_timeout_secs | 60 | | +| query | flight_api_address | 127.0.0.1:9090 | | +| query | admin_api_address | 127.0.0.1:8080 | | +| query | metric_api_address | 127.0.0.1:7070 | | +| query | http_handler_tls_server_cert | | | +| query | http_handler_tls_server_key | | | +| query | http_handler_tls_server_root_ca_cert | | | +| query | api_tls_server_cert | | | +| query | api_tls_server_key | | | +| query | api_tls_server_root_ca_cert | | | +| query | rpc_tls_server_cert | | | +| query | rpc_tls_server_key | | | +| query | rpc_tls_query_server_root_ca_cert | | | +| query | rpc_tls_query_service_domain_name | localhost | | +| query | table_engine_memory_enabled | true | | +| query | database_engine_github_enabled | true | | +| query | wait_timeout_mills | 5000 | | +| query | max_query_log_size | 10000 | | +| query | management_mode | false | | +| query | jwt_key_file | | | +| query | jwt_key_files | | | +| query | async_insert_max_data_size | 10000 | | +| query | async_insert_busy_timeout | 200 | | +| query | async_insert_stale_timeout | 0 | | +| query | users | | | +| query | share_endpoint_address | | | +| query | share_endpoint_auth_token_file | | | +| query | quota | null | | +| query | internal_enable_sandbox_tenant | false | | +| log | level | INFO | | +| log | dir | ./.databend/logs | | +| log | query_enabled | false | | +| log | file.on | true | | +| log | file.level | INFO | | +| log | file.dir | ./.databend/logs | | +| log | file.format | json | | +| log | stderr.on | false | | +| log | stderr.level | INFO | | +| log | stderr.format | text | | +| meta | embedded_dir | .databend/meta | | +| meta | endpoints | | | +| meta | username | root | | +| meta | password | | | +| meta | client_timeout_in_second | 10 | | +| meta | auto_sync_interval | 0 | | +| meta | rpc_tls_meta_server_root_ca_cert | | | +| meta | rpc_tls_meta_service_domain_name | localhost | | +| cache | enable_table_meta_cache | true | | +| cache | table_meta_snapshot_count | 256 | | +| cache | table_meta_segment_count | 10240 | | +| cache | table_meta_statistic_count | 256 | | +| cache | enable_table_index_bloom | true | | +| cache | table_bloom_index_meta_count | 3000 | | +| cache | table_bloom_index_filter_count | 1048576 | | +| cache | data_cache_storage | none | | +| cache | table_data_cache_population_queue_size | 65536 | | +| cache | disk.max_bytes | 21474836480 | | +| cache | disk.path | ./.databend/_cache | | +| cache | 
table_data_deserialized_data_bytes | 0 | | +| storage | type | fs | | +| storage | num_cpus | 0 | | +| storage | allow_insecure | false | | +| storage | fs.data_path | _data | | +| storage | gcs.endpoint_url | https://storage.googleapis.com | | +| storage | gcs.bucket | | | +| storage | gcs.root | | | +| storage | gcs.credential | | | +| storage | s3.region | | | +| storage | s3.endpoint_url | https://s3.amazonaws.com | | +| storage | s3.access_key_id | | | +| storage | s3.secret_access_key | | | +| storage | s3.security_token | | | +| storage | s3.bucket | | | +| storage | s3.root | | | +| storage | s3.master_key | | | +| storage | s3.enable_virtual_host_style | false | | +| storage | s3.role_arn | | | +| storage | s3.external_id | | | +| storage | azblob.account_name | | | +| storage | azblob.account_key | | | +| storage | azblob.container | | | +| storage | azblob.endpoint_url | | | +| storage | azblob.root | | | +| storage | hdfs.name_node | | | +| storage | hdfs.root | | | +| storage | obs.access_key_id | | | +| storage | obs.secret_access_key | | | +| storage | obs.bucket | | | +| storage | obs.endpoint_url | | | +| storage | obs.root | | | +| storage | oss.access_key_id | | | +| storage | oss.access_key_secret | | | +| storage | oss.bucket | | | +| storage | oss.endpoint_url | | | +| storage | oss.presign_endpoint_url | | | +| storage | oss.root | | | +| storage | cache.type | none | | +| storage | cache.num_cpus | 0 | | +| storage | cache.fs.data_path | _data | | +| storage | cache.moka.max_capacity | 1073741824 | | +| storage | cache.moka.time_to_live | 3600 | | +| storage | cache.moka.time_to_idle | 600 | | +| storage | cache.redis.endpoint_url | | | +| storage | cache.redis.username | | | +| storage | cache.redis.password | | | +| storage | cache.redis.root | | | +| storage | cache.redis.db | 0 | | +| storage | cache.redis.default_ttl | 0 | | ++---------+----------------------------------------+--------------------------------+-------------+ ``` diff --git a/scripts/ci/deploy/config/databend-query-node-1.toml b/scripts/ci/deploy/config/databend-query-node-1.toml index c147f99d18f42..5e4e65229ac6f 100644 --- a/scripts/ci/deploy/config/databend-query-node-1.toml +++ b/scripts/ci/deploy/config/databend-query-node-1.toml @@ -33,13 +33,6 @@ cluster_id = "test_cluster" table_engine_memory_enabled = true database_engine_github_enabled = true -table_meta_cache_enabled = true -table_memory_cache_mb_size = 1024 -table_disk_cache_root = "_cache" -table_disk_cache_mb_size = 10240 -table_cache_bloom_index_meta_count=3000 -table_cache_bloom_index_filter_count=1048576 - # [[query.users]] # name = "admin" # auth_type = "no_password" @@ -110,3 +103,45 @@ data_path = "./.databend/stateless_test_data" # endpoint_url = "" # access_key_id = "" # access_key_secret = "" + + +[cache] + +### table meta caches ### +# Enable table meta cache. Default is true. +# Set it to false wll disable all the table meta caches +enable_table_meta_cache = true +# Max number of cached table snapshot. Set it to 0 to disable it. +table_meta_snapshot_count = 256 +# Max number of cached table segment. Set it to 0 to disable it. +table_meta_segment_count = 10240 +# Max number of cached table statistic meta. Set it to 0 to disable it. +table_meta_statistic_count = 256 + +### table bloom index caches ### +# Enable bloom index cache. Default is true +# Set it to false will disable all the bloom index caches +enable_table_bloom_index_cache = true +# Max number of cached bloom index meta objects. Set it to 0 to disable it. 
+table_bloom_index_meta_count = 3000 +# Max number of cached bloom index filters. Set it to 0 to disable it. +table_bloom_index_filter_count = 1048576 + +### table data caches ### + +# Type of storage to keep the table data cache +# +# available options: [none|disk] +# default is "none", which disable table data cache +# use "disk" to enabled disk cache +data_cache_storage = "none" + +# Max size of external cache population queue length +table_data_cache_population_queue_size = 65535 + + +[cache.disk] +# cache path +path = "./databend/_cache" +# max bytes of cached data 20G +max_bytes = 21474836480 diff --git a/scripts/ci/deploy/config/databend-query-node-2.toml b/scripts/ci/deploy/config/databend-query-node-2.toml index b1318b85c45d5..8f66c5fdaa80e 100644 --- a/scripts/ci/deploy/config/databend-query-node-2.toml +++ b/scripts/ci/deploy/config/databend-query-node-2.toml @@ -33,13 +33,6 @@ cluster_id = "test_cluster" table_engine_memory_enabled = true database_engine_github_enabled = true -table_meta_cache_enabled = true -table_memory_cache_mb_size = 1024 -table_disk_cache_root = "./.databend/cache" -table_disk_cache_mb_size = 10240 -table_cache_bloom_index_meta_count=3000 -table_cache_bloom_index_filter_count=1048576 - [log] [log.file] @@ -64,3 +57,44 @@ type = "fs" # Comment out this block if you're NOT using local file system as storage. [storage.fs] data_path = "./.databend/stateless_test_data" + +[cache] + +### table meta caches ### +# Enable table meta cache. Default is true. +# Set it to false wll disable all the table meta caches +enable_table_meta_cache = true +# Max number of cached table snapshot. Set it to 0 to disable it. +table_meta_snapshot_count = 256 +# Max number of cached table segment. Set it to 0 to disable it. +table_meta_segment_count = 10240 +# Max number of cached table statistic meta. Set it to 0 to disable it. +table_meta_statistic_count = 256 + +### table bloom index caches ### +# Enable bloom index cache. Default is true +# Set it to false will disable all the bloom index caches +enable_table_bloom_index_cache = true +# Max number of cached bloom index meta objects. Set it to 0 to disable it. +table_bloom_index_meta_count = 3000 +# Max number of cached bloom index filters. Set it to 0 to disable it. +table_bloom_index_filter_count = 1048576 + +### table data caches ### + +# Type of storage to keep the table data cache +# +# available options: [none|disk] +# default is "none", which disable table data cache +# use "disk" to enabled disk cache +data_cache_storage = "none" + +# Max size of external cache population queue length +table_data_cache_population_queue_size = 65535 + + +[cache.disk] +# cache path +path = "./databend/_cache" +# max bytes of cached data 20G +max_bytes = 21474836480 diff --git a/scripts/ci/deploy/config/databend-query-node-3.toml b/scripts/ci/deploy/config/databend-query-node-3.toml index 5045053e8778d..594e5338a0027 100644 --- a/scripts/ci/deploy/config/databend-query-node-3.toml +++ b/scripts/ci/deploy/config/databend-query-node-3.toml @@ -34,13 +34,6 @@ cluster_id = "test_cluster" table_engine_memory_enabled = true database_engine_github_enabled = true -table_meta_cache_enabled = true -table_memory_cache_mb_size = 1024 -table_disk_cache_root = "./.databend/cache" -table_disk_cache_mb_size = 10240 -table_cache_bloom_index_meta_count=3000 -table_cache_bloom_index_filter_count=1048576 - [log] [log.file] @@ -65,3 +58,44 @@ type = "fs" # Comment out this block if you're NOT using local file system as storage. 
[storage.fs] data_path = "./.databend/stateless_test_data" + +[cache] + +### table meta caches ### +# Enable table meta cache. Default is true. +# Set it to false wll disable all the table meta caches +enable_table_meta_cache = true +# Max number of cached table snapshot. Set it to 0 to disable it. +table_meta_snapshot_count = 256 +# Max number of cached table segment. Set it to 0 to disable it. +table_meta_segment_count = 10240 +# Max number of cached table statistic meta. Set it to 0 to disable it. +table_meta_statistic_count = 256 + +### table bloom index caches ### +# Enable bloom index cache. Default is true +# Set it to false will disable all the bloom index caches +enable_table_bloom_index_cache = true +# Max number of cached bloom index meta objects. Set it to 0 to disable it. +table_bloom_index_meta_count = 3000 +# Max number of cached bloom index filters. Set it to 0 to disable it. +table_bloom_index_filter_count = 1048576 + +### table data caches ### + +# Type of storage to keep the table data cache +# +# available options: [none|disk] +# default is "none", which disable table data cache +# use "disk" to enabled disk cache +data_cache_storage = "none" + +# Max size of external cache population queue length +table_data_cache_population_queue_size = 65535 + + +[cache.disk] +# cache path +path = "./databend/_cache" +# max bytes of cached data 20G +max_bytes = 21474836480 diff --git a/scripts/ci/deploy/config/databend-query-node-shared.toml b/scripts/ci/deploy/config/databend-query-node-shared.toml index ac27fea74587c..9d42539fa08ef 100644 --- a/scripts/ci/deploy/config/databend-query-node-shared.toml +++ b/scripts/ci/deploy/config/databend-query-node-shared.toml @@ -33,13 +33,6 @@ cluster_id = "test_cluster" table_engine_memory_enabled = true database_engine_github_enabled = true -table_meta_cache_enabled = true -table_memory_cache_mb_size = 1024 -table_disk_cache_root = "_cache" -table_disk_cache_mb_size = 10240 -table_cache_bloom_index_meta_count=3000 -table_cache_bloom_index_filter_count=1048576 - share_endpoint_address = "127.0.0.1:33003" # receive shared information from open sharing # [[query.users]] # name = "admin" @@ -110,3 +103,44 @@ data_path = "./.databend/stateless_test_data" # endpoint_url = "" # access_key_id = "" # access_key_secret = "" + +[cache] + +### table meta caches ### +# Enable table meta cache. Default is true. +# Set it to false wll disable all the table meta caches +enable_table_meta_cache = true +# Max number of cached table snapshot. Set it to 0 to disable it. +table_meta_snapshot_count = 256 +# Max number of cached table segment. Set it to 0 to disable it. +table_meta_segment_count = 10240 +# Max number of cached table statistic meta. Set it to 0 to disable it. +table_meta_statistic_count = 256 + +### table bloom index caches ### +# Enable bloom index cache. Default is true +# Set it to false will disable all the bloom index caches +enable_table_bloom_index_cache = true +# Max number of cached bloom index meta objects. Set it to 0 to disable it. +table_bloom_index_meta_count = 3000 +# Max number of cached bloom index filters. Set it to 0 to disable it. 
+table_bloom_index_filter_count = 1048576 + +### table data caches ### + +# Type of storage to keep the table data cache +# +# available options: [none|disk] +# default is "none", which disable table data cache +# use "disk" to enabled disk cache +data_cache_storage = "none" + +# Max size of external cache population queue length +table_data_cache_population_queue_size = 65535 + + +[cache.disk] +# cache path +path = "./databend/_cache" +# max bytes of cached data 20G +max_bytes = 21474836480 diff --git a/src/binaries/query/local.rs b/src/binaries/query/local.rs index 29a540fd35e9e..00715f5c5330d 100644 --- a/src/binaries/query/local.rs +++ b/src/binaries/query/local.rs @@ -16,7 +16,7 @@ use std::time::Instant; use comfy_table::Cell; use comfy_table::Table; -use common_config::Config; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataBlock; @@ -28,7 +28,7 @@ use databend_query::sql::Planner; use databend_query::GlobalServices; use tokio_stream::StreamExt; -pub async fn query_local(conf: &Config) -> Result<()> { +pub async fn query_local(conf: &InnerConfig) -> Result<()> { let mut conf = conf.clone(); conf.storage.allow_insecure = true; let local_conf = conf.local.clone(); diff --git a/src/binaries/query/main.rs b/src/binaries/query/main.rs index c31066ef451e3..c14f8d11445c4 100644 --- a/src/binaries/query/main.rs +++ b/src/binaries/query/main.rs @@ -23,7 +23,7 @@ use common_base::mem_allocator::GlobalAllocator; use common_base::runtime::Runtime; use common_base::runtime::GLOBAL_MEM_STAT; use common_base::set_alloc_error_hook; -use common_config::Config; +use common_config::InnerConfig; use common_config::DATABEND_COMMIT_VERSION; use common_config::QUERY_SEMVER; use common_exception::Result; @@ -62,7 +62,7 @@ fn main() { } async fn main_entrypoint() -> Result<()> { - let conf: Config = Config::load()?; + let conf: InnerConfig = InnerConfig::load()?; if run_cmd(&conf).await? { return Ok(()); @@ -310,7 +310,7 @@ async fn main_entrypoint() -> Result<()> { Ok(()) } -async fn run_cmd(conf: &Config) -> Result { +async fn run_cmd(conf: &InnerConfig) -> Result { if conf.cmd.is_empty() { return Ok(false); } diff --git a/src/common/cache/Cargo.toml b/src/common/cache/Cargo.toml index 20c3305168ad2..a702e881b8478 100644 --- a/src/common/cache/Cargo.toml +++ b/src/common/cache/Cargo.toml @@ -14,14 +14,17 @@ test = false heapsize = ["heapsize_"] amortized = ["ritelinked/ahash-amortized", "ritelinked/inline-more-amortized"] -[dependencies] # In alphabetical order -# Github dependencies +[dependencies] # Crates.io dependencies +crc32fast = "1.3.2" +hex = "0.4.3" ritelinked = { version = "0.3.2", default-features = false, features = ["ahash", "inline-more"] } +siphasher = "0.3.10" +tracing = "0.1.36" +walkdir = "2.3.2" [target.'cfg(not(target_os = "macos"))'.dependencies] heapsize_ = { package = "heapsize", version = "0.4.2", optional = true } [dev-dependencies] -tempfile = "3.3.0" diff --git a/src/common/cache/src/lib.rs b/src/common/cache/src/lib.rs index 920d9cbdf6221..b3a62cefb3e8b 100644 --- a/src/common/cache/src/lib.rs +++ b/src/common/cache/src/lib.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#![feature(write_all_vectored)] #![allow(clippy::uninlined_format_args)] #[cfg(feature = "heapsize")] #[cfg(not(target_os = "macos"))] diff --git a/src/common/storage/src/column_node.rs b/src/common/storage/src/column_node.rs index 59390da310d3f..d37c3a39f2432 100644 --- a/src/common/storage/src/column_node.rs +++ b/src/common/storage/src/column_node.rs @@ -100,6 +100,27 @@ impl ColumnNodes { } Ok(column_node) } + + pub fn traverse_path_nested_aware<'a>( + column_nodes: &'a [ColumnNode], + path: &'a [usize], + is_nested: bool, + ) -> Result<(&'a ColumnNode, bool)> { + let column_node = &column_nodes[path[0]]; + let is_nested = is_nested || column_node.children.is_some(); + if path.len() > 1 { + return match &column_node.children { + Some(ref children) => { + Self::traverse_path_nested_aware(children, &path[1..], is_nested) + } + None => Err(ErrorCode::Internal(format!( + "Cannot get column_node by path: {:?}", + path + ))), + }; + } + Ok((column_node, is_nested)) + } } /// `ColumnNode` contains all the leaf column ids of the column. diff --git a/src/query/catalog/src/plan/projection.rs b/src/query/catalog/src/plan/projection.rs index 3ae550e963257..b286631dc1fde 100644 --- a/src/query/catalog/src/plan/projection.rs +++ b/src/query/catalog/src/plan/projection.rs @@ -63,14 +63,45 @@ impl Projection { .iter() .map(|idx| &column_nodes.column_nodes[*idx]) .collect(), + Projection::InnerColumns(path_indices) => { + let paths: Vec<&Vec> = path_indices.values().collect(); + paths + .iter() + .map(|path| ColumnNodes::traverse_path(&column_nodes.column_nodes, path)) + .collect::>()? + } + }; + Ok(column_nodes) + } + + /// ColumnNode projection. + /// + /// `ColumnNode`s returned are paired with a boolean which indicates if it + /// is part of a nested field + pub fn project_column_nodes_nested_aware<'a>( + &'a self, + column_nodes: &'a ColumnNodes, + ) -> Result> { + let column_nodes = match self { + Projection::Columns(indices) => indices + .iter() + .map(|idx| { + let column_node = &column_nodes.column_nodes[*idx]; + (column_node, column_node.children.is_some()) + }) + .collect(), Projection::InnerColumns(path_indices) => { let paths: Vec<&Vec> = path_indices.values().collect(); paths .iter() .map(|path| { - ColumnNodes::traverse_path(&column_nodes.column_nodes, path).unwrap() + ColumnNodes::traverse_path_nested_aware( + &column_nodes.column_nodes, + path, + false, + ) }) - .collect() + .collect::>()? 
} }; Ok(column_nodes) diff --git a/src/query/config/src/outer_v0.rs b/src/query/config/src/config.rs similarity index 76% rename from src/query/config/src/outer_v0.rs rename to src/query/config/src/config.rs index 3b2f75f75b90a..6f1020d2477d6 100644 --- a/src/query/config/src/outer_v0.rs +++ b/src/query/config/src/config.rs @@ -21,6 +21,7 @@ use std::str::FromStr; use clap::Args; use clap::Parser; +use clap::ValueEnum; use common_base::base::mask_string; use common_exception::ErrorCode; use common_exception::Result; @@ -50,17 +51,20 @@ use serfig::collectors::from_file; use serfig::collectors::from_self; use serfig::parsers::Toml; +use super::inner; use super::inner::CatalogConfig as InnerCatalogConfig; use super::inner::CatalogHiveConfig as InnerCatalogHiveConfig; -use super::inner::Config as InnerConfig; +use super::inner::InnerConfig; use super::inner::LocalConfig as InnerLocalConfig; use super::inner::MetaConfig as InnerMetaConfig; use super::inner::QueryConfig as InnerQueryConfig; use crate::DATABEND_COMMIT_VERSION; +// FIXME: too much boilerplate here + const CATALOG_HIVE: &str = "hive"; -/// Outer config for `query`. +/// Config for `query`. /// /// We will use this config to handle /// @@ -111,6 +115,10 @@ pub struct Config { #[clap(flatten)] pub local: LocalConfig, + // cache configs + #[clap(flatten)] + pub cache: CacheConfig, + /// external catalog config. /// /// - Later, catalog information SHOULD be kept in KV Service @@ -125,7 +133,7 @@ pub struct Config { impl Default for Config { fn default() -> Self { - InnerConfig::default().into_outer() + InnerConfig::default().into_config() } } @@ -178,58 +186,6 @@ impl Config { } } -impl From for Config { - fn from(inner: InnerConfig) -> Self { - Self { - cmd: inner.cmd, - config_file: inner.config_file, - query: inner.query.into(), - log: inner.log.into(), - meta: inner.meta.into(), - storage: inner.storage.into(), - catalog: HiveCatalogConfig::default(), - local: inner.local.into(), - - catalogs: inner - .catalogs - .into_iter() - .map(|(k, v)| (k, v.into())) - .collect(), - } - } -} - -impl TryInto for Config { - type Error = ErrorCode; - - fn try_into(self) -> Result { - let mut catalogs = HashMap::new(); - for (k, v) in self.catalogs.into_iter() { - let catalog = v.try_into()?; - catalogs.insert(k, catalog); - } - if !self.catalog.meta_store_address.is_empty() || !self.catalog.protocol.is_empty() { - tracing::warn!( - "`catalog` is planned to be deprecated, please add catalog in `catalogs` instead" - ); - let hive = self.catalog.try_into()?; - let catalog = InnerCatalogConfig::Hive(hive); - catalogs.insert(CATALOG_HIVE.to_string(), catalog); - } - - Ok(InnerConfig { - cmd: self.cmd, - config_file: self.config_file, - query: self.query.try_into()?, - log: self.log.try_into()?, - meta: self.meta.try_into()?, - storage: self.storage.try_into()?, - local: self.local.try_into()?, - catalogs, - }) - } -} - /// Storage config group. 
/// /// # TODO(xuanwo) @@ -291,7 +247,7 @@ pub struct StorageConfig { pub oss: OssStorageConfig, #[clap(skip)] - pub cache: CacheConfig, + pub cache: StorageCacheConfig, } impl Default for StorageConfig { @@ -490,7 +446,7 @@ impl From for HiveCatalogConfig { #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(default)] -pub struct CacheConfig { +pub struct StorageCacheConfig { #[serde(rename = "type")] pub cache_type: String, @@ -507,13 +463,13 @@ pub struct CacheConfig { pub redis: RedisStorageConfig, } -impl Default for CacheConfig { +impl Default for StorageCacheConfig { fn default() -> Self { InnerCacheConfig::default().into() } } -impl From for CacheConfig { +impl From for StorageCacheConfig { fn from(inner: InnerCacheConfig) -> Self { let mut cfg = Self { cache_num_cpus: inner.num_cpus, @@ -546,7 +502,7 @@ impl From for CacheConfig { } } -impl TryInto for CacheConfig { +impl TryInto for StorageCacheConfig { type Error = ErrorCode; fn try_into(self) -> Result { Ok(InnerCacheConfig { @@ -1257,50 +1213,6 @@ pub struct QueryConfig { #[clap(long, default_value = "10000")] pub max_query_log_size: usize, - /// Table Meta Cached enabled - #[clap(long, default_value = "true")] - pub table_meta_cache_enabled: bool, - - /// Max number of cached table block meta - #[clap(long, default_value = "102400")] - pub table_cache_block_meta_count: u64, - - /// Table memory cache size (mb) - #[clap(long, default_value = "256")] - pub table_memory_cache_mb_size: u64, - - /// Table disk cache folder root - #[clap(long, default_value = "_cache")] - pub table_disk_cache_root: String, - - /// Table disk cache size (mb) - #[clap(long, default_value = "1024")] - pub table_disk_cache_mb_size: u64, - - /// Max number of cached table snapshot - #[clap(long, default_value = "256")] - pub table_cache_snapshot_count: u64, - - /// Max number of cached table snapshot statistics - #[clap(long, default_value = "256")] - pub table_cache_statistic_count: u64, - - /// Max number of cached table segment - #[clap(long, default_value = "10240")] - pub table_cache_segment_count: u64, - - /// Max number of cached bloom index meta objects - #[clap(long, default_value = "3000")] - pub table_cache_bloom_index_meta_count: u64, - - /// Max number of cached bloom index filters, default value is 1024 * 1024 items. - /// One bloom index filter per column of data block being indexed will be generated if necessary. - /// - /// For example, a table of 1024 columns, with 800 data blocks, a query that triggers a full - /// table filter on 2 columns, might populate 2 * 800 bloom index filter cache items (at most) - #[clap(long, default_value = "1048576")] - pub table_cache_bloom_index_filter_count: u64, - /// If in management mode, only can do some meta level operations(database/table/user/stage etc.) with metasrv. #[clap(long)] pub management_mode: bool, @@ -1339,6 +1251,53 @@ pub struct QueryConfig { #[clap(long)] pub internal_enable_sandbox_tenant: bool, + + // ----- the following options/args are all deprecated ---- + // ----- and turned into Option, to help user migrate the configs ---- + /// OBSOLETED: Table disk cache size (mb). 
+ #[clap(long)] + pub table_disk_cache_mb_size: Option, + + /// OBSOLETED: Table Meta Cached enabled + #[clap(long)] + pub table_meta_cache_enabled: Option, + + /// OBSOLETED: Max number of cached table block meta + #[clap(long)] + pub table_cache_block_meta_count: Option, + + /// OBSOLETED: Table memory cache size (mb) + #[clap(long)] + pub table_memory_cache_mb_size: Option, + + /// OBSOLETED: Table disk cache folder root + #[clap(long)] + pub table_disk_cache_root: Option, + + /// OBSOLETED: Max number of cached table snapshot + #[clap(long)] + pub table_cache_snapshot_count: Option, + + /// OBSOLETED: Max number of cached table snapshot statistics + #[clap(long)] + pub table_cache_statistic_count: Option, + + /// OBSOLETED: Max number of cached table segment + #[clap(long)] + pub table_cache_segment_count: Option, + + /// OBSOLETED: Max number of cached bloom index meta objects + #[clap(long)] + pub table_cache_bloom_index_meta_count: Option, + + /// OBSOLETED: + /// Max number of cached bloom index filters, default value is 1024 * 1024 items. + /// One bloom index filter per column of data block being indexed will be generated if necessary. + /// + /// For example, a table of 1024 columns, with 800 data blocks, a query that triggers a full + /// table filter on 2 columns, might populate 2 * 800 bloom index filter cache items (at most) + #[clap(long)] + pub table_cache_bloom_index_filter_count: Option, } impl Default for QueryConfig { @@ -1351,6 +1310,7 @@ impl TryInto for QueryConfig { type Error = ErrorCode; fn try_into(self) -> Result { + self.check_obsoleted()?; Ok(InnerQueryConfig { tenant_id: self.tenant_id, cluster_id: self.cluster_id, @@ -1381,16 +1341,6 @@ impl TryInto for QueryConfig { table_engine_memory_enabled: self.table_engine_memory_enabled, wait_timeout_mills: self.wait_timeout_mills, max_query_log_size: self.max_query_log_size, - table_meta_cache_enabled: self.table_meta_cache_enabled, - table_cache_block_meta_count: self.table_cache_block_meta_count, - table_memory_cache_mb_size: self.table_memory_cache_mb_size, - table_disk_cache_root: self.table_disk_cache_root, - table_disk_cache_mb_size: self.table_disk_cache_mb_size, - table_cache_snapshot_count: self.table_cache_snapshot_count, - table_cache_statistic_count: self.table_cache_statistic_count, - table_cache_segment_count: self.table_cache_segment_count, - table_cache_bloom_index_meta_count: self.table_cache_bloom_index_meta_count, - table_cache_bloom_index_filter_count: self.table_cache_bloom_index_filter_count, management_mode: self.management_mode, jwt_key_file: self.jwt_key_file, jwt_key_files: self.jwt_key_files, @@ -1447,16 +1397,6 @@ impl From for QueryConfig { database_engine_github_enabled: true, wait_timeout_mills: inner.wait_timeout_mills, max_query_log_size: inner.max_query_log_size, - table_meta_cache_enabled: inner.table_meta_cache_enabled, - table_cache_block_meta_count: inner.table_cache_block_meta_count, - table_memory_cache_mb_size: inner.table_memory_cache_mb_size, - table_disk_cache_root: inner.table_disk_cache_root, - table_disk_cache_mb_size: inner.table_disk_cache_mb_size, - table_cache_snapshot_count: inner.table_cache_snapshot_count, - table_cache_statistic_count: inner.table_cache_statistic_count, - table_cache_segment_count: inner.table_cache_segment_count, - table_cache_bloom_index_meta_count: inner.table_cache_bloom_index_meta_count, - table_cache_bloom_index_filter_count: inner.table_cache_bloom_index_filter_count, management_mode: inner.management_mode, jwt_key_file: 
inner.jwt_key_file, jwt_key_files: inner.jwt_key_files, @@ -1468,6 +1408,17 @@ impl From for QueryConfig { share_endpoint_auth_token_file: inner.share_endpoint_auth_token_file, quota: inner.tenant_quota, internal_enable_sandbox_tenant: inner.internal_enable_sandbox_tenant, + // obsoleted config entries + table_disk_cache_mb_size: None, + table_meta_cache_enabled: None, + table_cache_block_meta_count: None, + table_memory_cache_mb_size: None, + table_disk_cache_root: None, + table_cache_snapshot_count: None, + table_cache_statistic_count: None, + table_cache_segment_count: None, + table_cache_bloom_index_meta_count: None, + table_cache_bloom_index_filter_count: None, } } } @@ -1859,3 +1810,405 @@ impl TryInto for LocalConfig { }) } } + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Args, Default)] +#[serde(default, deny_unknown_fields)] +pub struct CacheConfig { + /// Enable table meta cache. Default is enabled. Set it to false to disable all the table meta caches + #[clap(long = "cache-enable-table-meta-cache", default_value = "true")] + #[serde(default = "bool_true")] + pub enable_table_meta_cache: bool, + + /// Max number of cached table snapshot + #[clap(long = "cache-table-meta-snapshot-count", default_value = "256")] + pub table_meta_snapshot_count: u64, + + /// Max number of cached table segment + #[clap(long = "cache-table-meta-segment-count", default_value = "10240")] + pub table_meta_segment_count: u64, + + /// Max number of cached table statistic meta + #[clap(long = "cache-table-meta-statistic-count", default_value = "256")] + pub table_meta_statistic_count: u64, + + /// Enable bloom index cache. Default is enabled. Set it to false to disable all the bloom index caches + #[clap(long = "cache-enable-table-bloom-index-cache", default_value = "true")] + #[serde(default = "bool_true")] + pub enable_table_bloom_index_cache: bool, + + /// Max number of cached bloom index meta objects. Set it to 0 to disable it. + #[clap(long = "cache-table-bloom-index-meta-count", default_value = "3000")] + pub table_bloom_index_meta_count: u64, + + /// Max number of cached bloom index filters. Set it to 0 to disable it. + // One bloom index filter per column of data block being indexed will be generated if necessary. + // + // For example, a table of 1024 columns, with 800 data blocks, a query that triggers a full + // table filter on 2 columns, might populate 2 * 800 bloom index filter cache items (at most) + #[clap( + long = "cache-table-bloom-index-filter-count", + default_value = "1048576" + )] + pub table_bloom_index_filter_count: u64, + + /// Type of data cache storage + #[clap(long = "cache-data-cache-storage", value_enum, default_value_t)] + pub data_cache_storage: CacheStorageTypeConfig, + + /// Max size of external cache population queue length + /// + /// the items being queued reference table column raw data, which are + /// un-deserialized and usually compressed (depends on table compression options). + /// + /// - please monitor the 'table_data_cache_population_pending_count' metric + /// if it is too high, and takes too much memory, please consider decrease this value + /// + /// - please monitor the 'population_overflow_count' metric + /// if it keeps increasing, and disk cache hits rate is not as expected. please consider + /// increase this value. 
+ #[clap( + long = "cache-data-cache-population-queue-size", + default_value = "65536" + )] + pub table_data_cache_population_queue_size: u32, + + /// Storage that hold the data caches + #[clap(flatten)] + #[serde(rename = "disk")] + pub disk_cache_config: DiskCacheConfig, + + /// Max size of in memory table column object cache. By default it is 0 (disabled) + /// + /// CAUTION: The cached items are deserialized table column objects, may take a lot of memory. + /// + /// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into, + /// and the access pattern will benefit from caching, consider enabled this cache. + #[clap(long = "cache-table-data-deserialized-data-bytes", default_value = "0")] + pub table_data_deserialized_data_bytes: u64, +} + +#[inline] +fn bool_true() -> bool { + true +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, ValueEnum)] +#[serde(rename_all = "lowercase")] +pub enum CacheStorageTypeConfig { + None, + Disk, + // Redis, +} + +impl Default for CacheStorageTypeConfig { + fn default() -> Self { + Self::None + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Args, Default)] +#[serde(default, deny_unknown_fields)] +pub struct DiskCacheConfig { + /// Max bytes of cached raw table data. Default 20GB, set it to 0 to disable it. + #[clap(long = "cache-disk-max-bytes", default_value = "21474836480")] + pub max_bytes: u64, + + /// Table disk cache root path + #[clap(long = "cache-disk-path", default_value = "./.databend/_cache")] + pub path: String, +} + +mod cache_config_converters { + use super::*; + + impl From for Config { + fn from(inner: InnerConfig) -> Self { + Self { + cmd: inner.cmd, + config_file: inner.config_file, + query: inner.query.into(), + log: inner.log.into(), + meta: inner.meta.into(), + storage: inner.storage.into(), + catalog: HiveCatalogConfig::default(), + local: inner.local.into(), + + catalogs: inner + .catalogs + .into_iter() + .map(|(k, v)| (k, v.into())) + .collect(), + cache: inner.cache.into(), + } + } + } + + impl TryInto for Config { + type Error = ErrorCode; + + fn try_into(self) -> Result { + let mut catalogs = HashMap::new(); + for (k, v) in self.catalogs.into_iter() { + let catalog = v.try_into()?; + catalogs.insert(k, catalog); + } + if !self.catalog.meta_store_address.is_empty() || !self.catalog.protocol.is_empty() { + tracing::warn!( + "`catalog` is planned to be deprecated, please add catalog in `catalogs` instead" + ); + let hive = self.catalog.try_into()?; + let catalog = InnerCatalogConfig::Hive(hive); + catalogs.insert(CATALOG_HIVE.to_string(), catalog); + } + + Ok(InnerConfig { + cmd: self.cmd, + config_file: self.config_file, + query: self.query.try_into()?, + log: self.log.try_into()?, + meta: self.meta.try_into()?, + storage: self.storage.try_into()?, + local: self.local.try_into()?, + catalogs, + cache: self.cache.try_into()?, + }) + } + } + + impl TryFrom for inner::CacheConfig { + type Error = ErrorCode; + + fn try_from(value: CacheConfig) -> std::result::Result { + Ok(Self { + enable_table_meta_cache: value.enable_table_meta_cache, + table_meta_snapshot_count: value.table_meta_snapshot_count, + table_meta_segment_count: value.table_meta_segment_count, + table_meta_statistic_count: value.table_meta_statistic_count, + enable_table_index_bloom: value.enable_table_bloom_index_cache, + table_bloom_index_meta_count: value.table_bloom_index_meta_count, + table_bloom_index_filter_count: value.table_bloom_index_filter_count, + data_cache_storage: 
value.data_cache_storage.try_into()?, + table_data_cache_population_queue_size: value + .table_data_cache_population_queue_size, + disk_cache_config: value.disk_cache_config.try_into()?, + table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes, + }) + } + } + + impl From for CacheConfig { + fn from(value: inner::CacheConfig) -> Self { + Self { + enable_table_meta_cache: value.enable_table_meta_cache, + table_meta_snapshot_count: value.table_meta_snapshot_count, + table_meta_segment_count: value.table_meta_segment_count, + table_meta_statistic_count: value.table_meta_statistic_count, + enable_table_bloom_index_cache: value.enable_table_index_bloom, + table_bloom_index_meta_count: value.table_bloom_index_meta_count, + table_bloom_index_filter_count: value.table_bloom_index_filter_count, + data_cache_storage: value.data_cache_storage.into(), + table_data_cache_population_queue_size: value + .table_data_cache_population_queue_size, + disk_cache_config: value.disk_cache_config.into(), + table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes, + } + } + } + + impl TryFrom for inner::DiskCacheConfig { + type Error = ErrorCode; + fn try_from(value: DiskCacheConfig) -> std::result::Result { + Ok(Self { + max_bytes: value.max_bytes, + path: value.path, + }) + } + } + + impl From for DiskCacheConfig { + fn from(value: inner::DiskCacheConfig) -> Self { + Self { + max_bytes: value.max_bytes, + path: value.path, + } + } + } + + impl TryFrom for inner::CacheStorageTypeConfig { + type Error = ErrorCode; + fn try_from(value: CacheStorageTypeConfig) -> std::result::Result { + Ok(match value { + CacheStorageTypeConfig::None => inner::CacheStorageTypeConfig::None, + CacheStorageTypeConfig::Disk => inner::CacheStorageTypeConfig::Disk, + }) + } + } + + impl From for CacheStorageTypeConfig { + fn from(value: inner::CacheStorageTypeConfig) -> Self { + match value { + inner::CacheStorageTypeConfig::None => CacheStorageTypeConfig::None, + inner::CacheStorageTypeConfig::Disk => CacheStorageTypeConfig::Disk, + } + } + } +} + +// Obsoleted configuration entries checking +// +// The following code should be removed from the release after the next release. +// Just give user errors without any detail explanation and migration suggestions. +impl QueryConfig { + fn check( + target: &Option, + cli_arg_name: &str, + alternative_cli_arg: &str, + alternative_toml_config: &str, + alternative_env_var: &str, + ) -> Option { + target.as_ref().map(|_| { + format!( + " + -------------------------------------------------------------- + *** {cli_arg_name} *** is obsoleted : + -------------------------------------------------------------- + alternative command-line options : {alternative_cli_arg} + alternative environment variable : {alternative_env_var} + alternative toml config : {alternative_toml_config} + -------------------------------------------------------------- +" + ) + }) + } + fn check_obsoleted(&self) -> Result<()> { + let check_results = vec![ + Self::check( + &self.table_disk_cache_mb_size, + "table-disk-cache-mb-size", + "cache-disk-max-bytes", + r#" + [cache] + ... + data_cache_storage = "disk" + ... + [cache.disk] + max_bytes = [MAX_BYTES] + ... 
+ "#, + "CACHE_DISK_MAX_BYTES", + ), + Self::check( + &self.table_meta_cache_enabled, + "table-meta-cache-enabled", + "cache-enable-table-meta-cache", + r#" + [cache] + enable_table_meta_cache=[true|false] + "#, + "CACHE_ENABLE_TABLE_META_CACHE", + ), + Self::check( + &self.table_cache_block_meta_count, + "table-cache-block-meta-count", + "N/A", + "N/A", + "N/A", + ), + Self::check( + &self.table_memory_cache_mb_size, + "table-memory-cache-mb-size", + "N/A", + "N/A", + "N/A", + ), + Self::check( + &self.table_disk_cache_root, + "table-disk-cache-root", + "cache-disk-path", + r#" + [cache] + ... + data_cache_storage = "disk" + ... + [cache.disk] + max_bytes = [MAX_BYTES] + path = [PATH] + ... + "#, + "CACHE_DISK_PATH", + ), + Self::check( + &self.table_cache_snapshot_count, + "table-cache-snapshot-count", + "cache-table-meta-snapshot-count", + r#" + [cache] + table_meta_snapshot_count = [COUNT] + "#, + "CACHE_TABLE_META_SNAPSHOT_COUNT", + ), + Self::check( + &self.table_cache_statistic_count, + "table-cache-statistic-count", + "cache-table-meta-statistic-count", + r#" + [cache] + table_meta_statistic_count = [COUNT] + "#, + "CACHE_TABLE_META_STATISTIC_COUNT", + ), + Self::check( + &self.table_cache_segment_count, + "table-cache-segment-count", + "cache-table-meta-segment-count", + r#" + [cache] + table_meta_segment_count = [COUNT] + "#, + "CACHE_TABLE_META_SEGMENT_COUNT", + ), + Self::check( + &self.table_cache_bloom_index_meta_count, + "table-cache-bloom-index-meta-count", + "cache-table-bloom-index-meta-count", + r#" + [cache] + table_bloom_index_meta_count = [COUNT] + "#, + "CACHE_TABLE_BLOOM_INDEX_META_COUNT", + ), + Self::check( + &self.table_cache_bloom_index_filter_count, + "table-cache-bloom-index-filter-count", + "cache-table-bloom-index-filter-count", + r#" + [cache] + table_bloom_index_filter_count = [COUNT] + "#, + "CACHE_TABLE_BLOOM_INDEX_FILTER_COUNT", + ), + ]; + let messages = check_results.into_iter().flatten().collect::>(); + if !messages.is_empty() { + let errors = messages.join("\n"); + Err(ErrorCode::InvalidConfig(format!("\n{errors}"))) + } else { + Ok(()) + } + } + + pub const fn obsoleted_option_keys() -> &'static [&'static str; 10] { + &[ + "table_disk_cache_mb_size", + "table_meta_cache_enabled", + "table_cache_block_meta_count", + "table_memory_cache_mb_size", + "table_disk_cache_root", + "table_cache_snapshot_count", + "table_cache_statistic_count", + "table_cache_segment_count", + "table_cache_bloom_index_meta_count", + "table_cache_bloom_index_filter_count", + ] + } +} diff --git a/src/query/config/src/global.rs b/src/query/config/src/global.rs index ebabd55375e3c..f14da60210b85 100644 --- a/src/query/config/src/global.rs +++ b/src/query/config/src/global.rs @@ -17,17 +17,17 @@ use std::sync::Arc; use common_base::base::GlobalInstance; use common_exception::Result; -use crate::Config; +use crate::InnerConfig; pub struct GlobalConfig; impl GlobalConfig { - pub fn init(config: Config) -> Result<()> { + pub fn init(config: InnerConfig) -> Result<()> { GlobalInstance::set(Arc::new(config)); Ok(()) } - pub fn instance() -> Arc { + pub fn instance() -> Arc { GlobalInstance::get() } } diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index de1bed5120292..f9cf2eb359591 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -30,13 +30,13 @@ use common_storage::StorageConfig; use common_tracing::Config as LogConfig; use common_users::idm_config::IDMConfig; -use super::outer_v0::Config as OuterV0Config; +use 
super::config::Config; /// Inner config for query. /// /// All function should implement based on this Config. #[derive(Clone, Default, Debug, PartialEq, Eq)] -pub struct Config { +pub struct InnerConfig { pub cmd: String, pub config_file: String, @@ -58,14 +58,17 @@ pub struct Config { // - Later, catalog information SHOULD be kept in KV Service // - currently only supports HIVE (via hive meta store) pub catalogs: HashMap, + + // Cache Config + pub cache: CacheConfig, } -impl Config { - /// As requires by [RFC: Config Backward Compatibility](https://github.com/datafuselabs/databend/pull/5324), we will load user's config via wrapper [`ConfigV0`] and then convert from [`ConfigV0`] to [`Config`]. +impl InnerConfig { + /// As requires by [RFC: Config Backward Compatibility](https://github.com/datafuselabs/databend/pull/5324), we will load user's config via wrapper [`ConfigV0`] and then convert from [`ConfigV0`] to [`InnerConfig`]. /// /// In the future, we could have `ConfigV1` and `ConfigV2`. pub fn load() -> Result { - let cfg: Self = OuterV0Config::load(true)?.try_into()?; + let cfg: Self = Config::load(true)?.try_into()?; // Only check meta config when cmd is empty. if cfg.cmd.is_empty() { @@ -78,7 +81,7 @@ impl Config { /// /// This function is served for tests only. pub fn load_for_test() -> Result { - let cfg: Self = OuterV0Config::load(false)?.try_into()?; + let cfg: Self = Config::load(false)?.try_into()?; Ok(cfg) } @@ -96,7 +99,7 @@ impl Config { !self.query.rpc_tls_server_key.is_empty() && !self.query.rpc_tls_server_cert.is_empty() } - /// Transform config into the outer style. + /// Transform inner::Config into the Config. /// /// This function should only be used for end-users. /// @@ -105,8 +108,8 @@ impl Config { /// - system config table /// - HTTP Handler /// - tests - pub fn into_outer(self) -> OuterV0Config { - OuterV0Config::from(self) + pub fn into_config(self) -> Config { + Config::from(self) } } @@ -147,26 +150,6 @@ pub struct QueryConfig { pub table_engine_memory_enabled: bool, pub wait_timeout_mills: u64, pub max_query_log_size: usize, - /// Table Cached enabled - pub table_meta_cache_enabled: bool, - /// Max number of cached table block meta - pub table_cache_block_meta_count: u64, - /// Table memory cache size (mb) - pub table_memory_cache_mb_size: u64, - /// Table disk cache folder root - pub table_disk_cache_root: String, - /// Table disk cache size (mb) - pub table_disk_cache_mb_size: u64, - /// Max number of cached table snapshot - pub table_cache_snapshot_count: u64, - /// Max number of cached table statistic - pub table_cache_statistic_count: u64, - /// Max number of cached table segment - pub table_cache_segment_count: u64, - /// Max number of cached bloom index meta objects - pub table_cache_bloom_index_meta_count: u64, - /// Max number of cached bloom index filters - pub table_cache_bloom_index_filter_count: u64, /// If in management mode, only can do some meta level operations(database/table/user/stage etc.) with metasrv. 
pub management_mode: bool, pub jwt_key_file: String, @@ -212,17 +195,7 @@ impl Default for QueryConfig { rpc_tls_query_service_domain_name: "localhost".to_string(), table_engine_memory_enabled: true, wait_timeout_mills: 5000, - max_query_log_size: 10000, - table_meta_cache_enabled: true, - table_cache_block_meta_count: 102400, - table_memory_cache_mb_size: 256, - table_disk_cache_root: "_cache".to_string(), - table_disk_cache_mb_size: 1024, - table_cache_snapshot_count: 256, - table_cache_statistic_count: 256, - table_cache_segment_count: 10240, - table_cache_bloom_index_meta_count: 3000, - table_cache_bloom_index_filter_count: 1024 * 1024, + max_query_log_size: 10_000, management_mode: false, jwt_key_file: "".to_string(), jwt_key_files: Vec::new(), @@ -420,3 +393,106 @@ impl Default for LocalConfig { } } } + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct CacheConfig { + /// Enable table meta cache. Default is enabled. Set it to false to disable all the table meta caches + pub enable_table_meta_cache: bool, + + /// Max number of cached table snapshot + pub table_meta_snapshot_count: u64, + + /// Max number of cached table segment + pub table_meta_segment_count: u64, + + /// Max number of cached table segment + pub table_meta_statistic_count: u64, + + /// Enable bloom index cache. Default is enabled. Set it to false to disable all the bloom index caches + pub enable_table_index_bloom: bool, + + /// Max number of cached bloom index meta objects. Set it to 0 to disable it. + pub table_bloom_index_meta_count: u64, + + /// Max number of cached bloom index filters. Set it to 0 to disable it. + // One bloom index filter per column of data block being indexed will be generated if necessary. + // + // For example, a table of 1024 columns, with 800 data blocks, a query that triggers a full + // table filter on 2 columns, might populate 2 * 800 bloom index filter cache items (at most) + pub table_bloom_index_filter_count: u64, + + pub data_cache_storage: CacheStorageTypeConfig, + + /// Max size of external cache population queue length + /// + /// the items being queued reference table column raw data, which are + /// un-deserialized and usually compressed (depends on table compression options). + /// + /// - please monitor the 'table_data_cache_population_pending_count' metric + /// if it is too high, and takes too much memory, please consider decrease this value + /// + /// - please monitor the 'population_overflow_count' metric + /// if it keeps increasing, and disk cache hits rate is not as expected. please consider + /// increase this value. + pub table_data_cache_population_queue_size: u32, + + /// Storage that hold the raw data caches + pub disk_cache_config: DiskCacheConfig, + + /// Max size of in memory table column object cache. By default it is 0 (disabled) + /// + /// CAUTION: The cache items are deserialized table column objects, may take a lot of memory. + /// + /// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into, + /// and the access pattern will benefit from caching, consider enabled this cache. + pub table_data_deserialized_data_bytes: u64, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum CacheStorageTypeConfig { + None, + Disk, + // Redis, +} + +impl Default for CacheStorageTypeConfig { + fn default() -> Self { + Self::None + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct DiskCacheConfig { + /// Max bytes of cached raw table data. Default 20GB, set it to 0 to disable it. 
+ pub max_bytes: u64, + + /// Table disk cache root path + pub path: String, +} + +impl Default for DiskCacheConfig { + fn default() -> Self { + Self { + max_bytes: 21474836480, + path: "./.databend/_cache".to_owned(), + } + } +} + +impl Default for CacheConfig { + fn default() -> Self { + Self { + enable_table_meta_cache: true, + table_meta_snapshot_count: 256, + table_meta_segment_count: 10240, + table_meta_statistic_count: 256, + enable_table_index_bloom: true, + table_bloom_index_meta_count: 3000, + table_bloom_index_filter_count: 1048576, + data_cache_storage: Default::default(), + table_data_cache_population_queue_size: 65536, + disk_cache_config: Default::default(), + table_data_deserialized_data_bytes: 0, + } + } +} diff --git a/src/query/config/src/lib.rs b/src/query/config/src/lib.rs index 9684d9b5e0f72..157f625907bd2 100644 --- a/src/query/config/src/lib.rs +++ b/src/query/config/src/lib.rs @@ -19,26 +19,29 @@ /// /// We are providing two config types: /// -/// - [`inner::Config`] which will be exposed as [`crate::Config`] will be used in all business logic. -/// - [`outer_v0::Config`] is the outer config for [`inner::Config`] which will be exposed to end-users. -/// - [`global::GlobalConfig`] is a global config singleton of [`crate::Config`]. +/// - [`config::Config`] represents the options from command line , configuration files or environment vars. +/// - [`inner::InnerConfig`] "internal representation" of application settings, built from Config. +/// - [`global::GlobalConfig`] A global singleton of [`crate::InnerConfig`]. /// -/// It's safe to refactor [`inner::Config`] in anyway, as long as it satisfied the following traits +/// It's safe to refactor [`inner::InnerConfig`] in anyway, as long as it satisfied the following traits /// -/// - `TryInto for outer_v0::Config` -/// - `From for outer_v0::Config` +/// - `TryInto for config::Config` +/// - `From for config::Config` +mod config; mod global; mod inner; -mod outer_v0; mod version; +pub use config::CacheStorageTypeConfig; +pub use config::Config; +pub use config::QueryConfig; +pub use config::StorageConfig; pub use global::GlobalConfig; +pub use inner::CacheConfig; +pub use inner::CacheStorageTypeConfig as CacheStorageTypeInnerConfig; pub use inner::CatalogConfig; pub use inner::CatalogHiveConfig; -pub use inner::Config; -pub use inner::QueryConfig; +pub use inner::InnerConfig; pub use inner::ThriftProtocol; -pub use outer_v0::Config as OuterConfig; -pub use outer_v0::StorageConfig; pub use version::DATABEND_COMMIT_VERSION; pub use version::QUERY_SEMVER; diff --git a/src/query/config/tests/main.rs b/src/query/config/tests/main.rs index 811d8fbf30fbe..7769d6b02edce 100644 --- a/src/query/config/tests/main.rs +++ b/src/query/config/tests/main.rs @@ -16,19 +16,19 @@ use std::ffi::OsString; use clap::Parser; use common_config::Config; -use common_config::OuterConfig; +use common_config::InnerConfig; use pretty_assertions::assert_eq; -/// It's required to make sure config's default value is the same with clap. +/// It's required to make sure setting's default value is the same with clap. 
#[test] fn test_config_default() { - let type_default = Config::default(); - let args_default: Config = OuterConfig::parse_from(Vec::::new()) + let setting_default = InnerConfig::default(); + let config_default: InnerConfig = Config::parse_from(Vec::::new()) .try_into() .expect("parse from args must succeed"); assert_eq!( - type_default, args_default, - "inner config's default value is different from args, please check again" + setting_default, config_default, + "default setting is different from default config, please check again" ) } diff --git a/src/query/pipeline/sources/src/sync_source.rs b/src/query/pipeline/sources/src/sync_source.rs index b39a7ab671a19..a362dc050657d 100644 --- a/src/query/pipeline/sources/src/sync_source.rs +++ b/src/query/pipeline/sources/src/sync_source.rs @@ -37,7 +37,6 @@ pub trait SyncSource: Send { // TODO: This can be refactored using proc macros pub struct SyncSourcer { is_finish: bool, - inner: T, output: Arc, generated_data: Option, diff --git a/src/query/service/src/api/http/v1/config.rs b/src/query/service/src/api/http/v1/config.rs index 0e9ebb1cac42e..da255fa93fc50 100644 --- a/src/query/service/src/api/http/v1/config.rs +++ b/src/query/service/src/api/http/v1/config.rs @@ -18,5 +18,7 @@ use poem::IntoResponse; #[poem::handler] pub async fn config_handler() -> poem::Result { - Ok(Json(GlobalConfig::instance().as_ref().clone().into_outer())) + Ok(Json( + GlobalConfig::instance().as_ref().clone().into_config(), + )) } diff --git a/src/query/service/src/api/http_service.rs b/src/query/service/src/api/http_service.rs index 8064850e54cb4..7997ed7714aae 100644 --- a/src/query/service/src/api/http_service.rs +++ b/src/query/service/src/api/http_service.rs @@ -15,7 +15,7 @@ use std::net::SocketAddr; use std::path::Path; -use common_config::Config; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_http::health_handler; use common_http::home::debug_home_handler; @@ -36,12 +36,12 @@ use tracing::warn; use crate::servers::Server; pub struct HttpService { - config: Config, + config: InnerConfig, shutdown_handler: HttpShutdownHandler, } impl HttpService { - pub fn create(config: &Config) -> Box { + pub fn create(config: &InnerConfig) -> Box { Box::new(HttpService { config: config.clone(), shutdown_handler: HttpShutdownHandler::create("http api".to_string()), @@ -92,7 +92,7 @@ impl HttpService { route } - fn build_tls(config: &Config) -> Result { + fn build_tls(config: &InnerConfig) -> Result { let certificate = RustlsCertificate::new() .cert(std::fs::read(config.query.api_tls_server_cert.as_str())?) .key(std::fs::read(config.query.api_tls_server_key.as_str())?); diff --git a/src/query/service/src/api/rpc/packets/packet.rs b/src/query/service/src/api/rpc/packets/packet.rs index d86562c91c0a2..25d9eb7c1f6b0 100644 --- a/src/query/service/src/api/rpc/packets/packet.rs +++ b/src/query/service/src/api/rpc/packets/packet.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use common_arrow::arrow_format::flight::service::flight_service_client::FlightServiceClient; -use common_config::Config; +use common_config::InnerConfig; use common_exception::Result; use common_grpc::ConnectionFactory; @@ -21,12 +21,12 @@ use crate::api::FlightClient; #[async_trait::async_trait] pub trait Packet: Send + Sync { - async fn commit(&self, config: &Config, timeout: u64) -> Result<()>; + async fn commit(&self, config: &InnerConfig, timeout: u64) -> Result<()>; } #[async_trait::async_trait] impl Packet for Vec { - async fn commit(&self, config: &Config, timeout: u64) -> Result<()> { + async fn commit(&self, config: &InnerConfig, timeout: u64) -> Result<()> { for packet in self.iter() { packet.commit(config, timeout).await?; } @@ -35,7 +35,7 @@ impl Packet for Vec { } } -pub async fn create_client(config: &Config, address: &str) -> Result { +pub async fn create_client(config: &InnerConfig, address: &str) -> Result { match config.tls_query_cli_enabled() { true => Ok(FlightClient::new(FlightServiceClient::new( ConnectionFactory::create_rpc_channel( diff --git a/src/query/service/src/api/rpc/packets/packet_execute.rs b/src/query/service/src/api/rpc/packets/packet_execute.rs index 35000e6ae38f2..dcfe5f828c533 100644 --- a/src/query/service/src/api/rpc/packets/packet_execute.rs +++ b/src/query/service/src/api/rpc/packets/packet_execute.rs @@ -15,7 +15,7 @@ use std::collections::HashMap; use std::sync::Arc; -use common_config::Config; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_types::NodeInfo; @@ -49,7 +49,7 @@ impl ExecutePartialQueryPacket { #[async_trait::async_trait] impl Packet for ExecutePartialQueryPacket { - async fn commit(&self, config: &Config, timeout: u64) -> Result<()> { + async fn commit(&self, config: &InnerConfig, timeout: u64) -> Result<()> { if !self.executors_info.contains_key(&self.executor) { return Err(ErrorCode::ClusterUnknownNode(format!( "Not found {} node in cluster", diff --git a/src/query/service/src/api/rpc/packets/packet_executor.rs b/src/query/service/src/api/rpc/packets/packet_executor.rs index 1fef1e582f3da..5e5f29d45d1d8 100644 --- a/src/query/service/src/api/rpc/packets/packet_executor.rs +++ b/src/query/service/src/api/rpc/packets/packet_executor.rs @@ -15,7 +15,7 @@ use std::collections::HashMap; use std::sync::Arc; -use common_config::Config; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_types::NodeInfo; @@ -56,7 +56,7 @@ impl QueryFragmentsPlanPacket { #[async_trait::async_trait] impl Packet for QueryFragmentsPlanPacket { - async fn commit(&self, config: &Config, timeout: u64) -> Result<()> { + async fn commit(&self, config: &InnerConfig, timeout: u64) -> Result<()> { if !self.executors_info.contains_key(&self.executor) { return Err(ErrorCode::Internal(format!( "Not found {} node in cluster", diff --git a/src/query/service/src/api/rpc/packets/packet_publisher.rs b/src/query/service/src/api/rpc/packets/packet_publisher.rs index 1a22c6119bb00..c148bb1b4829c 100644 --- a/src/query/service/src/api/rpc/packets/packet_publisher.rs +++ b/src/query/service/src/api/rpc/packets/packet_publisher.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::Config; +use common_config::InnerConfig; use common_exception::Result; use common_meta_types::NodeInfo; @@ -53,7 +53,7 @@ impl InitNodesChannelPacket { #[async_trait::async_trait] impl Packet for InitNodesChannelPacket { - async fn commit(&self, config: &Config, timeout: 
u64) -> Result<()> { + async fn commit(&self, config: &InnerConfig, timeout: u64) -> Result<()> { let executor_info = &self.executor; let mut conn = create_client(config, &executor_info.flight_address).await?; let action = FlightAction::InitNodesChannel(InitNodesChannel { diff --git a/src/query/service/src/api/rpc_service.rs b/src/query/service/src/api/rpc_service.rs index 350d8e45b31be..a53c528fd5878 100644 --- a/src/query/service/src/api/rpc_service.rs +++ b/src/query/service/src/api/rpc_service.rs @@ -20,7 +20,7 @@ use common_arrow::arrow_format::flight::service::flight_service_server::FlightSe use common_base::base::tokio; use common_base::base::tokio::net::TcpListener; use common_base::base::tokio::sync::Notify; -use common_config::Config; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use tokio_stream::wrappers::TcpListenerStream; @@ -33,12 +33,12 @@ use crate::api::rpc::DatabendQueryFlightService; use crate::servers::Server as DatabendQueryServer; pub struct RpcService { - pub config: Config, + pub config: InnerConfig, pub abort_notify: Arc, } impl RpcService { - pub fn create(config: Config) -> Result> { + pub fn create(config: InnerConfig) -> Result> { Ok(Box::new(Self { config, abort_notify: Arc::new(Notify::new()), @@ -60,7 +60,7 @@ impl RpcService { } } - async fn server_tls_config(conf: &Config) -> Result { + async fn server_tls_config(conf: &InnerConfig) -> Result { let cert = tokio::fs::read(conf.query.rpc_tls_server_cert.as_str()).await?; let key = tokio::fs::read(conf.query.rpc_tls_server_key.as_str()).await?; let server_identity = Identity::from_pem(cert, key); diff --git a/src/query/service/src/auth.rs b/src/query/service/src/auth.rs index 6330490b92bf5..fb4ef331b268e 100644 --- a/src/query/service/src/auth.rs +++ b/src/query/service/src/auth.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -pub use common_config::Config; +pub use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::principal::AuthInfo; @@ -41,7 +41,7 @@ pub enum Credential { } impl AuthMgr { - pub fn create(cfg: &Config) -> Arc { + pub fn create(cfg: &InnerConfig) -> Arc { Arc::new(AuthMgr { jwt_auth: JwtAuthenticator::create( cfg.query.jwt_key_file.clone(), diff --git a/src/query/service/src/catalogs/catalog_manager.rs b/src/query/service/src/catalogs/catalog_manager.rs index 5663163fe1bc1..94f7168824032 100644 --- a/src/query/service/src/catalogs/catalog_manager.rs +++ b/src/query/service/src/catalogs/catalog_manager.rs @@ -19,7 +19,7 @@ use common_catalog::catalog::Catalog; pub use common_catalog::catalog::CatalogManager; use common_catalog::catalog_kind::CATALOG_DEFAULT; use common_config::CatalogConfig; -use common_config::Config; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::CatalogType; @@ -33,13 +33,13 @@ use crate::catalogs::DatabaseCatalog; #[async_trait::async_trait] pub trait CatalogManagerHelper { - async fn init(conf: &Config) -> Result<()>; + async fn init(conf: &InnerConfig) -> Result<()>; - async fn try_create(conf: &Config) -> Result>; + async fn try_create(conf: &InnerConfig) -> Result>; - async fn register_build_in_catalogs(&self, conf: &Config) -> Result<()>; + async fn register_build_in_catalogs(&self, conf: &InnerConfig) -> Result<()>; - fn register_external_catalogs(&self, conf: &Config) -> Result<()>; + fn register_external_catalogs(&self, conf: &InnerConfig) -> Result<()>; fn 
create_user_defined_catalog(&self, req: CreateCatalogReq) -> Result<()>; @@ -48,13 +48,13 @@ pub trait CatalogManagerHelper { #[async_trait::async_trait] impl CatalogManagerHelper for CatalogManager { - async fn init(conf: &Config) -> Result<()> { + async fn init(conf: &InnerConfig) -> Result<()> { GlobalInstance::set(Self::try_create(conf).await?); Ok(()) } - async fn try_create(conf: &Config) -> Result> { + async fn try_create(conf: &InnerConfig) -> Result> { let catalog_manager = CatalogManager { catalogs: DashMap::new(), }; @@ -69,7 +69,7 @@ impl CatalogManagerHelper for CatalogManager { Ok(Arc::new(catalog_manager)) } - async fn register_build_in_catalogs(&self, conf: &Config) -> Result<()> { + async fn register_build_in_catalogs(&self, conf: &InnerConfig) -> Result<()> { let default_catalog: Arc = Arc::new(DatabaseCatalog::try_create_with_config(conf.clone()).await?); self.catalogs @@ -77,7 +77,7 @@ impl CatalogManagerHelper for CatalogManager { Ok(()) } - fn register_external_catalogs(&self, conf: &Config) -> Result<()> { + fn register_external_catalogs(&self, conf: &InnerConfig) -> Result<()> { // currently, if the `hive` feature is not enabled // the loop will quit after the first iteration. // this is expected. diff --git a/src/query/service/src/catalogs/default/database_catalog.rs b/src/query/service/src/catalogs/default/database_catalog.rs index 1c0ab50caa20e..b65265116a6b7 100644 --- a/src/query/service/src/catalogs/default/database_catalog.rs +++ b/src/query/service/src/catalogs/default/database_catalog.rs @@ -16,7 +16,7 @@ use std::any::Any; use std::sync::Arc; use common_catalog::table_args::TableArgs; -use common_config::Config; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::CountTablesReply; @@ -87,7 +87,7 @@ impl DatabaseCatalog { } } - pub async fn try_create_with_config(conf: Config) -> Result { + pub async fn try_create_with_config(conf: InnerConfig) -> Result { let immutable_catalog = ImmutableCatalog::try_create_with_config(&conf).await?; let mutable_catalog = MutableCatalog::try_create_with_config(conf).await?; let table_function_factory = TableFunctionFactory::create(); diff --git a/src/query/service/src/catalogs/default/immutable_catalog.rs b/src/query/service/src/catalogs/default/immutable_catalog.rs index 1dda37632ee56..5ee134d46da48 100644 --- a/src/query/service/src/catalogs/default/immutable_catalog.rs +++ b/src/query/service/src/catalogs/default/immutable_catalog.rs @@ -15,7 +15,7 @@ use std::any::Any; use std::sync::Arc; -use common_config::Config; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::CountTablesReply; @@ -70,7 +70,7 @@ pub struct ImmutableCatalog { } impl ImmutableCatalog { - pub async fn try_create_with_config(conf: &Config) -> Result { + pub async fn try_create_with_config(conf: &InnerConfig) -> Result { // The global db meta. 
let mut sys_db_meta = InMemoryMetas::create(SYS_DB_ID_BEGIN, SYS_TBL_ID_BEGIN); sys_db_meta.init_db("system"); diff --git a/src/query/service/src/catalogs/default/mutable_catalog.rs b/src/query/service/src/catalogs/default/mutable_catalog.rs index 475b4e58e108c..78a4b74823dba 100644 --- a/src/query/service/src/catalogs/default/mutable_catalog.rs +++ b/src/query/service/src/catalogs/default/mutable_catalog.rs @@ -15,7 +15,7 @@ use std::any::Any; use std::sync::Arc; -use common_config::Config; +use common_config::InnerConfig; use common_exception::Result; use common_meta_api::SchemaApi; use common_meta_app::schema::CountTablesReply; @@ -91,7 +91,7 @@ impl MutableCatalog { /// /// MetaEmbedded /// ``` - pub async fn try_create_with_config(conf: Config) -> Result { + pub async fn try_create_with_config(conf: InnerConfig) -> Result { let meta = { let provider = Arc::new(MetaStoreProvider::new(conf.meta.to_meta_grpc_client_conf())); diff --git a/src/query/service/src/clusters/cluster.rs b/src/query/service/src/clusters/cluster.rs index 27206766ccf16..afd9da7e08072 100644 --- a/src/query/service/src/clusters/cluster.rs +++ b/src/query/service/src/clusters/cluster.rs @@ -32,7 +32,7 @@ use common_base::base::GlobalUniqName; use common_base::base::SignalStream; use common_base::base::SignalType; pub use common_catalog::cluster_info::Cluster; -use common_config::Config; +use common_config::InnerConfig; use common_config::DATABEND_COMMIT_VERSION; use common_exception::ErrorCode; use common_exception::Result; @@ -73,7 +73,7 @@ pub trait ClusterHelper { fn is_empty(&self) -> bool; fn is_local(&self, node: &NodeInfo) -> bool; fn local_id(&self) -> String; - async fn create_node_conn(&self, name: &str, config: &Config) -> Result; + async fn create_node_conn(&self, name: &str, config: &InnerConfig) -> Result; fn get_nodes(&self) -> Vec>; } @@ -102,7 +102,7 @@ impl ClusterHelper for Cluster { self.local_id.clone() } - async fn create_node_conn(&self, name: &str, config: &Config) -> Result { + async fn create_node_conn(&self, name: &str, config: &InnerConfig) -> Result { for node in &self.nodes { if node.id == name { return match config.tls_query_cli_enabled() { @@ -140,7 +140,7 @@ impl ClusterHelper for Cluster { impl ClusterDiscovery { const METRIC_LABEL_FUNCTION: &'static str = "function"; - pub async fn create_meta_client(cfg: &Config) -> Result { + pub async fn create_meta_client(cfg: &InnerConfig) -> Result { let meta_api_provider = MetaStoreProvider::new(cfg.meta.to_meta_grpc_client_conf()); match meta_api_provider.create_meta_store().await { Ok(meta_store) => Ok(meta_store), @@ -150,14 +150,17 @@ impl ClusterDiscovery { } } - pub async fn init(cfg: Config) -> Result<()> { + pub async fn init(cfg: InnerConfig) -> Result<()> { let metastore = ClusterDiscovery::create_meta_client(&cfg).await?; GlobalInstance::set(Self::try_create(&cfg, metastore).await?); Ok(()) } - pub async fn try_create(cfg: &Config, metastore: MetaStore) -> Result> { + pub async fn try_create( + cfg: &InnerConfig, + metastore: MetaStore, + ) -> Result> { let (lift_time, provider) = Self::create_provider(cfg, metastore)?; Ok(Arc::new(ClusterDiscovery { @@ -180,7 +183,7 @@ impl ClusterDiscovery { } fn create_provider( - cfg: &Config, + cfg: &InnerConfig, metastore: MetaStore, ) -> Result<(Duration, Arc)> { // TODO: generate if tenant or cluster id is empty @@ -192,7 +195,7 @@ impl ClusterDiscovery { Ok((lift_time, Arc::new(cluster_manager))) } - pub async fn discover(&self, config: &Config) -> Result> { + pub async fn discover(&self, 
config: &InnerConfig) -> Result> { match self.api_provider.get_nodes().await { Err(cause) => { label_counter_with_val_and_labels( @@ -351,7 +354,7 @@ impl ClusterDiscovery { }; } - pub async fn register_to_metastore(self: &Arc, cfg: &Config) -> Result<()> { + pub async fn register_to_metastore(self: &Arc, cfg: &InnerConfig) -> Result<()> { let cpus = cfg.query.num_cpus; let mut address = cfg.query.flight_api_address.clone(); @@ -503,7 +506,7 @@ impl ClusterHeartbeat { } } -pub async fn create_client(config: &Config, address: &str) -> Result { +pub async fn create_client(config: &InnerConfig, address: &str) -> Result { match config.tls_query_cli_enabled() { true => Ok(FlightClient::new(FlightServiceClient::new( ConnectionFactory::create_rpc_channel( diff --git a/src/query/service/src/databases/database_factory.rs b/src/query/service/src/databases/database_factory.rs index 6e56e574bb7ac..91e2390aed618 100644 --- a/src/query/service/src/databases/database_factory.rs +++ b/src/query/service/src/databases/database_factory.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::Config; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::DatabaseInfo; @@ -45,7 +45,7 @@ pub struct DatabaseFactory { } impl DatabaseFactory { - pub fn create(_: Config) -> Self { + pub fn create(_: InnerConfig) -> Self { let creators: DashMap> = DashMap::new(); creators.insert( DefaultDatabase::NAME.to_string(), diff --git a/src/query/service/src/databases/system/system_database.rs b/src/query/service/src/databases/system/system_database.rs index fbca5e70f5ac0..a614f81080883 100644 --- a/src/query/service/src/databases/system/system_database.rs +++ b/src/query/service/src/databases/system/system_database.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::Config; +use common_config::InnerConfig; use common_meta_app::schema::DatabaseIdent; use common_meta_app::schema::DatabaseInfo; use common_meta_app::schema::DatabaseMeta; @@ -54,7 +54,7 @@ pub struct SystemDatabase { } impl SystemDatabase { - pub fn create(sys_db_meta: &mut InMemoryMetas, config: &Config) -> Self { + pub fn create(sys_db_meta: &mut InMemoryMetas, config: &InnerConfig) -> Self { let table_list: Vec> = vec![ OneTable::create(sys_db_meta.next_table_id()), FunctionsTable::create(sys_db_meta.next_table_id()), diff --git a/src/query/service/src/global_services.rs b/src/query/service/src/global_services.rs index 6e65ebc036916..48edc92382e84 100644 --- a/src/query/service/src/global_services.rs +++ b/src/query/service/src/global_services.rs @@ -15,8 +15,8 @@ use common_base::base::GlobalInstance; use common_base::runtime::GlobalIORuntime; use common_catalog::catalog::CatalogManager; -use common_config::Config; use common_config::GlobalConfig; +use common_config::InnerConfig; use common_exception::Result; use common_storage::CacheOperator; use common_storage::DataOperator; @@ -35,12 +35,12 @@ use crate::sessions::SessionManager; pub struct GlobalServices; impl GlobalServices { - pub async fn init(config: Config) -> Result<()> { + pub async fn init(config: InnerConfig) -> Result<()> { GlobalInstance::init_production(); GlobalServices::init_with(config).await } - pub async fn init_with(config: Config) -> Result<()> { + pub async fn init_with(config: InnerConfig) -> Result<()> { // The order of initialization is very important GlobalConfig::init(config.clone())?; @@ -61,7 +61,7 @@ impl GlobalServices { config.query.tenant_id.clone(), )?; - CacheManager::init(&config.query)?; + 
CacheManager::init(&config.cache, &config.query.tenant_id)?; CatalogManager::init(&config).await?; HttpQueryManager::init(&config).await?; DataExchangeManager::init()?; diff --git a/src/query/service/src/servers/http/http_services.rs b/src/query/service/src/servers/http/http_services.rs index 06966747a06ed..dcbc2bf25614f 100644 --- a/src/query/service/src/servers/http/http_services.rs +++ b/src/query/service/src/servers/http/http_services.rs @@ -15,8 +15,8 @@ use std::net::SocketAddr; use std::path::Path; -use common_config::Config; use common_config::GlobalConfig; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_http::HttpError; use common_http::HttpShutdownHandler; @@ -82,7 +82,7 @@ impl HttpHandler { }) } - async fn build_router(&self, config: &Config, sock: SocketAddr) -> impl Endpoint { + async fn build_router(&self, config: &InnerConfig, sock: SocketAddr) -> impl Endpoint { let ep = match self.kind { HttpHandlerKind::Query => Route::new() .at( @@ -107,7 +107,7 @@ impl HttpHandler { .boxed() } - fn build_tls(config: &Config) -> Result { + fn build_tls(config: &InnerConfig) -> Result { let certificate = RustlsCertificate::new() .cert(std::fs::read( config.query.http_handler_tls_server_cert.as_str(), diff --git a/src/query/service/src/servers/http/v1/query/http_query_manager.rs b/src/query/service/src/servers/http/v1/query/http_query_manager.rs index 772aae01ddca0..66a06380ac719 100644 --- a/src/query/service/src/servers/http/v1/query/http_query_manager.rs +++ b/src/query/service/src/servers/http/v1/query/http_query_manager.rs @@ -21,7 +21,7 @@ use common_base::base::tokio::time::sleep; use common_base::base::GlobalInstance; use common_base::runtime::GlobalIORuntime; use common_base::runtime::TrySpawn; -use common_config::Config; +use common_config::InnerConfig; use common_exception::Result; use parking_lot::Mutex; use tracing::warn; @@ -48,7 +48,7 @@ pub struct HttpQueryManager { } impl HttpQueryManager { - pub async fn init(cfg: &Config) -> Result<()> { + pub async fn init(cfg: &InnerConfig) -> Result<()> { GlobalInstance::set(Arc::new(HttpQueryManager { queries: Arc::new(RwLock::new(HashMap::new())), sessions: Mutex::new(ExpiringMap::default()), diff --git a/src/query/service/src/sessions/query_ctx_shared.rs b/src/query/service/src/sessions/query_ctx_shared.rs index 3956f3dba44e9..7ebf724eac09b 100644 --- a/src/query/service/src/sessions/query_ctx_shared.rs +++ b/src/query/service/src/sessions/query_ctx_shared.rs @@ -23,7 +23,7 @@ use std::time::SystemTime; use common_base::base::Progress; use common_base::runtime::Runtime; use common_catalog::table_context::StageAttachment; -use common_config::Config; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataBlock; @@ -84,7 +84,7 @@ pub struct QueryContextShared { impl QueryContextShared { pub fn try_create( - config: &Config, + config: &InnerConfig, session: Arc, cluster_cache: Arc, ) -> Result> { diff --git a/src/query/service/src/sessions/session_mgr.rs b/src/query/service/src/sessions/session_mgr.rs index c6999557a720a..cbcaeaf0e50fe 100644 --- a/src/query/service/src/sessions/session_mgr.rs +++ b/src/query/service/src/sessions/session_mgr.rs @@ -24,8 +24,8 @@ use std::time::Duration; use common_base::base::tokio; use common_base::base::GlobalInstance; use common_base::base::SignalStream; -use common_config::Config; use common_config::GlobalConfig; +use common_config::InnerConfig; use common_exception::ErrorCode; use 
common_exception::Result; use common_metrics::label_counter; @@ -59,13 +59,13 @@ pub struct SessionManager { } impl SessionManager { - pub fn init(conf: &Config) -> Result<()> { + pub fn init(conf: &InnerConfig) -> Result<()> { GlobalInstance::set(Self::create(conf)); Ok(()) } - pub fn create(conf: &Config) -> Arc { + pub fn create(conf: &InnerConfig) -> Arc { let max_sessions = conf.query.max_active_sessions as usize; Arc::new(SessionManager { max_sessions, diff --git a/src/query/service/tests/it/configs.rs b/src/query/service/tests/it/configs.rs index 1624061a55e21..e1c8c6b64fc74 100644 --- a/src/query/service/tests/it/configs.rs +++ b/src/query/service/tests/it/configs.rs @@ -17,10 +17,12 @@ use std::env::temp_dir; use std::fs; use std::io::Write; +use common_config::CacheStorageTypeConfig; use common_config::CatalogConfig; use common_config::CatalogHiveConfig; -use common_config::Config; +use common_config::InnerConfig; use common_config::ThriftProtocol; +use common_exception::ErrorCode; use common_exception::Result; use pretty_assertions::assert_eq; @@ -44,16 +46,15 @@ fn test_env_config_s3() -> Result<()> { ("QUERY_FLIGHT_API_ADDRESS", Some("1.2.3.4:9091")), ("QUERY_ADMIN_API_ADDRESS", Some("1.2.3.4:8081")), ("QUERY_METRIC_API_ADDRESS", Some("1.2.3.4:7071")), - ("QUERY_TABLE_META_CACHE_ENABLED", Some("true")), - ("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_DISK_CACHE_ROOT", Some("_cache_env")), - ("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("256")), - ("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("10240")), + ("CACHE_ENABLE_TABLE_META_CACHE", Some("true")), + ("CACHE_DISK_PATH", Some("_cache_env")), + ("CACHE_DISK_MAX_BYES", Some("512")), + ("CACHE_TABLE_META_SNAPSHOT_COUNT", Some("256")), + ("CACHE_TABLE_META_SEGMENT_COUNT", Some("10240")), ("META_ENDPOINTS", Some("0.0.0.0:9191")), - ("TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("3000")), + ("CACHE_TABLE_BLOOM_INDEX_META_COUNT", Some("3000")), ( - "TABLE_CACHE_BLOOM_INDEX_DATA_BYTES", + "CACHE_TABLE_BLOOM_INDEX_FILTER_COUNT", Some(format!("{}", 1024 * 1024 * 1024).as_str()), ), ("STORAGE_TYPE", Some("s3")), @@ -83,7 +84,9 @@ fn test_env_config_s3() -> Result<()> { ("CONFIG_FILE", None), ], || { - let configured = Config::load_for_test().expect("must success").into_outer(); + let configured = InnerConfig::load_for_test() + .expect("must success") + .into_config(); assert_eq!("DEBUG", configured.log.level); @@ -125,15 +128,14 @@ fn test_env_config_s3() -> Result<()> { assert_eq!("us.key", configured.storage.s3.secret_access_key); assert_eq!("us.bucket", configured.storage.s3.bucket); - assert!(configured.query.table_engine_memory_enabled); - - assert!(configured.query.table_meta_cache_enabled); - assert_eq!(10240, configured.query.table_cache_segment_count); - assert_eq!(256, configured.query.table_cache_snapshot_count); - assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count); + assert!(configured.cache.enable_table_meta_cache); + assert!(configured.cache.enable_table_bloom_index_cache); + assert_eq!(10240, configured.cache.table_meta_segment_count); + assert_eq!(256, configured.cache.table_meta_snapshot_count); + assert_eq!(3000, configured.cache.table_bloom_index_meta_count); assert_eq!( - 1024 * 1024, - configured.query.table_cache_bloom_index_filter_count + 1024 * 1024 * 1024, + configured.cache.table_bloom_index_filter_count ); assert_eq!(HashMap::new(), configured.catalogs); }, @@ -162,16 +164,15 @@ fn test_env_config_fs() -> Result<()> { 
("QUERY_FLIGHT_API_ADDRESS", Some("1.2.3.4:9091")), ("QUERY_ADMIN_API_ADDRESS", Some("1.2.3.4:8081")), ("QUERY_METRIC_API_ADDRESS", Some("1.2.3.4:7071")), - ("QUERY_TABLE_META_CACHE_ENABLED", Some("true")), - ("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_DISK_CACHE_ROOT", Some("_cache_env")), - ("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("512")), - ("QU-ERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("256")), - ("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("10240")), + ("CACHE_ENABLE_TABLE_META_CACHE", Some("true")), + ("CACHE_DISK_PATH", Some("_cache_env")), + ("CACHE_DISK_MAX_BYTES", Some("512")), + ("CACHE_TABLE_META_SNAPSHOT_COUNT", Some("256")), + ("CACHE_TABLE_META_SEGMENT_COUNT", Some("10240")), ("META_ENDPOINTS", Some("0.0.0.0:9191")), - ("TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("3000")), + ("CACHE_TABLE_BLOOM_INDEX_META_COUNT", Some("3000")), ( - "TABLE_CACHE_BLOOM_INDEX_DATA_BYTES", + "CACHE_TABLE_BLOOM_INDEX_FILTER_COUNT", Some(format!("{}", 1024 * 1024 * 1024).as_str()), ), ("STORAGE_TYPE", Some("fs")), @@ -201,7 +202,9 @@ fn test_env_config_fs() -> Result<()> { ("CONFIG_FILE", None), ], || { - let configured = Config::load_for_test().expect("must success").into_outer(); + let configured = InnerConfig::load_for_test() + .expect("must success") + .into_config(); assert_eq!("DEBUG", configured.log.level); @@ -243,18 +246,16 @@ fn test_env_config_fs() -> Result<()> { assert_eq!("", configured.storage.gcs.gcs_root); assert_eq!("", configured.storage.gcs.credential); - assert!(configured.query.table_engine_memory_enabled); - - assert!(configured.query.table_meta_cache_enabled); - assert_eq!(512, configured.query.table_memory_cache_mb_size); - assert_eq!("_cache_env", configured.query.table_disk_cache_root); - assert_eq!(512, configured.query.table_disk_cache_mb_size); - assert_eq!(10240, configured.query.table_cache_segment_count); - assert_eq!(256, configured.query.table_cache_snapshot_count); - assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count); + assert!(configured.cache.enable_table_bloom_index_cache); + assert!(configured.cache.enable_table_meta_cache); + assert_eq!("_cache_env", configured.cache.disk_cache_config.path); + assert_eq!(512, configured.cache.disk_cache_config.max_bytes); + assert_eq!(10240, configured.cache.table_meta_segment_count); + assert_eq!(256, configured.cache.table_meta_snapshot_count); + assert_eq!(3000, configured.cache.table_bloom_index_meta_count); assert_eq!( - 1024 * 1024, - configured.query.table_cache_bloom_index_filter_count + 1024 * 1024 * 1024, + configured.cache.table_bloom_index_filter_count ); }, ); @@ -281,16 +282,15 @@ fn test_env_config_gcs() -> Result<()> { ("QUERY_FLIGHT_API_ADDRESS", Some("1.2.3.4:9091")), ("QUERY_ADMIN_API_ADDRESS", Some("1.2.3.4:8081")), ("QUERY_METRIC_API_ADDRESS", Some("1.2.3.4:7071")), - ("QUERY_TABLE_META_CACHE_ENABLED", Some("true")), - ("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_DISK_CACHE_ROOT", Some("_cache_env")), - ("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("256")), - ("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("10240")), + ("CACHE_ENABLE_TABLE_META_CACHE", Some("true")), + ("CACHE_DISK_PATH", Some("_cache_env")), + ("CACHE_DISK_MAX_BYTES", Some("512")), + ("CACHE_TABLE_META_SNAPSHOT_COUNT", Some("256")), + ("CACHE_TABLE_META_SEGMENT_COUNT", Some("10240")), ("META_ENDPOINTS", Some("0.0.0.0:9191")), - ("TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("3000")), + ("CACHE_TABLE_BLOOM_INDEX_META_COUNT", Some("3000")), ( - 
"TABLE_CACHE_BLOOM_INDEX_DATA_BYTES", + "CACHE_TABLE_BLOOM_INDEX_FILTER_COUNT", Some(format!("{}", 1024 * 1024 * 1024).as_str()), ), ("STORAGE_TYPE", Some("gcs")), @@ -320,7 +320,9 @@ fn test_env_config_gcs() -> Result<()> { ("CONFIG_FILE", None), ], || { - let configured = Config::load_for_test().expect("must success").into_outer(); + let configured = InnerConfig::load_for_test() + .expect("must success") + .into_config(); assert_eq!("DEBUG", configured.log.level); @@ -369,18 +371,16 @@ fn test_env_config_gcs() -> Result<()> { assert_eq!("", configured.storage.oss.oss_access_key_id); assert_eq!("", configured.storage.oss.oss_access_key_secret); - assert!(configured.query.table_engine_memory_enabled); - - assert!(configured.query.table_meta_cache_enabled); - assert_eq!(512, configured.query.table_memory_cache_mb_size); - assert_eq!("_cache_env", configured.query.table_disk_cache_root); - assert_eq!(512, configured.query.table_disk_cache_mb_size); - assert_eq!(10240, configured.query.table_cache_segment_count); - assert_eq!(256, configured.query.table_cache_snapshot_count); - assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count); + assert!(configured.cache.enable_table_meta_cache); + assert!(configured.cache.enable_table_bloom_index_cache); + assert_eq!("_cache_env", configured.cache.disk_cache_config.path); + assert_eq!(512, configured.cache.disk_cache_config.max_bytes); + assert_eq!(10240, configured.cache.table_meta_segment_count); + assert_eq!(256, configured.cache.table_meta_snapshot_count); + assert_eq!(3000, configured.cache.table_bloom_index_meta_count); assert_eq!( - 1024 * 1024, - configured.query.table_cache_bloom_index_filter_count + 1024 * 1024 * 1024, + configured.cache.table_bloom_index_filter_count ); }, ); @@ -407,16 +407,17 @@ fn test_env_config_oss() -> Result<()> { ("QUERY_FLIGHT_API_ADDRESS", Some("1.2.3.4:9091")), ("QUERY_ADMIN_API_ADDRESS", Some("1.2.3.4:8081")), ("QUERY_METRIC_API_ADDRESS", Some("1.2.3.4:7071")), - ("QUERY_TABLE_META_CACHE_ENABLED", Some("true")), - ("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_DISK_CACHE_ROOT", Some("_cache_env")), - ("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("256")), - ("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("10240")), + ("CACHE_ENABLE_TABLE_META_CACHE", Some("true")), + ("CACHE_DATA_CACHE_STORAGE", Some("disk")), + ("TABLE_CACHE_BLOOM_INDEX_FILTER_COUNT", Some("1")), + ("CACHE_DISK_PATH", Some("_cache_env")), + ("CACHE_DISK_MAX_BYTES", Some("512")), + ("CACHE_TABLE_META_SNAPSHOT_COUNT", Some("256")), + ("CACHE_TABLE_META_SEGMENT_COUNT", Some("10240")), ("META_ENDPOINTS", Some("0.0.0.0:9191")), - ("TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("3000")), + ("CACHE_TABLE_BLOOM_INDEX_META_COUNT", Some("3000")), ( - "TABLE_CACHE_BLOOM_INDEX_DATA_BYTES", + "CACHE_TABLE_BLOOM_INDEX_FILTER_COUNT", Some(format!("{}", 1024 * 1024 * 1024).as_str()), ), ("STORAGE_TYPE", Some("oss")), @@ -446,7 +447,9 @@ fn test_env_config_oss() -> Result<()> { ("CONFIG_FILE", None), ], || { - let configured = Config::load_for_test().expect("must success").into_outer(); + let configured = InnerConfig::load_for_test() + .expect("must success") + .into_config(); assert_eq!("DEBUG", configured.log.level); @@ -502,18 +505,15 @@ fn test_env_config_oss() -> Result<()> { assert_eq!("", configured.storage.gcs.gcs_root); assert_eq!("", configured.storage.gcs.credential); - assert!(configured.query.table_engine_memory_enabled); - - assert!(configured.query.table_meta_cache_enabled); - 
assert_eq!(512, configured.query.table_memory_cache_mb_size); - assert_eq!("_cache_env", configured.query.table_disk_cache_root); - assert_eq!(512, configured.query.table_disk_cache_mb_size); - assert_eq!(10240, configured.query.table_cache_segment_count); - assert_eq!(256, configured.query.table_cache_snapshot_count); - assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count); + assert!(configured.cache.enable_table_meta_cache); + assert_eq!("_cache_env", configured.cache.disk_cache_config.path); + assert_eq!(512, configured.cache.disk_cache_config.max_bytes); + assert_eq!(10240, configured.cache.table_meta_segment_count); + assert_eq!(256, configured.cache.table_meta_snapshot_count); + assert_eq!(3000, configured.cache.table_bloom_index_meta_count); assert_eq!( - 1024 * 1024, - configured.query.table_cache_bloom_index_filter_count + 1024 * 1024 * 1024, + configured.cache.table_bloom_index_filter_count ); }, ); @@ -561,15 +561,6 @@ table_engine_memory_enabled = true database_engine_github_enabled = true wait_timeout_mills = 5000 max_query_log_size = 10000 -table_meta_cache_enabled = false -table_cache_snapshot_count = 256 -table_cache_segment_count = 10240 -table_cache_block_meta_count = 102400 -table_memory_cache_mb_size = 256 -table_disk_cache_root = "_cache" -table_disk_cache_mb_size = 1024 -table_cache_bloom_index_meta_count = 3000 -table_cache_bloom_index_filter_count = 1048576 management_mode = false jwt_key_file = "" async_insert_max_data_size = 10000 @@ -640,6 +631,19 @@ protocol = "binary" type = "hive" address = "127.0.0.1:9083" protocol = "binary" + +[cache] + +enable_table_meta_cache = false +table_meta_snapshot_count = 256 +table_meta_segment_count = 10240 +table_bloom_index_meta_count = 3000 +table_bloom_index_filter_count = 1048576 + +data_cache_storage = "disk" + +[cache.disk] +path = "_cache" "# .as_bytes(), )?; @@ -655,14 +659,21 @@ protocol = "binary" ("STORAGE_TYPE", None), ], || { - let cfg = Config::load_for_test() + let cfg = InnerConfig::load_for_test() .expect("config load success") - .into_outer(); + .into_config(); assert_eq!("tenant_id_from_env", cfg.query.tenant_id); assert_eq!("access_key_id_from_env", cfg.storage.s3.access_key_id); assert_eq!("s3", cfg.storage.storage_type); + let cache_config = &cfg.cache; + assert_eq!( + cache_config.data_cache_storage, + CacheStorageTypeConfig::Disk + ); + assert_eq!(cache_config.disk_cache_config.path, "_cache"); + // NOTE: // // after the config conversion procedure: @@ -715,7 +726,7 @@ protocol = "binary" temp_env::with_vars( vec![("CONFIG_FILE", Some(file_path.to_string_lossy().as_ref()))], || { - let cfg = Config::load_for_test().expect("config load success"); + let cfg = InnerConfig::load_for_test().expect("config load success"); assert_eq!( cfg.catalogs["hive"], @@ -755,7 +766,7 @@ protocol = "binary" temp_env::with_vars( vec![("CONFIG_FILE", Some(file_path.to_string_lossy().as_ref()))], || { - let cfg = Config::load_for_test().expect("config load success"); + let cfg = InnerConfig::load_for_test().expect("config load success"); assert_eq!( cfg.catalogs["my_hive"], @@ -772,3 +783,29 @@ protocol = "binary" Ok(()) } + +#[test] +fn test_env_config_obsoleted() -> Result<()> { + let obsoleted = vec![ + ("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("1")), + ("QUERY_TABLE_META_CACHE_ENABLED", Some("true")), + ("QUERY_TABLE_CACHE_BLOCK_META_COUNT", Some("1")), + ("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("1")), + ("QUERY_TABLE_DISK_CACHE_ROOT", Some("1")), + ("QUERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("1")), + 
("QUERY_TABLE_CACHE_STATISTIC_COUNT", Some("1")), + ("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("1")), + ("QUERY_TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("1")), + ("QUERY_TABLE_CACHE_BLOOM_INDEX_FILTER_COUNT", Some("1")), + ]; + + for env_var in obsoleted { + temp_env::with_vars(vec![env_var], || { + let r = InnerConfig::load_for_test(); + assert!(r.is_err(), "expecting `Err`, but got `Ok`"); + assert_eq!(r.unwrap_err().code(), ErrorCode::INVALID_CONFIG) + }); + } + + Ok(()) +} diff --git a/src/query/service/tests/it/servers/http/clickhouse_handler.rs b/src/query/service/tests/it/servers/http/clickhouse_handler.rs index ff9ccc6e7ca71..ef00f659c7959 100644 --- a/src/query/service/tests/it/servers/http/clickhouse_handler.rs +++ b/src/query/service/tests/it/servers/http/clickhouse_handler.rs @@ -15,7 +15,7 @@ use std::collections::HashMap; use common_base::base::tokio; -use common_config::Config; +use common_config::InnerConfig; use databend_query::auth::AuthMgr; use databend_query::servers::http::middleware::HTTPSessionEndpoint; use databend_query::servers::http::middleware::HTTPSessionMiddleware; @@ -421,7 +421,7 @@ struct Server { } impl Server { - pub async fn new(config: &Config) -> Self { + pub async fn new(config: &InnerConfig) -> Self { let session_middleware = HTTPSessionMiddleware::create(HttpHandlerKind::Clickhouse, AuthMgr::create(config)); let endpoint = Route::new() diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index a0ae3f622b0a6..f4ccbed3a45ad 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -1,129 +1,131 @@ ---------- TABLE INFO ------------ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemConfigs -------- TABLE CONTENTS ---------- -+-----------+----------------------------------------+----------------------------------+----------+ -| Column 0 | Column 1 | Column 2 | Column 3 | -+-----------+----------------------------------------+----------------------------------+----------+ -| "log" | "dir" | "./.databend/logs" | "" | -| "log" | "file.dir" | "./.databend/logs" | "" | -| "log" | "file.format" | "text" | "" | -| "log" | "file.level" | "DEBUG" | "" | -| "log" | "file.on" | "true" | "" | -| "log" | "level" | "DEBUG" | "" | -| "log" | "query_enabled" | "false" | "" | -| "log" | "stderr.format" | "text" | "" | -| "log" | "stderr.level" | "INFO" | "" | -| "log" | "stderr.on" | "true" | "" | -| "meta" | "auto_sync_interval" | "0" | "" | -| "meta" | "client_timeout_in_second" | "10" | "" | -| "meta" | "embedded_dir" | "" | "" | -| "meta" | "endpoints" | "" | "" | -| "meta" | "password" | "" | "" | -| "meta" | "rpc_tls_meta_server_root_ca_cert" | "" | "" | -| "meta" | "rpc_tls_meta_service_domain_name" | "localhost" | "" | -| "meta" | "username" | "root" | "" | -| "query" | "admin_api_address" | "127.0.0.1:8080" | "" | -| "query" | "api_tls_server_cert" | "" | "" | -| "query" | "api_tls_server_key" | "" | "" | -| "query" | "api_tls_server_root_ca_cert" | "" | "" | -| "query" | "async_insert_busy_timeout" | "200" | "" | -| "query" | "async_insert_max_data_size" | "10000" | "" | -| "query" | "async_insert_stale_timeout" | "0" | "" | -| "query" | "clickhouse_handler_host" | "127.0.0.1" | "" | -| "query" | "clickhouse_handler_port" | "9000" | "" | -| "query" | "clickhouse_http_handler_host" | "127.0.0.1" | "" | -| "query" | 
"clickhouse_http_handler_port" | "8124" | "" | -| "query" | "cluster_id" | "" | "" | -| "query" | "database_engine_github_enabled" | "true" | "" | -| "query" | "flight_api_address" | "127.0.0.1:9090" | "" | -| "query" | "http_handler_host" | "127.0.0.1" | "" | -| "query" | "http_handler_port" | "8000" | "" | -| "query" | "http_handler_result_timeout_secs" | "60" | "" | -| "query" | "http_handler_tls_server_cert" | "" | "" | -| "query" | "http_handler_tls_server_key" | "" | "" | -| "query" | "http_handler_tls_server_root_ca_cert" | "" | "" | -| "query" | "internal_enable_sandbox_tenant" | "false" | "" | -| "query" | "jwt_key_file" | "" | "" | -| "query" | "jwt_key_files" | "" | "" | -| "query" | "management_mode" | "false" | "" | -| "query" | "max_active_sessions" | "256" | "" | -| "query" | "max_memory_limit_enabled" | "false" | "" | -| "query" | "max_query_log_size" | "10000" | "" | -| "query" | "max_server_memory_usage" | "0" | "" | -| "query" | "metric_api_address" | "127.0.0.1:7070" | "" | -| "query" | "mysql_handler_host" | "127.0.0.1" | "" | -| "query" | "mysql_handler_port" | "3307" | "" | -| "query" | "num_cpus" | "0" | "" | -| "query" | "quota" | "null" | "" | -| "query" | "rpc_tls_query_server_root_ca_cert" | "" | "" | -| "query" | "rpc_tls_query_service_domain_name" | "localhost" | "" | -| "query" | "rpc_tls_server_cert" | "" | "" | -| "query" | "rpc_tls_server_key" | "" | "" | -| "query" | "share_endpoint_address" | "" | "" | -| "query" | "share_endpoint_auth_token_file" | "" | "" | -| "query" | "table_cache_block_meta_count" | "102400" | "" | -| "query" | "table_cache_bloom_index_filter_count" | "1048576" | "" | -| "query" | "table_cache_bloom_index_meta_count" | "3000" | "" | -| "query" | "table_cache_segment_count" | "10240" | "" | -| "query" | "table_cache_snapshot_count" | "256" | "" | -| "query" | "table_cache_statistic_count" | "256" | "" | -| "query" | "table_disk_cache_mb_size" | "1024" | "" | -| "query" | "table_disk_cache_root" | "_cache" | "" | -| "query" | "table_engine_memory_enabled" | "true" | "" | -| "query" | "table_memory_cache_mb_size" | "256" | "" | -| "query" | "table_meta_cache_enabled" | "true" | "" | -| "query" | "tenant_id" | "test" | "" | -| "query" | "users" | "" | "" | -| "query" | "wait_timeout_mills" | "5000" | "" | -| "storage" | "allow_insecure" | "false" | "" | -| "storage" | "azblob.account_key" | "" | "" | -| "storage" | "azblob.account_name" | "" | "" | -| "storage" | "azblob.container" | "" | "" | -| "storage" | "azblob.endpoint_url" | "" | "" | -| "storage" | "azblob.root" | "" | "" | -| "storage" | "cache.fs.data_path" | "_data" | "" | -| "storage" | "cache.moka.max_capacity" | "1073741824" | "" | -| "storage" | "cache.moka.time_to_idle" | "600" | "" | -| "storage" | "cache.moka.time_to_live" | "3600" | "" | -| "storage" | "cache.num_cpus" | "0" | "" | -| "storage" | "cache.redis.db" | "0" | "" | -| "storage" | "cache.redis.default_ttl" | "0" | "" | -| "storage" | "cache.redis.endpoint_url" | "" | "" | -| "storage" | "cache.redis.password" | "" | "" | -| "storage" | "cache.redis.root" | "" | "" | -| "storage" | "cache.redis.username" | "" | "" | -| "storage" | "cache.type" | "none" | "" | -| "storage" | "fs.data_path" | "_data" | "" | -| "storage" | "gcs.bucket" | "" | "" | -| "storage" | "gcs.credential" | "" | "" | -| "storage" | "gcs.endpoint_url" | "https://storage.googleapis.com" | "" | -| "storage" | "gcs.root" | "" | "" | -| "storage" | "hdfs.name_node" | "" | "" | -| "storage" | "hdfs.root" | "" | "" | -| "storage" | "num_cpus" | 
"0" | "" | -| "storage" | "obs.access_key_id" | "" | "" | -| "storage" | "obs.bucket" | "" | "" | -| "storage" | "obs.endpoint_url" | "" | "" | -| "storage" | "obs.root" | "" | "" | -| "storage" | "obs.secret_access_key" | "" | "" | -| "storage" | "oss.access_key_id" | "" | "" | -| "storage" | "oss.access_key_secret" | "" | "" | -| "storage" | "oss.bucket" | "" | "" | -| "storage" | "oss.endpoint_url" | "" | "" | -| "storage" | "oss.presign_endpoint_url" | "" | "" | -| "storage" | "oss.root" | "" | "" | -| "storage" | "s3.access_key_id" | "" | "" | -| "storage" | "s3.bucket" | "" | "" | -| "storage" | "s3.enable_virtual_host_style" | "false" | "" | -| "storage" | "s3.endpoint_url" | "https://s3.amazonaws.com" | "" | -| "storage" | "s3.external_id" | "" | "" | -| "storage" | "s3.master_key" | "" | "" | -| "storage" | "s3.region" | "" | "" | -| "storage" | "s3.role_arn" | "" | "" | -| "storage" | "s3.root" | "" | "" | -| "storage" | "s3.secret_access_key" | "" | "" | -| "storage" | "s3.security_token" | "" | "" | -| "storage" | "type" | "fs" | "" | -+-----------+----------------------------------------+----------------------------------+----------+ ++-----------+------------------------------------------+----------------------------------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 | ++-----------+------------------------------------------+----------------------------------+----------+ +| "cache" | "data_cache_storage" | "none" | "" | +| "cache" | "disk.max_bytes" | "21474836480" | "" | +| "cache" | "disk.path" | "./.databend/_cache" | "" | +| "cache" | "enable_table_bloom_index_cache" | "true" | "" | +| "cache" | "enable_table_meta_cache" | "true" | "" | +| "cache" | "table_bloom_index_filter_count" | "1048576" | "" | +| "cache" | "table_bloom_index_meta_count" | "3000" | "" | +| "cache" | "table_data_cache_population_queue_size" | "65536" | "" | +| "cache" | "table_data_deserialized_data_bytes" | "0" | "" | +| "cache" | "table_meta_segment_count" | "10240" | "" | +| "cache" | "table_meta_snapshot_count" | "256" | "" | +| "cache" | "table_meta_statistic_count" | "256" | "" | +| "log" | "dir" | "./.databend/logs" | "" | +| "log" | "file.dir" | "./.databend/logs" | "" | +| "log" | "file.format" | "text" | "" | +| "log" | "file.level" | "DEBUG" | "" | +| "log" | "file.on" | "true" | "" | +| "log" | "level" | "DEBUG" | "" | +| "log" | "query_enabled" | "false" | "" | +| "log" | "stderr.format" | "text" | "" | +| "log" | "stderr.level" | "INFO" | "" | +| "log" | "stderr.on" | "true" | "" | +| "meta" | "auto_sync_interval" | "0" | "" | +| "meta" | "client_timeout_in_second" | "10" | "" | +| "meta" | "embedded_dir" | "" | "" | +| "meta" | "endpoints" | "" | "" | +| "meta" | "password" | "" | "" | +| "meta" | "rpc_tls_meta_server_root_ca_cert" | "" | "" | +| "meta" | "rpc_tls_meta_service_domain_name" | "localhost" | "" | +| "meta" | "username" | "root" | "" | +| "query" | "admin_api_address" | "127.0.0.1:8080" | "" | +| "query" | "api_tls_server_cert" | "" | "" | +| "query" | "api_tls_server_key" | "" | "" | +| "query" | "api_tls_server_root_ca_cert" | "" | "" | +| "query" | "async_insert_busy_timeout" | "200" | "" | +| "query" | "async_insert_max_data_size" | "10000" | "" | +| "query" | "async_insert_stale_timeout" | "0" | "" | +| "query" | "clickhouse_handler_host" | "127.0.0.1" | "" | +| "query" | "clickhouse_handler_port" | "9000" | "" | +| "query" | "clickhouse_http_handler_host" | "127.0.0.1" | "" | +| "query" | "clickhouse_http_handler_port" | "8124" | "" | +| "query" | 
"cluster_id" | "" | "" | +| "query" | "database_engine_github_enabled" | "true" | "" | +| "query" | "flight_api_address" | "127.0.0.1:9090" | "" | +| "query" | "http_handler_host" | "127.0.0.1" | "" | +| "query" | "http_handler_port" | "8000" | "" | +| "query" | "http_handler_result_timeout_secs" | "60" | "" | +| "query" | "http_handler_tls_server_cert" | "" | "" | +| "query" | "http_handler_tls_server_key" | "" | "" | +| "query" | "http_handler_tls_server_root_ca_cert" | "" | "" | +| "query" | "internal_enable_sandbox_tenant" | "false" | "" | +| "query" | "jwt_key_file" | "" | "" | +| "query" | "jwt_key_files" | "" | "" | +| "query" | "management_mode" | "false" | "" | +| "query" | "max_active_sessions" | "256" | "" | +| "query" | "max_memory_limit_enabled" | "false" | "" | +| "query" | "max_query_log_size" | "10000" | "" | +| "query" | "max_server_memory_usage" | "0" | "" | +| "query" | "metric_api_address" | "127.0.0.1:7070" | "" | +| "query" | "mysql_handler_host" | "127.0.0.1" | "" | +| "query" | "mysql_handler_port" | "3307" | "" | +| "query" | "num_cpus" | "0" | "" | +| "query" | "quota" | "null" | "" | +| "query" | "rpc_tls_query_server_root_ca_cert" | "" | "" | +| "query" | "rpc_tls_query_service_domain_name" | "localhost" | "" | +| "query" | "rpc_tls_server_cert" | "" | "" | +| "query" | "rpc_tls_server_key" | "" | "" | +| "query" | "share_endpoint_address" | "" | "" | +| "query" | "share_endpoint_auth_token_file" | "" | "" | +| "query" | "table_engine_memory_enabled" | "true" | "" | +| "query" | "tenant_id" | "test" | "" | +| "query" | "users" | "" | "" | +| "query" | "wait_timeout_mills" | "5000" | "" | +| "storage" | "allow_insecure" | "false" | "" | +| "storage" | "azblob.account_key" | "" | "" | +| "storage" | "azblob.account_name" | "" | "" | +| "storage" | "azblob.container" | "" | "" | +| "storage" | "azblob.endpoint_url" | "" | "" | +| "storage" | "azblob.root" | "" | "" | +| "storage" | "cache.fs.data_path" | "_data" | "" | +| "storage" | "cache.moka.max_capacity" | "1073741824" | "" | +| "storage" | "cache.moka.time_to_idle" | "600" | "" | +| "storage" | "cache.moka.time_to_live" | "3600" | "" | +| "storage" | "cache.num_cpus" | "0" | "" | +| "storage" | "cache.redis.db" | "0" | "" | +| "storage" | "cache.redis.default_ttl" | "0" | "" | +| "storage" | "cache.redis.endpoint_url" | "" | "" | +| "storage" | "cache.redis.password" | "" | "" | +| "storage" | "cache.redis.root" | "" | "" | +| "storage" | "cache.redis.username" | "" | "" | +| "storage" | "cache.type" | "none" | "" | +| "storage" | "fs.data_path" | "_data" | "" | +| "storage" | "gcs.bucket" | "" | "" | +| "storage" | "gcs.credential" | "" | "" | +| "storage" | "gcs.endpoint_url" | "https://storage.googleapis.com" | "" | +| "storage" | "gcs.root" | "" | "" | +| "storage" | "hdfs.name_node" | "" | "" | +| "storage" | "hdfs.root" | "" | "" | +| "storage" | "num_cpus" | "0" | "" | +| "storage" | "obs.access_key_id" | "" | "" | +| "storage" | "obs.bucket" | "" | "" | +| "storage" | "obs.endpoint_url" | "" | "" | +| "storage" | "obs.root" | "" | "" | +| "storage" | "obs.secret_access_key" | "" | "" | +| "storage" | "oss.access_key_id" | "" | "" | +| "storage" | "oss.access_key_secret" | "" | "" | +| "storage" | "oss.bucket" | "" | "" | +| "storage" | "oss.endpoint_url" | "" | "" | +| "storage" | "oss.presign_endpoint_url" | "" | "" | +| "storage" | "oss.root" | "" | "" | +| "storage" | "s3.access_key_id" | "" | "" | +| "storage" | "s3.bucket" | "" | "" | +| "storage" | "s3.enable_virtual_host_style" | "false" | "" | 
+| "storage" | "s3.endpoint_url" | "https://s3.amazonaws.com" | "" | +| "storage" | "s3.external_id" | "" | "" | +| "storage" | "s3.master_key" | "" | "" | +| "storage" | "s3.region" | "" | "" | +| "storage" | "s3.role_arn" | "" | "" | +| "storage" | "s3.root" | "" | "" | +| "storage" | "s3.secret_access_key" | "" | "" | +| "storage" | "s3.security_token" | "" | "" | +| "storage" | "type" | "fs" | "" | ++-----------+------------------------------------------+----------------------------------+----------+ diff --git a/src/query/service/tests/it/tests/config.rs b/src/query/service/tests/it/tests/config.rs index 5a5b2b5546503..b66de9347c8f3 100644 --- a/src/query/service/tests/it/tests/config.rs +++ b/src/query/service/tests/it/tests/config.rs @@ -14,17 +14,17 @@ use std::collections::HashMap; -use common_config::Config; +use common_config::InnerConfig; use common_meta_app::principal::AuthInfo; use common_users::idm_config::IDMConfig; pub struct ConfigBuilder { - conf: Config, + conf: InnerConfig, } impl ConfigBuilder { pub fn create() -> ConfigBuilder { - let mut conf = Config::default(); + let mut conf = InnerConfig::default(); conf.query.tenant_id = "test".to_string(); conf.log = common_tracing::Config::new_testing(); @@ -128,11 +128,11 @@ impl ConfigBuilder { self } - pub fn build(self) -> Config { + pub fn build(self) -> InnerConfig { self.conf } - pub fn config(&self) -> Config { + pub fn config(&self) -> InnerConfig { self.conf.clone() } } diff --git a/src/query/service/tests/it/tests/context.rs b/src/query/service/tests/it/tests/context.rs index 35714080abf6c..7674613fbd023 100644 --- a/src/query/service/tests/it/tests/context.rs +++ b/src/query/service/tests/it/tests/context.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::Config; +use common_config::InnerConfig; use common_config::DATABEND_COMMIT_VERSION; use common_exception::Result; use common_meta_app::principal::AuthInfo; @@ -69,7 +69,7 @@ pub async fn create_query_context_with_session( } pub async fn create_query_context_with_config( - config: Config, + config: InnerConfig, mut current_user: Option, ) -> Result<(TestGuard, Arc)> { let guard = TestGlobalServices::setup(config).await?; diff --git a/src/query/service/tests/it/tests/sessions.rs b/src/query/service/tests/it/tests/sessions.rs index f24e3c7d7c7c0..ea5f15379d5ce 100644 --- a/src/query/service/tests/it/tests/sessions.rs +++ b/src/query/service/tests/it/tests/sessions.rs @@ -13,7 +13,7 @@ // limitations under the License. 
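The test scaffolding above (`ConfigBuilder`, `create_query_context_with_config`, and the `TestGlobalServices` setup that follows) now passes `InnerConfig` throughout. A short sketch of how a unit test would typically assemble one; the `max_active_sessions` tweak is only an illustrative mutation, not something this PR changes:

```rust
use common_config::InnerConfig;

// Hypothetical test setup built on the test-local `ConfigBuilder` shown above
// (defined in tests/it/tests/config.rs, so no external import is needed there).
fn build_test_config() -> InnerConfig {
    // `ConfigBuilder::create()` seeds `tenant_id = "test"` and testing log settings.
    let mut conf: InnerConfig = ConfigBuilder::create().build();
    // Any field of the inner representation can be adjusted before global setup.
    conf.query.max_active_sessions = 8;
    conf
}
```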
use common_base::base::GlobalInstance; -use common_config::Config; +use common_config::InnerConfig; use common_exception::Result; use common_tracing::set_panic_hook; use databend_query::clusters::ClusterDiscovery; @@ -27,7 +27,7 @@ unsafe impl Send for TestGlobalServices {} unsafe impl Sync for TestGlobalServices {} impl TestGlobalServices { - pub async fn setup(config: Config) -> Result { + pub async fn setup(config: InnerConfig) -> Result { set_panic_hook(); std::env::set_var("UNIT_TEST", "TRUE"); diff --git a/src/query/settings/src/lib.rs b/src/query/settings/src/lib.rs index 738de0cc18f46..04ae7786239d1 100644 --- a/src/query/settings/src/lib.rs +++ b/src/query/settings/src/lib.rs @@ -25,8 +25,8 @@ use std::sync::Arc; use common_ast::Dialect; use common_base::runtime::GlobalIORuntime; use common_base::runtime::TrySpawn; -use common_config::Config; use common_config::GlobalConfig; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::principal::UserSetting; @@ -107,7 +107,7 @@ impl Settings { Ok(ret) } - pub fn default_settings(tenant: &str, conf: Arc) -> Result> { + pub fn default_settings(tenant: &str, conf: Arc) -> Result> { let memory_info = sys_info::mem_info().map_err(ErrorCode::from_std_error)?; let mut num_cpus = num_cpus::get() as u64; if conf.query.num_cpus != 0 { @@ -484,7 +484,7 @@ impl Settings { // Only used for testings pub fn default_test_settings() -> Result> { - Self::default_settings("default", Arc::new(Config::default())) + Self::default_settings("default", Arc::new(InnerConfig::default())) } // Get max_block_size. diff --git a/src/query/sql/tests/location.rs b/src/query/sql/tests/location.rs index b38e36913be5d..4e0f36d6515fb 100644 --- a/src/query/sql/tests/location.rs +++ b/src/query/sql/tests/location.rs @@ -21,8 +21,8 @@ use std::collections::BTreeMap; use anyhow::Result; use common_ast::ast::UriLocation; use common_base::base::GlobalInstance; -use common_config::Config; use common_config::GlobalConfig; +use common_config::InnerConfig; use common_meta_app::storage::StorageFsConfig; // use common_storage::StorageFtpConfig; use common_meta_app::storage::StorageGcsConfig; @@ -44,7 +44,7 @@ fn test_parse_uri_location() -> Result<()> { }; GlobalInstance::init_testing(&thread_name); - GlobalConfig::init(Config::default())?; + GlobalConfig::init(InnerConfig::default())?; let cases = vec![ ( diff --git a/src/query/storages/common/cache-manager/Cargo.toml b/src/query/storages/common/cache-manager/Cargo.toml index bcd467f6a3d70..2e8f98cd5162c 100644 --- a/src/query/storages/common/cache-manager/Cargo.toml +++ b/src/query/storages/common/cache-manager/Cargo.toml @@ -11,6 +11,7 @@ edition = { workspace = true } [dependencies] common-arrow = { path = "../../../../common/arrow" } common-base = { path = "../../../../common/base" } +common-cache = { path = "../../../../common/cache" } common-config = { path = "../../../config" } common-exception = { path = "../../../../common/exception" } storages-common-cache = { path = "../../common/cache" } diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index c887c90ad6e25..ebe7ed61558a3 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -13,20 +13,30 @@ // limitations under the License. 
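As the `location.rs` test and `Settings::default_settings` changes above show, code that needs configuration in tests now starts from `InnerConfig::default()` registered as the global singleton. A minimal sketch of that bootstrap, with the thread-name derivation simplified relative to the real test:

```rust
use common_base::base::GlobalInstance;
use common_config::GlobalConfig;
use common_config::InnerConfig;
use common_exception::Result;

// Hedged sketch of the per-test global bootstrap used by `test_parse_uri_location` above.
fn init_test_globals() -> Result<()> {
    // The real test keys the testing registry by the current thread's name.
    let thread_name = std::thread::current()
        .name()
        .unwrap_or("test_parse_uri_location")
        .to_string();
    GlobalInstance::init_testing(&thread_name);

    // Register the default inner config as the process-wide singleton.
    GlobalConfig::init(InnerConfig::default())?;
    Ok(())
}
```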
// +use std::path::PathBuf; use std::sync::Arc; use common_base::base::GlobalInstance; -use common_config::QueryConfig; +use common_cache::CountableMeter; +use common_cache::DefaultHashBuilder; +use common_config::CacheConfig; +use common_config::CacheStorageTypeInnerConfig; use common_exception::Result; use storages_common_cache::InMemoryCacheBuilder; use storages_common_cache::InMemoryItemCacheHolder; +use storages_common_cache::Named; +use storages_common_cache::NamedCache; +use storages_common_cache::TableDataCache; +use storages_common_cache::TableDataCacheBuilder; use crate::caches::BloomIndexFilterCache; use crate::caches::BloomIndexMetaCache; +use crate::caches::ColumnArrayCache; use crate::caches::FileMetaDataCache; use crate::caches::SegmentInfoCache; use crate::caches::TableSnapshotCache; use crate::caches::TableSnapshotStatisticCache; +use crate::ColumnArrayMeter; static DEFAULT_FILE_META_DATA_CACHE_ITEMS: u64 = 3000; @@ -38,14 +48,39 @@ pub struct CacheManager { bloom_index_filter_cache: Option, bloom_index_meta_cache: Option, file_meta_data_cache: Option, + table_data_cache: Option, + table_column_array_cache: Option, } impl CacheManager { /// Initialize the caches according to the relevant configurations. - /// - /// For convenience, ids of cluster and tenant are also kept - pub fn init(config: &QueryConfig) -> Result<()> { - if !config.table_meta_cache_enabled { + pub fn init(config: &CacheConfig, tenant_id: impl Into) -> Result<()> { + // setup table data cache + let table_data_cache = { + match config.data_cache_storage { + CacheStorageTypeInnerConfig::None => None, + CacheStorageTypeInnerConfig::Disk => { + let real_disk_cache_root = PathBuf::from(&config.disk_cache_config.path) + .join(tenant_id.into()) + .join("v1"); + Self::new_block_data_cache( + &real_disk_cache_root, + config.table_data_cache_population_queue_size, + config.disk_cache_config.max_bytes, + )? 
+ } + } + }; + + // setup in-memory table column cache + let table_column_array_cache = Self::new_in_memory_cache( + config.table_data_deserialized_data_bytes, + ColumnArrayMeter, + "table_data_column_array", + ); + + // setup in-memory table meta cache + if !config.enable_table_meta_cache { GlobalInstance::set(Arc::new(Self { table_snapshot_cache: None, segment_info_cache: None, @@ -53,16 +88,24 @@ impl CacheManager { bloom_index_meta_cache: None, file_meta_data_cache: None, table_statistic_cache: None, + table_data_cache, + table_column_array_cache, })); } else { - let table_snapshot_cache = Self::new_item_cache(config.table_cache_snapshot_count); - let table_statistic_cache = Self::new_item_cache(config.table_cache_statistic_count); - let segment_info_cache = Self::new_item_cache(config.table_cache_segment_count); + let table_snapshot_cache = + Self::new_item_cache(config.table_meta_snapshot_count, "table_snapshot"); + let table_statistic_cache = + Self::new_item_cache(config.table_meta_statistic_count, "table_statistics"); + let segment_info_cache = + Self::new_item_cache(config.table_meta_segment_count, "segment_info"); let bloom_index_filter_cache = - Self::new_item_cache(config.table_cache_bloom_index_filter_count); - let bloom_index_meta_cache = - Self::new_item_cache(config.table_cache_bloom_index_meta_count); - let file_meta_data_cache = Self::new_item_cache(DEFAULT_FILE_META_DATA_CACHE_ITEMS); + Self::new_item_cache(config.table_bloom_index_filter_count, "bloom_index_filter"); + let bloom_index_meta_cache = Self::new_item_cache( + config.table_bloom_index_meta_count, + "bloom_index_file_meta_data", + ); + let file_meta_data_cache = + Self::new_item_cache(DEFAULT_FILE_META_DATA_CACHE_ITEMS, "parquet_file_meta"); GlobalInstance::set(Arc::new(Self { table_snapshot_cache, segment_info_cache, @@ -70,6 +113,8 @@ impl CacheManager { bloom_index_meta_cache, file_meta_data_cache, table_statistic_cache, + table_data_cache, + table_column_array_cache, })); } @@ -104,11 +149,59 @@ impl CacheManager { self.file_meta_data_cache.clone() } - fn new_item_cache(capacity: u64) -> Option> { + pub fn get_table_data_cache(&self) -> Option { + self.table_data_cache.clone() + } + + pub fn get_table_data_array_cache(&self) -> Option { + self.table_column_array_cache.clone() + } + + // create cache that meters size by `Count` + fn new_item_cache( + capacity: u64, + name: impl Into, + ) -> Option>> { + if capacity > 0 { + Some(InMemoryCacheBuilder::new_item_cache(capacity).name_with(name.into())) + } else { + None + } + } + + // create cache that meters size by `meter` + fn new_in_memory_cache( + capacity: u64, + meter: M, + name: &str, + ) -> Option>> + where + M: CountableMeter>, + { if capacity > 0 { - Some(InMemoryCacheBuilder::new_item_cache(capacity)) + Some( + InMemoryCacheBuilder::new_in_memory_cache(capacity, meter) + .name_with(name.to_owned()), + ) } else { None } } + + fn new_block_data_cache( + path: &PathBuf, + population_queue_size: u32, + disk_cache_bytes_size: u64, + ) -> Result> { + if disk_cache_bytes_size > 0 { + let cache_holder = TableDataCacheBuilder::new_table_data_disk_cache( + path, + population_queue_size, + disk_cache_bytes_size, + )?; + Ok(Some(cache_holder)) + } else { + Ok(None) + } + } } diff --git a/src/query/storages/common/cache-manager/src/caches.rs b/src/query/storages/common/cache-manager/src/caches.rs index 0a9db2c8f1eb9..41ba7b42455f1 100644 --- a/src/query/storages/common/cache-manager/src/caches.rs +++ b/src/query/storages/common/cache-manager/src/caches.rs @@ 
-12,9 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::borrow::Borrow; +use std::sync::Arc; + use common_arrow::parquet::metadata::FileMetaData; +use common_cache::DefaultHashBuilder; +use common_cache::Meter; use storages_common_cache::CacheAccessor; use storages_common_cache::InMemoryItemCacheHolder; +use storages_common_cache::NamedCache; use storages_common_index::filters::Xor8Filter; use storages_common_table_meta::meta::SegmentInfo; use storages_common_table_meta::meta::TableSnapshot; @@ -23,24 +29,33 @@ use storages_common_table_meta::meta::TableSnapshotStatistics; use crate::cache_manager::CacheManager; /// In memory object cache of SegmentInfo -pub type SegmentInfoCache = InMemoryItemCacheHolder; +pub type SegmentInfoCache = NamedCache>; /// In memory object cache of TableSnapshot -pub type TableSnapshotCache = InMemoryItemCacheHolder; +pub type TableSnapshotCache = NamedCache>; /// In memory object cache of TableSnapshotStatistics -pub type TableSnapshotStatisticCache = InMemoryItemCacheHolder; -/// In memory data cache of bloom index data. +pub type TableSnapshotStatisticCache = NamedCache>; +/// In memory object cache of bloom filter. /// For each indexed data block, the bloom xor8 filter of column is cached individually -pub type BloomIndexFilterCache = InMemoryItemCacheHolder; +pub type BloomIndexFilterCache = NamedCache>; pub struct BloomIndexMeta(pub FileMetaData); /// In memory object cache of parquet FileMetaData of bloom index data -pub type BloomIndexMetaCache = InMemoryItemCacheHolder; +pub type BloomIndexMetaCache = NamedCache>; /// In memory object cache of parquet FileMetaData of external parquet files -pub type FileMetaDataCache = InMemoryItemCacheHolder; +pub type FileMetaDataCache = NamedCache>; + +/// In memory object cache of table column array +pub type ColumnArrayCache = + NamedCache>; +pub type ArrayRawDataUncompressedSize = usize; +pub type SizedColumnArray = ( + Box, + ArrayRawDataUncompressedSize, +); // Bind Type of cached objects to Caches // -// The `Cache` returned should -// - cache item s of Type `T` +// The `Cache` should return +// - cache item of Type `T` // - and implement `CacheAccessor` properly pub trait CachedObject { type Cache: CacheAccessor; @@ -88,3 +103,13 @@ impl CachedObject for FileMetaData { CacheManager::instance().get_file_meta_data_cache() } } + +pub struct ColumnArrayMeter; + +impl Meter> for ColumnArrayMeter { + type Measure = usize; + fn measure(&self, _: &Q, v: &Arc<(V, usize)>) -> usize + where K: Borrow { + v.1 + } +} diff --git a/src/query/storages/common/cache/Cargo.toml b/src/query/storages/common/cache/Cargo.toml index 51b121cb1ffb1..811a2c1baa01d 100644 --- a/src/query/storages/common/cache/Cargo.toml +++ b/src/query/storages/common/cache/Cargo.toml @@ -17,10 +17,18 @@ common-cache = { path = "../../../../common/cache" } common-exception = { path = "../../../../common/exception" } async-trait = { version = "0.1.57", package = "async-trait-fn" } +crc32fast = "1.3.2" +crossbeam-channel = "0.5.6" +hex = "0.4.3" metrics = "0.20.1" opendal = { workspace = true } parking_lot = "0.12.1" +ringbuffer = "0.12.0" serde = { workspace = true } serde_json = { workspace = true } +siphasher = "0.3.10" +tracing = "0.1.36" +walkdir = "2.3.2" -[build-dependencies] +[dev-dependencies] +tempfile = "3.3.0" diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index c0623be645b80..2f9fcec21da0c 100644 --- 
a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -12,103 +12,89 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::borrow::Borrow; +use std::hash::BuildHasher; use std::hash::Hash; use std::sync::Arc; -pub trait CacheAccessor { - fn get(&self, k: &Q) -> Option> - where - K: Borrow, - Q: Hash + Eq + ?Sized; +use common_cache::Count; +use common_cache::CountableMeter; +use common_cache::DefaultHashBuilder; +use crate::metrics_inc_cache_access_count; +use crate::metrics_inc_cache_hit_count; +use crate::metrics_inc_cache_miss_count; + +// The cache accessor, crate users usually working on this interface while manipulating caches +pub trait CacheAccessor +where + K: Eq + Hash, + S: BuildHasher, + M: CountableMeter>, +{ + fn get>(&self, k: Q) -> Option>; fn put(&self, key: K, value: Arc); - fn evict(&self, k: &Q) -> bool - where - K: Borrow, - Q: Hash + Eq + ?Sized; + fn evict(&self, k: &str) -> bool; + fn contains_key(&self, k: &str) -> bool; } -/// The minimum interface that cache providers should implement -pub trait StorageCache { - type Meter; - fn put(&mut self, key: K, value: Arc); - - fn get(&mut self, k: &Q) -> Option<&Arc> - where - K: Borrow, - Q: Hash + Eq + ?Sized; - - fn evict(&mut self, k: &Q) -> bool - where - K: Borrow, - Q: Hash + Eq + ?Sized; +/// Helper trait to convert a Cache into NamedCache +pub trait Named +where Self: Sized +{ + fn name_with(self, name: impl Into) -> NamedCache { + NamedCache { + name: name.into(), + cache: self, + } + } } -mod impls { - use std::borrow::Borrow; - use std::hash::Hash; - use std::sync::Arc; - - use parking_lot::RwLock; +impl Named for T where T: Sized + Clone {} - use crate::cache::CacheAccessor; - use crate::cache::StorageCache; - - impl CacheAccessor for Arc> - where C: StorageCache - { - fn get(&self, k: &Q) -> Option> - where - String: Borrow, - Q: Hash + Eq + ?Sized, - { - let mut guard = self.write(); - guard.get(k).cloned() - } +/// A named cache that with embedded metrics logging +#[derive(Clone)] +pub struct NamedCache { + name: String, + cache: C, +} - fn put(&self, k: String, v: Arc) { - let mut guard = self.write(); - guard.put(k, v); - } +impl NamedCache { + #[inline] + pub fn name(&self) -> &str { + &self.name + } +} - fn evict(&self, k: &Q) -> bool - where - String: Borrow, - Q: Hash + Eq + ?Sized, - { - let mut guard = self.write(); - guard.evict(k) +impl CacheAccessor for NamedCache +where + C: CacheAccessor, + K: Eq + Hash, + S: BuildHasher, + M: CountableMeter>, +{ + fn get>(&self, k: Q) -> Option> { + metrics_inc_cache_access_count(1, &self.name); + match self.cache.get(k) { + None => { + metrics_inc_cache_miss_count(1, &self.name); + None + } + v @ Some(_) => { + metrics_inc_cache_hit_count(1, &self.name); + v + } } } - impl CacheAccessor for Option>> - where C: StorageCache - { - fn get(&self, k: &Q) -> Option> - where - String: Borrow, - Q: Hash + Eq + ?Sized, - { - self.as_ref().and_then(|cache| cache.get(k)) - } + fn put(&self, key: K, value: Arc) { + self.cache.put(key, value) + } - fn put(&self, k: String, v: Arc) { - if let Some(cache) = self { - cache.put(k, v); - } - } + fn evict(&self, k: &str) -> bool { + self.cache.evict(k) + } - fn evict(&self, k: &Q) -> bool - where - String: Borrow, - Q: Hash + Eq + ?Sized, - { - if let Some(cache) = self { - cache.evict(k) - } else { - false - } - } + fn contains_key(&self, k: &str) -> bool { + self.cache.contains_key(k) } } diff --git 
a/src/query/storages/common/cache/src/lib.rs b/src/query/storages/common/cache/src/lib.rs index 55bc641613412..af50223ff5dd4 100644 --- a/src/query/storages/common/cache/src/lib.rs +++ b/src/query/storages/common/cache/src/lib.rs @@ -12,19 +12,33 @@ // See the License for the specific language governing permissions and // limitations under the License. +#![feature(write_all_vectored)] + mod cache; mod metrics; mod providers; mod read; pub use cache::CacheAccessor; -pub use providers::DiskCache; +pub use cache::Named; +pub use cache::NamedCache; +pub use providers::DiskCacheError; +pub use providers::DiskCacheKey; +pub use providers::DiskCacheResult; pub use providers::InMemoryBytesCacheHolder; pub use providers::InMemoryCacheBuilder; pub use providers::InMemoryItemCacheHolder; +pub use providers::LruDiskCache; +pub use providers::LruDiskCacheBuilder; +pub use providers::LruDiskCacheHolder; +pub use providers::TableDataCache; +pub use providers::TableDataCacheBuilder; +pub use providers::TableDataCacheKey; pub use read::CacheKey; -pub use read::DiskCacheReader; +pub use read::CachedReader; pub use read::InMemoryBytesCacheReader; pub use read::InMemoryItemCacheReader; pub use read::LoadParams; pub use read::Loader; + +pub use self::metrics::*; diff --git a/src/query/storages/common/cache/src/metrics.rs b/src/query/storages/common/cache/src/metrics.rs index 92386fc7771df..9fd88c75a9613 100644 --- a/src/query/storages/common/cache/src/metrics.rs +++ b/src/query/storages/common/cache/src/metrics.rs @@ -34,5 +34,13 @@ pub fn metrics_inc_cache_miss_load_millisecond(c: u64, cache_name: &str) { } pub fn metrics_inc_cache_hit_count(c: u64, cache_name: &str) { - increment_gauge!(key_str(cache_name, "memory_hit_count"), c as f64); + increment_gauge!(key_str(cache_name, "hit_count"), c as f64); +} + +pub fn metrics_inc_cache_population_pending_count(c: i64, cache_name: &str) { + increment_gauge!(key_str(cache_name, "population_pending_count"), c as f64); +} + +pub fn metrics_inc_cache_population_overflow_count(c: i64, cache_name: &str) { + increment_gauge!(key_str(cache_name, "population_overflow_count"), c as f64); } diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 5abc3ad386830..22649b9eec1e5 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -12,35 +12,421 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::borrow::Borrow; -use std::hash::Hash; +use std::fs; +use std::fs::File; +use std::hash::Hasher; +use std::io::IoSlice; +use std::io::Read; +use std::io::Write; +use std::path::Path; +use std::path::PathBuf; use std::sync::Arc; -use crate::cache::StorageCache; +use common_cache::Cache; +use common_cache::Count; +use common_cache::DefaultHashBuilder; +use common_cache::FileSize; +use common_cache::LruCache; +use common_exception::ErrorCode; +use common_exception::Result; +use parking_lot::RwLock; +use siphasher::sip128; +use siphasher::sip128::Hasher128; +use tracing::error; +use tracing::warn; -// TODO: local disk file based LRU/LFU/xxxx cache -pub struct DiskCache {} +use crate::CacheAccessor; -impl StorageCache for DiskCache { - type Meter = (); +pub struct DiskCache { + cache: C, + root: PathBuf, +} + +pub struct DiskCacheKey(String); + +impl From for DiskCacheKey +where S: AsRef +{ + // convert key string into hex string of SipHash 2-4 128 bit + fn from(key: S) -> Self { + let mut sip = sip128::SipHasher24::new(); + let key = key.as_ref(); + sip.write(key.as_bytes()); + let hash = sip.finish128(); + let hex_hash = hex::encode(hash.as_bytes()); + DiskCacheKey(hex_hash) + } +} + +impl From<&DiskCacheKey> for PathBuf { + fn from(cache_key: &DiskCacheKey) -> Self { + let prefix = &cache_key.0[0..3]; + let mut path_buf = PathBuf::from(prefix); + path_buf.push(Path::new(&cache_key.0)); + path_buf + } +} + +impl DiskCache +where C: Cache +{ + /// Create an `DiskCache` with `ritelinked::DefaultHashBuilder` that stores files in `path`, + /// limited to `size` bytes. + /// + /// Existing files in `path` will be stored with their last-modified time from the filesystem + /// used as the order for the recency of their use. Any files that are individually larger + /// than `size` bytes will be removed. + /// + /// The cache is not observant of changes to files under `path` from external sources, it + /// expects to have sole maintenance of the contents. + pub fn new(path: T, size: u64) -> self::result::Result + where PathBuf: From { + DiskCache { + cache: C::with_meter_and_hasher(size, FileSize, DefaultHashBuilder::default()), + root: PathBuf::from(path), + } + .init() + } +} + +impl DiskCache +where C: Cache +{ + /// Return the current size of all the files in the cache. + pub fn size(&self) -> u64 { + self.cache.size() + } + + /// Return the count of entries in the cache. + pub fn len(&self) -> usize { + self.cache.len() + } + + pub fn is_empty(&self) -> bool { + self.cache.len() == 0 + } + + /// Return the maximum size of the cache. + pub fn capacity(&self) -> u64 { + self.cache.capacity() + } + + /// Return the path in which the cache is stored. + pub fn path(&self) -> &Path { + self.root.as_path() + } + + /// Return the path that `key` would be stored at. + fn rel_to_abs_path>(&self, rel_path: K) -> PathBuf { + self.root.join(rel_path) + } + + /// Scan `self.root` for existing files and store them. 
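+    /// During this scan, any file larger than the configured capacity is deleted outright,
+    /// and older entries are evicted until the total size fits; the surviving file names
+    /// (which are the hex-encoded `DiskCacheKey`s) are re-inserted as cache keys.
+    ///
+    /// As a sketch of the key-to-path mapping encoded above (the hash value below is made up):
+    /// a key such as "block-xyz-col-1" is hashed with SipHash-2-4 into a 128-bit value,
+    /// hex-encoded to 32 characters, and stored at `root/1a2/1a2b...`, i.e. a directory named
+    /// after the first three hex characters containing a file named with the full hash.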
+ fn init(mut self) -> self::result::Result { + fs::create_dir_all(&self.root)?; + for (file, size) in get_all_files(&self.root) { + if !self.can_store(size) { + fs::remove_file(file).unwrap_or_else(|e| { + error!( + "Error removing file `{}` which is too large for the cache ({} bytes)", + e, size + ) + }); + } else { + while self.cache.size() + size > self.cache.capacity() { + let (rel_path, _) = self + .cache + .pop_by_policy() + .expect("Unexpectedly empty cache!"); + let cache_item_path = self.abs_path_of_cache_key(&DiskCacheKey(rel_path)); + fs::remove_file(&cache_item_path).unwrap_or_else(|e| { + error!( + "Error removing file from cache: `{:?}`: {}", + cache_item_path, e + ) + }); + } + let relative_path = file + .strip_prefix(&self.root) + .map_err(|_e| self::Error::MalformedPath)?; + let cache_key = Self::recovery_from(relative_path); + self.cache.put(cache_key, size); + } + } + Ok(self) + } + + /// Returns `true` if the disk cache can store a file of `size` bytes. + pub fn can_store(&self, size: u64) -> bool { + size <= self.cache.capacity() + } + + fn recovery_from(relative_path: &Path) -> String { + let key_string = match relative_path.file_name() { + Some(file_name) => match file_name.to_str() { + Some(str) => str.to_owned(), + None => { + // relative_path is constructed by ourself, and shall be valid utf8 string + unreachable!() + } + }, + None => { + // only called during init, and only path of files are passed in + unreachable!() + } + }; + key_string + } + + fn cache_key(&self, key: &str) -> DiskCacheKey { + DiskCacheKey::from(key) + } + + fn abs_path_of_cache_key(&self, cache_key: &DiskCacheKey) -> PathBuf { + let path = PathBuf::from(cache_key); + self.rel_to_abs_path(path) + } + + pub fn insert_bytes(&mut self, key: &str, bytes: &[&[u8]]) -> self::result::Result<()> { + let bytes_len = bytes.iter().map(|x| x.len() as u64).sum::(); + // check if this chunk of bytes itself is too large + if !self.can_store(bytes_len) { + return Err(Error::FileTooLarge); + } - fn put(&mut self, _key: K, _value: Arc) { - todo!() + // check eviction + if self.cache.size() + bytes_len > self.cache.capacity() { + if let Some((rel_path, _)) = self.cache.pop_by_policy() { + let cached_item_path = self.abs_path_of_cache_key(&DiskCacheKey(rel_path)); + fs::remove_file(&cached_item_path).unwrap_or_else(|e| { + error!( + "Error removing file from cache: `{:?}`: {}", + cached_item_path, e + ) + }); + } + } + + let cache_key = self.cache_key(key.as_ref()); + let path = self.abs_path_of_cache_key(&cache_key); + if let Some(parent_path) = path.parent() { + fs::create_dir_all(parent_path)?; + } + let mut f = File::create(&path)?; + let mut bufs = Vec::with_capacity(bytes.len()); + for slick in bytes { + bufs.push(IoSlice::new(slick)); + } + f.write_all_vectored(&mut bufs)?; + self.cache.put(cache_key.0, bytes_len); + Ok(()) + } + + /// Return `true` if a file with path `key` is in the cache. + pub fn contains_key(&self, key: &str) -> bool { + let cache_key = self.cache_key(key); + self.cache.contains(&cache_key.0) + } + + pub fn get_cache_path(&mut self, key: &str) -> Option { + let cache_key = self.cache_key(key); + self.cache + .get(&cache_key.0) + .map(|_| ()) // release the &mut self + .map(|_| self.abs_path_of_cache_key(&cache_key)) + } + + /// Remove the given key from the cache. 
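+    /// Removal pops the entry from the in-memory LRU index and then deletes the backing
+    /// file; an I/O error while deleting the file is logged and returned to the caller.
+    ///
+    /// A minimal usage sketch of the raw disk cache (the key, path and capacity below are
+    /// illustrative only; production code goes through `LruDiskCacheBuilder` further down):
+    ///
+    /// ```ignore
+    /// let mut cache = LruDiskCache::new("./_test_cache", 1024 * 1024).unwrap();
+    /// let payload: &[u8] = b"compressed column bytes";
+    /// cache.insert_bytes("block-xyz-col-1", &[payload]).unwrap();
+    /// assert!(cache.contains_key("block-xyz-col-1"));
+    /// let on_disk = cache.get_cache_path("block-xyz-col-1"); // Some(path) while cached
+    /// cache.remove("block-xyz-col-1").unwrap();
+    /// ```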
+ pub fn remove(&mut self, key: &str) -> Result<()> { + let cache_key = self.cache_key(key); + match self.cache.pop(&cache_key.0) { + Some(_) => { + let path = self.abs_path_of_cache_key(&cache_key); + fs::remove_file(&path).map_err(|e| { + error!("Error removing file from cache: `{:?}`: {}", path, e); + Into::into(e) + }) + } + None => Ok(()), + } + } +} + +pub mod result { + use std::error::Error as StdError; + use std::fmt; + use std::io; + + /// Errors returned by this crate. + #[derive(Debug)] + pub enum Error { + /// The file was too large to fit in the cache. + FileTooLarge, + /// The file was not in the cache. + MalformedPath, + /// An IO Error occurred. + Io(io::Error), + } + + impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::FileTooLarge => write!(f, "File too large"), + Error::MalformedPath => write!(f, "Malformed catch file path"), + Error::Io(ref e) => write!(f, "{e}"), + } + } + } + + impl StdError for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Error::Io(ref e) => Some(e), + _ => None, + } + } + } + + impl From for Error { + fn from(e: io::Error) -> Error { + Error::Io(e) + } } - fn get(&mut self, _k: &Q) -> Option<&Arc> - where - K: Borrow, - Q: Hash + Eq + ?Sized, - { - todo!() + /// A convenience `Result` type + pub type Result = std::result::Result; +} + +use result::*; + +impl CacheAccessor, common_cache::DefaultHashBuilder, Count> + for LruDiskCacheHolder +{ + fn get>(&self, k: Q) -> Option>> { + let k = k.as_ref(); + { + let mut cache = self.write(); + cache.get_cache_path(k) + } + .and_then(|cache_file_path| { + // check disk cache + let get_cache_content = || { + let mut v = vec![]; + let mut file = File::open(cache_file_path)?; + file.read_to_end(&mut v)?; + Ok::<_, Box>(v) + }; + + match get_cache_content() { + Ok(mut bytes) => { + if let Err(e) = validate_checksum(bytes.as_slice()) { + error!("data cache, of key {k}, crc validation failure: {e}"); + { + // remove the invalid cache, error of removal ignored + let r = { + let mut cache = self.write(); + cache.remove(k) + }; + if let Err(e) = r { + warn!("failed to remove invalid cache item, key {k}. {e}"); + } + } + None + } else { + // trim the checksum bytes and return + let total_len = bytes.len(); + let body_len = total_len - 4; + bytes.truncate(body_len); + let item = Arc::new(bytes); + Some(item) + } + } + Err(e) => { + error!("get disk cache item failed, cache_key {k}. {e}"); + None + } + } + }) } - fn evict(&mut self, _k: &Q) -> bool - where - K: Borrow, - Q: Hash + Eq + ?Sized, - { - todo!() + fn put(&self, key: String, value: Arc>) { + let crc = crc32fast::hash(value.as_slice()); + let crc_bytes = crc.to_le_bytes(); + let mut cache = self.write(); + if let Err(e) = cache.insert_bytes(&key, &[value.as_slice(), &crc_bytes]) { + error!("put disk cache item failed {}", e); + } + } + + fn evict(&self, k: &str) -> bool { + if let Err(e) = { + let mut cache = self.write(); + cache.remove(k) + } { + error!("evict disk cache item failed {}", e); + false + } else { + true + } + } + + fn contains_key(&self, k: &str) -> bool { + let cache = self.read(); + cache.contains_key(k) + } +} + +/// The crc32 checksum is stored at the end of `bytes` and encoded as le u32. 
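+/// In other words, `put` above writes `payload` followed by `crc32(payload).to_le_bytes()`,
+/// so an n-byte value occupies n + 4 bytes on disk and anything of 4 bytes or fewer is
+/// rejected as malformed. For example (illustrative only), caching the 3-byte payload
+/// [1u8, 2, 3] produces a 7-byte file whose tail is `crc32fast::hash(&[1, 2, 3]).to_le_bytes()`.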
+// Although parquet page has built-in crc, but it is optional (and not generated in parquet2) +fn validate_checksum(bytes: &[u8]) -> Result<()> { + let total_len = bytes.len(); + if total_len <= 4 { + Err(ErrorCode::StorageOther(format!( + "crc checksum validation failure: invalid file length {total_len}" + ))) + } else { + // total_len > 4 is ensured + let crc_bytes: [u8; 4] = bytes[total_len - 4..].try_into().unwrap(); + let crc_provided = u32::from_le_bytes(crc_bytes); + let crc_calculated = crc32fast::hash(&bytes[0..total_len - 4]); + if crc_provided == crc_calculated { + Ok(()) + } else { + Err(ErrorCode::StorageOther(format!( + "crc checksum validation failure, key : crc checksum not match, crc provided {crc_provided}, crc calculated {crc_calculated}" + ))) + } + } +} + +/// Return an iterator of `(path, size)` of files under `path` sorted by ascending last-modified +/// time, such that the oldest modified file is returned first. +fn get_all_files>(path: P) -> impl Iterator { + walkdir::WalkDir::new(path.as_ref()) + .into_iter() + .filter_map(|e| { + e.ok().and_then(|f| { + // Only look at files + if f.file_type().is_file() { + f.metadata().ok().map(|m| (f.path().to_owned(), m.len())) + } else { + None + } + }) + }) +} + +pub type LruDiskCache = DiskCache>; +pub type LruDiskCacheHolder = Arc>; + +pub struct LruDiskCacheBuilder; +impl LruDiskCacheBuilder { + pub fn new_disk_cache( + path: &PathBuf, + disk_cache_bytes_size: u64, + ) -> Result { + let external_cache = DiskCache::new(path, disk_cache_bytes_size) + .map_err(|e| ErrorCode::StorageOther(format!("create disk cache failed, {e}")))?; + Ok(Arc::new(RwLock::new(external_cache))) } } diff --git a/src/query/storages/common/cache/src/providers/memory_cache.rs b/src/query/storages/common/cache/src/providers/memory_cache.rs index 952bc553aed53..ab5709fc1a5cd 100644 --- a/src/query/storages/common/cache/src/providers/memory_cache.rs +++ b/src/query/storages/common/cache/src/providers/memory_cache.rs @@ -12,9 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
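+// A holder-level sketch of the disk cache with the crc framing described above (the path and
+// key are illustrative; `new_disk_cache` and the `CacheAccessor` impl live in disk_cache.rs):
+//
+//     let holder = LruDiskCacheBuilder::new_disk_cache(&PathBuf::from("./_cache"), 1 << 20)?;
+//     holder.put("block-xyz-col-1".to_string(), Arc::new(vec![1u8, 2, 3])); // appends crc32
+//     let hit = holder.get("block-xyz-col-1"); // returns None if the crc check fails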
-use std::borrow::Borrow; use std::hash::BuildHasher; -use std::hash::Hash; use std::sync::Arc; use common_cache::BytesMeter; @@ -25,21 +23,34 @@ use common_cache::DefaultHashBuilder; use common_cache::LruCache; use parking_lot::RwLock; -use crate::cache::StorageCache; - -pub type ItemCache = LruCache, DefaultHashBuilder, Count>; +pub type ImMemoryCache = LruCache, S, M>; pub type BytesCache = LruCache>, DefaultHashBuilder, BytesMeter>; -pub type InMemoryItemCacheHolder = Arc>>; +pub type InMemoryItemCacheHolder = + Arc>>; pub type InMemoryBytesCacheHolder = Arc>; pub struct InMemoryCacheBuilder; impl InMemoryCacheBuilder { + // new cache that cache `V`, and metered by the given `meter` + pub fn new_in_memory_cache( + capacity: u64, + meter: M, + ) -> InMemoryItemCacheHolder + where + M: CountableMeter>, + { + let cache = LruCache::with_meter_and_hasher(capacity, meter, DefaultHashBuilder::new()); + Arc::new(RwLock::new(cache)) + } + + // new cache that caches `V` and meter by counting pub fn new_item_cache(capacity: u64) -> InMemoryItemCacheHolder { let cache = LruCache::new(capacity); Arc::new(RwLock::new(cache)) } + // new cache that cache `Vec`, and metered by byte size pub fn new_bytes_cache(capacity: u64) -> InMemoryBytesCacheHolder { let cache = LruCache::with_meter_and_hasher(capacity, BytesMeter, DefaultHashBuilder::new()); @@ -47,31 +58,74 @@ impl InMemoryCacheBuilder { } } -impl StorageCache for LruCache, S, M> -where - M: CountableMeter>, - S: BuildHasher, - K: Eq + Hash, -{ - type Meter = M; +// default impls +mod impls { + use std::sync::Arc; - fn put(&mut self, key: K, value: Arc) { - Cache::put(self, key, value); - } + use parking_lot::RwLock; - fn get(&mut self, k: &Q) -> Option<&Arc> + use super::*; + use crate::cache::CacheAccessor; + + // Wrap a Cache with RwLock, and impl CacheAccessor for it + impl CacheAccessor for Arc> where - K: Borrow, - Q: Hash + Eq + ?Sized, + C: Cache, S, M>, + M: CountableMeter>, + S: BuildHasher, { - Cache::get(self, k) + fn get>(&self, k: Q) -> Option> { + let mut guard = self.write(); + guard.get(k.as_ref()).cloned() + } + + fn put(&self, k: String, v: Arc) { + let mut guard = self.write(); + guard.put(k, v); + } + + fn evict(&self, k: &str) -> bool { + let mut guard = self.write(); + guard.pop(k).is_some() + } + + fn contains_key(&self, k: &str) -> bool { + let guard = self.read(); + guard.contains(k) + } } - fn evict(&mut self, k: &Q) -> bool + // Wrap an Option, and impl CacheAccessor for it + impl CacheAccessor for Option where - K: Borrow, - Q: Hash + Eq + ?Sized, + C: CacheAccessor, + M: CountableMeter>, + S: BuildHasher, { - self.pop(k).is_some() + fn get>(&self, k: Q) -> Option> { + self.as_ref().and_then(|cache| cache.get(k)) + } + + fn put(&self, k: String, v: Arc) { + if let Some(cache) = self { + cache.put(k, v); + } + } + + fn evict(&self, k: &str) -> bool { + if let Some(cache) = self { + cache.evict(k) + } else { + false + } + } + + fn contains_key(&self, k: &str) -> bool { + if let Some(cache) = self { + cache.contains_key(k) + } else { + false + } + } } } diff --git a/src/query/storages/common/cache/src/providers/mod.rs b/src/query/storages/common/cache/src/providers/mod.rs index 579bb711d3692..083a40dcd2749 100644 --- a/src/query/storages/common/cache/src/providers/mod.rs +++ b/src/query/storages/common/cache/src/providers/mod.rs @@ -14,9 +14,19 @@ mod disk_cache; mod memory_cache; +mod table_data_cache; +pub use disk_cache::result::Error as DiskCacheError; +pub use disk_cache::result::Result as DiskCacheResult; pub use 
disk_cache::DiskCache; +pub use disk_cache::DiskCacheKey; +pub use disk_cache::LruDiskCache; +pub use disk_cache::LruDiskCacheBuilder; +pub use disk_cache::LruDiskCacheHolder; pub use memory_cache::BytesCache; +pub use memory_cache::ImMemoryCache; pub use memory_cache::InMemoryBytesCacheHolder; pub use memory_cache::InMemoryCacheBuilder; pub use memory_cache::InMemoryItemCacheHolder; -pub use memory_cache::ItemCache; +pub use table_data_cache::TableDataCache; +pub use table_data_cache::TableDataCacheBuilder; +pub use table_data_cache::TableDataCacheKey; diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs new file mode 100644 index 0000000000000..2ec6b129b1364 --- /dev/null +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -0,0 +1,191 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::path::PathBuf; +use std::sync::Arc; +use std::thread::JoinHandle; + +use common_cache::Count; +use common_cache::DefaultHashBuilder; +use common_exception::ErrorCode; +use common_exception::Result; +use crossbeam_channel::TrySendError; +use tracing::error; +use tracing::info; + +use crate::metrics_inc_cache_access_count; +use crate::metrics_inc_cache_hit_count; +use crate::metrics_inc_cache_miss_count; +use crate::metrics_inc_cache_population_overflow_count; +use crate::metrics_inc_cache_population_pending_count; +use crate::providers::LruDiskCacheHolder; +use crate::CacheAccessor; +use crate::LruDiskCacheBuilder; + +struct CacheItem { + key: String, + value: Arc>, +} + +#[derive(Clone)] +pub struct TableDataCacheKey { + cache_key: String, +} + +impl TableDataCacheKey { + pub fn new(block_path: &str, column_id: u32) -> Self { + Self { + cache_key: format!("{block_path}-{column_id}"), + } + } +} + +impl From for String { + fn from(value: TableDataCacheKey) -> Self { + value.cache_key + } +} + +impl AsRef for TableDataCacheKey { + fn as_ref(&self) -> &str { + &self.cache_key + } +} + +#[derive(Clone)] +pub struct TableDataCache { + external_cache: T, + population_queue: crossbeam_channel::Sender, + _cache_populator: DiskCachePopulator, +} + +const TABLE_DATA_CACHE_NAME: &str = "table_data"; + +pub struct TableDataCacheBuilder; +impl TableDataCacheBuilder { + pub fn new_table_data_disk_cache( + path: &PathBuf, + population_queue_size: u32, + disk_cache_bytes_size: u64, + ) -> Result> { + let disk_cache = LruDiskCacheBuilder::new_disk_cache(path, disk_cache_bytes_size)?; + let (rx, tx) = crossbeam_channel::bounded(population_queue_size as usize); + let num_population_thread = 1; + Ok(TableDataCache { + external_cache: disk_cache.clone(), + population_queue: rx, + _cache_populator: DiskCachePopulator::new(tx, disk_cache, num_population_thread)?, + }) + } +} + +impl CacheAccessor, DefaultHashBuilder, Count> for TableDataCache { + fn get>(&self, k: Q) -> Option>> { + metrics_inc_cache_access_count(1, TABLE_DATA_CACHE_NAME); + let k = k.as_ref(); 
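+        // Reads are served synchronously from the external (disk) cache. Writes, in contrast,
+        // are queued in `put` below and performed by the background population worker, so a
+        // miss here may simply mean the column has not been persisted yet.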
+ if let Some(item) = self.external_cache.get(k) { + metrics_inc_cache_hit_count(1, TABLE_DATA_CACHE_NAME); + Some(item) + } else { + metrics_inc_cache_miss_count(1, TABLE_DATA_CACHE_NAME); + None + } + } + + fn put(&self, k: String, v: Arc>) { + // check if external(disk/redis) already have it. + if !self.external_cache.contains_key(&k) { + // populate the cache to external cache(disk/redis) asyncly + let msg = CacheItem { key: k, value: v }; + match self.population_queue.try_send(msg) { + Ok(_) => { + metrics_inc_cache_population_pending_count(1, TABLE_DATA_CACHE_NAME); + } + Err(TrySendError::Full(_)) => { + metrics_inc_cache_population_pending_count(-1, TABLE_DATA_CACHE_NAME); + metrics_inc_cache_population_overflow_count(1, TABLE_DATA_CACHE_NAME); + } + Err(TrySendError::Disconnected(_)) => { + error!("table data cache population thread is down"); + } + } + } + } + + fn evict(&self, k: &str) -> bool { + self.external_cache.evict(k) + } + + fn contains_key(&self, k: &str) -> bool { + self.external_cache.contains_key(k) + } +} + +struct CachePopulationWorker { + cache: T, + population_queue: crossbeam_channel::Receiver, +} + +impl CachePopulationWorker +where T: CacheAccessor, DefaultHashBuilder, Count> + Send + Sync + 'static +{ + fn populate(&self) { + loop { + match self.population_queue.recv() { + Ok(CacheItem { key, value }) => { + { + if self.cache.contains_key(&key) { + continue; + } + } + self.cache.put(key, value); + metrics_inc_cache_population_pending_count(-1, TABLE_DATA_CACHE_NAME); + } + Err(_) => { + info!("table data cache worker shutdown"); + break; + } + } + } + } + + fn start(self: Arc) -> Result> { + let thread_builder = + std::thread::Builder::new().name("table-data-cache-population".to_owned()); + thread_builder.spawn(move || self.populate()).map_err(|e| { + ErrorCode::StorageOther(format!("spawn cache population worker thread failed, {e}")) + }) + } +} + +#[derive(Clone)] +struct DiskCachePopulator; + +impl DiskCachePopulator { + fn new( + incoming: crossbeam_channel::Receiver, + cache: T, + _num_worker_thread: usize, + ) -> Result + where + T: CacheAccessor, DefaultHashBuilder, Count> + Send + Sync + 'static, + { + let worker = Arc::new(CachePopulationWorker { + cache, + population_queue: incoming, + }); + let _join_handler = worker.start()?; + Ok(Self) + } +} diff --git a/src/query/storages/common/cache/src/read/cached_reader.rs b/src/query/storages/common/cache/src/read/cached_reader.rs index 29923bd509393..d0f62b1e63057 100644 --- a/src/query/storages/common/cache/src/read/cached_reader.rs +++ b/src/query/storages/common/cache/src/read/cached_reader.rs @@ -12,69 +12,47 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::marker::PhantomData; +use std::hash::BuildHasher; use std::sync::Arc; use std::time::Instant; +use common_cache::CountableMeter; use common_exception::Result; use parking_lot::RwLock; use super::loader::LoadParams; -use super::loader::Loader; -use crate::cache::StorageCache; -use crate::metrics::metrics_inc_cache_access_count; -use crate::metrics::metrics_inc_cache_hit_count; -use crate::metrics::metrics_inc_cache_miss_count; use crate::metrics::metrics_inc_cache_miss_load_millisecond; +use crate::providers::ImMemoryCache; +use crate::CacheAccessor; +use crate::Loader; +use crate::NamedCache; -/// A generic cache-aware reader -/// -/// Given an impl of [StorageCache], e.g. 
`ItemCache` or `DiskCache` and a proper impl -/// [LoaderWithCacheKey], which is able to load `T`, `CachedReader` will load the `T` -/// by using [LoaderWithCacheKey], and populate the cache item into [StorageCache] by using -/// the loaded `T` and the key that [LoaderWithCacheKey] provides. -pub struct CachedReader { - cache: Option>>, +/// A cache-aware reader +pub struct CachedReader { + cache: Option, loader: L, - /// name of this cache instance - name: String, - _p: PhantomData, } -impl CachedReader +pub type CacheHolder = Arc>>; + +impl CachedReader>> where - L: Loader + Sync, - C: StorageCache, + L: Loader + Sync, + S: BuildHasher, + M: CountableMeter>, { - pub fn new(cache: Option>>, name: impl Into, loader: L) -> Self { - Self { - cache, - name: name.into(), - loader, - _p: PhantomData, - } + pub fn new(cache: Option>>, loader: L) -> Self { + Self { cache, loader } } /// Load the object at `location`, uses/populates the cache if possible/necessary. - pub async fn read(&self, params: &LoadParams) -> Result> { + pub async fn read(&self, params: &LoadParams) -> Result> { match &self.cache { None => Ok(Arc::new(self.loader.load(params).await?)), - Some(labeled_cache) => { - // Perf. - { - metrics_inc_cache_access_count(1, &self.name); - } - + Some(cache) => { let cache_key = self.loader.cache_key(params); - match self.get_cached(cache_key.as_ref(), labeled_cache) { - Some(item) => { - // Perf. - { - metrics_inc_cache_hit_count(1, &self.name); - } - - Ok(item) - } + match cache.get(cache_key.as_str()) { + Some(item) => Ok(item), None => { let start = Instant::now(); @@ -83,15 +61,13 @@ where // Perf. { - metrics_inc_cache_miss_count(1, &self.name); metrics_inc_cache_miss_load_millisecond( start.elapsed().as_millis() as u64, - &self.name, + cache.name(), ); } - let mut cache_guard = labeled_cache.write(); - cache_guard.put(cache_key, item.clone()); + cache.put(cache_key, item.clone()); Ok(item) } } @@ -100,10 +76,6 @@ where } pub fn name(&self) -> &str { - self.name.as_str() - } - - fn get_cached(&self, key: &str, cache: &RwLock) -> Option> { - cache.write().get(key).cloned() + self.cache.as_ref().map(|c| c.name()).unwrap_or("") } } diff --git a/src/query/storages/common/cache/src/read/mod.rs b/src/query/storages/common/cache/src/read/mod.rs index 8e2000cb01903..7d7611460a92b 100644 --- a/src/query/storages/common/cache/src/read/mod.rs +++ b/src/query/storages/common/cache/src/read/mod.rs @@ -17,9 +17,9 @@ mod cached_reader; mod loader; mod readers; +pub use cached_reader::CachedReader; pub use loader::CacheKey; pub use loader::LoadParams; pub use loader::Loader; -pub use readers::DiskCacheReader; pub use readers::InMemoryBytesCacheReader; pub use readers::InMemoryItemCacheReader; diff --git a/src/query/storages/common/cache/src/read/readers.rs b/src/query/storages/common/cache/src/read/readers.rs index 44ba165b8a8c1..a0b27db424ddf 100644 --- a/src/query/storages/common/cache/src/read/readers.rs +++ b/src/query/storages/common/cache/src/read/readers.rs @@ -12,12 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
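+// The reader aliases below sit on top of `CachedReader`: a read first consults the optional
+// `NamedCache` (which records access/hit/miss counts under its name), falls back to the
+// `Loader` on a miss, records the load latency against the cache name, and then populates
+// the cache before returning the loaded value.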
-use crate::providers::BytesCache; -use crate::providers::DiskCache; -use crate::providers::ItemCache; use crate::read::cached_reader::CachedReader; +use crate::InMemoryBytesCacheHolder; +use crate::InMemoryItemCacheHolder; +use crate::NamedCache; -pub type InMemoryItemCacheReader = CachedReader>; -pub type InMemoryBytesCacheReader = CachedReader; -// NOTE: dummy impl, just for api testing -pub type DiskCacheReader = CachedReader; +pub type InMemoryItemCacheReader = CachedReader>>; +pub type InMemoryBytesCacheReader = CachedReader>; diff --git a/src/query/storages/common/cache/tests/it/main.rs b/src/query/storages/common/cache/tests/it/main.rs index ea0ed57e60edf..39aa524049506 100644 --- a/src/query/storages/common/cache/tests/it/main.rs +++ b/src/query/storages/common/cache/tests/it/main.rs @@ -11,3 +11,5 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + +mod providers; diff --git a/src/query/storages/common/cache/tests/it/providers/disk_cache.rs b/src/query/storages/common/cache/tests/it/providers/disk_cache.rs new file mode 100644 index 0000000000000..42347d8d31b8f --- /dev/null +++ b/src/query/storages/common/cache/tests/it/providers/disk_cache.rs @@ -0,0 +1,199 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +use std::fs; +use std::fs::File; +use std::io; +use std::io::Read; +use std::io::Write; +use std::path::Path; +use std::path::PathBuf; + +use storages_common_cache::DiskCacheError; +use storages_common_cache::DiskCacheKey; +use storages_common_cache::DiskCacheResult; +use storages_common_cache::LruDiskCache as DiskCache; +use tempfile::TempDir; + +struct TestFixture { + /// Temp directory. 
+ pub tempdir: TempDir, +} + +// helper trait to simplify the test case +trait InsertSingleSlice { + fn insert_single_slice(&mut self, key: &str, bytes: &[u8]) -> DiskCacheResult<()>; +} + +impl InsertSingleSlice for DiskCache { + fn insert_single_slice(&mut self, key: &str, bytes: &[u8]) -> DiskCacheResult<()> { + self.insert_bytes(key, &[bytes]) + } +} + +fn create_file, F: FnOnce(File) -> io::Result<()>>( + dir: &Path, + path: T, + fill_contents: F, +) -> io::Result { + let b = dir.join(path); + fs::create_dir_all(b.parent().unwrap())?; + let f = fs::File::create(&b)?; + fill_contents(f)?; + b.canonicalize() +} + +fn read_all(r: &mut R) -> io::Result> { + let mut v = vec![]; + r.read_to_end(&mut v)?; + Ok(v) +} + +impl TestFixture { + pub fn new() -> TestFixture { + TestFixture { + tempdir: tempfile::Builder::new() + .prefix("lru-disk-cache-test") + .tempdir() + .unwrap(), + } + } + + pub fn tmp(&self) -> &Path { + self.tempdir.path() + } + + pub fn create_file>(&self, path: T, size: usize) -> PathBuf { + create_file(self.tempdir.path(), path, |mut f| { + f.write_all(&vec![0; size]) + }) + .unwrap() + } +} + +#[test] +fn test_empty_dir() { + let f = TestFixture::new(); + DiskCache::new(f.tmp(), 1024).unwrap(); +} + +#[test] +fn test_missing_root() { + let f = TestFixture::new(); + DiskCache::new(f.tmp().join("not-here"), 1024).unwrap(); +} + +#[test] +fn test_some_existing_files() { + let f = TestFixture::new(); + let items = 10; + let sizes = (0..).take(items); + let total_bytes: u64 = sizes.clone().sum(); + for i in sizes { + let file_name = format!("file-{i}"); + let test_key = DiskCacheKey::from(file_name.as_str()); + let test_path = PathBuf::from(&test_key); + f.create_file(test_path, i as usize); + } + + let c = DiskCache::new(f.tmp(), total_bytes).unwrap(); + assert_eq!(c.size(), total_bytes); + assert_eq!(c.len(), items); +} + +#[test] +fn test_existing_file_too_large() { + let f = TestFixture::new(); + let items_count = 10; + let items_count_shall_be_kept = 10 - 2; + let item_size = 10; + let capacity = items_count_shall_be_kept * item_size; + let sizes = (0..).take(items_count); + for i in sizes { + let file_name = format!("file-{i}"); + let test_key = DiskCacheKey::from(file_name.as_str()); + let test_path = PathBuf::from(&test_key); + f.create_file(test_path, item_size); + } + let c = DiskCache::new(f.tmp(), capacity as u64).unwrap(); + + assert_eq!(c.size(), capacity as u64); + assert_eq!(c.len(), items_count_shall_be_kept); + for i in (0..).take(items_count_shall_be_kept) { + let file_name = format!("file-{i}"); + c.contains_key(file_name.as_str()); + } +} + +#[test] +fn test_insert_bytes() { + let f = TestFixture::new(); + let mut c = DiskCache::new(f.tmp(), 25).unwrap(); + c.insert_single_slice("a/b/c", &[0; 10]).unwrap(); + assert!(c.contains_key("a/b/c")); + c.insert_single_slice("a/b/d", &[0; 10]).unwrap(); + assert_eq!(c.size(), 20); + // Adding this third file should put the cache above the limit. + c.insert_single_slice("x/y/z", &[0; 10]).unwrap(); + assert_eq!(c.size(), 20); + // The least-recently-used file should have been removed. + assert!(!c.contains_key("a/b/c")); + + let evicted_file_path = PathBuf::from(&DiskCacheKey::from("a/b/c")); + assert!(!f.tmp().join(evicted_file_path).exists()); +} + +#[test] +fn test_insert_bytes_exact() { + // Test that files adding up to exactly the size limit works. 
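+    // With a 20-byte capacity, two 10-byte entries fill the cache exactly; inserting a third
+    // 10-byte entry must evict the least recently used one ("file1") so the size stays at 20.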
+ let f = TestFixture::new(); + let mut c = DiskCache::new(f.tmp(), 20).unwrap(); + c.insert_single_slice("file1", &[1; 10]).unwrap(); + c.insert_single_slice("file2", &[2; 10]).unwrap(); + assert_eq!(c.size(), 20); + c.insert_single_slice("file3", &[3; 10]).unwrap(); + assert_eq!(c.size(), 20); + assert!(!c.contains_key("file1")); +} + +#[test] +fn test_add_get_lru() { + let f = TestFixture::new(); + { + let mut c = DiskCache::new(f.tmp(), 25).unwrap(); + c.insert_single_slice("file1", &[1; 10]).unwrap(); + c.insert_single_slice("file2", &[2; 10]).unwrap(); + // Get the file to bump its LRU status. + assert_eq!( + read_all(&mut File::open(c.get_cache_path("file1").unwrap()).unwrap()).unwrap(), + vec![1u8; 10] + ); + // Adding this third file should put the cache above the limit. + c.insert_single_slice("file3", &[3; 10]).unwrap(); + assert_eq!(c.size(), 20); + // The least-recently-used file should have been removed. + assert!(!c.contains_key("file2")); + } +} + +#[test] +fn test_insert_bytes_too_large() { + let f = TestFixture::new(); + let mut c = DiskCache::new(f.tmp(), 1).unwrap(); + match c.insert_single_slice("a/b/c", &[0; 2]) { + Err(DiskCacheError::FileTooLarge) => {} + x => panic!("Unexpected result: {x:?}"), + } +} diff --git a/src/query/storages/common/cache/tests/it/providers/mod.rs b/src/query/storages/common/cache/tests/it/providers/mod.rs new file mode 100644 index 0000000000000..ad150d5b016b8 --- /dev/null +++ b/src/query/storages/common/cache/tests/it/providers/mod.rs @@ -0,0 +1,16 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +mod disk_cache; diff --git a/src/query/storages/factory/src/storage_factory.rs b/src/query/storages/factory/src/storage_factory.rs index a69dc27db9658..0105aab999cfb 100644 --- a/src/query/storages/factory/src/storage_factory.rs +++ b/src/query/storages/factory/src/storage_factory.rs @@ -15,7 +15,7 @@ use std::sync::Arc; pub use common_catalog::catalog::StorageDescription; -use common_config::Config; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::TableInfo; @@ -67,7 +67,7 @@ pub struct StorageFactory { } impl StorageFactory { - pub fn create(conf: Config) -> Self { + pub fn create(conf: InnerConfig) -> Self { let creators: DashMap = Default::default(); // Register memory table engine. 
diff --git a/src/query/storages/fuse/Cargo.toml b/src/query/storages/fuse/Cargo.toml index 04263c153db42..e6cd92f38f04e 100644 --- a/src/query/storages/fuse/Cargo.toml +++ b/src/query/storages/fuse/Cargo.toml @@ -14,7 +14,6 @@ test = false [dependencies] common-arrow = { path = "../../../common/arrow" } common-base = { path = "../../../common/base" } -common-cache = { path = "../../../common/cache" } common-catalog = { path = "../../catalog" } common-exception = { path = "../../../common/exception" } common-expression = { path = "../../expression" } diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index b07d1a169848c..be3c0d8dcd5b8 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -13,17 +13,11 @@ // limitations under the License. use std::collections::BTreeMap; -use std::collections::HashMap; -use std::ops::Range; use std::sync::Arc; -use std::time::Instant; use common_arrow::arrow::datatypes::Field; use common_arrow::arrow::io::parquet::write::to_parquet_schema; use common_arrow::parquet::metadata::SchemaDescriptor; -use common_base::rangemap::RangeMerger; -use common_base::runtime::UnlimitedFuture; -use common_catalog::plan::PartInfoPtr; use common_catalog::plan::Projection; use common_catalog::table_context::TableContext; use common_exception::ErrorCode; @@ -39,14 +33,7 @@ use common_expression::TableSchemaRef; use common_sql::field_default_value; use common_storage::ColumnNode; use common_storage::ColumnNodes; -use futures::future::try_join_all; -use opendal::Object; use opendal::Operator; -use storages_common_table_meta::meta::ColumnMeta; - -use crate::fuse_part::FusePartInfo; -use crate::io::read::ReadSettings; -use crate::metrics::*; // TODO: make BlockReader as a trait. 
#[derive(Clone)] @@ -60,66 +47,6 @@ pub struct BlockReader { pub(crate) default_vals: Vec, } -pub struct OwnerMemory { - chunks: HashMap>, -} - -impl OwnerMemory { - pub fn create(chunks: Vec<(usize, Vec)>) -> OwnerMemory { - let chunks = chunks.into_iter().collect::>(); - OwnerMemory { chunks } - } - - pub fn get_chunk(&self, index: usize, path: &str) -> Result<&[u8]> { - match self.chunks.get(&index) { - Some(chunk) => Ok(chunk.as_slice()), - None => Err(ErrorCode::Internal(format!( - "It's a terrible bug, not found range data, merged_range_idx:{}, path:{}", - index, path - ))), - } - } -} - -pub struct MergeIOReadResult -where Self: 'static -{ - path: String, - owner_memory: OwnerMemory, - columns_chunks: HashMap)>, -} - -impl MergeIOReadResult -where Self: 'static -{ - pub fn create(owner_memory: OwnerMemory, capacity: usize, path: String) -> MergeIOReadResult { - MergeIOReadResult { - path, - owner_memory, - columns_chunks: HashMap::with_capacity(capacity), - } - } - - pub fn columns_chunks(&self) -> Result> { - let mut res = Vec::with_capacity(self.columns_chunks.len()); - - for (column_idx, (chunk_idx, range)) in &self.columns_chunks { - let chunk = self.owner_memory.get_chunk(*chunk_idx, &self.path)?; - res.push((*column_idx, &chunk[range.clone()])); - } - - Ok(res) - } - - pub fn get_chunk(&self, index: usize, path: &str) -> Result<&[u8]> { - self.owner_memory.get_chunk(index, path) - } - - pub fn add_column_chunk(&mut self, chunk: usize, column: usize, range: Range) { - self.columns_chunks.insert(column, (chunk, range)); - } -} - fn inner_project_field_default_values(default_vals: &[Scalar], paths: &[usize]) -> Result { if paths.is_empty() { return Err(ErrorCode::BadArguments( @@ -189,6 +116,7 @@ impl BlockReader { let parquet_schema_descriptor = to_parquet_schema(&arrow_schema)?; let column_nodes = ColumnNodes::new_from_schema(&arrow_schema, Some(&schema)); + let project_column_nodes: Vec = projection .project_column_nodes(&column_nodes)? .iter() @@ -211,181 +139,6 @@ impl BlockReader { self.operator.metadata().can_blocking() } - /// This is an optimized for data read, works like the Linux kernel io-scheduler IO merging. - /// If the distance between two IO request ranges to be read is less than storage_io_min_bytes_for_seek(Default is 48Bytes), - /// will read the range that contains both ranges, thus avoiding extra seek. - /// - /// It will *NOT* merge two requests: - /// if the last io request size is larger than storage_io_page_bytes_for_read(Default is 512KB). - pub async fn merge_io_read( - read_settings: &ReadSettings, - object: Object, - raw_ranges: Vec<(usize, Range)>, - ) -> Result { - let path = object.path().to_string(); - - // Build merged read ranges. - let ranges = raw_ranges - .iter() - .map(|(_, r)| r.clone()) - .collect::>(); - let range_merger = RangeMerger::from_iter( - ranges, - read_settings.storage_io_min_bytes_for_seek, - read_settings.storage_io_max_page_bytes_for_read, - ); - let merged_ranges = range_merger.ranges(); - - // Read merged range data. - let mut read_handlers = Vec::with_capacity(merged_ranges.len()); - for (idx, range) in merged_ranges.iter().enumerate() { - // Perf. 
- { - metrics_inc_remote_io_seeks_after_merged(1); - metrics_inc_remote_io_read_bytes_after_merged(range.end - range.start); - } - - read_handlers.push(UnlimitedFuture::create(Self::read_range( - object.clone(), - idx, - range.start, - range.end, - ))); - } - - let start = Instant::now(); - let owner_memory = OwnerMemory::create(try_join_all(read_handlers).await?); - let mut read_res = MergeIOReadResult::create(owner_memory, raw_ranges.len(), path.clone()); - - // Perf. - { - metrics_inc_remote_io_read_milliseconds(start.elapsed().as_millis() as u64); - } - - for (raw_idx, raw_range) in &raw_ranges { - let column_range = raw_range.start..raw_range.end; - - // Find the range index and Range from merged ranges. - let (merged_range_idx, merged_range) = match range_merger.get(column_range.clone()) { - None => Err(ErrorCode::Internal(format!( - "It's a terrible bug, not found raw range:[{:?}], path:{} from merged ranges\n: {:?}", - column_range, path, merged_ranges - ))), - Some((index, range)) => Ok((index, range)), - }?; - - // Fetch the raw data for the raw range. - let start = (column_range.start - merged_range.start) as usize; - let end = (column_range.end - merged_range.start) as usize; - read_res.add_column_chunk(merged_range_idx, *raw_idx, start..end); - } - - Ok(read_res) - } - - pub fn sync_merge_io_read( - read_settings: &ReadSettings, - object: Object, - raw_ranges: Vec<(usize, Range)>, - ) -> Result { - let path = object.path().to_string(); - - // Build merged read ranges. - let ranges = raw_ranges - .iter() - .map(|(_, r)| r.clone()) - .collect::>(); - let range_merger = RangeMerger::from_iter( - ranges, - read_settings.storage_io_min_bytes_for_seek, - read_settings.storage_io_max_page_bytes_for_read, - ); - let merged_ranges = range_merger.ranges(); - - // Read merged range data. - let mut io_res = Vec::with_capacity(merged_ranges.len()); - for (idx, range) in merged_ranges.iter().enumerate() { - io_res.push(Self::sync_read_range( - object.clone(), - idx, - range.start, - range.end, - )?); - } - - let owner_memory = OwnerMemory::create(io_res); - let mut read_res = MergeIOReadResult::create(owner_memory, raw_ranges.len(), path.clone()); - - for (raw_idx, raw_range) in &raw_ranges { - let column_range = raw_range.start..raw_range.end; - - // Find the range index and Range from merged ranges. - let (merged_range_idx, merged_range) = match range_merger.get(column_range.clone()) { - None => Err(ErrorCode::Internal(format!( - "It's a terrible bug, not found raw range:[{:?}], path:{} from merged ranges\n: {:?}", - column_range, path, merged_ranges - ))), - Some((index, range)) => Ok((index, range)), - }?; - - // Fetch the raw data for the raw range. 
- let start = (column_range.start - merged_range.start) as usize; - let end = (column_range.end - merged_range.start) as usize; - read_res.add_column_chunk(merged_range_idx, *raw_idx, start..end); - } - - Ok(read_res) - } - - pub async fn read_columns_data_by_merge_io( - &self, - settings: &ReadSettings, - location: &str, - columns_meta: &HashMap, - ) -> Result { - // Perf - { - metrics_inc_remote_io_read_parts(1); - } - - let mut ranges = vec![]; - for (index, (column_id, ..)) in self.project_indices.iter() { - if let Some(column_meta) = columns_meta.get(column_id) { - let (offset, len) = column_meta.offset_length(); - ranges.push((*index, offset..(offset + len))); - - // Perf - { - metrics_inc_remote_io_seeks(1); - metrics_inc_remote_io_read_bytes(len); - } - } - } - - let object = self.operator.object(location); - - Self::merge_io_read(settings, object, ranges).await - } - - pub fn sync_read_columns_data_by_merge_io( - &self, - settings: &ReadSettings, - part: PartInfoPtr, - ) -> Result { - let part = FusePartInfo::from_part(&part)?; - - let mut ranges = vec![]; - for (index, (column_id, ..)) in self.project_indices.iter() { - if let Some(column_meta) = part.columns_meta.get(column_id) { - let (offset, len) = column_meta.offset_length(); - ranges.push((*index, offset..(offset + len))); - } - } - - let object = self.operator.object(&part.location); - Self::sync_merge_io_read(settings, object, ranges) - } - // Build non duplicate leaf_indices to avoid repeated read column from parquet pub(crate) fn build_projection_indices( columns: &[ColumnNode], @@ -404,28 +157,6 @@ impl BlockReader { indices } - #[inline] - pub async fn read_range( - o: Object, - index: usize, - start: u64, - end: u64, - ) -> Result<(usize, Vec)> { - let chunk = o.range_read(start..end).await?; - Ok((index, chunk)) - } - - #[inline] - pub fn sync_read_range( - o: Object, - index: usize, - start: u64, - end: u64, - ) -> Result<(usize, Vec)> { - let chunk = o.blocking_range_read(start..end)?; - Ok((index, chunk)) - } - pub fn schema(&self) -> TableSchemaRef { self.projected_schema.clone() } diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs new file mode 100644 index 0000000000000..97ded3a7a9c80 --- /dev/null +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs @@ -0,0 +1,124 @@ +// Copyright 2021 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::collections::HashMap; +use std::ops::Range; +use std::sync::Arc; + +use common_exception::ErrorCode; +use common_exception::Result; +use common_expression::ColumnId; +use storages_common_cache::CacheAccessor; +use storages_common_cache::TableDataCache; +use storages_common_cache::TableDataCacheKey; +use storages_common_cache_manager::SizedColumnArray; + +pub struct OwnerMemory { + chunks: HashMap>, +} + +impl OwnerMemory { + pub fn create(chunks: Vec<(usize, Vec)>) -> OwnerMemory { + let chunks = chunks.into_iter().collect::>(); + OwnerMemory { chunks } + } + + pub fn get_chunk(&self, index: usize, path: &str) -> Result<&[u8]> { + match self.chunks.get(&index) { + Some(chunk) => Ok(chunk.as_slice()), + None => Err(ErrorCode::Internal(format!( + "It's a terrible bug, not found range data, merged_range_idx:{}, path:{}", + index, path + ))), + } + } +} + +type CachedColumnData = Vec<(ColumnId, Arc>)>; +type CachedColumnArray = Vec<(ColumnId, Arc)>; +pub struct MergeIOReadResult { + block_path: String, + columns_chunk_offsets: HashMap)>, + owner_memory: OwnerMemory, + pub cached_column_data: CachedColumnData, + pub cached_column_array: CachedColumnArray, + table_data_cache: Option, +} + +pub enum DataItem<'a> { + RawData(&'a [u8]), + ColumnArray(&'a Arc), +} + +impl MergeIOReadResult { + pub fn create( + owner_memory: OwnerMemory, + capacity: usize, + path: String, + table_data_cache: Option, + ) -> MergeIOReadResult { + MergeIOReadResult { + block_path: path, + columns_chunk_offsets: HashMap::with_capacity(capacity), + owner_memory, + cached_column_data: vec![], + cached_column_array: vec![], + table_data_cache, + } + } + + pub fn columns_chunks(&self) -> Result> { + let mut res = HashMap::with_capacity(self.columns_chunk_offsets.len()); + + // merge column data fetched from object storage + for (column_id, (chunk_idx, range)) in &self.columns_chunk_offsets { + let chunk = self.owner_memory.get_chunk(*chunk_idx, &self.block_path)?; + res.insert(*column_id, DataItem::RawData(&chunk[range.clone()])); + } + + // merge column data from cache + for (column_id, data) in &self.cached_column_data { + res.insert(*column_id, DataItem::RawData(data.as_slice())); + } + + // merge column array from cache + for (column_id, data) in &self.cached_column_array { + res.insert(*column_id, DataItem::ColumnArray(data)); + } + + Ok(res) + } + + fn get_chunk(&self, index: usize, path: &str) -> Result<&[u8]> { + self.owner_memory.get_chunk(index, path) + } + + pub fn add_column_chunk( + &mut self, + chunk_index: usize, + column_id: ColumnId, + range: Range, + ) { + if let Some(table_data_cache) = &self.table_data_cache { + // populate raw column data cache (compressed raw bytes) + if let Ok(chunk_data) = self.get_chunk(chunk_index, &self.block_path) { + let cache_key = TableDataCacheKey::new(&self.block_path, column_id); + let data = &chunk_data[range.clone()]; + table_data_cache.put(cache_key.as_ref().to_owned(), Arc::new(data.to_vec())); + } + } + self.columns_chunk_offsets + .insert(column_id, (chunk_index, range)); + } +} diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs new file mode 100644 index 0000000000000..f640c1b7ba89a --- /dev/null +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs @@ -0,0 +1,192 @@ +// Copyright 2021 Datafuse Labs. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+use std::ops::Range;
+use std::time::Instant;
+
+use common_base::rangemap::RangeMerger;
+use common_base::runtime::UnlimitedFuture;
+use common_exception::ErrorCode;
+use common_exception::Result;
+use common_expression::ColumnId;
+use futures::future::try_join_all;
+use opendal::Object;
+use storages_common_cache::CacheAccessor;
+use storages_common_cache::TableDataCacheKey;
+use storages_common_cache_manager::CacheManager;
+use storages_common_table_meta::meta::ColumnMeta;
+
+use crate::io::read::block::block_reader_merge_io::OwnerMemory;
+use crate::io::read::ReadSettings;
+use crate::io::BlockReader;
+use crate::metrics::*;
+use crate::MergeIOReadResult;
+
+impl BlockReader {
+    /// An optimization for data reads that works like the Linux kernel io-scheduler's IO merging.
+    /// If the distance between two IO request ranges to be read is less than storage_io_min_bytes_for_seek (default: 48 bytes),
+    /// a single range covering both of them is read instead, avoiding an extra seek.
+    ///
+    /// It will *NOT* merge two requests
+    /// if the last io request size is larger than storage_io_max_page_bytes_for_read (default: 512KB).
+    async fn merge_io_read(
+        read_settings: &ReadSettings,
+        object: Object,
+        raw_ranges: Vec<(ColumnId, Range<u64>)>,
+    ) -> Result<MergeIOReadResult> {
+        if raw_ranges.is_empty() {
+            // shortcut
+            let read_res = MergeIOReadResult::create(
+                OwnerMemory::create(vec![]),
+                raw_ranges.len(),
+                object.path().to_string(),
+                CacheManager::instance().get_table_data_cache(),
+            );
+            return Ok(read_res);
+        }
+
+        // Build merged read ranges.
+        let ranges = raw_ranges
+            .iter()
+            .map(|(_, r)| r.clone())
+            .collect::<Vec<_>>();
+        let range_merger = RangeMerger::from_iter(
+            ranges,
+            read_settings.storage_io_min_bytes_for_seek,
+            read_settings.storage_io_max_page_bytes_for_read,
+        );
+        let merged_ranges = range_merger.ranges();
+
+        // Read merged range data.
+        let mut read_handlers = Vec::with_capacity(merged_ranges.len());
+        for (idx, range) in merged_ranges.iter().enumerate() {
+            // Perf.
+            {
+                metrics_inc_remote_io_seeks_after_merged(1);
+                metrics_inc_remote_io_read_bytes_after_merged(range.end - range.start);
+            }
+
+            read_handlers.push(UnlimitedFuture::create(Self::read_range(
+                object.clone(),
+                idx,
+                range.start,
+                range.end,
+            )));
+        }
+
+        let start = Instant::now();
+        let owner_memory = OwnerMemory::create(try_join_all(read_handlers).await?);
+        let table_data_cache = CacheManager::instance().get_table_data_cache();
+        let mut read_res = MergeIOReadResult::create(
+            owner_memory,
+            raw_ranges.len(),
+            object.path().to_string(),
+            table_data_cache,
+        );
+
+        // Perf.
+        {
+            metrics_inc_remote_io_read_milliseconds(start.elapsed().as_millis() as u64);
+        }
+
+        for (raw_idx, raw_range) in &raw_ranges {
+            let column_range = raw_range.start..raw_range.end;
+
+            // Find the range index and Range from merged ranges.
+ let (merged_range_idx, merged_range) = match range_merger.get(column_range.clone()) { + None => Err(ErrorCode::Internal(format!( + "It's a terrible bug, not found raw range:[{:?}], path:{} from merged ranges\n: {:?}", + column_range, + object.path(), + merged_ranges + ))), + Some((index, range)) => Ok((index, range)), + }?; + + // Fetch the raw data for the raw range. + let start = (column_range.start - merged_range.start) as usize; + let end = (column_range.end - merged_range.start) as usize; + let column_id = *raw_idx as ColumnId; + read_res.add_column_chunk(merged_range_idx, column_id, start..end); + } + + Ok(read_res) + } + + pub async fn read_columns_data_by_merge_io( + &self, + settings: &ReadSettings, + location: &str, + columns_meta: &HashMap, + ) -> Result { + // Perf + { + metrics_inc_remote_io_read_parts(1); + } + + let mut ranges = vec![]; + // for async read, try using table data cache (if enabled in settings) + let column_data_cache = CacheManager::instance().get_table_data_cache(); + let column_array_cache = CacheManager::instance().get_table_data_array_cache(); + let mut cached_column_data = vec![]; + let mut cached_column_array = vec![]; + for (_index, (column_id, ..)) in self.project_indices.iter() { + let column_cache_key = TableDataCacheKey::new(location, *column_id); + + // first, check column array object cache + if let Some(cache_array) = column_array_cache.get(&column_cache_key) { + cached_column_array.push((*column_id, cache_array)); + continue; + } + + // and then, check column data cache + if let Some(cached_column_raw_data) = column_data_cache.get(&column_cache_key) { + cached_column_data.push((*column_id, cached_column_raw_data)); + continue; + } + + // if all cache missed, prepare the ranges to be read + if let Some(column_meta) = columns_meta.get(column_id) { + let (offset, len) = column_meta.offset_length(); + ranges.push((*column_id, offset..(offset + len))); + + // Perf + { + metrics_inc_remote_io_seeks(1); + metrics_inc_remote_io_read_bytes(len); + } + } + } + + let object = self.operator.object(location); + + let mut merge_io_read_res = Self::merge_io_read(settings, object, ranges).await?; + // TODO set + merge_io_read_res.cached_column_data = cached_column_data; + merge_io_read_res.cached_column_array = cached_column_array; + Ok(merge_io_read_res) + } + + #[inline] + pub async fn read_range( + o: Object, + index: usize, + start: u64, + end: u64, + ) -> Result<(usize, Vec)> { + let chunk = o.range_read(start..end).await?; + Ok((index, chunk)) + } +} diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs new file mode 100644 index 0000000000000..d87a3504d8bd6 --- /dev/null +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs @@ -0,0 +1,137 @@ +// Copyright 2021 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::ops::Range; + +use common_base::rangemap::RangeMerger; +use common_catalog::plan::PartInfoPtr; +use common_exception::ErrorCode; +use common_exception::Result; +use common_expression::ColumnId; +use opendal::Object; +use storages_common_cache::CacheAccessor; +use storages_common_cache::TableDataCacheKey; +use storages_common_cache_manager::CacheManager; + +use crate::fuse_part::FusePartInfo; +use crate::io::read::block::block_reader_merge_io::OwnerMemory; +use crate::io::read::ReadSettings; +use crate::io::BlockReader; +use crate::MergeIOReadResult; + +impl BlockReader { + pub fn sync_merge_io_read( + read_settings: &ReadSettings, + object: Object, + raw_ranges: Vec<(ColumnId, Range)>, + ) -> Result { + let path = object.path().to_string(); + + // Build merged read ranges. + let ranges = raw_ranges + .iter() + .map(|(_, r)| r.clone()) + .collect::>(); + let range_merger = RangeMerger::from_iter( + ranges, + read_settings.storage_io_min_bytes_for_seek, + read_settings.storage_io_max_page_bytes_for_read, + ); + let merged_ranges = range_merger.ranges(); + + // Read merged range data. + let mut io_res = Vec::with_capacity(merged_ranges.len()); + for (idx, range) in merged_ranges.iter().enumerate() { + io_res.push(Self::sync_read_range( + object.clone(), + idx, + range.start, + range.end, + )?); + } + + let owner_memory = OwnerMemory::create(io_res); + + // for sync read, we disable table data cache + let table_data_cache = None; + let mut read_res = MergeIOReadResult::create( + owner_memory, + raw_ranges.len(), + path.clone(), + table_data_cache, + ); + + for (raw_idx, raw_range) in &raw_ranges { + let column_id = *raw_idx as ColumnId; + let column_range = raw_range.start..raw_range.end; + + // Find the range index and Range from merged ranges. + let (merged_range_idx, merged_range) = match range_merger.get(column_range.clone()) { + None => Err(ErrorCode::Internal(format!( + "It's a terrible bug, not found raw range:[{:?}], path:{} from merged ranges\n: {:?}", + column_range, path, merged_ranges + ))), + Some((index, range)) => Ok((index, range)), + }?; + + // Fetch the raw data for the raw range. 
+ let start = (column_range.start - merged_range.start) as usize; + let end = (column_range.end - merged_range.start) as usize; + read_res.add_column_chunk(merged_range_idx, column_id, start..end); + } + + Ok(read_res) + } + + pub fn sync_read_columns_data_by_merge_io( + &self, + settings: &ReadSettings, + part: PartInfoPtr, + ) -> Result { + let part = FusePartInfo::from_part(&part)?; + let column_array_cache = CacheManager::instance().get_table_data_array_cache(); + + let mut ranges = vec![]; + let mut cached_column_array = vec![]; + for (_index, (column_id, ..)) in self.project_indices.iter() { + // first, check column array object cache + let block_path = &part.location; + let column_cache_key = TableDataCacheKey::new(block_path, *column_id); + if let Some(cache_array) = column_array_cache.get(&column_cache_key) { + cached_column_array.push((*column_id, cache_array)); + continue; + } + if let Some(column_meta) = part.columns_meta.get(column_id) { + let (offset, len) = column_meta.offset_length(); + ranges.push((*column_id, offset..(offset + len))); + } + } + + let object = self.operator.object(&part.location); + let mut merge_io_result = Self::sync_merge_io_read(settings, object, ranges)?; + merge_io_result.cached_column_array = cached_column_array; + Ok(merge_io_result) + } + + #[inline] + pub fn sync_read_range( + o: Object, + index: usize, + start: u64, + end: u64, + ) -> Result<(usize, Vec)> { + let chunk = o.blocking_range_read(start..end)?; + Ok((index, chunk)) + } +} diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index 4a46a4bd302c4..cc484c929514c 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -12,34 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; -use std::sync::Arc; -use std::time::Instant; - -use common_arrow::arrow::chunk::Chunk; -use common_arrow::arrow::datatypes::Field; -use common_arrow::arrow::io::parquet::read::column_iter_to_arrays; -use common_arrow::arrow::io::parquet::read::ArrayIter; -use common_arrow::parquet::compression::Compression as ParquetCompression; -use common_arrow::parquet::metadata::ColumnDescriptor; -use common_arrow::parquet::read::PageMetaData; -use common_arrow::parquet::read::PageReader; -use common_catalog::plan::PartInfoPtr; -use common_exception::ErrorCode; use common_exception::Result; -use common_expression::ColumnId; use common_expression::DataBlock; -use common_expression::FieldIndex; use storages_common_table_meta::meta::BlockMeta; -use storages_common_table_meta::meta::ColumnMeta; -use storages_common_table_meta::meta::Compression; -use crate::fuse_part::FusePartInfo; -use crate::io::read::block::decompressor::BuffedBasicDecompressor; use crate::io::read::ReadSettings; use crate::io::BlockReader; -use crate::io::UncompressedBuffer; -use crate::metrics::*; impl BlockReader { /// Read a parquet file and convert to DataBlock. @@ -49,248 +27,25 @@ impl BlockReader { settings: &ReadSettings, meta: &BlockMeta, ) -> Result { - // Build columns meta. - let columns_meta = meta - .col_metas - .iter() - .map(|(column_id, meta)| (*column_id, meta.clone())) - .collect::>(); + let columns_meta = &meta.col_metas; // Get the merged IO read result. 
- let read_res = self - .read_columns_data_by_merge_io(settings, &meta.location.0, &columns_meta) + let merge_io_read_result = self + .read_columns_data_by_merge_io(settings, &meta.location.0, columns_meta) .await?; // Get the columns chunk. - let chunks = read_res - .columns_chunks()? - .into_iter() - .map(|(column_idx, column_chunk)| (column_idx, column_chunk)) - .collect::>(); + let column_chunks = merge_io_read_result.columns_chunks()?; let num_rows = meta.row_count as usize; - let columns_chunk = chunks - .iter() - .map(|(index, chunk)| (*index, *chunk)) - .collect::>(); self.deserialize_parquet_chunks_with_buffer( + &meta.location.0, num_rows, &meta.compression, - &columns_meta, - columns_chunk, + columns_meta, + column_chunks, None, ) } - - /// Deserialize column chunks data from parquet format to DataBlock. - pub fn deserialize_parquet_chunks( - &self, - part: PartInfoPtr, - chunks: Vec<(FieldIndex, &[u8])>, - ) -> Result { - let part = FusePartInfo::from_part(&part)?; - let start = Instant::now(); - - if chunks.is_empty() { - return self.build_default_values_block(part.nums_rows); - } - - let reads = chunks - .iter() - .map(|(index, chunk)| (*index, *chunk)) - .collect::>(); - - let deserialized_res = self.deserialize_parquet_chunks_with_buffer( - part.nums_rows, - &part.compression, - &part.columns_meta, - reads, - None, - ); - - // Perf. - { - metrics_inc_remote_io_deserialize_milliseconds(start.elapsed().as_millis() as u64); - } - - deserialized_res - } - - pub fn build_default_values_block(&self, num_rows: usize) -> Result { - let data_schema = self.data_schema(); - let default_vals = self.default_vals.clone(); - DataBlock::create_with_default_value(&data_schema, &default_vals, num_rows) - } - - /// Deserialize column chunks data from parquet format to DataBlock with a uncompressed buffer. 
- pub fn deserialize_parquet_chunks_with_buffer( - &self, - num_rows: usize, - compression: &Compression, - columns_meta: &HashMap, - columns_chunks: Vec<(FieldIndex, &[u8])>, - uncompressed_buffer: Option>, - ) -> Result { - if columns_chunks.is_empty() { - return self.build_default_values_block(num_rows); - } - - let chunk_map: HashMap = columns_chunks.into_iter().collect(); - let mut columns_array_iter = Vec::with_capacity(self.projection.len()); - - let columns = self.projection.project_column_nodes(&self.column_nodes)?; - let mut need_default_vals = Vec::with_capacity(columns.len()); - let mut need_to_fill_default_val = false; - - for column in columns { - let field = column.field.clone(); - let indices = &column.leaf_indices; - let mut column_metas = Vec::with_capacity(indices.len()); - let mut column_chunks = Vec::with_capacity(indices.len()); - let mut column_descriptors = Vec::with_capacity(indices.len()); - let mut column_in_block = false; - - for (j, index) in indices.iter().enumerate() { - let column_id = column.leaf_column_ids[j]; - if let Some(column_meta) = columns_meta.get(&column_id) { - let column_read = <&[u8]>::clone(&chunk_map[index]); - let column_descriptor = &self.parquet_schema_descriptor.columns()[*index]; - column_metas.push(column_meta); - column_chunks.push(column_read); - column_descriptors.push(column_descriptor); - column_in_block = true; - } else { - column_in_block = false; - break; - } - } - - if column_in_block { - columns_array_iter.push(Self::chunks_to_parquet_array_iter( - column_metas, - column_chunks, - num_rows, - column_descriptors, - field, - compression, - uncompressed_buffer - .clone() - .unwrap_or_else(|| UncompressedBuffer::new(0)), - )?); - need_default_vals.push(false); - } else { - need_default_vals.push(true); - need_to_fill_default_val = true; - } - } - - let mut arrays = Vec::with_capacity(columns_array_iter.len()); - for mut column_array_iter in columns_array_iter.into_iter() { - let array = column_array_iter.next().unwrap()?; - arrays.push(array); - drop(column_array_iter); - } - - let chunk = Chunk::try_new(arrays)?; - if !need_to_fill_default_val { - DataBlock::from_arrow_chunk(&chunk, &self.data_schema()) - } else { - let data_schema = self.data_schema(); - let schema_default_vals = self.default_vals.clone(); - let mut default_vals = Vec::with_capacity(need_default_vals.len()); - for (i, need_default_val) in need_default_vals.iter().enumerate() { - if !need_default_val { - default_vals.push(None); - } else { - default_vals.push(Some(schema_default_vals[i].clone())); - } - } - DataBlock::create_with_default_value_and_chunk( - &data_schema, - &chunk, - &default_vals, - num_rows, - ) - } - } - - fn chunks_to_parquet_array_iter<'a>( - metas: Vec<&ColumnMeta>, - chunks: Vec<&'a [u8]>, - rows: usize, - column_descriptors: Vec<&ColumnDescriptor>, - field: Field, - compression: &Compression, - uncompressed_buffer: Arc, - ) -> Result> { - let columns = metas - .iter() - .zip(chunks.into_iter().zip(column_descriptors.iter())) - .map(|(meta, (chunk, column_descriptor))| { - let meta = meta.as_parquet().unwrap(); - - let page_meta_data = PageMetaData { - column_start: meta.offset, - num_values: meta.num_values as i64, - compression: Self::to_parquet_compression(compression)?, - descriptor: column_descriptor.descriptor.clone(), - }; - let pages = PageReader::new_with_page_meta( - chunk, - page_meta_data, - Arc::new(|_, _| true), - vec![], - usize::MAX, - ); - - Ok(BuffedBasicDecompressor::new( - pages, - uncompressed_buffer.clone(), - )) - }) - 
.collect::>>()?; - - let types = column_descriptors - .iter() - .map(|column_descriptor| &column_descriptor.descriptor.primitive_type) - .collect::>(); - - Ok(column_iter_to_arrays( - columns, - types, - field, - Some(rows), - rows, - )?) - } - - fn to_parquet_compression(meta_compression: &Compression) -> Result { - match meta_compression { - Compression::Lz4 => { - let err_msg = r#"Deprecated compression algorithm [Lz4] detected. - - The Legacy compression algorithm [Lz4] is no longer supported. - To migrate data from old format, please consider re-create the table, - by using an old compatible version [v0.8.25-nightly … v0.7.12-nightly]. - - - Bring up the compatible version of databend-query - - re-create the table - Suppose the name of table is T - ~~~ - create table tmp_t as select * from T; - drop table T all; - alter table tmp_t rename to T; - ~~~ - Please note that the history of table T WILL BE LOST. - "#; - Err(ErrorCode::StorageOther(err_msg)) - } - Compression::Lz4Raw => Ok(ParquetCompression::Lz4Raw), - Compression::Snappy => Ok(ParquetCompression::Snappy), - Compression::Zstd => Ok(ParquetCompression::Zstd), - Compression::Gzip => Ok(ParquetCompression::Gzip), - Compression::None => Ok(ParquetCompression::Uncompressed), - } - } } diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs new file mode 100644 index 0000000000000..f8d87464c517a --- /dev/null +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs @@ -0,0 +1,361 @@ +// Copyright 2021 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::collections::HashMap; +use std::sync::Arc; +use std::time::Instant; + +use common_arrow::arrow::array::Array; +use common_arrow::arrow::chunk::Chunk; +use common_arrow::arrow::datatypes::Field; +use common_arrow::arrow::io::parquet::read::column_iter_to_arrays; +use common_arrow::arrow::io::parquet::read::ArrayIter; +use common_arrow::parquet::compression::Compression as ParquetCompression; +use common_arrow::parquet::metadata::ColumnDescriptor; +use common_arrow::parquet::read::PageMetaData; +use common_arrow::parquet::read::PageReader; +use common_catalog::plan::PartInfoPtr; +use common_exception::ErrorCode; +use common_exception::Result; +use common_expression::ColumnId; +use common_expression::DataBlock; +use common_storage::ColumnNode; +use storages_common_cache::CacheAccessor; +use storages_common_cache::TableDataCacheKey; +use storages_common_cache_manager::CacheManager; +use storages_common_cache_manager::SizedColumnArray; +use storages_common_table_meta::meta::ColumnMeta; +use storages_common_table_meta::meta::Compression; + +use crate::fuse_part::FusePartInfo; +use crate::io::read::block::block_reader_merge_io::DataItem; +use crate::io::read::block::decompressor::BuffedBasicDecompressor; +use crate::io::BlockReader; +use crate::io::UncompressedBuffer; +use crate::metrics::*; + +enum DeserializedArray<'a> { + Cached(&'a Arc), + Deserialized((ColumnId, Box, usize)), + NoNeedToCache(Box), +} + +pub struct FieldDeserializationContext<'a> { + column_metas: &'a HashMap, + column_chunks: &'a HashMap>, + num_rows: usize, + compression: &'a Compression, + uncompressed_buffer: &'a Option>, +} + +impl BlockReader { + /// Deserialize column chunks data from parquet format to DataBlock. + pub fn deserialize_parquet_chunks( + &self, + part: PartInfoPtr, + chunks: HashMap, + ) -> Result { + let part = FusePartInfo::from_part(&part)?; + let start = Instant::now(); + + if chunks.is_empty() { + return self.build_default_values_block(part.nums_rows); + } + + let deserialized_res = self.deserialize_parquet_chunks_with_buffer( + &part.location, + part.nums_rows, + &part.compression, + &part.columns_meta, + chunks, + None, + ); + + // Perf. + { + metrics_inc_remote_io_deserialize_milliseconds(start.elapsed().as_millis() as u64); + } + + deserialized_res + } + + pub fn build_default_values_block(&self, num_rows: usize) -> Result { + let data_schema = self.data_schema(); + let default_vals = self.default_vals.clone(); + DataBlock::create_with_default_value(&data_schema, &default_vals, num_rows) + } + + /// Deserialize column chunks data from parquet format to DataBlock with a uncompressed buffer. 
+ pub fn deserialize_parquet_chunks_with_buffer( + &self, + block_path: &str, + num_rows: usize, + compression: &Compression, + column_metas: &HashMap, + column_chunks: HashMap, + uncompressed_buffer: Option>, + ) -> Result { + if column_chunks.is_empty() { + return self.build_default_values_block(num_rows); + } + + let fields = self + .projection + .project_column_nodes_nested_aware(&self.column_nodes)?; + let mut need_default_vals = Vec::with_capacity(fields.len()); + let mut need_to_fill_default_val = false; + let mut deserialized_column_arrays = Vec::with_capacity(self.projection.len()); + let field_deserialization_ctx = FieldDeserializationContext { + column_metas, + column_chunks: &column_chunks, + num_rows, + compression, + uncompressed_buffer: &uncompressed_buffer, + }; + for (column, is_nested_field) in &fields { + match self.deserialize_field(&field_deserialization_ctx, column, *is_nested_field)? { + None => { + need_to_fill_default_val = true; + need_default_vals.push(true); + } + Some(v) => { + deserialized_column_arrays.push(v); + need_default_vals.push(false); + } + } + } + + // assembly the arrays + let mut chunk_arrays = vec![]; + for array in &deserialized_column_arrays { + match array { + DeserializedArray::Deserialized((_, array, ..)) => { + chunk_arrays.push(array); + } + DeserializedArray::NoNeedToCache(array) => { + chunk_arrays.push(array); + } + DeserializedArray::Cached(sized_column) => { + chunk_arrays.push(&sized_column.0); + } + } + } + + // build data block + let chunk = Chunk::try_new(chunk_arrays)?; + let data_block = if !need_to_fill_default_val { + DataBlock::from_arrow_chunk(&chunk, &self.data_schema()) + } else { + let data_schema = self.data_schema(); + let mut default_vals = Vec::with_capacity(need_default_vals.len()); + for (i, need_default_val) in need_default_vals.iter().enumerate() { + if !need_default_val { + default_vals.push(None); + } else { + default_vals.push(Some(self.default_vals[i].clone())); + } + } + DataBlock::create_with_default_value_and_chunk( + &data_schema, + &chunk, + &default_vals, + num_rows, + ) + }; + + // populate cache if necessary + if let Some(cache) = CacheManager::instance().get_table_data_array_cache() { + // populate array cache items + for item in deserialized_column_arrays.into_iter() { + if let DeserializedArray::Deserialized((column_id, array, size)) = item { + let key = TableDataCacheKey::new(block_path, column_id); + cache.put(key.into(), Arc::new((array, size))) + } + } + } + data_block + } + + fn chunks_to_parquet_array_iter<'a>( + metas: Vec<&ColumnMeta>, + chunks: Vec<&'a [u8]>, + rows: usize, + column_descriptors: Vec<&ColumnDescriptor>, + field: Field, + compression: &Compression, + uncompressed_buffer: Arc, + ) -> Result> { + let columns = metas + .iter() + .zip(chunks.into_iter().zip(column_descriptors.iter())) + .map(|(meta, (chunk, column_descriptor))| { + let meta = meta.as_parquet().unwrap(); + + let page_meta_data = PageMetaData { + column_start: meta.offset, + num_values: meta.num_values as i64, + compression: Self::to_parquet_compression(compression)?, + descriptor: column_descriptor.descriptor.clone(), + }; + let pages = PageReader::new_with_page_meta( + chunk, + page_meta_data, + Arc::new(|_, _| true), + vec![], + usize::MAX, + ); + + Ok(BuffedBasicDecompressor::new( + pages, + uncompressed_buffer.clone(), + )) + }) + .collect::>>()?; + + let types = column_descriptors + .iter() + .map(|column_descriptor| &column_descriptor.descriptor.primitive_type) + .collect::>(); + + Ok(column_iter_to_arrays( 
+            columns,
+            types,
+            field,
+            Some(rows),
+            rows,
+        )?)
+    }
+
+    fn deserialize_field<'a>(
+        &self,
+        deserialization_context: &'a FieldDeserializationContext,
+        column: &ColumnNode,
+        is_nested_column: bool,
+    ) -> Result<Option<DeserializedArray<'a>>> {
+        let indices = &column.leaf_indices;
+        let column_chunks = deserialization_context.column_chunks;
+        let compression = deserialization_context.compression;
+        let uncompressed_buffer = deserialization_context.uncompressed_buffer;
+        // column passed in may be a compound field (with sub leaves),
+        // or a leaf column of compound field
+        let is_nested = is_nested_column || indices.len() > 1;
+        let estimated_cap = indices.len();
+        let mut field_column_metas = Vec::with_capacity(estimated_cap);
+        let mut field_column_data = Vec::with_capacity(estimated_cap);
+        let mut field_column_descriptors = Vec::with_capacity(estimated_cap);
+        let mut field_uncompressed_size = 0;
+
+        for (i, leaf_index) in indices.iter().enumerate() {
+            let column_id = column.leaf_column_ids[i];
+            if let Some(column_meta) = deserialization_context.column_metas.get(&column_id) {
+                if let Some(chunk) = column_chunks.get(&column_id) {
+                    match chunk {
+                        DataItem::RawData(data) => {
+                            let column_descriptor =
+                                &self.parquet_schema_descriptor.columns()[*leaf_index];
+                            field_column_metas.push(column_meta);
+                            field_column_data.push(*data);
+                            field_column_descriptors.push(column_descriptor);
+                            field_uncompressed_size += data.len();
+                        }
+                        DataItem::ColumnArray(column_array) => {
+                            if is_nested {
+                                // TODO more context info for error message
+                                return Err(ErrorCode::StorageOther(
+                                    "unexpected nested field: a nested leaf field hit the cache",
+                                ));
+                            }
+                            // since it is not nested, one column is enough
+                            return Ok(Some(DeserializedArray::Cached(column_array)));
+                        }
+                    }
+                } else {
+                    // TODO more context info for error message
+                    // no raw data of given column id, it is unexpected
+                    return Err(ErrorCode::StorageOther("unexpected: column data not found"));
+                }
+            } else {
+                // no column meta of given column id
+                break;
+            }
+        }
+
+        let num_rows = deserialization_context.num_rows;
+        if !field_column_metas.is_empty() {
+            let field_name = column.field.name.to_owned();
+            let mut array_iter = Self::chunks_to_parquet_array_iter(
+                field_column_metas,
+                field_column_data,
+                num_rows,
+                field_column_descriptors,
+                column.field.clone(),
+                compression,
+                uncompressed_buffer
+                    .clone()
+                    .unwrap_or_else(|| UncompressedBuffer::new(0)),
+            )?;
+            let array = array_iter.next().transpose()?.ok_or_else(|| {
+                ErrorCode::StorageOther(format!(
+                    "unexpected deserialization error, no array found for field {field_name} "
+                ))
+            })?;
+
+            // mark the array
+            if is_nested {
+                // the array is not intended to be cached
+                // currently, caching of compound field columns is not supported
+                Ok(Some(DeserializedArray::NoNeedToCache(array)))
+            } else {
+                // the array is deserialized from raw bytes, should be cached
+                let column_id = column.leaf_column_ids[0];
+                Ok(Some(DeserializedArray::Deserialized((
+                    column_id,
+                    array,
+                    field_uncompressed_size,
+                ))))
+            }
+        } else {
+            Ok(None)
+        }
+    }
+
+    fn to_parquet_compression(meta_compression: &Compression) -> Result<ParquetCompression> {
+        match meta_compression {
+            Compression::Lz4 => {
+                let err_msg = r#"Deprecated compression algorithm [Lz4] detected.
+
+        The Legacy compression algorithm [Lz4] is no longer supported.
+        To migrate data from old format, please consider re-create the table,
+        by using an old compatible version [v0.8.25-nightly … v0.7.12-nightly].
+ + - Bring up the compatible version of databend-query + - re-create the table + Suppose the name of table is T + ~~~ + create table tmp_t as select * from T; + drop table T all; + alter table tmp_t rename to T; + ~~~ + Please note that the history of table T WILL BE LOST. + "#; + Err(ErrorCode::StorageOther(err_msg)) + } + Compression::Lz4Raw => Ok(ParquetCompression::Lz4Raw), + Compression::Snappy => Ok(ParquetCompression::Snappy), + Compression::Zstd => Ok(ParquetCompression::Zstd), + Compression::Gzip => Ok(ParquetCompression::Gzip), + Compression::None => Ok(ParquetCompression::Uncompressed), + } + } +} diff --git a/src/query/storages/fuse/src/io/read/block/mod.rs b/src/query/storages/fuse/src/io/read/block/mod.rs index 587e676d1c9d9..ebc08b71e60eb 100644 --- a/src/query/storages/fuse/src/io/read/block/mod.rs +++ b/src/query/storages/fuse/src/io/read/block/mod.rs @@ -13,11 +13,15 @@ // limitations under the License. // mod block_reader; +mod block_reader_merge_io; +mod block_reader_merge_io_async; +mod block_reader_merge_io_sync; mod block_reader_native; mod block_reader_parquet; +mod block_reader_parquet_deserialize; mod decompressor; pub use block_reader::BlockReader; -pub use block_reader::MergeIOReadResult; +pub use block_reader_merge_io::MergeIOReadResult; pub use block_reader_native::NativeReaderExt; pub use decompressor::UncompressedBuffer; diff --git a/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs b/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs index 704ff207f4b74..f47635ff63171 100644 --- a/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs +++ b/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs @@ -44,8 +44,6 @@ type CachedReader = InMemoryItemCacheReader; /// Load the filter of a given bloom index column. 
Also /// - generates the proper cache key /// - takes cares of getting the correct cache instance from [CacheManager] -/// -/// this could be generified to be the template of cached data block column reader pub struct BloomColumnFilterReader { cached_reader: CachedReader, param: LoadParams, @@ -68,11 +66,7 @@ impl BloomColumnFilterReader { column_descriptor: column_chunk_meta.descriptor().clone(), }; - let cached_reader = CachedReader::new( - Xor8Filter::cache(), - "bloom_index_filter_cache".to_owned(), - loader, - ); + let cached_reader = CachedReader::new(Xor8Filter::cache(), loader); let param = LoadParams { location: index_path, diff --git a/src/query/storages/fuse/src/io/read/meta/meta_readers.rs b/src/query/storages/fuse/src/io/read/meta/meta_readers.rs index fb98c34b67adb..3ca710c0caec4 100644 --- a/src/query/storages/fuse/src/io/read/meta/meta_readers.rs +++ b/src/query/storages/fuse/src/io/read/meta/meta_readers.rs @@ -46,7 +46,6 @@ impl MetaReaders { pub fn segment_info_reader(dal: Operator, schema: TableSchemaRef) -> SegmentInfoReader { SegmentInfoReader::new( CacheManager::instance().get_table_segment_cache(), - "segment_info_cache".to_owned(), LoaderWrapper((dal, schema)), ) } @@ -54,7 +53,6 @@ impl MetaReaders { pub fn table_snapshot_reader(dal: Operator) -> TableSnapshotReader { TableSnapshotReader::new( CacheManager::instance().get_table_snapshot_cache(), - "snapshot_cache".to_owned(), LoaderWrapper(dal), ) } @@ -62,7 +60,6 @@ impl MetaReaders { pub fn table_snapshot_statistics_reader(dal: Operator) -> TableSnapshotStatisticsReader { TableSnapshotStatisticsReader::new( CacheManager::instance().get_table_snapshot_statistics_cache(), - "table_statistics_cache".to_owned(), LoaderWrapper(dal), ) } @@ -70,7 +67,6 @@ impl MetaReaders { pub fn file_meta_data_reader(dal: Operator) -> BloomIndexFileMetaDataReader { BloomIndexFileMetaDataReader::new( CacheManager::instance().get_bloom_index_meta_cache(), - "bloom_index_file_meta_data_cache".to_owned(), LoaderWrapper(dal), ) } diff --git a/src/query/storages/fuse/src/operations/mutation/compact/compact_transform.rs b/src/query/storages/fuse/src/operations/mutation/compact/compact_transform.rs index 19254a15f0e64..8ba7912917f34 100644 --- a/src/query/storages/fuse/src/operations/mutation/compact/compact_transform.rs +++ b/src/query/storages/fuse/src/operations/mutation/compact/compact_transform.rs @@ -20,7 +20,6 @@ use std::time::Instant; use common_base::base::Progress; use common_base::base::ProgressValues; -use common_cache::Cache; use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; @@ -31,6 +30,7 @@ use common_expression::TableSchemaRef; use common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; use opendal::Operator; use storages_common_blocks::blocks_to_parquet; +use storages_common_cache::CacheAccessor; use storages_common_cache_manager::CacheManager; use storages_common_index::BloomIndex; use storages_common_table_meta::meta::BlockMeta; @@ -315,8 +315,7 @@ impl Processor for CompactTransform { } State::Output { location, segment } => { if let Some(segment_cache) = CacheManager::instance().get_table_segment_cache() { - let cache = &mut segment_cache.write(); - cache.put(location.clone(), segment.clone()); + segment_cache.put(location.clone(), segment.clone()) } let meta = CompactSinkMeta::create( diff --git a/src/query/storages/fuse/src/operations/mutation/mutation_source.rs b/src/query/storages/fuse/src/operations/mutation/mutation_source.rs index 
f90f8c5603bc6..c607dc2ee68eb 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutation_source.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutation_source.rs @@ -62,7 +62,7 @@ enum State { }, MergeRemain { part: PartInfoPtr, - chunks: MergeIOReadResult, + merged_io_read_result: MergeIOReadResult, data_block: DataBlock, filter: Value, }, @@ -170,11 +170,7 @@ impl Processor for MutationSource { fn process(&mut self) -> Result<()> { match std::mem::replace(&mut self.state, State::Finish) { State::FilterData(part, read_res) => { - let chunks = read_res - .columns_chunks()? - .into_iter() - .map(|(column_index, column_chunk)| (column_index, column_chunk)) - .collect::>(); + let chunks = read_res.columns_chunks()?; let mut data_block = self .block_reader .deserialize_parquet_chunks(part.clone(), chunks)?; @@ -272,17 +268,12 @@ impl Processor for MutationSource { } State::MergeRemain { part, - chunks, + merged_io_read_result, mut data_block, filter, } => { if let Some(remain_reader) = self.remain_reader.as_ref() { - let chunks = chunks - .columns_chunks()? - .into_iter() - .map(|(column_index, column_chunk)| (column_index, column_chunk)) - .collect::>(); - + let chunks = merged_io_read_result.columns_chunks()?; let remain_block = remain_reader.deserialize_parquet_chunks(part, chunks)?; match self.action { @@ -360,7 +351,7 @@ impl Processor for MutationSource { .await?; self.state = State::MergeRemain { part, - chunks: read_res, + merged_io_read_result: read_res, data_block, filter, }; diff --git a/src/query/storages/fuse/src/operations/mutation/mutation_transform.rs b/src/query/storages/fuse/src/operations/mutation/mutation_transform.rs index c366e9e74cfcf..baedbf2dcb000 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutation_transform.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutation_transform.rs @@ -17,7 +17,6 @@ use std::collections::BTreeMap; use std::collections::HashMap; use std::sync::Arc; -use common_cache::Cache; use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; @@ -25,6 +24,7 @@ use common_expression::BlockThresholds; use common_expression::DataBlock; use common_expression::TableSchemaRef; use opendal::Operator; +use storages_common_cache::CacheAccessor; use storages_common_cache_manager::CacheManager; use storages_common_table_meta::meta::BlockMeta; use storages_common_table_meta::meta::Location; @@ -155,8 +155,7 @@ impl MutationTransform { handles.push(async move { op.object(&segment.location).write(segment.data).await?; if let Some(segment_cache) = CacheManager::instance().get_table_segment_cache() { - let cache = &mut segment_cache.write(); - cache.put(segment.location.clone(), segment.segment.clone()); + segment_cache.put(segment.location.clone(), segment.segment.clone()); } Ok::<_, ErrorCode>(()) }); diff --git a/src/query/storages/fuse/src/operations/read/parquet_data_source.rs b/src/query/storages/fuse/src/operations/read/parquet_data_source.rs index e2ed209d1c6bb..e6b717adcc5ed 100644 --- a/src/query/storages/fuse/src/operations/read/parquet_data_source.rs +++ b/src/query/storages/fuse/src/operations/read/parquet_data_source.rs @@ -23,7 +23,7 @@ use common_expression::BlockMetaInfoPtr; use serde::Deserializer; use serde::Serializer; -use crate::MergeIOReadResult; +use crate::io::MergeIOReadResult; pub struct DataSourceMeta { pub part: Vec, diff --git a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs 
b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs index 33ef3a21aff61..b6e0992fedc74 100644 --- a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs +++ b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs @@ -30,10 +30,10 @@ use common_pipeline_core::processors::Processor; use crate::fuse_part::FusePartInfo; use crate::io::BlockReader; +use crate::io::MergeIOReadResult; use crate::io::UncompressedBuffer; use crate::metrics::metrics_inc_remote_io_deserialize_milliseconds; use crate::operations::read::parquet_data_source::DataSourceMeta; -use crate::MergeIOReadResult; pub struct DeserializeDataTransform { scan_progress: Arc, @@ -140,6 +140,7 @@ impl Processor for DeserializeDataTransform { let part = FusePartInfo::from_part(&part)?; let data_block = self.block_reader.deserialize_parquet_chunks_with_buffer( + &part.location, part.nums_rows, &part.compression, &part.columns_meta, diff --git a/src/query/storages/hive/hive/src/hive_meta_data_reader.rs b/src/query/storages/hive/hive/src/hive_meta_data_reader.rs index 1d8f71d119677..d3878b16b46a9 100644 --- a/src/query/storages/hive/hive/src/hive_meta_data_reader.rs +++ b/src/query/storages/hive/hive/src/hive_meta_data_reader.rs @@ -30,7 +30,6 @@ impl MetaDataReader { pub fn meta_data_reader(dal: Operator) -> FileMetaDataReader { FileMetaDataReader::new( CacheManager::instance().get_file_meta_data_cache(), - "file_meta_data_cache".to_owned(), LoaderWrapper(dal), ) } diff --git a/src/query/storages/system/src/configs_table.rs b/src/query/storages/system/src/configs_table.rs index d1d06181f839a..9173c255ed56e 100644 --- a/src/query/storages/system/src/configs_table.rs +++ b/src/query/storages/system/src/configs_table.rs @@ -18,6 +18,7 @@ use common_base::base::mask_string; use common_catalog::table::Table; use common_catalog::table_context::TableContext; use common_config::GlobalConfig; +use common_config::QueryConfig; use common_exception::Result; use common_expression::types::StringType; use common_expression::utils::FromData; @@ -30,6 +31,7 @@ use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; use itertools::Itertools; use serde_json::Value as JsonValue; +use serde_json::Value; use crate::SyncOneBlockSystemTable; use crate::SyncSystemTable; @@ -46,14 +48,16 @@ impl SyncSystemTable for ConfigsTable { } fn get_full_data(&self, _ctx: Arc) -> Result { - let config = GlobalConfig::instance().as_ref().clone().into_outer(); + let config = GlobalConfig::instance().as_ref().clone().into_config(); let mut names: Vec = vec![]; let mut values: Vec = vec![]; let mut groups: Vec = vec![]; let mut descs: Vec = vec![]; let query_config = config.query; - let query_config_value = serde_json::to_value(query_config)?; + let query_config_value = + Self::remove_obsolete_query_configs(serde_json::to_value(query_config)?); + ConfigsTable::extract_config( &mut names, &mut values, @@ -85,6 +89,17 @@ impl SyncSystemTable for ConfigsTable { meta_config_value, ); + let cache_config = config.cache; + let cache_config_value = serde_json::to_value(cache_config)?; + ConfigsTable::extract_config( + &mut names, + &mut values, + &mut groups, + &mut descs, + "cache".to_string(), + cache_config_value, + ); + // Clone storage config to avoid change it's value. 
// // TODO(xuanwo): @@ -266,4 +281,16 @@ impl ConfigsTable { groups.push(group); descs.push(desc); } + + fn remove_obsolete_query_configs(config_json: JsonValue) -> JsonValue { + match config_json { + Value::Object(mut config_json_obj) => { + for key in QueryConfig::obsoleted_option_keys().iter() { + config_json_obj.remove(*key); + } + JsonValue::Object(config_json_obj) + } + _ => config_json, + } + } }
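
Note: the `merge_io_read` doc comment in this patch describes a kernel-style IO merging policy: ranges closer together than `storage_io_min_bytes_for_seek` are coalesced into one read, and coalescing stops once the merged range would grow past `storage_io_max_page_bytes_for_read`. The snippet below is a minimal standalone sketch of that policy, not the `RangeMerger` from `common_base`; the function name, the plain `Range<u64>` inputs, and the `main`-based usage are illustrative assumptions.

```rust
use std::ops::Range;

/// Coalesce sorted read ranges: merge when the gap is small enough and the
/// merged range stays within the page-size limit, otherwise start a new range.
fn merge_ranges(
    mut ranges: Vec<Range<u64>>,
    min_bytes_for_seek: u64,
    max_page_bytes_for_read: u64,
) -> Vec<Range<u64>> {
    ranges.sort_by_key(|r| r.start);
    let mut merged: Vec<Range<u64>> = Vec::with_capacity(ranges.len());
    for r in ranges {
        match merged.last_mut() {
            // Gap below the seek threshold and merged size within the page limit:
            // extend the previous range instead of issuing another request.
            Some(last)
                if r.start.saturating_sub(last.end) < min_bytes_for_seek
                    && r.end.max(last.end) - last.start <= max_page_bytes_for_read =>
            {
                last.end = last.end.max(r.end);
            }
            // Otherwise keep the range separate (one extra seek).
            _ => merged.push(r),
        }
    }
    merged
}

fn main() {
    // Two column ranges 10 bytes apart are merged (gap < 48),
    // while a distant range stays separate.
    let merged = merge_ranges(vec![0..100, 110..200, 10_000..10_050], 48, 512 * 1024);
    assert_eq!(merged, vec![0..200, 10_000..10_050]);
    println!("{merged:?}");
}
```

The real reader additionally remembers which merged range each raw column range landed in, so the column bytes can later be sliced out of the merged chunk at `(raw.start - merged.start)..(raw.end - merged.start)`, which is what `add_column_chunk` records.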
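Note: `read_columns_data_by_merge_io` (and its sync counterpart) consult two caches per projected column before touching storage: the deserialized column-array cache first, then the raw column-data cache, and only the remaining misses become read ranges. Below is a rough sketch of that lookup order under simplifying assumptions: plain `HashMap`s stand in for the `CacheManager` caches, a `String` stands in for the deserialized array type, and all names are illustrative rather than the crate's API.

```rust
use std::collections::HashMap;
use std::sync::Arc;

type ColumnId = u32;

/// Where one projected column will be served from.
#[derive(Debug)]
enum ColumnSource {
    CachedArray(Arc<String>),     // stand-in for a cached deserialized array
    CachedRawData(Arc<Vec<u8>>),  // stand-in for cached compressed column bytes
    NeedsRead { offset: u64, len: u64 },
}

/// Per-column lookup order: array cache, then raw-data cache, then schedule a read.
fn plan_column_reads(
    projected: &[(ColumnId, u64, u64)], // (column id, offset, len)
    array_cache: &HashMap<(String, ColumnId), Arc<String>>,
    data_cache: &HashMap<(String, ColumnId), Arc<Vec<u8>>>,
    block_path: &str,
) -> HashMap<ColumnId, ColumnSource> {
    let mut plan = HashMap::new();
    for (column_id, offset, len) in projected {
        let key = (block_path.to_string(), *column_id);
        let source = if let Some(array) = array_cache.get(&key) {
            ColumnSource::CachedArray(array.clone())
        } else if let Some(raw) = data_cache.get(&key) {
            ColumnSource::CachedRawData(raw.clone())
        } else {
            ColumnSource::NeedsRead { offset: *offset, len: *len }
        };
        plan.insert(*column_id, source);
    }
    plan
}

fn main() {
    let block = "1/2/_b/block-0001.parquet";
    let mut array_cache = HashMap::new();
    array_cache.insert((block.to_string(), 0), Arc::new("decoded col 0".to_string()));
    let data_cache = HashMap::new();

    // Column 0 is served from the array cache; column 1 still needs a range read.
    let plan = plan_column_reads(&[(0, 0, 128), (1, 128, 256)], &array_cache, &data_cache, block);
    println!("{plan:#?}");
}
```

The split mirrors the patch's design choice: hitting the array cache skips both the storage read and the parquet decode, while hitting the raw-data cache skips only the read; nested (compound) fields are deliberately excluded from array caching.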