From 71d3030f8c7aa6ab8874606511e8e3f0ba3879c9 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 28 Jan 2023 19:31:37 +0800 Subject: [PATCH 01/80] bring back @PsiACE's disk cache --- common/cache/src/disk_cache.rs | 361 ++++++++++++++++++++++++++++ common/cache/src/disk_cache_test.rs | 273 +++++++++++++++++++++ 2 files changed, 634 insertions(+) create mode 100644 common/cache/src/disk_cache.rs create mode 100644 common/cache/src/disk_cache_test.rs diff --git a/common/cache/src/disk_cache.rs b/common/cache/src/disk_cache.rs new file mode 100644 index 0000000000000..d49b05401d861 --- /dev/null +++ b/common/cache/src/disk_cache.rs @@ -0,0 +1,361 @@ +// Copyright 2021 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::boxed::Box; +use std::ffi::OsStr; +use std::ffi::OsString; +use std::fs::File; +use std::fs::{self}; +use std::hash::BuildHasher; +use std::io; +use std::io::prelude::*; +use std::path::Path; +use std::path::PathBuf; + +use filetime::set_file_times; +use filetime::FileTime; +use ritelinked::DefaultHashBuilder; +use walkdir::WalkDir; + +use crate::Cache; +use crate::FileSize; +use crate::LruCache; + +/// Return an iterator of `(path, size)` of files under `path` sorted by ascending last-modified +/// time, such that the oldest modified file is returned first. 
+fn get_all_files>(path: P) -> Box> { + let mut files: Vec<_> = WalkDir::new(path.as_ref()) + .into_iter() + .filter_map(|e| { + e.ok().and_then(|f| { + // Only look at files + if f.file_type().is_file() { + // Get the last-modified time, size, and the full path. + f.metadata().ok().and_then(|m| { + m.modified() + .ok() + .map(|mtime| (mtime, f.path().to_owned(), m.len())) + }) + } else { + None + } + }) + }) + .collect(); + // Sort by last-modified-time, so oldest file first. + files.sort_by_key(|k| k.0); + Box::new(files.into_iter().map(|(_mtime, path, size)| (path, size))) +} + +/// An LRU cache of files on disk. +pub type LruDiskCache = DiskCache>; + +/// An basic disk cache of files on disk. +pub struct DiskCache +where C: Cache +{ + hash_builder: S, + cache: C, + root: PathBuf, +} + +/// Trait objects can't be bounded by more than one non-builtin trait. +pub trait ReadSeek: Read + Seek + Send {} + +impl ReadSeek for T {} + +enum AddFile<'a> { + AbsPath(PathBuf), + RelPath(&'a OsStr), +} + +impl DiskCache +where C: Cache +{ + /// Create an `DiskCache` with `ritelinked::DefaultHashBuilder` that stores files in `path`, + /// limited to `size` bytes. + /// + /// Existing files in `path` will be stored with their last-modified time from the filesystem + /// used as the order for the recency of their use. Any files that are individually larger + /// than `size` bytes will be removed. + /// + /// The cache is not observant of changes to files under `path` from external sources, it + /// expects to have sole maintence of the contents. 
+ pub fn new(path: T, size: u64) -> Result + where PathBuf: From { + let default_hash_builder = DefaultHashBuilder::new(); + DiskCache { + hash_builder: default_hash_builder.clone(), + cache: C::with_meter_and_hasher(size, FileSize, default_hash_builder), + root: PathBuf::from(path), + } + .init() + } +} + +impl DiskCache +where + C: Cache, + S: BuildHasher + Clone, +{ + /// Create an `DiskCache` with hasher that stores files in `path`, limited to `size` bytes. + /// + /// Existing files in `path` will be stored with their last-modified time from the filesystem + /// used as the order for the recency of their use. Any files that are individually larger + /// than `size` bytes will be removed. + /// + /// The cache is not observant of changes to files under `path` from external sources, it + /// expects to have sole maintence of the contents. + pub fn new_with_hasher(path: T, size: u64, hash_builder: S) -> Result + where PathBuf: From { + DiskCache { + hash_builder: hash_builder.clone(), + cache: C::with_meter_and_hasher(size, FileSize, hash_builder), + root: PathBuf::from(path), + } + .init() + } + + /// Return the current size of all the files in the cache. + pub fn size(&self) -> u64 { + self.cache.size() + } + + /// Return the count of entries in the cache. + pub fn len(&self) -> usize { + self.cache.len() + } + + pub fn is_empty(&self) -> bool { + self.cache.len() == 0 + } + + /// Return the maximum size of the cache. + pub fn capacity(&self) -> u64 { + self.cache.capacity() + } + + /// Return the path in which the cache is stored. + pub fn path(&self) -> &Path { + self.root.as_path() + } + + /// Return the path that `key` would be stored at. + fn rel_to_abs_path>(&self, rel_path: K) -> PathBuf { + self.root.join(rel_path) + } + + /// Scan `self.root` for existing files and store them. 
+ fn init(mut self) -> Result { + fs::create_dir_all(&self.root)?; + for (file, size) in get_all_files(&self.root) { + if !self.can_store(size) { + fs::remove_file(file).unwrap_or_else(|e| { + error!( + "Error removing file `{}` which is too large for the cache ({} bytes)", + e, size + ) + }); + } else { + self.add_file(AddFile::AbsPath(file), size) + .unwrap_or_else(|e| error!("Error adding file: {}", e)); + } + } + Ok(self) + } + + /// Returns `true` if the disk cache can store a file of `size` bytes. + pub fn can_store(&self, size: u64) -> bool { + size <= self.cache.capacity() as u64 + } + + /// Add the file at `path` of size `size` to the cache. + fn add_file(&mut self, addfile_path: AddFile<'_>, size: u64) -> Result<()> { + if !self.can_store(size) { + return Err(Error::FileTooLarge); + } + let rel_path = match addfile_path { + AddFile::AbsPath(ref p) => p.strip_prefix(&self.root).expect("Bad path?").as_os_str(), + AddFile::RelPath(p) => p, + }; + //TODO: ideally Cache::put would give us back the entries it had to remove. + while self.cache.size() as u64 + size > self.cache.capacity() as u64 { + let (rel_path, _) = self + .cache + .pop_by_policy() + .expect("Unexpectedly empty cache!"); + let remove_path = self.rel_to_abs_path(rel_path); + //TODO: check that files are removable during `init`, so that this is only + // due to outside interference. 
+ fs::remove_file(&remove_path).unwrap_or_else(|e| { + panic!("Error removing file from cache: `{:?}`: {}", remove_path, e) + }); + } + self.cache.put(rel_path.to_owned(), size); + Ok(()) + } + + fn insert_by, F: FnOnce(&Path) -> io::Result<()>>( + &mut self, + key: K, + size: Option, + by: F, + ) -> Result<()> { + if let Some(size) = size { + if !self.can_store(size) { + return Err(Error::FileTooLarge); + } + } + let rel_path = key.as_ref(); + let path = self.rel_to_abs_path(rel_path); + fs::create_dir_all(path.parent().expect("Bad path?"))?; + by(&path)?; + let size = match size { + Some(size) => size, + None => fs::metadata(path)?.len(), + }; + self.add_file(AddFile::RelPath(rel_path), size) + .map_err(|e| { + error!( + "Failed to insert file `{}`: {}", + rel_path.to_string_lossy(), + e + ); + fs::remove_file(&self.rel_to_abs_path(rel_path)) + .expect("Failed to remove file we just created!"); + e + }) + } + + /// Add a file by calling `with` with the open `File` corresponding to the cache at path `key`. + pub fn insert_with, F: FnOnce(File) -> io::Result<()>>( + &mut self, + key: K, + with: F, + ) -> Result<()> { + self.insert_by(key, None, |path| with(File::create(&path)?)) + } + + /// Add a file with `bytes` as its contents to the cache at path `key`. + pub fn insert_bytes>(&mut self, key: K, bytes: &[u8]) -> Result<()> { + self.insert_by(key, Some(bytes.len() as u64), |path| { + let mut f = File::create(&path)?; + f.write_all(bytes)?; + Ok(()) + }) + } + + /// Add an existing file at `path` to the cache at path `key`. 
+ pub fn insert_file, P: AsRef>(&mut self, key: K, path: P) -> Result<()> { + let size = fs::metadata(path.as_ref())?.len(); + self.insert_by(key, Some(size), |new_path| { + fs::rename(path.as_ref(), new_path).or_else(|_| { + warn!("fs::rename failed, falling back to copy!"); + fs::copy(path.as_ref(), new_path)?; + fs::remove_file(path.as_ref()).unwrap_or_else(|e| { + error!("Failed to remove original file in insert_file: {}", e) + }); + Ok(()) + }) + }) + } + + /// Return `true` if a file with path `key` is in the cache. + pub fn contains_key>(&self, key: K) -> bool { + self.cache.contains(key.as_ref()) + } + + /// Get an opened `File` for `key`, if one exists and can be opened. Updates the Cache state + /// of the file if present. Avoid using this method if at all possible, prefer `.get`. + pub fn get_file>(&mut self, key: K) -> Result { + let rel_path = key.as_ref(); + let path = self.rel_to_abs_path(rel_path); + self.cache + .get(rel_path) + .ok_or(Error::FileNotInCache) + .and_then(|_| { + let t = FileTime::now(); + set_file_times(&path, t, t)?; + File::open(path).map_err(Into::into) + }) + } + + /// Get an opened readable and seekable handle to the file at `key`, if one exists and can + /// be opened. Updates the Cache state of the file if present. + pub fn get>(&mut self, key: K) -> Result> { + self.get_file(key).map(|f| Box::new(f) as Box) + } + + /// Remove the given key from the cache. + pub fn remove>(&mut self, key: K) -> Result<()> { + match self.cache.pop(key.as_ref()) { + Some(_) => { + let path = self.rel_to_abs_path(key.as_ref()); + fs::remove_file(&path).map_err(|e| { + error!("Error removing file from cache: `{:?}`: {}", path, e); + Into::into(e) + }) + } + None => Ok(()), + } + } +} + +pub mod result { + use std::error::Error as StdError; + use std::fmt; + use std::io; + + /// Errors returned by this crate. + #[derive(Debug)] + pub enum Error { + /// The file was too large to fit in the cache. + FileTooLarge, + /// The file was not in the cache. 
+ FileNotInCache, + /// An IO Error occurred. + Io(io::Error), + } + + impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::FileTooLarge => write!(f, "File too large"), + Error::FileNotInCache => write!(f, "File not in cache"), + Error::Io(ref e) => write!(f, "{}", e), + } + } + } + + impl StdError for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Error::FileTooLarge => None, + Error::FileNotInCache => None, + Error::Io(ref e) => Some(e), + } + } + } + + impl From for Error { + fn from(e: io::Error) -> Error { + Error::Io(e) + } + } + + /// A convenience `Result` type + pub type Result = std::result::Result; +} + +use result::*; diff --git a/common/cache/src/disk_cache_test.rs b/common/cache/src/disk_cache_test.rs new file mode 100644 index 0000000000000..71f1924be49c8 --- /dev/null +++ b/common/cache/src/disk_cache_test.rs @@ -0,0 +1,273 @@ +// Copyright 2021 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fs::File; +use std::fs::{self}; +use std::io::Read; +use std::io::Write; +use std::io::{self}; +use std::path::Path; +use std::path::PathBuf; + +use filetime::set_file_times; +use filetime::FileTime; +use tempfile::TempDir; + +use crate::DiskCacheError; +use crate::LruDiskCache; + +struct TestFixture { + /// Temp directory. 
+ pub tempdir: TempDir, +} + +fn create_file, F: FnOnce(File) -> io::Result<()>>( + dir: &Path, + path: T, + fill_contents: F, +) -> io::Result { + let b = dir.join(path); + fs::create_dir_all(b.parent().unwrap())?; + let f = fs::File::create(&b)?; + fill_contents(f)?; + b.canonicalize() +} + +/// Set the last modified time of `path` backwards by `seconds` seconds. +fn set_mtime_back>(path: T, seconds: usize) { + let m = fs::metadata(path.as_ref()).unwrap(); + let t = FileTime::from_last_modification_time(&m); + let t = FileTime::from_unix_time(t.unix_seconds() - seconds as i64, t.nanoseconds()); + set_file_times(path, t, t).unwrap(); +} + +fn read_all(r: &mut R) -> io::Result> { + let mut v = vec![]; + r.read_to_end(&mut v)?; + Ok(v) +} + +impl TestFixture { + pub fn new() -> TestFixture { + TestFixture { + tempdir: tempfile::Builder::new() + .prefix("lru-disk-cache-test") + .tempdir() + .unwrap(), + } + } + + pub fn tmp(&self) -> &Path { + self.tempdir.path() + } + + pub fn create_file>(&self, path: T, size: usize) -> PathBuf { + create_file(self.tempdir.path(), path, |mut f| { + f.write_all(&vec![0; size]) + }) + .unwrap() + } +} + +#[test] +fn test_empty_dir() { + let f = TestFixture::new(); + LruDiskCache::new(f.tmp(), 1024).unwrap(); +} + +#[test] +fn test_missing_root() { + let f = TestFixture::new(); + LruDiskCache::new(f.tmp().join("not-here"), 1024).unwrap(); +} + +#[test] +fn test_some_existing_files() { + let f = TestFixture::new(); + f.create_file("file1", 10); + f.create_file("file2", 10); + let c = LruDiskCache::new(f.tmp(), 20).unwrap(); + assert_eq!(c.size(), 20); + assert_eq!(c.len(), 2); +} + +#[test] +fn test_existing_file_too_large() { + let f = TestFixture::new(); + // Create files explicitly in the past. 
+ set_mtime_back(f.create_file("file1", 10), 10); + set_mtime_back(f.create_file("file2", 10), 5); + let c = LruDiskCache::new(f.tmp(), 15).unwrap(); + assert_eq!(c.size(), 10); + assert_eq!(c.len(), 1); + assert!(!c.contains_key("file1")); + assert!(c.contains_key("file2")); +} + +#[test] +fn test_existing_files_lru_mtime() { + let f = TestFixture::new(); + // Create files explicitly in the past. + set_mtime_back(f.create_file("file1", 10), 5); + set_mtime_back(f.create_file("file2", 10), 10); + let mut c = LruDiskCache::new(f.tmp(), 25).unwrap(); + assert_eq!(c.size(), 20); + c.insert_bytes("file3", &[0; 10]).unwrap(); + assert_eq!(c.size(), 20); + // The oldest file on disk should have been removed. + assert!(!c.contains_key("file2")); + assert!(c.contains_key("file1")); +} + +#[test] +fn test_insert_bytes() { + let f = TestFixture::new(); + let mut c = LruDiskCache::new(f.tmp(), 25).unwrap(); + c.insert_bytes("a/b/c", &[0; 10]).unwrap(); + assert!(c.contains_key("a/b/c")); + c.insert_bytes("a/b/d", &[0; 10]).unwrap(); + assert_eq!(c.size(), 20); + // Adding this third file should put the cache above the limit. + c.insert_bytes("x/y/z", &[0; 10]).unwrap(); + assert_eq!(c.size(), 20); + // The least-recently-used file should have been removed. + assert!(!c.contains_key("a/b/c")); + assert!(!f.tmp().join("a/b/c").exists()); +} + +#[test] +fn test_insert_bytes_exact() { + // Test that files adding up to exactly the size limit works. 
+ let f = TestFixture::new(); + let mut c = LruDiskCache::new(f.tmp(), 20).unwrap(); + c.insert_bytes("file1", &[1; 10]).unwrap(); + c.insert_bytes("file2", &[2; 10]).unwrap(); + assert_eq!(c.size(), 20); + c.insert_bytes("file3", &[3; 10]).unwrap(); + assert_eq!(c.size(), 20); + assert!(!c.contains_key("file1")); +} + +#[test] +fn test_add_get_lru() { + let f = TestFixture::new(); + { + let mut c = LruDiskCache::new(f.tmp(), 25).unwrap(); + c.insert_bytes("file1", &[1; 10]).unwrap(); + c.insert_bytes("file2", &[2; 10]).unwrap(); + // Get the file to bump its LRU status. + assert_eq!(read_all(&mut c.get("file1").unwrap()).unwrap(), vec![ + 1u8; + 10 + ]); + // Adding this third file should put the cache above the limit. + c.insert_bytes("file3", &[3; 10]).unwrap(); + assert_eq!(c.size(), 20); + // The least-recently-used file should have been removed. + assert!(!c.contains_key("file2")); + } + // Get rid of the cache, to test that the LRU persists on-disk as mtimes. + // This is hacky, but mtime resolution on my mac with HFS+ is only 1 second, so we either + // need to have a 1 second sleep in the test (boo) or adjust the mtimes back a bit so + // that updating one file to the current time actually works to make it newer. + set_mtime_back(f.tmp().join("file1"), 5); + set_mtime_back(f.tmp().join("file3"), 5); + { + let mut c = LruDiskCache::new(f.tmp(), 25).unwrap(); + // Bump file1 again. + c.get("file1").unwrap(); + } + // Now check that the on-disk mtimes were updated and used. + { + let mut c = LruDiskCache::new(f.tmp(), 25).unwrap(); + assert!(c.contains_key("file1")); + assert!(c.contains_key("file3")); + assert_eq!(c.size(), 20); + // Add another file to bump out the least-recently-used. 
+ c.insert_bytes("file4", &[4; 10]).unwrap(); + assert_eq!(c.size(), 20); + assert!(!c.contains_key("file3")); + assert!(c.contains_key("file1")); + } +} + +#[test] +fn test_insert_bytes_too_large() { + let f = TestFixture::new(); + let mut c = LruDiskCache::new(f.tmp(), 1).unwrap(); + match c.insert_bytes("a/b/c", &[0; 2]) { + Err(DiskCacheError::FileTooLarge) => {} + x => panic!("Unexpected result: {:?}", x), + } +} + +#[test] +fn test_insert_file() { + let f = TestFixture::new(); + let p1 = f.create_file("file1", 10); + let p2 = f.create_file("file2", 10); + let p3 = f.create_file("file3", 10); + let mut c = LruDiskCache::new(f.tmp().join("cache"), 25).unwrap(); + c.insert_file("file1", &p1).unwrap(); + assert_eq!(c.len(), 1); + c.insert_file("file2", &p2).unwrap(); + assert_eq!(c.len(), 2); + // Get the file to bump its LRU status. + assert_eq!(read_all(&mut c.get("file1").unwrap()).unwrap(), vec![ + 0u8; + 10 + ]); + // Adding this third file should put the cache above the limit. + c.insert_file("file3", &p3).unwrap(); + assert_eq!(c.len(), 2); + assert_eq!(c.size(), 20); + // The least-recently-used file should have been removed. + assert!(!c.contains_key("file2")); + assert!(!p1.exists()); + assert!(!p2.exists()); + assert!(!p3.exists()); +} + +#[test] +fn test_remove() { + let f = TestFixture::new(); + let p1 = f.create_file("file1", 10); + let p2 = f.create_file("file2", 10); + let p3 = f.create_file("file3", 10); + let mut c = LruDiskCache::new(f.tmp().join("cache"), 25).unwrap(); + c.insert_file("file1", &p1).unwrap(); + c.insert_file("file2", &p2).unwrap(); + c.remove("file1").unwrap(); + c.insert_file("file3", &p3).unwrap(); + assert_eq!(c.len(), 2); + assert_eq!(c.size(), 20); + + // file1 should have been removed. 
+ assert!(!c.contains_key("file1")); + assert!(!f.tmp().join("cache").join("file1").exists()); + assert!(f.tmp().join("cache").join("file2").exists()); + assert!(f.tmp().join("cache").join("file3").exists()); + assert!(!p1.exists()); + assert!(!p2.exists()); + assert!(!p3.exists()); + + let p4 = f.create_file("file1", 10); + c.insert_file("file1", &p4).unwrap(); + assert_eq!(c.len(), 2); + // file2 should have been removed. + assert!(c.contains_key("file1")); + assert!(!c.contains_key("file2")); + assert!(!f.tmp().join("cache").join("file2").exists()); + assert!(!p4.exists()); +} From a29c2c1f09620962ec34c55863a136a10c87595b Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sun, 29 Jan 2023 17:05:33 +0800 Subject: [PATCH 02/80] tailor disk cache --- Cargo.lock | 17 ++ src/common/cache/Cargo.toml | 7 +- .../common}/cache/src/disk_cache.rs | 204 ++++++++---------- src/common/cache/src/lib.rs | 4 + .../common/cache/tests/it/disk_cache.rs | 104 ++++----- src/common/cache/tests/it/main.rs | 1 + .../common/cache-manager/src/cache_manager.rs | 24 ++- src/query/storages/common/cache/Cargo.toml | 1 + src/query/storages/common/cache/src/cache.rs | 4 +- src/query/storages/common/cache/src/lib.rs | 2 + .../common/cache/src/providers/disk_cache.rs | 57 ++++- .../cache/src/providers/memory_cache.rs | 4 +- .../common/cache/src/providers/mod.rs | 2 + .../common/cache/src/read/cached_reader.rs | 8 +- 14 files changed, 231 insertions(+), 208 deletions(-) rename {common => src/common}/cache/src/disk_cache.rs (60%) rename common/cache/src/disk_cache_test.rs => src/common/cache/tests/it/disk_cache.rs (74%) diff --git a/Cargo.lock b/Cargo.lock index 38dd11e7ab700..08aee448f1855 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1293,9 +1293,13 @@ dependencies = [ name = "common-cache" version = "0.1.0" dependencies = [ + "filetime", "heapsize", + "hex", "ritelinked", "tempfile", + "tracing", + "walkdir", ] [[package]] @@ -3578,6 +3582,18 @@ dependencies = [ "subtle", ] +[[package]] +name = 
"filetime" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "windows-sys 0.42.0", +] + [[package]] name = "findshlibs" version = "0.10.2" @@ -7783,6 +7799,7 @@ dependencies = [ "parking_lot 0.12.1", "serde", "serde_json", + "tracing", ] [[package]] diff --git a/src/common/cache/Cargo.toml b/src/common/cache/Cargo.toml index 20c3305168ad2..ce357a86d8cef 100644 --- a/src/common/cache/Cargo.toml +++ b/src/common/cache/Cargo.toml @@ -14,11 +14,14 @@ test = false heapsize = ["heapsize_"] amortized = ["ritelinked/ahash-amortized", "ritelinked/inline-more-amortized"] -[dependencies] # In alphabetical order -# Github dependencies +[dependencies] # Crates.io dependencies +filetime = "0.2.15" +hex = "0.4.3" ritelinked = { version = "0.3.2", default-features = false, features = ["ahash", "inline-more"] } +tracing = "0.1.36" +walkdir = "2.3.2" [target.'cfg(not(target_os = "macos"))'.dependencies] heapsize_ = { package = "heapsize", version = "0.4.2", optional = true } diff --git a/common/cache/src/disk_cache.rs b/src/common/cache/src/disk_cache.rs similarity index 60% rename from common/cache/src/disk_cache.rs rename to src/common/cache/src/disk_cache.rs index d49b05401d861..57bb88430bcd4 100644 --- a/common/cache/src/disk_cache.rs +++ b/src/common/cache/src/disk_cache.rs @@ -1,4 +1,4 @@ -// Copyright 2021 Datafuse Labs. +// Copyright 2023 Datafuse Labs. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,22 +13,22 @@ // limitations under the License. 
use std::boxed::Box; -use std::ffi::OsStr; -use std::ffi::OsString; +use std::fs; use std::fs::File; -use std::fs::{self}; use std::hash::BuildHasher; -use std::io; +use std::hash::Hash; +use std::hash::Hasher; use std::io::prelude::*; use std::path::Path; use std::path::PathBuf; use filetime::set_file_times; use filetime::FileTime; -use ritelinked::DefaultHashBuilder; +use tracing::error; use walkdir::WalkDir; use crate::Cache; +use crate::DefaultHashBuilder; use crate::FileSize; use crate::LruCache; @@ -59,29 +59,19 @@ fn get_all_files>(path: P) -> Box>; +pub type LruDiskCache = DiskCache>; /// An basic disk cache of files on disk. pub struct DiskCache -where C: Cache +where C: Cache { hash_builder: S, cache: C, root: PathBuf, } -/// Trait objects can't be bounded by more than one non-builtin trait. -pub trait ReadSeek: Read + Seek + Send {} - -impl ReadSeek for T {} - -enum AddFile<'a> { - AbsPath(PathBuf), - RelPath(&'a OsStr), -} - -impl DiskCache -where C: Cache +impl DiskCache +where C: Cache { /// Create an `DiskCache` with `ritelinked::DefaultHashBuilder` that stores files in `path`, /// limited to `size` bytes. @@ -91,7 +81,7 @@ where C: Cache /// than `size` bytes will be removed. /// /// The cache is not observant of changes to files under `path` from external sources, it - /// expects to have sole maintence of the contents. + /// expects to have sole maintenance of the contents. pub fn new(path: T, size: u64) -> Result where PathBuf: From { let default_hash_builder = DefaultHashBuilder::new(); @@ -106,7 +96,7 @@ where C: Cache impl DiskCache where - C: Cache, + C: Cache, S: BuildHasher + Clone, { /// Create an `DiskCache` with hasher that stores files in `path`, limited to `size` bytes. @@ -116,7 +106,7 @@ where /// than `size` bytes will be removed. /// /// The cache is not observant of changes to files under `path` from external sources, it - /// expects to have sole maintence of the contents. + /// expects to have sole maintenance of the contents. 
pub fn new_with_hasher(path: T, size: u64, hash_builder: S) -> Result where PathBuf: From { DiskCache { @@ -168,8 +158,23 @@ where ) }); } else { - self.add_file(AddFile::AbsPath(file), size) - .unwrap_or_else(|e| error!("Error adding file: {}", e)); + while self.cache.size() as u64 + size > self.cache.capacity() as u64 { + let (rel_path, _) = self + .cache + .pop_by_policy() + .expect("Unexpectedly empty cache!"); + let remove_path = self.rel_to_abs_path(&rel_path); + // TODO: check that files are removable during `init`, so that this is only + // due to outside interference. + fs::remove_file(&remove_path).unwrap_or_else(|e| { + panic!("Error removing file from cache: `{:?}`: {}", remove_path, e) + }); + } + let rel_path = file + .strip_prefix(&self.root) + .map_err(|_e| self::Error::MalformedPath)?; + let cache_key = Self::recovery_from(rel_path); + self.cache.put(cache_key, size); } } Ok(self) @@ -180,128 +185,85 @@ where size <= self.cache.capacity() as u64 } - /// Add the file at `path` of size `size` to the cache. - fn add_file(&mut self, addfile_path: AddFile<'_>, size: u64) -> Result<()> { - if !self.can_store(size) { - return Err(Error::FileTooLarge); - } - let rel_path = match addfile_path { - AddFile::AbsPath(ref p) => p.strip_prefix(&self.root).expect("Bad path?").as_os_str(), - AddFile::RelPath(p) => p, + fn recovery_from(str: &Path) -> String { + let key_string = match str.as_os_str().to_str() { + Some(str) => str.to_owned(), + None => { + unreachable!() + } }; - //TODO: ideally Cache::put would give us back the entries it had to remove. - while self.cache.size() as u64 + size > self.cache.capacity() as u64 { - let (rel_path, _) = self - .cache - .pop_by_policy() - .expect("Unexpectedly empty cache!"); - let remove_path = self.rel_to_abs_path(rel_path); - //TODO: check that files are removable during `init`, so that this is only - // due to outside interference. 
- fs::remove_file(&remove_path).unwrap_or_else(|e| { - panic!("Error removing file from cache: `{:?}`: {}", remove_path, e) - }); - } - self.cache.put(rel_path.to_owned(), size); - Ok(()) + key_string } - fn insert_by, F: FnOnce(&Path) -> io::Result<()>>( - &mut self, - key: K, - size: Option, - by: F, - ) -> Result<()> { - if let Some(size) = size { - if !self.can_store(size) { - return Err(Error::FileTooLarge); - } - } - let rel_path = key.as_ref(); - let path = self.rel_to_abs_path(rel_path); - fs::create_dir_all(path.parent().expect("Bad path?"))?; - by(&path)?; - let size = match size { - Some(size) => size, - None => fs::metadata(path)?.len(), - }; - self.add_file(AddFile::RelPath(rel_path), size) - .map_err(|e| { - error!( - "Failed to insert file `{}`: {}", - rel_path.to_string_lossy(), - e - ); - fs::remove_file(&self.rel_to_abs_path(rel_path)) - .expect("Failed to remove file we just created!"); - e - }) + fn cache_key(&self, str: &K) -> String + where K: Hash + Eq + ?Sized { + // TODO we need a 128 bit digest + let mut hash_state = self.hash_builder.build_hasher(); + str.hash(&mut hash_state); + let digits = hash_state.finish(); + let hex_key = format!("{:x}", digits); + hex_key } - /// Add a file by calling `with` with the open `File` corresponding to the cache at path `key`. - pub fn insert_with, F: FnOnce(File) -> io::Result<()>>( - &mut self, - key: K, - with: F, - ) -> Result<()> { - self.insert_by(key, None, |path| with(File::create(&path)?)) + fn cache_path(&self, str: &K) -> PathBuf + where K: Hash + Eq + ?Sized { + let hex_key = self.cache_key(str); + let prefix = &hex_key[0..3]; + let mut path_buf = PathBuf::from(prefix); + path_buf.push(Path::new(&hex_key)); + path_buf } /// Add a file with `bytes` as its contents to the cache at path `key`. 
- pub fn insert_bytes>(&mut self, key: K, bytes: &[u8]) -> Result<()> { - self.insert_by(key, Some(bytes.len() as u64), |path| { - let mut f = File::create(&path)?; - f.write_all(bytes)?; - Ok(()) - }) - } + pub fn insert_bytes>(&mut self, key: K, bytes: &[u8]) -> Result<()> { + if !self.can_store(bytes.len() as u64) { + return Err(Error::FileTooLarge); + } - /// Add an existing file at `path` to the cache at path `key`. - pub fn insert_file, P: AsRef>(&mut self, key: K, path: P) -> Result<()> { - let size = fs::metadata(path.as_ref())?.len(); - self.insert_by(key, Some(size), |new_path| { - fs::rename(path.as_ref(), new_path).or_else(|_| { - warn!("fs::rename failed, falling back to copy!"); - fs::copy(path.as_ref(), new_path)?; - fs::remove_file(path.as_ref()).unwrap_or_else(|e| { - error!("Failed to remove original file in insert_file: {}", e) - }); - Ok(()) - }) - }) + // TODO combine these + let cache_key = self.cache_key(key.as_ref()); + let rel_path = self.cache_path(key.as_ref()); + let path = self.rel_to_abs_path(rel_path); + // TODO rm this panic, no nested dirs here + fs::create_dir_all(path.parent().expect("Bad path?"))?; + let mut f = File::create(&path)?; + f.write_all(bytes)?; + let size = bytes.len() as u64; + self.cache.put(cache_key, size); + Ok(()) } /// Return `true` if a file with path `key` is in the cache. - pub fn contains_key>(&self, key: K) -> bool { + pub fn contains_key>(&self, key: K) -> bool { self.cache.contains(key.as_ref()) } /// Get an opened `File` for `key`, if one exists and can be opened. Updates the Cache state /// of the file if present. Avoid using this method if at all possible, prefer `.get`. 
- pub fn get_file>(&mut self, key: K) -> Result { - let rel_path = key.as_ref(); + pub fn get_file(&mut self, key: &K) -> Result + where K: Hash + Eq + ?Sized { + let cache_key = self.cache_key(key); + let rel_path = self.cache_path(key); let path = self.rel_to_abs_path(rel_path); self.cache - .get(rel_path) + .get(&cache_key) .ok_or(Error::FileNotInCache) .and_then(|_| { + // TODO do we need to adjust the mtime, cross reboot? let t = FileTime::now(); set_file_times(&path, t, t)?; File::open(path).map_err(Into::into) }) } - /// Get an opened readable and seekable handle to the file at `key`, if one exists and can - /// be opened. Updates the Cache state of the file if present. - pub fn get>(&mut self, key: K) -> Result> { - self.get_file(key).map(|f| Box::new(f) as Box) - } - /// Remove the given key from the cache. - pub fn remove>(&mut self, key: K) -> Result<()> { - match self.cache.pop(key.as_ref()) { + pub fn remove(&mut self, key: &K) -> Result<()> + where K: Hash + Eq + ?Sized { + let cache_key = self.cache_key(key); + let rel_path = self.cache_path(key); + match self.cache.pop(&cache_key) { Some(_) => { - let path = self.rel_to_abs_path(key.as_ref()); + let path = self.rel_to_abs_path(rel_path); fs::remove_file(&path).map_err(|e| { error!("Error removing file from cache: `{:?}`: {}", path, e); Into::into(e) @@ -324,6 +286,8 @@ pub mod result { FileTooLarge, /// The file was not in the cache. FileNotInCache, + /// The file was not in the cache. + MalformedPath, /// An IO Error occurred. 
Io(io::Error), } @@ -333,6 +297,7 @@ pub mod result { match self { Error::FileTooLarge => write!(f, "File too large"), Error::FileNotInCache => write!(f, "File not in cache"), + Error::MalformedPath => write!(f, "Malformed catch file path"), Error::Io(ref e) => write!(f, "{}", e), } } @@ -341,9 +306,8 @@ pub mod result { impl StdError for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { - Error::FileTooLarge => None, - Error::FileNotInCache => None, Error::Io(ref e) => Some(e), + _ => None, } } } diff --git a/src/common/cache/src/lib.rs b/src/common/cache/src/lib.rs index 920d9cbdf6221..65f64b42603d2 100644 --- a/src/common/cache/src/lib.rs +++ b/src/common/cache/src/lib.rs @@ -18,10 +18,14 @@ extern crate heapsize_; mod cache; +mod disk_cache; mod meter; pub use cache::lru::LruCache; pub use cache::Cache; +pub use disk_cache::result::Error as DiskCacheError; +pub use disk_cache::result::Result as DiskCacheResult; +pub use disk_cache::*; pub use meter::bytes_meter::BytesMeter; pub use meter::count_meter::Count; pub use meter::count_meter::CountableMeter; diff --git a/common/cache/src/disk_cache_test.rs b/src/common/cache/tests/it/disk_cache.rs similarity index 74% rename from common/cache/src/disk_cache_test.rs rename to src/common/cache/tests/it/disk_cache.rs index 71f1924be49c8..bf4f331ba2e95 100644 --- a/common/cache/src/disk_cache_test.rs +++ b/src/common/cache/tests/it/disk_cache.rs @@ -1,4 +1,4 @@ -// Copyright 2021 Datafuse Labs. +// Copyright 2023 Datafuse Labs. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,6 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
+// use std::fs::File; use std::fs::{self}; @@ -20,13 +21,12 @@ use std::io::{self}; use std::path::Path; use std::path::PathBuf; +use common_cache::DiskCacheError; +use common_cache::LruDiskCache; use filetime::set_file_times; use filetime::FileTime; use tempfile::TempDir; -use crate::DiskCacheError; -use crate::LruDiskCache; - struct TestFixture { /// Temp directory. pub tempdir: TempDir, @@ -167,7 +167,7 @@ fn test_add_get_lru() { c.insert_bytes("file1", &[1; 10]).unwrap(); c.insert_bytes("file2", &[2; 10]).unwrap(); // Get the file to bump its LRU status. - assert_eq!(read_all(&mut c.get("file1").unwrap()).unwrap(), vec![ + assert_eq!(read_all(&mut c.get_file("file1").unwrap()).unwrap(), vec![ 1u8; 10 ]); @@ -186,7 +186,7 @@ fn test_add_get_lru() { { let mut c = LruDiskCache::new(f.tmp(), 25).unwrap(); // Bump file1 again. - c.get("file1").unwrap(); + c.get_file("file1").unwrap(); } // Now check that the on-disk mtimes were updated and used. { @@ -212,62 +212,36 @@ fn test_insert_bytes_too_large() { } } -#[test] -fn test_insert_file() { - let f = TestFixture::new(); - let p1 = f.create_file("file1", 10); - let p2 = f.create_file("file2", 10); - let p3 = f.create_file("file3", 10); - let mut c = LruDiskCache::new(f.tmp().join("cache"), 25).unwrap(); - c.insert_file("file1", &p1).unwrap(); - assert_eq!(c.len(), 1); - c.insert_file("file2", &p2).unwrap(); - assert_eq!(c.len(), 2); - // Get the file to bump its LRU status. - assert_eq!(read_all(&mut c.get("file1").unwrap()).unwrap(), vec![ - 0u8; - 10 - ]); - // Adding this third file should put the cache above the limit. - c.insert_file("file3", &p3).unwrap(); - assert_eq!(c.len(), 2); - assert_eq!(c.size(), 20); - // The least-recently-used file should have been removed. 
- assert!(!c.contains_key("file2")); - assert!(!p1.exists()); - assert!(!p2.exists()); - assert!(!p3.exists()); -} - -#[test] -fn test_remove() { - let f = TestFixture::new(); - let p1 = f.create_file("file1", 10); - let p2 = f.create_file("file2", 10); - let p3 = f.create_file("file3", 10); - let mut c = LruDiskCache::new(f.tmp().join("cache"), 25).unwrap(); - c.insert_file("file1", &p1).unwrap(); - c.insert_file("file2", &p2).unwrap(); - c.remove("file1").unwrap(); - c.insert_file("file3", &p3).unwrap(); - assert_eq!(c.len(), 2); - assert_eq!(c.size(), 20); - - // file1 should have been removed. - assert!(!c.contains_key("file1")); - assert!(!f.tmp().join("cache").join("file1").exists()); - assert!(f.tmp().join("cache").join("file2").exists()); - assert!(f.tmp().join("cache").join("file3").exists()); - assert!(!p1.exists()); - assert!(!p2.exists()); - assert!(!p3.exists()); - - let p4 = f.create_file("file1", 10); - c.insert_file("file1", &p4).unwrap(); - assert_eq!(c.len(), 2); - // file2 should have been removed. - assert!(c.contains_key("file1")); - assert!(!c.contains_key("file2")); - assert!(!f.tmp().join("cache").join("file2").exists()); - assert!(!p4.exists()); -} +// TODO +//#[test] +// fn test_remove() { +// let f = TestFixture::new(); +// let p1 = f.create_file("file1", 10); +// let p2 = f.create_file("file2", 10); +// let p3 = f.create_file("file3", 10); +// let mut c = LruDiskCache::new(f.tmp().join("cache"), 25).unwrap(); +// c.insert_file("file1", &p1).unwrap(); +// c.insert_file("file2", &p2).unwrap(); +// c.remove("file1").unwrap(); +// c.insert_file("file3", &p3).unwrap(); +// assert_eq!(c.len(), 2); +// assert_eq!(c.size(), 20); +// +// // file1 should have been removed. 
+// assert!(!c.contains_key("file1")); +// assert!(!f.tmp().join("cache").join("file1").exists()); +// assert!(f.tmp().join("cache").join("file2").exists()); +// assert!(f.tmp().join("cache").join("file3").exists()); +// assert!(!p1.exists()); +// assert!(!p2.exists()); +// assert!(!p3.exists()); +// +// let p4 = f.create_file("file1", 10); +// c.insert_file("file1", &p4).unwrap(); +// assert_eq!(c.len(), 2); +// // file2 should have been removed. +// assert!(c.contains_key("file1")); +// assert!(!c.contains_key("file2")); +// assert!(!f.tmp().join("cache").join("file2").exists()); +// assert!(!p4.exists()); +//} diff --git a/src/common/cache/tests/it/main.rs b/src/common/cache/tests/it/main.rs index 0f9ba048238f0..c69f732a0428c 100644 --- a/src/common/cache/tests/it/main.rs +++ b/src/common/cache/tests/it/main.rs @@ -15,3 +15,4 @@ #![allow(clippy::uninlined_format_args)] mod cache; +mod disk_cache; diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index c887c90ad6e25..7142bcf9998f8 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -18,6 +18,8 @@ use std::sync::Arc; use common_base::base::GlobalInstance; use common_config::QueryConfig; use common_exception::Result; +use storages_common_cache::DiskBytesCache; +use storages_common_cache::DiskCacheBuilder; use storages_common_cache::InMemoryCacheBuilder; use storages_common_cache::InMemoryItemCacheHolder; @@ -38,12 +40,11 @@ pub struct CacheManager { bloom_index_filter_cache: Option, bloom_index_meta_cache: Option, file_meta_data_cache: Option, + block_data_cache: Option, } impl CacheManager { /// Initialize the caches according to the relevant configurations. 
- /// - /// For convenience, ids of cluster and tenant are also kept pub fn init(config: &QueryConfig) -> Result<()> { if !config.table_meta_cache_enabled { GlobalInstance::set(Arc::new(Self { @@ -53,6 +54,7 @@ impl CacheManager { bloom_index_meta_cache: None, file_meta_data_cache: None, table_statistic_cache: None, + block_data_cache: None, })); } else { let table_snapshot_cache = Self::new_item_cache(config.table_cache_snapshot_count); @@ -63,6 +65,10 @@ impl CacheManager { let bloom_index_meta_cache = Self::new_item_cache(config.table_cache_bloom_index_meta_count); let file_meta_data_cache = Self::new_item_cache(DEFAULT_FILE_META_DATA_CACHE_ITEMS); + let block_data_cache = Self::new_block_data_cache( + &config.table_disk_cache_root, + config.table_disk_cache_mb_size * 1024 * 1024, + )?; GlobalInstance::set(Arc::new(Self { table_snapshot_cache, segment_info_cache, @@ -70,6 +76,7 @@ impl CacheManager { bloom_index_meta_cache, file_meta_data_cache, table_statistic_cache, + block_data_cache, })); } @@ -104,6 +111,10 @@ impl CacheManager { self.file_meta_data_cache.clone() } + pub fn get_block_data_cache(&self) -> Option { + self.block_data_cache.clone() + } + fn new_item_cache(capacity: u64) -> Option> { if capacity > 0 { Some(InMemoryCacheBuilder::new_item_cache(capacity)) @@ -111,4 +122,13 @@ impl CacheManager { None } } + + fn new_block_data_cache(path: &str, capacity: u64) -> Result> { + if capacity > 0 { + let cache_holder = DiskCacheBuilder::new_disk_cache(path, capacity)?; + Ok(Some(cache_holder)) + } else { + Ok(None) + } + } } diff --git a/src/query/storages/common/cache/Cargo.toml b/src/query/storages/common/cache/Cargo.toml index 51b121cb1ffb1..6ea8a12f84de3 100644 --- a/src/query/storages/common/cache/Cargo.toml +++ b/src/query/storages/common/cache/Cargo.toml @@ -22,5 +22,6 @@ opendal = { workspace = true } parking_lot = "0.12.1" serde = { workspace = true } serde_json = { workspace = true } +tracing = "0.1.36" [build-dependencies] diff --git 
a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index c0623be645b80..3a453e2671cc8 100644 --- a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -34,7 +34,7 @@ pub trait StorageCache { type Meter; fn put(&mut self, key: K, value: Arc); - fn get(&mut self, k: &Q) -> Option<&Arc> + fn get(&mut self, k: &Q) -> Option> where K: Borrow, Q: Hash + Eq + ?Sized; @@ -64,7 +64,7 @@ mod impls { Q: Hash + Eq + ?Sized, { let mut guard = self.write(); - guard.get(k).cloned() + guard.get(k).clone() } fn put(&self, k: String, v: Arc) { diff --git a/src/query/storages/common/cache/src/lib.rs b/src/query/storages/common/cache/src/lib.rs index 55bc641613412..5f20851c1e970 100644 --- a/src/query/storages/common/cache/src/lib.rs +++ b/src/query/storages/common/cache/src/lib.rs @@ -18,7 +18,9 @@ mod providers; mod read; pub use cache::CacheAccessor; +pub use providers::DiskBytesCache; pub use providers::DiskCache; +pub use providers::DiskCacheBuilder; pub use providers::InMemoryBytesCacheHolder; pub use providers::InMemoryCacheBuilder; pub use providers::InMemoryItemCacheHolder; diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 5abc3ad386830..bca724b8918d2 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -14,33 +14,68 @@ use std::borrow::Borrow; use std::hash::Hash; +use std::io::Read; use std::sync::Arc; +pub use common_cache::LruDiskCache as DiskCache; +use common_exception::ErrorCode; +use common_exception::Result; +use parking_lot::RwLock; +use tracing::error; + use crate::cache::StorageCache; -// TODO: local disk file based LRU/LFU/xxxx cache -pub struct DiskCache {} +pub type DiskBytesCache = Arc>; -impl StorageCache for DiskCache { +pub struct DiskCacheBuilder; +impl DiskCacheBuilder { + pub fn 
new_disk_cache(path: &str, capacity: u64) -> Result { + let cache = DiskCache::new(path, capacity) + .map_err(|e| ErrorCode::StorageOther(format!("create disk cache failed, {}", e)))?; + Ok(Arc::new(RwLock::new(cache))) + } +} + +impl StorageCache> for DiskCache { type Meter = (); - fn put(&mut self, _key: K, _value: Arc) { - todo!() + fn put(&mut self, key: String, value: Arc>) { + if let Err(e) = self.insert_bytes(key, &value) { + error!("populate disk cache failed {}", e); + } } - fn get(&mut self, _k: &Q) -> Option<&Arc> + fn get(&mut self, k: &Q) -> Option>> where - K: Borrow, + String: Borrow, Q: Hash + Eq + ?Sized, { - todo!() + let mut read_file = || { + let mut file = self.get_file(k)?; + let mut v = vec![]; + file.read_to_end(&mut v)?; + Ok::<_, Box>(v) + }; + + match read_file() { + Ok(bytes) => Some(Arc::new(bytes)), + Err(e) => { + error!("get disk cache item failed {}", e); + None + } + } } - fn evict(&mut self, _k: &Q) -> bool + fn evict(&mut self, k: &Q) -> bool where - K: Borrow, + String: Borrow, Q: Hash + Eq + ?Sized, { - todo!() + if let Err(e) = self.remove(k) { + error!("evict disk cache item failed {}", e); + false + } else { + true + } } } diff --git a/src/query/storages/common/cache/src/providers/memory_cache.rs b/src/query/storages/common/cache/src/providers/memory_cache.rs index 952bc553aed53..7aea3bfd4d2dd 100644 --- a/src/query/storages/common/cache/src/providers/memory_cache.rs +++ b/src/query/storages/common/cache/src/providers/memory_cache.rs @@ -59,12 +59,12 @@ where Cache::put(self, key, value); } - fn get(&mut self, k: &Q) -> Option<&Arc> + fn get(&mut self, k: &Q) -> Option> where K: Borrow, Q: Hash + Eq + ?Sized, { - Cache::get(self, k) + Cache::get(self, k).cloned() } fn evict(&mut self, k: &Q) -> bool diff --git a/src/query/storages/common/cache/src/providers/mod.rs b/src/query/storages/common/cache/src/providers/mod.rs index 579bb711d3692..aa55bbdd72935 100644 --- a/src/query/storages/common/cache/src/providers/mod.rs +++ 
b/src/query/storages/common/cache/src/providers/mod.rs @@ -14,7 +14,9 @@ mod disk_cache; mod memory_cache; +pub use disk_cache::DiskBytesCache; pub use disk_cache::DiskCache; +pub use disk_cache::DiskCacheBuilder; pub use memory_cache::BytesCache; pub use memory_cache::InMemoryBytesCacheHolder; pub use memory_cache::InMemoryCacheBuilder; diff --git a/src/query/storages/common/cache/src/read/cached_reader.rs b/src/query/storages/common/cache/src/read/cached_reader.rs index 29923bd509393..5f610bd9f7089 100644 --- a/src/query/storages/common/cache/src/read/cached_reader.rs +++ b/src/query/storages/common/cache/src/read/cached_reader.rs @@ -30,9 +30,9 @@ use crate::metrics::metrics_inc_cache_miss_load_millisecond; /// A generic cache-aware reader /// /// Given an impl of [StorageCache], e.g. `ItemCache` or `DiskCache` and a proper impl -/// [LoaderWithCacheKey], which is able to load `T`, `CachedReader` will load the `T` -/// by using [LoaderWithCacheKey], and populate the cache item into [StorageCache] by using -/// the loaded `T` and the key that [LoaderWithCacheKey] provides. +/// [Loader], which is able to load `T`, `CachedReader` will load the `T` +/// by using [Loader], and populate the cache item into [StorageCache] by using +/// the loaded `T` and the key that [Loader] provides. 
pub struct CachedReader { cache: Option>>, loader: L, @@ -104,6 +104,6 @@ where } fn get_cached(&self, key: &str, cache: &RwLock) -> Option> { - cache.write().get(key).cloned() + cache.write().get(key) } } From c38810889a9a0b3f8fb45451fc266f14b579ae10 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 30 Jan 2023 09:20:16 +0800 Subject: [PATCH 03/80] assembly data cache to async reader --- Cargo.lock | 2 - src/common/cache/Cargo.toml | 4 +- src/common/cache/src/disk_cache.rs | 24 ++++--- src/common/cache/src/lib.rs | 2 + src/common/cache/src/todo | 12 ++++ src/common/cache/tests/it/disk_cache.rs | 3 +- src/common/cache/tests/it/main.rs | 2 +- src/query/storages/common/cache/src/cache.rs | 2 +- .../common/cache/src/providers/disk_cache.rs | 4 +- .../fuse/src/io/read/block/block_reader.rs | 65 ++++++++++++++----- .../src/io/read/block/block_reader_parquet.rs | 14 ++-- .../operations/mutation/mutation_source.rs | 3 + 12 files changed, 98 insertions(+), 39 deletions(-) create mode 100644 src/common/cache/src/todo diff --git a/Cargo.lock b/Cargo.lock index 08aee448f1855..fce778fad6af6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1295,9 +1295,7 @@ version = "0.1.0" dependencies = [ "filetime", "heapsize", - "hex", "ritelinked", - "tempfile", "tracing", "walkdir", ] diff --git a/src/common/cache/Cargo.toml b/src/common/cache/Cargo.toml index ce357a86d8cef..2bc9e338f60e1 100644 --- a/src/common/cache/Cargo.toml +++ b/src/common/cache/Cargo.toml @@ -18,7 +18,7 @@ amortized = ["ritelinked/ahash-amortized", "ritelinked/inline-more-amortized"] # Crates.io dependencies filetime = "0.2.15" -hex = "0.4.3" +#hex = "0.4.3" ritelinked = { version = "0.3.2", default-features = false, features = ["ahash", "inline-more"] } tracing = "0.1.36" walkdir = "2.3.2" @@ -27,4 +27,4 @@ walkdir = "2.3.2" heapsize_ = { package = "heapsize", version = "0.4.2", optional = true } [dev-dependencies] -tempfile = "3.3.0" +#tempfile = "3.3.0" diff --git a/src/common/cache/src/disk_cache.rs 
b/src/common/cache/src/disk_cache.rs index 57bb88430bcd4..f21a3e9651793 100644 --- a/src/common/cache/src/disk_cache.rs +++ b/src/common/cache/src/disk_cache.rs @@ -32,6 +32,10 @@ use crate::DefaultHashBuilder; use crate::FileSize; use crate::LruCache; +// TODO doc the disk cache path layout +// TODO extract new type CacheKey +// TODO checksum of cached data + /// Return an iterator of `(path, size)` of files under `path` sorted by ascending last-modified /// time, such that the oldest modified file is returned first. fn get_all_files>(path: P) -> Box> { @@ -158,7 +162,7 @@ where ) }); } else { - while self.cache.size() as u64 + size > self.cache.capacity() as u64 { + while self.cache.size() + size > self.cache.capacity() { let (rel_path, _) = self .cache .pop_by_policy() @@ -182,10 +186,10 @@ where /// Returns `true` if the disk cache can store a file of `size` bytes. pub fn can_store(&self, size: u64) -> bool { - size <= self.cache.capacity() as u64 + size <= self.cache.capacity() } - fn recovery_from(str: &Path) -> String { + pub fn recovery_from(str: &Path) -> String { let key_string = match str.as_os_str().to_str() { Some(str) => str.to_owned(), None => { @@ -195,7 +199,7 @@ where key_string } - fn cache_key(&self, str: &K) -> String + pub fn cache_key(&self, str: &K) -> String where K: Hash + Eq + ?Sized { // TODO we need a 128 bit digest let mut hash_state = self.hash_builder.build_hasher(); @@ -205,7 +209,7 @@ where hex_key } - fn cache_path(&self, str: &K) -> PathBuf + pub fn cache_path(&self, str: &K) -> PathBuf where K: Hash + Eq + ?Sized { let hex_key = self.cache_key(str); let prefix = &hex_key[0..3]; @@ -229,13 +233,17 @@ where let mut f = File::create(&path)?; f.write_all(bytes)?; let size = bytes.len() as u64; - self.cache.put(cache_key, size); + if let Some(_replaced) = self.cache.put(cache_key, size) { + // TODO remove the replaced item from disk + } Ok(()) } /// Return `true` if a file with path `key` is in the cache. 
- pub fn contains_key>(&self, key: K) -> bool { - self.cache.contains(key.as_ref()) + pub fn contains_key>(&self, key: &K) -> bool + where K: Hash + Eq + ?Sized { + let cache_key = self.cache_key(key); + self.cache.contains(&cache_key) } /// Get an opened `File` for `key`, if one exists and can be opened. Updates the Cache state diff --git a/src/common/cache/src/lib.rs b/src/common/cache/src/lib.rs index 65f64b42603d2..5c92472e95722 100644 --- a/src/common/cache/src/lib.rs +++ b/src/common/cache/src/lib.rs @@ -13,6 +13,8 @@ // limitations under the License. #![allow(clippy::uninlined_format_args)] +#![deny(unused_crate_dependencies)] + #[cfg(feature = "heapsize")] #[cfg(not(target_os = "macos"))] extern crate heapsize_; diff --git a/src/common/cache/src/todo b/src/common/cache/src/todo new file mode 100644 index 0000000000000..b60114d9796c9 --- /dev/null +++ b/src/common/cache/src/todo @@ -0,0 +1,12 @@ +- cache data crc +- metrics +- performance evaluation + - without threshold + - just put bytes to disk (no sync) + - what happens if cache is full? will performance degrade significantly? +- if performance do not meet requirement + - consider make it tiered + - a in-memory moka at front + - disk cache layer listen to the eviction event (serving in dedicate thread pool) +- misc: restart maybe slow if cached a large amount of data, consider make the startup of data cache layer async + silently drop cache admissions during starting up \ No newline at end of file diff --git a/src/common/cache/tests/it/disk_cache.rs b/src/common/cache/tests/it/disk_cache.rs index bf4f331ba2e95..8e31474571135 100644 --- a/src/common/cache/tests/it/disk_cache.rs +++ b/src/common/cache/tests/it/disk_cache.rs @@ -105,10 +105,11 @@ fn test_some_existing_files() { #[test] fn test_existing_file_too_large() { let f = TestFixture::new(); + let c = LruDiskCache::new(f.tmp(), 15).unwrap(); + let cache_key = // Create files explicitly in the past. 
set_mtime_back(f.create_file("file1", 10), 10); set_mtime_back(f.create_file("file2", 10), 5); - let c = LruDiskCache::new(f.tmp(), 15).unwrap(); assert_eq!(c.size(), 10); assert_eq!(c.len(), 1); assert!(!c.contains_key("file1")); diff --git a/src/common/cache/tests/it/main.rs b/src/common/cache/tests/it/main.rs index c69f732a0428c..27049ede54a7e 100644 --- a/src/common/cache/tests/it/main.rs +++ b/src/common/cache/tests/it/main.rs @@ -15,4 +15,4 @@ #![allow(clippy::uninlined_format_args)] mod cache; -mod disk_cache; +// mod disk_cache; diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index 3a453e2671cc8..0e59a924a9623 100644 --- a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -64,7 +64,7 @@ mod impls { Q: Hash + Eq + ?Sized, { let mut guard = self.write(); - guard.get(k).clone() + guard.get(k) } fn put(&self, k: String, v: Arc) { diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index bca724b8918d2..aa8114c2f887d 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -31,7 +31,7 @@ pub struct DiskCacheBuilder; impl DiskCacheBuilder { pub fn new_disk_cache(path: &str, capacity: u64) -> Result { let cache = DiskCache::new(path, capacity) - .map_err(|e| ErrorCode::StorageOther(format!("create disk cache failed, {}", e)))?; + .map_err(|e| ErrorCode::StorageOther(format!("create disk cache failed, {e}")))?; Ok(Arc::new(RwLock::new(cache))) } } @@ -39,12 +39,14 @@ impl DiskCacheBuilder { impl StorageCache> for DiskCache { type Meter = (); + // TODO change this signature, takes &[u8] fn put(&mut self, key: String, value: Arc>) { if let Err(e) = self.insert_bytes(key, &value) { error!("populate disk cache failed {}", e); } } + // TODO change this signature, remove tha Arc fn get(&mut self, k: &Q) 
-> Option>> where String: Borrow, diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index 99e285cfe9654..a36537f0ff1b0 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -37,6 +37,9 @@ use common_storage::ColumnNodes; use futures::future::try_join_all; use opendal::Object; use opendal::Operator; +use storages_common_cache::CacheAccessor; +use storages_common_cache_manager::CacheManager; +use storages_common_table_meta::meta::ColumnId; use storages_common_table_meta::meta::ColumnMeta; use crate::fuse_part::FusePartInfo; @@ -80,9 +83,12 @@ where Self: 'static { path: String, owner_memory: OwnerMemory, - columns_chunks: HashMap)>, + columns_chunks: HashMap)>, + cached_columns: CachedColumnData, } +pub type CachedColumnData = Vec<(ColumnId, Arc>)>; + impl MergeIOReadResult where Self: 'static { @@ -91,10 +97,11 @@ where Self: 'static path, owner_memory, columns_chunks: HashMap::with_capacity(capacity), + cached_columns: vec![], } } - pub fn columns_chunks(&self) -> Result> { + pub fn columns_chunks(&self) -> Result> { let mut res = Vec::with_capacity(self.columns_chunks.len()); for (column_idx, (chunk_idx, range)) in &self.columns_chunks { @@ -102,6 +109,10 @@ where Self: 'static res.push((*column_idx, &chunk[range.clone()])); } + for (column_id, data) in &self.cached_columns { + res.push((*column_id, data.as_slice())) + } + Ok(res) } @@ -109,8 +120,15 @@ where Self: 'static self.owner_memory.get_chunk(index, path) } - pub fn add_column_chunk(&mut self, chunk: usize, column: usize, range: Range) { - self.columns_chunks.insert(column, (chunk, range)); + pub fn add_column_chunk(&mut self, chunk: usize, column_id: ColumnId, range: Range) { + if let Ok(chunk_data) = self.get_chunk(chunk, &self.path) { + let cache = CacheManager::instance().get_block_data_cache(); + let cache_key = format!("{}-{}", self.path, 
column_id); + let data = &chunk_data[range.clone()]; + // TODO NO, use a &[u8] to pass data to cache + cache.put(cache_key, Arc::new(data.to_vec())); + } + self.columns_chunks.insert(column_id, (chunk, range)); } } @@ -148,7 +166,9 @@ impl BlockReader { } pub fn support_blocking_api(&self) -> bool { - self.operator.metadata().can_blocking() + // TODO for testing purpose only, remove this in the final PR + let force_async = std::env::var("DATABEND_DEBUG_FORCE_ASYNC_READ").is_ok(); + force_async && self.operator.metadata().can_blocking() } /// This is an optimized for data read, works like the Linux kernel io-scheduler IO merging. @@ -217,7 +237,8 @@ impl BlockReader { // Fetch the raw data for the raw range. let start = (column_range.start - merged_range.start) as usize; let end = (column_range.end - merged_range.start) as usize; - read_res.add_column_chunk(merged_range_idx, *raw_idx, start..end); + let column_id = *raw_idx as ColumnId; + read_res.add_column_chunk(merged_range_idx, column_id, start..end); } Ok(read_res) @@ -257,6 +278,7 @@ impl BlockReader { let mut read_res = MergeIOReadResult::create(owner_memory, raw_ranges.len(), path.clone()); for (raw_idx, raw_range) in &raw_ranges { + let column_id = *raw_idx as ColumnId; let column_range = raw_range.start..raw_range.end; // Find the range index and Range from merged ranges. @@ -271,7 +293,7 @@ impl BlockReader { // Fetch the raw data for the raw range. 
let start = (column_range.start - merged_range.start) as usize; let end = (column_range.end - merged_range.start) as usize; - read_res.add_column_chunk(merged_range_idx, *raw_idx, start..end); + read_res.add_column_chunk(merged_range_idx, column_id, start..end); } Ok(read_res) @@ -289,21 +311,30 @@ impl BlockReader { } let mut ranges = vec![]; - for index in self.project_indices.keys() { - let column_meta = &columns_meta[index]; - let (offset, len) = column_meta.offset_length(); - ranges.push((*index, offset..(offset + len))); - - // Perf - { - metrics_inc_remote_io_seeks(1); - metrics_inc_remote_io_read_bytes(len); + let cache_manager = CacheManager::instance().get_block_data_cache(); + let mut data_from_cache = vec![]; + for column_id in self.project_indices.keys() { + let column_cache_key = format!("{location}-{column_id}"); + if let Some(cached_column_raw_data) = cache_manager.get(&column_cache_key) { + data_from_cache.push((*column_id as ColumnId, cached_column_raw_data)); + } else { + let column_meta = &columns_meta[column_id]; + let (offset, len) = column_meta.offset_length(); + ranges.push((*column_id, offset..(offset + len))); + + // Perf + { + metrics_inc_remote_io_seeks(1); + metrics_inc_remote_io_read_bytes(len); + } } } let object = self.operator.object(location); - Self::merge_io_read(settings, object, ranges).await + let mut merge_io_read_res = Self::merge_io_read(settings, object, ranges).await?; + merge_io_read_res.cached_columns = data_from_cache; + Ok(merge_io_read_res) } pub fn sync_read_columns_data_by_merge_io( diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index 719596118907a..0979586d182b1 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -29,6 +29,7 @@ use common_exception::ErrorCode; use common_exception::Result; use 
common_expression::DataBlock; use storages_common_table_meta::meta::BlockMeta; +use storages_common_table_meta::meta::ColumnId; use storages_common_table_meta::meta::ColumnMeta; use storages_common_table_meta::meta::Compression; @@ -55,12 +56,12 @@ impl BlockReader { .collect::>(); // Get the merged IO read result. - let read_res = self + let fetched = self .read_columns_data_by_merge_io(settings, &meta.location.0, &columns_meta) .await?; // Get the columns chunk. - let chunks = read_res + let chunks = fetched .columns_chunks()? .into_iter() .map(|(column_idx, column_chunk)| (column_idx, column_chunk)) @@ -85,7 +86,7 @@ impl BlockReader { pub fn deserialize_parquet_chunks( &self, part: PartInfoPtr, - chunks: Vec<(usize, &[u8])>, + chunks: Vec<(ColumnId, &[u8])>, ) -> Result { let part = FusePartInfo::from_part(&part)?; let start = Instant::now(); @@ -121,14 +122,14 @@ impl BlockReader { num_rows: usize, compression: &Compression, columns_meta: &HashMap, - columns_chunks: Vec<(usize, &[u8])>, + columns_chunks: Vec<(ColumnId, &[u8])>, uncompressed_buffer: Option>, ) -> Result { if columns_chunks.is_empty() { return Ok(DataBlock::new(vec![], num_rows)); } - let chunk_map: HashMap = columns_chunks.into_iter().collect(); + let chunk_map: HashMap = columns_chunks.into_iter().collect(); let mut columns_array_iter = Vec::with_capacity(self.projection.len()); let columns = self.projection.project_column_nodes(&self.column_nodes)?; @@ -140,7 +141,8 @@ impl BlockReader { let mut column_chunks = Vec::with_capacity(indices.len()); let mut column_descriptors = Vec::with_capacity(indices.len()); for index in indices { - let column_read = <&[u8]>::clone(&chunk_map[index]); + let column_id = *index as ColumnId; + let column_read = <&[u8]>::clone(&chunk_map[&column_id]); let column_meta = &columns_meta[index]; let column_descriptor = &self.parquet_schema_descriptor.columns()[*index]; column_metas.push(column_meta); diff --git 
a/src/query/storages/fuse/src/operations/mutation/mutation_source.rs b/src/query/storages/fuse/src/operations/mutation/mutation_source.rs index 2b7d2e5830269..bc2ec4ce988b2 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutation_source.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutation_source.rs @@ -35,6 +35,7 @@ use common_functions::scalars::BUILTIN_FUNCTIONS; use common_sql::evaluator::BlockOperator; use storages_common_pruner::BlockMetaIndex; use storages_common_table_meta::meta::ClusterStatistics; +use storages_common_table_meta::meta::ColumnId; use crate::fuse_part::FusePartInfo; use crate::io::BlockReader; @@ -47,6 +48,8 @@ use crate::pipelines::processors::processor::ProcessorPtr; use crate::pipelines::processors::Processor; use crate::MergeIOReadResult; +type DataChunks = Vec<(ColumnId, Vec)>; + pub enum MutationAction { Deletion, Update, From f09faa0345ffca9ee2ddb1726fa265737b4699ac Mon Sep 17 00:00:00 2001 From: dantengsky Date: Tue, 31 Jan 2023 17:21:09 +0800 Subject: [PATCH 04/80] add CacheItem type parameter --- src/query/storages/common/cache/src/cache.rs | 14 +++++++++----- .../common/cache/src/providers/disk_cache.rs | 1 + .../common/cache/src/providers/memory_cache.rs | 3 ++- .../common/cache/src/read/cached_reader.rs | 4 ++-- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index 0e59a924a9623..7f9c0bd060966 100644 --- a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -32,9 +32,11 @@ pub trait CacheAccessor { /// The minimum interface that cache providers should implement pub trait StorageCache { type Meter; + type CachedItem; + fn put(&mut self, key: K, value: Arc); - fn get(&mut self, k: &Q) -> Option> + fn get(&mut self, k: &Q) -> Option where K: Borrow, Q: Hash + Eq + ?Sized; @@ -55,10 +57,12 @@ mod impls { use crate::cache::CacheAccessor; use 
crate::cache::StorageCache; - impl CacheAccessor for Arc> - where C: StorageCache + impl<'a, V, C> CacheAccessor for Arc> + where + C: StorageCache>, + Self: 'a, { - fn get(&self, k: &Q) -> Option> + fn get(&self, k: &Q) -> Option where String: Borrow, Q: Hash + Eq + ?Sized, @@ -83,7 +87,7 @@ mod impls { } impl CacheAccessor for Option>> - where C: StorageCache + where C: StorageCache> { fn get(&self, k: &Q) -> Option> where diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index aa8114c2f887d..02ebae6e8e6c0 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -38,6 +38,7 @@ impl DiskCacheBuilder { impl StorageCache> for DiskCache { type Meter = (); + type CachedItem = Arc>; // TODO change this signature, takes &[u8] fn put(&mut self, key: String, value: Arc>) { diff --git a/src/query/storages/common/cache/src/providers/memory_cache.rs b/src/query/storages/common/cache/src/providers/memory_cache.rs index 7aea3bfd4d2dd..9657123790440 100644 --- a/src/query/storages/common/cache/src/providers/memory_cache.rs +++ b/src/query/storages/common/cache/src/providers/memory_cache.rs @@ -54,12 +54,13 @@ where K: Eq + Hash, { type Meter = M; + type CachedItem = Arc; fn put(&mut self, key: K, value: Arc) { Cache::put(self, key, value); } - fn get(&mut self, k: &Q) -> Option> + fn get(&mut self, k: &Q) -> Option where K: Borrow, Q: Hash + Eq + ?Sized, diff --git a/src/query/storages/common/cache/src/read/cached_reader.rs b/src/query/storages/common/cache/src/read/cached_reader.rs index 5f610bd9f7089..50f0e51465079 100644 --- a/src/query/storages/common/cache/src/read/cached_reader.rs +++ b/src/query/storages/common/cache/src/read/cached_reader.rs @@ -41,10 +41,10 @@ pub struct CachedReader { _p: PhantomData, } -impl CachedReader +impl<'a, T, L, C, M> CachedReader where L: Loader + Sync, - C: StorageCache, + 
C: 'a + StorageCache>, { pub fn new(cache: Option>>, name: impl Into, loader: L) -> Self { Self { From 32e4251859e06bbc309f9377ede5799d7ca700d7 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 1 Feb 2023 13:30:22 +0800 Subject: [PATCH 05/80] fix cache key to string, use 128 bit hash --- Cargo.lock | 2 + src/common/cache/Cargo.toml | 4 +- src/common/cache/src/disk_cache.rs | 103 ++++++------------ src/query/storages/common/cache/src/cache.rs | 49 ++------- src/query/storages/common/cache/src/lib.rs | 2 + .../common/cache/src/providers/disk_cache.rs | 16 +-- .../cache/src/providers/memory_cache.rs | 21 +--- .../fuse/src/io/read/block/block_reader.rs | 8 ++ 8 files changed, 66 insertions(+), 139 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fce778fad6af6..e7b0bb67e73a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1295,7 +1295,9 @@ version = "0.1.0" dependencies = [ "filetime", "heapsize", + "hex", "ritelinked", + "siphasher", "tracing", "walkdir", ] diff --git a/src/common/cache/Cargo.toml b/src/common/cache/Cargo.toml index 2bc9e338f60e1..4f8381c85cf50 100644 --- a/src/common/cache/Cargo.toml +++ b/src/common/cache/Cargo.toml @@ -17,9 +17,11 @@ amortized = ["ritelinked/ahash-amortized", "ritelinked/inline-more-amortized"] [dependencies] # Crates.io dependencies +#crc32fast = "1.3.2" filetime = "0.2.15" -#hex = "0.4.3" +hex = "0.4.3" ritelinked = { version = "0.3.2", default-features = false, features = ["ahash", "inline-more"] } +siphasher = "0.3.10" tracing = "0.1.36" walkdir = "2.3.2" diff --git a/src/common/cache/src/disk_cache.rs b/src/common/cache/src/disk_cache.rs index f21a3e9651793..5ab327ec8e164 100644 --- a/src/common/cache/src/disk_cache.rs +++ b/src/common/cache/src/disk_cache.rs @@ -15,7 +15,6 @@ use std::boxed::Box; use std::fs; use std::fs::File; -use std::hash::BuildHasher; use std::hash::Hash; use std::hash::Hasher; use std::io::prelude::*; @@ -24,6 +23,8 @@ use std::path::PathBuf; use filetime::set_file_times; use filetime::FileTime; 
+use siphasher::sip128; +use siphasher::sip128::Hasher128; use tracing::error; use walkdir::WalkDir; @@ -66,14 +67,15 @@ fn get_all_files>(path: P) -> Box>; /// An basic disk cache of files on disk. -pub struct DiskCache -where C: Cache -{ - hash_builder: S, +// pub struct DiskCache +pub struct DiskCache { cache: C, root: PathBuf, } +#[derive(Hash)] +struct CacheKey(String); + impl DiskCache where C: Cache { @@ -88,39 +90,17 @@ where C: Cache /// expects to have sole maintenance of the contents. pub fn new(path: T, size: u64) -> Result where PathBuf: From { - let default_hash_builder = DefaultHashBuilder::new(); DiskCache { - hash_builder: default_hash_builder.clone(), - cache: C::with_meter_and_hasher(size, FileSize, default_hash_builder), + cache: C::with_meter_and_hasher(size, FileSize, DefaultHashBuilder::default()), root: PathBuf::from(path), } .init() } } -impl DiskCache -where - C: Cache, - S: BuildHasher + Clone, +impl DiskCache +where C: Cache { - /// Create an `DiskCache` with hasher that stores files in `path`, limited to `size` bytes. - /// - /// Existing files in `path` will be stored with their last-modified time from the filesystem - /// used as the order for the recency of their use. Any files that are individually larger - /// than `size` bytes will be removed. - /// - /// The cache is not observant of changes to files under `path` from external sources, it - /// expects to have sole maintenance of the contents. - pub fn new_with_hasher(path: T, size: u64, hash_builder: S) -> Result - where PathBuf: From { - DiskCache { - hash_builder: hash_builder.clone(), - cache: C::with_meter_and_hasher(size, FileSize, hash_builder), - root: PathBuf::from(path), - } - .init() - } - /// Return the current size of all the files in the cache. 
pub fn size(&self) -> u64 { self.cache.size() @@ -174,10 +154,10 @@ where panic!("Error removing file from cache: `{:?}`: {}", remove_path, e) }); } - let rel_path = file + let relative_path = file .strip_prefix(&self.root) .map_err(|_e| self::Error::MalformedPath)?; - let cache_key = Self::recovery_from(rel_path); + let cache_key = Self::recovery_from(relative_path); self.cache.put(cache_key, size); } } @@ -189,8 +169,8 @@ where size <= self.cache.capacity() } - pub fn recovery_from(str: &Path) -> String { - let key_string = match str.as_os_str().to_str() { + pub fn recovery_from(relative_path: &Path) -> String { + let key_string = match relative_path.as_os_str().to_str() { Some(str) => str.to_owned(), None => { unreachable!() @@ -199,23 +179,20 @@ where key_string } - pub fn cache_key(&self, str: &K) -> String - where K: Hash + Eq + ?Sized { - // TODO we need a 128 bit digest - let mut hash_state = self.hash_builder.build_hasher(); - str.hash(&mut hash_state); - let digits = hash_state.finish(); - let hex_key = format!("{:x}", digits); - hex_key + // convert key string into hex string of SipHash 2-4 128 + fn cache_key(&self, str: &str) -> CacheKey { + let mut sip = sip128::SipHasher24::new(); + sip.write(str.as_bytes()); + let hash = sip.finish128(); + let hex_hash = hex::encode(hash.as_bytes()); + CacheKey(hex_hash) } - pub fn cache_path(&self, str: &K) -> PathBuf - where K: Hash + Eq + ?Sized { - let hex_key = self.cache_key(str); - let prefix = &hex_key[0..3]; + fn cache_path(&self, cache_key: &CacheKey) -> PathBuf { + let prefix = &cache_key.0[0..3]; let mut path_buf = PathBuf::from(prefix); - path_buf.push(Path::new(&hex_key)); - path_buf + path_buf.push(Path::new(&cache_key.0)); + self.rel_to_abs_path(path_buf) } /// Add a file with `bytes` as its contents to the cache at path `key`. 
@@ -224,37 +201,29 @@ where return Err(Error::FileTooLarge); } - // TODO combine these let cache_key = self.cache_key(key.as_ref()); - let rel_path = self.cache_path(key.as_ref()); - let path = self.rel_to_abs_path(rel_path); + let path = self.cache_path(&cache_key); // TODO rm this panic, no nested dirs here fs::create_dir_all(path.parent().expect("Bad path?"))?; let mut f = File::create(&path)?; f.write_all(bytes)?; - let size = bytes.len() as u64; - if let Some(_replaced) = self.cache.put(cache_key, size) { - // TODO remove the replaced item from disk - } + self.cache.put(cache_key.0, bytes.len() as u64); Ok(()) } /// Return `true` if a file with path `key` is in the cache. - pub fn contains_key>(&self, key: &K) -> bool - where K: Hash + Eq + ?Sized { + pub fn contains_key(&self, key: &str) -> bool { let cache_key = self.cache_key(key); - self.cache.contains(&cache_key) + self.cache.contains(&cache_key.0) } /// Get an opened `File` for `key`, if one exists and can be opened. Updates the Cache state /// of the file if present. Avoid using this method if at all possible, prefer `.get`. - pub fn get_file(&mut self, key: &K) -> Result - where K: Hash + Eq + ?Sized { + pub fn get_file(&mut self, key: &str) -> Result { let cache_key = self.cache_key(key); - let rel_path = self.cache_path(key); - let path = self.rel_to_abs_path(rel_path); + let path = self.cache_path(&cache_key); self.cache - .get(&cache_key) + .get(&cache_key.0) .ok_or(Error::FileNotInCache) .and_then(|_| { // TODO do we need to adjust the mtime, cross reboot? @@ -265,13 +234,11 @@ where } /// Remove the given key from the cache. 
- pub fn remove(&mut self, key: &K) -> Result<()> - where K: Hash + Eq + ?Sized { + pub fn remove(&mut self, key: &str) -> Result<()> { let cache_key = self.cache_key(key); - let rel_path = self.cache_path(key); - match self.cache.pop(&cache_key) { + match self.cache.pop(&cache_key.0) { Some(_) => { - let path = self.rel_to_abs_path(rel_path); + let path = self.cache_path(&cache_key); fs::remove_file(&path).map_err(|e| { error!("Error removing file from cache: `{:?}`: {}", path, e); Into::into(e) diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index 7f9c0bd060966..4d68d429706cc 100644 --- a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -12,21 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::borrow::Borrow; -use std::hash::Hash; use std::sync::Arc; pub trait CacheAccessor { - fn get(&self, k: &Q) -> Option> - where - K: Borrow, - Q: Hash + Eq + ?Sized; - + fn get(&self, k: &str) -> Option>; fn put(&self, key: K, value: Arc); - fn evict(&self, k: &Q) -> bool - where - K: Borrow, - Q: Hash + Eq + ?Sized; + fn evict(&self, k: &str) -> bool; } /// The minimum interface that cache providers should implement @@ -36,20 +27,12 @@ pub trait StorageCache { fn put(&mut self, key: K, value: Arc); - fn get(&mut self, k: &Q) -> Option - where - K: Borrow, - Q: Hash + Eq + ?Sized; + fn get(&mut self, k: &str) -> Option; - fn evict(&mut self, k: &Q) -> bool - where - K: Borrow, - Q: Hash + Eq + ?Sized; + fn evict(&mut self, k: &str) -> bool; } mod impls { - use std::borrow::Borrow; - use std::hash::Hash; use std::sync::Arc; use parking_lot::RwLock; @@ -62,11 +45,7 @@ mod impls { C: StorageCache>, Self: 'a, { - fn get(&self, k: &Q) -> Option - where - String: Borrow, - Q: Hash + Eq + ?Sized, - { + fn get(&self, k: &str) -> Option { let mut guard = self.write(); guard.get(k) } @@ -76,11 +55,7 @@ mod 
impls { guard.put(k, v); } - fn evict(&self, k: &Q) -> bool - where - String: Borrow, - Q: Hash + Eq + ?Sized, - { + fn evict(&self, k: &str) -> bool { let mut guard = self.write(); guard.evict(k) } @@ -89,11 +64,7 @@ mod impls { impl CacheAccessor for Option>> where C: StorageCache> { - fn get(&self, k: &Q) -> Option> - where - String: Borrow, - Q: Hash + Eq + ?Sized, - { + fn get(&self, k: &str) -> Option> { self.as_ref().and_then(|cache| cache.get(k)) } @@ -103,11 +74,7 @@ mod impls { } } - fn evict(&self, k: &Q) -> bool - where - String: Borrow, - Q: Hash + Eq + ?Sized, - { + fn evict(&self, k: &str) -> bool { if let Some(cache) = self { cache.evict(k) } else { diff --git a/src/query/storages/common/cache/src/lib.rs b/src/query/storages/common/cache/src/lib.rs index 5f20851c1e970..da3b362d0c9c4 100644 --- a/src/query/storages/common/cache/src/lib.rs +++ b/src/query/storages/common/cache/src/lib.rs @@ -30,3 +30,5 @@ pub use read::InMemoryBytesCacheReader; pub use read::InMemoryItemCacheReader; pub use read::LoadParams; pub use read::Loader; + +pub use self::metrics::*; diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 02ebae6e8e6c0..cd3131943c1b4 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::borrow::Borrow; -use std::hash::Hash; use std::io::Read; use std::sync::Arc; @@ -47,12 +45,8 @@ impl StorageCache> for DiskCache { } } - // TODO change this signature, remove tha Arc - fn get(&mut self, k: &Q) -> Option>> - where - String: Borrow, - Q: Hash + Eq + ?Sized, - { + // TODO change this signature, remove that Arc + fn get(&mut self, k: &str) -> Option>> { let mut read_file = || { let mut file = self.get_file(k)?; let mut v = vec![]; @@ -69,11 +63,7 @@ impl StorageCache> for DiskCache { } } - fn evict(&mut self, k: &Q) -> bool - where - String: Borrow, - Q: Hash + Eq + ?Sized, - { + fn evict(&mut self, k: &str) -> bool { if let Err(e) = self.remove(k) { error!("evict disk cache item failed {}", e); false diff --git a/src/query/storages/common/cache/src/providers/memory_cache.rs b/src/query/storages/common/cache/src/providers/memory_cache.rs index 9657123790440..bfb556cf75ab3 100644 --- a/src/query/storages/common/cache/src/providers/memory_cache.rs +++ b/src/query/storages/common/cache/src/providers/memory_cache.rs @@ -12,9 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::borrow::Borrow; use std::hash::BuildHasher; -use std::hash::Hash; use std::sync::Arc; use common_cache::BytesMeter; @@ -47,32 +45,23 @@ impl InMemoryCacheBuilder { } } -impl StorageCache for LruCache, S, M> +impl StorageCache for LruCache, S, M> where - M: CountableMeter>, + M: CountableMeter>, S: BuildHasher, - K: Eq + Hash, { type Meter = M; type CachedItem = Arc; - fn put(&mut self, key: K, value: Arc) { + fn put(&mut self, key: String, value: Arc) { Cache::put(self, key, value); } - fn get(&mut self, k: &Q) -> Option - where - K: Borrow, - Q: Hash + Eq + ?Sized, - { + fn get(&mut self, k: &str) -> Option { Cache::get(self, k).cloned() } - fn evict(&mut self, k: &Q) -> bool - where - K: Borrow, - Q: Hash + Eq + ?Sized, - { + fn evict(&mut self, k: &str) -> bool { self.pop(k).is_some() } } diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index a36537f0ff1b0..0e79deccf2b1a 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -37,6 +37,9 @@ use common_storage::ColumnNodes; use futures::future::try_join_all; use opendal::Object; use opendal::Operator; +use storages_common_cache::metrics_inc_cache_access_count; +use storages_common_cache::metrics_inc_cache_hit_count; +use storages_common_cache::metrics_inc_cache_miss_count; use storages_common_cache::CacheAccessor; use storages_common_cache_manager::CacheManager; use storages_common_table_meta::meta::ColumnId; @@ -314,10 +317,15 @@ impl BlockReader { let cache_manager = CacheManager::instance().get_block_data_cache(); let mut data_from_cache = vec![]; for column_id in self.project_indices.keys() { + // TODO encapsulate this in another component let column_cache_key = format!("{location}-{column_id}"); + let cache_name = "data_block_cache"; + metrics_inc_cache_access_count(1, cache_name); if let Some(cached_column_raw_data) = 
cache_manager.get(&column_cache_key) { + metrics_inc_cache_hit_count(1, cache_name); data_from_cache.push((*column_id as ColumnId, cached_column_raw_data)); } else { + metrics_inc_cache_miss_count(1, cache_name); let column_meta = &columns_meta[column_id]; let (offset, len) = column_meta.offset_length(); ranges.push((*column_id, offset..(offset + len))); From 291fe74541c6f3ab0c5cdff12c9c02a9fa9d9c22 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 1 Feb 2023 15:42:46 +0800 Subject: [PATCH 06/80] shrink lock scope of reading cached data --- src/query/storages/common/cache/src/cache.rs | 10 ++-- .../common/cache/src/providers/disk_cache.rs | 50 +++++++++++-------- .../fuse/src/io/read/block/block_reader.rs | 8 +-- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index 4d68d429706cc..5059e75a25747 100644 --- a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -40,10 +40,8 @@ mod impls { use crate::cache::CacheAccessor; use crate::cache::StorageCache; - impl<'a, V, C> CacheAccessor for Arc> - where - C: StorageCache>, - Self: 'a, + impl CacheAccessor for Arc> + where C: StorageCache> { fn get(&self, k: &str) -> Option { let mut guard = self.write(); @@ -61,8 +59,8 @@ mod impls { } } - impl CacheAccessor for Option>> - where C: StorageCache> + impl CacheAccessor for Option + where C: CacheAccessor { fn get(&self, k: &str) -> Option> { self.as_ref().and_then(|cache| cache.get(k)) diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index cd3131943c1b4..1f24afe6b9807 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -21,34 +21,30 @@ use common_exception::Result; use parking_lot::RwLock; use tracing::error; -use crate::cache::StorageCache; +use 
crate::CacheAccessor; -pub type DiskBytesCache = Arc>; +#[derive(Clone)] +pub struct DiskBytesCache { + inner: Arc>, +} pub struct DiskCacheBuilder; impl DiskCacheBuilder { pub fn new_disk_cache(path: &str, capacity: u64) -> Result { let cache = DiskCache::new(path, capacity) .map_err(|e| ErrorCode::StorageOther(format!("create disk cache failed, {e}")))?; - Ok(Arc::new(RwLock::new(cache))) + let inner = Arc::new(RwLock::new(cache)); + Ok(DiskBytesCache { inner }) } } -impl StorageCache> for DiskCache { - type Meter = (); - type CachedItem = Arc>; - - // TODO change this signature, takes &[u8] - fn put(&mut self, key: String, value: Arc>) { - if let Err(e) = self.insert_bytes(key, &value) { - error!("populate disk cache failed {}", e); - } - } - - // TODO change this signature, remove that Arc - fn get(&mut self, k: &str) -> Option>> { - let mut read_file = || { - let mut file = self.get_file(k)?; +impl CacheAccessor> for DiskBytesCache { + fn get(&self, k: &str) -> Option>> { + let read_file = || { + let mut file = { + let mut inner = self.inner.write(); + inner.get_file(k)? 
+ }; let mut v = vec![]; file.read_to_end(&mut v)?; Ok::<_, Box>(v) @@ -57,14 +53,26 @@ impl StorageCache> for DiskCache { match read_file() { Ok(bytes) => Some(Arc::new(bytes)), Err(e) => { - error!("get disk cache item failed {}", e); + error!("get disk cache item failed, {}", e); None } } } - fn evict(&mut self, k: &str) -> bool { - if let Err(e) = self.remove(k) { + fn put(&self, k: String, v: Arc>) { + if let Err(e) = { + let mut inner = self.inner.write(); + inner.insert_bytes(k, &v) + } { + error!("populate disk cache failed {}", e); + } + } + + fn evict(&self, k: &str) -> bool { + if let Err(e) = { + let mut inner = self.inner.write(); + inner.remove(k) + } { error!("evict disk cache item failed {}", e); false } else { diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index 0e79deccf2b1a..a068e1ddb14a7 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -170,8 +170,8 @@ impl BlockReader { pub fn support_blocking_api(&self) -> bool { // TODO for testing purpose only, remove this in the final PR - let force_async = std::env::var("DATABEND_DEBUG_FORCE_ASYNC_READ").is_ok(); - force_async && self.operator.metadata().can_blocking() + let revert_sync_read = std::env::var("DATABEND_DEBUG_REVERT_SYNC_READ").is_ok(); + revert_sync_read && self.operator.metadata().can_blocking() } /// This is an optimized for data read, works like the Linux kernel io-scheduler IO merging. 
@@ -314,14 +314,14 @@ impl BlockReader { } let mut ranges = vec![]; - let cache_manager = CacheManager::instance().get_block_data_cache(); + let block_data_cache = CacheManager::instance().get_block_data_cache(); let mut data_from_cache = vec![]; for column_id in self.project_indices.keys() { // TODO encapsulate this in another component let column_cache_key = format!("{location}-{column_id}"); let cache_name = "data_block_cache"; metrics_inc_cache_access_count(1, cache_name); - if let Some(cached_column_raw_data) = cache_manager.get(&column_cache_key) { + if let Some(cached_column_raw_data) = block_data_cache.get(&column_cache_key) { metrics_inc_cache_hit_count(1, cache_name); data_from_cache.push((*column_id as ColumnId, cached_column_raw_data)); } else { From 9ac4c3d3fc1ad963e3049e8c371f9bd0d57c45a9 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 1 Feb 2023 17:32:52 +0800 Subject: [PATCH 07/80] fix: cache init --- Cargo.lock | 13 ------- src/common/cache/Cargo.toml | 1 - src/common/cache/src/disk_cache.rs | 60 ++++++++++++------------------ 3 files changed, 23 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e7b0bb67e73a7..dc8befb942b97 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1293,7 +1293,6 @@ dependencies = [ name = "common-cache" version = "0.1.0" dependencies = [ - "filetime", "heapsize", "hex", "ritelinked", @@ -3582,18 +3581,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "filetime" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "windows-sys 0.42.0", -] - [[package]] name = "findshlibs" version = "0.10.2" diff --git a/src/common/cache/Cargo.toml b/src/common/cache/Cargo.toml index 4f8381c85cf50..4543d87f6b90c 100644 --- a/src/common/cache/Cargo.toml +++ b/src/common/cache/Cargo.toml @@ -18,7 +18,6 @@ amortized = 
["ritelinked/ahash-amortized", "ritelinked/inline-more-amortized"] # Crates.io dependencies #crc32fast = "1.3.2" -filetime = "0.2.15" hex = "0.4.3" ritelinked = { version = "0.3.2", default-features = false, features = ["ahash", "inline-more"] } siphasher = "0.3.10" diff --git a/src/common/cache/src/disk_cache.rs b/src/common/cache/src/disk_cache.rs index 5ab327ec8e164..e8b25f490910c 100644 --- a/src/common/cache/src/disk_cache.rs +++ b/src/common/cache/src/disk_cache.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::boxed::Box; use std::fs; use std::fs::File; use std::hash::Hash; @@ -21,8 +20,6 @@ use std::io::prelude::*; use std::path::Path; use std::path::PathBuf; -use filetime::set_file_times; -use filetime::FileTime; use siphasher::sip128; use siphasher::sip128::Hasher128; use tracing::error; @@ -34,40 +31,27 @@ use crate::FileSize; use crate::LruCache; // TODO doc the disk cache path layout -// TODO extract new type CacheKey // TODO checksum of cached data /// Return an iterator of `(path, size)` of files under `path` sorted by ascending last-modified /// time, such that the oldest modified file is returned first. -fn get_all_files>(path: P) -> Box> { - let mut files: Vec<_> = WalkDir::new(path.as_ref()) - .into_iter() - .filter_map(|e| { - e.ok().and_then(|f| { - // Only look at files - if f.file_type().is_file() { - // Get the last-modified time, size, and the full path. - f.metadata().ok().and_then(|m| { - m.modified() - .ok() - .map(|mtime| (mtime, f.path().to_owned(), m.len())) - }) - } else { - None - } - }) +fn get_all_files>(path: P) -> impl Iterator { + WalkDir::new(path.as_ref()).into_iter().filter_map(|e| { + e.ok().and_then(|f| { + // Only look at files + if f.file_type().is_file() { + f.metadata().ok().map(|m| (f.path().to_owned(), m.len())) + } else { + None + } }) - .collect(); - // Sort by last-modified-time, so oldest file first. 
- files.sort_by_key(|k| k.0); - Box::new(files.into_iter().map(|(_mtime, path, size)| (path, size))) + }) } /// An LRU cache of files on disk. pub type LruDiskCache = DiskCache>; /// An basic disk cache of files on disk. -// pub struct DiskCache pub struct DiskCache { cache: C, root: PathBuf, @@ -132,6 +116,7 @@ where C: Cache /// Scan `self.root` for existing files and store them. fn init(mut self) -> Result { + eprintln!("INIT>>>>"); fs::create_dir_all(&self.root)?; for (file, size) in get_all_files(&self.root) { if !self.can_store(size) { @@ -147,9 +132,8 @@ where C: Cache .cache .pop_by_policy() .expect("Unexpectedly empty cache!"); + // FIX ME, this path is not right :) let remove_path = self.rel_to_abs_path(&rel_path); - // TODO: check that files are removable during `init`, so that this is only - // due to outside interference. fs::remove_file(&remove_path).unwrap_or_else(|e| { panic!("Error removing file from cache: `{:?}`: {}", remove_path, e) }); @@ -169,10 +153,17 @@ where C: Cache size <= self.cache.capacity() } - pub fn recovery_from(relative_path: &Path) -> String { - let key_string = match relative_path.as_os_str().to_str() { - Some(str) => str.to_owned(), + fn recovery_from(relative_path: &Path) -> String { + let key_string = match relative_path.file_name() { + Some(file_name) => match file_name.to_str() { + Some(str) => str.to_owned(), + None => { + // relative_path is constructed by ourself, and shall be valid utf8 string + unreachable!() + } + }, None => { + // only called during init, and only path of files are passed in unreachable!() } }; @@ -225,12 +216,7 @@ where C: Cache self.cache .get(&cache_key.0) .ok_or(Error::FileNotInCache) - .and_then(|_| { - // TODO do we need to adjust the mtime, cross reboot? - let t = FileTime::now(); - set_file_times(&path, t, t)?; - File::open(path).map_err(Into::into) - }) + .and_then(|_len| File::open(path).map_err(Into::into)) } /// Remove the given key from the cache. 
From 118c8fa2ca649859e45c8c50ddf56e3b6c099574 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 1 Feb 2023 19:24:59 +0800 Subject: [PATCH 08/80] wip --- src/common/cache/src/disk_cache.rs | 1 - src/common/cache/src/todo | 3 +-- .../storages/fuse/src/operations/mutation/mutation_source.rs | 3 --- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/common/cache/src/disk_cache.rs b/src/common/cache/src/disk_cache.rs index e8b25f490910c..23b638c0b7d18 100644 --- a/src/common/cache/src/disk_cache.rs +++ b/src/common/cache/src/disk_cache.rs @@ -116,7 +116,6 @@ where C: Cache /// Scan `self.root` for existing files and store them. fn init(mut self) -> Result { - eprintln!("INIT>>>>"); fs::create_dir_all(&self.root)?; for (file, size) in get_all_files(&self.root) { if !self.can_store(size) { diff --git a/src/common/cache/src/todo b/src/common/cache/src/todo index b60114d9796c9..a90c14843c2d1 100644 --- a/src/common/cache/src/todo +++ b/src/common/cache/src/todo @@ -6,7 +6,6 @@ - what happens if cache is full? will performance degrade significantly? 
- if performance do not meet requirement - consider make it tiered - - a in-memory moka at front - - disk cache layer listen to the eviction event (serving in dedicate thread pool) + - a in-memory moka at front, LFU to keep the candidates - misc: restart maybe slow if cached a large amount of data, consider make the startup of data cache layer async silently drop cache admissions during starting up \ No newline at end of file diff --git a/src/query/storages/fuse/src/operations/mutation/mutation_source.rs b/src/query/storages/fuse/src/operations/mutation/mutation_source.rs index bc2ec4ce988b2..2b7d2e5830269 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutation_source.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutation_source.rs @@ -35,7 +35,6 @@ use common_functions::scalars::BUILTIN_FUNCTIONS; use common_sql::evaluator::BlockOperator; use storages_common_pruner::BlockMetaIndex; use storages_common_table_meta::meta::ClusterStatistics; -use storages_common_table_meta::meta::ColumnId; use crate::fuse_part::FusePartInfo; use crate::io::BlockReader; @@ -48,8 +47,6 @@ use crate::pipelines::processors::processor::ProcessorPtr; use crate::pipelines::processors::Processor; use crate::MergeIOReadResult; -type DataChunks = Vec<(ColumnId, Vec)>; - pub enum MutationAction { Deletion, Update, From b10afa77fd7c37ddb52ca159b2c814b5c02e0f86 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Fri, 3 Feb 2023 11:34:35 +0800 Subject: [PATCH 09/80] fix: evcition --- src/common/cache/src/disk_cache.rs | 15 ++++++++++++++- src/query/storages/common/cache/src/cache.rs | 8 ++++---- .../common/cache/src/providers/memory_cache.rs | 4 ++-- .../common/cache/src/read/cached_reader.rs | 2 +- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/common/cache/src/disk_cache.rs b/src/common/cache/src/disk_cache.rs index 23b638c0b7d18..aaf5712a22bc7 100644 --- a/src/common/cache/src/disk_cache.rs +++ b/src/common/cache/src/disk_cache.rs @@ -23,6 +23,7 @@ use 
std::path::PathBuf; use siphasher::sip128; use siphasher::sip128::Hasher128; use tracing::error; +use tracing::warn; use walkdir::WalkDir; use crate::Cache; @@ -187,10 +188,22 @@ where C: Cache /// Add a file with `bytes` as its contents to the cache at path `key`. pub fn insert_bytes>(&mut self, key: K, bytes: &[u8]) -> Result<()> { - if !self.can_store(bytes.len() as u64) { + let item_len = bytes.len() as u64; + // check if this chunk of bytes itself is too large + if !self.can_store(item_len) { return Err(Error::FileTooLarge); } + // check eviction + if self.cache.size() + item_len > self.cache.capacity() { + if let Some((rel_path, _)) = self.cache.pop_by_policy() { + let remove_path = self.rel_to_abs_path(rel_path); + fs::remove_file(&remove_path).unwrap_or_else(|e| { + warn!("Error removing file from cache: `{:?}`: {}", remove_path, e) + }); + } + } + let cache_key = self.cache_key(key.as_ref()); let path = self.cache_path(&cache_key); // TODO rm this panic, no nested dirs here diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index 5059e75a25747..cc0b77e770eb4 100644 --- a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -23,11 +23,11 @@ pub trait CacheAccessor { /// The minimum interface that cache providers should implement pub trait StorageCache { type Meter; - type CachedItem; + type CacheEntry; fn put(&mut self, key: K, value: Arc); - fn get(&mut self, k: &str) -> Option; + fn get(&mut self, k: &str) -> Option; fn evict(&mut self, k: &str) -> bool; } @@ -41,9 +41,9 @@ mod impls { use crate::cache::StorageCache; impl CacheAccessor for Arc> - where C: StorageCache> + where C: StorageCache> { - fn get(&self, k: &str) -> Option { + fn get(&self, k: &str) -> Option { let mut guard = self.write(); guard.get(k) } diff --git a/src/query/storages/common/cache/src/providers/memory_cache.rs b/src/query/storages/common/cache/src/providers/memory_cache.rs index 
bfb556cf75ab3..146f532ba53f4 100644 --- a/src/query/storages/common/cache/src/providers/memory_cache.rs +++ b/src/query/storages/common/cache/src/providers/memory_cache.rs @@ -51,13 +51,13 @@ where S: BuildHasher, { type Meter = M; - type CachedItem = Arc; + type CacheEntry = Arc; fn put(&mut self, key: String, value: Arc) { Cache::put(self, key, value); } - fn get(&mut self, k: &str) -> Option { + fn get(&mut self, k: &str) -> Option { Cache::get(self, k).cloned() } diff --git a/src/query/storages/common/cache/src/read/cached_reader.rs b/src/query/storages/common/cache/src/read/cached_reader.rs index 50f0e51465079..4cebc6e99b360 100644 --- a/src/query/storages/common/cache/src/read/cached_reader.rs +++ b/src/query/storages/common/cache/src/read/cached_reader.rs @@ -44,7 +44,7 @@ pub struct CachedReader { impl<'a, T, L, C, M> CachedReader where L: Loader + Sync, - C: 'a + StorageCache>, + C: 'a + StorageCache>, { pub fn new(cache: Option>>, name: impl Into, loader: L) -> Self { Self { From c265c4df454e11925a319eeb7017cc0e82bb857e Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 4 Feb 2023 11:51:27 +0800 Subject: [PATCH 10/80] add unit tests --- Cargo.lock | 1 + src/common/cache/Cargo.toml | 2 +- src/common/cache/src/disk_cache.rs | 78 ++++++---- src/common/cache/src/lib.rs | 1 - src/common/cache/tests/it/disk_cache.rs | 135 +++++------------- src/common/cache/tests/it/main.rs | 2 +- .../common/cache/src/providers/disk_cache.rs | 2 +- 7 files changed, 90 insertions(+), 131 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3bce605903454..7d53b80fe28eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1297,6 +1297,7 @@ dependencies = [ "hex", "ritelinked", "siphasher", + "tempfile", "tracing", "walkdir", ] diff --git a/src/common/cache/Cargo.toml b/src/common/cache/Cargo.toml index 4543d87f6b90c..91cf1f3fee195 100644 --- a/src/common/cache/Cargo.toml +++ b/src/common/cache/Cargo.toml @@ -28,4 +28,4 @@ walkdir = "2.3.2" heapsize_ = { package = "heapsize", version 
= "0.4.2", optional = true } [dev-dependencies] -#tempfile = "3.3.0" +tempfile = "3.3.0" diff --git a/src/common/cache/src/disk_cache.rs b/src/common/cache/src/disk_cache.rs index aaf5712a22bc7..433f017ef1b9f 100644 --- a/src/common/cache/src/disk_cache.rs +++ b/src/common/cache/src/disk_cache.rs @@ -14,7 +14,6 @@ use std::fs; use std::fs::File; -use std::hash::Hash; use std::hash::Hasher; use std::io::prelude::*; use std::path::Path; @@ -23,7 +22,6 @@ use std::path::PathBuf; use siphasher::sip128; use siphasher::sip128::Hasher128; use tracing::error; -use tracing::warn; use walkdir::WalkDir; use crate::Cache; @@ -58,8 +56,31 @@ pub struct DiskCache { root: PathBuf, } -#[derive(Hash)] -struct CacheKey(String); +// make it public for IT +pub struct CacheKey(String); + +impl From for CacheKey +where S: AsRef +{ + // convert key string into hex string of SipHash 2-4 128 + fn from(key: S) -> Self { + let mut sip = sip128::SipHasher24::new(); + let key = key.as_ref(); + sip.write(key.as_bytes()); + let hash = sip.finish128(); + let hex_hash = hex::encode(hash.as_bytes()); + CacheKey(hex_hash) + } +} + +impl From<&CacheKey> for PathBuf { + fn from(cache_key: &CacheKey) -> Self { + let prefix = &cache_key.0[0..3]; + let mut path_buf = PathBuf::from(prefix); + path_buf.push(Path::new(&cache_key.0)); + path_buf + } +} impl DiskCache where C: Cache @@ -132,10 +153,12 @@ where C: Cache .cache .pop_by_policy() .expect("Unexpectedly empty cache!"); - // FIX ME, this path is not right :) - let remove_path = self.rel_to_abs_path(&rel_path); - fs::remove_file(&remove_path).unwrap_or_else(|e| { - panic!("Error removing file from cache: `{:?}`: {}", remove_path, e) + let cache_item_path = self.cache_absolute_path(&CacheKey(rel_path)); + fs::remove_file(&cache_item_path).unwrap_or_else(|e| { + error!( + "Error removing file from cache: `{:?}`: {}", + cache_item_path, e + ) }); } let relative_path = file @@ -170,24 +193,17 @@ where C: Cache key_string } - // convert key string into 
hex string of SipHash 2-4 128 - fn cache_key(&self, str: &str) -> CacheKey { - let mut sip = sip128::SipHasher24::new(); - sip.write(str.as_bytes()); - let hash = sip.finish128(); - let hex_hash = hex::encode(hash.as_bytes()); - CacheKey(hex_hash) + fn cache_key(&self, key: &str) -> CacheKey { + CacheKey::from(key) } - fn cache_path(&self, cache_key: &CacheKey) -> PathBuf { - let prefix = &cache_key.0[0..3]; - let mut path_buf = PathBuf::from(prefix); - path_buf.push(Path::new(&cache_key.0)); - self.rel_to_abs_path(path_buf) + fn cache_absolute_path(&self, cache_key: &CacheKey) -> PathBuf { + let path = PathBuf::from(cache_key); + self.rel_to_abs_path(path) } /// Add a file with `bytes` as its contents to the cache at path `key`. - pub fn insert_bytes>(&mut self, key: K, bytes: &[u8]) -> Result<()> { + pub fn insert_bytes(&mut self, key: &str, bytes: &[u8]) -> Result<()> { let item_len = bytes.len() as u64; // check if this chunk of bytes itself is too large if !self.can_store(item_len) { @@ -197,17 +213,21 @@ where C: Cache // check eviction if self.cache.size() + item_len > self.cache.capacity() { if let Some((rel_path, _)) = self.cache.pop_by_policy() { - let remove_path = self.rel_to_abs_path(rel_path); - fs::remove_file(&remove_path).unwrap_or_else(|e| { - warn!("Error removing file from cache: `{:?}`: {}", remove_path, e) + let cached_item_path = self.cache_absolute_path(&CacheKey(rel_path)); + fs::remove_file(&cached_item_path).unwrap_or_else(|e| { + error!( + "Error removing file from cache: `{:?}`: {}", + cached_item_path, e + ) }); } } let cache_key = self.cache_key(key.as_ref()); - let path = self.cache_path(&cache_key); - // TODO rm this panic, no nested dirs here - fs::create_dir_all(path.parent().expect("Bad path?"))?; + let path = self.cache_absolute_path(&cache_key); + if let Some(parent_path) = path.parent() { + fs::create_dir_all(parent_path)?; + } let mut f = File::create(&path)?; f.write_all(bytes)?; self.cache.put(cache_key.0, bytes.len() as 
u64); @@ -224,7 +244,7 @@ where C: Cache /// of the file if present. Avoid using this method if at all possible, prefer `.get`. pub fn get_file(&mut self, key: &str) -> Result { let cache_key = self.cache_key(key); - let path = self.cache_path(&cache_key); + let path = self.cache_absolute_path(&cache_key); self.cache .get(&cache_key.0) .ok_or(Error::FileNotInCache) @@ -236,7 +256,7 @@ where C: Cache let cache_key = self.cache_key(key); match self.cache.pop(&cache_key.0) { Some(_) => { - let path = self.cache_path(&cache_key); + let path = self.cache_absolute_path(&cache_key); fs::remove_file(&path).map_err(|e| { error!("Error removing file from cache: `{:?}`: {}", path, e); Into::into(e) diff --git a/src/common/cache/src/lib.rs b/src/common/cache/src/lib.rs index 5c92472e95722..f53e1e7b93079 100644 --- a/src/common/cache/src/lib.rs +++ b/src/common/cache/src/lib.rs @@ -13,7 +13,6 @@ // limitations under the License. #![allow(clippy::uninlined_format_args)] -#![deny(unused_crate_dependencies)] #[cfg(feature = "heapsize")] #[cfg(not(target_os = "macos"))] diff --git a/src/common/cache/tests/it/disk_cache.rs b/src/common/cache/tests/it/disk_cache.rs index 8e31474571135..82239768c368d 100644 --- a/src/common/cache/tests/it/disk_cache.rs +++ b/src/common/cache/tests/it/disk_cache.rs @@ -13,18 +13,17 @@ // limitations under the License. // +use std::fs; use std::fs::File; -use std::fs::{self}; +use std::io; use std::io::Read; use std::io::Write; -use std::io::{self}; use std::path::Path; use std::path::PathBuf; +use common_cache::CacheKey; use common_cache::DiskCacheError; use common_cache::LruDiskCache; -use filetime::set_file_times; -use filetime::FileTime; use tempfile::TempDir; struct TestFixture { @@ -44,14 +43,6 @@ fn create_file, F: FnOnce(File) -> io::Result<()>>( b.canonicalize() } -/// Set the last modified time of `path` backwards by `seconds` seconds. 
-fn set_mtime_back>(path: T, seconds: usize) { - let m = fs::metadata(path.as_ref()).unwrap(); - let t = FileTime::from_last_modification_time(&m); - let t = FileTime::from_unix_time(t.unix_seconds() - seconds as i64, t.nanoseconds()); - set_file_times(path, t, t).unwrap(); -} - fn read_all(r: &mut R) -> io::Result> { let mut v = vec![]; r.read_to_end(&mut v)?; @@ -95,40 +86,43 @@ fn test_missing_root() { #[test] fn test_some_existing_files() { let f = TestFixture::new(); - f.create_file("file1", 10); - f.create_file("file2", 10); - let c = LruDiskCache::new(f.tmp(), 20).unwrap(); - assert_eq!(c.size(), 20); - assert_eq!(c.len(), 2); + let items = 10; + let sizes = (0..).take(items); + let total_bytes: u64 = sizes.clone().sum(); + for i in sizes { + let file_name = format!("file-{i}"); + let test_key = CacheKey::from(file_name.as_str()); + let test_path = PathBuf::from(&test_key); + f.create_file(test_path, i as usize); + } + + let c = LruDiskCache::new(f.tmp(), total_bytes).unwrap(); + assert_eq!(c.size(), total_bytes); + assert_eq!(c.len(), items); } #[test] fn test_existing_file_too_large() { let f = TestFixture::new(); - let c = LruDiskCache::new(f.tmp(), 15).unwrap(); - let cache_key = - // Create files explicitly in the past. 
- set_mtime_back(f.create_file("file1", 10), 10); - set_mtime_back(f.create_file("file2", 10), 5); - assert_eq!(c.size(), 10); - assert_eq!(c.len(), 1); - assert!(!c.contains_key("file1")); - assert!(c.contains_key("file2")); -} + let items_count = 10; + let items_count_shall_be_kept = 10 - 2; + let item_size = 10; + let capacity = items_count_shall_be_kept * item_size; + let sizes = (0..).take(items_count); + for i in sizes { + let file_name = format!("file-{i}"); + let test_key = CacheKey::from(file_name.as_str()); + let test_path = PathBuf::from(&test_key); + f.create_file(test_path, item_size); + } + let c = LruDiskCache::new(f.tmp(), capacity as u64).unwrap(); -#[test] -fn test_existing_files_lru_mtime() { - let f = TestFixture::new(); - // Create files explicitly in the past. - set_mtime_back(f.create_file("file1", 10), 5); - set_mtime_back(f.create_file("file2", 10), 10); - let mut c = LruDiskCache::new(f.tmp(), 25).unwrap(); - assert_eq!(c.size(), 20); - c.insert_bytes("file3", &[0; 10]).unwrap(); - assert_eq!(c.size(), 20); - // The oldest file on disk should have been removed. - assert!(!c.contains_key("file2")); - assert!(c.contains_key("file1")); + assert_eq!(c.size(), capacity as u64); + assert_eq!(c.len(), items_count_shall_be_kept); + for i in (0..).take(items_count_shall_be_kept) { + let file_name = format!("file-{i}"); + c.contains_key(file_name.as_str()); + } } #[test] @@ -144,7 +138,9 @@ fn test_insert_bytes() { assert_eq!(c.size(), 20); // The least-recently-used file should have been removed. assert!(!c.contains_key("a/b/c")); - assert!(!f.tmp().join("a/b/c").exists()); + + let evicted_file_path = PathBuf::from(&CacheKey::from("a/b/c")); + assert!(!f.tmp().join(evicted_file_path).exists()); } #[test] @@ -178,29 +174,6 @@ fn test_add_get_lru() { // The least-recently-used file should have been removed. assert!(!c.contains_key("file2")); } - // Get rid of the cache, to test that the LRU persists on-disk as mtimes. 
- // This is hacky, but mtime resolution on my mac with HFS+ is only 1 second, so we either - // need to have a 1 second sleep in the test (boo) or adjust the mtimes back a bit so - // that updating one file to the current time actually works to make it newer. - set_mtime_back(f.tmp().join("file1"), 5); - set_mtime_back(f.tmp().join("file3"), 5); - { - let mut c = LruDiskCache::new(f.tmp(), 25).unwrap(); - // Bump file1 again. - c.get_file("file1").unwrap(); - } - // Now check that the on-disk mtimes were updated and used. - { - let mut c = LruDiskCache::new(f.tmp(), 25).unwrap(); - assert!(c.contains_key("file1")); - assert!(c.contains_key("file3")); - assert_eq!(c.size(), 20); - // Add another file to bump out the least-recently-used. - c.insert_bytes("file4", &[4; 10]).unwrap(); - assert_eq!(c.size(), 20); - assert!(!c.contains_key("file3")); - assert!(c.contains_key("file1")); - } } #[test] @@ -212,37 +185,3 @@ fn test_insert_bytes_too_large() { x => panic!("Unexpected result: {:?}", x), } } - -// TODO -//#[test] -// fn test_remove() { -// let f = TestFixture::new(); -// let p1 = f.create_file("file1", 10); -// let p2 = f.create_file("file2", 10); -// let p3 = f.create_file("file3", 10); -// let mut c = LruDiskCache::new(f.tmp().join("cache"), 25).unwrap(); -// c.insert_file("file1", &p1).unwrap(); -// c.insert_file("file2", &p2).unwrap(); -// c.remove("file1").unwrap(); -// c.insert_file("file3", &p3).unwrap(); -// assert_eq!(c.len(), 2); -// assert_eq!(c.size(), 20); -// -// // file1 should have been removed. -// assert!(!c.contains_key("file1")); -// assert!(!f.tmp().join("cache").join("file1").exists()); -// assert!(f.tmp().join("cache").join("file2").exists()); -// assert!(f.tmp().join("cache").join("file3").exists()); -// assert!(!p1.exists()); -// assert!(!p2.exists()); -// assert!(!p3.exists()); -// -// let p4 = f.create_file("file1", 10); -// c.insert_file("file1", &p4).unwrap(); -// assert_eq!(c.len(), 2); -// // file2 should have been removed. 
-// assert!(c.contains_key("file1")); -// assert!(!c.contains_key("file2")); -// assert!(!f.tmp().join("cache").join("file2").exists()); -// assert!(!p4.exists()); -//} diff --git a/src/common/cache/tests/it/main.rs b/src/common/cache/tests/it/main.rs index 27049ede54a7e..c69f732a0428c 100644 --- a/src/common/cache/tests/it/main.rs +++ b/src/common/cache/tests/it/main.rs @@ -15,4 +15,4 @@ #![allow(clippy::uninlined_format_args)] mod cache; -// mod disk_cache; +mod disk_cache; diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 1f24afe6b9807..d8c899dfc18db 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -62,7 +62,7 @@ impl CacheAccessor> for DiskBytesCache { fn put(&self, k: String, v: Arc>) { if let Err(e) = { let mut inner = self.inner.write(); - inner.insert_bytes(k, &v) + inner.insert_bytes(&k, &v) } { error!("populate disk cache failed {}", e); } From 254f5a739e2746b1d7543a41c4e01097b22176e7 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 6 Feb 2023 13:22:16 +0800 Subject: [PATCH 11/80] add crc checksum --- Cargo.lock | 2 + src/common/cache/Cargo.toml | 2 +- src/common/cache/src/disk_cache.rs | 31 +++++++------ src/common/cache/src/lib.rs | 2 +- src/common/cache/tests/it/disk_cache.rs | 32 ++++++++----- src/query/storages/common/cache/Cargo.toml | 1 + .../common/cache/src/providers/disk_cache.rs | 45 ++++++++++++++++++- 7 files changed, 87 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7d53b80fe28eb..25a22ba93d80e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1293,6 +1293,7 @@ dependencies = [ name = "common-cache" version = "0.1.0" dependencies = [ + "crc32fast", "heapsize", "hex", "ritelinked", @@ -7799,6 +7800,7 @@ dependencies = [ "common-base", "common-cache", "common-exception", + "crc32fast", "metrics", "opendal", "parking_lot 0.12.1", diff 
--git a/src/common/cache/Cargo.toml b/src/common/cache/Cargo.toml index 91cf1f3fee195..3b8fd06a46143 100644 --- a/src/common/cache/Cargo.toml +++ b/src/common/cache/Cargo.toml @@ -17,7 +17,7 @@ amortized = ["ritelinked/ahash-amortized", "ritelinked/inline-more-amortized"] [dependencies] # Crates.io dependencies -#crc32fast = "1.3.2" +crc32fast = "1.3.2" hex = "0.4.3" ritelinked = { version = "0.3.2", default-features = false, features = ["ahash", "inline-more"] } siphasher = "0.3.10" diff --git a/src/common/cache/src/disk_cache.rs b/src/common/cache/src/disk_cache.rs index 433f017ef1b9f..2acf3388aa882 100644 --- a/src/common/cache/src/disk_cache.rs +++ b/src/common/cache/src/disk_cache.rs @@ -16,6 +16,7 @@ use std::fs; use std::fs::File; use std::hash::Hasher; use std::io::prelude::*; +use std::io::IoSlice; use std::path::Path; use std::path::PathBuf; @@ -30,7 +31,6 @@ use crate::FileSize; use crate::LruCache; // TODO doc the disk cache path layout -// TODO checksum of cached data /// Return an iterator of `(path, size)` of files under `path` sorted by ascending last-modified /// time, such that the oldest modified file is returned first. @@ -153,7 +153,7 @@ where C: Cache .cache .pop_by_policy() .expect("Unexpectedly empty cache!"); - let cache_item_path = self.cache_absolute_path(&CacheKey(rel_path)); + let cache_item_path = self.abs_path_of_cache_key(&CacheKey(rel_path)); fs::remove_file(&cache_item_path).unwrap_or_else(|e| { error!( "Error removing file from cache: `{:?}`: {}", @@ -197,23 +197,22 @@ where C: Cache CacheKey::from(key) } - fn cache_absolute_path(&self, cache_key: &CacheKey) -> PathBuf { + fn abs_path_of_cache_key(&self, cache_key: &CacheKey) -> PathBuf { let path = PathBuf::from(cache_key); self.rel_to_abs_path(path) } - /// Add a file with `bytes` as its contents to the cache at path `key`. 
- pub fn insert_bytes(&mut self, key: &str, bytes: &[u8]) -> Result<()> { - let item_len = bytes.len() as u64; + pub fn insert_bytes(&mut self, key: &str, bytes: &[&[u8]]) -> Result<()> { + let bytes_len = bytes.iter().map(|x| x.len() as u64).sum::(); // check if this chunk of bytes itself is too large - if !self.can_store(item_len) { + if !self.can_store(bytes_len) { return Err(Error::FileTooLarge); } // check eviction - if self.cache.size() + item_len > self.cache.capacity() { + if self.cache.size() + bytes_len > self.cache.capacity() { if let Some((rel_path, _)) = self.cache.pop_by_policy() { - let cached_item_path = self.cache_absolute_path(&CacheKey(rel_path)); + let cached_item_path = self.abs_path_of_cache_key(&CacheKey(rel_path)); fs::remove_file(&cached_item_path).unwrap_or_else(|e| { error!( "Error removing file from cache: `{:?}`: {}", @@ -224,13 +223,17 @@ where C: Cache } let cache_key = self.cache_key(key.as_ref()); - let path = self.cache_absolute_path(&cache_key); + let path = self.abs_path_of_cache_key(&cache_key); if let Some(parent_path) = path.parent() { fs::create_dir_all(parent_path)?; } let mut f = File::create(&path)?; - f.write_all(bytes)?; - self.cache.put(cache_key.0, bytes.len() as u64); + let mut bufs = Vec::with_capacity(bytes.len()); + for x in bytes { + bufs.push(IoSlice::new(x)); + } + f.write_all_vectored(&mut bufs)?; + self.cache.put(cache_key.0, bytes_len); Ok(()) } @@ -244,7 +247,7 @@ where C: Cache /// of the file if present. Avoid using this method if at all possible, prefer `.get`. 
pub fn get_file(&mut self, key: &str) -> Result { let cache_key = self.cache_key(key); - let path = self.cache_absolute_path(&cache_key); + let path = self.abs_path_of_cache_key(&cache_key); self.cache .get(&cache_key.0) .ok_or(Error::FileNotInCache) @@ -256,7 +259,7 @@ where C: Cache let cache_key = self.cache_key(key); match self.cache.pop(&cache_key.0) { Some(_) => { - let path = self.cache_absolute_path(&cache_key); + let path = self.abs_path_of_cache_key(&cache_key); fs::remove_file(&path).map_err(|e| { error!("Error removing file from cache: `{:?}`: {}", path, e); Into::into(e) diff --git a/src/common/cache/src/lib.rs b/src/common/cache/src/lib.rs index f53e1e7b93079..72d75e9234bf0 100644 --- a/src/common/cache/src/lib.rs +++ b/src/common/cache/src/lib.rs @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#![feature(write_all_vectored)] #![allow(clippy::uninlined_format_args)] - #[cfg(feature = "heapsize")] #[cfg(not(target_os = "macos"))] extern crate heapsize_; diff --git a/src/common/cache/tests/it/disk_cache.rs b/src/common/cache/tests/it/disk_cache.rs index 82239768c368d..ddf0da2bca583 100644 --- a/src/common/cache/tests/it/disk_cache.rs +++ b/src/common/cache/tests/it/disk_cache.rs @@ -23,6 +23,7 @@ use std::path::PathBuf; use common_cache::CacheKey; use common_cache::DiskCacheError; +use common_cache::DiskCacheResult; use common_cache::LruDiskCache; use tempfile::TempDir; @@ -31,6 +32,17 @@ struct TestFixture { pub tempdir: TempDir, } +// helper trait to simplify the test case +trait InsertSingleSlice { + fn insert_single_slice(&mut self, key: &str, bytes: &[u8]) -> DiskCacheResult<()>; +} + +impl InsertSingleSlice for LruDiskCache { + fn insert_single_slice(&mut self, key: &str, bytes: &[u8]) -> DiskCacheResult<()> { + self.insert_bytes(key, &[bytes]) + } +} + fn create_file, F: FnOnce(File) -> io::Result<()>>( dir: &Path, path: T, @@ -129,12 +141,12 @@ fn 
test_existing_file_too_large() { fn test_insert_bytes() { let f = TestFixture::new(); let mut c = LruDiskCache::new(f.tmp(), 25).unwrap(); - c.insert_bytes("a/b/c", &[0; 10]).unwrap(); + c.insert_single_slice("a/b/c", &[0; 10]).unwrap(); assert!(c.contains_key("a/b/c")); - c.insert_bytes("a/b/d", &[0; 10]).unwrap(); + c.insert_single_slice("a/b/d", &[0; 10]).unwrap(); assert_eq!(c.size(), 20); // Adding this third file should put the cache above the limit. - c.insert_bytes("x/y/z", &[0; 10]).unwrap(); + c.insert_single_slice("x/y/z", &[0; 10]).unwrap(); assert_eq!(c.size(), 20); // The least-recently-used file should have been removed. assert!(!c.contains_key("a/b/c")); @@ -148,10 +160,10 @@ fn test_insert_bytes_exact() { // Test that files adding up to exactly the size limit works. let f = TestFixture::new(); let mut c = LruDiskCache::new(f.tmp(), 20).unwrap(); - c.insert_bytes("file1", &[1; 10]).unwrap(); - c.insert_bytes("file2", &[2; 10]).unwrap(); + c.insert_single_slice("file1", &[1; 10]).unwrap(); + c.insert_single_slice("file2", &[2; 10]).unwrap(); assert_eq!(c.size(), 20); - c.insert_bytes("file3", &[3; 10]).unwrap(); + c.insert_single_slice("file3", &[3; 10]).unwrap(); assert_eq!(c.size(), 20); assert!(!c.contains_key("file1")); } @@ -161,15 +173,15 @@ fn test_add_get_lru() { let f = TestFixture::new(); { let mut c = LruDiskCache::new(f.tmp(), 25).unwrap(); - c.insert_bytes("file1", &[1; 10]).unwrap(); - c.insert_bytes("file2", &[2; 10]).unwrap(); + c.insert_single_slice("file1", &[1; 10]).unwrap(); + c.insert_single_slice("file2", &[2; 10]).unwrap(); // Get the file to bump its LRU status. assert_eq!(read_all(&mut c.get_file("file1").unwrap()).unwrap(), vec![ 1u8; 10 ]); // Adding this third file should put the cache above the limit. - c.insert_bytes("file3", &[3; 10]).unwrap(); + c.insert_single_slice("file3", &[3; 10]).unwrap(); assert_eq!(c.size(), 20); // The least-recently-used file should have been removed. 
assert!(!c.contains_key("file2")); @@ -180,7 +192,7 @@ fn test_add_get_lru() { fn test_insert_bytes_too_large() { let f = TestFixture::new(); let mut c = LruDiskCache::new(f.tmp(), 1).unwrap(); - match c.insert_bytes("a/b/c", &[0; 2]) { + match c.insert_single_slice("a/b/c", &[0; 2]) { Err(DiskCacheError::FileTooLarge) => {} x => panic!("Unexpected result: {:?}", x), } diff --git a/src/query/storages/common/cache/Cargo.toml b/src/query/storages/common/cache/Cargo.toml index 6ea8a12f84de3..04cb58b643ae7 100644 --- a/src/query/storages/common/cache/Cargo.toml +++ b/src/query/storages/common/cache/Cargo.toml @@ -17,6 +17,7 @@ common-cache = { path = "../../../../common/cache" } common-exception = { path = "../../../../common/exception" } async-trait = { version = "0.1.57", package = "async-trait-fn" } +crc32fast = "1.3.2" metrics = "0.20.1" opendal = { workspace = true } parking_lot = "0.12.1" diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index d8c899dfc18db..5fe140197e961 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -51,7 +51,22 @@ impl CacheAccessor> for DiskBytesCache { }; match read_file() { - Ok(bytes) => Some(Arc::new(bytes)), + Ok(mut bytes) => { + if let Err(e) = validate_checksum(bytes.as_slice()) { + error!("data cache, of key {k}, crc validation failure: {e}"); + { + // remove the invalid cache, error of removal ignored + let mut inner = self.inner.write(); + let _ = inner.remove(k); + } + return None; + } + // return the bytes without the checksum bytes + let total_len = bytes.len(); + let body_len = total_len - 4; + bytes.truncate(body_len); + Some(Arc::new(bytes)) + } Err(e) => { error!("get disk cache item failed, {}", e); None @@ -61,8 +76,10 @@ impl CacheAccessor> for DiskBytesCache { fn put(&self, k: String, v: Arc>) { if let Err(e) = { + let crc = 
crc32fast::hash(v.as_slice()); + let crc_bytes = crc.to_le_bytes(); let mut inner = self.inner.write(); - inner.insert_bytes(&k, &v) + inner.insert_bytes(&k, &[v.as_slice(), &crc_bytes]) } { error!("populate disk cache failed {}", e); } @@ -80,3 +97,27 @@ impl CacheAccessor> for DiskBytesCache { } } } + +/// Assuming that the crc32 is at the end of `bytes` and encoded as le u32. +// Although parquet page has built-in crc, but it is optional (and not generated in parquet2) +// Later, if cache data is put into redis, we can reuse the checksum logic +fn validate_checksum(bytes: &[u8]) -> Result<()> { + let total_len = bytes.len(); + if total_len <= 4 { + Err(ErrorCode::StorageOther(format!( + "crc checksum validation failure: invalid file length {total_len}" + ))) + } else { + // checksum validation + let crc_bytes: [u8; 4] = bytes[total_len - 4..].try_into().unwrap(); + let crc = u32::from_le_bytes(crc_bytes); + let crc_calculated = crc32fast::hash(&bytes[4..]); + if crc == crc_calculated { + Ok(()) + } else { + Err(ErrorCode::StorageOther(format!( + "crc checksum validation failure, key : crc checksum not match, crc kept in file {crc}, crc calculated {crc_calculated}" + ))) + } + } +} From 4b1ca8bfbbcc2011e3719eafbac2b434f4165ce0 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 6 Feb 2023 18:36:48 +0800 Subject: [PATCH 12/80] tiered disk cache --- Cargo.lock | 8 + src/common/cache/src/lib.rs | 8 +- .../src/{disk_cache.rs => lru_disk_cache.rs} | 0 src/common/cache/src/todo | 2 +- src/query/storages/common/cache/Cargo.toml | 2 + .../common/cache/src/providers/disk_cache.rs | 163 ++++++++++++++++-- 6 files changed, 165 insertions(+), 18 deletions(-) rename src/common/cache/src/{disk_cache.rs => lru_disk_cache.rs} (100%) diff --git a/Cargo.lock b/Cargo.lock index 25a22ba93d80e..5910eb3ea7229 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6968,6 +6968,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "ringbuffer" +version = "0.12.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1973f95452ec56a4b2c683f5eabc15617e38f4faf2776ed4a0011d5070ecb37e" + [[package]] name = "rio" version = "0.9.4" @@ -7801,9 +7807,11 @@ dependencies = [ "common-cache", "common-exception", "crc32fast", + "crossbeam-channel", "metrics", "opendal", "parking_lot 0.12.1", + "ringbuffer", "serde", "serde_json", "tracing", diff --git a/src/common/cache/src/lib.rs b/src/common/cache/src/lib.rs index 72d75e9234bf0..17bcdaea889de 100644 --- a/src/common/cache/src/lib.rs +++ b/src/common/cache/src/lib.rs @@ -19,14 +19,14 @@ extern crate heapsize_; mod cache; -mod disk_cache; +mod lru_disk_cache; mod meter; pub use cache::lru::LruCache; pub use cache::Cache; -pub use disk_cache::result::Error as DiskCacheError; -pub use disk_cache::result::Result as DiskCacheResult; -pub use disk_cache::*; +pub use lru_disk_cache::result::Error as DiskCacheError; +pub use lru_disk_cache::result::Result as DiskCacheResult; +pub use lru_disk_cache::*; pub use meter::bytes_meter::BytesMeter; pub use meter::count_meter::Count; pub use meter::count_meter::CountableMeter; diff --git a/src/common/cache/src/disk_cache.rs b/src/common/cache/src/lru_disk_cache.rs similarity index 100% rename from src/common/cache/src/disk_cache.rs rename to src/common/cache/src/lru_disk_cache.rs diff --git a/src/common/cache/src/todo b/src/common/cache/src/todo index a90c14843c2d1..507b353d011b5 100644 --- a/src/common/cache/src/todo +++ b/src/common/cache/src/todo @@ -1,6 +1,6 @@ - cache data crc - metrics -- performance evaluation +- performance evaluation (done) - without threshold - just put bytes to disk (no sync) - what happens if cache is full? will performance degrade significantly? 
diff --git a/src/query/storages/common/cache/Cargo.toml b/src/query/storages/common/cache/Cargo.toml index 04cb58b643ae7..35dca504b1c5a 100644 --- a/src/query/storages/common/cache/Cargo.toml +++ b/src/query/storages/common/cache/Cargo.toml @@ -18,9 +18,11 @@ common-exception = { path = "../../../../common/exception" } async-trait = { version = "0.1.57", package = "async-trait-fn" } crc32fast = "1.3.2" +crossbeam-channel = "0.5.6" metrics = "0.20.1" opendal = { workspace = true } parking_lot = "0.12.1" +ringbuffer = "0.12.0" serde = { workspace = true } serde_json = { workspace = true } tracing = "0.1.36" diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 5fe140197e961..4c43dd3422e12 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -14,35 +14,73 @@ use std::io::Read; use std::sync::Arc; +use std::thread::JoinHandle; +use common_cache::Cache; pub use common_cache::LruDiskCache as DiskCache; use common_exception::ErrorCode; use common_exception::Result; +use crossbeam_channel::TrySendError; use parking_lot::RwLock; use tracing::error; +use tracing::info; +use tracing::warn; use crate::CacheAccessor; +use crate::InMemoryBytesCacheHolder; +use crate::InMemoryCacheBuilder; +struct CacheItem { + key: String, + value: Arc>, +} + +/// Tiered cache which consist of +/// A in-memory cache +/// A ring that keep the reference of bytes +/// A slow disk or redis based persistent cache #[derive(Clone)] pub struct DiskBytesCache { - inner: Arc>, + inner_memory_cache: InMemoryBytesCacheHolder, + inner_external_cache: Arc>, + population_queue: crossbeam_channel::Sender, + _cache_pupulator: DiskCachePopulator, } +const DEFAULT_POPULATION_CHAN_CAP: usize = 100_000; +const DEFAULT_IN_MEMORY_BLOCK_DATA_CACHE_CAP: u64 = 1024 * 1024 * 1024 * 10; + pub struct DiskCacheBuilder; impl DiskCacheBuilder { pub fn 
new_disk_cache(path: &str, capacity: u64) -> Result { let cache = DiskCache::new(path, capacity) .map_err(|e| ErrorCode::StorageOther(format!("create disk cache failed, {e}")))?; let inner = Arc::new(RwLock::new(cache)); - Ok(DiskBytesCache { inner }) + let (rx, tx) = crossbeam_channel::bounded(DEFAULT_POPULATION_CHAN_CAP); + Ok(DiskBytesCache { + inner_memory_cache: InMemoryCacheBuilder::new_bytes_cache( + DEFAULT_IN_MEMORY_BLOCK_DATA_CACHE_CAP, + ), + inner_external_cache: inner.clone(), + population_queue: rx, + _cache_pupulator: DiskCachePopulator::new(tx, inner, 1)?, + }) } } impl CacheAccessor> for DiskBytesCache { fn get(&self, k: &str) -> Option>> { + // check in memory cache first + { + if let Some(item) = self.inner_memory_cache.get(k) { + return Some(item); + } + } + + // check disk cache let read_file = || { let mut file = { - let mut inner = self.inner.write(); + let mut inner = self.inner_external_cache.write(); inner.get_file(k)? }; let mut v = vec![]; @@ -56,7 +94,7 @@ impl CacheAccessor> for DiskBytesCache { error!("data cache, of key {k}, crc validation failure: {e}"); { // remove the invalid cache, error of removal ignored - let mut inner = self.inner.write(); + let mut inner = self.inner_external_cache.write(); let _ = inner.remove(k); } return None; @@ -75,19 +113,48 @@ impl CacheAccessor> for DiskBytesCache { } fn put(&self, k: String, v: Arc>) { - if let Err(e) = { - let crc = crc32fast::hash(v.as_slice()); - let crc_bytes = crc.to_le_bytes(); - let mut inner = self.inner.write(); - inner.insert_bytes(&k, &[v.as_slice(), &crc_bytes]) - } { - error!("populate disk cache failed {}", e); + // try put the cached item into in-memory cache first + + // check in memory cache first + // note: upgradable guard is not used here , since probability of concurrent + // modification os some key is rather low, and will not affect the integrity of cache. 
+ { + let in_memory_cache = self.inner_memory_cache.read(); + if in_memory_cache.contains(&k) { + // if already cached in memory, it is already attempted to be written to disk cache + return; + } + } + + { + let mut cache = self.inner_memory_cache.write(); + cache.put(k.clone(), v.clone()); + } + + let inner = self.inner_external_cache.read(); + if inner.contains_key(&k) { + // check if k in disk cache already. + // note that cache is being accessed concurrently, the cached item that associated + // whit `k` might be evicted at this time, but we ignore this situation for performance + // concerns. + } else { + let msg = CacheItem { key: k, value: v }; + match self.population_queue.try_send(msg) { + Ok(_) => {} + Err(TrySendError::Full(_)) => { + // TODO metric, record missed cache writing + warn!("disk cache population queue is full"); + } + Err(TrySendError::Disconnected(_)) => { + error!("disk cache population thread is down"); + } + } } } fn evict(&self, k: &str) -> bool { if let Err(e) = { - let mut inner = self.inner.write(); + let mut inner = self.inner_external_cache.write(); inner.remove(k) } { error!("evict disk cache item failed {}", e); @@ -98,6 +165,76 @@ impl CacheAccessor> for DiskBytesCache { } } +#[derive(Clone)] +struct CachePopulationWorker { + cache: Arc>, + population_queue: crossbeam_channel::Receiver, +} + +impl CachePopulationWorker { + fn populate(&self) { + loop { + match self.population_queue.recv() { + Ok(CacheItem { key, value }) => { + { + let inner = self.cache.read(); + if inner.contains_key(&key) { + continue; + } + } + if let Err(e) = { + let crc = crc32fast::hash(value.as_slice()); + let crc_bytes = crc.to_le_bytes(); + let mut inner = self.cache.write(); + inner.insert_bytes(&key, &[value.as_slice(), &crc_bytes]) + } { + error!("populate disk cache failed {}", e); + } + } + Err(_) => { + info!("cache work shutdown"); + break; + } + } + } + } + + fn start(self: Arc) -> Result> { + let thread_builder = 
std::thread::Builder::new().name("cache-population".to_owned()); + thread_builder.spawn(move || self.populate()).map_err(|e| { + ErrorCode::StorageOther(format!("spawn cache population worker thread failed, {e}")) + }) + } +} + +#[derive(Clone)] +struct DiskCachePopulator { + _workers: Vec>, +} + +impl DiskCachePopulator { + fn new( + incoming: crossbeam_channel::Receiver, + cache: Arc>, + _num_worker_thread: usize, + ) -> Result { + let worker = Arc::new(CachePopulationWorker { + cache, + population_queue: incoming, + }); + let _join_handler = worker.clone().start()?; + Ok(Self { + _workers: vec![worker], + }) + } + + #[allow(dead_code)] + pub fn shutdown(&self) { + // by drop the sender + // and timed join the join_handlers + } +} + /// Assuming that the crc32 is at the end of `bytes` and encoded as le u32. // Although parquet page has built-in crc, but it is optional (and not generated in parquet2) // Later, if cache data is put into redis, we can reuse the checksum logic @@ -111,7 +248,7 @@ fn validate_checksum(bytes: &[u8]) -> Result<()> { // checksum validation let crc_bytes: [u8; 4] = bytes[total_len - 4..].try_into().unwrap(); let crc = u32::from_le_bytes(crc_bytes); - let crc_calculated = crc32fast::hash(&bytes[4..]); + let crc_calculated = crc32fast::hash(&bytes[0..total_len - 4]); if crc == crc_calculated { Ok(()) } else { From c05d3c7356b68b0a8b1282f370051a9c6214ece6 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 6 Feb 2023 22:23:51 +0800 Subject: [PATCH 13/80] tuning and metrics --- .../common/cache/src/providers/disk_cache.rs | 49 +++++++++++++------ 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 4c43dd3422e12..14e4a21c902d5 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -44,9 +44,10 @@ pub struct 
DiskBytesCache { inner_memory_cache: InMemoryBytesCacheHolder, inner_external_cache: Arc>, population_queue: crossbeam_channel::Sender, - _cache_pupulator: DiskCachePopulator, + _cache_populator: DiskCachePopulator, } +// TODO new settings please const DEFAULT_POPULATION_CHAN_CAP: usize = 100_000; const DEFAULT_IN_MEMORY_BLOCK_DATA_CACHE_CAP: u64 = 1024 * 1024 * 1024 * 10; @@ -63,7 +64,7 @@ impl DiskCacheBuilder { ), inner_external_cache: inner.clone(), population_queue: rx, - _cache_pupulator: DiskCachePopulator::new(tx, inner, 1)?, + _cache_populator: DiskCachePopulator::new(tx, inner, 1)?, }) } } @@ -97,13 +98,17 @@ impl CacheAccessor> for DiskBytesCache { let mut inner = self.inner_external_cache.write(); let _ = inner.remove(k); } - return None; + None + } else { + // trim the checksum bytes and return + let total_len = bytes.len(); + let body_len = total_len - 4; + bytes.truncate(body_len); + let item = Arc::new(bytes); + // also put the cached item into in-memory cache + self.inner_memory_cache.put(k.to_owned(), item.clone()); + Some(item) } - // return the bytes without the checksum bytes - let total_len = bytes.len(); - let body_len = total_len - 4; - bytes.truncate(body_len); - Some(Arc::new(bytes)) } Err(e) => { error!("get disk cache item failed, {}", e); @@ -122,6 +127,7 @@ impl CacheAccessor> for DiskBytesCache { let in_memory_cache = self.inner_memory_cache.read(); if in_memory_cache.contains(&k) { // if already cached in memory, it is already attempted to be written to disk cache + // TODO check this, shall we return here? return; } } @@ -132,17 +138,14 @@ impl CacheAccessor> for DiskBytesCache { } let inner = self.inner_external_cache.read(); - if inner.contains_key(&k) { - // check if k in disk cache already. - // note that cache is being accessed concurrently, the cached item that associated - // whit `k` might be evicted at this time, but we ignore this situation for performance - // concerns. 
- } else { + // in our case, the value associated with a specific key never change, thus + // only when cache is missing, we try to populate it. + if !inner.contains_key(&k) { let msg = CacheItem { key: k, value: v }; match self.population_queue.try_send(msg) { Ok(_) => {} Err(TrySendError::Full(_)) => { - // TODO metric, record missed cache writing + self::metrics::metrics_inc_population_overflow_count(1); warn!("disk cache population queue is full"); } Err(TrySendError::Disconnected(_)) => { @@ -189,6 +192,8 @@ impl CachePopulationWorker { inner.insert_bytes(&key, &[value.as_slice(), &crc_bytes]) } { error!("populate disk cache failed {}", e); + } else { + self::metrics::metrics_inc_disk_cache_population_count(1); } } Err(_) => { @@ -258,3 +263,17 @@ fn validate_checksum(bytes: &[u8]) -> Result<()> { } } } + +mod metrics { + use metrics::increment_gauge; + + #[inline] + pub fn metrics_inc_population_overflow_count(c: u64) { + increment_gauge!("data_block_cache_population_overflow", c as f64); + } + + #[inline] + pub fn metrics_inc_disk_cache_population_count(c: u64) { + increment_gauge!("data_block_cache_population_overflow", c as f64); + } +} From 47cfc1d311d6ee87586925ec0edc84193406edb6 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 6 Feb 2023 23:09:26 +0800 Subject: [PATCH 14/80] add data cache related config --- src/query/config/src/inner.rs | 27 +++++++--- src/query/config/src/outer_v0.rs | 23 ++++++++- .../common/cache-manager/src/cache_manager.rs | 30 +++++++++--- .../common/cache/src/providers/disk_cache.rs | 49 ++++++++----------- 4 files changed, 85 insertions(+), 44 deletions(-) diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 668cf14c847ba..d23f1eead9121 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -147,16 +147,12 @@ pub struct QueryConfig { pub table_engine_memory_enabled: bool, pub wait_timeout_mills: u64, pub max_query_log_size: usize, - /// Table Cached enabled + /// Table 
Meta Cached enabled pub table_meta_cache_enabled: bool, /// Max number of cached table block meta pub table_cache_block_meta_count: u64, - /// Table memory cache size (mb) + /// Table memory cache size (MB) pub table_memory_cache_mb_size: u64, - /// Table disk cache folder root - pub table_disk_cache_root: String, - /// Table disk cache size (mb) - pub table_disk_cache_mb_size: u64, /// Max number of cached table snapshot pub table_cache_snapshot_count: u64, /// Max number of cached table statistic @@ -167,6 +163,18 @@ pub struct QueryConfig { pub table_cache_bloom_index_meta_count: u64, /// Max number of cached bloom index filters pub table_cache_bloom_index_filter_count: u64, + /// Table data cache enabled + pub table_data_cache_enabled: bool, + /// Max bytes of table data cached in memory (MB) + pub table_data_cache_in_memory_mb_size: u64, + /// Table disk cache folder root + pub table_disk_cache_root: String, + /// Max size of external cache population queue length + /// TODO explain this, how it effect the memory usage + /// the item being queued are typically referencing items that inside in-memory cached data cache + pub table_data_cache_population_queue_size: u32, + /// Table disk cache size (MB) + pub table_disk_cache_mb_size: u64, /// If in management mode, only can do some meta level operations(database/table/user/stage etc.) with metasrv. 
pub management_mode: bool, pub jwt_key_file: String, @@ -211,17 +219,19 @@ impl Default for QueryConfig { rpc_tls_query_service_domain_name: "localhost".to_string(), table_engine_memory_enabled: true, wait_timeout_mills: 5000, - max_query_log_size: 10000, + max_query_log_size: 10_000, table_meta_cache_enabled: true, table_cache_block_meta_count: 102400, table_memory_cache_mb_size: 256, table_disk_cache_root: "_cache".to_string(), - table_disk_cache_mb_size: 1024, + table_data_cache_population_queue_size: 65536, + table_disk_cache_mb_size: 10240, table_cache_snapshot_count: 256, table_cache_statistic_count: 256, table_cache_segment_count: 10240, table_cache_bloom_index_meta_count: 3000, table_cache_bloom_index_filter_count: 1024 * 1024, + table_data_cache_enabled: false, management_mode: false, jwt_key_file: "".to_string(), async_insert_max_data_size: 10000, @@ -232,6 +242,7 @@ impl Default for QueryConfig { share_endpoint_auth_token_file: "".to_string(), tenant_quota: None, internal_enable_sandbox_tenant: false, + table_data_cache_in_memory_mb_size: 1024 * 10, } } } diff --git a/src/query/config/src/outer_v0.rs b/src/query/config/src/outer_v0.rs index bb92775cb56b8..bb2f9ed4afd62 100644 --- a/src/query/config/src/outer_v0.rs +++ b/src/query/config/src/outer_v0.rs @@ -1274,7 +1274,7 @@ pub struct QueryConfig { pub table_disk_cache_root: String, /// Table disk cache size (mb) - #[clap(long, default_value = "1024")] + #[clap(long, default_value = "10240")] pub table_disk_cache_mb_size: u64, /// Max number of cached table snapshot @@ -1301,6 +1301,21 @@ pub struct QueryConfig { #[clap(long, default_value = "1048576")] pub table_cache_bloom_index_filter_count: u64, + /// Table data cached enabled, default false + #[clap(long, default_value = "false")] + pub table_data_cache_enabled: bool, + + /// Max bytes of table data cached in memory (MB) + /// default value 10240 MB, or 10G + #[clap(long, default_value = "10240")] + pub table_data_cache_in_memory_mb_size: u64, + + 
/// Max item that could be pending in the external cache population queue + /// default value 65536 items. Increase this value if it takes too much times + /// to fully populate the disk cache. + #[clap(long, default_value = "65536")] + pub table_data_cache_population_queue_size: u32, + /// If in management mode, only can do some meta level operations(database/table/user/stage etc.) with metasrv. #[clap(long)] pub management_mode: bool, @@ -1380,12 +1395,14 @@ impl TryInto for QueryConfig { table_cache_block_meta_count: self.table_cache_block_meta_count, table_memory_cache_mb_size: self.table_memory_cache_mb_size, table_disk_cache_root: self.table_disk_cache_root, + table_data_cache_population_queue_size: self.table_data_cache_population_queue_size, table_disk_cache_mb_size: self.table_disk_cache_mb_size, table_cache_snapshot_count: self.table_cache_snapshot_count, table_cache_statistic_count: self.table_cache_statistic_count, table_cache_segment_count: self.table_cache_segment_count, table_cache_bloom_index_meta_count: self.table_cache_bloom_index_meta_count, table_cache_bloom_index_filter_count: self.table_cache_bloom_index_filter_count, + table_data_cache_enabled: self.table_data_cache_enabled, management_mode: self.management_mode, jwt_key_file: self.jwt_key_file, async_insert_max_data_size: self.async_insert_max_data_size, @@ -1398,6 +1415,7 @@ impl TryInto for QueryConfig { share_endpoint_auth_token_file: self.share_endpoint_auth_token_file, tenant_quota: self.quota, internal_enable_sandbox_tenant: self.internal_enable_sandbox_tenant, + table_data_cache_in_memory_mb_size: self.table_data_cache_in_memory_mb_size, }) } } @@ -1451,6 +1469,9 @@ impl From for QueryConfig { table_cache_segment_count: inner.table_cache_segment_count, table_cache_bloom_index_meta_count: inner.table_cache_bloom_index_meta_count, table_cache_bloom_index_filter_count: inner.table_cache_bloom_index_filter_count, + table_data_cache_enabled: inner.table_meta_cache_enabled, + 
table_data_cache_in_memory_mb_size: inner.table_data_cache_in_memory_mb_size, + table_data_cache_population_queue_size: inner.table_data_cache_population_queue_size, management_mode: inner.management_mode, jwt_key_file: inner.jwt_key_file, async_insert_max_data_size: inner.async_insert_max_data_size, diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index 7142bcf9998f8..66bec8455db70 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -46,6 +46,16 @@ pub struct CacheManager { impl CacheManager { /// Initialize the caches according to the relevant configurations. pub fn init(config: &QueryConfig) -> Result<()> { + let block_data_cache = if config.table_data_cache_enabled { + Self::new_block_data_cache( + &config.table_disk_cache_root, + config.table_data_cache_in_memory_mb_size, + config.table_data_cache_population_queue_size, + config.table_disk_cache_mb_size, + )? 
+ } else { + None + }; if !config.table_meta_cache_enabled { GlobalInstance::set(Arc::new(Self { table_snapshot_cache: None, @@ -65,10 +75,6 @@ impl CacheManager { let bloom_index_meta_cache = Self::new_item_cache(config.table_cache_bloom_index_meta_count); let file_meta_data_cache = Self::new_item_cache(DEFAULT_FILE_META_DATA_CACHE_ITEMS); - let block_data_cache = Self::new_block_data_cache( - &config.table_disk_cache_root, - config.table_disk_cache_mb_size * 1024 * 1024, - )?; GlobalInstance::set(Arc::new(Self { table_snapshot_cache, segment_info_cache, @@ -123,9 +129,19 @@ impl CacheManager { } } - fn new_block_data_cache(path: &str, capacity: u64) -> Result> { - if capacity > 0 { - let cache_holder = DiskCacheBuilder::new_disk_cache(path, capacity)?; + fn new_block_data_cache( + path: &str, + in_memory_cache_mb_size: u64, + population_queue_size: u32, + disk_cache_mb_size: u64, + ) -> Result> { + if in_memory_cache_mb_size > 0 { + let cache_holder = DiskCacheBuilder::new_disk_cache( + path, + in_memory_cache_mb_size, + population_queue_size, + disk_cache_mb_size, + )?; Ok(Some(cache_holder)) } else { Ok(None) diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 14e4a21c902d5..5e407b58676a0 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -47,20 +47,21 @@ pub struct DiskBytesCache { _cache_populator: DiskCachePopulator, } -// TODO new settings please -const DEFAULT_POPULATION_CHAN_CAP: usize = 100_000; -const DEFAULT_IN_MEMORY_BLOCK_DATA_CACHE_CAP: u64 = 1024 * 1024 * 1024 * 10; - pub struct DiskCacheBuilder; impl DiskCacheBuilder { - pub fn new_disk_cache(path: &str, capacity: u64) -> Result { - let cache = DiskCache::new(path, capacity) + pub fn new_disk_cache( + path: &str, + in_memory_cache_mb_size: u64, + population_queue_size: u32, + disk_cache_size: u64, + ) -> Result { + let 
external_cache = DiskCache::new(path, disk_cache_size * 1024 * 1024) .map_err(|e| ErrorCode::StorageOther(format!("create disk cache failed, {e}")))?; - let inner = Arc::new(RwLock::new(cache)); - let (rx, tx) = crossbeam_channel::bounded(DEFAULT_POPULATION_CHAN_CAP); + let inner = Arc::new(RwLock::new(external_cache)); + let (rx, tx) = crossbeam_channel::bounded(population_queue_size as usize); Ok(DiskBytesCache { inner_memory_cache: InMemoryCacheBuilder::new_bytes_cache( - DEFAULT_IN_MEMORY_BLOCK_DATA_CACHE_CAP, + in_memory_cache_mb_size * 1024 * 1024, ), inner_external_cache: inner.clone(), population_queue: rx, @@ -118,29 +119,20 @@ impl CacheAccessor> for DiskBytesCache { } fn put(&self, k: String, v: Arc>) { - // try put the cached item into in-memory cache first - - // check in memory cache first - // note: upgradable guard is not used here , since probability of concurrent - // modification os some key is rather low, and will not affect the integrity of cache. + // put it into the in-memory cache first { - let in_memory_cache = self.inner_memory_cache.read(); - if in_memory_cache.contains(&k) { - // if already cached in memory, it is already attempted to be written to disk cache - // TODO check this, shall we return here? - return; - } + let mut in_memory_cache = self.inner_memory_cache.write(); + in_memory_cache.put(k.clone(), v.clone()); } - { - let mut cache = self.inner_memory_cache.write(); - cache.put(k.clone(), v.clone()); - } + // check if external(disk/redis) already have it. + let contains = { + let external_cache = self.inner_external_cache.read(); + external_cache.contains_key(&k) + }; - let inner = self.inner_external_cache.read(); - // in our case, the value associated with a specific key never change, thus - // only when cache is missing, we try to populate it. 
- if !inner.contains_key(&k) { + if !contains { + // populate the cache to external cache(disk/redis) asyncly let msg = CacheItem { key: k, value: v }; match self.population_queue.try_send(msg) { Ok(_) => {} @@ -157,6 +149,7 @@ impl CacheAccessor> for DiskBytesCache { fn evict(&self, k: &str) -> bool { if let Err(e) = { + self.inner_memory_cache.evict(k); let mut inner = self.inner_external_cache.write(); inner.remove(k) } { From 9f9bce840a0fec57dc8706356b9bd45cfafdfac1 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 6 Feb 2023 23:41:05 +0800 Subject: [PATCH 15/80] fix ut --- src/query/config/src/inner.rs | 6 +++--- src/query/config/src/outer_v0.rs | 10 +++++----- .../storages/common/cache-manager/src/cache_manager.rs | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index d23f1eead9121..4642c22eec6a0 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -163,8 +163,8 @@ pub struct QueryConfig { pub table_cache_bloom_index_meta_count: u64, /// Max number of cached bloom index filters pub table_cache_bloom_index_filter_count: u64, - /// Table data cache enabled - pub table_data_cache_enabled: bool, + /// Table data cache disabled + pub table_data_cache_disabled: bool, /// Max bytes of table data cached in memory (MB) pub table_data_cache_in_memory_mb_size: u64, /// Table disk cache folder root @@ -231,7 +231,7 @@ impl Default for QueryConfig { table_cache_segment_count: 10240, table_cache_bloom_index_meta_count: 3000, table_cache_bloom_index_filter_count: 1024 * 1024, - table_data_cache_enabled: false, + table_data_cache_disabled: true, management_mode: false, jwt_key_file: "".to_string(), async_insert_max_data_size: 10000, diff --git a/src/query/config/src/outer_v0.rs b/src/query/config/src/outer_v0.rs index bb2f9ed4afd62..eea3624e27cbf 100644 --- a/src/query/config/src/outer_v0.rs +++ b/src/query/config/src/outer_v0.rs @@ -1301,9 +1301,9 @@ pub 
struct QueryConfig { #[clap(long, default_value = "1048576")] pub table_cache_bloom_index_filter_count: u64, - /// Table data cached enabled, default false - #[clap(long, default_value = "false")] - pub table_data_cache_enabled: bool, + /// Table data cached disabled, default true + #[clap(long, default_value = "true")] + pub table_data_cache_disabled: bool, /// Max bytes of table data cached in memory (MB) /// default value 10240 MB, or 10G @@ -1402,7 +1402,7 @@ impl TryInto for QueryConfig { table_cache_segment_count: self.table_cache_segment_count, table_cache_bloom_index_meta_count: self.table_cache_bloom_index_meta_count, table_cache_bloom_index_filter_count: self.table_cache_bloom_index_filter_count, - table_data_cache_enabled: self.table_data_cache_enabled, + table_data_cache_disabled: self.table_data_cache_disabled, management_mode: self.management_mode, jwt_key_file: self.jwt_key_file, async_insert_max_data_size: self.async_insert_max_data_size, @@ -1469,7 +1469,7 @@ impl From for QueryConfig { table_cache_segment_count: inner.table_cache_segment_count, table_cache_bloom_index_meta_count: inner.table_cache_bloom_index_meta_count, table_cache_bloom_index_filter_count: inner.table_cache_bloom_index_filter_count, - table_data_cache_enabled: inner.table_meta_cache_enabled, + table_data_cache_disabled: inner.table_data_cache_disabled, table_data_cache_in_memory_mb_size: inner.table_data_cache_in_memory_mb_size, table_data_cache_population_queue_size: inner.table_data_cache_population_queue_size, management_mode: inner.management_mode, diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index 66bec8455db70..b9b336e6470b3 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -46,15 +46,15 @@ pub struct CacheManager { impl CacheManager { /// Initialize the caches according to the relevant 
configurations. pub fn init(config: &QueryConfig) -> Result<()> { - let block_data_cache = if config.table_data_cache_enabled { + let block_data_cache = if config.table_data_cache_disabled { + None + } else { Self::new_block_data_cache( &config.table_disk_cache_root, config.table_data_cache_in_memory_mb_size, config.table_data_cache_population_queue_size, config.table_disk_cache_mb_size, )? - } else { - None }; if !config.table_meta_cache_enabled { GlobalInstance::set(Arc::new(Self { From e28564cda78e7145351b1a5fc5f39d71c46187b5 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Tue, 7 Feb 2023 09:39:14 +0800 Subject: [PATCH 16/80] fix ut it --- .../storages/testdata/configs_table_basic.txt | 249 +++++++++--------- .../fuse/src/io/read/block/block_reader.rs | 4 +- 2 files changed, 127 insertions(+), 126 deletions(-) diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index 5ca275fdc7da5..9a3bf22b4f14f 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -1,128 +1,131 @@ ---------- TABLE INFO ------------ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemConfigs -------- TABLE CONTENTS ---------- -+-----------+----------------------------------------+----------------------------------+----------+ -| Column 0 | Column 1 | Column 2 | Column 3 | -+-----------+----------------------------------------+----------------------------------+----------+ -| "log" | "dir" | "./.databend/logs" | "" | -| "log" | "file.dir" | "./.databend/logs" | "" | -| "log" | "file.format" | "text" | "" | -| "log" | "file.level" | "DEBUG" | "" | -| "log" | "file.on" | "true" | "" | -| "log" | "level" | "DEBUG" | "" | -| "log" | "query_enabled" | "false" | "" | -| "log" | "stderr.format" | "text" | "" | -| "log" | "stderr.level" | "INFO" | "" | -| "log" | "stderr.on" 
| "true" | "" | -| "meta" | "auto_sync_interval" | "0" | "" | -| "meta" | "client_timeout_in_second" | "10" | "" | -| "meta" | "embedded_dir" | "" | "" | -| "meta" | "endpoints" | "" | "" | -| "meta" | "password" | "" | "" | -| "meta" | "rpc_tls_meta_server_root_ca_cert" | "" | "" | -| "meta" | "rpc_tls_meta_service_domain_name" | "localhost" | "" | -| "meta" | "username" | "root" | "" | -| "query" | "admin_api_address" | "127.0.0.1:8080" | "" | -| "query" | "api_tls_server_cert" | "" | "" | -| "query" | "api_tls_server_key" | "" | "" | -| "query" | "api_tls_server_root_ca_cert" | "" | "" | -| "query" | "async_insert_busy_timeout" | "200" | "" | -| "query" | "async_insert_max_data_size" | "10000" | "" | -| "query" | "async_insert_stale_timeout" | "0" | "" | -| "query" | "clickhouse_handler_host" | "127.0.0.1" | "" | -| "query" | "clickhouse_handler_port" | "9000" | "" | -| "query" | "clickhouse_http_handler_host" | "127.0.0.1" | "" | -| "query" | "clickhouse_http_handler_port" | "8124" | "" | -| "query" | "cluster_id" | "" | "" | -| "query" | "database_engine_github_enabled" | "true" | "" | -| "query" | "flight_api_address" | "127.0.0.1:9090" | "" | -| "query" | "http_handler_host" | "127.0.0.1" | "" | -| "query" | "http_handler_port" | "8000" | "" | -| "query" | "http_handler_result_timeout_secs" | "60" | "" | -| "query" | "http_handler_tls_server_cert" | "" | "" | -| "query" | "http_handler_tls_server_key" | "" | "" | -| "query" | "http_handler_tls_server_root_ca_cert" | "" | "" | -| "query" | "internal_enable_sandbox_tenant" | "false" | "" | -| "query" | "jwt_key_file" | "" | "" | -| "query" | "management_mode" | "false" | "" | -| "query" | "max_active_sessions" | "256" | "" | -| "query" | "max_memory_limit_enabled" | "false" | "" | -| "query" | "max_query_log_size" | "10000" | "" | -| "query" | "max_server_memory_usage" | "0" | "" | -| "query" | "metric_api_address" | "127.0.0.1:7070" | "" | -| "query" | "mysql_handler_host" | "127.0.0.1" | "" | -| "query" | 
"mysql_handler_port" | "3307" | "" | -| "query" | "num_cpus" | "0" | "" | -| "query" | "quota" | "null" | "" | -| "query" | "rpc_tls_query_server_root_ca_cert" | "" | "" | -| "query" | "rpc_tls_query_service_domain_name" | "localhost" | "" | -| "query" | "rpc_tls_server_cert" | "" | "" | -| "query" | "rpc_tls_server_key" | "" | "" | -| "query" | "share_endpoint_address" | "" | "" | -| "query" | "share_endpoint_auth_token_file" | "" | "" | -| "query" | "table_cache_block_meta_count" | "102400" | "" | -| "query" | "table_cache_bloom_index_filter_count" | "1048576" | "" | -| "query" | "table_cache_bloom_index_meta_count" | "3000" | "" | -| "query" | "table_cache_segment_count" | "10240" | "" | -| "query" | "table_cache_snapshot_count" | "256" | "" | -| "query" | "table_cache_statistic_count" | "256" | "" | -| "query" | "table_disk_cache_mb_size" | "1024" | "" | -| "query" | "table_disk_cache_root" | "_cache" | "" | -| "query" | "table_engine_memory_enabled" | "true" | "" | -| "query" | "table_memory_cache_mb_size" | "256" | "" | -| "query" | "table_meta_cache_enabled" | "true" | "" | -| "query" | "tenant_id" | "test" | "" | -| "query" | "users" | "" | "" | -| "query" | "wait_timeout_mills" | "5000" | "" | -| "storage" | "allow_insecure" | "false" | "" | -| "storage" | "azblob.account_key" | "" | "" | -| "storage" | "azblob.account_name" | "" | "" | -| "storage" | "azblob.container" | "" | "" | -| "storage" | "azblob.endpoint_url" | "" | "" | -| "storage" | "azblob.root" | "" | "" | -| "storage" | "cache.fs.data_path" | "_data" | "" | -| "storage" | "cache.moka.max_capacity" | "1073741824" | "" | -| "storage" | "cache.moka.time_to_idle" | "600" | "" | -| "storage" | "cache.moka.time_to_live" | "3600" | "" | -| "storage" | "cache.num_cpus" | "0" | "" | -| "storage" | "cache.redis.db" | "0" | "" | -| "storage" | "cache.redis.default_ttl" | "0" | "" | -| "storage" | "cache.redis.endpoint_url" | "" | "" | -| "storage" | "cache.redis.password" | "" | "" | -| "storage" | 
"cache.redis.root" | "" | "" | -| "storage" | "cache.redis.username" | "" | "" | -| "storage" | "cache.type" | "none" | "" | -| "storage" | "fs.data_path" | "_data" | "" | -| "storage" | "gcs.bucket" | "" | "" | -| "storage" | "gcs.credential" | "" | "" | -| "storage" | "gcs.endpoint_url" | "https://storage.googleapis.com" | "" | -| "storage" | "gcs.root" | "" | "" | -| "storage" | "hdfs.name_node" | "" | "" | -| "storage" | "hdfs.root" | "" | "" | -| "storage" | "num_cpus" | "0" | "" | -| "storage" | "obs.access_key_id" | "" | "" | -| "storage" | "obs.bucket" | "" | "" | -| "storage" | "obs.endpoint_url" | "" | "" | -| "storage" | "obs.root" | "" | "" | -| "storage" | "obs.secret_access_key" | "" | "" | -| "storage" | "oss.access_key_id" | "" | "" | -| "storage" | "oss.access_key_secret" | "" | "" | -| "storage" | "oss.bucket" | "" | "" | -| "storage" | "oss.endpoint_url" | "" | "" | -| "storage" | "oss.presign_endpoint_url" | "" | "" | -| "storage" | "oss.root" | "" | "" | -| "storage" | "s3.access_key_id" | "" | "" | -| "storage" | "s3.bucket" | "" | "" | -| "storage" | "s3.enable_virtual_host_style" | "false" | "" | -| "storage" | "s3.endpoint_url" | "https://s3.amazonaws.com" | "" | -| "storage" | "s3.external_id" | "" | "" | -| "storage" | "s3.master_key" | "" | "" | -| "storage" | "s3.region" | "" | "" | -| "storage" | "s3.role_arn" | "" | "" | -| "storage" | "s3.root" | "" | "" | -| "storage" | "s3.secret_access_key" | "" | "" | -| "storage" | "s3.security_token" | "" | "" | -| "storage" | "type" | "fs" | "" | -+-----------+----------------------------------------+----------------------------------+----------+ ++-----------+------------------------------------------+----------------------------------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 | ++-----------+------------------------------------------+----------------------------------+----------+ +| "log" | "dir" | "./.databend/logs" | "" | +| "log" | "file.dir" | "./.databend/logs" | "" | +| 
"log" | "file.format" | "text" | "" | +| "log" | "file.level" | "DEBUG" | "" | +| "log" | "file.on" | "true" | "" | +| "log" | "level" | "DEBUG" | "" | +| "log" | "query_enabled" | "false" | "" | +| "log" | "stderr.format" | "text" | "" | +| "log" | "stderr.level" | "INFO" | "" | +| "log" | "stderr.on" | "true" | "" | +| "meta" | "auto_sync_interval" | "0" | "" | +| "meta" | "client_timeout_in_second" | "10" | "" | +| "meta" | "embedded_dir" | "" | "" | +| "meta" | "endpoints" | "" | "" | +| "meta" | "password" | "" | "" | +| "meta" | "rpc_tls_meta_server_root_ca_cert" | "" | "" | +| "meta" | "rpc_tls_meta_service_domain_name" | "localhost" | "" | +| "meta" | "username" | "root" | "" | +| "query" | "admin_api_address" | "127.0.0.1:8080" | "" | +| "query" | "api_tls_server_cert" | "" | "" | +| "query" | "api_tls_server_key" | "" | "" | +| "query" | "api_tls_server_root_ca_cert" | "" | "" | +| "query" | "async_insert_busy_timeout" | "200" | "" | +| "query" | "async_insert_max_data_size" | "10000" | "" | +| "query" | "async_insert_stale_timeout" | "0" | "" | +| "query" | "clickhouse_handler_host" | "127.0.0.1" | "" | +| "query" | "clickhouse_handler_port" | "9000" | "" | +| "query" | "clickhouse_http_handler_host" | "127.0.0.1" | "" | +| "query" | "clickhouse_http_handler_port" | "8124" | "" | +| "query" | "cluster_id" | "" | "" | +| "query" | "database_engine_github_enabled" | "true" | "" | +| "query" | "flight_api_address" | "127.0.0.1:9090" | "" | +| "query" | "http_handler_host" | "127.0.0.1" | "" | +| "query" | "http_handler_port" | "8000" | "" | +| "query" | "http_handler_result_timeout_secs" | "60" | "" | +| "query" | "http_handler_tls_server_cert" | "" | "" | +| "query" | "http_handler_tls_server_key" | "" | "" | +| "query" | "http_handler_tls_server_root_ca_cert" | "" | "" | +| "query" | "internal_enable_sandbox_tenant" | "false" | "" | +| "query" | "jwt_key_file" | "" | "" | +| "query" | "management_mode" | "false" | "" | +| "query" | "max_active_sessions" | 
"256" | "" | +| "query" | "max_memory_limit_enabled" | "false" | "" | +| "query" | "max_query_log_size" | "10000" | "" | +| "query" | "max_server_memory_usage" | "0" | "" | +| "query" | "metric_api_address" | "127.0.0.1:7070" | "" | +| "query" | "mysql_handler_host" | "127.0.0.1" | "" | +| "query" | "mysql_handler_port" | "3307" | "" | +| "query" | "num_cpus" | "0" | "" | +| "query" | "quota" | "null" | "" | +| "query" | "rpc_tls_query_server_root_ca_cert" | "" | "" | +| "query" | "rpc_tls_query_service_domain_name" | "localhost" | "" | +| "query" | "rpc_tls_server_cert" | "" | "" | +| "query" | "rpc_tls_server_key" | "" | "" | +| "query" | "share_endpoint_address" | "" | "" | +| "query" | "share_endpoint_auth_token_file" | "" | "" | +| "query" | "table_cache_block_meta_count" | "102400" | "" | +| "query" | "table_cache_bloom_index_filter_count" | "1048576" | "" | +| "query" | "table_cache_bloom_index_meta_count" | "3000" | "" | +| "query" | "table_cache_segment_count" | "10240" | "" | +| "query" | "table_cache_snapshot_count" | "256" | "" | +| "query" | "table_cache_statistic_count" | "256" | "" | +| "query" | "table_data_cache_disabled" | "true" | "" | +| "query" | "table_data_cache_in_memory_mb_size" | "10240" | "" | +| "query" | "table_data_cache_population_queue_size" | "65536" | "" | +| "query" | "table_disk_cache_mb_size" | "10240" | "" | +| "query" | "table_disk_cache_root" | "_cache" | "" | +| "query" | "table_engine_memory_enabled" | "true" | "" | +| "query" | "table_memory_cache_mb_size" | "256" | "" | +| "query" | "table_meta_cache_enabled" | "true" | "" | +| "query" | "tenant_id" | "test" | "" | +| "query" | "users" | "" | "" | +| "query" | "wait_timeout_mills" | "5000" | "" | +| "storage" | "allow_insecure" | "false" | "" | +| "storage" | "azblob.account_key" | "" | "" | +| "storage" | "azblob.account_name" | "" | "" | +| "storage" | "azblob.container" | "" | "" | +| "storage" | "azblob.endpoint_url" | "" | "" | +| "storage" | "azblob.root" | "" | "" 
| +| "storage" | "cache.fs.data_path" | "_data" | "" | +| "storage" | "cache.moka.max_capacity" | "1073741824" | "" | +| "storage" | "cache.moka.time_to_idle" | "600" | "" | +| "storage" | "cache.moka.time_to_live" | "3600" | "" | +| "storage" | "cache.num_cpus" | "0" | "" | +| "storage" | "cache.redis.db" | "0" | "" | +| "storage" | "cache.redis.default_ttl" | "0" | "" | +| "storage" | "cache.redis.endpoint_url" | "" | "" | +| "storage" | "cache.redis.password" | "" | "" | +| "storage" | "cache.redis.root" | "" | "" | +| "storage" | "cache.redis.username" | "" | "" | +| "storage" | "cache.type" | "none" | "" | +| "storage" | "fs.data_path" | "_data" | "" | +| "storage" | "gcs.bucket" | "" | "" | +| "storage" | "gcs.credential" | "" | "" | +| "storage" | "gcs.endpoint_url" | "https://storage.googleapis.com" | "" | +| "storage" | "gcs.root" | "" | "" | +| "storage" | "hdfs.name_node" | "" | "" | +| "storage" | "hdfs.root" | "" | "" | +| "storage" | "num_cpus" | "0" | "" | +| "storage" | "obs.access_key_id" | "" | "" | +| "storage" | "obs.bucket" | "" | "" | +| "storage" | "obs.endpoint_url" | "" | "" | +| "storage" | "obs.root" | "" | "" | +| "storage" | "obs.secret_access_key" | "" | "" | +| "storage" | "oss.access_key_id" | "" | "" | +| "storage" | "oss.access_key_secret" | "" | "" | +| "storage" | "oss.bucket" | "" | "" | +| "storage" | "oss.endpoint_url" | "" | "" | +| "storage" | "oss.presign_endpoint_url" | "" | "" | +| "storage" | "oss.root" | "" | "" | +| "storage" | "s3.access_key_id" | "" | "" | +| "storage" | "s3.bucket" | "" | "" | +| "storage" | "s3.enable_virtual_host_style" | "false" | "" | +| "storage" | "s3.endpoint_url" | "https://s3.amazonaws.com" | "" | +| "storage" | "s3.external_id" | "" | "" | +| "storage" | "s3.master_key" | "" | "" | +| "storage" | "s3.region" | "" | "" | +| "storage" | "s3.role_arn" | "" | "" | +| "storage" | "s3.root" | "" | "" | +| "storage" | "s3.secret_access_key" | "" | "" | +| "storage" | "s3.security_token" | "" | "" 
| +| "storage" | "type" | "fs" | "" | ++-----------+------------------------------------------+----------------------------------+----------+ diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index b2f8866dcb789..a646da1ec4204 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -170,9 +170,7 @@ impl BlockReader { } pub fn support_blocking_api(&self) -> bool { - // TODO for testing purpose only, remove this in the final PR - let revert_sync_read = std::env::var("DATABEND_DEBUG_REVERT_SYNC_READ").is_ok(); - revert_sync_read && self.operator.metadata().can_blocking() + self.operator.metadata().can_blocking() } /// This is an optimized for data read, works like the Linux kernel io-scheduler IO merging. From bdca8a488d5068e989d4720defe8dbb6583728e1 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Tue, 7 Feb 2023 10:58:05 +0800 Subject: [PATCH 17/80] refactor: generic external cache type --- .../it/{disk_cache.rs => lru_disk_cache.rs} | 0 src/common/cache/tests/it/main.rs | 2 +- .../common/cache-manager/src/cache_manager.rs | 18 +- src/query/storages/common/cache/src/cache.rs | 24 ++- src/query/storages/common/cache/src/lib.rs | 2 + .../common/cache/src/providers/disk_cache.rs | 170 ++------------- .../cache/src/providers/memory_cache.rs | 4 + .../common/cache/src/providers/mod.rs | 3 + .../cache/src/providers/table_data_cache.rs | 194 ++++++++++++++++++ .../fuse/src/io/read/block/block_reader.rs | 4 +- 10 files changed, 254 insertions(+), 167 deletions(-) rename src/common/cache/tests/it/{disk_cache.rs => lru_disk_cache.rs} (100%) create mode 100644 src/query/storages/common/cache/src/providers/table_data_cache.rs diff --git a/src/common/cache/tests/it/disk_cache.rs b/src/common/cache/tests/it/lru_disk_cache.rs similarity index 100% rename from src/common/cache/tests/it/disk_cache.rs rename to 
src/common/cache/tests/it/lru_disk_cache.rs diff --git a/src/common/cache/tests/it/main.rs b/src/common/cache/tests/it/main.rs index c69f732a0428c..4bfc6f691478f 100644 --- a/src/common/cache/tests/it/main.rs +++ b/src/common/cache/tests/it/main.rs @@ -15,4 +15,4 @@ #![allow(clippy::uninlined_format_args)] mod cache; -mod disk_cache; +mod lru_disk_cache; diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index b9b336e6470b3..b452e1046b291 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -18,10 +18,10 @@ use std::sync::Arc; use common_base::base::GlobalInstance; use common_config::QueryConfig; use common_exception::Result; -use storages_common_cache::DiskBytesCache; -use storages_common_cache::DiskCacheBuilder; use storages_common_cache::InMemoryCacheBuilder; use storages_common_cache::InMemoryItemCacheHolder; +use storages_common_cache::TableDataCache; +use storages_common_cache::TableDataCacheBuilder; use crate::caches::BloomIndexFilterCache; use crate::caches::BloomIndexMetaCache; @@ -40,7 +40,7 @@ pub struct CacheManager { bloom_index_filter_cache: Option, bloom_index_meta_cache: Option, file_meta_data_cache: Option, - block_data_cache: Option, + table_data_cache: Option, } impl CacheManager { @@ -64,7 +64,7 @@ impl CacheManager { bloom_index_meta_cache: None, file_meta_data_cache: None, table_statistic_cache: None, - block_data_cache: None, + table_data_cache: None, })); } else { let table_snapshot_cache = Self::new_item_cache(config.table_cache_snapshot_count); @@ -82,7 +82,7 @@ impl CacheManager { bloom_index_meta_cache, file_meta_data_cache, table_statistic_cache, - block_data_cache, + table_data_cache: block_data_cache, })); } @@ -117,8 +117,8 @@ impl CacheManager { self.file_meta_data_cache.clone() } - pub fn get_block_data_cache(&self) -> Option { - 
self.block_data_cache.clone() + pub fn get_block_data_cache(&self) -> Option { + self.table_data_cache.clone() } fn new_item_cache(capacity: u64) -> Option> { @@ -134,9 +134,9 @@ impl CacheManager { in_memory_cache_mb_size: u64, population_queue_size: u32, disk_cache_mb_size: u64, - ) -> Result> { + ) -> Result> { if in_memory_cache_mb_size > 0 { - let cache_holder = DiskCacheBuilder::new_disk_cache( + let cache_holder = TableDataCacheBuilder::new_table_data_disk_cache( path, in_memory_cache_mb_size, population_queue_size, diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index cc0b77e770eb4..d4a4030f03152 100644 --- a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -14,15 +14,19 @@ use std::sync::Arc; +// The cache accessor, crate users usually working on this interface while manipulating caches pub trait CacheAccessor { fn get(&self, k: &str) -> Option>; fn put(&self, key: K, value: Arc); fn evict(&self, k: &str) -> bool; + fn contains_key(&self, _k: &str) -> bool; } -/// The minimum interface that cache providers should implement +// The minimum interface that cache providers should implement +// note this interface working on mutable self reference pub trait StorageCache { type Meter; + // TODO: remove this assoc type type CacheEntry; fn put(&mut self, key: K, value: Arc); @@ -30,8 +34,11 @@ pub trait StorageCache { fn get(&mut self, k: &str) -> Option; fn evict(&mut self, k: &str) -> bool; + + fn contains_key(&self, k: &str) -> bool; } +// default impls mod impls { use std::sync::Arc; @@ -40,6 +47,7 @@ mod impls { use crate::cache::CacheAccessor; use crate::cache::StorageCache; + // Wrap a StorageCache with RwLock, and impl CacheAccessor for it impl CacheAccessor for Arc> where C: StorageCache> { @@ -57,8 +65,14 @@ mod impls { let mut guard = self.write(); guard.evict(k) } + + fn contains_key(&self, k: &str) -> bool { + let guard = self.read(); + 
guard.contains_key(k) + } } + // Wrap an Option, and impl CacheAccessor for it impl CacheAccessor for Option where C: CacheAccessor { @@ -79,5 +93,13 @@ mod impls { false } } + + fn contains_key(&self, k: &str) -> bool { + if let Some(cache) = self { + cache.contains_key(k) + } else { + false + } + } } } diff --git a/src/query/storages/common/cache/src/lib.rs b/src/query/storages/common/cache/src/lib.rs index da3b362d0c9c4..db00ddab4be6b 100644 --- a/src/query/storages/common/cache/src/lib.rs +++ b/src/query/storages/common/cache/src/lib.rs @@ -24,6 +24,8 @@ pub use providers::DiskCacheBuilder; pub use providers::InMemoryBytesCacheHolder; pub use providers::InMemoryCacheBuilder; pub use providers::InMemoryItemCacheHolder; +pub use providers::TableDataCache; +pub use providers::TableDataCacheBuilder; pub use read::CacheKey; pub use read::DiskCacheReader; pub use read::InMemoryBytesCacheReader; diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 5e407b58676a0..5e0f52462a921 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -14,26 +14,14 @@ use std::io::Read; use std::sync::Arc; -use std::thread::JoinHandle; -use common_cache::Cache; pub use common_cache::LruDiskCache as DiskCache; use common_exception::ErrorCode; use common_exception::Result; -use crossbeam_channel::TrySendError; use parking_lot::RwLock; use tracing::error; -use tracing::info; -use tracing::warn; use crate::CacheAccessor; -use crate::InMemoryBytesCacheHolder; -use crate::InMemoryCacheBuilder; - -struct CacheItem { - key: String, - value: Arc>, -} /// Tiered cache which consist of /// A in-memory cache @@ -41,62 +29,38 @@ struct CacheItem { /// A slow disk or redis based persistent cache #[derive(Clone)] pub struct DiskBytesCache { - inner_memory_cache: InMemoryBytesCacheHolder, - inner_external_cache: Arc>, - 
population_queue: crossbeam_channel::Sender, - _cache_populator: DiskCachePopulator, + inner: Arc>, } pub struct DiskCacheBuilder; impl DiskCacheBuilder { - pub fn new_disk_cache( - path: &str, - in_memory_cache_mb_size: u64, - population_queue_size: u32, - disk_cache_size: u64, - ) -> Result { + pub fn new_disk_cache(path: &str, disk_cache_size: u64) -> Result { let external_cache = DiskCache::new(path, disk_cache_size * 1024 * 1024) .map_err(|e| ErrorCode::StorageOther(format!("create disk cache failed, {e}")))?; let inner = Arc::new(RwLock::new(external_cache)); - let (rx, tx) = crossbeam_channel::bounded(population_queue_size as usize); - Ok(DiskBytesCache { - inner_memory_cache: InMemoryCacheBuilder::new_bytes_cache( - in_memory_cache_mb_size * 1024 * 1024, - ), - inner_external_cache: inner.clone(), - population_queue: rx, - _cache_populator: DiskCachePopulator::new(tx, inner, 1)?, - }) + Ok(DiskBytesCache { inner }) } } impl CacheAccessor> for DiskBytesCache { fn get(&self, k: &str) -> Option>> { - // check in memory cache first - { - if let Some(item) = self.inner_memory_cache.get(k) { - return Some(item); - } - } - // check disk cache let read_file = || { let mut file = { - let mut inner = self.inner_external_cache.write(); + let mut inner = self.inner.write(); inner.get_file(k)? 
}; let mut v = vec![]; file.read_to_end(&mut v)?; Ok::<_, Box>(v) }; - match read_file() { Ok(mut bytes) => { if let Err(e) = validate_checksum(bytes.as_slice()) { error!("data cache, of key {k}, crc validation failure: {e}"); { // remove the invalid cache, error of removal ignored - let mut inner = self.inner_external_cache.write(); + let mut inner = self.inner.write(); let _ = inner.remove(k); } None @@ -106,8 +70,6 @@ impl CacheAccessor> for DiskBytesCache { let body_len = total_len - 4; bytes.truncate(body_len); let item = Arc::new(bytes); - // also put the cached item into in-memory cache - self.inner_memory_cache.put(k.to_owned(), item.clone()); Some(item) } } @@ -118,39 +80,18 @@ impl CacheAccessor> for DiskBytesCache { } } - fn put(&self, k: String, v: Arc>) { - // put it into the in-memory cache first - { - let mut in_memory_cache = self.inner_memory_cache.write(); - in_memory_cache.put(k.clone(), v.clone()); - } - - // check if external(disk/redis) already have it. - let contains = { - let external_cache = self.inner_external_cache.read(); - external_cache.contains_key(&k) - }; - - if !contains { - // populate the cache to external cache(disk/redis) asyncly - let msg = CacheItem { key: k, value: v }; - match self.population_queue.try_send(msg) { - Ok(_) => {} - Err(TrySendError::Full(_)) => { - self::metrics::metrics_inc_population_overflow_count(1); - warn!("disk cache population queue is full"); - } - Err(TrySendError::Disconnected(_)) => { - error!("disk cache population thread is down"); - } - } + fn put(&self, key: String, value: Arc>) { + let crc = crc32fast::hash(value.as_slice()); + let crc_bytes = crc.to_le_bytes(); + let mut inner = self.inner.write(); + if let Err(e) = inner.insert_bytes(&key, &[value.as_slice(), &crc_bytes]) { + error!("put disk cache item failed {}", e); } } fn evict(&self, k: &str) -> bool { if let Err(e) = { - self.inner_memory_cache.evict(k); - let mut inner = self.inner_external_cache.write(); + let mut inner = 
self.inner.write(); inner.remove(k) } { error!("evict disk cache item failed {}", e); @@ -159,77 +100,10 @@ impl CacheAccessor> for DiskBytesCache { true } } -} - -#[derive(Clone)] -struct CachePopulationWorker { - cache: Arc>, - population_queue: crossbeam_channel::Receiver, -} - -impl CachePopulationWorker { - fn populate(&self) { - loop { - match self.population_queue.recv() { - Ok(CacheItem { key, value }) => { - { - let inner = self.cache.read(); - if inner.contains_key(&key) { - continue; - } - } - if let Err(e) = { - let crc = crc32fast::hash(value.as_slice()); - let crc_bytes = crc.to_le_bytes(); - let mut inner = self.cache.write(); - inner.insert_bytes(&key, &[value.as_slice(), &crc_bytes]) - } { - error!("populate disk cache failed {}", e); - } else { - self::metrics::metrics_inc_disk_cache_population_count(1); - } - } - Err(_) => { - info!("cache work shutdown"); - break; - } - } - } - } - - fn start(self: Arc) -> Result> { - let thread_builder = std::thread::Builder::new().name("cache-population".to_owned()); - thread_builder.spawn(move || self.populate()).map_err(|e| { - ErrorCode::StorageOther(format!("spawn cache population worker thread failed, {e}")) - }) - } -} - -#[derive(Clone)] -struct DiskCachePopulator { - _workers: Vec>, -} - -impl DiskCachePopulator { - fn new( - incoming: crossbeam_channel::Receiver, - cache: Arc>, - _num_worker_thread: usize, - ) -> Result { - let worker = Arc::new(CachePopulationWorker { - cache, - population_queue: incoming, - }); - let _join_handler = worker.clone().start()?; - Ok(Self { - _workers: vec![worker], - }) - } - #[allow(dead_code)] - pub fn shutdown(&self) { - // by drop the sender - // and timed join the join_handlers + fn contains_key(&self, k: &str) -> bool { + let inner = self.inner.read(); + inner.contains_key(k) } } @@ -256,17 +130,3 @@ fn validate_checksum(bytes: &[u8]) -> Result<()> { } } } - -mod metrics { - use metrics::increment_gauge; - - #[inline] - pub fn 
metrics_inc_population_overflow_count(c: u64) { - increment_gauge!("data_block_cache_population_overflow", c as f64); - } - - #[inline] - pub fn metrics_inc_disk_cache_population_count(c: u64) { - increment_gauge!("data_block_cache_population_overflow", c as f64); - } -} diff --git a/src/query/storages/common/cache/src/providers/memory_cache.rs b/src/query/storages/common/cache/src/providers/memory_cache.rs index 146f532ba53f4..7b14fa36cd22f 100644 --- a/src/query/storages/common/cache/src/providers/memory_cache.rs +++ b/src/query/storages/common/cache/src/providers/memory_cache.rs @@ -64,4 +64,8 @@ where fn evict(&mut self, k: &str) -> bool { self.pop(k).is_some() } + + fn contains_key(&self, k: &str) -> bool { + Cache::contains(self, k) + } } diff --git a/src/query/storages/common/cache/src/providers/mod.rs b/src/query/storages/common/cache/src/providers/mod.rs index aa55bbdd72935..ad9cd2f7ee0a8 100644 --- a/src/query/storages/common/cache/src/providers/mod.rs +++ b/src/query/storages/common/cache/src/providers/mod.rs @@ -14,6 +14,7 @@ mod disk_cache; mod memory_cache; +mod table_data_cache; pub use disk_cache::DiskBytesCache; pub use disk_cache::DiskCache; pub use disk_cache::DiskCacheBuilder; @@ -22,3 +23,5 @@ pub use memory_cache::InMemoryBytesCacheHolder; pub use memory_cache::InMemoryCacheBuilder; pub use memory_cache::InMemoryItemCacheHolder; pub use memory_cache::ItemCache; +pub use table_data_cache::TableDataCache; +pub use table_data_cache::TableDataCacheBuilder; diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs new file mode 100644 index 0000000000000..263ca0cd9a4a1 --- /dev/null +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -0,0 +1,194 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; +use std::thread::JoinHandle; + +pub use common_cache::LruDiskCache as DiskCache; +use common_exception::ErrorCode; +use common_exception::Result; +use crossbeam_channel::TrySendError; +use tracing::error; +use tracing::info; +use tracing::warn; + +use crate::CacheAccessor; +use crate::DiskBytesCache; +use crate::DiskCacheBuilder; +use crate::InMemoryBytesCacheHolder; +use crate::InMemoryCacheBuilder; + +struct CacheItem { + key: String, + value: Arc>, +} + +/// Tiered cache which consist of +/// A in-memory cache +/// A bounded channel that keep the references of items being cached +/// A disk or redis based external cache +#[derive(Clone)] +pub struct TableDataCache { + in_memory_cache: InMemoryBytesCacheHolder, + external_cache: T, + population_queue: crossbeam_channel::Sender, + _cache_populator: DiskCachePopulator, +} + +pub struct TableDataCacheBuilder; +impl TableDataCacheBuilder { + pub fn new_table_data_disk_cache( + path: &str, + in_memory_cache_mb_size: u64, + population_queue_size: u32, + disk_cache_mb_size: u64, + ) -> Result> { + let disk_cache = DiskCacheBuilder::new_disk_cache(path, disk_cache_mb_size)?; + let (rx, tx) = crossbeam_channel::bounded(population_queue_size as usize); + let num_population_thread = 1; + Ok(TableDataCache { + in_memory_cache: InMemoryCacheBuilder::new_bytes_cache( + in_memory_cache_mb_size * 1024 * 1024, + ), + external_cache: disk_cache.clone(), + population_queue: rx, + _cache_populator: DiskCachePopulator::new(tx, disk_cache, num_population_thread)?, + }) + } +} + +impl 
CacheAccessor> for TableDataCache { + fn get(&self, k: &str) -> Option>> { + // check in memory cache first + { + if let Some(item) = self.in_memory_cache.get(k) { + return Some(item); + } + } + + if let Some(item) = self.external_cache.get(k) { + // put item into in-memory cache + self.in_memory_cache.put(k.to_owned(), item.clone()); + Some(item) + } else { + None + } + } + + fn put(&self, k: String, v: Arc>) { + // put it into the in-memory cache first + self.in_memory_cache.put(k.clone(), v.clone()); + + // check if external(disk/redis) already have it. + if !self.external_cache.contains_key(&k) { + // populate the cache to external cache(disk/redis) asyncly + let msg = CacheItem { key: k, value: v }; + match self.population_queue.try_send(msg) { + Ok(_) => {} + Err(TrySendError::Full(_)) => { + self::metrics::metrics_inc_population_overflow_count(1); + warn!("external cache population queue is full"); + } + Err(TrySendError::Disconnected(_)) => { + error!("external cache population thread is down"); + } + } + } + } + + fn evict(&self, k: &str) -> bool { + let r = self.in_memory_cache.evict(k); + let l = self.external_cache.evict(k); + r || l + } + + fn contains_key(&self, k: &str) -> bool { + self.in_memory_cache.contains_key(k) || self.external_cache.contains_key(k) + } +} + +#[derive(Clone)] +struct CachePopulationWorker { + cache: T, + population_queue: crossbeam_channel::Receiver, +} + +impl CachePopulationWorker +where T: CacheAccessor> + Send + Sync + 'static +{ + fn populate(&self) { + loop { + match self.population_queue.recv() { + Ok(CacheItem { key, value }) => { + { + if self.cache.contains_key(&key) { + continue; + } + } + self.cache.put(key, value); + self::metrics::metrics_inc_disk_cache_population_count(1); + } + Err(_) => { + info!("cache work shutdown"); + break; + } + } + } + } + + fn start(self: Arc) -> Result> { + let thread_builder = std::thread::Builder::new().name("cache-population".to_owned()); + thread_builder.spawn(move || 
self.populate()).map_err(|e| { + ErrorCode::StorageOther(format!("spawn cache population worker thread failed, {e}")) + }) + } +} + +#[derive(Clone)] +struct DiskCachePopulator { + _workers: Vec>>, +} + +impl DiskCachePopulator +where T: CacheAccessor> + Send + Sync + 'static +{ + fn new( + incoming: crossbeam_channel::Receiver, + cache: T, + _num_worker_thread: usize, + ) -> Result { + let worker = Arc::new(CachePopulationWorker { + cache, + population_queue: incoming, + }); + let _join_handler = worker.clone().start()?; + Ok(Self { + _workers: vec![worker], + }) + } +} + +mod metrics { + use metrics::increment_gauge; + + #[inline] + pub fn metrics_inc_population_overflow_count(c: u64) { + increment_gauge!("data_block_cache_population_overflow", c as f64); + } + + #[inline] + pub fn metrics_inc_disk_cache_population_count(c: u64) { + increment_gauge!("data_block_cache_population_overflow", c as f64); + } +} diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index a646da1ec4204..b2f8866dcb789 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -170,7 +170,9 @@ impl BlockReader { } pub fn support_blocking_api(&self) -> bool { - self.operator.metadata().can_blocking() + // TODO for testing purpose only, remove this in the final PR + let revert_sync_read = std::env::var("DATABEND_DEBUG_REVERT_SYNC_READ").is_ok(); + revert_sync_read && self.operator.metadata().can_blocking() } /// This is an optimized for data read, works like the Linux kernel io-scheduler IO merging. 
From 248b6639a48ee806c46967294f8eac0685fe4b0e Mon Sep 17 00:00:00 2001 From: dantengsky Date: Tue, 7 Feb 2023 13:17:19 +0800 Subject: [PATCH 18/80] remove debug env var --- src/common/cache/src/todo | 11 ----------- .../storages/fuse/src/io/read/block/block_reader.rs | 4 +--- 2 files changed, 1 insertion(+), 14 deletions(-) delete mode 100644 src/common/cache/src/todo diff --git a/src/common/cache/src/todo b/src/common/cache/src/todo deleted file mode 100644 index 507b353d011b5..0000000000000 --- a/src/common/cache/src/todo +++ /dev/null @@ -1,11 +0,0 @@ -- cache data crc -- metrics -- performance evaluation (done) - - without threshold - - just put bytes to disk (no sync) - - what happens if cache is full? will performance degrade significantly? -- if performance do not meet requirement - - consider make it tiered - - a in-memory moka at front, LFU to keep the candidates -- misc: restart maybe slow if cached a large amount of data, consider make the startup of data cache layer async - silently drop cache admissions during starting up \ No newline at end of file diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index b2f8866dcb789..a646da1ec4204 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -170,9 +170,7 @@ impl BlockReader { } pub fn support_blocking_api(&self) -> bool { - // TODO for testing purpose only, remove this in the final PR - let revert_sync_read = std::env::var("DATABEND_DEBUG_REVERT_SYNC_READ").is_ok(); - revert_sync_read && self.operator.metadata().can_blocking() + self.operator.metadata().can_blocking() } /// This is an optimized for data read, works like the Linux kernel io-scheduler IO merging. 
From 5eadd481dcd6954472f5f390108d843f67a3461a Mon Sep 17 00:00:00 2001 From: dantengsky Date: Tue, 7 Feb 2023 20:55:07 +0800 Subject: [PATCH 19/80] rename table_data_cache setting name & fix ut --- src/query/config/src/inner.rs | 14 +++++------ src/query/config/src/outer_v0.rs | 24 +++++++++---------- .../storages/testdata/configs_table_basic.txt | 6 ++--- .../common/cache-manager/src/cache_manager.rs | 2 +- .../fuse/src/io/read/block/block_reader.rs | 1 - 5 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 4642c22eec6a0..1d6331c7c0f69 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -163,8 +163,8 @@ pub struct QueryConfig { pub table_cache_bloom_index_meta_count: u64, /// Max number of cached bloom index filters pub table_cache_bloom_index_filter_count: u64, - /// Table data cache disabled - pub table_data_cache_disabled: bool, + /// Indicates if table data cache is enabled + pub table_data_cache_enabled: bool, /// Max bytes of table data cached in memory (MB) pub table_data_cache_in_memory_mb_size: u64, /// Table disk cache folder root @@ -223,15 +223,16 @@ impl Default for QueryConfig { table_meta_cache_enabled: true, table_cache_block_meta_count: 102400, table_memory_cache_mb_size: 256, - table_disk_cache_root: "_cache".to_string(), - table_data_cache_population_queue_size: 65536, - table_disk_cache_mb_size: 10240, table_cache_snapshot_count: 256, table_cache_statistic_count: 256, table_cache_segment_count: 10240, table_cache_bloom_index_meta_count: 3000, table_cache_bloom_index_filter_count: 1024 * 1024, - table_data_cache_disabled: true, + table_data_cache_enabled: false, + table_data_cache_population_queue_size: 65536, + table_disk_cache_root: "_cache".to_string(), + table_disk_cache_mb_size: 20 * 1024, + table_data_cache_in_memory_mb_size: 2 * 1024, management_mode: false, jwt_key_file: "".to_string(), async_insert_max_data_size: 10000, @@ -242,7 
+243,6 @@ impl Default for QueryConfig { share_endpoint_auth_token_file: "".to_string(), tenant_quota: None, internal_enable_sandbox_tenant: false, - table_data_cache_in_memory_mb_size: 1024 * 10, } } } diff --git a/src/query/config/src/outer_v0.rs b/src/query/config/src/outer_v0.rs index eea3624e27cbf..fc00684954bf4 100644 --- a/src/query/config/src/outer_v0.rs +++ b/src/query/config/src/outer_v0.rs @@ -1274,7 +1274,7 @@ pub struct QueryConfig { pub table_disk_cache_root: String, /// Table disk cache size (mb) - #[clap(long, default_value = "10240")] + #[clap(long, default_value = "20480")] pub table_disk_cache_mb_size: u64, /// Max number of cached table snapshot @@ -1301,13 +1301,13 @@ pub struct QueryConfig { #[clap(long, default_value = "1048576")] pub table_cache_bloom_index_filter_count: u64, - /// Table data cached disabled, default true - #[clap(long, default_value = "true")] - pub table_data_cache_disabled: bool, + /// Indicates if table data cached is enabled, default false + #[clap(long)] + pub table_data_cache_enabled: bool, /// Max bytes of table data cached in memory (MB) - /// default value 10240 MB, or 10G - #[clap(long, default_value = "10240")] + /// default value 2048 MB, or 2G + #[clap(long, default_value = "2048")] pub table_data_cache_in_memory_mb_size: u64, /// Max item that could be pending in the external cache population queue @@ -1394,15 +1394,16 @@ impl TryInto for QueryConfig { table_meta_cache_enabled: self.table_meta_cache_enabled, table_cache_block_meta_count: self.table_cache_block_meta_count, table_memory_cache_mb_size: self.table_memory_cache_mb_size, - table_disk_cache_root: self.table_disk_cache_root, - table_data_cache_population_queue_size: self.table_data_cache_population_queue_size, - table_disk_cache_mb_size: self.table_disk_cache_mb_size, table_cache_snapshot_count: self.table_cache_snapshot_count, table_cache_statistic_count: self.table_cache_statistic_count, table_cache_segment_count: self.table_cache_segment_count, 
table_cache_bloom_index_meta_count: self.table_cache_bloom_index_meta_count, table_cache_bloom_index_filter_count: self.table_cache_bloom_index_filter_count, - table_data_cache_disabled: self.table_data_cache_disabled, + table_data_cache_enabled: self.table_data_cache_enabled, + table_data_cache_population_queue_size: self.table_data_cache_population_queue_size, + table_data_cache_in_memory_mb_size: self.table_data_cache_in_memory_mb_size, + table_disk_cache_root: self.table_disk_cache_root, + table_disk_cache_mb_size: self.table_disk_cache_mb_size, management_mode: self.management_mode, jwt_key_file: self.jwt_key_file, async_insert_max_data_size: self.async_insert_max_data_size, @@ -1415,7 +1416,6 @@ impl TryInto for QueryConfig { share_endpoint_auth_token_file: self.share_endpoint_auth_token_file, tenant_quota: self.quota, internal_enable_sandbox_tenant: self.internal_enable_sandbox_tenant, - table_data_cache_in_memory_mb_size: self.table_data_cache_in_memory_mb_size, }) } } @@ -1469,7 +1469,7 @@ impl From for QueryConfig { table_cache_segment_count: inner.table_cache_segment_count, table_cache_bloom_index_meta_count: inner.table_cache_bloom_index_meta_count, table_cache_bloom_index_filter_count: inner.table_cache_bloom_index_filter_count, - table_data_cache_disabled: inner.table_meta_cache_enabled, + table_data_cache_enabled: inner.table_data_cache_enabled, table_data_cache_in_memory_mb_size: inner.table_data_cache_in_memory_mb_size, table_data_cache_population_queue_size: inner.table_data_cache_population_queue_size, management_mode: inner.management_mode, diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index 9a3bf22b4f14f..8e3283205aa09 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -66,10 +66,10 @@ DB.Table: 'system'.'configs', Table: 
configs-table_id:1, ver:0, Engine: SystemCo | "query" | "table_cache_segment_count" | "10240" | "" | | "query" | "table_cache_snapshot_count" | "256" | "" | | "query" | "table_cache_statistic_count" | "256" | "" | -| "query" | "table_data_cache_disabled" | "true" | "" | -| "query" | "table_data_cache_in_memory_mb_size" | "10240" | "" | +| "query" | "table_data_cache_enabled" | "false" | "" | +| "query" | "table_data_cache_in_memory_mb_size" | "2048" | "" | | "query" | "table_data_cache_population_queue_size" | "65536" | "" | -| "query" | "table_disk_cache_mb_size" | "10240" | "" | +| "query" | "table_disk_cache_mb_size" | "20480" | "" | | "query" | "table_disk_cache_root" | "_cache" | "" | | "query" | "table_engine_memory_enabled" | "true" | "" | | "query" | "table_memory_cache_mb_size" | "256" | "" | diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index b452e1046b291..45b213ca036f0 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -46,7 +46,7 @@ pub struct CacheManager { impl CacheManager { /// Initialize the caches according to the relevant configurations. pub fn init(config: &QueryConfig) -> Result<()> { - let block_data_cache = if config.table_data_cache_disabled { + let block_data_cache = if config.table_data_cache_enabled { None } else { Self::new_block_data_cache( diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index a646da1ec4204..f3af225d9ebfc 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -325,7 +325,6 @@ impl BlockReader { data_from_cache.push((*column_id as ColumnId, cached_column_raw_data)); } else { metrics_inc_cache_miss_count(1, cache_name); - // TODO , where is the None branch? 
if let Some(column_meta) = columns_meta.get(column_id) { let (offset, len) = column_meta.offset_length(); ranges.push((*column_id, offset..(offset + len))); From 776446d2e90b833184149b3c4908957c364e71b1 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Tue, 7 Feb 2023 23:23:06 +0800 Subject: [PATCH 20/80] fix cache population thread number to 1 --- .../cache/src/providers/table_data_cache.rs | 32 ++++++++----------- .../src/io/read/block/block_reader_parquet.rs | 3 +- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index 263ca0cd9a4a1..8275892824d59 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -35,15 +35,15 @@ struct CacheItem { } /// Tiered cache which consist of -/// A in-memory cache -/// A bounded channel that keep the references of items being cached -/// A disk or redis based external cache +/// - a in-memory cache +/// - a disk or redis based external cache +/// - a bounded channel that keep the references of items being cached #[derive(Clone)] pub struct TableDataCache { in_memory_cache: InMemoryBytesCacheHolder, external_cache: T, population_queue: crossbeam_channel::Sender, - _cache_populator: DiskCachePopulator, + _cache_populator: DiskCachePopulator, } pub struct TableDataCacheBuilder; @@ -56,11 +56,10 @@ impl TableDataCacheBuilder { ) -> Result> { let disk_cache = DiskCacheBuilder::new_disk_cache(path, disk_cache_mb_size)?; let (rx, tx) = crossbeam_channel::bounded(population_queue_size as usize); + let in_memory_cache_bytes_size = in_memory_cache_mb_size * 1024 * 1024; let num_population_thread = 1; Ok(TableDataCache { - in_memory_cache: InMemoryCacheBuilder::new_bytes_cache( - in_memory_cache_mb_size * 1024 * 1024, - ), + in_memory_cache: 
InMemoryCacheBuilder::new_bytes_cache(in_memory_cache_bytes_size), external_cache: disk_cache.clone(), population_queue: rx, _cache_populator: DiskCachePopulator::new(tx, disk_cache, num_population_thread)?, @@ -156,26 +155,23 @@ where T: CacheAccessor> + Send + Sync + 'static } #[derive(Clone)] -struct DiskCachePopulator { - _workers: Vec>>, -} +struct DiskCachePopulator; -impl DiskCachePopulator -where T: CacheAccessor> + Send + Sync + 'static -{ - fn new( +impl DiskCachePopulator { + fn new( incoming: crossbeam_channel::Receiver, cache: T, _num_worker_thread: usize, - ) -> Result { + ) -> Result + where + T: CacheAccessor> + Send + Sync + 'static, + { let worker = Arc::new(CachePopulationWorker { cache, population_queue: incoming, }); let _join_handler = worker.clone().start()?; - Ok(Self { - _workers: vec![worker], - }) + Ok(Self) } } diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index 3851bce8e1cb2..af1bf1b8d8e7f 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -142,9 +142,8 @@ impl BlockReader { let mut column_descriptors = Vec::with_capacity(indices.len()); for (i, index) in indices.iter().enumerate() { let column_id = column.leaf_column_id(i); - // TODO where is the None branch? if let Some(column_meta) = columns_meta.get(&column_id) { - // TODO why index is used here? + // TODO why index is used here? 
need @LiChuang review let column_id_in_question = *index as ColumnId; let column_read = <&[u8]>::clone(&chunk_map[&column_id_in_question]); let column_descriptor = &self.parquet_schema_descriptor.columns()[*index]; From ac83c28bff855c782de508b003341d5999799e9f Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 8 Feb 2023 00:22:14 +0800 Subject: [PATCH 21/80] refactor: move metrics into TableDataCache --- src/query/storages/common/cache/src/cache.rs | 13 +++-- src/query/storages/common/cache/src/lib.rs | 1 + .../storages/common/cache/src/metrics.rs | 4 ++ .../common/cache/src/providers/disk_cache.rs | 3 +- .../common/cache/src/providers/mod.rs | 1 + .../cache/src/providers/table_data_cache.rs | 51 ++++++++++++------- .../fuse/src/io/read/block/block_reader.rs | 4 +- 7 files changed, 49 insertions(+), 28 deletions(-) diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index d4a4030f03152..71e4be827bcfa 100644 --- a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -16,7 +16,7 @@ use std::sync::Arc; // The cache accessor, crate users usually working on this interface while manipulating caches pub trait CacheAccessor { - fn get(&self, k: &str) -> Option>; + fn get>(&self, k: Q) -> Option>; fn put(&self, key: K, value: Arc); fn evict(&self, k: &str) -> bool; fn contains_key(&self, _k: &str) -> bool; @@ -26,13 +26,12 @@ pub trait CacheAccessor { // note this interface working on mutable self reference pub trait StorageCache { type Meter; - // TODO: remove this assoc type type CacheEntry; - fn put(&mut self, key: K, value: Arc); - fn get(&mut self, k: &str) -> Option; + fn put(&mut self, key: K, value: Arc); + fn evict(&mut self, k: &str) -> bool; fn contains_key(&self, k: &str) -> bool; @@ -51,9 +50,9 @@ mod impls { impl CacheAccessor for Arc> where C: StorageCache> { - fn get(&self, k: &str) -> Option { + fn get>(&self, k: Q) -> Option> { let mut guard = self.write(); 
- guard.get(k) + guard.get(k.as_ref()) } fn put(&self, k: String, v: Arc) { @@ -76,7 +75,7 @@ mod impls { impl CacheAccessor for Option where C: CacheAccessor { - fn get(&self, k: &str) -> Option> { + fn get>(&self, k: Q) -> Option> { self.as_ref().and_then(|cache| cache.get(k)) } diff --git a/src/query/storages/common/cache/src/lib.rs b/src/query/storages/common/cache/src/lib.rs index db00ddab4be6b..f4afc6a5ec4a4 100644 --- a/src/query/storages/common/cache/src/lib.rs +++ b/src/query/storages/common/cache/src/lib.rs @@ -26,6 +26,7 @@ pub use providers::InMemoryCacheBuilder; pub use providers::InMemoryItemCacheHolder; pub use providers::TableDataCache; pub use providers::TableDataCacheBuilder; +pub use providers::TableDataColumnCacheKey; pub use read::CacheKey; pub use read::DiskCacheReader; pub use read::InMemoryBytesCacheReader; diff --git a/src/query/storages/common/cache/src/metrics.rs b/src/query/storages/common/cache/src/metrics.rs index 92386fc7771df..5dd7fedc36a40 100644 --- a/src/query/storages/common/cache/src/metrics.rs +++ b/src/query/storages/common/cache/src/metrics.rs @@ -36,3 +36,7 @@ pub fn metrics_inc_cache_miss_load_millisecond(c: u64, cache_name: &str) { pub fn metrics_inc_cache_hit_count(c: u64, cache_name: &str) { increment_gauge!(key_str(cache_name, "memory_hit_count"), c as f64); } + +pub fn metrics_inc_cache_population_pending_count(c: i64, cache_name: &str) { + increment_gauge!(key_str(cache_name, "population_pending_count"), c as f64); +} diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 5e0f52462a921..1e6390d2a5bd7 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -43,7 +43,8 @@ impl DiskCacheBuilder { } } impl CacheAccessor> for DiskBytesCache { - fn get(&self, k: &str) -> Option>> { + fn get>(&self, k: Q) -> Option>> { + let k = k.as_ref(); // check disk cache let
read_file = || { let mut file = { diff --git a/src/query/storages/common/cache/src/providers/mod.rs b/src/query/storages/common/cache/src/providers/mod.rs index ad9cd2f7ee0a8..a2125a688424f 100644 --- a/src/query/storages/common/cache/src/providers/mod.rs +++ b/src/query/storages/common/cache/src/providers/mod.rs @@ -25,3 +25,4 @@ pub use memory_cache::InMemoryItemCacheHolder; pub use memory_cache::ItemCache; pub use table_data_cache::TableDataCache; pub use table_data_cache::TableDataCacheBuilder; +pub use table_data_cache::TableDataColumnCacheKey; diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index 8275892824d59..3c0fad3e22642 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -23,6 +23,10 @@ use tracing::error; use tracing::info; use tracing::warn; +use crate::metrics_inc_cache_access_count; +use crate::metrics_inc_cache_hit_count; +use crate::metrics_inc_cache_miss_count; +use crate::metrics_inc_cache_population_pending_count; use crate::CacheAccessor; use crate::DiskBytesCache; use crate::DiskCacheBuilder; @@ -34,6 +38,24 @@ struct CacheItem { value: Arc>, } +pub struct TableDataColumnCacheKey { + cache_key: String, +} + +impl TableDataColumnCacheKey { + pub fn new(block_path: &str, column_id: u32) -> Self { + Self { + cache_key: format!("{block_path}-{column_id}"), + } + } +} + +impl AsRef for TableDataColumnCacheKey { + fn as_ref(&self) -> &str { + &self.cache_key + } +} + /// Tiered cache which consist of /// - a in-memory cache /// - a disk or redis based external cache @@ -46,6 +68,8 @@ pub struct TableDataCache { _cache_populator: DiskCachePopulator, } +const TABLE_DATA_CACHE_NAME: &str = "table_data_cache"; + pub struct TableDataCacheBuilder; impl TableDataCacheBuilder { pub fn new_table_data_disk_cache( @@ -68,10 +92,13 @@ impl TableDataCacheBuilder { 
} impl CacheAccessor> for TableDataCache { - fn get(&self, k: &str) -> Option>> { + fn get>(&self, k: Q) -> Option>> { + metrics_inc_cache_access_count(1, TABLE_DATA_CACHE_NAME); + let k = k.as_ref(); // check in memory cache first { if let Some(item) = self.in_memory_cache.get(k) { + metrics_inc_cache_hit_count(1, TABLE_DATA_CACHE_NAME); return Some(item); } } @@ -79,8 +106,10 @@ impl CacheAccessor> for TableDataCache { if let Some(item) = self.external_cache.get(k) { // put item into in-memory cache self.in_memory_cache.put(k.to_owned(), item.clone()); + metrics_inc_cache_hit_count(1, TABLE_DATA_CACHE_NAME); Some(item) } else { + metrics_inc_cache_miss_count(1, TABLE_DATA_CACHE_NAME); None } } @@ -96,7 +125,7 @@ impl CacheAccessor> for TableDataCache { match self.population_queue.try_send(msg) { Ok(_) => {} Err(TrySendError::Full(_)) => { - self::metrics::metrics_inc_population_overflow_count(1); + metrics_inc_cache_population_pending_count(1, TABLE_DATA_CACHE_NAME); warn!("external cache population queue is full"); } Err(TrySendError::Disconnected(_)) => { @@ -136,7 +165,7 @@ where T: CacheAccessor> + Send + Sync + 'static } } self.cache.put(key, value); - self::metrics::metrics_inc_disk_cache_population_count(1); + metrics_inc_cache_population_pending_count(-1, TABLE_DATA_CACHE_NAME); } Err(_) => { info!("cache work shutdown"); @@ -170,21 +199,7 @@ impl DiskCachePopulator { cache, population_queue: incoming, }); - let _join_handler = worker.clone().start()?; + let _join_handler = worker.start()?; Ok(Self) } } - -mod metrics { - use metrics::increment_gauge; - - #[inline] - pub fn metrics_inc_population_overflow_count(c: u64) { - increment_gauge!("data_block_cache_population_overflow", c as f64); - } - - #[inline] - pub fn metrics_inc_disk_cache_population_count(c: u64) { - increment_gauge!("data_block_cache_population_overflow", c as f64); - } -} diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs 
b/src/query/storages/fuse/src/io/read/block/block_reader.rs index f3af225d9ebfc..de9b9e769be6e 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -42,6 +42,7 @@ use storages_common_cache::metrics_inc_cache_access_count; use storages_common_cache::metrics_inc_cache_hit_count; use storages_common_cache::metrics_inc_cache_miss_count; use storages_common_cache::CacheAccessor; +use storages_common_cache::TableDataColumnCacheKey; use storages_common_cache_manager::CacheManager; use storages_common_table_meta::meta::ColumnMeta; @@ -316,8 +317,7 @@ impl BlockReader { let block_data_cache = CacheManager::instance().get_block_data_cache(); let mut data_from_cache = vec![]; for (_index, (column_id, ..)) in self.project_indices.iter() { - // TODO encapsulate this in another component - let column_cache_key = format!("{location}-{column_id}"); + let column_cache_key = TableDataColumnCacheKey::new(location, *column_id); let cache_name = "data_block_cache"; metrics_inc_cache_access_count(1, cache_name); if let Some(cached_column_raw_data) = block_data_cache.get(&column_cache_key) { From bfb1eff56d35f80c29ce09ff3c133d7ffd2f521c Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 8 Feb 2023 10:53:27 +0800 Subject: [PATCH 22/80] cleanup unused metrics --- .../common/cache-manager/src/caches.rs | 2 +- src/query/storages/common/cache/src/cache.rs | 4 +- .../fuse/src/io/read/block/block_reader.rs | 42 +++++++++---------- .../src/io/read/bloom/column_filter_reader.rs | 2 - 4 files changed, 22 insertions(+), 28 deletions(-) diff --git a/src/query/storages/common/cache-manager/src/caches.rs b/src/query/storages/common/cache-manager/src/caches.rs index 0a9db2c8f1eb9..6f221f342f8d9 100644 --- a/src/query/storages/common/cache-manager/src/caches.rs +++ b/src/query/storages/common/cache-manager/src/caches.rs @@ -28,7 +28,7 @@ pub type SegmentInfoCache = InMemoryItemCacheHolder; pub type TableSnapshotCache 
= InMemoryItemCacheHolder; /// In memory object cache of TableSnapshotStatistics pub type TableSnapshotStatisticCache = InMemoryItemCacheHolder; -/// In memory data cache of bloom index data. +/// In memory object cache of bloom filter. /// For each indexed data block, the bloom xor8 filter of column is cached individually pub type BloomIndexFilterCache = InMemoryItemCacheHolder; pub struct BloomIndexMeta(pub FileMetaData); diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index 71e4be827bcfa..02db11e49c496 100644 --- a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -22,8 +22,8 @@ pub trait CacheAccessor { fn contains_key(&self, _k: &str) -> bool; } -// The minimum interface that cache providers should implement -// note this interface working on mutable self reference +// TODO rename this, and move it into another mod +// or consider remove this trait pub trait StorageCache { type Meter; type CacheEntry; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index de9b9e769be6e..f4ac5de6413f3 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -38,9 +38,6 @@ use common_storage::ColumnNodes; use futures::future::try_join_all; use opendal::Object; use opendal::Operator; -use storages_common_cache::metrics_inc_cache_access_count; -use storages_common_cache::metrics_inc_cache_hit_count; -use storages_common_cache::metrics_inc_cache_miss_count; use storages_common_cache::CacheAccessor; use storages_common_cache::TableDataColumnCacheKey; use storages_common_cache_manager::CacheManager; @@ -85,10 +82,10 @@ impl OwnerMemory { pub struct MergeIOReadResult where Self: 'static { - path: String, + block_path: String, + columns_chunk_positions: HashMap)>, owner_memory: OwnerMemory, - columns_chunks: HashMap)>, - 
cached_columns: CachedColumnData, + cached_column_data: CachedColumnData, } pub type CachedColumnData = Vec<(ColumnId, Arc>)>; @@ -98,22 +95,24 @@ where Self: 'static { pub fn create(owner_memory: OwnerMemory, capacity: usize, path: String) -> MergeIOReadResult { MergeIOReadResult { - path, + block_path: path, + columns_chunk_positions: HashMap::with_capacity(capacity), owner_memory, - columns_chunks: HashMap::with_capacity(capacity), - cached_columns: vec![], + cached_column_data: vec![], } } pub fn columns_chunks(&self) -> Result> { - let mut res = Vec::with_capacity(self.columns_chunks.len()); + let mut res = Vec::with_capacity(self.columns_chunk_positions.len()); - for (column_idx, (chunk_idx, range)) in &self.columns_chunks { - let chunk = self.owner_memory.get_chunk(*chunk_idx, &self.path)?; + // merge column data fetched from object storage + for (column_idx, (chunk_idx, range)) in &self.columns_chunk_positions { + let chunk = self.owner_memory.get_chunk(*chunk_idx, &self.block_path)?; res.push((*column_idx, &chunk[range.clone()])); } - for (column_id, data) in &self.cached_columns { + // merge column data from cache + for (column_id, data) in &self.cached_column_data { res.push((*column_id, data.as_slice())) } @@ -125,14 +124,15 @@ where Self: 'static } pub fn add_column_chunk(&mut self, chunk: usize, column_id: ColumnId, range: Range) { - if let Ok(chunk_data) = self.get_chunk(chunk, &self.path) { + if let Ok(chunk_data) = self.get_chunk(chunk, &self.block_path) { let cache = CacheManager::instance().get_block_data_cache(); - let cache_key = format!("{}-{}", self.path, column_id); + let cache_key = TableDataColumnCacheKey::new(&self.block_path, column_id); let data = &chunk_data[range.clone()]; - // TODO NO, use a &[u8] to pass data to cache - cache.put(cache_key, Arc::new(data.to_vec())); + // TODO api is NOT type safe + cache.put(cache_key.as_ref().to_owned(), Arc::new(data.to_vec())); } - self.columns_chunks.insert(column_id, (chunk, range)); + 
self.columns_chunk_positions + .insert(column_id, (chunk, range)); } } @@ -318,13 +318,9 @@ impl BlockReader { let mut data_from_cache = vec![]; for (_index, (column_id, ..)) in self.project_indices.iter() { let column_cache_key = TableDataColumnCacheKey::new(location, *column_id); - let cache_name = "data_block_cache"; - metrics_inc_cache_access_count(1, cache_name); if let Some(cached_column_raw_data) = block_data_cache.get(&column_cache_key) { - metrics_inc_cache_hit_count(1, cache_name); data_from_cache.push((*column_id as ColumnId, cached_column_raw_data)); } else { - metrics_inc_cache_miss_count(1, cache_name); if let Some(column_meta) = columns_meta.get(column_id) { let (offset, len) = column_meta.offset_length(); ranges.push((*column_id, offset..(offset + len))); @@ -341,7 +337,7 @@ impl BlockReader { let object = self.operator.object(location); let mut merge_io_read_res = Self::merge_io_read(settings, object, ranges).await?; - merge_io_read_res.cached_columns = data_from_cache; + merge_io_read_res.cached_column_data = data_from_cache; Ok(merge_io_read_res) } diff --git a/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs b/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs index 62d1be1f29d0d..704faea2a1476 100644 --- a/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs +++ b/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs @@ -44,8 +44,6 @@ type CachedReader = InMemoryItemCacheReader; /// Load the filter of a given bloom index column. 
Also /// - generates the proper cache key /// - takes cares of getting the correct cache instance from [CacheManager] -/// -/// this could be generified to be the template of cached data block column reader pub struct BloomColumnFilterReader { cached_reader: CachedReader, param: LoadParams, From 00318340f24f2edc0b65e68bb985d3d199937e51 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 8 Feb 2023 17:19:31 +0800 Subject: [PATCH 23/80] cleanup traits --- .../common/cache-manager/src/caches.rs | 3 + src/query/storages/common/cache/src/cache.rs | 94 +++---------------- src/query/storages/common/cache/src/lib.rs | 2 +- .../common/cache/src/providers/disk_cache.rs | 3 +- .../cache/src/providers/memory_cache.rs | 83 ++++++++++++---- .../cache/src/providers/table_data_cache.rs | 9 +- .../common/cache/src/read/cached_reader.rs | 60 +++++------- .../storages/common/cache/src/read/mod.rs | 2 +- .../storages/common/cache/src/read/readers.rs | 11 +-- .../fuse/src/io/read/block/block_reader.rs | 33 +++---- 10 files changed, 133 insertions(+), 167 deletions(-) diff --git a/src/query/storages/common/cache-manager/src/caches.rs b/src/query/storages/common/cache-manager/src/caches.rs index 6f221f342f8d9..041733bf63b1a 100644 --- a/src/query/storages/common/cache-manager/src/caches.rs +++ b/src/query/storages/common/cache-manager/src/caches.rs @@ -37,6 +37,9 @@ pub type BloomIndexMetaCache = InMemoryItemCacheHolder; /// In memory object cache of parquet FileMetaData of external parquet files pub type FileMetaDataCache = InMemoryItemCacheHolder; +/// In memory object cache of deserialized column arrays +pub type ColumnArrayCache = InMemoryItemCacheHolder; + // Bind Type of cached objects to Caches // // The `Cache` returned should diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index 02db11e49c496..3903491b34edb 100644 --- a/src/query/storages/common/cache/src/cache.rs +++
b/src/query/storages/common/cache/src/cache.rs @@ -12,93 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::hash::BuildHasher; +use std::hash::Hash; use std::sync::Arc; +use common_cache::Count; +use common_cache::CountableMeter; +use common_cache::DefaultHashBuilder; + // The cache accessor, crate users usually working on this interface while manipulating caches -pub trait CacheAccessor { +pub trait CacheAccessor +where + K: Eq + Hash, + S: BuildHasher, + M: CountableMeter>, +{ fn get>(&self, k: Q) -> Option>; fn put(&self, key: K, value: Arc); fn evict(&self, k: &str) -> bool; fn contains_key(&self, _k: &str) -> bool; } - -// TODO rename this, and move it into another mod -// or consider remove this trait -pub trait StorageCache { - type Meter; - type CacheEntry; - - fn get(&mut self, k: &str) -> Option; - - fn put(&mut self, key: K, value: Arc); - - fn evict(&mut self, k: &str) -> bool; - - fn contains_key(&self, k: &str) -> bool; -} - -// default impls -mod impls { - use std::sync::Arc; - - use parking_lot::RwLock; - - use crate::cache::CacheAccessor; - use crate::cache::StorageCache; - - // Wrap a StorageCache with RwLock, and impl CacheAccessor for it - impl CacheAccessor for Arc> - where C: StorageCache> - { - fn get>(&self, k: Q) -> Option> { - let mut guard = self.write(); - guard.get(k.as_ref()) - } - - fn put(&self, k: String, v: Arc) { - let mut guard = self.write(); - guard.put(k, v); - } - - fn evict(&self, k: &str) -> bool { - let mut guard = self.write(); - guard.evict(k) - } - - fn contains_key(&self, k: &str) -> bool { - let guard = self.read(); - guard.contains_key(k) - } - } - - // Wrap an Option, and impl CacheAccessor for it - impl CacheAccessor for Option - where C: CacheAccessor - { - fn get>(&self, k: Q) -> Option> { - self.as_ref().and_then(|cache| cache.get(k)) - } - - fn put(&self, k: String, v: Arc) { - if let Some(cache) = self { - cache.put(k, v); - } - } - - fn 
evict(&self, k: &str) -> bool { - if let Some(cache) = self { - cache.evict(k) - } else { - false - } - } - - fn contains_key(&self, k: &str) -> bool { - if let Some(cache) = self { - cache.contains_key(k) - } else { - false - } - } - } -} diff --git a/src/query/storages/common/cache/src/lib.rs b/src/query/storages/common/cache/src/lib.rs index f4afc6a5ec4a4..532a111c23ffb 100644 --- a/src/query/storages/common/cache/src/lib.rs +++ b/src/query/storages/common/cache/src/lib.rs @@ -28,7 +28,7 @@ pub use providers::TableDataCache; pub use providers::TableDataCacheBuilder; pub use providers::TableDataColumnCacheKey; pub use read::CacheKey; -pub use read::DiskCacheReader; +pub use read::CachedReader; pub use read::InMemoryBytesCacheReader; pub use read::InMemoryItemCacheReader; pub use read::LoadParams; diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 1e6390d2a5bd7..257eb51008746 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -15,6 +15,7 @@ use std::io::Read; use std::sync::Arc; +use common_cache::Count; pub use common_cache::LruDiskCache as DiskCache; use common_exception::ErrorCode; use common_exception::Result; @@ -42,7 +43,7 @@ impl DiskCacheBuilder { } } -impl CacheAccessor> for DiskBytesCache { +impl CacheAccessor, common_cache::DefaultHashBuilder, Count> for DiskBytesCache { fn get>(&self, k: Q) -> Option>> { let k = k.as_ref(); // check disk cache diff --git a/src/query/storages/common/cache/src/providers/memory_cache.rs b/src/query/storages/common/cache/src/providers/memory_cache.rs index 7b14fa36cd22f..fbabe06055dd6 100644 --- a/src/query/storages/common/cache/src/providers/memory_cache.rs +++ b/src/query/storages/common/cache/src/providers/memory_cache.rs @@ -23,8 +23,6 @@ use common_cache::DefaultHashBuilder; use common_cache::LruCache; use parking_lot::RwLock; -use 
crate::cache::StorageCache; - pub type ItemCache = LruCache, DefaultHashBuilder, Count>; pub type BytesCache = LruCache>, DefaultHashBuilder, BytesMeter>; @@ -45,27 +43,74 @@ impl InMemoryCacheBuilder { } } -impl StorageCache for LruCache, S, M> -where - M: CountableMeter>, - S: BuildHasher, -{ - type Meter = M; - type CacheEntry = Arc; +// default impls +mod impls { + use std::sync::Arc; - fn put(&mut self, key: String, value: Arc) { - Cache::put(self, key, value); - } + use parking_lot::RwLock; - fn get(&mut self, k: &str) -> Option { - Cache::get(self, k).cloned() - } + use super::*; + use crate::cache::CacheAccessor; + + // Wrap a Cache with RwLock, and impl CacheAccessor for it + impl CacheAccessor for Arc> + where + C: Cache, S, M>, + M: CountableMeter>, + S: BuildHasher, + { + fn get>(&self, k: Q) -> Option> { + let mut guard = self.write(); + guard.get(k.as_ref()).cloned() + } + + fn put(&self, k: String, v: Arc) { + let mut guard = self.write(); + guard.put(k, v); + } - fn evict(&mut self, k: &str) -> bool { - self.pop(k).is_some() + fn evict(&self, k: &str) -> bool { + let mut guard = self.write(); + guard.pop(k).is_some() + } + + fn contains_key(&self, k: &str) -> bool { + let guard = self.read(); + guard.contains(k) + } } - fn contains_key(&self, k: &str) -> bool { - Cache::contains(self, k) + // Wrap an Option, and impl CacheAccessor for it + impl CacheAccessor for Option + where + C: CacheAccessor, + M: CountableMeter>, + S: BuildHasher, + { + fn get>(&self, k: Q) -> Option> { + self.as_ref().and_then(|cache| cache.get(k)) + } + + fn put(&self, k: String, v: Arc) { + if let Some(cache) = self { + cache.put(k, v); + } + } + + fn evict(&self, k: &str) -> bool { + if let Some(cache) = self { + cache.evict(k) + } else { + false + } + } + + fn contains_key(&self, k: &str) -> bool { + if let Some(cache) = self { + cache.contains_key(k) + } else { + false + } + } } } diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs 
b/src/query/storages/common/cache/src/providers/table_data_cache.rs index 3c0fad3e22642..88f4580858ec4 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -15,6 +15,8 @@ use std::sync::Arc; use std::thread::JoinHandle; +use common_cache::Count; +use common_cache::DefaultHashBuilder; pub use common_cache::LruDiskCache as DiskCache; use common_exception::ErrorCode; use common_exception::Result; @@ -91,7 +93,7 @@ impl TableDataCacheBuilder { } } -impl CacheAccessor> for TableDataCache { +impl CacheAccessor, DefaultHashBuilder, Count> for TableDataCache { fn get>(&self, k: Q) -> Option>> { metrics_inc_cache_access_count(1, TABLE_DATA_CACHE_NAME); let k = k.as_ref(); @@ -146,14 +148,13 @@ impl CacheAccessor> for TableDataCache { } } -#[derive(Clone)] struct CachePopulationWorker { cache: T, population_queue: crossbeam_channel::Receiver, } impl CachePopulationWorker -where T: CacheAccessor> + Send + Sync + 'static +where T: CacheAccessor, DefaultHashBuilder, Count> + Send + Sync + 'static { fn populate(&self) { loop { @@ -193,7 +194,7 @@ impl DiskCachePopulator { _num_worker_thread: usize, ) -> Result where - T: CacheAccessor> + Send + Sync + 'static, + T: CacheAccessor, DefaultHashBuilder, Count> + Send + Sync + 'static, { let worker = Arc::new(CachePopulationWorker { cache, diff --git a/src/query/storages/common/cache/src/read/cached_reader.rs b/src/query/storages/common/cache/src/read/cached_reader.rs index 4cebc6e99b360..93386de9439f2 100644 --- a/src/query/storages/common/cache/src/read/cached_reader.rs +++ b/src/query/storages/common/cache/src/read/cached_reader.rs @@ -12,65 +12,62 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::marker::PhantomData; +use std::hash::BuildHasher; use std::sync::Arc; use std::time::Instant; +use common_cache::CountableMeter; +use common_cache::LruCache; use common_exception::Result; use parking_lot::RwLock; use super::loader::LoadParams; -use super::loader::Loader; -use crate::cache::StorageCache; use crate::metrics::metrics_inc_cache_access_count; use crate::metrics::metrics_inc_cache_hit_count; use crate::metrics::metrics_inc_cache_miss_count; use crate::metrics::metrics_inc_cache_miss_load_millisecond; +use crate::CacheAccessor; +use crate::Loader; -/// A generic cache-aware reader -/// -/// Given an impl of [StorageCache], e.g. `ItemCache` or `DiskCache` and a proper impl -/// [Loader], which is able to load `T`, `CachedReader` will load the `T` -/// by using [Loader], and populate the cache item into [StorageCache] by using -/// the loaded `T` and the key that [Loader] provides. -pub struct CachedReader { - cache: Option>>, +/// A cache-aware reader +pub struct CachedReader { + cache: Option, loader: L, - /// name of this cache instance - name: String, - _p: PhantomData, + cache_name: String, } -impl<'a, T, L, C, M> CachedReader +pub type CacheHolder = Arc, S, M>>>; + +impl CachedReader> where - L: Loader + Sync, - C: 'a + StorageCache>, + L: Loader + Sync, + S: BuildHasher, + M: CountableMeter>, { - pub fn new(cache: Option>>, name: impl Into, loader: L) -> Self { + pub fn new(cache: Option>, name: impl Into, loader: L) -> Self { Self { cache, - name: name.into(), + cache_name: name.into(), loader, - _p: PhantomData, } } /// Load the object at `location`, uses/populates the cache if possible/necessary. - pub async fn read(&self, params: &LoadParams) -> Result> { + pub async fn read(&self, params: &LoadParams) -> Result> { match &self.cache { None => Ok(Arc::new(self.loader.load(params).await?)), - Some(labeled_cache) => { + Some(cache) => { // Perf. 
{ - metrics_inc_cache_access_count(1, &self.name); + metrics_inc_cache_access_count(1, &self.cache_name); } let cache_key = self.loader.cache_key(params); - match self.get_cached(cache_key.as_ref(), labeled_cache) { + match cache.get(cache_key.as_str()) { Some(item) => { // Perf. { - metrics_inc_cache_hit_count(1, &self.name); + metrics_inc_cache_hit_count(1, &self.cache_name); } Ok(item) @@ -83,15 +80,14 @@ where // Perf. { - metrics_inc_cache_miss_count(1, &self.name); + metrics_inc_cache_miss_count(1, &self.cache_name); metrics_inc_cache_miss_load_millisecond( start.elapsed().as_millis() as u64, - &self.name, + &self.cache_name, ); } - let mut cache_guard = labeled_cache.write(); - cache_guard.put(cache_key, item.clone()); + cache.put(cache_key, item.clone()); Ok(item) } } @@ -100,10 +96,6 @@ where } pub fn name(&self) -> &str { - self.name.as_str() - } - - fn get_cached(&self, key: &str, cache: &RwLock) -> Option> { - cache.write().get(key) + self.cache_name.as_str() } } diff --git a/src/query/storages/common/cache/src/read/mod.rs b/src/query/storages/common/cache/src/read/mod.rs index 8e2000cb01903..7d7611460a92b 100644 --- a/src/query/storages/common/cache/src/read/mod.rs +++ b/src/query/storages/common/cache/src/read/mod.rs @@ -17,9 +17,9 @@ mod cached_reader; mod loader; mod readers; +pub use cached_reader::CachedReader; pub use loader::CacheKey; pub use loader::LoadParams; pub use loader::Loader; -pub use readers::DiskCacheReader; pub use readers::InMemoryBytesCacheReader; pub use readers::InMemoryItemCacheReader; diff --git a/src/query/storages/common/cache/src/read/readers.rs b/src/query/storages/common/cache/src/read/readers.rs index 44ba165b8a8c1..7429544a71e3d 100644 --- a/src/query/storages/common/cache/src/read/readers.rs +++ b/src/query/storages/common/cache/src/read/readers.rs @@ -12,12 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use crate::providers::BytesCache; -use crate::providers::DiskCache; -use crate::providers::ItemCache; use crate::read::cached_reader::CachedReader; +use crate::InMemoryBytesCacheHolder; +use crate::InMemoryItemCacheHolder; -pub type InMemoryItemCacheReader = CachedReader>; -pub type InMemoryBytesCacheReader = CachedReader; -// NOTE: dummy impl, just for api testing -pub type DiskCacheReader = CachedReader; +pub type InMemoryItemCacheReader = CachedReader>; +pub type InMemoryBytesCacheReader = CachedReader; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index f4ac5de6413f3..c92cb1f15ffaa 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -79,9 +79,7 @@ impl OwnerMemory { } } -pub struct MergeIOReadResult -where Self: 'static -{ +pub struct MergeIOReadResult { block_path: String, columns_chunk_positions: HashMap)>, owner_memory: OwnerMemory, @@ -90,9 +88,7 @@ where Self: 'static pub type CachedColumnData = Vec<(ColumnId, Arc>)>; -impl MergeIOReadResult -where Self: 'static -{ +impl MergeIOReadResult { pub fn create(owner_memory: OwnerMemory, capacity: usize, path: String) -> MergeIOReadResult { MergeIOReadResult { block_path: path, @@ -119,11 +115,14 @@ where Self: 'static Ok(res) } - pub fn get_chunk(&self, index: usize, path: &str) -> Result<&[u8]> { + fn get_chunk(&self, index: usize, path: &str) -> Result<&[u8]> { self.owner_memory.get_chunk(index, path) } - pub fn add_column_chunk(&mut self, chunk: usize, column_id: ColumnId, range: Range) { + // sync read path also hit this method! + // TODO 1. 
pass the cache in 2) let the block reader hold a instance of cache + fn add_column_chunk(&mut self, chunk: usize, column_id: ColumnId, range: Range) { + // TODO doc why put cache operation could be placed here if let Ok(chunk_data) = self.get_chunk(chunk, &self.block_path) { let cache = CacheManager::instance().get_block_data_cache(); let cache_key = TableDataColumnCacheKey::new(&self.block_path, column_id); @@ -320,16 +319,14 @@ impl BlockReader { let column_cache_key = TableDataColumnCacheKey::new(location, *column_id); if let Some(cached_column_raw_data) = block_data_cache.get(&column_cache_key) { data_from_cache.push((*column_id as ColumnId, cached_column_raw_data)); - } else { - if let Some(column_meta) = columns_meta.get(column_id) { - let (offset, len) = column_meta.offset_length(); - ranges.push((*column_id, offset..(offset + len))); - - // Perf - { - metrics_inc_remote_io_seeks(1); - metrics_inc_remote_io_read_bytes(len); - } + } else if let Some(column_meta) = columns_meta.get(column_id) { + let (offset, len) = column_meta.offset_length(); + ranges.push((*column_id, offset..(offset + len))); + + // Perf + { + metrics_inc_remote_io_seeks(1); + metrics_inc_remote_io_read_bytes(len); } } } From 1f06dd90a42ba6a352c74951eac1c0d869f15189 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 8 Feb 2023 17:28:18 +0800 Subject: [PATCH 24/80] tweak doc --- src/query/storages/common/cache/src/providers/disk_cache.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 257eb51008746..55a5cc14172a3 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -109,9 +109,8 @@ impl CacheAccessor, common_cache::DefaultHashBuilder, Count> for } } -/// Assuming that the crc32 is at the end of `bytes` and encoded as le u32. 
+/// The crc32 checksum is stored at the end of `bytes` and encoded as le u32. // Although parquet page has built-in crc, but it is optional (and not generated in parquet2) -// Later, if cache data is put into redis, we can reuse the checksum logic fn validate_checksum(bytes: &[u8]) -> Result<()> { let total_len = bytes.len(); if total_len <= 4 { From 7344fa1d0e179caba7b6501c500cd808eed1a82d Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 8 Feb 2023 19:00:23 +0800 Subject: [PATCH 25/80] minor refactor --- .../common/cache-manager/src/cache_manager.rs | 2 +- .../fuse/src/io/read/block/block_reader.rs | 51 ++++++++++++++----- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index 45b213ca036f0..1a384e48436a2 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -117,7 +117,7 @@ impl CacheManager { self.file_meta_data_cache.clone() } - pub fn get_block_data_cache(&self) -> Option { + pub fn get_table_data_cache(&self) -> Option { self.table_data_cache.clone() } diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index c92cb1f15ffaa..47d166a500022 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -39,6 +39,7 @@ use futures::future::try_join_all; use opendal::Object; use opendal::Operator; use storages_common_cache::CacheAccessor; +use storages_common_cache::TableDataCache; use storages_common_cache::TableDataColumnCacheKey; use storages_common_cache_manager::CacheManager; use storages_common_table_meta::meta::ColumnMeta; @@ -84,17 +85,24 @@ pub struct MergeIOReadResult { columns_chunk_positions: HashMap)>, owner_memory: OwnerMemory, cached_column_data: CachedColumnData, + 
table_data_cache: Option, } pub type CachedColumnData = Vec<(ColumnId, Arc>)>; impl MergeIOReadResult { - pub fn create(owner_memory: OwnerMemory, capacity: usize, path: String) -> MergeIOReadResult { + pub fn create( + owner_memory: OwnerMemory, + capacity: usize, + path: String, + table_data_cache: Option, + ) -> MergeIOReadResult { MergeIOReadResult { block_path: path, columns_chunk_positions: HashMap::with_capacity(capacity), owner_memory, cached_column_data: vec![], + table_data_cache, } } @@ -123,12 +131,13 @@ impl MergeIOReadResult { // TODO 1. pass the cache in 2) let the block reader hold a instance of cache fn add_column_chunk(&mut self, chunk: usize, column_id: ColumnId, range: Range) { // TODO doc why put cache operation could be placed here - if let Ok(chunk_data) = self.get_chunk(chunk, &self.block_path) { - let cache = CacheManager::instance().get_block_data_cache(); - let cache_key = TableDataColumnCacheKey::new(&self.block_path, column_id); - let data = &chunk_data[range.clone()]; - // TODO api is NOT type safe - cache.put(cache_key.as_ref().to_owned(), Arc::new(data.to_vec())); + if let Some(cache) = &self.table_data_cache { + if let Ok(chunk_data) = self.get_chunk(chunk, &self.block_path) { + let cache_key = TableDataColumnCacheKey::new(&self.block_path, column_id); + let data = &chunk_data[range.clone()]; + // TODO api is NOT type safe + cache.put(cache_key.as_ref().to_owned(), Arc::new(data.to_vec())); + } } self.columns_chunk_positions .insert(column_id, (chunk, range)); @@ -179,7 +188,7 @@ impl BlockReader { /// /// It will *NOT* merge two requests: /// if the last io request size is larger than storage_io_page_bytes_for_read(Default is 512KB). 
- pub async fn merge_io_read( + async fn merge_io_read( read_settings: &ReadSettings, object: Object, raw_ranges: Vec<(ColumnId, Range)>, @@ -217,7 +226,13 @@ impl BlockReader { let start = Instant::now(); let owner_memory = OwnerMemory::create(try_join_all(read_handlers).await?); - let mut read_res = MergeIOReadResult::create(owner_memory, raw_ranges.len(), path.clone()); + let table_data_cache = CacheManager::instance().get_table_data_cache(); + let mut read_res = MergeIOReadResult::create( + owner_memory, + raw_ranges.len(), + path.clone(), + table_data_cache, + ); // Perf. { @@ -277,7 +292,15 @@ impl BlockReader { } let owner_memory = OwnerMemory::create(io_res); - let mut read_res = MergeIOReadResult::create(owner_memory, raw_ranges.len(), path.clone()); + + // for sync read, we disable table data cache + let table_data_cache = None; + let mut read_res = MergeIOReadResult::create( + owner_memory, + raw_ranges.len(), + path.clone(), + table_data_cache, + ); for (raw_idx, raw_range) in &raw_ranges { let column_id = *raw_idx as ColumnId; @@ -313,12 +336,12 @@ impl BlockReader { } let mut ranges = vec![]; - let block_data_cache = CacheManager::instance().get_block_data_cache(); - let mut data_from_cache = vec![]; + let block_data_cache = CacheManager::instance().get_table_data_cache(); + let mut cached_column_data = vec![]; for (_index, (column_id, ..)) in self.project_indices.iter() { let column_cache_key = TableDataColumnCacheKey::new(location, *column_id); if let Some(cached_column_raw_data) = block_data_cache.get(&column_cache_key) { - data_from_cache.push((*column_id as ColumnId, cached_column_raw_data)); + cached_column_data.push((*column_id as ColumnId, cached_column_raw_data)); } else if let Some(column_meta) = columns_meta.get(column_id) { let (offset, len) = column_meta.offset_length(); ranges.push((*column_id, offset..(offset + len))); @@ -334,7 +357,7 @@ impl BlockReader { let object = self.operator.object(location); let mut merge_io_read_res = 
Self::merge_io_read(settings, object, ranges).await?; - merge_io_read_res.cached_column_data = data_from_cache; + merge_io_read_res.cached_column_data = cached_column_data; Ok(merge_io_read_res) } From 915f66388bb2b8621a496f68fd8a6690f527b2a8 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Thu, 9 Feb 2023 10:31:29 +0800 Subject: [PATCH 26/80] wip: integrate ColumnArrayCache - works but messy --- .../common/cache-manager/src/cache_manager.rs | 19 +- .../common/cache-manager/src/caches.rs | 7 +- .../cache/src/providers/table_data_cache.rs | 7 + .../fuse/src/io/read/block/block_reader.rs | 61 ++++-- .../src/io/read/block/block_reader_parquet.rs | 206 +++++++++++++++--- .../read/parquet_data_source_deserializer.rs | 1 + 6 files changed, 249 insertions(+), 52 deletions(-) diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index 1a384e48436a2..60d18f7ffa5cb 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -29,8 +29,10 @@ use crate::caches::FileMetaDataCache; use crate::caches::SegmentInfoCache; use crate::caches::TableSnapshotCache; use crate::caches::TableSnapshotStatisticCache; +use crate::ColumnArrayCache; static DEFAULT_FILE_META_DATA_CACHE_ITEMS: u64 = 3000; +static DEFAULT_COLUMN_ARRAY_CACHE_ITEMS: u64 = 100_000; /// Where all the caches reside pub struct CacheManager { @@ -41,12 +43,13 @@ pub struct CacheManager { bloom_index_meta_cache: Option, file_meta_data_cache: Option, table_data_cache: Option, + table_column_array_cache: Option, } impl CacheManager { /// Initialize the caches according to the relevant configurations. 
pub fn init(config: &QueryConfig) -> Result<()> { - let block_data_cache = if config.table_data_cache_enabled { + let table_data_cache = if config.table_data_cache_enabled { None } else { Self::new_block_data_cache( @@ -56,6 +59,10 @@ impl CacheManager { config.table_disk_cache_mb_size, )? }; + + // TODO settings + let table_column_array_cache = Self::new_item_cache(DEFAULT_COLUMN_ARRAY_CACHE_ITEMS); + if !config.table_meta_cache_enabled { GlobalInstance::set(Arc::new(Self { table_snapshot_cache: None, @@ -64,7 +71,8 @@ impl CacheManager { bloom_index_meta_cache: None, file_meta_data_cache: None, table_statistic_cache: None, - table_data_cache: None, + table_data_cache, + table_column_array_cache, })); } else { let table_snapshot_cache = Self::new_item_cache(config.table_cache_snapshot_count); @@ -82,7 +90,8 @@ impl CacheManager { bloom_index_meta_cache, file_meta_data_cache, table_statistic_cache, - table_data_cache: block_data_cache, + table_data_cache, + table_column_array_cache, })); } @@ -121,6 +130,10 @@ impl CacheManager { self.table_data_cache.clone() } + pub fn get_table_data_array_cache(&self) -> Option { + self.table_column_array_cache.clone() + } + fn new_item_cache(capacity: u64) -> Option> { if capacity > 0 { Some(InMemoryCacheBuilder::new_item_cache(capacity)) diff --git a/src/query/storages/common/cache-manager/src/caches.rs b/src/query/storages/common/cache-manager/src/caches.rs index 041733bf63b1a..92773dbc75d5d 100644 --- a/src/query/storages/common/cache-manager/src/caches.rs +++ b/src/query/storages/common/cache-manager/src/caches.rs @@ -38,12 +38,13 @@ pub type BloomIndexMetaCache = InMemoryItemCacheHolder; pub type FileMetaDataCache = InMemoryItemCacheHolder; /// In memory object cache of parquet FileMetaData of external parquet files -pub type ColumnArrayCache = InMemoryItemCacheHolder; +/// TODO provides a proper meter +pub type ColumnArrayCache = InMemoryItemCacheHolder>; // Bind Type of cached objects to Caches // -// The `Cache` returned 
should -// - cache item s of Type `T` +// The `Cache` should return +// - cache item of Type `T` // - and implement `CacheAccessor` properly pub trait CachedObject { type Cache: CacheAccessor; diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index 88f4580858ec4..fb0019f7894b6 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -40,6 +40,7 @@ struct CacheItem { value: Arc>, } +#[derive(Clone)] pub struct TableDataColumnCacheKey { cache_key: String, } @@ -52,6 +53,12 @@ impl TableDataColumnCacheKey { } } +impl From for String { + fn from(value: TableDataColumnCacheKey) -> Self { + value.cache_key + } +} + impl AsRef for TableDataColumnCacheKey { fn as_ref(&self) -> &str { &self.cache_key diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index 47d166a500022..70781a69679ba 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -18,6 +18,7 @@ use std::ops::Range; use std::sync::Arc; use std::time::Instant; +use common_arrow::arrow::array::Array; use common_arrow::arrow::datatypes::Field; use common_arrow::arrow::io::parquet::write::to_parquet_schema; use common_arrow::parquet::metadata::SchemaDescriptor; @@ -82,13 +83,20 @@ impl OwnerMemory { pub struct MergeIOReadResult { block_path: String, - columns_chunk_positions: HashMap)>, + columns_chunk_offsets: HashMap)>, owner_memory: OwnerMemory, cached_column_data: CachedColumnData, + cached_column_array: CachedColumnArray, table_data_cache: Option, } -pub type CachedColumnData = Vec<(ColumnId, Arc>)>; +type CachedColumnData = Vec<(ColumnId, Arc>)>; +type CachedColumnArray = Vec<(ColumnId, Arc>)>; + +pub enum DataItem<'a> { + RawData(&'a [u8]), + ColumnArray(&'a 
Arc>), +} impl MergeIOReadResult { pub fn create( @@ -99,25 +107,31 @@ impl MergeIOReadResult { ) -> MergeIOReadResult { MergeIOReadResult { block_path: path, - columns_chunk_positions: HashMap::with_capacity(capacity), + columns_chunk_offsets: HashMap::with_capacity(capacity), owner_memory, cached_column_data: vec![], + cached_column_array: vec![], table_data_cache, } } - pub fn columns_chunks(&self) -> Result> { - let mut res = Vec::with_capacity(self.columns_chunk_positions.len()); + pub fn columns_chunks(&self) -> Result> { + let mut res = Vec::with_capacity(self.columns_chunk_offsets.len()); // merge column data fetched from object storage - for (column_idx, (chunk_idx, range)) in &self.columns_chunk_positions { + for (column_idx, (chunk_idx, range)) in &self.columns_chunk_offsets { let chunk = self.owner_memory.get_chunk(*chunk_idx, &self.block_path)?; - res.push((*column_idx, &chunk[range.clone()])); + res.push((*column_idx, DataItem::RawData(&chunk[range.clone()]))); } // merge column data from cache for (column_id, data) in &self.cached_column_data { - res.push((*column_id, data.as_slice())) + res.push((*column_id, DataItem::RawData(data.as_slice()))) + } + + // merge column array from cache + for (column_id, data) in &self.cached_column_array { + res.push((*column_id, DataItem::ColumnArray(data))) } Ok(res) @@ -139,8 +153,7 @@ impl MergeIOReadResult { cache.put(cache_key.as_ref().to_owned(), Arc::new(data.to_vec())); } } - self.columns_chunk_positions - .insert(column_id, (chunk, range)); + self.columns_chunk_offsets.insert(column_id, (chunk, range)); } } @@ -179,7 +192,8 @@ impl BlockReader { } pub fn support_blocking_api(&self) -> bool { - self.operator.metadata().can_blocking() + // self.operator.metadata().can_blocking() + false } /// This is an optimized for data read, works like the Linux kernel io-scheduler IO merging. 
@@ -336,13 +350,29 @@ impl BlockReader { } let mut ranges = vec![]; - let block_data_cache = CacheManager::instance().get_table_data_cache(); + // for async read, always try using table data cache (if enabled in settings) + let column_data_cache = CacheManager::instance().get_table_data_cache(); + let column_array_cache = CacheManager::instance().get_table_data_array_cache(); let mut cached_column_data = vec![]; + let mut cached_column_array = vec![]; for (_index, (column_id, ..)) in self.project_indices.iter() { let column_cache_key = TableDataColumnCacheKey::new(location, *column_id); - if let Some(cached_column_raw_data) = block_data_cache.get(&column_cache_key) { - cached_column_data.push((*column_id as ColumnId, cached_column_raw_data)); - } else if let Some(column_meta) = columns_meta.get(column_id) { + + // check column array object cache + eprintln!("geting col array cache {}", column_cache_key.as_ref()); + if let Some(cache_array) = column_array_cache.get(&column_cache_key) { + eprintln!("got from object cache"); + cached_column_array.push((*column_id, cache_array)); + continue; + } + + // check column data cache + if let Some(cached_column_raw_data) = column_data_cache.get(&column_cache_key) { + cached_column_data.push((*column_id, cached_column_raw_data)); + continue; + } + + if let Some(column_meta) = columns_meta.get(column_id) { let (offset, len) = column_meta.offset_length(); ranges.push((*column_id, offset..(offset + len))); @@ -358,6 +388,7 @@ impl BlockReader { let mut merge_io_read_res = Self::merge_io_read(settings, object, ranges).await?; merge_io_read_res.cached_column_data = cached_column_data; + merge_io_read_res.cached_column_array = cached_column_array; Ok(merge_io_read_res) } diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index af1bf1b8d8e7f..8df9db7c9db22 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ 
b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -29,11 +29,17 @@ use common_catalog::table::ColumnId; use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataBlock; +use storages_common_cache::CacheAccessor; +use storages_common_cache::TableDataCache; +use storages_common_cache::TableDataColumnCacheKey; +use storages_common_cache_manager::CacheManager; +use storages_common_cache_manager::ColumnArrayCache; use storages_common_table_meta::meta::BlockMeta; use storages_common_table_meta::meta::ColumnMeta; use storages_common_table_meta::meta::Compression; use crate::fuse_part::FusePartInfo; +use crate::io::read::block::block_reader::DataItem; use crate::io::read::block::decompressor::BuffedBasicDecompressor; use crate::io::read::ReadSettings; use crate::io::BlockReader; @@ -69,11 +75,12 @@ impl BlockReader { let num_rows = meta.row_count as usize; let columns_chunk = chunks - .iter() - .map(|(index, chunk)| (*index, *chunk)) + .into_iter() + .map(|(index, chunk)| (index, chunk)) .collect::>(); self.deserialize_parquet_chunks_with_buffer( + &meta.location.0, num_rows, &meta.compression, &columns_meta, @@ -86,7 +93,7 @@ impl BlockReader { pub fn deserialize_parquet_chunks( &self, part: PartInfoPtr, - chunks: Vec<(ColumnId, &[u8])>, + chunks: Vec<(ColumnId, DataItem)>, ) -> Result { let part = FusePartInfo::from_part(&part)?; let start = Instant::now(); @@ -96,11 +103,12 @@ impl BlockReader { } let reads = chunks - .iter() - .map(|(index, chunk)| (*index, *chunk)) + .into_iter() + .map(|(index, chunk)| (index, chunk)) .collect::>(); let deserialized_res = self.deserialize_parquet_chunks_with_buffer( + &part.location, part.nums_rows, &part.compression, &part.columns_meta, @@ -119,62 +127,143 @@ impl BlockReader { /// Deserialize column chunks data from parquet format to DataBlock with a uncompressed buffer. 
pub fn deserialize_parquet_chunks_with_buffer( &self, + block_path: &str, num_rows: usize, compression: &Compression, columns_meta: &HashMap, - columns_chunks: Vec<(ColumnId, &[u8])>, + columns_chunks: Vec<(ColumnId, DataItem)>, uncompressed_buffer: Option>, ) -> Result { + eprintln!(">>> I AM H"); + if columns_chunks.is_empty() { + eprintln!(">>> I AM H, EMPTY"); return Ok(DataBlock::new(vec![], num_rows)); } - let chunk_map: HashMap = columns_chunks.into_iter().collect(); + let chunk_map: HashMap = columns_chunks.into_iter().collect(); let mut columns_array_iter = Vec::with_capacity(self.projection.len()); let columns = self.projection.project_column_nodes(&self.column_nodes)?; + // TODO need refactor + + type ItemIndex = usize; + enum Holder { + Cached(ItemIndex), + Deserialized(ItemIndex), + } + + let mut column_idx_cached_array = vec![]; + let mut holders = vec![]; + let mut deserialized_item_index: usize = 0; + for column in &columns { + eprintln!("processing col"); let field = column.field.clone(); let indices = &column.leaf_ids; + eprintln!(">>> column leaf node len {}", indices.len()); let mut column_metas = Vec::with_capacity(indices.len()); let mut column_chunks = Vec::with_capacity(indices.len()); let mut column_descriptors = Vec::with_capacity(indices.len()); for (i, index) in indices.iter().enumerate() { + eprintln!("lead idx {}-{}", i, index); let column_id = column.leaf_column_id(i); if let Some(column_meta) = columns_meta.get(&column_id) { - // TODO why index is used here? 
need @LiChuang review - let column_id_in_question = *index as ColumnId; - let column_read = <&[u8]>::clone(&chunk_map[&column_id_in_question]); - let column_descriptor = &self.parquet_schema_descriptor.columns()[*index]; - column_metas.push(column_meta); - column_chunks.push(column_read); - column_descriptors.push(column_descriptor); + // TODO use get and handle exception + match chunk_map[&column_id] { + DataItem::RawData(data) => { + let column_read = data; + // TODO why index is used here? need @LiChuang review + let column_descriptor = + &self.parquet_schema_descriptor.columns()[*index]; + column_metas.push(column_meta); + column_chunks.push(column_read); + column_descriptors.push(column_descriptor); + eprintln!("pushing deser {}", deserialized_item_index); + holders.push(Holder::Deserialized(deserialized_item_index)); + deserialized_item_index += 1; + } + DataItem::ColumnArray(column_array) => { + eprintln!("***>>>using cached column"); + let idx = column_idx_cached_array.len(); + eprintln!("pushing cached {}", idx); + column_idx_cached_array.push(column_array); + holders.push(Holder::Cached(idx)); + } + } } } + eprintln!("col meta len {}", column_metas.len()); - columns_array_iter.push(Self::chunks_to_parquet_array_iter( - column_metas, - column_chunks, - num_rows, - column_descriptors, - field, - compression, - uncompressed_buffer - .clone() - .unwrap_or_else(|| UncompressedBuffer::new(0)), - )?); + // let test_chunks = column_chunks + // .into_iter() + // .map(|bytes| DataItem::RawData(bytes)) + // .collect(); + if column_metas.len() > 0 { + columns_array_iter.push(( + indices[0], + Self::chunks_to_parquet_array_iter( + column_metas, + column_chunks, + // test_chunks, + num_rows, + column_descriptors, + field, + compression, + uncompressed_buffer + .clone() + .unwrap_or_else(|| UncompressedBuffer::new(0)), + )?, + )); + } + eprintln!("col arr iter len {}", columns_array_iter.len()); } let mut arrays = Vec::with_capacity(columns_array_iter.len()); - for mut 
column_array_iter in columns_array_iter.into_iter() { - let array = column_array_iter.next().unwrap()?; - arrays.push(array); - drop(column_array_iter); - } + let deserialized_column_arrays = columns_array_iter + .into_iter() + .map(|(col_idx, mut v)| + // TODO error handling + (col_idx, v.next().unwrap().unwrap())) + .collect::>(); + + eprintln!( + "deserialized_column_arrays len {}", + deserialized_column_arrays.len() + ); + + for holder in holders { + match holder { + Holder::Cached(idx) => { + arrays.push(column_idx_cached_array[idx].as_ref()); + } + Holder::Deserialized(idx) => { + let item = &deserialized_column_arrays[idx].1; + arrays.push(item); + } + } + } let chunk = Chunk::try_new(arrays)?; - DataBlock::from_arrow_chunk(&chunk, &self.data_schema()) + // let chunk = Chunk::try_new(deserialized_column_arrays)?; + let block = DataBlock::from_arrow_chunk(&chunk, &self.data_schema()); + + if block.is_ok() { + let cache = CacheManager::instance().get_table_data_array_cache(); + match cache { + Some(cache) => { + for (idx, array) in deserialized_column_arrays.into_iter() { + let key = TableDataColumnCacheKey::new(block_path, idx as ColumnId); + eprintln!("putting cache {}", key.as_ref()); + cache.put(key.into(), array.into()); + } + } + None => {} + } + } + + block } fn chunks_to_parquet_array_iter<'a>( @@ -255,4 +344,59 @@ impl BlockReader { Compression::None => Ok(ParquetCompression::Uncompressed), } } + + fn chunks_to_parquet_array_iter_new<'a>( + metas: Vec<&ColumnMeta>, + chunks: Vec>, + rows: usize, + column_descriptors: Vec<&ColumnDescriptor>, + field: Field, + compression: &Compression, + uncompressed_buffer: Arc, + ) -> Result> { + let columns = metas + .iter() + .zip(chunks.into_iter().zip(column_descriptors.iter())) + .map(|(meta, (chunk, column_descriptor))| match chunk { + DataItem::RawData(data) => { + let meta = meta.as_parquet().unwrap(); + + let page_meta_data = PageMetaData { + column_start: meta.offset, + num_values: meta.num_values as i64, + 
compression: Self::to_parquet_compression(compression)?, + descriptor: column_descriptor.descriptor.clone(), + }; + let pages = PageReader::new_with_page_meta( + data, + page_meta_data, + Arc::new(|_, _| true), + vec![], + usize::MAX, + ); + + Ok(BuffedBasicDecompressor::new( + pages, + uncompressed_buffer.clone(), + )) + } + DataItem::ColumnArray(_) => { + todo!() + } + }) + .collect::>>()?; + + let types = column_descriptors + .iter() + .map(|column_descriptor| &column_descriptor.descriptor.primitive_type) + .collect::>(); + + Ok(column_iter_to_arrays( + columns, + types, + field, + Some(rows), + rows, + )?) + } } diff --git a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs index 33ef3a21aff61..819d40d5a0d2c 100644 --- a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs +++ b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs @@ -140,6 +140,7 @@ impl Processor for DeserializeDataTransform { let part = FusePartInfo::from_part(&part)?; let data_block = self.block_reader.deserialize_parquet_chunks_with_buffer( + &part.location, part.nums_rows, &part.compression, &part.columns_meta, From 2b72e40a64f0ba28a04146c44ee7167d42c27285 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Thu, 9 Feb 2023 15:07:22 +0800 Subject: [PATCH 27/80] wip --- .../src/io/read/block/block_reader_parquet.rs | 206 +++++++----------- 1 file changed, 78 insertions(+), 128 deletions(-) diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index 8df9db7c9db22..a2e189512c1fb 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -30,10 +30,8 @@ use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataBlock; 
use storages_common_cache::CacheAccessor; -use storages_common_cache::TableDataCache; use storages_common_cache::TableDataColumnCacheKey; use storages_common_cache_manager::CacheManager; -use storages_common_cache_manager::ColumnArrayCache; use storages_common_table_meta::meta::BlockMeta; use storages_common_table_meta::meta::ColumnMeta; use storages_common_table_meta::meta::Compression; @@ -134,10 +132,7 @@ impl BlockReader { columns_chunks: Vec<(ColumnId, DataItem)>, uncompressed_buffer: Option>, ) -> Result { - eprintln!(">>> I AM H"); - if columns_chunks.is_empty() { - eprintln!(">>> I AM H, EMPTY"); return Ok(DataBlock::new(vec![], num_rows)); } @@ -147,11 +142,11 @@ impl BlockReader { let columns = self.projection.project_column_nodes(&self.column_nodes)?; // TODO need refactor - type ItemIndex = usize; enum Holder { Cached(ItemIndex), Deserialized(ItemIndex), + DeserializedNoCache(ItemIndex), } let mut column_idx_cached_array = vec![]; @@ -159,109 +154,119 @@ impl BlockReader { let mut deserialized_item_index: usize = 0; for column in &columns { - eprintln!("processing col"); let field = column.field.clone(); let indices = &column.leaf_ids; - eprintln!(">>> column leaf node len {}", indices.len()); let mut column_metas = Vec::with_capacity(indices.len()); let mut column_chunks = Vec::with_capacity(indices.len()); let mut column_descriptors = Vec::with_capacity(indices.len()); - for (i, index) in indices.iter().enumerate() { - eprintln!("lead idx {}-{}", i, index); - let column_id = column.leaf_column_id(i); + if indices.len() == 1 { + eprintln!("non nestted filed"); + // we only cache non-nested column for the time being + let column_id = column.leaf_column_id(0); + let index = indices[0]; if let Some(column_meta) = columns_meta.get(&column_id) { - // TODO use get and handle exception - match chunk_map[&column_id] { - DataItem::RawData(data) => { - let column_read = data; - // TODO why index is used here? 
need @LiChuang review - let column_descriptor = - &self.parquet_schema_descriptor.columns()[*index]; - column_metas.push(column_meta); - column_chunks.push(column_read); - column_descriptors.push(column_descriptor); - eprintln!("pushing deser {}", deserialized_item_index); - holders.push(Holder::Deserialized(deserialized_item_index)); - deserialized_item_index += 1; + if let Some(chunk) = chunk_map.get(&column_id) { + match chunk { + DataItem::RawData(data) => { + // TODO why index of type usize is used here? need @LiChuang review + let column_descriptor = + &self.parquet_schema_descriptor.columns()[index]; + column_metas.push(column_meta); + column_chunks.push(*data); + column_descriptors.push(column_descriptor); + holders.push(Holder::Deserialized(deserialized_item_index)); + deserialized_item_index += 1; + } + DataItem::ColumnArray(column_array) => { + let idx = column_idx_cached_array.len(); + column_idx_cached_array.push(*column_array); + holders.push(Holder::Cached(idx)); + } } - DataItem::ColumnArray(column_array) => { - eprintln!("***>>>using cached column"); - let idx = column_idx_cached_array.len(); - eprintln!("pushing cached {}", idx); - column_idx_cached_array.push(column_array); - holders.push(Holder::Cached(idx)); + } + } + } else { + eprintln!("nestted filed"); + for (i, index) in indices.iter().enumerate() { + let column_id = column.leaf_column_id(i); + if let Some(column_meta) = columns_meta.get(&column_id) { + if let Some(chunk) = chunk_map.get(&column_id) { + match chunk { + DataItem::RawData(data) => { + // TODO why index is used here? 
need @LiChuang review + let column_descriptor = + &self.parquet_schema_descriptor.columns()[*index]; + column_metas.push(column_meta); + column_chunks.push(*data); + column_descriptors.push(column_descriptor); + } + DataItem::ColumnArray(column_array) => { + eprintln!("using cache in nested"); + let idx = column_idx_cached_array.len(); + column_idx_cached_array.push(*column_array); + holders.push(Holder::Cached(idx)); + } + } } } } + // a field nested structure is expected + holders.push(Holder::DeserializedNoCache(deserialized_item_index)); + deserialized_item_index += 1; } - eprintln!("col meta len {}", column_metas.len()); - // let test_chunks = column_chunks - // .into_iter() - // .map(|bytes| DataItem::RawData(bytes)) - // .collect(); if column_metas.len() > 0 { - columns_array_iter.push(( - indices[0], - Self::chunks_to_parquet_array_iter( - column_metas, - column_chunks, - // test_chunks, - num_rows, - column_descriptors, - field, - compression, - uncompressed_buffer - .clone() - .unwrap_or_else(|| UncompressedBuffer::new(0)), - )?, - )); + columns_array_iter.push(Self::chunks_to_parquet_array_iter( + column_metas, + column_chunks, + // test_chunks, + num_rows, + column_descriptors, + field, + compression, + uncompressed_buffer + .clone() + .unwrap_or_else(|| UncompressedBuffer::new(0)), + )?); } - eprintln!("col arr iter len {}", columns_array_iter.len()); } let mut arrays = Vec::with_capacity(columns_array_iter.len()); let deserialized_column_arrays = columns_array_iter .into_iter() - .map(|(col_idx, mut v)| + .map(|mut v| // TODO error handling - (col_idx, v.next().unwrap().unwrap())) + v.next().unwrap().unwrap()) .collect::>(); - eprintln!( - "deserialized_column_arrays len {}", - deserialized_column_arrays.len() - ); - for holder in holders { match holder { Holder::Cached(idx) => { arrays.push(column_idx_cached_array[idx].as_ref()); } Holder::Deserialized(idx) => { - let item = &deserialized_column_arrays[idx].1; + let item = 
&deserialized_column_arrays[idx]; + arrays.push(item); + } + Holder::DeserializedNoCache(idx) => { + let item = &deserialized_column_arrays[idx]; arrays.push(item); } } } let chunk = Chunk::try_new(arrays)?; - // let chunk = Chunk::try_new(deserialized_column_arrays)?; let block = DataBlock::from_arrow_chunk(&chunk, &self.data_schema()); - if block.is_ok() { - let cache = CacheManager::instance().get_table_data_array_cache(); - match cache { - Some(cache) => { - for (idx, array) in deserialized_column_arrays.into_iter() { - let key = TableDataColumnCacheKey::new(block_path, idx as ColumnId); - eprintln!("putting cache {}", key.as_ref()); - cache.put(key.into(), array.into()); - } - } - None => {} - } - } + // if block.is_ok() { + // let maybe_column_array_cache = CacheManager::instance().get_table_data_array_cache(); + // if let Some(cache) = maybe_column_array_cache { + // for (idx, array) in deserialized_column_arrays.into_iter() { + // let key = TableDataColumnCacheKey::new(block_path, idx as ColumnId); + // cache.put(key.into(), array.into()); + // } + // } + //} block } @@ -344,59 +349,4 @@ impl BlockReader { Compression::None => Ok(ParquetCompression::Uncompressed), } } - - fn chunks_to_parquet_array_iter_new<'a>( - metas: Vec<&ColumnMeta>, - chunks: Vec>, - rows: usize, - column_descriptors: Vec<&ColumnDescriptor>, - field: Field, - compression: &Compression, - uncompressed_buffer: Arc, - ) -> Result> { - let columns = metas - .iter() - .zip(chunks.into_iter().zip(column_descriptors.iter())) - .map(|(meta, (chunk, column_descriptor))| match chunk { - DataItem::RawData(data) => { - let meta = meta.as_parquet().unwrap(); - - let page_meta_data = PageMetaData { - column_start: meta.offset, - num_values: meta.num_values as i64, - compression: Self::to_parquet_compression(compression)?, - descriptor: column_descriptor.descriptor.clone(), - }; - let pages = PageReader::new_with_page_meta( - data, - page_meta_data, - Arc::new(|_, _| true), - vec![], - usize::MAX, - 
); - - Ok(BuffedBasicDecompressor::new( - pages, - uncompressed_buffer.clone(), - )) - } - DataItem::ColumnArray(_) => { - todo!() - } - }) - .collect::>>()?; - - let types = column_descriptors - .iter() - .map(|column_descriptor| &column_descriptor.descriptor.primitive_type) - .collect::>(); - - Ok(column_iter_to_arrays( - columns, - types, - field, - Some(rows), - rows, - )?) - } } From e7f5258d5519cc25a56bb4280970178251cd0328 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Thu, 9 Feb 2023 19:55:58 +0800 Subject: [PATCH 28/80] refactor: avoid clone Box --- .../src/io/read/block/block_reader_parquet.rs | 89 +++++++------------ 1 file changed, 34 insertions(+), 55 deletions(-) diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index a2e189512c1fb..33233255497d3 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -146,12 +146,12 @@ impl BlockReader { enum Holder { Cached(ItemIndex), Deserialized(ItemIndex), - DeserializedNoCache(ItemIndex), } let mut column_idx_cached_array = vec![]; let mut holders = vec![]; let mut deserialized_item_index: usize = 0; + let mut cache_flags = vec![]; for column in &columns { let field = column.field.clone(); @@ -159,23 +159,17 @@ impl BlockReader { let mut column_metas = Vec::with_capacity(indices.len()); let mut column_chunks = Vec::with_capacity(indices.len()); let mut column_descriptors = Vec::with_capacity(indices.len()); - if indices.len() == 1 { - eprintln!("non nestted filed"); - // we only cache non-nested column for the time being - let column_id = column.leaf_column_id(0); - let index = indices[0]; + for (i, index) in indices.iter().enumerate() { + let column_id = column.leaf_column_id(i); if let Some(column_meta) = columns_meta.get(&column_id) { if let Some(chunk) = chunk_map.get(&column_id) { match chunk { 
DataItem::RawData(data) => { - // TODO why index of type usize is used here? need @LiChuang review let column_descriptor = - &self.parquet_schema_descriptor.columns()[index]; + &self.parquet_schema_descriptor.columns()[*index]; column_metas.push(column_meta); column_chunks.push(*data); column_descriptors.push(column_descriptor); - holders.push(Holder::Deserialized(deserialized_item_index)); - deserialized_item_index += 1; } DataItem::ColumnArray(column_array) => { let idx = column_idx_cached_array.len(); @@ -185,37 +179,21 @@ impl BlockReader { } } } - } else { - eprintln!("nestted filed"); - for (i, index) in indices.iter().enumerate() { - let column_id = column.leaf_column_id(i); - if let Some(column_meta) = columns_meta.get(&column_id) { - if let Some(chunk) = chunk_map.get(&column_id) { - match chunk { - DataItem::RawData(data) => { - // TODO why index is used here? need @LiChuang review - let column_descriptor = - &self.parquet_schema_descriptor.columns()[*index]; - column_metas.push(column_meta); - column_chunks.push(*data); - column_descriptors.push(column_descriptor); - } - DataItem::ColumnArray(column_array) => { - eprintln!("using cache in nested"); - let idx = column_idx_cached_array.len(); - column_idx_cached_array.push(*column_array); - holders.push(Holder::Cached(idx)); - } - } - } - } - } - // a field nested structure is expected - holders.push(Holder::DeserializedNoCache(deserialized_item_index)); - deserialized_item_index += 1; } if column_metas.len() > 0 { + if column_metas.len() > 1 { + // working on nested field + holders.push(Holder::Deserialized(deserialized_item_index)); + cache_flags.push(None); + } else { + // working on simple field + let column_idx = indices[0] as ColumnId; + holders.push(Holder::Deserialized(deserialized_item_index)); + cache_flags.push(Some(column_idx)); + } + deserialized_item_index += 1; + columns_array_iter.push(Self::chunks_to_parquet_array_iter( column_metas, column_chunks, @@ -233,6 +211,7 @@ impl BlockReader { 
let mut arrays = Vec::with_capacity(columns_array_iter.len()); + // deserialized fields that are not cached let deserialized_column_arrays = columns_array_iter .into_iter() .map(|mut v| @@ -240,6 +219,7 @@ impl BlockReader { v.next().unwrap().unwrap()) .collect::>(); + // assembly the arrays for holder in holders { match holder { Holder::Cached(idx) => { @@ -249,26 +229,25 @@ impl BlockReader { let item = &deserialized_column_arrays[idx]; arrays.push(item); } - Holder::DeserializedNoCache(idx) => { - let item = &deserialized_column_arrays[idx]; - arrays.push(item); - } } } let chunk = Chunk::try_new(arrays)?; - let block = DataBlock::from_arrow_chunk(&chunk, &self.data_schema()); - - // if block.is_ok() { - // let maybe_column_array_cache = CacheManager::instance().get_table_data_array_cache(); - // if let Some(cache) = maybe_column_array_cache { - // for (idx, array) in deserialized_column_arrays.into_iter() { - // let key = TableDataColumnCacheKey::new(block_path, idx as ColumnId); - // cache.put(key.into(), array.into()); - // } - // } - //} - - block + let data_block = DataBlock::from_arrow_chunk(&chunk, &self.data_schema()); + + if let Some(cache) = CacheManager::instance().get_table_data_array_cache() { + // populate array cache items + for (item, need_tobe_cached) in deserialized_column_arrays + .into_iter() + .zip(cache_flags) + .into_iter() + { + if let Some(column_idx) = need_tobe_cached { + let key = TableDataColumnCacheKey::new(block_path, column_idx); + cache.put(key.into(), item.into()) + } + } + } + data_block } fn chunks_to_parquet_array_iter<'a>( From e091e0107404b304f5a47b89d125c27c15a091fd Mon Sep 17 00:00:00 2001 From: dantengsky Date: Thu, 9 Feb 2023 21:50:07 +0800 Subject: [PATCH 29/80] array cache for sync parquet read --- .../fuse/src/io/read/block/block_reader.rs | 51 +++++++++++++------ .../src/io/read/block/block_reader_parquet.rs | 7 +-- 2 files changed, 37 insertions(+), 21 deletions(-) diff --git 
a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index 70781a69679ba..91ed6ba680b50 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -81,6 +81,8 @@ impl OwnerMemory { } } +type CachedColumnData = Vec<(ColumnId, Arc>)>; +type CachedColumnArray = Vec<(ColumnId, Arc>)>; pub struct MergeIOReadResult { block_path: String, columns_chunk_offsets: HashMap)>, @@ -90,9 +92,6 @@ pub struct MergeIOReadResult { table_data_cache: Option, } -type CachedColumnData = Vec<(ColumnId, Arc>)>; -type CachedColumnArray = Vec<(ColumnId, Arc>)>; - pub enum DataItem<'a> { RawData(&'a [u8]), ColumnArray(&'a Arc>), @@ -141,19 +140,18 @@ impl MergeIOReadResult { self.owner_memory.get_chunk(index, path) } - // sync read path also hit this method! - // TODO 1. pass the cache in 2) let the block reader hold a instance of cache - fn add_column_chunk(&mut self, chunk: usize, column_id: ColumnId, range: Range) { + fn add_column_chunk(&mut self, chunk_index: usize, column_id: ColumnId, range: Range) { // TODO doc why put cache operation could be placed here if let Some(cache) = &self.table_data_cache { - if let Ok(chunk_data) = self.get_chunk(chunk, &self.block_path) { + if let Ok(chunk_data) = self.get_chunk(chunk_index, &self.block_path) { let cache_key = TableDataColumnCacheKey::new(&self.block_path, column_id); let data = &chunk_data[range.clone()]; // TODO api is NOT type safe cache.put(cache_key.as_ref().to_owned(), Arc::new(data.to_vec())); } } - self.columns_chunk_offsets.insert(column_id, (chunk, range)); + self.columns_chunk_offsets + .insert(column_id, (chunk_index, range)); } } @@ -207,7 +205,16 @@ impl BlockReader { object: Object, raw_ranges: Vec<(ColumnId, Range)>, ) -> Result { - let path = object.path().to_string(); + if raw_ranges.is_empty() { + // shortcut + let read_res = MergeIOReadResult::create( + 
OwnerMemory::create(vec![]), + raw_ranges.len(), + object.path().to_string(), + CacheManager::instance().get_table_data_cache(), + ); + return Ok(read_res); + } // Build merged read ranges. let ranges = raw_ranges @@ -244,7 +251,7 @@ impl BlockReader { let mut read_res = MergeIOReadResult::create( owner_memory, raw_ranges.len(), - path.clone(), + object.path().to_string(), table_data_cache, ); @@ -260,7 +267,9 @@ impl BlockReader { let (merged_range_idx, merged_range) = match range_merger.get(column_range.clone()) { None => Err(ErrorCode::Internal(format!( "It's a terrible bug, not found raw range:[{:?}], path:{} from merged ranges\n: {:?}", - column_range, path, merged_ranges + column_range, + object.path(), + merged_ranges ))), Some((index, range)) => Ok((index, range)), }?; @@ -358,20 +367,19 @@ impl BlockReader { for (_index, (column_id, ..)) in self.project_indices.iter() { let column_cache_key = TableDataColumnCacheKey::new(location, *column_id); - // check column array object cache - eprintln!("geting col array cache {}", column_cache_key.as_ref()); + // first, check column array object cache if let Some(cache_array) = column_array_cache.get(&column_cache_key) { - eprintln!("got from object cache"); cached_column_array.push((*column_id, cache_array)); continue; } - // check column data cache + // and then, check column data cache if let Some(cached_column_raw_data) = column_data_cache.get(&column_cache_key) { cached_column_data.push((*column_id, cached_column_raw_data)); continue; } + // if all cache missed, prepare the ranges to be read if let Some(column_meta) = columns_meta.get(column_id) { let (offset, len) = column_meta.offset_length(); ranges.push((*column_id, offset..(offset + len))); @@ -398,9 +406,18 @@ impl BlockReader { part: PartInfoPtr, ) -> Result { let part = FusePartInfo::from_part(&part)?; + let column_array_cache = CacheManager::instance().get_table_data_array_cache(); let mut ranges = vec![]; + let mut cached_column_array = vec![]; for 
(index, (column_id, ..)) in self.project_indices.iter() { + // first, check column array object cache + let block_path = &part.location; + let column_cache_key = TableDataColumnCacheKey::new(block_path, *column_id); + if let Some(cache_array) = column_array_cache.get(&column_cache_key) { + cached_column_array.push((*column_id, cache_array)); + continue; + } if let Some(column_meta) = part.columns_meta.get(column_id) { let (offset, len) = column_meta.offset_length(); ranges.push((*index, offset..(offset + len))); @@ -408,7 +425,9 @@ impl BlockReader { } let object = self.operator.object(&part.location); - Self::sync_merge_io_read(settings, object, ranges) + let mut merge_io_result = Self::sync_merge_io_read(settings, object, ranges)?; + merge_io_result.cached_column_array = cached_column_array; + Ok(merge_io_result) } // Build non duplicate leaf_ids to avoid repeated read column from parquet diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index 33233255497d3..4ba2b75ae77f7 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -181,7 +181,7 @@ impl BlockReader { } } - if column_metas.len() > 0 { + if !column_metas.is_empty() { if column_metas.len() > 1 { // working on nested field holders.push(Holder::Deserialized(deserialized_item_index)); @@ -236,10 +236,7 @@ impl BlockReader { if let Some(cache) = CacheManager::instance().get_table_data_array_cache() { // populate array cache items - for (item, need_tobe_cached) in deserialized_column_arrays - .into_iter() - .zip(cache_flags) - .into_iter() + for (item, need_tobe_cached) in deserialized_column_arrays.into_iter().zip(cache_flags) { if let Some(column_idx) = need_tobe_cached { let key = TableDataColumnCacheKey::new(block_path, column_idx); From 89d393bb1c004c86ac6e8d9d6677e1a7b2bb193c Mon Sep 17 00:00:00 2001 From: 
dantengsky Date: Thu, 9 Feb 2023 22:59:16 +0800 Subject: [PATCH 30/80] add setting for in memnory table colum object cache --- src/common/cache/src/lru_disk_cache.rs | 4 ++-- src/query/config/src/inner.rs | 3 +++ src/query/config/src/outer_v0.rs | 7 +++++++ src/query/pipeline/sources/src/sync_source.rs | 1 - .../tests/it/storages/testdata/configs_table_basic.txt | 1 + .../storages/common/cache-manager/src/cache_manager.rs | 7 ++++--- .../storages/common/cache/src/providers/memory_cache.rs | 5 +++-- 7 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/common/cache/src/lru_disk_cache.rs b/src/common/cache/src/lru_disk_cache.rs index 2acf3388aa882..ddc8bb9e210c1 100644 --- a/src/common/cache/src/lru_disk_cache.rs +++ b/src/common/cache/src/lru_disk_cache.rs @@ -229,8 +229,8 @@ where C: Cache } let mut f = File::create(&path)?; let mut bufs = Vec::with_capacity(bytes.len()); - for x in bytes { - bufs.push(IoSlice::new(x)); + for slick in bytes { + bufs.push(IoSlice::new(slick)); } f.write_all_vectored(&mut bufs)?; self.cache.put(cache_key.0, bytes_len); diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 1d6331c7c0f69..9e2c16c8d842c 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -163,6 +163,8 @@ pub struct QueryConfig { pub table_cache_bloom_index_meta_count: u64, /// Max number of cached bloom index filters pub table_cache_bloom_index_filter_count: u64, + /// Max size of in memory table column object cache + pub table_cache_column_mb_size: u64, /// Indicates if table data cache is enabled pub table_data_cache_enabled: bool, /// Max bytes of table data cached in memory (MB) @@ -228,6 +230,7 @@ impl Default for QueryConfig { table_cache_segment_count: 10240, table_cache_bloom_index_meta_count: 3000, table_cache_bloom_index_filter_count: 1024 * 1024, + table_cache_column_mb_size: 10 * 1024, table_data_cache_enabled: false, table_data_cache_population_queue_size: 65536, table_disk_cache_root: 
"_cache".to_string(), diff --git a/src/query/config/src/outer_v0.rs b/src/query/config/src/outer_v0.rs index fc00684954bf4..e5ed7aa2bb719 100644 --- a/src/query/config/src/outer_v0.rs +++ b/src/query/config/src/outer_v0.rs @@ -1301,6 +1301,11 @@ pub struct QueryConfig { #[clap(long, default_value = "1048576")] pub table_cache_bloom_index_filter_count: u64, + /// Max size of in memory table column object cache, default value is 10 GiB + /// To disable this cache , jus set it to 0 + #[clap(long, default_value = "10240")] + pub table_cache_column_mb_size: u64, + /// Indicates if table data cached is enabled, default false #[clap(long)] pub table_data_cache_enabled: bool, @@ -1399,6 +1404,7 @@ impl TryInto for QueryConfig { table_cache_segment_count: self.table_cache_segment_count, table_cache_bloom_index_meta_count: self.table_cache_bloom_index_meta_count, table_cache_bloom_index_filter_count: self.table_cache_bloom_index_filter_count, + table_cache_column_mb_size: self.table_cache_column_mb_size, table_data_cache_enabled: self.table_data_cache_enabled, table_data_cache_population_queue_size: self.table_data_cache_population_queue_size, table_data_cache_in_memory_mb_size: self.table_data_cache_in_memory_mb_size, @@ -1469,6 +1475,7 @@ impl From for QueryConfig { table_cache_segment_count: inner.table_cache_segment_count, table_cache_bloom_index_meta_count: inner.table_cache_bloom_index_meta_count, table_cache_bloom_index_filter_count: inner.table_cache_bloom_index_filter_count, + table_cache_column_mb_size: inner.table_cache_column_mb_size, table_data_cache_enabled: inner.table_data_cache_enabled, table_data_cache_in_memory_mb_size: inner.table_data_cache_in_memory_mb_size, table_data_cache_population_queue_size: inner.table_data_cache_population_queue_size, diff --git a/src/query/pipeline/sources/src/sync_source.rs b/src/query/pipeline/sources/src/sync_source.rs index b39a7ab671a19..a362dc050657d 100644 --- a/src/query/pipeline/sources/src/sync_source.rs +++ 
b/src/query/pipeline/sources/src/sync_source.rs @@ -37,7 +37,6 @@ pub trait SyncSource: Send { // TODO: This can be refactored using proc macros pub struct SyncSourcer { is_finish: bool, - inner: T, output: Arc, generated_data: Option, diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index 8e3283205aa09..b5af6ae881ea8 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -63,6 +63,7 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo | "query" | "table_cache_block_meta_count" | "102400" | "" | | "query" | "table_cache_bloom_index_filter_count" | "1048576" | "" | | "query" | "table_cache_bloom_index_meta_count" | "3000" | "" | +| "query" | "table_cache_column_mb_size" | "10240" | "" | | "query" | "table_cache_segment_count" | "10240" | "" | | "query" | "table_cache_snapshot_count" | "256" | "" | | "query" | "table_cache_statistic_count" | "256" | "" | diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index 60d18f7ffa5cb..cd8aab87a898d 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -32,7 +32,6 @@ use crate::caches::TableSnapshotStatisticCache; use crate::ColumnArrayCache; static DEFAULT_FILE_META_DATA_CACHE_ITEMS: u64 = 3000; -static DEFAULT_COLUMN_ARRAY_CACHE_ITEMS: u64 = 100_000; /// Where all the caches reside pub struct CacheManager { @@ -49,6 +48,7 @@ pub struct CacheManager { impl CacheManager { /// Initialize the caches according to the relevant configurations. 
pub fn init(config: &QueryConfig) -> Result<()> { + // setup table data cache let table_data_cache = if config.table_data_cache_enabled { None } else { @@ -60,9 +60,10 @@ impl CacheManager { )? }; - // TODO settings - let table_column_array_cache = Self::new_item_cache(DEFAULT_COLUMN_ARRAY_CACHE_ITEMS); + // setup in-memory table column cache + let table_column_array_cache = Self::new_item_cache(config.table_cache_column_mb_size); + // setup in-memory table meta cache if !config.table_meta_cache_enabled { GlobalInstance::set(Arc::new(Self { table_snapshot_cache: None, diff --git a/src/query/storages/common/cache/src/providers/memory_cache.rs b/src/query/storages/common/cache/src/providers/memory_cache.rs index fbabe06055dd6..b376ceac37883 100644 --- a/src/query/storages/common/cache/src/providers/memory_cache.rs +++ b/src/query/storages/common/cache/src/providers/memory_cache.rs @@ -23,10 +23,11 @@ use common_cache::DefaultHashBuilder; use common_cache::LruCache; use parking_lot::RwLock; -pub type ItemCache = LruCache, DefaultHashBuilder, Count>; +pub type ItemCache = LruCache, S, M>; pub type BytesCache = LruCache>, DefaultHashBuilder, BytesMeter>; -pub type InMemoryItemCacheHolder = Arc>>; +pub type InMemoryItemCacheHolder = + Arc>>; pub type InMemoryBytesCacheHolder = Arc>; pub struct InMemoryCacheBuilder; From c7af42bb00c96ab2edb80ab895e815441362d5ea Mon Sep 17 00:00:00 2001 From: dantengsky Date: Fri, 10 Feb 2023 00:12:54 +0800 Subject: [PATCH 31/80] meter in memory column array cache by uncompressed bytes size --- Cargo.lock | 1 + .../storages/common/cache-manager/Cargo.toml | 1 + .../common/cache-manager/src/cache_manager.rs | 26 +++++++++-- .../common/cache-manager/src/caches.rs | 20 ++++++++- .../cache/src/providers/memory_cache.rs | 18 +++++++- .../common/cache/src/providers/mod.rs | 2 +- .../fuse/src/io/read/block/block_reader.rs | 6 +-- .../src/io/read/block/block_reader_parquet.rs | 44 +++++++++++-------- 8 files changed, 88 insertions(+), 30 
deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d2439edd409c9..b5b2908d4d79b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7832,6 +7832,7 @@ version = "0.1.0" dependencies = [ "common-arrow", "common-base", + "common-cache", "common-config", "common-exception", "storages-common-cache", diff --git a/src/query/storages/common/cache-manager/Cargo.toml b/src/query/storages/common/cache-manager/Cargo.toml index bcd467f6a3d70..2e8f98cd5162c 100644 --- a/src/query/storages/common/cache-manager/Cargo.toml +++ b/src/query/storages/common/cache-manager/Cargo.toml @@ -11,6 +11,7 @@ edition = { workspace = true } [dependencies] common-arrow = { path = "../../../../common/arrow" } common-base = { path = "../../../../common/base" } +common-cache = { path = "../../../../common/cache" } common-config = { path = "../../../config" } common-exception = { path = "../../../../common/exception" } storages-common-cache = { path = "../../common/cache" } diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index cd8aab87a898d..06b7a443ad097 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -16,6 +16,8 @@ use std::sync::Arc; use common_base::base::GlobalInstance; +use common_cache::CountableMeter; +use common_cache::DefaultHashBuilder; use common_config::QueryConfig; use common_exception::Result; use storages_common_cache::InMemoryCacheBuilder; @@ -25,11 +27,12 @@ use storages_common_cache::TableDataCacheBuilder; use crate::caches::BloomIndexFilterCache; use crate::caches::BloomIndexMetaCache; +use crate::caches::ColumnArrayCache; use crate::caches::FileMetaDataCache; use crate::caches::SegmentInfoCache; use crate::caches::TableSnapshotCache; use crate::caches::TableSnapshotStatisticCache; -use crate::ColumnArrayCache; +use crate::ColumnArrayMeter; static DEFAULT_FILE_META_DATA_CACHE_ITEMS: u64 = 3000; @@ 
-61,7 +64,8 @@ impl CacheManager { }; // setup in-memory table column cache - let table_column_array_cache = Self::new_item_cache(config.table_cache_column_mb_size); + let table_column_array_cache = + Self::new_in_memory_cache(config.table_cache_column_mb_size, ColumnArrayMeter); // setup in-memory table meta cache if !config.table_meta_cache_enabled { @@ -135,7 +139,8 @@ impl CacheManager { self.table_column_array_cache.clone() } - fn new_item_cache(capacity: u64) -> Option> { + // create cache that meters size by `Count` + fn new_item_cache(capacity: u64) -> Option> { if capacity > 0 { Some(InMemoryCacheBuilder::new_item_cache(capacity)) } else { @@ -143,6 +148,21 @@ impl CacheManager { } } + // create cache that meters size by `meter` + fn new_in_memory_cache( + capacity: u64, + meter: M, + ) -> Option> + where + M: CountableMeter>, + { + if capacity > 0 { + Some(InMemoryCacheBuilder::new_in_memory_cache(capacity, meter)) + } else { + None + } + } + fn new_block_data_cache( path: &str, in_memory_cache_mb_size: u64, diff --git a/src/query/storages/common/cache-manager/src/caches.rs b/src/query/storages/common/cache-manager/src/caches.rs index 92773dbc75d5d..db84af853794b 100644 --- a/src/query/storages/common/cache-manager/src/caches.rs +++ b/src/query/storages/common/cache-manager/src/caches.rs @@ -12,7 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::borrow::Borrow; +use std::sync::Arc; + use common_arrow::parquet::metadata::FileMetaData; +use common_cache::DefaultHashBuilder; +use common_cache::Meter; use storages_common_cache::CacheAccessor; use storages_common_cache::InMemoryItemCacheHolder; use storages_common_index::filters::Xor8Filter; @@ -38,8 +43,9 @@ pub type BloomIndexMetaCache = InMemoryItemCacheHolder; pub type FileMetaDataCache = InMemoryItemCacheHolder; /// In memory object cache of parquet FileMetaData of external parquet files -/// TODO provides a proper meter -pub type ColumnArrayCache = InMemoryItemCacheHolder>; +pub type ColumnArrayCache = + InMemoryItemCacheHolder; +pub type SizedColumnArray = (Box, usize); // Bind Type of cached objects to Caches // @@ -92,3 +98,13 @@ impl CachedObject for FileMetaData { CacheManager::instance().get_file_meta_data_cache() } } + +pub struct ColumnArrayMeter; + +impl Meter> for ColumnArrayMeter { + type Measure = usize; + fn measure(&self, _: &Q, v: &Arc<(V, usize)>) -> usize + where K: Borrow { + v.1 + } +} diff --git a/src/query/storages/common/cache/src/providers/memory_cache.rs b/src/query/storages/common/cache/src/providers/memory_cache.rs index b376ceac37883..ab5709fc1a5cd 100644 --- a/src/query/storages/common/cache/src/providers/memory_cache.rs +++ b/src/query/storages/common/cache/src/providers/memory_cache.rs @@ -23,20 +23,34 @@ use common_cache::DefaultHashBuilder; use common_cache::LruCache; use parking_lot::RwLock; -pub type ItemCache = LruCache, S, M>; +pub type ImMemoryCache = LruCache, S, M>; pub type BytesCache = LruCache>, DefaultHashBuilder, BytesMeter>; pub type InMemoryItemCacheHolder = - Arc>>; + Arc>>; pub type InMemoryBytesCacheHolder = Arc>; pub struct InMemoryCacheBuilder; impl InMemoryCacheBuilder { + // new cache that cache `V`, and metered by the given `meter` + pub fn new_in_memory_cache( + capacity: u64, + meter: M, + ) -> InMemoryItemCacheHolder + where + M: CountableMeter>, + { + let cache = 
LruCache::with_meter_and_hasher(capacity, meter, DefaultHashBuilder::new()); + Arc::new(RwLock::new(cache)) + } + + // new cache that caches `V` and meter by counting pub fn new_item_cache(capacity: u64) -> InMemoryItemCacheHolder { let cache = LruCache::new(capacity); Arc::new(RwLock::new(cache)) } + // new cache that cache `Vec`, and metered by byte size pub fn new_bytes_cache(capacity: u64) -> InMemoryBytesCacheHolder { let cache = LruCache::with_meter_and_hasher(capacity, BytesMeter, DefaultHashBuilder::new()); diff --git a/src/query/storages/common/cache/src/providers/mod.rs b/src/query/storages/common/cache/src/providers/mod.rs index a2125a688424f..1f2c828a65da7 100644 --- a/src/query/storages/common/cache/src/providers/mod.rs +++ b/src/query/storages/common/cache/src/providers/mod.rs @@ -19,10 +19,10 @@ pub use disk_cache::DiskBytesCache; pub use disk_cache::DiskCache; pub use disk_cache::DiskCacheBuilder; pub use memory_cache::BytesCache; +pub use memory_cache::ImMemoryCache; pub use memory_cache::InMemoryBytesCacheHolder; pub use memory_cache::InMemoryCacheBuilder; pub use memory_cache::InMemoryItemCacheHolder; -pub use memory_cache::ItemCache; pub use table_data_cache::TableDataCache; pub use table_data_cache::TableDataCacheBuilder; pub use table_data_cache::TableDataColumnCacheKey; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index 91ed6ba680b50..e88f09eba4f27 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -18,7 +18,6 @@ use std::ops::Range; use std::sync::Arc; use std::time::Instant; -use common_arrow::arrow::array::Array; use common_arrow::arrow::datatypes::Field; use common_arrow::arrow::io::parquet::write::to_parquet_schema; use common_arrow::parquet::metadata::SchemaDescriptor; @@ -43,6 +42,7 @@ use storages_common_cache::CacheAccessor; use 
storages_common_cache::TableDataCache; use storages_common_cache::TableDataColumnCacheKey; use storages_common_cache_manager::CacheManager; +use storages_common_cache_manager::SizedColumnArray; use storages_common_table_meta::meta::ColumnMeta; use crate::fuse_part::FusePartInfo; @@ -82,7 +82,7 @@ impl OwnerMemory { } type CachedColumnData = Vec<(ColumnId, Arc>)>; -type CachedColumnArray = Vec<(ColumnId, Arc>)>; +type CachedColumnArray = Vec<(ColumnId, Arc)>; pub struct MergeIOReadResult { block_path: String, columns_chunk_offsets: HashMap)>, @@ -94,7 +94,7 @@ pub struct MergeIOReadResult { pub enum DataItem<'a> { RawData(&'a [u8]), - ColumnArray(&'a Arc>), + ColumnArray(&'a Arc), } impl MergeIOReadResult { diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index 4ba2b75ae77f7..7d6ac442db9de 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -159,6 +159,7 @@ impl BlockReader { let mut column_metas = Vec::with_capacity(indices.len()); let mut column_chunks = Vec::with_capacity(indices.len()); let mut column_descriptors = Vec::with_capacity(indices.len()); + let mut field_uncompressed_size = 0; for (i, index) in indices.iter().enumerate() { let column_id = column.leaf_column_id(i); if let Some(column_meta) = columns_meta.get(&column_id) { @@ -170,10 +171,12 @@ impl BlockReader { column_metas.push(column_meta); column_chunks.push(*data); column_descriptors.push(column_descriptor); + field_uncompressed_size += data.len(); } DataItem::ColumnArray(column_array) => { let idx = column_idx_cached_array.len(); column_idx_cached_array.push(*column_array); + field_uncompressed_size += column_array.1; holders.push(Holder::Cached(idx)); } } @@ -194,18 +197,21 @@ impl BlockReader { } deserialized_item_index += 1; - columns_array_iter.push(Self::chunks_to_parquet_array_iter( - 
column_metas, - column_chunks, - // test_chunks, - num_rows, - column_descriptors, - field, - compression, - uncompressed_buffer - .clone() - .unwrap_or_else(|| UncompressedBuffer::new(0)), - )?); + // TODO why not just iterator it here? + columns_array_iter.push(( + field_uncompressed_size, + Self::chunks_to_parquet_array_iter( + column_metas, + column_chunks, + num_rows, + column_descriptors, + field, + compression, + uncompressed_buffer + .clone() + .unwrap_or_else(|| UncompressedBuffer::new(0)), + )?, + )); } } @@ -214,20 +220,19 @@ impl BlockReader { // deserialized fields that are not cached let deserialized_column_arrays = columns_array_iter .into_iter() - .map(|mut v| + .map(|(size, mut array_iter)| // TODO error handling - v.next().unwrap().unwrap()) + (size, array_iter.next().unwrap().unwrap())) .collect::>(); // assembly the arrays for holder in holders { match holder { Holder::Cached(idx) => { - arrays.push(column_idx_cached_array[idx].as_ref()); + arrays.push(&column_idx_cached_array[idx].as_ref().0); } Holder::Deserialized(idx) => { - let item = &deserialized_column_arrays[idx]; - arrays.push(item); + arrays.push(&deserialized_column_arrays[idx].1); } } } @@ -236,11 +241,12 @@ impl BlockReader { if let Some(cache) = CacheManager::instance().get_table_data_array_cache() { // populate array cache items - for (item, need_tobe_cached) in deserialized_column_arrays.into_iter().zip(cache_flags) + for ((size, item), need_tobe_cached) in + deserialized_column_arrays.into_iter().zip(cache_flags) { if let Some(column_idx) = need_tobe_cached { let key = TableDataColumnCacheKey::new(block_path, column_idx); - cache.put(key.into(), item.into()) + cache.put(key.into(), Arc::new((item, size))) } } } From b7fdfcdc01e5e75f47c5563ac3dc7a5f7b2b06b4 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Fri, 10 Feb 2023 09:52:45 +0800 Subject: [PATCH 32/80] remove in-memory raw column data cache --- src/query/config/src/inner.rs | 3 -- src/query/config/src/outer_v0.rs | 7 ----- 
.../common/cache-manager/src/cache_manager.rs | 5 +--- .../cache/src/providers/table_data_cache.rs | 28 ++----------------- 4 files changed, 4 insertions(+), 39 deletions(-) diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 9e2c16c8d842c..84cafe43e3746 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -167,8 +167,6 @@ pub struct QueryConfig { pub table_cache_column_mb_size: u64, /// Indicates if table data cache is enabled pub table_data_cache_enabled: bool, - /// Max bytes of table data cached in memory (MB) - pub table_data_cache_in_memory_mb_size: u64, /// Table disk cache folder root pub table_disk_cache_root: String, /// Max size of external cache population queue length @@ -235,7 +233,6 @@ impl Default for QueryConfig { table_data_cache_population_queue_size: 65536, table_disk_cache_root: "_cache".to_string(), table_disk_cache_mb_size: 20 * 1024, - table_data_cache_in_memory_mb_size: 2 * 1024, management_mode: false, jwt_key_file: "".to_string(), async_insert_max_data_size: 10000, diff --git a/src/query/config/src/outer_v0.rs b/src/query/config/src/outer_v0.rs index e5ed7aa2bb719..7de7309f49ce7 100644 --- a/src/query/config/src/outer_v0.rs +++ b/src/query/config/src/outer_v0.rs @@ -1310,11 +1310,6 @@ pub struct QueryConfig { #[clap(long)] pub table_data_cache_enabled: bool, - /// Max bytes of table data cached in memory (MB) - /// default value 2048 MB, or 2G - #[clap(long, default_value = "2048")] - pub table_data_cache_in_memory_mb_size: u64, - /// Max item that could be pending in the external cache population queue /// default value 65536 items. Increase this value if it takes too much times /// to fully populate the disk cache. 
@@ -1407,7 +1402,6 @@ impl TryInto for QueryConfig { table_cache_column_mb_size: self.table_cache_column_mb_size, table_data_cache_enabled: self.table_data_cache_enabled, table_data_cache_population_queue_size: self.table_data_cache_population_queue_size, - table_data_cache_in_memory_mb_size: self.table_data_cache_in_memory_mb_size, table_disk_cache_root: self.table_disk_cache_root, table_disk_cache_mb_size: self.table_disk_cache_mb_size, management_mode: self.management_mode, @@ -1477,7 +1471,6 @@ impl From for QueryConfig { table_cache_bloom_index_filter_count: inner.table_cache_bloom_index_filter_count, table_cache_column_mb_size: inner.table_cache_column_mb_size, table_data_cache_enabled: inner.table_data_cache_enabled, - table_data_cache_in_memory_mb_size: inner.table_data_cache_in_memory_mb_size, table_data_cache_population_queue_size: inner.table_data_cache_population_queue_size, management_mode: inner.management_mode, jwt_key_file: inner.jwt_key_file, diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index 06b7a443ad097..a99066ae9fc56 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -57,7 +57,6 @@ impl CacheManager { } else { Self::new_block_data_cache( &config.table_disk_cache_root, - config.table_data_cache_in_memory_mb_size, config.table_data_cache_population_queue_size, config.table_disk_cache_mb_size, )? 
@@ -165,14 +164,12 @@ impl CacheManager { fn new_block_data_cache( path: &str, - in_memory_cache_mb_size: u64, population_queue_size: u32, disk_cache_mb_size: u64, ) -> Result> { - if in_memory_cache_mb_size > 0 { + if disk_cache_mb_size > 0 { let cache_holder = TableDataCacheBuilder::new_table_data_disk_cache( path, - in_memory_cache_mb_size, population_queue_size, disk_cache_mb_size, )?; diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index fb0019f7894b6..78b635135081c 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -32,8 +32,6 @@ use crate::metrics_inc_cache_population_pending_count; use crate::CacheAccessor; use crate::DiskBytesCache; use crate::DiskCacheBuilder; -use crate::InMemoryBytesCacheHolder; -use crate::InMemoryCacheBuilder; struct CacheItem { key: String, @@ -66,12 +64,10 @@ impl AsRef for TableDataColumnCacheKey { } /// Tiered cache which consist of -/// - a in-memory cache -/// - a disk or redis based external cache /// - a bounded channel that keep the references of items being cached +/// - a disk or redis based external cache #[derive(Clone)] pub struct TableDataCache { - in_memory_cache: InMemoryBytesCacheHolder, external_cache: T, population_queue: crossbeam_channel::Sender, _cache_populator: DiskCachePopulator, @@ -83,16 +79,13 @@ pub struct TableDataCacheBuilder; impl TableDataCacheBuilder { pub fn new_table_data_disk_cache( path: &str, - in_memory_cache_mb_size: u64, population_queue_size: u32, disk_cache_mb_size: u64, ) -> Result> { let disk_cache = DiskCacheBuilder::new_disk_cache(path, disk_cache_mb_size)?; let (rx, tx) = crossbeam_channel::bounded(population_queue_size as usize); - let in_memory_cache_bytes_size = in_memory_cache_mb_size * 1024 * 1024; let num_population_thread = 1; Ok(TableDataCache { - in_memory_cache: 
InMemoryCacheBuilder::new_bytes_cache(in_memory_cache_bytes_size), external_cache: disk_cache.clone(), population_queue: rx, _cache_populator: DiskCachePopulator::new(tx, disk_cache, num_population_thread)?, @@ -104,17 +97,7 @@ impl CacheAccessor, DefaultHashBuilder, Count> for TableDataCach fn get>(&self, k: Q) -> Option>> { metrics_inc_cache_access_count(1, TABLE_DATA_CACHE_NAME); let k = k.as_ref(); - // check in memory cache first - { - if let Some(item) = self.in_memory_cache.get(k) { - metrics_inc_cache_hit_count(1, TABLE_DATA_CACHE_NAME); - return Some(item); - } - } - if let Some(item) = self.external_cache.get(k) { - // put item into in-memory cache - self.in_memory_cache.put(k.to_owned(), item.clone()); metrics_inc_cache_hit_count(1, TABLE_DATA_CACHE_NAME); Some(item) } else { @@ -124,9 +107,6 @@ impl CacheAccessor, DefaultHashBuilder, Count> for TableDataCach } fn put(&self, k: String, v: Arc>) { - // put it into the in-memory cache first - self.in_memory_cache.put(k.clone(), v.clone()); - // check if external(disk/redis) already have it. 
if !self.external_cache.contains_key(&k) { // populate the cache to external cache(disk/redis) asyncly @@ -145,13 +125,11 @@ impl CacheAccessor, DefaultHashBuilder, Count> for TableDataCach } fn evict(&self, k: &str) -> bool { - let r = self.in_memory_cache.evict(k); - let l = self.external_cache.evict(k); - r || l + self.external_cache.evict(k) } fn contains_key(&self, k: &str) -> bool { - self.in_memory_cache.contains_key(k) || self.external_cache.contains_key(k) + self.external_cache.contains_key(k) } } From a2592a7452a6aa4d410d8c9a3221a011c2d5dee9 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Fri, 10 Feb 2023 09:57:38 +0800 Subject: [PATCH 33/80] fix ut --- .../service/tests/it/storages/testdata/configs_table_basic.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index b5af6ae881ea8..c7eaaa2bc7255 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -68,7 +68,6 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo | "query" | "table_cache_snapshot_count" | "256" | "" | | "query" | "table_cache_statistic_count" | "256" | "" | | "query" | "table_data_cache_enabled" | "false" | "" | -| "query" | "table_data_cache_in_memory_mb_size" | "2048" | "" | | "query" | "table_data_cache_population_queue_size" | "65536" | "" | | "query" | "table_disk_cache_mb_size" | "20480" | "" | | "query" | "table_disk_cache_root" | "_cache" | "" | From 936ed75e0ed57b542e91c27940a5955ebcbb8970 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Fri, 10 Feb 2023 12:18:05 +0800 Subject: [PATCH 34/80] iterate ArrayIter in-place --- .../fuse/src/io/read/block/block_reader.rs | 10 +- .../src/io/read/block/block_reader_parquet.rs | 93 ++++++++----------- .../operations/mutation/mutation_source.rs | 19 +--- 3 files 
changed, 48 insertions(+), 74 deletions(-) diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index 4ec192bdc98d6..b5d5431686d42 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -118,23 +118,23 @@ impl MergeIOReadResult { } } - pub fn columns_chunks(&self) -> Result> { - let mut res = Vec::with_capacity(self.columns_chunk_offsets.len()); + pub fn columns_chunks(&self) -> Result> { + let mut res = HashMap::with_capacity(self.columns_chunk_offsets.len()); // merge column data fetched from object storage for (column_idx, (chunk_idx, range)) in &self.columns_chunk_offsets { let chunk = self.owner_memory.get_chunk(*chunk_idx, &self.block_path)?; - res.push((*column_idx, DataItem::RawData(&chunk[range.clone()]))); + res.insert(*column_idx, DataItem::RawData(&chunk[range.clone()])); } // merge column data from cache for (column_id, data) in &self.cached_column_data { - res.push((*column_id, DataItem::RawData(data.as_slice()))) + res.insert(*column_id, DataItem::RawData(data.as_slice())); } // merge column array from cache for (column_id, data) in &self.cached_column_array { - res.push((*column_id, DataItem::ColumnArray(data))) + res.insert(*column_id, DataItem::ColumnArray(data)); } Ok(res) diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index ab40bd46a1a73..681c5f631dae3 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -65,24 +65,16 @@ impl BlockReader { .await?; // Get the columns chunk. - let chunks = fetched - .columns_chunks()? 
- .into_iter() - .map(|(column_idx, column_chunk)| (column_idx, column_chunk)) - .collect::>(); + let column_chunks = fetched.columns_chunks()?; let num_rows = meta.row_count as usize; - let columns_chunk = chunks - .into_iter() - .map(|(index, chunk)| (index, chunk)) - .collect::>(); self.deserialize_parquet_chunks_with_buffer( &meta.location.0, num_rows, &meta.compression, &columns_meta, - columns_chunk, + column_chunks, None, ) } @@ -91,7 +83,7 @@ impl BlockReader { pub fn deserialize_parquet_chunks( &self, part: PartInfoPtr, - chunks: Vec<(ColumnId, DataItem)>, + chunks: HashMap, ) -> Result { let part = FusePartInfo::from_part(&part)?; let start = Instant::now(); @@ -100,17 +92,12 @@ impl BlockReader { return Ok(DataBlock::new(vec![], part.nums_rows)); } - let reads = chunks - .into_iter() - .map(|(index, chunk)| (index, chunk)) - .collect::>(); - let deserialized_res = self.deserialize_parquet_chunks_with_buffer( &part.location, part.nums_rows, &part.compression, &part.columns_meta, - reads, + chunks, None, ); @@ -134,16 +121,15 @@ impl BlockReader { block_path: &str, num_rows: usize, compression: &Compression, - columns_meta: &HashMap, - columns_chunks: Vec<(ColumnId, DataItem)>, + column_metas: &HashMap, + column_chunks: HashMap, uncompressed_buffer: Option>, ) -> Result { - if columns_chunks.is_empty() { + if column_chunks.is_empty() { return self.build_default_values_block(num_rows); } - let chunk_map: HashMap = columns_chunks.into_iter().collect(); - let mut columns_array_iter = Vec::with_capacity(self.projection.len()); + let mut deserialized_column_arrays = Vec::with_capacity(self.projection.len()); let columns = self.projection.project_column_nodes(&self.column_nodes)?; let mut need_default_vals = Vec::with_capacity(columns.len()); @@ -164,23 +150,24 @@ impl BlockReader { for column in &columns { let field = column.field.clone(); let indices = &column.leaf_ids; - let mut column_metas = Vec::with_capacity(indices.len()); - let mut column_chunks = 
Vec::with_capacity(indices.len()); - let mut column_descriptors = Vec::with_capacity(indices.len()); + let mut field_column_metas = Vec::with_capacity(indices.len()); + let mut field_column_data = Vec::with_capacity(indices.len()); + let mut field_column_descriptors = Vec::with_capacity(indices.len()); let mut column_in_block = false; let mut field_uncompressed_size = 0; for (i, index) in indices.iter().enumerate() { - let column_id = column.leaf_column_id(i); - if let Some(column_meta) = columns_meta.get(&column_id) { - if let Some(chunk) = chunk_map.get(index) { + let column_id = column.leaf_column_ids[i]; + if let Some(column_meta) = column_metas.get(&column_id) { + // TODO need @LiChuang review + if let Some(chunk) = column_chunks.get(&(*index as ColumnId)) { column_in_block = true; match chunk { DataItem::RawData(data) => { let column_descriptor = &self.parquet_schema_descriptor.columns()[*index]; - column_metas.push(column_meta); - column_chunks.push(*data); - column_descriptors.push(column_descriptor); + field_column_metas.push(column_meta); + field_column_data.push(*data); + field_column_descriptors.push(column_descriptor); field_uncompressed_size += data.len(); } DataItem::ColumnArray(column_array) => { @@ -212,21 +199,25 @@ impl BlockReader { } deserialized_item_index += 1; - // TODO why not just iterator it here? 
- columns_array_iter.push(( - field_uncompressed_size, - Self::chunks_to_parquet_array_iter( - column_metas, - column_chunks, - num_rows, - column_descriptors, - field, - compression, - uncompressed_buffer - .clone() - .unwrap_or_else(|| UncompressedBuffer::new(0)), - )?, - )); + let field_name = field.name.clone(); + let mut array_iter = Self::chunks_to_parquet_array_iter( + field_column_metas, + field_column_data, + num_rows, + field_column_descriptors, + field, + compression, + uncompressed_buffer + .clone() + .unwrap_or_else(|| UncompressedBuffer::new(0)), + )?; + let array = array_iter.next().transpose()?.ok_or_else(|| { + ErrorCode::StorageOther(format!( + "unexpected deserialization error, no array found for field {field_name} " + )) + })?; + deserialized_column_arrays.push((field_uncompressed_size, array)); + need_default_vals.push(false); } else { need_default_vals.push(true); @@ -234,15 +225,7 @@ impl BlockReader { } } - let mut arrays = Vec::with_capacity(columns_array_iter.len()); - - // deserialized fields that are not cached - let deserialized_column_arrays = columns_array_iter - .into_iter() - .map(|(size, mut array_iter)| - // TODO error handling - (size, array_iter.next().unwrap().unwrap())) - .collect::>(); + let mut arrays = Vec::with_capacity(self.projection.len()); // assembly the arrays for holder in holders { diff --git a/src/query/storages/fuse/src/operations/mutation/mutation_source.rs b/src/query/storages/fuse/src/operations/mutation/mutation_source.rs index 2b7d2e5830269..c607dc2ee68eb 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutation_source.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutation_source.rs @@ -62,7 +62,7 @@ enum State { }, MergeRemain { part: PartInfoPtr, - chunks: MergeIOReadResult, + merged_io_read_result: MergeIOReadResult, data_block: DataBlock, filter: Value, }, @@ -170,11 +170,7 @@ impl Processor for MutationSource { fn process(&mut self) -> Result<()> { match std::mem::replace(&mut 
self.state, State::Finish) { State::FilterData(part, read_res) => { - let chunks = read_res - .columns_chunks()? - .into_iter() - .map(|(column_idx, column_chunk)| (column_idx, column_chunk)) - .collect::>(); + let chunks = read_res.columns_chunks()?; let mut data_block = self .block_reader .deserialize_parquet_chunks(part.clone(), chunks)?; @@ -272,17 +268,12 @@ impl Processor for MutationSource { } State::MergeRemain { part, - chunks, + merged_io_read_result, mut data_block, filter, } => { if let Some(remain_reader) = self.remain_reader.as_ref() { - let chunks = chunks - .columns_chunks()? - .into_iter() - .map(|(column_idx, column_chunk)| (column_idx, column_chunk)) - .collect::>(); - + let chunks = merged_io_read_result.columns_chunks()?; let remain_block = remain_reader.deserialize_parquet_chunks(part, chunks)?; match self.action { @@ -360,7 +351,7 @@ impl Processor for MutationSource { .await?; self.state = State::MergeRemain { part, - chunks: read_res, + merged_io_read_result: read_res, data_block, filter, }; From 1c2978c7a6818c2946285a798ea9ff595e5f4915 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Fri, 10 Feb 2023 15:18:40 +0800 Subject: [PATCH 35/80] fix ut --- .../common/cache-manager/src/cache_manager.rs | 17 ++++-- .../common/cache-manager/src/caches.rs | 9 ++- src/query/storages/common/cache/src/cache.rs | 58 ++++++++++++++++++- src/query/storages/common/cache/src/lib.rs | 2 + .../common/cache/src/providers/disk_cache.rs | 8 +-- .../cache/src/providers/memory_cache.rs | 1 + .../fuse/src/io/read/block/block_reader.rs | 4 +- .../src/io/read/block/block_reader_parquet.rs | 17 ++++-- 8 files changed, 99 insertions(+), 17 deletions(-) diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index a99066ae9fc56..60b85307b0897 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -22,6 
+22,8 @@ use common_config::QueryConfig; use common_exception::Result; use storages_common_cache::InMemoryCacheBuilder; use storages_common_cache::InMemoryItemCacheHolder; +use storages_common_cache::Named; +use storages_common_cache::NamedCache; use storages_common_cache::TableDataCache; use storages_common_cache::TableDataCacheBuilder; @@ -63,8 +65,11 @@ impl CacheManager { }; // setup in-memory table column cache - let table_column_array_cache = - Self::new_in_memory_cache(config.table_cache_column_mb_size, ColumnArrayMeter); + let table_column_array_cache = Self::new_in_memory_cache( + config.table_cache_column_mb_size, + ColumnArrayMeter, + "table_data_cache_column_array", + ); // setup in-memory table meta cache if !config.table_meta_cache_enabled { @@ -151,12 +156,16 @@ impl CacheManager { fn new_in_memory_cache( capacity: u64, meter: M, - ) -> Option> + name: &str, + ) -> Option>> where M: CountableMeter>, { if capacity > 0 { - Some(InMemoryCacheBuilder::new_in_memory_cache(capacity, meter)) + Some( + InMemoryCacheBuilder::new_in_memory_cache(capacity, meter) + .name_with(name.to_owned()), + ) } else { None } diff --git a/src/query/storages/common/cache-manager/src/caches.rs b/src/query/storages/common/cache-manager/src/caches.rs index db84af853794b..0d64f6f262625 100644 --- a/src/query/storages/common/cache-manager/src/caches.rs +++ b/src/query/storages/common/cache-manager/src/caches.rs @@ -20,6 +20,7 @@ use common_cache::DefaultHashBuilder; use common_cache::Meter; use storages_common_cache::CacheAccessor; use storages_common_cache::InMemoryItemCacheHolder; +use storages_common_cache::NamedCache; use storages_common_index::filters::Xor8Filter; use storages_common_table_meta::meta::SegmentInfo; use storages_common_table_meta::meta::TableSnapshot; @@ -44,8 +45,12 @@ pub type FileMetaDataCache = InMemoryItemCacheHolder; /// In memory object cache of parquet FileMetaData of external parquet files pub type ColumnArrayCache = - InMemoryItemCacheHolder; -pub 
type SizedColumnArray = (Box, usize); + NamedCache>; +pub type ArrayRawDataUncompressedSize = usize; +pub type SizedColumnArray = ( + Box, + ArrayRawDataUncompressedSize, +); // Bind Type of cached objects to Caches // diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index 3903491b34edb..548a6522d80c3 100644 --- a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -20,6 +20,10 @@ use common_cache::Count; use common_cache::CountableMeter; use common_cache::DefaultHashBuilder; +use crate::metrics_inc_cache_access_count; +use crate::metrics_inc_cache_hit_count; +use crate::metrics_inc_cache_miss_count; + // The cache accessor, crate users usually working on this interface while manipulating caches pub trait CacheAccessor where @@ -30,5 +34,57 @@ where fn get>(&self, k: Q) -> Option>; fn put(&self, key: K, value: Arc); fn evict(&self, k: &str) -> bool; - fn contains_key(&self, _k: &str) -> bool; + fn contains_key(&self, k: &str) -> bool; +} + +/// Helper trait to convert a Cache into NamedCache +pub trait Named +where Self: Sized +{ + fn name_with(self: Self, name: String) -> NamedCache { + NamedCache { name, cache: self } + } +} + +impl Named for T where T: Sized + Clone {} + +/// A named cache that with embedded metrics logging +#[derive(Clone)] +pub struct NamedCache { + name: String, + cache: C, +} + +impl CacheAccessor for NamedCache +where + C: CacheAccessor, + K: Eq + Hash, + S: BuildHasher, + M: CountableMeter>, +{ + fn get>(&self, k: Q) -> Option> { + metrics_inc_cache_access_count(1, &self.name); + match self.cache.get(k) { + None => { + metrics_inc_cache_miss_count(1, &self.name); + None + } + v @ Some(_) => { + metrics_inc_cache_hit_count(1, &self.name); + v + } + } + } + + fn put(&self, key: K, value: Arc) { + self.cache.put(key, value) + } + + fn evict(&self, k: &str) -> bool { + self.cache.evict(k) + } + + fn contains_key(&self, k: &str) -> bool { + 
self.cache.contains_key(k) + } } diff --git a/src/query/storages/common/cache/src/lib.rs b/src/query/storages/common/cache/src/lib.rs index 532a111c23ffb..3e58bb7247ce1 100644 --- a/src/query/storages/common/cache/src/lib.rs +++ b/src/query/storages/common/cache/src/lib.rs @@ -18,6 +18,8 @@ mod providers; mod read; pub use cache::CacheAccessor; +pub use cache::Named; +pub use cache::NamedCache; pub use providers::DiskBytesCache; pub use providers::DiskCache; pub use providers::DiskCacheBuilder; diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 55a5cc14172a3..a663dd2af1464 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -118,15 +118,15 @@ fn validate_checksum(bytes: &[u8]) -> Result<()> { "crc checksum validation failure: invalid file length {total_len}" ))) } else { - // checksum validation + // total_len > 4 is ensured let crc_bytes: [u8; 4] = bytes[total_len - 4..].try_into().unwrap(); - let crc = u32::from_le_bytes(crc_bytes); + let crc_provided = u32::from_le_bytes(crc_bytes); let crc_calculated = crc32fast::hash(&bytes[0..total_len - 4]); - if crc == crc_calculated { + if crc_provided == crc_calculated { Ok(()) } else { Err(ErrorCode::StorageOther(format!( - "crc checksum validation failure, key : crc checksum not match, crc kept in file {crc}, crc calculated {crc_calculated}" + "crc checksum validation failure, key : crc checksum not match, crc provided {crc_provided}, crc calculated {crc_calculated}" ))) } } diff --git a/src/query/storages/common/cache/src/providers/memory_cache.rs b/src/query/storages/common/cache/src/providers/memory_cache.rs index ab5709fc1a5cd..d2e04a07c1986 100644 --- a/src/query/storages/common/cache/src/providers/memory_cache.rs +++ b/src/query/storages/common/cache/src/providers/memory_cache.rs @@ -58,6 +58,7 @@ impl InMemoryCacheBuilder { } } +// TODO 
move this to super // default impls mod impls { use std::sync::Arc; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index b5d5431686d42..e8c9d24d96ff2 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -146,12 +146,12 @@ impl MergeIOReadResult { fn add_column_chunk(&mut self, chunk_index: usize, column_id: ColumnId, range: Range) { // TODO doc why put cache operation could be placed here - if let Some(cache) = &self.table_data_cache { + if let Some(table_data_cache) = &self.table_data_cache { if let Ok(chunk_data) = self.get_chunk(chunk_index, &self.block_path) { let cache_key = TableDataColumnCacheKey::new(&self.block_path, column_id); let data = &chunk_data[range.clone()]; // TODO api is NOT type safe - cache.put(cache_key.as_ref().to_owned(), Arc::new(data.to_vec())); + table_data_cache.put(cache_key.as_ref().to_owned(), Arc::new(data.to_vec())); } } self.columns_chunk_offsets diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index 681c5f631dae3..c5585e14a7531 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -131,8 +131,8 @@ impl BlockReader { let mut deserialized_column_arrays = Vec::with_capacity(self.projection.len()); - let columns = self.projection.project_column_nodes(&self.column_nodes)?; - let mut need_default_vals = Vec::with_capacity(columns.len()); + let fields = self.projection.project_column_nodes(&self.column_nodes)?; + let mut need_default_vals = Vec::with_capacity(fields.len()); let mut need_to_fill_default_val = false; // TODO need refactor @@ -147,7 +147,7 @@ impl BlockReader { let mut deserialized_item_index: usize = 0; let mut cache_flags = vec![]; - for column in &columns { + 
for column in &fields { let field = column.field.clone(); let indices = &column.leaf_ids; let mut field_column_metas = Vec::with_capacity(indices.len()); @@ -186,17 +186,24 @@ impl BlockReader { } } + if field_column_metas.len() == 0 { + continue; + } + if column_in_block { - if column_metas.len() > 1 { + if field_column_metas.len() > 1 { + eprintln!("nested"); // working on nested field holders.push(Holder::Deserialized(deserialized_item_index)); cache_flags.push(None); } else { + eprintln!("simple"); // working on simple field let column_idx = indices[0] as ColumnId; holders.push(Holder::Deserialized(deserialized_item_index)); cache_flags.push(Some(column_idx)); } + deserialized_item_index += 1; let field_name = field.name.clone(); @@ -262,12 +269,14 @@ impl BlockReader { }; if let Some(cache) = CacheManager::instance().get_table_data_array_cache() { + eprintln!("array cache enabeld"); // populate array cache items for ((size, item), need_tobe_cached) in deserialized_column_arrays.into_iter().zip(cache_flags) { if let Some(column_idx) = need_tobe_cached { let key = TableDataColumnCacheKey::new(block_path, column_idx); + eprintln!("array cache put {}", key.as_ref()); cache.put(key.into(), Arc::new((item, size))) } } From 4bb8311acc9466797a92410f2b70d0da4f099696 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Fri, 10 Feb 2023 17:02:01 +0800 Subject: [PATCH 36/80] extract deserialize_field method --- src/query/storages/common/cache/src/cache.rs | 2 +- .../fuse/src/io/read/block/block_reader.rs | 3 +- .../src/io/read/block/block_reader_parquet.rs | 221 +++++++++--------- 3 files changed, 113 insertions(+), 113 deletions(-) diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index 548a6522d80c3..99a3ba30d16c6 100644 --- a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -41,7 +41,7 @@ where pub trait Named where Self: Sized { - fn name_with(self: Self, name: String) 
-> NamedCache { + fn name_with(self, name: String) -> NamedCache { NamedCache { name, cache: self } } } diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index e8c9d24d96ff2..91e2f46ccfb41 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -202,8 +202,7 @@ impl BlockReader { } pub fn support_blocking_api(&self) -> bool { - // self.operator.metadata().can_blocking() - false + self.operator.metadata().can_blocking() } /// This is an optimized for data read, works like the Linux kernel io-scheduler IO merging. diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index c5585e14a7531..f3ca03fbbeadb 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::Instant; +use common_arrow::arrow::array::Array; use common_arrow::arrow::chunk::Chunk; use common_arrow::arrow::datatypes::Field; use common_arrow::arrow::io::parquet::read::column_iter_to_arrays; @@ -29,9 +30,11 @@ use common_catalog::table::ColumnId; use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataBlock; +use common_storage::ColumnNode; use storages_common_cache::CacheAccessor; use storages_common_cache::TableDataColumnCacheKey; use storages_common_cache_manager::CacheManager; +use storages_common_cache_manager::SizedColumnArray; use storages_common_table_meta::meta::BlockMeta; use storages_common_table_meta::meta::ColumnMeta; use storages_common_table_meta::meta::Compression; @@ -44,6 +47,11 @@ use crate::io::BlockReader; use crate::io::UncompressedBuffer; use crate::metrics::*; +enum DeserializedArray<'a> { + Cached(&'a Arc), + 
Deserialized((ColumnId, Box, usize)), +} + impl BlockReader { /// Read a parquet file and convert to DataBlock. #[tracing::instrument(level = "debug", skip_all)] @@ -129,124 +137,45 @@ impl BlockReader { return self.build_default_values_block(num_rows); } - let mut deserialized_column_arrays = Vec::with_capacity(self.projection.len()); - let fields = self.projection.project_column_nodes(&self.column_nodes)?; let mut need_default_vals = Vec::with_capacity(fields.len()); let mut need_to_fill_default_val = false; - - // TODO need refactor - type ItemIndex = usize; - enum Holder { - Cached(ItemIndex), - Deserialized(ItemIndex), - } - - let mut column_idx_cached_array = vec![]; - let mut holders = vec![]; - let mut deserialized_item_index: usize = 0; - let mut cache_flags = vec![]; - + let mut deserialized_column_arrays = Vec::with_capacity(self.projection.len()); for column in &fields { - let field = column.field.clone(); - let indices = &column.leaf_ids; - let mut field_column_metas = Vec::with_capacity(indices.len()); - let mut field_column_data = Vec::with_capacity(indices.len()); - let mut field_column_descriptors = Vec::with_capacity(indices.len()); - let mut column_in_block = false; - let mut field_uncompressed_size = 0; - for (i, index) in indices.iter().enumerate() { - let column_id = column.leaf_column_ids[i]; - if let Some(column_meta) = column_metas.get(&column_id) { - // TODO need @LiChuang review - if let Some(chunk) = column_chunks.get(&(*index as ColumnId)) { - column_in_block = true; - match chunk { - DataItem::RawData(data) => { - let column_descriptor = - &self.parquet_schema_descriptor.columns()[*index]; - field_column_metas.push(column_meta); - field_column_data.push(*data); - field_column_descriptors.push(column_descriptor); - field_uncompressed_size += data.len(); - } - DataItem::ColumnArray(column_array) => { - let idx = column_idx_cached_array.len(); - column_idx_cached_array.push(*column_array); - field_uncompressed_size += column_array.1; - 
holders.push(Holder::Cached(idx)); - } - } - } else { - break; - } - } else { - column_in_block = false; - break; + match self.deserialize_field( + column, + column_metas, + &column_chunks, + num_rows, + compression, + &uncompressed_buffer, + )? { + None => { + need_to_fill_default_val = true; + need_default_vals.push(true); } - } - - if field_column_metas.len() == 0 { - continue; - } - - if column_in_block { - if field_column_metas.len() > 1 { - eprintln!("nested"); - // working on nested field - holders.push(Holder::Deserialized(deserialized_item_index)); - cache_flags.push(None); - } else { - eprintln!("simple"); - // working on simple field - let column_idx = indices[0] as ColumnId; - holders.push(Holder::Deserialized(deserialized_item_index)); - cache_flags.push(Some(column_idx)); + Some(v) => { + deserialized_column_arrays.push(v); + need_default_vals.push(false); } - - deserialized_item_index += 1; - - let field_name = field.name.clone(); - let mut array_iter = Self::chunks_to_parquet_array_iter( - field_column_metas, - field_column_data, - num_rows, - field_column_descriptors, - field, - compression, - uncompressed_buffer - .clone() - .unwrap_or_else(|| UncompressedBuffer::new(0)), - )?; - let array = array_iter.next().transpose()?.ok_or_else(|| { - ErrorCode::StorageOther(format!( - "unexpected deserialization error, no array found for field {field_name} " - )) - })?; - deserialized_column_arrays.push((field_uncompressed_size, array)); - - need_default_vals.push(false); - } else { - need_default_vals.push(true); - need_to_fill_default_val = true; } } - let mut arrays = Vec::with_capacity(self.projection.len()); - // assembly the arrays - for holder in holders { - match holder { - Holder::Cached(idx) => { - arrays.push(&column_idx_cached_array[idx].as_ref().0); + let mut chunk_arrays = vec![]; + for array in &deserialized_column_arrays { + match array { + DeserializedArray::Deserialized((_, array, ..)) => { + chunk_arrays.push(array); } - 
Holder::Deserialized(idx) => { - arrays.push(&deserialized_column_arrays[idx].1); + DeserializedArray::Cached(sized_column) => { + chunk_arrays.push(&sized_column.0); } } } - let chunk = Chunk::try_new(arrays)?; + // build data block + let chunk = Chunk::try_new(chunk_arrays)?; let data_block = if !need_to_fill_default_val { DataBlock::from_arrow_chunk(&chunk, &self.data_schema()) } else { @@ -268,16 +197,14 @@ impl BlockReader { ) }; + // populate cache is necessary if let Some(cache) = CacheManager::instance().get_table_data_array_cache() { - eprintln!("array cache enabeld"); // populate array cache items - for ((size, item), need_tobe_cached) in - deserialized_column_arrays.into_iter().zip(cache_flags) - { - if let Some(column_idx) = need_tobe_cached { - let key = TableDataColumnCacheKey::new(block_path, column_idx); + for item in deserialized_column_arrays.into_iter() { + if let DeserializedArray::Deserialized((column_id, array, size)) = item { + let key = TableDataColumnCacheKey::new(block_path, column_id); eprintln!("array cache put {}", key.as_ref()); - cache.put(key.into(), Arc::new((item, size))) + cache.put(key.into(), Arc::new((array, size))) } } } @@ -334,6 +261,80 @@ impl BlockReader { )?) 
} + fn deserialize_field<'a>( + &self, + column: &ColumnNode, + column_metas: &HashMap, + column_chunks: &'a HashMap>, + num_rows: usize, + compression: &Compression, + uncompressed_buffer: &'a Option>, + ) -> Result>> { + let indices = &column.leaf_ids; + let estimated_cap = indices.len(); + let mut field_column_metas = Vec::with_capacity(estimated_cap); + let mut field_column_data = Vec::with_capacity(estimated_cap); + let mut field_column_descriptors = Vec::with_capacity(estimated_cap); + let mut field_uncompressed_size = 0; + let is_nested_field = indices.len() > 1; + for (i, leaf_column_id) in indices.iter().enumerate() { + let column_id = column.leaf_column_ids[i]; + if let Some(column_meta) = column_metas.get(&column_id) { + if let Some(chunk) = column_chunks.get(&(*leaf_column_id as ColumnId)) { + match chunk { + DataItem::RawData(data) => { + let column_descriptor = + &self.parquet_schema_descriptor.columns()[*leaf_column_id]; + field_column_metas.push(column_meta); + field_column_data.push(*data); + field_column_descriptors.push(column_descriptor); + field_uncompressed_size += data.len(); + } + DataItem::ColumnArray(column_array) => { + if is_nested_field { + return Err(ErrorCode::StorageOther( + "unexpected nested field.. 
blah blah", + )); + } + return Ok(Some(DeserializedArray::Cached(column_array))); + } + } + } else { + break; + } + } else { + break; + } + } + + if !field_column_metas.is_empty() { + let field_name = column.field.name.to_owned(); + let mut array_iter = Self::chunks_to_parquet_array_iter( + field_column_metas, + field_column_data, + num_rows, + field_column_descriptors, + column.field.clone(), + compression, + uncompressed_buffer + .clone() + .unwrap_or_else(|| UncompressedBuffer::new(0)), + )?; + let array = array_iter.next().transpose()?.ok_or_else(|| { + ErrorCode::StorageOther(format!( + "unexpected deserialization error, no array found for field {field_name} " + )) + })?; + Ok(Some(DeserializedArray::Deserialized(( + indices[0] as ColumnId, + array, + field_uncompressed_size, + )))) + } else { + Ok(None) + } + } + fn to_parquet_compression(meta_compression: &Compression) -> Result { match meta_compression { Compression::Lz4 => { From 7277243a8d3954e760aa8224d3e08920830e364f Mon Sep 17 00:00:00 2001 From: dantengsky Date: Fri, 10 Feb 2023 17:35:14 +0800 Subject: [PATCH 37/80] tweak doc --- .../storages/fuse/src/io/read/block/block_reader_parquet.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index f3ca03fbbeadb..0957b846d62db 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -277,6 +277,7 @@ impl BlockReader { let mut field_column_descriptors = Vec::with_capacity(estimated_cap); let mut field_uncompressed_size = 0; let is_nested_field = indices.len() > 1; + for (i, leaf_column_id) in indices.iter().enumerate() { let column_id = column.leaf_column_ids[i]; if let Some(column_meta) = column_metas.get(&column_id) { @@ -293,16 +294,18 @@ impl BlockReader { DataItem::ColumnArray(column_array) => { if 
is_nested_field { return Err(ErrorCode::StorageOther( - "unexpected nested field.. blah blah", + "unexpected nested field: nested leaf field hits cached", )); } return Ok(Some(DeserializedArray::Cached(column_array))); } } } else { + // no raw data or cache item of given column id break; } } else { + // no column meta of given colmun id break; } } From 5f3cbae783af472a87ff5f2ffccf2e76adeb2e8b Mon Sep 17 00:00:00 2001 From: dantengsky Date: Fri, 10 Feb 2023 21:57:25 +0800 Subject: [PATCH 38/80] fix sql logic test --- src/query/catalog/src/plan/projection.rs | 29 +++++++++++++++ .../src/io/read/block/block_reader_parquet.rs | 37 ++++++++++++++----- 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/src/query/catalog/src/plan/projection.rs b/src/query/catalog/src/plan/projection.rs index f2ad806f35261..867072a59cb06 100644 --- a/src/query/catalog/src/plan/projection.rs +++ b/src/query/catalog/src/plan/projection.rs @@ -75,6 +75,35 @@ impl Projection { Ok(column_nodes) } + /// ColumnNode projection. 
+ /// + /// `ColumnNode`s returned are paired with a boolean which indicates if it + /// is part of a nested field + pub fn project_column_nodes_nested_aware<'a>( + &'a self, + column_nodes: &'a ColumnNodes, + ) -> Result> { + let column_nodes = match self { + Projection::Columns(indices) => indices + .iter() + .map(|idx| (&column_nodes.column_nodes[*idx], false)) + .collect(), + Projection::InnerColumns(path_indices) => { + let paths: Vec<&Vec> = path_indices.values().collect(); + paths + .iter() + .map(|path| { + ( + ColumnNodes::traverse_path(&column_nodes.column_nodes, path).unwrap(), + true, + ) + }) + .collect() + } + }; + Ok(column_nodes) + } + pub fn add_col(&mut self, col: usize) { match self { Projection::Columns(indices) => { diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index 0957b846d62db..8fcfd33db2fd7 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -50,6 +50,7 @@ use crate::metrics::*; enum DeserializedArray<'a> { Cached(&'a Arc), Deserialized((ColumnId, Box, usize)), + NoNeedToCache(Box), } impl BlockReader { @@ -137,11 +138,13 @@ impl BlockReader { return self.build_default_values_block(num_rows); } - let fields = self.projection.project_column_nodes(&self.column_nodes)?; + let fields = self + .projection + .project_column_nodes_nested_aware(&self.column_nodes)?; let mut need_default_vals = Vec::with_capacity(fields.len()); let mut need_to_fill_default_val = false; let mut deserialized_column_arrays = Vec::with_capacity(self.projection.len()); - for column in &fields { + for (column, is_nested_field) in &fields { match self.deserialize_field( column, column_metas, @@ -149,6 +152,7 @@ impl BlockReader { num_rows, compression, &uncompressed_buffer, + *is_nested_field, )? 
{ None => { need_to_fill_default_val = true; @@ -168,6 +172,9 @@ impl BlockReader { DeserializedArray::Deserialized((_, array, ..)) => { chunk_arrays.push(array); } + DeserializedArray::NoNeedToCache(array) => { + chunk_arrays.push(array); + } DeserializedArray::Cached(sized_column) => { chunk_arrays.push(&sized_column.0); } @@ -203,7 +210,6 @@ impl BlockReader { for item in deserialized_column_arrays.into_iter() { if let DeserializedArray::Deserialized((column_id, array, size)) = item { let key = TableDataColumnCacheKey::new(block_path, column_id); - eprintln!("array cache put {}", key.as_ref()); cache.put(key.into(), Arc::new((array, size))) } } @@ -261,6 +267,8 @@ impl BlockReader { )?) } + // TODO: refactor this method + #[allow(clippy::too_many_arguments)] fn deserialize_field<'a>( &self, column: &ColumnNode, @@ -269,14 +277,15 @@ impl BlockReader { num_rows: usize, compression: &Compression, uncompressed_buffer: &'a Option>, + is_nested: bool, ) -> Result>> { let indices = &column.leaf_ids; + let is_nested = is_nested || indices.len() > 1; let estimated_cap = indices.len(); let mut field_column_metas = Vec::with_capacity(estimated_cap); let mut field_column_data = Vec::with_capacity(estimated_cap); let mut field_column_descriptors = Vec::with_capacity(estimated_cap); let mut field_uncompressed_size = 0; - let is_nested_field = indices.len() > 1; for (i, leaf_column_id) in indices.iter().enumerate() { let column_id = column.leaf_column_ids[i]; @@ -292,7 +301,7 @@ impl BlockReader { field_uncompressed_size += data.len(); } DataItem::ColumnArray(column_array) => { - if is_nested_field { + if is_nested { return Err(ErrorCode::StorageOther( "unexpected nested field: nested leaf field hits cached", )); @@ -328,11 +337,19 @@ impl BlockReader { "unexpected deserialization error, no array found for field {field_name} " )) })?; - Ok(Some(DeserializedArray::Deserialized(( - indices[0] as ColumnId, - array, - field_uncompressed_size, - )))) + + // mark the array + if 
is_nested { + // the array is not intended to be cached + Ok(Some(DeserializedArray::NoNeedToCache(array))) + } else { + // the array is deserialized from raw bytes, should be cached + Ok(Some(DeserializedArray::Deserialized(( + indices[0] as ColumnId, + array, + field_uncompressed_size, + )))) + } } else { Ok(None) } From 213017fe06772241747cf6d92ac6fb02d126f936 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 11 Feb 2023 01:02:05 +0800 Subject: [PATCH 39/80] split mod block_reader & fix logic test --- src/query/expression/src/block.rs | 12 +- .../fuse/src/io/read/block/block_reader.rs | 372 ------------------ .../io/read/block/block_reader_merge_io.rs | 123 ++++++ .../read/block/block_reader_merge_io_async.rs | 192 +++++++++ .../read/block/block_reader_merge_io_sync.rs | 137 +++++++ .../src/io/read/block/block_reader_parquet.rs | 328 --------------- .../block/block_reader_parquet_deserialize.rs | 354 +++++++++++++++++ .../storages/fuse/src/io/read/block/mod.rs | 6 +- .../operations/read/parquet_data_source.rs | 2 +- .../read/parquet_data_source_deserializer.rs | 2 +- 10 files changed, 822 insertions(+), 706 deletions(-) create mode 100644 src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs create mode 100644 src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs create mode 100644 src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs create mode 100644 src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs diff --git a/src/query/expression/src/block.rs b/src/query/expression/src/block.rs index 5a48a31a2549a..cdedb8db464e0 100644 --- a/src/query/expression/src/block.rs +++ b/src/query/expression/src/block.rs @@ -359,11 +359,13 @@ impl DataBlock { let mut chunk_idx: usize = 0; let schema_fields = schema.fields(); let chunk_columns = chuck.arrays(); + eprintln!("chunk_column len {}", chunk_columns.len()); let mut columns = Vec::with_capacity(default_vals.len()); for (i, default_val) in 
default_vals.iter().enumerate() { let field = &schema_fields[i]; let data_type = field.data_type(); + eprintln!("data type {}", data_type); let column = match default_val { Some(default_val) => BlockEntry { @@ -371,12 +373,16 @@ impl DataBlock { value: Value::Scalar(default_val.to_owned()), }, None => { - let chunk_column = &chunk_columns[chunk_idx]; + eprintln!("chunk idx {}", chunk_idx); + let chunk_column = chunk_columns.get(chunk_idx).unwrap(); + eprintln!("get column"); chunk_idx += 1; - BlockEntry { + let entry = BlockEntry { data_type: data_type.clone(), value: Value::Column(Column::from_arrow(chunk_column.as_ref(), data_type)), - } + }; + eprintln!("build entry"); + entry } }; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index 91e2f46ccfb41..b429c93cbf048 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -13,21 +13,14 @@ // limitations under the License. 
use std::collections::BTreeMap; -use std::collections::HashMap; -use std::ops::Range; use std::sync::Arc; -use std::time::Instant; use common_arrow::arrow::datatypes::Field; use common_arrow::arrow::io::parquet::write::to_parquet_schema; use common_arrow::parquet::metadata::SchemaDescriptor; -use common_base::rangemap::RangeMerger; -use common_base::runtime::UnlimitedFuture; -use common_catalog::plan::PartInfoPtr; use common_catalog::plan::Projection; use common_catalog::table::ColumnId; use common_catalog::table_context::TableContext; -use common_exception::ErrorCode; use common_exception::Result; use common_expression::types::DataType; use common_expression::DataField; @@ -38,19 +31,7 @@ use common_expression::TableSchemaRef; use common_sql::field_default_value; use common_storage::ColumnNode; use common_storage::ColumnNodes; -use futures::future::try_join_all; -use opendal::Object; use opendal::Operator; -use storages_common_cache::CacheAccessor; -use storages_common_cache::TableDataCache; -use storages_common_cache::TableDataColumnCacheKey; -use storages_common_cache_manager::CacheManager; -use storages_common_cache_manager::SizedColumnArray; -use storages_common_table_meta::meta::ColumnMeta; - -use crate::fuse_part::FusePartInfo; -use crate::io::read::ReadSettings; -use crate::metrics::*; // TODO: make BlockReader as a trait. 
#[derive(Clone)] @@ -64,101 +45,6 @@ pub struct BlockReader { pub(crate) default_vals: Vec, } -pub struct OwnerMemory { - chunks: HashMap>, -} - -impl OwnerMemory { - pub fn create(chunks: Vec<(usize, Vec)>) -> OwnerMemory { - let chunks = chunks.into_iter().collect::>(); - OwnerMemory { chunks } - } - - pub fn get_chunk(&self, index: usize, path: &str) -> Result<&[u8]> { - match self.chunks.get(&index) { - Some(chunk) => Ok(chunk.as_slice()), - None => Err(ErrorCode::Internal(format!( - "It's a terrible bug, not found range data, merged_range_idx:{}, path:{}", - index, path - ))), - } - } -} - -type CachedColumnData = Vec<(ColumnId, Arc>)>; -type CachedColumnArray = Vec<(ColumnId, Arc)>; -pub struct MergeIOReadResult { - block_path: String, - columns_chunk_offsets: HashMap)>, - owner_memory: OwnerMemory, - cached_column_data: CachedColumnData, - cached_column_array: CachedColumnArray, - table_data_cache: Option, -} - -pub enum DataItem<'a> { - RawData(&'a [u8]), - ColumnArray(&'a Arc), -} - -impl MergeIOReadResult { - pub fn create( - owner_memory: OwnerMemory, - capacity: usize, - path: String, - table_data_cache: Option, - ) -> MergeIOReadResult { - MergeIOReadResult { - block_path: path, - columns_chunk_offsets: HashMap::with_capacity(capacity), - owner_memory, - cached_column_data: vec![], - cached_column_array: vec![], - table_data_cache, - } - } - - pub fn columns_chunks(&self) -> Result> { - let mut res = HashMap::with_capacity(self.columns_chunk_offsets.len()); - - // merge column data fetched from object storage - for (column_idx, (chunk_idx, range)) in &self.columns_chunk_offsets { - let chunk = self.owner_memory.get_chunk(*chunk_idx, &self.block_path)?; - res.insert(*column_idx, DataItem::RawData(&chunk[range.clone()])); - } - - // merge column data from cache - for (column_id, data) in &self.cached_column_data { - res.insert(*column_id, DataItem::RawData(data.as_slice())); - } - - // merge column array from cache - for (column_id, data) in 
&self.cached_column_array { - res.insert(*column_id, DataItem::ColumnArray(data)); - } - - Ok(res) - } - - fn get_chunk(&self, index: usize, path: &str) -> Result<&[u8]> { - self.owner_memory.get_chunk(index, path) - } - - fn add_column_chunk(&mut self, chunk_index: usize, column_id: ColumnId, range: Range) { - // TODO doc why put cache operation could be placed here - if let Some(table_data_cache) = &self.table_data_cache { - if let Ok(chunk_data) = self.get_chunk(chunk_index, &self.block_path) { - let cache_key = TableDataColumnCacheKey::new(&self.block_path, column_id); - let data = &chunk_data[range.clone()]; - // TODO api is NOT type safe - table_data_cache.put(cache_key.as_ref().to_owned(), Arc::new(data.to_vec())); - } - } - self.columns_chunk_offsets - .insert(column_id, (chunk_index, range)); - } -} - impl BlockReader { pub fn create( operator: Operator, @@ -205,242 +91,6 @@ impl BlockReader { self.operator.metadata().can_blocking() } - /// This is an optimized for data read, works like the Linux kernel io-scheduler IO merging. - /// If the distance between two IO request ranges to be read is less than storage_io_min_bytes_for_seek(Default is 48Bytes), - /// will read the range that contains both ranges, thus avoiding extra seek. - /// - /// It will *NOT* merge two requests: - /// if the last io request size is larger than storage_io_page_bytes_for_read(Default is 512KB). - async fn merge_io_read( - read_settings: &ReadSettings, - object: Object, - raw_ranges: Vec<(ColumnId, Range)>, - ) -> Result { - if raw_ranges.is_empty() { - // shortcut - let read_res = MergeIOReadResult::create( - OwnerMemory::create(vec![]), - raw_ranges.len(), - object.path().to_string(), - CacheManager::instance().get_table_data_cache(), - ); - return Ok(read_res); - } - - // Build merged read ranges. 
- let ranges = raw_ranges - .iter() - .map(|(_, r)| r.clone()) - .collect::>(); - let range_merger = RangeMerger::from_iter( - ranges, - read_settings.storage_io_min_bytes_for_seek, - read_settings.storage_io_max_page_bytes_for_read, - ); - let merged_ranges = range_merger.ranges(); - - // Read merged range data. - let mut read_handlers = Vec::with_capacity(merged_ranges.len()); - for (idx, range) in merged_ranges.iter().enumerate() { - // Perf. - { - metrics_inc_remote_io_seeks_after_merged(1); - metrics_inc_remote_io_read_bytes_after_merged(range.end - range.start); - } - - read_handlers.push(UnlimitedFuture::create(Self::read_range( - object.clone(), - idx, - range.start, - range.end, - ))); - } - - let start = Instant::now(); - let owner_memory = OwnerMemory::create(try_join_all(read_handlers).await?); - let table_data_cache = CacheManager::instance().get_table_data_cache(); - let mut read_res = MergeIOReadResult::create( - owner_memory, - raw_ranges.len(), - object.path().to_string(), - table_data_cache, - ); - - // Perf. - { - metrics_inc_remote_io_read_milliseconds(start.elapsed().as_millis() as u64); - } - - for (raw_idx, raw_range) in &raw_ranges { - let column_range = raw_range.start..raw_range.end; - - // Find the range index and Range from merged ranges. - let (merged_range_idx, merged_range) = match range_merger.get(column_range.clone()) { - None => Err(ErrorCode::Internal(format!( - "It's a terrible bug, not found raw range:[{:?}], path:{} from merged ranges\n: {:?}", - column_range, - object.path(), - merged_ranges - ))), - Some((index, range)) => Ok((index, range)), - }?; - - // Fetch the raw data for the raw range. 
- let start = (column_range.start - merged_range.start) as usize; - let end = (column_range.end - merged_range.start) as usize; - let column_id = *raw_idx as ColumnId; - read_res.add_column_chunk(merged_range_idx, column_id, start..end); - } - - Ok(read_res) - } - - pub fn sync_merge_io_read( - read_settings: &ReadSettings, - object: Object, - raw_ranges: Vec<(usize, Range)>, - ) -> Result { - let path = object.path().to_string(); - - // Build merged read ranges. - let ranges = raw_ranges - .iter() - .map(|(_, r)| r.clone()) - .collect::>(); - let range_merger = RangeMerger::from_iter( - ranges, - read_settings.storage_io_min_bytes_for_seek, - read_settings.storage_io_max_page_bytes_for_read, - ); - let merged_ranges = range_merger.ranges(); - - // Read merged range data. - let mut io_res = Vec::with_capacity(merged_ranges.len()); - for (idx, range) in merged_ranges.iter().enumerate() { - io_res.push(Self::sync_read_range( - object.clone(), - idx, - range.start, - range.end, - )?); - } - - let owner_memory = OwnerMemory::create(io_res); - - // for sync read, we disable table data cache - let table_data_cache = None; - let mut read_res = MergeIOReadResult::create( - owner_memory, - raw_ranges.len(), - path.clone(), - table_data_cache, - ); - - for (raw_idx, raw_range) in &raw_ranges { - let column_id = *raw_idx as ColumnId; - let column_range = raw_range.start..raw_range.end; - - // Find the range index and Range from merged ranges. - let (merged_range_idx, merged_range) = match range_merger.get(column_range.clone()) { - None => Err(ErrorCode::Internal(format!( - "It's a terrible bug, not found raw range:[{:?}], path:{} from merged ranges\n: {:?}", - column_range, path, merged_ranges - ))), - Some((index, range)) => Ok((index, range)), - }?; - - // Fetch the raw data for the raw range. 
- let start = (column_range.start - merged_range.start) as usize; - let end = (column_range.end - merged_range.start) as usize; - read_res.add_column_chunk(merged_range_idx, column_id, start..end); - } - - Ok(read_res) - } - - pub async fn read_columns_data_by_merge_io( - &self, - settings: &ReadSettings, - location: &str, - columns_meta: &HashMap, - ) -> Result { - // Perf - { - metrics_inc_remote_io_read_parts(1); - } - - let mut ranges = vec![]; - // for async read, always try using table data cache (if enabled in settings) - let column_data_cache = CacheManager::instance().get_table_data_cache(); - let column_array_cache = CacheManager::instance().get_table_data_array_cache(); - let mut cached_column_data = vec![]; - let mut cached_column_array = vec![]; - for (_index, (column_id, ..)) in self.project_indices.iter() { - let column_cache_key = TableDataColumnCacheKey::new(location, *column_id); - - // first, check column array object cache - if let Some(cache_array) = column_array_cache.get(&column_cache_key) { - cached_column_array.push((*column_id, cache_array)); - continue; - } - - // and then, check column data cache - if let Some(cached_column_raw_data) = column_data_cache.get(&column_cache_key) { - cached_column_data.push((*column_id, cached_column_raw_data)); - continue; - } - - // if all cache missed, prepare the ranges to be read - if let Some(column_meta) = columns_meta.get(column_id) { - let (offset, len) = column_meta.offset_length(); - ranges.push((*column_id, offset..(offset + len))); - - // Perf - { - metrics_inc_remote_io_seeks(1); - metrics_inc_remote_io_read_bytes(len); - } - } - } - - let object = self.operator.object(location); - - let mut merge_io_read_res = Self::merge_io_read(settings, object, ranges).await?; - merge_io_read_res.cached_column_data = cached_column_data; - merge_io_read_res.cached_column_array = cached_column_array; - Ok(merge_io_read_res) - } - - pub fn sync_read_columns_data_by_merge_io( - &self, - settings: &ReadSettings, 
- part: PartInfoPtr, - ) -> Result { - let part = FusePartInfo::from_part(&part)?; - let column_array_cache = CacheManager::instance().get_table_data_array_cache(); - - let mut ranges = vec![]; - let mut cached_column_array = vec![]; - for (index, (column_id, ..)) in self.project_indices.iter() { - // first, check column array object cache - let block_path = &part.location; - let column_cache_key = TableDataColumnCacheKey::new(block_path, *column_id); - if let Some(cache_array) = column_array_cache.get(&column_cache_key) { - cached_column_array.push((*column_id, cache_array)); - continue; - } - if let Some(column_meta) = part.columns_meta.get(column_id) { - let (offset, len) = column_meta.offset_length(); - ranges.push((*index, offset..(offset + len))); - } - } - - let object = self.operator.object(&part.location); - let mut merge_io_result = Self::sync_merge_io_read(settings, object, ranges)?; - merge_io_result.cached_column_array = cached_column_array; - Ok(merge_io_result) - } - // Build non duplicate leaf_ids to avoid repeated read column from parquet pub(crate) fn build_projection_indices( columns: &[ColumnNode], @@ -459,28 +109,6 @@ impl BlockReader { indices } - #[inline] - pub async fn read_range( - o: Object, - index: usize, - start: u64, - end: u64, - ) -> Result<(usize, Vec)> { - let chunk = o.range_read(start..end).await?; - Ok((index, chunk)) - } - - #[inline] - pub fn sync_read_range( - o: Object, - index: usize, - start: u64, - end: u64, - ) -> Result<(usize, Vec)> { - let chunk = o.blocking_range_read(start..end)?; - Ok((index, chunk)) - } - pub fn schema(&self) -> TableSchemaRef { self.projected_schema.clone() } diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs new file mode 100644 index 0000000000000..3e8d50e9a0965 --- /dev/null +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs @@ -0,0 +1,123 @@ +// Copyright 2021 Datafuse 
Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::ops::Range; +use std::sync::Arc; + +use common_catalog::table::ColumnId; +use common_exception::ErrorCode; +use common_exception::Result; +use storages_common_cache::CacheAccessor; +use storages_common_cache::TableDataCache; +use storages_common_cache::TableDataColumnCacheKey; +use storages_common_cache_manager::SizedColumnArray; + +pub struct OwnerMemory { + chunks: HashMap>, +} + +impl OwnerMemory { + pub fn create(chunks: Vec<(usize, Vec)>) -> OwnerMemory { + let chunks = chunks.into_iter().collect::>(); + OwnerMemory { chunks } + } + + pub fn get_chunk(&self, index: usize, path: &str) -> Result<&[u8]> { + match self.chunks.get(&index) { + Some(chunk) => Ok(chunk.as_slice()), + None => Err(ErrorCode::Internal(format!( + "It's a terrible bug, not found range data, merged_range_idx:{}, path:{}", + index, path + ))), + } + } +} + +type CachedColumnData = Vec<(ColumnId, Arc>)>; +type CachedColumnArray = Vec<(ColumnId, Arc)>; +pub struct MergeIOReadResult { + block_path: String, + columns_chunk_offsets: HashMap)>, + owner_memory: OwnerMemory, + pub cached_column_data: CachedColumnData, + pub cached_column_array: CachedColumnArray, + table_data_cache: Option, +} + +pub enum DataItem<'a> { + RawData(&'a [u8]), + ColumnArray(&'a Arc), +} + +impl MergeIOReadResult { + pub fn create( + owner_memory: OwnerMemory, + capacity: usize, + path: String, + table_data_cache: 
Option, + ) -> MergeIOReadResult { + MergeIOReadResult { + block_path: path, + columns_chunk_offsets: HashMap::with_capacity(capacity), + owner_memory, + cached_column_data: vec![], + cached_column_array: vec![], + table_data_cache, + } + } + + pub fn columns_chunks(&self) -> Result> { + let mut res = HashMap::with_capacity(self.columns_chunk_offsets.len()); + + // merge column data fetched from object storage + for (column_id, (chunk_idx, range)) in &self.columns_chunk_offsets { + let chunk = self.owner_memory.get_chunk(*chunk_idx, &self.block_path)?; + res.insert(*column_id, DataItem::RawData(&chunk[range.clone()])); + } + + // merge column data from cache + for (column_id, data) in &self.cached_column_data { + res.insert(*column_id, DataItem::RawData(data.as_slice())); + } + + // merge column array from cache + for (column_id, data) in &self.cached_column_array { + res.insert(*column_id, DataItem::ColumnArray(data)); + } + + Ok(res) + } + + fn get_chunk(&self, index: usize, path: &str) -> Result<&[u8]> { + self.owner_memory.get_chunk(index, path) + } + + pub fn add_column_chunk( + &mut self, + chunk_index: usize, + column_id: ColumnId, + range: Range, + ) { + if let Some(table_data_cache) = &self.table_data_cache { + if let Ok(chunk_data) = self.get_chunk(chunk_index, &self.block_path) { + let cache_key = TableDataColumnCacheKey::new(&self.block_path, column_id); + let data = &chunk_data[range.clone()]; + table_data_cache.put(cache_key.as_ref().to_owned(), Arc::new(data.to_vec())); + } + } + self.columns_chunk_offsets + .insert(column_id, (chunk_index, range)); + } +} diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs new file mode 100644 index 0000000000000..e21949ed4dfa7 --- /dev/null +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs @@ -0,0 +1,192 @@ +// Copyright 2021 Datafuse Labs. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::ops::Range; +use std::time::Instant; + +use common_base::rangemap::RangeMerger; +use common_base::runtime::UnlimitedFuture; +use common_catalog::table::ColumnId; +use common_exception::ErrorCode; +use common_exception::Result; +use futures::future::try_join_all; +use opendal::Object; +use storages_common_cache::CacheAccessor; +use storages_common_cache::TableDataColumnCacheKey; +use storages_common_cache_manager::CacheManager; +use storages_common_table_meta::meta::ColumnMeta; + +use crate::io::read::block::block_reader_merge_io::OwnerMemory; +use crate::io::read::ReadSettings; +use crate::io::BlockReader; +use crate::metrics::*; +use crate::MergeIOReadResult; + +impl BlockReader { + /// This is an optimized for data read, works like the Linux kernel io-scheduler IO merging. + /// If the distance between two IO request ranges to be read is less than storage_io_min_bytes_for_seek(Default is 48Bytes), + /// will read the range that contains both ranges, thus avoiding extra seek. + /// + /// It will *NOT* merge two requests: + /// if the last io request size is larger than storage_io_page_bytes_for_read(Default is 512KB). 
+ async fn merge_io_read( + read_settings: &ReadSettings, + object: Object, + raw_ranges: Vec<(ColumnId, Range)>, + ) -> Result { + if raw_ranges.is_empty() { + // shortcut + let read_res = MergeIOReadResult::create( + OwnerMemory::create(vec![]), + raw_ranges.len(), + object.path().to_string(), + CacheManager::instance().get_table_data_cache(), + ); + return Ok(read_res); + } + + // Build merged read ranges. + let ranges = raw_ranges + .iter() + .map(|(_, r)| r.clone()) + .collect::>(); + let range_merger = RangeMerger::from_iter( + ranges, + read_settings.storage_io_min_bytes_for_seek, + read_settings.storage_io_max_page_bytes_for_read, + ); + let merged_ranges = range_merger.ranges(); + + // Read merged range data. + let mut read_handlers = Vec::with_capacity(merged_ranges.len()); + for (idx, range) in merged_ranges.iter().enumerate() { + // Perf. + { + metrics_inc_remote_io_seeks_after_merged(1); + metrics_inc_remote_io_read_bytes_after_merged(range.end - range.start); + } + + read_handlers.push(UnlimitedFuture::create(Self::read_range( + object.clone(), + idx, + range.start, + range.end, + ))); + } + + let start = Instant::now(); + let owner_memory = OwnerMemory::create(try_join_all(read_handlers).await?); + let table_data_cache = CacheManager::instance().get_table_data_cache(); + let mut read_res = MergeIOReadResult::create( + owner_memory, + raw_ranges.len(), + object.path().to_string(), + table_data_cache, + ); + + // Perf. + { + metrics_inc_remote_io_read_milliseconds(start.elapsed().as_millis() as u64); + } + + for (raw_idx, raw_range) in &raw_ranges { + let column_range = raw_range.start..raw_range.end; + + // Find the range index and Range from merged ranges. 
+ let (merged_range_idx, merged_range) = match range_merger.get(column_range.clone()) { + None => Err(ErrorCode::Internal(format!( + "It's a terrible bug, not found raw range:[{:?}], path:{} from merged ranges\n: {:?}", + column_range, + object.path(), + merged_ranges + ))), + Some((index, range)) => Ok((index, range)), + }?; + + // Fetch the raw data for the raw range. + let start = (column_range.start - merged_range.start) as usize; + let end = (column_range.end - merged_range.start) as usize; + let column_id = *raw_idx as ColumnId; + read_res.add_column_chunk(merged_range_idx, column_id, start..end); + } + + Ok(read_res) + } + + pub async fn read_columns_data_by_merge_io( + &self, + settings: &ReadSettings, + location: &str, + columns_meta: &HashMap, + ) -> Result { + // Perf + { + metrics_inc_remote_io_read_parts(1); + } + + let mut ranges = vec![]; + // for async read, always try using table data cache (if enabled in settings) + let column_data_cache = CacheManager::instance().get_table_data_cache(); + let column_array_cache = CacheManager::instance().get_table_data_array_cache(); + let mut cached_column_data = vec![]; + let mut cached_column_array = vec![]; + for (_index, (column_id, ..)) in self.project_indices.iter() { + let column_cache_key = TableDataColumnCacheKey::new(location, *column_id); + + // first, check column array object cache + if let Some(cache_array) = column_array_cache.get(&column_cache_key) { + cached_column_array.push((*column_id, cache_array)); + continue; + } + + // and then, check column data cache + if let Some(cached_column_raw_data) = column_data_cache.get(&column_cache_key) { + cached_column_data.push((*column_id, cached_column_raw_data)); + continue; + } + + // if all cache missed, prepare the ranges to be read + if let Some(column_meta) = columns_meta.get(column_id) { + let (offset, len) = column_meta.offset_length(); + ranges.push((*column_id, offset..(offset + len))); + + // Perf + { + metrics_inc_remote_io_seeks(1); + 
metrics_inc_remote_io_read_bytes(len); + } + } + } + + let object = self.operator.object(location); + + let mut merge_io_read_res = Self::merge_io_read(settings, object, ranges).await?; + // TODO set + merge_io_read_res.cached_column_data = cached_column_data; + merge_io_read_res.cached_column_array = cached_column_array; + Ok(merge_io_read_res) + } + + #[inline] + pub async fn read_range( + o: Object, + index: usize, + start: u64, + end: u64, + ) -> Result<(usize, Vec)> { + let chunk = o.range_read(start..end).await?; + Ok((index, chunk)) + } +} diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs new file mode 100644 index 0000000000000..29820b42f99ef --- /dev/null +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs @@ -0,0 +1,137 @@ +// Copyright 2021 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::ops::Range; + +use common_base::rangemap::RangeMerger; +use common_catalog::plan::PartInfoPtr; +use common_catalog::table::ColumnId; +use common_exception::ErrorCode; +use common_exception::Result; +use opendal::Object; +use storages_common_cache::CacheAccessor; +use storages_common_cache::TableDataColumnCacheKey; +use storages_common_cache_manager::CacheManager; + +use crate::fuse_part::FusePartInfo; +use crate::io::read::block::block_reader_merge_io::OwnerMemory; +use crate::io::read::ReadSettings; +use crate::io::BlockReader; +use crate::MergeIOReadResult; + +impl BlockReader { + pub fn sync_merge_io_read( + read_settings: &ReadSettings, + object: Object, + raw_ranges: Vec<(ColumnId, Range)>, + ) -> Result { + let path = object.path().to_string(); + + // Build merged read ranges. + let ranges = raw_ranges + .iter() + .map(|(_, r)| r.clone()) + .collect::>(); + let range_merger = RangeMerger::from_iter( + ranges, + read_settings.storage_io_min_bytes_for_seek, + read_settings.storage_io_max_page_bytes_for_read, + ); + let merged_ranges = range_merger.ranges(); + + // Read merged range data. + let mut io_res = Vec::with_capacity(merged_ranges.len()); + for (idx, range) in merged_ranges.iter().enumerate() { + io_res.push(Self::sync_read_range( + object.clone(), + idx, + range.start, + range.end, + )?); + } + + let owner_memory = OwnerMemory::create(io_res); + + // for sync read, we disable table *data* cache + let table_data_cache = None; + let mut read_res = MergeIOReadResult::create( + owner_memory, + raw_ranges.len(), + path.clone(), + table_data_cache, + ); + + for (raw_idx, raw_range) in &raw_ranges { + let column_id = *raw_idx as ColumnId; + let column_range = raw_range.start..raw_range.end; + + // Find the range index and Range from merged ranges. 
+ let (merged_range_idx, merged_range) = match range_merger.get(column_range.clone()) { + None => Err(ErrorCode::Internal(format!( + "It's a terrible bug, not found raw range:[{:?}], path:{} from merged ranges\n: {:?}", + column_range, path, merged_ranges + ))), + Some((index, range)) => Ok((index, range)), + }?; + + // Fetch the raw data for the raw range. + let start = (column_range.start - merged_range.start) as usize; + let end = (column_range.end - merged_range.start) as usize; + read_res.add_column_chunk(merged_range_idx, column_id, start..end); + } + + Ok(read_res) + } + + pub fn sync_read_columns_data_by_merge_io( + &self, + settings: &ReadSettings, + part: PartInfoPtr, + ) -> Result { + let part = FusePartInfo::from_part(&part)?; + let column_array_cache = CacheManager::instance().get_table_data_array_cache(); + + let mut ranges = vec![]; + let mut cached_column_array = vec![]; + for (_index, (column_id, ..)) in self.project_indices.iter() { + // first, check column array object cache + let block_path = &part.location; + let column_cache_key = TableDataColumnCacheKey::new(block_path, *column_id); + if let Some(cache_array) = column_array_cache.get(&column_cache_key) { + cached_column_array.push((*column_id, cache_array)); + continue; + } + if let Some(column_meta) = part.columns_meta.get(column_id) { + let (offset, len) = column_meta.offset_length(); + ranges.push((*column_id, offset..(offset + len))); + } + } + + let object = self.operator.object(&part.location); + let mut merge_io_result = Self::sync_merge_io_read(settings, object, ranges)?; + merge_io_result.cached_column_array = cached_column_array; + Ok(merge_io_result) + } + + #[inline] + pub fn sync_read_range( + o: Object, + index: usize, + start: u64, + end: u64, + ) -> Result<(usize, Vec)> { + let chunk = o.blocking_range_read(start..end)?; + Ok((index, chunk)) + } +} diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs 
b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index 8fcfd33db2fd7..5ecf06dea5e03 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -13,45 +13,13 @@ // limitations under the License. use std::collections::HashMap; -use std::sync::Arc; -use std::time::Instant; -use common_arrow::arrow::array::Array; -use common_arrow::arrow::chunk::Chunk; -use common_arrow::arrow::datatypes::Field; -use common_arrow::arrow::io::parquet::read::column_iter_to_arrays; -use common_arrow::arrow::io::parquet::read::ArrayIter; -use common_arrow::parquet::compression::Compression as ParquetCompression; -use common_arrow::parquet::metadata::ColumnDescriptor; -use common_arrow::parquet::read::PageMetaData; -use common_arrow::parquet::read::PageReader; -use common_catalog::plan::PartInfoPtr; -use common_catalog::table::ColumnId; -use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataBlock; -use common_storage::ColumnNode; -use storages_common_cache::CacheAccessor; -use storages_common_cache::TableDataColumnCacheKey; -use storages_common_cache_manager::CacheManager; -use storages_common_cache_manager::SizedColumnArray; use storages_common_table_meta::meta::BlockMeta; -use storages_common_table_meta::meta::ColumnMeta; -use storages_common_table_meta::meta::Compression; -use crate::fuse_part::FusePartInfo; -use crate::io::read::block::block_reader::DataItem; -use crate::io::read::block::decompressor::BuffedBasicDecompressor; use crate::io::read::ReadSettings; use crate::io::BlockReader; -use crate::io::UncompressedBuffer; -use crate::metrics::*; - -enum DeserializedArray<'a> { - Cached(&'a Arc), - Deserialized((ColumnId, Box, usize)), - NoNeedToCache(Box), -} impl BlockReader { /// Read a parquet file and convert to DataBlock. 
@@ -87,300 +55,4 @@ impl BlockReader { None, ) } - - /// Deserialize column chunks data from parquet format to DataBlock. - pub fn deserialize_parquet_chunks( - &self, - part: PartInfoPtr, - chunks: HashMap, - ) -> Result { - let part = FusePartInfo::from_part(&part)?; - let start = Instant::now(); - - if chunks.is_empty() { - return Ok(DataBlock::new(vec![], part.nums_rows)); - } - - let deserialized_res = self.deserialize_parquet_chunks_with_buffer( - &part.location, - part.nums_rows, - &part.compression, - &part.columns_meta, - chunks, - None, - ); - - // Perf. - { - metrics_inc_remote_io_deserialize_milliseconds(start.elapsed().as_millis() as u64); - } - - deserialized_res - } - - pub fn build_default_values_block(&self, num_rows: usize) -> Result { - let data_schema = self.data_schema(); - let default_vals = self.default_vals.clone(); - DataBlock::create_with_default_value(&data_schema, &default_vals, num_rows) - } - - /// Deserialize column chunks data from parquet format to DataBlock with a uncompressed buffer. - pub fn deserialize_parquet_chunks_with_buffer( - &self, - block_path: &str, - num_rows: usize, - compression: &Compression, - column_metas: &HashMap, - column_chunks: HashMap, - uncompressed_buffer: Option>, - ) -> Result { - if column_chunks.is_empty() { - return self.build_default_values_block(num_rows); - } - - let fields = self - .projection - .project_column_nodes_nested_aware(&self.column_nodes)?; - let mut need_default_vals = Vec::with_capacity(fields.len()); - let mut need_to_fill_default_val = false; - let mut deserialized_column_arrays = Vec::with_capacity(self.projection.len()); - for (column, is_nested_field) in &fields { - match self.deserialize_field( - column, - column_metas, - &column_chunks, - num_rows, - compression, - &uncompressed_buffer, - *is_nested_field, - )? 
{ - None => { - need_to_fill_default_val = true; - need_default_vals.push(true); - } - Some(v) => { - deserialized_column_arrays.push(v); - need_default_vals.push(false); - } - } - } - - // assembly the arrays - let mut chunk_arrays = vec![]; - for array in &deserialized_column_arrays { - match array { - DeserializedArray::Deserialized((_, array, ..)) => { - chunk_arrays.push(array); - } - DeserializedArray::NoNeedToCache(array) => { - chunk_arrays.push(array); - } - DeserializedArray::Cached(sized_column) => { - chunk_arrays.push(&sized_column.0); - } - } - } - - // build data block - let chunk = Chunk::try_new(chunk_arrays)?; - let data_block = if !need_to_fill_default_val { - DataBlock::from_arrow_chunk(&chunk, &self.data_schema()) - } else { - let data_schema = self.data_schema(); - let schema_default_vals = self.default_vals.clone(); - let mut default_vals = Vec::with_capacity(need_default_vals.len()); - for (i, need_default_val) in need_default_vals.iter().enumerate() { - if !need_default_val { - default_vals.push(None); - } else { - default_vals.push(Some(schema_default_vals[i].clone())); - } - } - DataBlock::create_with_default_value_and_chunk( - &data_schema, - &chunk, - &default_vals, - num_rows, - ) - }; - - // populate cache is necessary - if let Some(cache) = CacheManager::instance().get_table_data_array_cache() { - // populate array cache items - for item in deserialized_column_arrays.into_iter() { - if let DeserializedArray::Deserialized((column_id, array, size)) = item { - let key = TableDataColumnCacheKey::new(block_path, column_id); - cache.put(key.into(), Arc::new((array, size))) - } - } - } - data_block - } - - fn chunks_to_parquet_array_iter<'a>( - metas: Vec<&ColumnMeta>, - chunks: Vec<&'a [u8]>, - rows: usize, - column_descriptors: Vec<&ColumnDescriptor>, - field: Field, - compression: &Compression, - uncompressed_buffer: Arc, - ) -> Result> { - let columns = metas - .iter() - .zip(chunks.into_iter().zip(column_descriptors.iter())) - 
.map(|(meta, (chunk, column_descriptor))| { - let meta = meta.as_parquet().unwrap(); - - let page_meta_data = PageMetaData { - column_start: meta.offset, - num_values: meta.num_values as i64, - compression: Self::to_parquet_compression(compression)?, - descriptor: column_descriptor.descriptor.clone(), - }; - let pages = PageReader::new_with_page_meta( - chunk, - page_meta_data, - Arc::new(|_, _| true), - vec![], - usize::MAX, - ); - - Ok(BuffedBasicDecompressor::new( - pages, - uncompressed_buffer.clone(), - )) - }) - .collect::>>()?; - - let types = column_descriptors - .iter() - .map(|column_descriptor| &column_descriptor.descriptor.primitive_type) - .collect::>(); - - Ok(column_iter_to_arrays( - columns, - types, - field, - Some(rows), - rows, - )?) - } - - // TODO: refactor this method - #[allow(clippy::too_many_arguments)] - fn deserialize_field<'a>( - &self, - column: &ColumnNode, - column_metas: &HashMap, - column_chunks: &'a HashMap>, - num_rows: usize, - compression: &Compression, - uncompressed_buffer: &'a Option>, - is_nested: bool, - ) -> Result>> { - let indices = &column.leaf_ids; - let is_nested = is_nested || indices.len() > 1; - let estimated_cap = indices.len(); - let mut field_column_metas = Vec::with_capacity(estimated_cap); - let mut field_column_data = Vec::with_capacity(estimated_cap); - let mut field_column_descriptors = Vec::with_capacity(estimated_cap); - let mut field_uncompressed_size = 0; - - for (i, leaf_column_id) in indices.iter().enumerate() { - let column_id = column.leaf_column_ids[i]; - if let Some(column_meta) = column_metas.get(&column_id) { - if let Some(chunk) = column_chunks.get(&(*leaf_column_id as ColumnId)) { - match chunk { - DataItem::RawData(data) => { - let column_descriptor = - &self.parquet_schema_descriptor.columns()[*leaf_column_id]; - field_column_metas.push(column_meta); - field_column_data.push(*data); - field_column_descriptors.push(column_descriptor); - field_uncompressed_size += data.len(); - } - 
DataItem::ColumnArray(column_array) => { - if is_nested { - return Err(ErrorCode::StorageOther( - "unexpected nested field: nested leaf field hits cached", - )); - } - return Ok(Some(DeserializedArray::Cached(column_array))); - } - } - } else { - // no raw data or cache item of given column id - break; - } - } else { - // no column meta of given colmun id - break; - } - } - - if !field_column_metas.is_empty() { - let field_name = column.field.name.to_owned(); - let mut array_iter = Self::chunks_to_parquet_array_iter( - field_column_metas, - field_column_data, - num_rows, - field_column_descriptors, - column.field.clone(), - compression, - uncompressed_buffer - .clone() - .unwrap_or_else(|| UncompressedBuffer::new(0)), - )?; - let array = array_iter.next().transpose()?.ok_or_else(|| { - ErrorCode::StorageOther(format!( - "unexpected deserialization error, no array found for field {field_name} " - )) - })?; - - // mark the array - if is_nested { - // the array is not intended to be cached - Ok(Some(DeserializedArray::NoNeedToCache(array))) - } else { - // the array is deserialized from raw bytes, should be cached - Ok(Some(DeserializedArray::Deserialized(( - indices[0] as ColumnId, - array, - field_uncompressed_size, - )))) - } - } else { - Ok(None) - } - } - - fn to_parquet_compression(meta_compression: &Compression) -> Result { - match meta_compression { - Compression::Lz4 => { - let err_msg = r#"Deprecated compression algorithm [Lz4] detected. - - The Legacy compression algorithm [Lz4] is no longer supported. - To migrate data from old format, please consider re-create the table, - by using an old compatible version [v0.8.25-nightly … v0.7.12-nightly]. - - - Bring up the compatible version of databend-query - - re-create the table - Suppose the name of table is T - ~~~ - create table tmp_t as select * from T; - drop table T all; - alter table tmp_t rename to T; - ~~~ - Please note that the history of table T WILL BE LOST. 
- "#; - Err(ErrorCode::StorageOther(err_msg)) - } - Compression::Lz4Raw => Ok(ParquetCompression::Lz4Raw), - Compression::Snappy => Ok(ParquetCompression::Snappy), - Compression::Zstd => Ok(ParquetCompression::Zstd), - Compression::Gzip => Ok(ParquetCompression::Gzip), - Compression::None => Ok(ParquetCompression::Uncompressed), - } - } } diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs new file mode 100644 index 0000000000000..5d0ea44989d35 --- /dev/null +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs @@ -0,0 +1,354 @@ +// Copyright 2021 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::collections::HashMap; +use std::sync::Arc; +use std::time::Instant; + +use common_arrow::arrow::array::Array; +use common_arrow::arrow::chunk::Chunk; +use common_arrow::arrow::datatypes::Field; +use common_arrow::arrow::io::parquet::read::column_iter_to_arrays; +use common_arrow::arrow::io::parquet::read::ArrayIter; +use common_arrow::parquet::compression::Compression as ParquetCompression; +use common_arrow::parquet::metadata::ColumnDescriptor; +use common_arrow::parquet::read::PageMetaData; +use common_arrow::parquet::read::PageReader; +use common_catalog::plan::PartInfoPtr; +use common_catalog::table::ColumnId; +use common_exception::ErrorCode; +use common_exception::Result; +use common_expression::DataBlock; +use common_storage::ColumnNode; +use storages_common_cache::CacheAccessor; +use storages_common_cache::TableDataColumnCacheKey; +use storages_common_cache_manager::CacheManager; +use storages_common_cache_manager::SizedColumnArray; +use storages_common_table_meta::meta::ColumnMeta; +use storages_common_table_meta::meta::Compression; + +use crate::fuse_part::FusePartInfo; +use crate::io::read::block::block_reader_merge_io::DataItem; +use crate::io::read::block::decompressor::BuffedBasicDecompressor; +use crate::io::BlockReader; +use crate::io::UncompressedBuffer; +use crate::metrics::*; + +enum DeserializedArray<'a> { + Cached(&'a Arc), + Deserialized((ColumnId, Box, usize)), + NoNeedToCache(Box), +} + +impl BlockReader { + /// Deserialize column chunks data from parquet format to DataBlock. + pub fn deserialize_parquet_chunks( + &self, + part: PartInfoPtr, + chunks: HashMap, + ) -> Result { + let part = FusePartInfo::from_part(&part)?; + let start = Instant::now(); + + if chunks.is_empty() { + return Ok(DataBlock::new(vec![], part.nums_rows)); + } + + let deserialized_res = self.deserialize_parquet_chunks_with_buffer( + &part.location, + part.nums_rows, + &part.compression, + &part.columns_meta, + chunks, + None, + ); + + // Perf. 
+ { + metrics_inc_remote_io_deserialize_milliseconds(start.elapsed().as_millis() as u64); + } + + deserialized_res + } + + pub fn build_default_values_block(&self, num_rows: usize) -> Result { + let data_schema = self.data_schema(); + let default_vals = self.default_vals.clone(); + DataBlock::create_with_default_value(&data_schema, &default_vals, num_rows) + } + + /// Deserialize column chunks data from parquet format to DataBlock with a uncompressed buffer. + pub fn deserialize_parquet_chunks_with_buffer( + &self, + block_path: &str, + num_rows: usize, + compression: &Compression, + column_metas: &HashMap, + column_chunks: HashMap, + uncompressed_buffer: Option>, + ) -> Result { + if column_chunks.is_empty() { + return self.build_default_values_block(num_rows); + } + + let fields = self + .projection + .project_column_nodes_nested_aware(&self.column_nodes)?; + let mut need_default_vals = Vec::with_capacity(fields.len()); + let mut need_to_fill_default_val = false; + let mut deserialized_column_arrays = Vec::with_capacity(self.projection.len()); + for (column, is_nested_field) in &fields { + match self.deserialize_field( + column, + column_metas, + &column_chunks, + num_rows, + compression, + &uncompressed_buffer, + *is_nested_field, + )? 
{ + None => { + need_to_fill_default_val = true; + need_default_vals.push(true); + } + Some(v) => { + deserialized_column_arrays.push(v); + need_default_vals.push(false); + } + } + } + + // assembly the arrays + let mut chunk_arrays = vec![]; + for array in &deserialized_column_arrays { + match array { + DeserializedArray::Deserialized((_, array, ..)) => { + chunk_arrays.push(array); + } + DeserializedArray::NoNeedToCache(array) => { + chunk_arrays.push(array); + } + DeserializedArray::Cached(sized_column) => { + chunk_arrays.push(&sized_column.0); + } + } + } + + // build data block + let chunk = Chunk::try_new(chunk_arrays)?; + let data_block = if !need_to_fill_default_val { + DataBlock::from_arrow_chunk(&chunk, &self.data_schema()) + } else { + let data_schema = self.data_schema(); + let schema_default_vals = self.default_vals.clone(); + let mut default_vals = Vec::with_capacity(need_default_vals.len()); + for (i, need_default_val) in need_default_vals.iter().enumerate() { + if !need_default_val { + default_vals.push(None); + } else { + default_vals.push(Some(schema_default_vals[i].clone())); + } + } + DataBlock::create_with_default_value_and_chunk( + &data_schema, + &chunk, + &default_vals, + num_rows, + ) + }; + + // populate cache if necessary + if let Some(cache) = CacheManager::instance().get_table_data_array_cache() { + // populate array cache items + for item in deserialized_column_arrays.into_iter() { + if let DeserializedArray::Deserialized((column_id, array, size)) = item { + let key = TableDataColumnCacheKey::new(block_path, column_id); + cache.put(key.into(), Arc::new((array, size))) + } + } + } + data_block + } + + fn chunks_to_parquet_array_iter<'a>( + metas: Vec<&ColumnMeta>, + chunks: Vec<&'a [u8]>, + rows: usize, + column_descriptors: Vec<&ColumnDescriptor>, + field: Field, + compression: &Compression, + uncompressed_buffer: Arc, + ) -> Result> { + let columns = metas + .iter() + .zip(chunks.into_iter().zip(column_descriptors.iter())) + 
.map(|(meta, (chunk, column_descriptor))| { + let meta = meta.as_parquet().unwrap(); + + let page_meta_data = PageMetaData { + column_start: meta.offset, + num_values: meta.num_values as i64, + compression: Self::to_parquet_compression(compression)?, + descriptor: column_descriptor.descriptor.clone(), + }; + let pages = PageReader::new_with_page_meta( + chunk, + page_meta_data, + Arc::new(|_, _| true), + vec![], + usize::MAX, + ); + + Ok(BuffedBasicDecompressor::new( + pages, + uncompressed_buffer.clone(), + )) + }) + .collect::>>()?; + + let types = column_descriptors + .iter() + .map(|column_descriptor| &column_descriptor.descriptor.primitive_type) + .collect::>(); + + Ok(column_iter_to_arrays( + columns, + types, + field, + Some(rows), + rows, + )?) + } + + // TODO: refactor this method + #[allow(clippy::too_many_arguments)] + fn deserialize_field<'a>( + &self, + column: &ColumnNode, + column_metas: &HashMap, + column_chunks: &'a HashMap>, + num_rows: usize, + compression: &Compression, + uncompressed_buffer: &'a Option>, + is_nested_column: bool, + ) -> Result>> { + let indices = &column.leaf_ids; + // column passed in may be a compound field (with sub leaves), + // or a leaf column of compound field + let is_nested = is_nested_column || indices.len() > 1; + let estimated_cap = indices.len(); + let mut field_column_metas = Vec::with_capacity(estimated_cap); + let mut field_column_data = Vec::with_capacity(estimated_cap); + let mut field_column_descriptors = Vec::with_capacity(estimated_cap); + let mut field_uncompressed_size = 0; + + for (i, leaf_column_id) in indices.iter().enumerate() { + let column_id = column.leaf_column_ids[i]; + if let Some(column_meta) = column_metas.get(&column_id) { + if let Some(chunk) = column_chunks.get(&(*leaf_column_id as ColumnId)) { + match chunk { + DataItem::RawData(data) => { + let column_descriptor = + &self.parquet_schema_descriptor.columns()[*leaf_column_id]; + field_column_metas.push(column_meta); + 
field_column_data.push(*data); + field_column_descriptors.push(column_descriptor); + field_uncompressed_size += data.len(); + } + DataItem::ColumnArray(column_array) => { + if is_nested { + // TODO more context info for error message + return Err(ErrorCode::StorageOther( + "unexpected nested field: nested leaf field hits cached", + )); + } + // since it is not nested, one column is enough + return Ok(Some(DeserializedArray::Cached(column_array))); + } + } + } else { + // no raw data or cache item of given column id, it is unexpected + return Err(ErrorCode::StorageOther("unexpected: column data not found")); + } + } else { + // no column meta of given colmun id + break; + } + } + + if !field_column_metas.is_empty() { + let field_name = column.field.name.to_owned(); + let mut array_iter = Self::chunks_to_parquet_array_iter( + field_column_metas, + field_column_data, + num_rows, + field_column_descriptors, + column.field.clone(), + compression, + uncompressed_buffer + .clone() + .unwrap_or_else(|| UncompressedBuffer::new(0)), + )?; + let array = array_iter.next().transpose()?.ok_or_else(|| { + ErrorCode::StorageOther(format!( + "unexpected deserialization error, no array found for field {field_name} " + )) + })?; + + // mark the array + if is_nested { + // the array is not intended to be cached + Ok(Some(DeserializedArray::NoNeedToCache(array))) + } else { + // the array is deserialized from raw bytes, should be cached + Ok(Some(DeserializedArray::Deserialized(( + indices[0] as ColumnId, + array, + field_uncompressed_size, + )))) + } + } else { + Ok(None) + } + } + + fn to_parquet_compression(meta_compression: &Compression) -> Result { + match meta_compression { + Compression::Lz4 => { + let err_msg = r#"Deprecated compression algorithm [Lz4] detected. + + The Legacy compression algorithm [Lz4] is no longer supported. + To migrate data from old format, please consider re-create the table, + by using an old compatible version [v0.8.25-nightly … v0.7.12-nightly]. 
+ + - Bring up the compatible version of databend-query + - re-create the table + Suppose the name of table is T + ~~~ + create table tmp_t as select * from T; + drop table T all; + alter table tmp_t rename to T; + ~~~ + Please note that the history of table T WILL BE LOST. + "#; + Err(ErrorCode::StorageOther(err_msg)) + } + Compression::Lz4Raw => Ok(ParquetCompression::Lz4Raw), + Compression::Snappy => Ok(ParquetCompression::Snappy), + Compression::Zstd => Ok(ParquetCompression::Zstd), + Compression::Gzip => Ok(ParquetCompression::Gzip), + Compression::None => Ok(ParquetCompression::Uncompressed), + } + } +} diff --git a/src/query/storages/fuse/src/io/read/block/mod.rs b/src/query/storages/fuse/src/io/read/block/mod.rs index 587e676d1c9d9..ebc08b71e60eb 100644 --- a/src/query/storages/fuse/src/io/read/block/mod.rs +++ b/src/query/storages/fuse/src/io/read/block/mod.rs @@ -13,11 +13,15 @@ // limitations under the License. // mod block_reader; +mod block_reader_merge_io; +mod block_reader_merge_io_async; +mod block_reader_merge_io_sync; mod block_reader_native; mod block_reader_parquet; +mod block_reader_parquet_deserialize; mod decompressor; pub use block_reader::BlockReader; -pub use block_reader::MergeIOReadResult; +pub use block_reader_merge_io::MergeIOReadResult; pub use block_reader_native::NativeReaderExt; pub use decompressor::UncompressedBuffer; diff --git a/src/query/storages/fuse/src/operations/read/parquet_data_source.rs b/src/query/storages/fuse/src/operations/read/parquet_data_source.rs index e2ed209d1c6bb..e6b717adcc5ed 100644 --- a/src/query/storages/fuse/src/operations/read/parquet_data_source.rs +++ b/src/query/storages/fuse/src/operations/read/parquet_data_source.rs @@ -23,7 +23,7 @@ use common_expression::BlockMetaInfoPtr; use serde::Deserializer; use serde::Serializer; -use crate::MergeIOReadResult; +use crate::io::MergeIOReadResult; pub struct DataSourceMeta { pub part: Vec, diff --git 
a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs index 819d40d5a0d2c..b6e0992fedc74 100644 --- a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs +++ b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs @@ -30,10 +30,10 @@ use common_pipeline_core::processors::Processor; use crate::fuse_part::FusePartInfo; use crate::io::BlockReader; +use crate::io::MergeIOReadResult; use crate::io::UncompressedBuffer; use crate::metrics::metrics_inc_remote_io_deserialize_milliseconds; use crate::operations::read::parquet_data_source::DataSourceMeta; -use crate::MergeIOReadResult; pub struct DeserializeDataTransform { scan_progress: Arc, From 943c4999a5e7492accf5ed14b3d54c7290e36972 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 11 Feb 2023 02:38:06 +0800 Subject: [PATCH 40/80] refactoring --- Cargo.lock | 1 - src/query/catalog/Cargo.toml | 1 - .../fuse/src/io/read/block/block_reader.rs | 5 +++ .../block/block_reader_parquet_deserialize.rs | 35 ++++++++++++++----- .../05_0028_ddl_alter_table_add_drop_column | 2 +- 5 files changed, 33 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c9f9511b79cac..6af93d0118b7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1322,7 +1322,6 @@ dependencies = [ "dashmap", "dyn-clone", "goldenfile", - "opendal", "rand 0.8.5", "serde", "serde_json", diff --git a/src/query/catalog/Cargo.toml b/src/query/catalog/Cargo.toml index f0cb0259148b7..3eb21e709e831 100644 --- a/src/query/catalog/Cargo.toml +++ b/src/query/catalog/Cargo.toml @@ -19,7 +19,6 @@ common-meta-types = { path = "../../meta/types" } common-pipeline-core = { path = "../pipeline/core" } common-settings = { path = "../settings" } common-storage = { path = "../../common/storage" } -opendal = { workspace = true } async-trait = "0.1.57" chrono = { workspace = true } diff --git 
a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index b429c93cbf048..64ac8f23508ec 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -52,6 +52,7 @@ impl BlockReader { projection: Projection, ctx: Arc, ) -> Result> { + eprintln!("self schema {:#?}", schema); let projected_schema = match projection { Projection::Columns(ref indices) => TableSchemaRef::new(schema.project(indices)), Projection::InnerColumns(ref path_indices) => { @@ -63,6 +64,10 @@ impl BlockReader { let parquet_schema_descriptor = to_parquet_schema(&arrow_schema)?; let column_nodes = ColumnNodes::new_from_schema(&arrow_schema, Some(&schema)); + for x in &column_nodes.column_nodes { + eprintln!("field: {}, ids {:?}", x.field.name, x.leaf_column_ids); + } + let project_column_nodes: Vec = projection .project_column_nodes(&column_nodes)? .iter() diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs index 5d0ea44989d35..48899e0b1bf17 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs @@ -109,6 +109,10 @@ impl BlockReader { let mut need_to_fill_default_val = false; let mut deserialized_column_arrays = Vec::with_capacity(self.projection.len()); for (column, is_nested_field) in &fields { + eprintln!( + "deser col {}, idx {:?}", + column.field.name, &column.leaf_column_ids + ); match self.deserialize_field( column, column_metas, @@ -151,13 +155,12 @@ impl BlockReader { DataBlock::from_arrow_chunk(&chunk, &self.data_schema()) } else { let data_schema = self.data_schema(); - let schema_default_vals = self.default_vals.clone(); let mut default_vals = Vec::with_capacity(need_default_vals.len()); for (i, 
need_default_val) in need_default_vals.iter().enumerate() { if !need_default_val { default_vals.push(None); } else { - default_vals.push(Some(schema_default_vals[i].clone())); + default_vals.push(Some(self.default_vals[i].clone())); } } DataBlock::create_with_default_value_and_chunk( @@ -174,6 +177,7 @@ impl BlockReader { for item in deserialized_column_arrays.into_iter() { if let DeserializedArray::Deserialized((column_id, array, size)) = item { let key = TableDataColumnCacheKey::new(block_path, column_id); + eprintln!("caching key {}", key.as_ref()); cache.put(key.into(), Arc::new((array, size))) } } @@ -231,7 +235,7 @@ impl BlockReader { )?) } - // TODO: refactor this method + // TODO: refactor this method, too many args #[allow(clippy::too_many_arguments)] fn deserialize_field<'a>( &self, @@ -244,19 +248,32 @@ impl BlockReader { is_nested_column: bool, ) -> Result>> { let indices = &column.leaf_ids; + let is_nested = is_nested_column || indices.len() > 1; + eprintln!( + "column name {}, nested {}, index {:?}, leaves {:?}", + column.field.name.as_str(), + is_nested, + indices, + &column.leaf_column_ids, + ); // column passed in may be a compound field (with sub leaves), // or a leaf column of compound field - let is_nested = is_nested_column || indices.len() > 1; let estimated_cap = indices.len(); let mut field_column_metas = Vec::with_capacity(estimated_cap); let mut field_column_data = Vec::with_capacity(estimated_cap); let mut field_column_descriptors = Vec::with_capacity(estimated_cap); let mut field_uncompressed_size = 0; + let mut column_id = 0; for (i, leaf_column_id) in indices.iter().enumerate() { - let column_id = column.leaf_column_ids[i]; + column_id = column.leaf_column_ids[i]; + eprintln!( + "column name {}, column id{:?}", + column.field.name.as_str(), + column_id, + ); if let Some(column_meta) = column_metas.get(&column_id) { - if let Some(chunk) = column_chunks.get(&(*leaf_column_id as ColumnId)) { + if let Some(chunk) = 
column_chunks.get(&column_id) { match chunk { DataItem::RawData(data) => { let column_descriptor = @@ -278,7 +295,7 @@ impl BlockReader { } } } else { - // no raw data or cache item of given column id, it is unexpected + // no raw data of given column id, it is unexpected return Err(ErrorCode::StorageOther("unexpected: column data not found")); } } else { @@ -309,11 +326,13 @@ impl BlockReader { // mark the array if is_nested { // the array is not intended to be cached + // currently, caching of compound filed columns is not support Ok(Some(DeserializedArray::NoNeedToCache(array))) } else { // the array is deserialized from raw bytes, should be cached + // let column_id = column.leaf_column_ids[indices[0]]; Ok(Some(DeserializedArray::Deserialized(( - indices[0] as ColumnId, + column_id, array, field_uncompressed_size, )))) diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column b/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column index 9f1df385a9ea2..9cbc09778292a 100644 --- a/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column +++ b/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column @@ -44,7 +44,7 @@ SELECT d,b.1,c,b.2 FROM `05_0028_at_t0_2` 4.0 1 [101,2] 100 statement ok -DROP TABLE IF EXISTS `05_0028_at_t0_2` +DROP TABLE IF EXISTS `06_0028_at_t0_2` query FIIIF SELECT d,b.1,c,b.2,b.3 FROM `05_0028_at_t0` From 8d6f32b7211ba34d029bad2f662677f972e712aa Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 11 Feb 2023 03:18:51 +0800 Subject: [PATCH 41/80] fix sqllogic test --- src/common/storage/src/column_node.rs | 21 +++++++++++++++++++ src/query/catalog/src/plan/projection.rs | 13 ++++++++---- .../block/block_reader_parquet_deserialize.rs | 18 +--------------- .../05_0028_ddl_alter_table_add_drop_column | 2 +- 4 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/common/storage/src/column_node.rs 
b/src/common/storage/src/column_node.rs index a1315300ef824..ff93100ca5fcb 100644 --- a/src/common/storage/src/column_node.rs +++ b/src/common/storage/src/column_node.rs @@ -98,6 +98,27 @@ impl ColumnNodes { } Ok(column_node) } + + pub fn traverse_path_nested_aware<'a>( + column_nodes: &'a [ColumnNode], + path: &'a [usize], + is_nested: bool, + ) -> Result<(&'a ColumnNode, bool)> { + let column_node = &column_nodes[path[0]]; + let is_nested = is_nested || column_node.children.is_some(); + if path.len() > 1 { + return match &column_node.children { + Some(ref children) => { + Self::traverse_path_nested_aware(children, &path[1..], is_nested) + } + None => Err(ErrorCode::Internal(format!( + "Cannot get column_node by path: {:?}", + path + ))), + }; + } + Ok((column_node, is_nested)) + } } /// `ColumnNode` contains all the leaf column ids of the column. diff --git a/src/query/catalog/src/plan/projection.rs b/src/query/catalog/src/plan/projection.rs index 867072a59cb06..b0f96d43fff00 100644 --- a/src/query/catalog/src/plan/projection.rs +++ b/src/query/catalog/src/plan/projection.rs @@ -86,17 +86,22 @@ impl Projection { let column_nodes = match self { Projection::Columns(indices) => indices .iter() - .map(|idx| (&column_nodes.column_nodes[*idx], false)) + .map(|idx| { + let column_node = &column_nodes.column_nodes[*idx]; + (column_node, column_node.children.is_some()) + }) .collect(), Projection::InnerColumns(path_indices) => { let paths: Vec<&Vec> = path_indices.values().collect(); paths .iter() .map(|path| { - ( - ColumnNodes::traverse_path(&column_nodes.column_nodes, path).unwrap(), - true, + ColumnNodes::traverse_path_nested_aware( + &column_nodes.column_nodes, + path, + false, ) + .unwrap() }) .collect() } diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs index 48899e0b1bf17..e621b32ffe4b6 100644 --- 
a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs @@ -109,10 +109,6 @@ impl BlockReader { let mut need_to_fill_default_val = false; let mut deserialized_column_arrays = Vec::with_capacity(self.projection.len()); for (column, is_nested_field) in &fields { - eprintln!( - "deser col {}, idx {:?}", - column.field.name, &column.leaf_column_ids - ); match self.deserialize_field( column, column_metas, @@ -177,7 +173,6 @@ impl BlockReader { for item in deserialized_column_arrays.into_iter() { if let DeserializedArray::Deserialized((column_id, array, size)) = item { let key = TableDataColumnCacheKey::new(block_path, column_id); - eprintln!("caching key {}", key.as_ref()); cache.put(key.into(), Arc::new((array, size))) } } @@ -249,13 +244,6 @@ impl BlockReader { ) -> Result>> { let indices = &column.leaf_ids; let is_nested = is_nested_column || indices.len() > 1; - eprintln!( - "column name {}, nested {}, index {:?}, leaves {:?}", - column.field.name.as_str(), - is_nested, - indices, - &column.leaf_column_ids, - ); // column passed in may be a compound field (with sub leaves), // or a leaf column of compound field let estimated_cap = indices.len(); @@ -267,11 +255,6 @@ impl BlockReader { let mut column_id = 0; for (i, leaf_column_id) in indices.iter().enumerate() { column_id = column.leaf_column_ids[i]; - eprintln!( - "column name {}, column id{:?}", - column.field.name.as_str(), - column_id, - ); if let Some(column_meta) = column_metas.get(&column_id) { if let Some(chunk) = column_chunks.get(&column_id) { match chunk { @@ -295,6 +278,7 @@ impl BlockReader { } } } else { + // TODO more context info for error message // no raw data of given column id, it is unexpected return Err(ErrorCode::StorageOther("unexpected: column data not found")); } diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column 
b/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column index 9cbc09778292a..9f1df385a9ea2 100644 --- a/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column +++ b/tests/sqllogictests/suites/base/05_ddl/05_0028_ddl_alter_table_add_drop_column @@ -44,7 +44,7 @@ SELECT d,b.1,c,b.2 FROM `05_0028_at_t0_2` 4.0 1 [101,2] 100 statement ok -DROP TABLE IF EXISTS `06_0028_at_t0_2` +DROP TABLE IF EXISTS `05_0028_at_t0_2` query FIIIF SELECT d,b.1,c,b.2,b.3 FROM `05_0028_at_t0` From 3741a3e52697102a3d4a5e72815c1bb5933bbf48 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 11 Feb 2023 03:27:01 +0800 Subject: [PATCH 42/80] remove unwraps --- src/query/catalog/src/plan/projection.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/query/catalog/src/plan/projection.rs b/src/query/catalog/src/plan/projection.rs index b0f96d43fff00..16b0d8635b117 100644 --- a/src/query/catalog/src/plan/projection.rs +++ b/src/query/catalog/src/plan/projection.rs @@ -66,10 +66,8 @@ impl Projection { let paths: Vec<&Vec> = path_indices.values().collect(); paths .iter() - .map(|path| { - ColumnNodes::traverse_path(&column_nodes.column_nodes, path).unwrap() - }) - .collect() + .map(|path| ColumnNodes::traverse_path(&column_nodes.column_nodes, path)) + .collect::>()? } }; Ok(column_nodes) @@ -101,9 +99,8 @@ impl Projection { path, false, ) - .unwrap() }) - .collect() + .collect::>()? 
} }; Ok(column_nodes) From 842eec9664b978e77aaec23921f51383bbe21d9b Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 11 Feb 2023 12:18:36 +0800 Subject: [PATCH 43/80] clean up --- .../storages/common/cache-manager/src/cache_manager.rs | 6 +++--- src/query/storages/common/cache/src/providers/disk_cache.rs | 4 ++-- .../storages/common/cache/src/providers/table_data_cache.rs | 2 +- src/query/storages/fuse/src/io/read/block/block_reader.rs | 4 ---- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index 60b85307b0897..e3b34e87f4591 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -60,13 +60,13 @@ impl CacheManager { Self::new_block_data_cache( &config.table_disk_cache_root, config.table_data_cache_population_queue_size, - config.table_disk_cache_mb_size, + config.table_disk_cache_mb_size * 1024 * 1024, )? 
}; // setup in-memory table column cache let table_column_array_cache = Self::new_in_memory_cache( - config.table_cache_column_mb_size, + config.table_cache_column_mb_size * 1024 * 1024, ColumnArrayMeter, "table_data_cache_column_array", ); @@ -174,7 +174,7 @@ impl CacheManager { fn new_block_data_cache( path: &str, population_queue_size: u32, - disk_cache_mb_size: u64, + disk_cache_bytes_size: u64, ) -> Result> { if disk_cache_mb_size > 0 { let cache_holder = TableDataCacheBuilder::new_table_data_disk_cache( diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index a663dd2af1464..b631521d512f8 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -35,8 +35,8 @@ pub struct DiskBytesCache { pub struct DiskCacheBuilder; impl DiskCacheBuilder { - pub fn new_disk_cache(path: &str, disk_cache_size: u64) -> Result { - let external_cache = DiskCache::new(path, disk_cache_size * 1024 * 1024) + pub fn new_disk_cache(path: &str, disk_cache_bytes_size: u64) -> Result { + let external_cache = DiskCache::new(path, disk_cache_size) .map_err(|e| ErrorCode::StorageOther(format!("create disk cache failed, {e}")))?; let inner = Arc::new(RwLock::new(external_cache)); Ok(DiskBytesCache { inner }) diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index 78b635135081c..575bd76784a8b 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -80,7 +80,7 @@ impl TableDataCacheBuilder { pub fn new_table_data_disk_cache( path: &str, population_queue_size: u32, - disk_cache_mb_size: u64, + disk_cache_bytes_size: u64, ) -> Result> { let disk_cache = DiskCacheBuilder::new_disk_cache(path, disk_cache_mb_size)?; let (rx, tx) = 
crossbeam_channel::bounded(population_queue_size as usize); diff --git a/src/query/storages/fuse/src/io/read/block/block_reader.rs b/src/query/storages/fuse/src/io/read/block/block_reader.rs index 64ac8f23508ec..8757b03d728d3 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader.rs @@ -52,7 +52,6 @@ impl BlockReader { projection: Projection, ctx: Arc, ) -> Result> { - eprintln!("self schema {:#?}", schema); let projected_schema = match projection { Projection::Columns(ref indices) => TableSchemaRef::new(schema.project(indices)), Projection::InnerColumns(ref path_indices) => { @@ -64,9 +63,6 @@ impl BlockReader { let parquet_schema_descriptor = to_parquet_schema(&arrow_schema)?; let column_nodes = ColumnNodes::new_from_schema(&arrow_schema, Some(&schema)); - for x in &column_nodes.column_nodes { - eprintln!("field: {}, ids {:?}", x.field.name, x.leaf_column_ids); - } let project_column_nodes: Vec = projection .project_column_nodes(&column_nodes)? 
From abd411de5bd0c94a5c556b7e84da4fae41e32545 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 11 Feb 2023 12:20:28 +0800 Subject: [PATCH 44/80] fix typo --- src/query/storages/common/cache-manager/src/cache_manager.rs | 4 ++-- src/query/storages/common/cache/src/providers/disk_cache.rs | 2 +- .../storages/common/cache/src/providers/table_data_cache.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index e3b34e87f4591..8fea4b87eef39 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -176,11 +176,11 @@ impl CacheManager { population_queue_size: u32, disk_cache_bytes_size: u64, ) -> Result> { - if disk_cache_mb_size > 0 { + if disk_cache_bytes_size > 0 { let cache_holder = TableDataCacheBuilder::new_table_data_disk_cache( path, population_queue_size, - disk_cache_mb_size, + disk_cache_bytes_size, )?; Ok(Some(cache_holder)) } else { diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index b631521d512f8..919eff99dcfe3 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -36,7 +36,7 @@ pub struct DiskBytesCache { pub struct DiskCacheBuilder; impl DiskCacheBuilder { pub fn new_disk_cache(path: &str, disk_cache_bytes_size: u64) -> Result { - let external_cache = DiskCache::new(path, disk_cache_size) + let external_cache = DiskCache::new(path, disk_cache_bytes_size) .map_err(|e| ErrorCode::StorageOther(format!("create disk cache failed, {e}")))?; let inner = Arc::new(RwLock::new(external_cache)); Ok(DiskBytesCache { inner }) diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs 
b/src/query/storages/common/cache/src/providers/table_data_cache.rs index 575bd76784a8b..ec74a60ec7fc8 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -82,7 +82,7 @@ impl TableDataCacheBuilder { population_queue_size: u32, disk_cache_bytes_size: u64, ) -> Result> { - let disk_cache = DiskCacheBuilder::new_disk_cache(path, disk_cache_mb_size)?; + let disk_cache = DiskCacheBuilder::new_disk_cache(path, disk_cache_bytes_size)?; let (rx, tx) = crossbeam_channel::bounded(population_queue_size as usize); let num_population_thread = 1; Ok(TableDataCache { From e1eda534a500199ab5c22daf47681187b653bed4 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 11 Feb 2023 13:10:57 +0800 Subject: [PATCH 45/80] add doc --- .../storages/fuse/src/io/read/block/block_reader_merge_io.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs index 3e8d50e9a0965..41a8255a2a2e1 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs @@ -111,6 +111,7 @@ impl MergeIOReadResult { range: Range, ) { if let Some(table_data_cache) = &self.table_data_cache { + // populate raw column data cache (compressed raw bytes) if let Ok(chunk_data) = self.get_chunk(chunk_index, &self.block_path) { let cache_key = TableDataColumnCacheKey::new(&self.block_path, column_id); let data = &chunk_data[range.clone()]; From fd7cea6101bfb2b1fbc8c55505e2ac2dde653fc4 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 11 Feb 2023 13:46:30 +0800 Subject: [PATCH 46/80] fix cache metric name --- src/query/storages/common/cache/src/metrics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/storages/common/cache/src/metrics.rs 
b/src/query/storages/common/cache/src/metrics.rs index 5dd7fedc36a40..d6f5885a55508 100644 --- a/src/query/storages/common/cache/src/metrics.rs +++ b/src/query/storages/common/cache/src/metrics.rs @@ -38,5 +38,5 @@ pub fn metrics_inc_cache_hit_count(c: u64, cache_name: &str) { } pub fn metrics_inc_cache_population_pending_count(c: i64, cache_name: &str) { - increment_gauge!(key_str(cache_name, "memory_hit_count"), c as f64); + increment_gauge!(key_str(cache_name, "population_pending_count"), c as f64); } From 16fbedf312d8619975e22f9eb471d40d769ede6c Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 11 Feb 2023 13:53:20 +0800 Subject: [PATCH 47/80] fix: shoot should have swap the data cache construction after setting changed --- src/query/storages/common/cache-manager/src/cache_manager.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index 8fea4b87eef39..41647a52f7815 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -55,13 +55,13 @@ impl CacheManager { pub fn init(config: &QueryConfig) -> Result<()> { // setup table data cache let table_data_cache = if config.table_data_cache_enabled { - None - } else { Self::new_block_data_cache( &config.table_disk_cache_root, config.table_data_cache_population_queue_size, config.table_disk_cache_mb_size * 1024 * 1024, )? 
+ } else { + None }; // setup in-memory table column cache From 6d50da5d32ef1b4e9448b554278f1e08d91ac9fd Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 11 Feb 2023 16:34:33 +0800 Subject: [PATCH 48/80] fix: metrics of pending cache population items --- .../storages/common/cache/src/providers/table_data_cache.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index ec74a60ec7fc8..af8973b55158d 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -112,9 +112,11 @@ impl CacheAccessor, DefaultHashBuilder, Count> for TableDataCach // populate the cache to external cache(disk/redis) asyncly let msg = CacheItem { key: k, value: v }; match self.population_queue.try_send(msg) { - Ok(_) => {} - Err(TrySendError::Full(_)) => { + Ok(_) => { metrics_inc_cache_population_pending_count(1, TABLE_DATA_CACHE_NAME); + } + Err(TrySendError::Full(_)) => { + metrics_inc_cache_population_pending_count(-1, TABLE_DATA_CACHE_NAME); warn!("external cache population queue is full"); } Err(TrySendError::Disconnected(_)) => { From 27978156953e851eefaf2541caac0ccde045dd01 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 11 Feb 2023 18:42:07 +0800 Subject: [PATCH 49/80] tweak metrics --- src/query/storages/common/cache/src/metrics.rs | 4 ++++ .../common/cache/src/providers/table_data_cache.rs | 9 ++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/query/storages/common/cache/src/metrics.rs b/src/query/storages/common/cache/src/metrics.rs index d6f5885a55508..2bcb69642196b 100644 --- a/src/query/storages/common/cache/src/metrics.rs +++ b/src/query/storages/common/cache/src/metrics.rs @@ -40,3 +40,7 @@ pub fn metrics_inc_cache_hit_count(c: u64, cache_name: &str) { pub fn 
metrics_inc_cache_population_pending_count(c: i64, cache_name: &str) { increment_gauge!(key_str(cache_name, "population_pending_count"), c as f64); } + +pub fn metrics_inc_cache_population_overflow_count(c: i64, cache_name: &str) { + increment_gauge!(key_str(cache_name, "population_overflow_count"), c as f64); +} diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index af8973b55158d..8fc39a1e405aa 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -28,6 +28,7 @@ use tracing::warn; use crate::metrics_inc_cache_access_count; use crate::metrics_inc_cache_hit_count; use crate::metrics_inc_cache_miss_count; +use crate::metrics_inc_cache_population_overflow_count; use crate::metrics_inc_cache_population_pending_count; use crate::CacheAccessor; use crate::DiskBytesCache; @@ -117,10 +118,11 @@ impl CacheAccessor, DefaultHashBuilder, Count> for TableDataCach } Err(TrySendError::Full(_)) => { metrics_inc_cache_population_pending_count(-1, TABLE_DATA_CACHE_NAME); - warn!("external cache population queue is full"); + metrics_inc_cache_population_overflow_count(-1, TABLE_DATA_CACHE_NAME); + warn!("table data cache population queue is full"); } Err(TrySendError::Disconnected(_)) => { - error!("external cache population thread is down"); + error!("table data cache population thread is down"); } } } @@ -164,7 +166,8 @@ where T: CacheAccessor, DefaultHashBuilder, Count> + Send + Sync } fn start(self: Arc) -> Result> { - let thread_builder = std::thread::Builder::new().name("cache-population".to_owned()); + let thread_builder = + std::thread::Builder::new().name("table-data-cache-population".to_owned()); thread_builder.spawn(move || self.populate()).map_err(|e| { ErrorCode::StorageOther(format!("spawn cache population worker thread failed, {e}")) }) From 
f15a38677841b300cf9982a2bf0231fb6cb6ce40 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 11 Feb 2023 19:52:35 +0800 Subject: [PATCH 50/80] refactor: declare cache name in cache_managers mod --- Cargo.lock | 1 - .../common/cache-manager/src/cache_manager.rs | 33 ++++++++++++------ .../common/cache-manager/src/caches.rs | 12 +++---- src/query/storages/common/cache/src/cache.rs | 14 ++++++-- .../common/cache/src/read/cached_reader.rs | 34 ++++--------------- .../storages/common/cache/src/read/readers.rs | 5 +-- src/query/storages/fuse/Cargo.toml | 1 - .../src/io/read/bloom/column_filter_reader.rs | 6 +--- .../fuse/src/io/read/meta/meta_readers.rs | 4 --- .../mutation/compact/compact_transform.rs | 5 ++- .../operations/mutation/mutation_transform.rs | 5 ++- .../hive/hive/src/hive_meta_data_reader.rs | 1 - 12 files changed, 56 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 98a9bad996f16..f82d6f283edea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2104,7 +2104,6 @@ dependencies = [ "chrono", "common-arrow", "common-base", - "common-cache", "common-catalog", "common-exception", "common-expression", diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index 41647a52f7815..ae8b509f4c362 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -84,14 +84,24 @@ impl CacheManager { table_column_array_cache, })); } else { - let table_snapshot_cache = Self::new_item_cache(config.table_cache_snapshot_count); - let table_statistic_cache = Self::new_item_cache(config.table_cache_statistic_count); - let segment_info_cache = Self::new_item_cache(config.table_cache_segment_count); - let bloom_index_filter_cache = - Self::new_item_cache(config.table_cache_bloom_index_filter_count); - let bloom_index_meta_cache = - Self::new_item_cache(config.table_cache_bloom_index_meta_count); - let 
file_meta_data_cache = Self::new_item_cache(DEFAULT_FILE_META_DATA_CACHE_ITEMS); + let table_snapshot_cache = + Self::new_item_cache(config.table_cache_snapshot_count, "table_snapshot_cache"); + let table_statistic_cache = + Self::new_item_cache(config.table_cache_statistic_count, "table_statistics_cache"); + let segment_info_cache = + Self::new_item_cache(config.table_cache_segment_count, "segment_info_cache"); + let bloom_index_filter_cache = Self::new_item_cache( + config.table_cache_bloom_index_filter_count, + "bloom_index_filter_cache", + ); + let bloom_index_meta_cache = Self::new_item_cache( + config.table_cache_bloom_index_meta_count, + "bloom_index_file_meta_data_cache", + ); + let file_meta_data_cache = Self::new_item_cache( + DEFAULT_FILE_META_DATA_CACHE_ITEMS, + "parquet_file_meta_cache", + ); GlobalInstance::set(Arc::new(Self { table_snapshot_cache, segment_info_cache, @@ -144,9 +154,12 @@ impl CacheManager { } // create cache that meters size by `Count` - fn new_item_cache(capacity: u64) -> Option> { + fn new_item_cache( + capacity: u64, + name: impl Into, + ) -> Option>> { if capacity > 0 { - Some(InMemoryCacheBuilder::new_item_cache(capacity)) + Some(InMemoryCacheBuilder::new_item_cache(capacity).name_with(name.into())) } else { None } diff --git a/src/query/storages/common/cache-manager/src/caches.rs b/src/query/storages/common/cache-manager/src/caches.rs index 0d64f6f262625..c91b59c4cec21 100644 --- a/src/query/storages/common/cache-manager/src/caches.rs +++ b/src/query/storages/common/cache-manager/src/caches.rs @@ -29,19 +29,19 @@ use storages_common_table_meta::meta::TableSnapshotStatistics; use crate::cache_manager::CacheManager; /// In memory object cache of SegmentInfo -pub type SegmentInfoCache = InMemoryItemCacheHolder; +pub type SegmentInfoCache = NamedCache>; /// In memory object cache of TableSnapshot -pub type TableSnapshotCache = InMemoryItemCacheHolder; +pub type TableSnapshotCache = NamedCache>; /// In memory object cache of 
TableSnapshotStatistics -pub type TableSnapshotStatisticCache = InMemoryItemCacheHolder; +pub type TableSnapshotStatisticCache = NamedCache>; /// In memory object cache of bloom filter. /// For each indexed data block, the bloom xor8 filter of column is cached individually -pub type BloomIndexFilterCache = InMemoryItemCacheHolder; +pub type BloomIndexFilterCache = NamedCache>; pub struct BloomIndexMeta(pub FileMetaData); /// In memory object cache of parquet FileMetaData of bloom index data -pub type BloomIndexMetaCache = InMemoryItemCacheHolder; +pub type BloomIndexMetaCache = NamedCache>; /// In memory object cache of parquet FileMetaData of external parquet files -pub type FileMetaDataCache = InMemoryItemCacheHolder; +pub type FileMetaDataCache = NamedCache>; /// In memory object cache of parquet FileMetaData of external parquet files pub type ColumnArrayCache = diff --git a/src/query/storages/common/cache/src/cache.rs b/src/query/storages/common/cache/src/cache.rs index 99a3ba30d16c6..2f9fcec21da0c 100644 --- a/src/query/storages/common/cache/src/cache.rs +++ b/src/query/storages/common/cache/src/cache.rs @@ -41,8 +41,11 @@ where pub trait Named where Self: Sized { - fn name_with(self, name: String) -> NamedCache { - NamedCache { name, cache: self } + fn name_with(self, name: impl Into) -> NamedCache { + NamedCache { + name: name.into(), + cache: self, + } } } @@ -55,6 +58,13 @@ pub struct NamedCache { cache: C, } +impl NamedCache { + #[inline] + pub fn name(&self) -> &str { + &self.name + } +} + impl CacheAccessor for NamedCache where C: CacheAccessor, diff --git a/src/query/storages/common/cache/src/read/cached_reader.rs b/src/query/storages/common/cache/src/read/cached_reader.rs index 93386de9439f2..30f1563ccc307 100644 --- a/src/query/storages/common/cache/src/read/cached_reader.rs +++ b/src/query/storages/common/cache/src/read/cached_reader.rs @@ -22,34 +22,27 @@ use common_exception::Result; use parking_lot::RwLock; use super::loader::LoadParams; -use 
crate::metrics::metrics_inc_cache_access_count; -use crate::metrics::metrics_inc_cache_hit_count; -use crate::metrics::metrics_inc_cache_miss_count; use crate::metrics::metrics_inc_cache_miss_load_millisecond; use crate::CacheAccessor; use crate::Loader; +use crate::NamedCache; /// A cache-aware reader pub struct CachedReader { cache: Option, loader: L, - cache_name: String, } pub type CacheHolder = Arc, S, M>>>; -impl CachedReader> +impl CachedReader>> where L: Loader + Sync, S: BuildHasher, M: CountableMeter>, { - pub fn new(cache: Option>, name: impl Into, loader: L) -> Self { - Self { - cache, - cache_name: name.into(), - loader, - } + pub fn new(cache: Option>>, loader: L) -> Self { + Self { cache, loader } } /// Load the object at `location`, uses/populates the cache if possible/necessary. @@ -57,21 +50,9 @@ where match &self.cache { None => Ok(Arc::new(self.loader.load(params).await?)), Some(cache) => { - // Perf. - { - metrics_inc_cache_access_count(1, &self.cache_name); - } - let cache_key = self.loader.cache_key(params); match cache.get(cache_key.as_str()) { - Some(item) => { - // Perf. - { - metrics_inc_cache_hit_count(1, &self.cache_name); - } - - Ok(item) - } + Some(item) => Ok(item), None => { let start = Instant::now(); @@ -80,10 +61,9 @@ where // Perf. 
{ - metrics_inc_cache_miss_count(1, &self.cache_name); metrics_inc_cache_miss_load_millisecond( start.elapsed().as_millis() as u64, - &self.cache_name, + cache.name(), ); } @@ -96,6 +76,6 @@ where } pub fn name(&self) -> &str { - self.cache_name.as_str() + self.cache.as_ref().map(|c| c.name()).unwrap_or("") } } diff --git a/src/query/storages/common/cache/src/read/readers.rs b/src/query/storages/common/cache/src/read/readers.rs index 7429544a71e3d..a0b27db424ddf 100644 --- a/src/query/storages/common/cache/src/read/readers.rs +++ b/src/query/storages/common/cache/src/read/readers.rs @@ -15,6 +15,7 @@ use crate::read::cached_reader::CachedReader; use crate::InMemoryBytesCacheHolder; use crate::InMemoryItemCacheHolder; +use crate::NamedCache; -pub type InMemoryItemCacheReader = CachedReader>; -pub type InMemoryBytesCacheReader = CachedReader; +pub type InMemoryItemCacheReader = CachedReader>>; +pub type InMemoryBytesCacheReader = CachedReader>; diff --git a/src/query/storages/fuse/Cargo.toml b/src/query/storages/fuse/Cargo.toml index fab7eea1a5c7b..82259a484f744 100644 --- a/src/query/storages/fuse/Cargo.toml +++ b/src/query/storages/fuse/Cargo.toml @@ -14,7 +14,6 @@ test = false [dependencies] common-arrow = { path = "../../../common/arrow" } common-base = { path = "../../../common/base" } -common-cache = { path = "../../../common/cache" } common-catalog = { path = "../../catalog" } common-exception = { path = "../../../common/exception" } common-expression = { path = "../../expression" } diff --git a/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs b/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs index 219206d1e304f..4db7581f56bf6 100644 --- a/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs +++ b/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs @@ -66,11 +66,7 @@ impl BloomColumnFilterReader { column_descriptor: column_chunk_meta.descriptor().clone(), }; - let cached_reader = CachedReader::new( 
- Xor8Filter::cache(), - "bloom_index_filter_cache".to_owned(), - loader, - ); + let cached_reader = CachedReader::new(Xor8Filter::cache(), loader); let param = LoadParams { location: index_path, diff --git a/src/query/storages/fuse/src/io/read/meta/meta_readers.rs b/src/query/storages/fuse/src/io/read/meta/meta_readers.rs index fb98c34b67adb..3ca710c0caec4 100644 --- a/src/query/storages/fuse/src/io/read/meta/meta_readers.rs +++ b/src/query/storages/fuse/src/io/read/meta/meta_readers.rs @@ -46,7 +46,6 @@ impl MetaReaders { pub fn segment_info_reader(dal: Operator, schema: TableSchemaRef) -> SegmentInfoReader { SegmentInfoReader::new( CacheManager::instance().get_table_segment_cache(), - "segment_info_cache".to_owned(), LoaderWrapper((dal, schema)), ) } @@ -54,7 +53,6 @@ impl MetaReaders { pub fn table_snapshot_reader(dal: Operator) -> TableSnapshotReader { TableSnapshotReader::new( CacheManager::instance().get_table_snapshot_cache(), - "snapshot_cache".to_owned(), LoaderWrapper(dal), ) } @@ -62,7 +60,6 @@ impl MetaReaders { pub fn table_snapshot_statistics_reader(dal: Operator) -> TableSnapshotStatisticsReader { TableSnapshotStatisticsReader::new( CacheManager::instance().get_table_snapshot_statistics_cache(), - "table_statistics_cache".to_owned(), LoaderWrapper(dal), ) } @@ -70,7 +67,6 @@ impl MetaReaders { pub fn file_meta_data_reader(dal: Operator) -> BloomIndexFileMetaDataReader { BloomIndexFileMetaDataReader::new( CacheManager::instance().get_bloom_index_meta_cache(), - "bloom_index_file_meta_data_cache".to_owned(), LoaderWrapper(dal), ) } diff --git a/src/query/storages/fuse/src/operations/mutation/compact/compact_transform.rs b/src/query/storages/fuse/src/operations/mutation/compact/compact_transform.rs index 370de35f2c501..c47f2e2c75b29 100644 --- a/src/query/storages/fuse/src/operations/mutation/compact/compact_transform.rs +++ b/src/query/storages/fuse/src/operations/mutation/compact/compact_transform.rs @@ -19,7 +19,6 @@ use std::time::Instant; use 
common_base::base::Progress; use common_base::base::ProgressValues; -use common_cache::Cache; use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; @@ -29,6 +28,7 @@ use common_expression::TableSchemaRef; use common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; use opendal::Operator; use storages_common_blocks::blocks_to_parquet; +use storages_common_cache::CacheAccessor; use storages_common_cache_manager::CacheManager; use storages_common_index::BloomIndex; use storages_common_table_meta::meta::BlockMeta; @@ -306,8 +306,7 @@ impl Processor for CompactTransform { } State::Output { location, segment } => { if let Some(segment_cache) = CacheManager::instance().get_table_segment_cache() { - let cache = &mut segment_cache.write(); - cache.put(location.clone(), segment.clone()); + segment_cache.put(location.clone(), segment.clone()) } let meta = CompactSinkMeta::create( diff --git a/src/query/storages/fuse/src/operations/mutation/mutation_transform.rs b/src/query/storages/fuse/src/operations/mutation/mutation_transform.rs index c366e9e74cfcf..baedbf2dcb000 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutation_transform.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutation_transform.rs @@ -17,7 +17,6 @@ use std::collections::BTreeMap; use std::collections::HashMap; use std::sync::Arc; -use common_cache::Cache; use common_catalog::table_context::TableContext; use common_exception::ErrorCode; use common_exception::Result; @@ -25,6 +24,7 @@ use common_expression::BlockThresholds; use common_expression::DataBlock; use common_expression::TableSchemaRef; use opendal::Operator; +use storages_common_cache::CacheAccessor; use storages_common_cache_manager::CacheManager; use storages_common_table_meta::meta::BlockMeta; use storages_common_table_meta::meta::Location; @@ -155,8 +155,7 @@ impl MutationTransform { handles.push(async move { op.object(&segment.location).write(segment.data).await?; if let 
Some(segment_cache) = CacheManager::instance().get_table_segment_cache() { - let cache = &mut segment_cache.write(); - cache.put(segment.location.clone(), segment.segment.clone()); + segment_cache.put(segment.location.clone(), segment.segment.clone()); } Ok::<_, ErrorCode>(()) }); diff --git a/src/query/storages/hive/hive/src/hive_meta_data_reader.rs b/src/query/storages/hive/hive/src/hive_meta_data_reader.rs index 1d8f71d119677..d3878b16b46a9 100644 --- a/src/query/storages/hive/hive/src/hive_meta_data_reader.rs +++ b/src/query/storages/hive/hive/src/hive_meta_data_reader.rs @@ -30,7 +30,6 @@ impl MetaDataReader { pub fn meta_data_reader(dal: Operator) -> FileMetaDataReader { FileMetaDataReader::new( CacheManager::instance().get_file_meta_data_cache(), - "file_meta_data_cache".to_owned(), LoaderWrapper(dal), ) } From 9e817d68ca12b356158c38f2e23cbedc202fd9c7 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sat, 11 Feb 2023 20:26:34 +0800 Subject: [PATCH 51/80] refactor: resovle clippy::too_many_arguments --- .../common/cache/src/read/cached_reader.rs | 4 +- .../block/block_reader_parquet_deserialize.rs | 41 +++++++++++-------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/query/storages/common/cache/src/read/cached_reader.rs b/src/query/storages/common/cache/src/read/cached_reader.rs index 30f1563ccc307..d0f62b1e63057 100644 --- a/src/query/storages/common/cache/src/read/cached_reader.rs +++ b/src/query/storages/common/cache/src/read/cached_reader.rs @@ -17,12 +17,12 @@ use std::sync::Arc; use std::time::Instant; use common_cache::CountableMeter; -use common_cache::LruCache; use common_exception::Result; use parking_lot::RwLock; use super::loader::LoadParams; use crate::metrics::metrics_inc_cache_miss_load_millisecond; +use crate::providers::ImMemoryCache; use crate::CacheAccessor; use crate::Loader; use crate::NamedCache; @@ -33,7 +33,7 @@ pub struct CachedReader { loader: L, } -pub type CacheHolder = Arc, S, M>>>; +pub type CacheHolder = 
Arc>>; impl CachedReader>> where diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs index e621b32ffe4b6..993f8ecc6b3ad 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs @@ -51,6 +51,14 @@ enum DeserializedArray<'a> { NoNeedToCache(Box), } +pub struct FieldDeserializationContext<'a> { + column_metas: &'a HashMap, + column_chunks: &'a HashMap>, + num_rows: usize, + compression: &'a Compression, + uncompressed_buffer: &'a Option>, +} + impl BlockReader { /// Deserialize column chunks data from parquet format to DataBlock. pub fn deserialize_parquet_chunks( @@ -108,16 +116,15 @@ impl BlockReader { let mut need_default_vals = Vec::with_capacity(fields.len()); let mut need_to_fill_default_val = false; let mut deserialized_column_arrays = Vec::with_capacity(self.projection.len()); + let field_deserialization_ctx = FieldDeserializationContext { + column_metas, + column_chunks: &column_chunks, + num_rows, + compression, + uncompressed_buffer: &uncompressed_buffer, + }; for (column, is_nested_field) in &fields { - match self.deserialize_field( - column, - column_metas, - &column_chunks, - num_rows, - compression, - &uncompressed_buffer, - *is_nested_field, - )? { + match self.deserialize_field(&field_deserialization_ctx, column, *is_nested_field)? { None => { need_to_fill_default_val = true; need_default_vals.push(true); @@ -230,22 +237,19 @@ impl BlockReader { )?) 
} - // TODO: refactor this method, too many args - #[allow(clippy::too_many_arguments)] fn deserialize_field<'a>( &self, + deserialization_context: &'a FieldDeserializationContext, column: &ColumnNode, - column_metas: &HashMap, - column_chunks: &'a HashMap>, - num_rows: usize, - compression: &Compression, - uncompressed_buffer: &'a Option>, is_nested_column: bool, ) -> Result>> { let indices = &column.leaf_ids; - let is_nested = is_nested_column || indices.len() > 1; + let column_chunks = deserialization_context.column_chunks; + let compression = deserialization_context.compression; + let uncompressed_buffer = deserialization_context.uncompressed_buffer; // column passed in may be a compound field (with sub leaves), // or a leaf column of compound field + let is_nested = is_nested_column || indices.len() > 1; let estimated_cap = indices.len(); let mut field_column_metas = Vec::with_capacity(estimated_cap); let mut field_column_data = Vec::with_capacity(estimated_cap); @@ -255,7 +259,7 @@ impl BlockReader { let mut column_id = 0; for (i, leaf_column_id) in indices.iter().enumerate() { column_id = column.leaf_column_ids[i]; - if let Some(column_meta) = column_metas.get(&column_id) { + if let Some(column_meta) = deserialization_context.column_metas.get(&column_id) { if let Some(chunk) = column_chunks.get(&column_id) { match chunk { DataItem::RawData(data) => { @@ -288,6 +292,7 @@ impl BlockReader { } } + let num_rows = deserialization_context.num_rows; if !field_column_metas.is_empty() { let field_name = column.field.name.to_owned(); let mut array_iter = Self::chunks_to_parquet_array_iter( From c4819fb77fc5a1f41899091e994434d311d4deeb Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sun, 12 Feb 2023 20:55:07 +0800 Subject: [PATCH 52/80] refactor: move DiskCache into crate storage-common-cache --- Cargo.lock | 5 +- src/common/cache/Cargo.toml | 1 - src/common/cache/src/lib.rs | 4 - src/common/cache/src/lru_disk_cache.rs | 321 ---------------- 
src/common/cache/tests/it/main.rs | 1 - src/query/storages/common/cache/Cargo.toml | 6 +- src/query/storages/common/cache/src/lib.rs | 11 +- .../common/cache/src/providers/disk_cache.rs | 344 ++++++++++++++++-- .../cache/src/providers/memory_cache.rs | 1 - .../common/cache/src/providers/mod.rs | 8 +- .../cache/src/providers/table_data_cache.rs | 11 +- .../storages/common/cache/tests/it/main.rs | 2 + .../cache/tests/it/providers/disk_cache.rs} | 34 +- .../common/cache/tests/it/providers/mod.rs | 16 + 14 files changed, 381 insertions(+), 384 deletions(-) delete mode 100644 src/common/cache/src/lru_disk_cache.rs rename src/{common/cache/tests/it/lru_disk_cache.rs => query/storages/common/cache/tests/it/providers/disk_cache.rs} (84%) create mode 100644 src/query/storages/common/cache/tests/it/providers/mod.rs diff --git a/Cargo.lock b/Cargo.lock index f82d6f283edea..40a21ff07090a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1298,7 +1298,6 @@ dependencies = [ "hex", "ritelinked", "siphasher", - "tempfile", "tracing", "walkdir", ] @@ -7824,13 +7823,17 @@ dependencies = [ "common-exception", "crc32fast", "crossbeam-channel", + "hex", "metrics", "opendal", "parking_lot 0.12.1", "ringbuffer", "serde", "serde_json", + "siphasher", + "tempfile", "tracing", + "walkdir", ] [[package]] diff --git a/src/common/cache/Cargo.toml b/src/common/cache/Cargo.toml index 3b8fd06a46143..a702e881b8478 100644 --- a/src/common/cache/Cargo.toml +++ b/src/common/cache/Cargo.toml @@ -28,4 +28,3 @@ walkdir = "2.3.2" heapsize_ = { package = "heapsize", version = "0.4.2", optional = true } [dev-dependencies] -tempfile = "3.3.0" diff --git a/src/common/cache/src/lib.rs b/src/common/cache/src/lib.rs index 17bcdaea889de..b3a62cefb3e8b 100644 --- a/src/common/cache/src/lib.rs +++ b/src/common/cache/src/lib.rs @@ -19,14 +19,10 @@ extern crate heapsize_; mod cache; -mod lru_disk_cache; mod meter; pub use cache::lru::LruCache; pub use cache::Cache; -pub use lru_disk_cache::result::Error as DiskCacheError; 
-pub use lru_disk_cache::result::Result as DiskCacheResult; -pub use lru_disk_cache::*; pub use meter::bytes_meter::BytesMeter; pub use meter::count_meter::Count; pub use meter::count_meter::CountableMeter; diff --git a/src/common/cache/src/lru_disk_cache.rs b/src/common/cache/src/lru_disk_cache.rs deleted file mode 100644 index ddc8bb9e210c1..0000000000000 --- a/src/common/cache/src/lru_disk_cache.rs +++ /dev/null @@ -1,321 +0,0 @@ -// Copyright 2023 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::fs; -use std::fs::File; -use std::hash::Hasher; -use std::io::prelude::*; -use std::io::IoSlice; -use std::path::Path; -use std::path::PathBuf; - -use siphasher::sip128; -use siphasher::sip128::Hasher128; -use tracing::error; -use walkdir::WalkDir; - -use crate::Cache; -use crate::DefaultHashBuilder; -use crate::FileSize; -use crate::LruCache; - -// TODO doc the disk cache path layout - -/// Return an iterator of `(path, size)` of files under `path` sorted by ascending last-modified -/// time, such that the oldest modified file is returned first. -fn get_all_files>(path: P) -> impl Iterator { - WalkDir::new(path.as_ref()).into_iter().filter_map(|e| { - e.ok().and_then(|f| { - // Only look at files - if f.file_type().is_file() { - f.metadata().ok().map(|m| (f.path().to_owned(), m.len())) - } else { - None - } - }) - }) -} - -/// An LRU cache of files on disk. 
-pub type LruDiskCache = DiskCache>; - -/// An basic disk cache of files on disk. -pub struct DiskCache { - cache: C, - root: PathBuf, -} - -// make it public for IT -pub struct CacheKey(String); - -impl From for CacheKey -where S: AsRef -{ - // convert key string into hex string of SipHash 2-4 128 - fn from(key: S) -> Self { - let mut sip = sip128::SipHasher24::new(); - let key = key.as_ref(); - sip.write(key.as_bytes()); - let hash = sip.finish128(); - let hex_hash = hex::encode(hash.as_bytes()); - CacheKey(hex_hash) - } -} - -impl From<&CacheKey> for PathBuf { - fn from(cache_key: &CacheKey) -> Self { - let prefix = &cache_key.0[0..3]; - let mut path_buf = PathBuf::from(prefix); - path_buf.push(Path::new(&cache_key.0)); - path_buf - } -} - -impl DiskCache -where C: Cache -{ - /// Create an `DiskCache` with `ritelinked::DefaultHashBuilder` that stores files in `path`, - /// limited to `size` bytes. - /// - /// Existing files in `path` will be stored with their last-modified time from the filesystem - /// used as the order for the recency of their use. Any files that are individually larger - /// than `size` bytes will be removed. - /// - /// The cache is not observant of changes to files under `path` from external sources, it - /// expects to have sole maintenance of the contents. - pub fn new(path: T, size: u64) -> Result - where PathBuf: From { - DiskCache { - cache: C::with_meter_and_hasher(size, FileSize, DefaultHashBuilder::default()), - root: PathBuf::from(path), - } - .init() - } -} - -impl DiskCache -where C: Cache -{ - /// Return the current size of all the files in the cache. - pub fn size(&self) -> u64 { - self.cache.size() - } - - /// Return the count of entries in the cache. - pub fn len(&self) -> usize { - self.cache.len() - } - - pub fn is_empty(&self) -> bool { - self.cache.len() == 0 - } - - /// Return the maximum size of the cache. - pub fn capacity(&self) -> u64 { - self.cache.capacity() - } - - /// Return the path in which the cache is stored. 
- pub fn path(&self) -> &Path { - self.root.as_path() - } - - /// Return the path that `key` would be stored at. - fn rel_to_abs_path>(&self, rel_path: K) -> PathBuf { - self.root.join(rel_path) - } - - /// Scan `self.root` for existing files and store them. - fn init(mut self) -> Result { - fs::create_dir_all(&self.root)?; - for (file, size) in get_all_files(&self.root) { - if !self.can_store(size) { - fs::remove_file(file).unwrap_or_else(|e| { - error!( - "Error removing file `{}` which is too large for the cache ({} bytes)", - e, size - ) - }); - } else { - while self.cache.size() + size > self.cache.capacity() { - let (rel_path, _) = self - .cache - .pop_by_policy() - .expect("Unexpectedly empty cache!"); - let cache_item_path = self.abs_path_of_cache_key(&CacheKey(rel_path)); - fs::remove_file(&cache_item_path).unwrap_or_else(|e| { - error!( - "Error removing file from cache: `{:?}`: {}", - cache_item_path, e - ) - }); - } - let relative_path = file - .strip_prefix(&self.root) - .map_err(|_e| self::Error::MalformedPath)?; - let cache_key = Self::recovery_from(relative_path); - self.cache.put(cache_key, size); - } - } - Ok(self) - } - - /// Returns `true` if the disk cache can store a file of `size` bytes. 
- pub fn can_store(&self, size: u64) -> bool { - size <= self.cache.capacity() - } - - fn recovery_from(relative_path: &Path) -> String { - let key_string = match relative_path.file_name() { - Some(file_name) => match file_name.to_str() { - Some(str) => str.to_owned(), - None => { - // relative_path is constructed by ourself, and shall be valid utf8 string - unreachable!() - } - }, - None => { - // only called during init, and only path of files are passed in - unreachable!() - } - }; - key_string - } - - fn cache_key(&self, key: &str) -> CacheKey { - CacheKey::from(key) - } - - fn abs_path_of_cache_key(&self, cache_key: &CacheKey) -> PathBuf { - let path = PathBuf::from(cache_key); - self.rel_to_abs_path(path) - } - - pub fn insert_bytes(&mut self, key: &str, bytes: &[&[u8]]) -> Result<()> { - let bytes_len = bytes.iter().map(|x| x.len() as u64).sum::(); - // check if this chunk of bytes itself is too large - if !self.can_store(bytes_len) { - return Err(Error::FileTooLarge); - } - - // check eviction - if self.cache.size() + bytes_len > self.cache.capacity() { - if let Some((rel_path, _)) = self.cache.pop_by_policy() { - let cached_item_path = self.abs_path_of_cache_key(&CacheKey(rel_path)); - fs::remove_file(&cached_item_path).unwrap_or_else(|e| { - error!( - "Error removing file from cache: `{:?}`: {}", - cached_item_path, e - ) - }); - } - } - - let cache_key = self.cache_key(key.as_ref()); - let path = self.abs_path_of_cache_key(&cache_key); - if let Some(parent_path) = path.parent() { - fs::create_dir_all(parent_path)?; - } - let mut f = File::create(&path)?; - let mut bufs = Vec::with_capacity(bytes.len()); - for slick in bytes { - bufs.push(IoSlice::new(slick)); - } - f.write_all_vectored(&mut bufs)?; - self.cache.put(cache_key.0, bytes_len); - Ok(()) - } - - /// Return `true` if a file with path `key` is in the cache. 
- pub fn contains_key(&self, key: &str) -> bool { - let cache_key = self.cache_key(key); - self.cache.contains(&cache_key.0) - } - - /// Get an opened `File` for `key`, if one exists and can be opened. Updates the Cache state - /// of the file if present. Avoid using this method if at all possible, prefer `.get`. - pub fn get_file(&mut self, key: &str) -> Result { - let cache_key = self.cache_key(key); - let path = self.abs_path_of_cache_key(&cache_key); - self.cache - .get(&cache_key.0) - .ok_or(Error::FileNotInCache) - .and_then(|_len| File::open(path).map_err(Into::into)) - } - - /// Remove the given key from the cache. - pub fn remove(&mut self, key: &str) -> Result<()> { - let cache_key = self.cache_key(key); - match self.cache.pop(&cache_key.0) { - Some(_) => { - let path = self.abs_path_of_cache_key(&cache_key); - fs::remove_file(&path).map_err(|e| { - error!("Error removing file from cache: `{:?}`: {}", path, e); - Into::into(e) - }) - } - None => Ok(()), - } - } -} - -pub mod result { - use std::error::Error as StdError; - use std::fmt; - use std::io; - - /// Errors returned by this crate. - #[derive(Debug)] - pub enum Error { - /// The file was too large to fit in the cache. - FileTooLarge, - /// The file was not in the cache. - FileNotInCache, - /// The file was not in the cache. - MalformedPath, - /// An IO Error occurred. 
- Io(io::Error), - } - - impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Error::FileTooLarge => write!(f, "File too large"), - Error::FileNotInCache => write!(f, "File not in cache"), - Error::MalformedPath => write!(f, "Malformed catch file path"), - Error::Io(ref e) => write!(f, "{}", e), - } - } - } - - impl StdError for Error { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - match self { - Error::Io(ref e) => Some(e), - _ => None, - } - } - } - - impl From for Error { - fn from(e: io::Error) -> Error { - Error::Io(e) - } - } - - /// A convenience `Result` type - pub type Result = std::result::Result; -} - -use result::*; diff --git a/src/common/cache/tests/it/main.rs b/src/common/cache/tests/it/main.rs index 4bfc6f691478f..0f9ba048238f0 100644 --- a/src/common/cache/tests/it/main.rs +++ b/src/common/cache/tests/it/main.rs @@ -15,4 +15,3 @@ #![allow(clippy::uninlined_format_args)] mod cache; -mod lru_disk_cache; diff --git a/src/query/storages/common/cache/Cargo.toml b/src/query/storages/common/cache/Cargo.toml index 35dca504b1c5a..811a2c1baa01d 100644 --- a/src/query/storages/common/cache/Cargo.toml +++ b/src/query/storages/common/cache/Cargo.toml @@ -19,12 +19,16 @@ common-exception = { path = "../../../../common/exception" } async-trait = { version = "0.1.57", package = "async-trait-fn" } crc32fast = "1.3.2" crossbeam-channel = "0.5.6" +hex = "0.4.3" metrics = "0.20.1" opendal = { workspace = true } parking_lot = "0.12.1" ringbuffer = "0.12.0" serde = { workspace = true } serde_json = { workspace = true } +siphasher = "0.3.10" tracing = "0.1.36" +walkdir = "2.3.2" -[build-dependencies] +[dev-dependencies] +tempfile = "3.3.0" diff --git a/src/query/storages/common/cache/src/lib.rs b/src/query/storages/common/cache/src/lib.rs index 3e58bb7247ce1..cb889efceedf8 100644 --- a/src/query/storages/common/cache/src/lib.rs +++ b/src/query/storages/common/cache/src/lib.rs @@ -12,6 +12,8 @@ 
// See the License for the specific language governing permissions and // limitations under the License. +#![feature(write_all_vectored)] + mod cache; mod metrics; mod providers; @@ -20,12 +22,15 @@ mod read; pub use cache::CacheAccessor; pub use cache::Named; pub use cache::NamedCache; -pub use providers::DiskBytesCache; -pub use providers::DiskCache; -pub use providers::DiskCacheBuilder; +pub use providers::DiskCacheError; +pub use providers::DiskCacheKey; +pub use providers::DiskCacheResult; pub use providers::InMemoryBytesCacheHolder; pub use providers::InMemoryCacheBuilder; pub use providers::InMemoryItemCacheHolder; +pub use providers::LruDiskCache; +pub use providers::LruDiskCacheBuilder; +pub use providers::LruDiskCacheHolder; pub use providers::TableDataCache; pub use providers::TableDataCacheBuilder; pub use providers::TableDataColumnCacheKey; diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 919eff99dcfe3..e1c33441d65b8 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -12,45 +12,308 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::fs; +use std::fs::File; +use std::hash::Hasher; +use std::io::IoSlice; use std::io::Read; +use std::io::Write; +use std::path::Path; +use std::path::PathBuf; use std::sync::Arc; +use common_cache::Cache; use common_cache::Count; -pub use common_cache::LruDiskCache as DiskCache; +use common_cache::DefaultHashBuilder; +use common_cache::FileSize; +use common_cache::LruCache; use common_exception::ErrorCode; use common_exception::Result; use parking_lot::RwLock; +use siphasher::sip128; +use siphasher::sip128::Hasher128; use tracing::error; use crate::CacheAccessor; -/// Tiered cache which consist of -/// A in-memory cache -/// A ring that keep the reference of bytes -/// A slow disk or redis based persistent cache -#[derive(Clone)] -pub struct DiskBytesCache { - inner: Arc>, +pub struct DiskCache { + cache: C, + root: PathBuf, } -pub struct DiskCacheBuilder; -impl DiskCacheBuilder { - pub fn new_disk_cache(path: &str, disk_cache_bytes_size: u64) -> Result { - let external_cache = DiskCache::new(path, disk_cache_bytes_size) - .map_err(|e| ErrorCode::StorageOther(format!("create disk cache failed, {e}")))?; - let inner = Arc::new(RwLock::new(external_cache)); - Ok(DiskBytesCache { inner }) +pub struct DiskCacheKey(String); + +impl From for DiskCacheKey +where S: AsRef +{ + // convert key string into hex string of SipHash 2-4 128 bit + fn from(key: S) -> Self { + let mut sip = sip128::SipHasher24::new(); + let key = key.as_ref(); + sip.write(key.as_bytes()); + let hash = sip.finish128(); + let hex_hash = hex::encode(hash.as_bytes()); + DiskCacheKey(hex_hash) + } +} + +impl From<&DiskCacheKey> for PathBuf { + fn from(cache_key: &DiskCacheKey) -> Self { + let prefix = &cache_key.0[0..3]; + let mut path_buf = PathBuf::from(prefix); + path_buf.push(Path::new(&cache_key.0)); + path_buf + } +} + +impl DiskCache +where C: Cache +{ + /// Create an `DiskCache` with `ritelinked::DefaultHashBuilder` that stores files in `path`, + /// limited to `size` bytes. 
+ /// + /// Existing files in `path` will be stored with their last-modified time from the filesystem + /// used as the order for the recency of their use. Any files that are individually larger + /// than `size` bytes will be removed. + /// + /// The cache is not observant of changes to files under `path` from external sources, it + /// expects to have sole maintenance of the contents. + pub fn new(path: T, size: u64) -> self::result::Result + where PathBuf: From { + DiskCache { + cache: C::with_meter_and_hasher(size, FileSize, DefaultHashBuilder::default()), + root: PathBuf::from(path), + } + .init() + } +} + +impl DiskCache +where C: Cache +{ + /// Return the current size of all the files in the cache. + pub fn size(&self) -> u64 { + self.cache.size() + } + + /// Return the count of entries in the cache. + pub fn len(&self) -> usize { + self.cache.len() + } + + pub fn is_empty(&self) -> bool { + self.cache.len() == 0 + } + + /// Return the maximum size of the cache. + pub fn capacity(&self) -> u64 { + self.cache.capacity() + } + + /// Return the path in which the cache is stored. + pub fn path(&self) -> &Path { + self.root.as_path() + } + + /// Return the path that `key` would be stored at. + fn rel_to_abs_path>(&self, rel_path: K) -> PathBuf { + self.root.join(rel_path) + } + + /// Scan `self.root` for existing files and store them. 
+ fn init(mut self) -> self::result::Result { + fs::create_dir_all(&self.root)?; + for (file, size) in get_all_files(&self.root) { + if !self.can_store(size) { + fs::remove_file(file).unwrap_or_else(|e| { + error!( + "Error removing file `{}` which is too large for the cache ({} bytes)", + e, size + ) + }); + } else { + while self.cache.size() + size > self.cache.capacity() { + let (rel_path, _) = self + .cache + .pop_by_policy() + .expect("Unexpectedly empty cache!"); + let cache_item_path = self.abs_path_of_cache_key(&DiskCacheKey(rel_path)); + fs::remove_file(&cache_item_path).unwrap_or_else(|e| { + error!( + "Error removing file from cache: `{:?}`: {}", + cache_item_path, e + ) + }); + } + let relative_path = file + .strip_prefix(&self.root) + .map_err(|_e| self::Error::MalformedPath)?; + let cache_key = Self::recovery_from(relative_path); + self.cache.put(cache_key, size); + } + } + Ok(self) + } + + /// Returns `true` if the disk cache can store a file of `size` bytes. + pub fn can_store(&self, size: u64) -> bool { + size <= self.cache.capacity() + } + + fn recovery_from(relative_path: &Path) -> String { + let key_string = match relative_path.file_name() { + Some(file_name) => match file_name.to_str() { + Some(str) => str.to_owned(), + None => { + // relative_path is constructed by ourself, and shall be valid utf8 string + unreachable!() + } + }, + None => { + // only called during init, and only path of files are passed in + unreachable!() + } + }; + key_string + } + + fn cache_key(&self, key: &str) -> DiskCacheKey { + DiskCacheKey::from(key) + } + + fn abs_path_of_cache_key(&self, cache_key: &DiskCacheKey) -> PathBuf { + let path = PathBuf::from(cache_key); + self.rel_to_abs_path(path) + } + + pub fn insert_bytes(&mut self, key: &str, bytes: &[&[u8]]) -> self::result::Result<()> { + let bytes_len = bytes.iter().map(|x| x.len() as u64).sum::(); + // check if this chunk of bytes itself is too large + if !self.can_store(bytes_len) { + return 
Err(Error::FileTooLarge); + } + + // check eviction + if self.cache.size() + bytes_len > self.cache.capacity() { + if let Some((rel_path, _)) = self.cache.pop_by_policy() { + let cached_item_path = self.abs_path_of_cache_key(&DiskCacheKey(rel_path)); + fs::remove_file(&cached_item_path).unwrap_or_else(|e| { + error!( + "Error removing file from cache: `{:?}`: {}", + cached_item_path, e + ) + }); + } + } + + let cache_key = self.cache_key(key.as_ref()); + let path = self.abs_path_of_cache_key(&cache_key); + if let Some(parent_path) = path.parent() { + fs::create_dir_all(parent_path)?; + } + let mut f = File::create(&path)?; + let mut bufs = Vec::with_capacity(bytes.len()); + for slick in bytes { + bufs.push(IoSlice::new(slick)); + } + f.write_all_vectored(&mut bufs)?; + self.cache.put(cache_key.0, bytes_len); + Ok(()) + } + + /// Return `true` if a file with path `key` is in the cache. + pub fn contains_key(&self, key: &str) -> bool { + let cache_key = self.cache_key(key); + self.cache.contains(&cache_key.0) + } + + /// Get an opened `File` for `key`, if one exists and can be opened. Updates the Cache state + /// of the file if present. Avoid using this method if at all possible, prefer `.get`. + pub fn get_file(&mut self, key: &str) -> self::result::Result { + let cache_key = self.cache_key(key); + let path = self.abs_path_of_cache_key(&cache_key); + self.cache + .get(&cache_key.0) + .ok_or(Error::FileNotInCache) + .and_then(|_len| File::open(path).map_err(Into::into)) + } + + /// Remove the given key from the cache. 
+ pub fn remove(&mut self, key: &str) -> Result<()> { + let cache_key = self.cache_key(key); + match self.cache.pop(&cache_key.0) { + Some(_) => { + let path = self.abs_path_of_cache_key(&cache_key); + fs::remove_file(&path).map_err(|e| { + error!("Error removing file from cache: `{:?}`: {}", path, e); + Into::into(e) + }) + } + None => Ok(()), + } + } +} + +pub mod result { + use std::error::Error as StdError; + use std::fmt; + use std::io; + + /// Errors returned by this crate. + #[derive(Debug)] + pub enum Error { + /// The file was too large to fit in the cache. + FileTooLarge, + /// The file was not in the cache. + FileNotInCache, + /// The file was not in the cache. + MalformedPath, + /// An IO Error occurred. + Io(io::Error), + } + + impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::FileTooLarge => write!(f, "File too large"), + Error::FileNotInCache => write!(f, "File not in cache"), + Error::MalformedPath => write!(f, "Malformed catch file path"), + Error::Io(ref e) => write!(f, "{e}"), + } + } + } + + impl StdError for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Error::Io(ref e) => Some(e), + _ => None, + } + } } + + impl From for Error { + fn from(e: io::Error) -> Error { + Error::Io(e) + } + } + + /// A convenience `Result` type + pub type Result = std::result::Result; } -impl CacheAccessor, common_cache::DefaultHashBuilder, Count> for DiskBytesCache { +use result::*; + +impl CacheAccessor, common_cache::DefaultHashBuilder, Count> + for LruDiskCacheHolder +{ fn get>(&self, k: Q) -> Option>> { let k = k.as_ref(); // check disk cache let read_file = || { let mut file = { - let mut inner = self.inner.write(); - inner.get_file(k)? + let mut cache = self.write(); + cache.get_file(k)? 
}; let mut v = vec![]; file.read_to_end(&mut v)?; @@ -62,8 +325,8 @@ impl CacheAccessor, common_cache::DefaultHashBuilder, Count> for error!("data cache, of key {k}, crc validation failure: {e}"); { // remove the invalid cache, error of removal ignored - let mut inner = self.inner.write(); - let _ = inner.remove(k); + let mut cache = self.write(); + let _ = cache.remove(k); } None } else { @@ -85,16 +348,16 @@ impl CacheAccessor, common_cache::DefaultHashBuilder, Count> for fn put(&self, key: String, value: Arc>) { let crc = crc32fast::hash(value.as_slice()); let crc_bytes = crc.to_le_bytes(); - let mut inner = self.inner.write(); - if let Err(e) = inner.insert_bytes(&key, &[value.as_slice(), &crc_bytes]) { + let mut cache = self.write(); + if let Err(e) = cache.insert_bytes(&key, &[value.as_slice(), &crc_bytes]) { error!("put disk cache item failed {}", e); } } fn evict(&self, k: &str) -> bool { if let Err(e) = { - let mut inner = self.inner.write(); - inner.remove(k) + let mut cache = self.write(); + cache.remove(k) } { error!("evict disk cache item failed {}", e); false @@ -104,8 +367,8 @@ impl CacheAccessor, common_cache::DefaultHashBuilder, Count> for } fn contains_key(&self, k: &str) -> bool { - let inner = self.inner.read(); - inner.contains_key(k) + let cache = self.read(); + cache.contains_key(k) } } @@ -131,3 +394,32 @@ fn validate_checksum(bytes: &[u8]) -> Result<()> { } } } + +/// Return an iterator of `(path, size)` of files under `path` sorted by ascending last-modified +/// time, such that the oldest modified file is returned first. 
+fn get_all_files>(path: P) -> impl Iterator { + walkdir::WalkDir::new(path.as_ref()) + .into_iter() + .filter_map(|e| { + e.ok().and_then(|f| { + // Only look at files + if f.file_type().is_file() { + f.metadata().ok().map(|m| (f.path().to_owned(), m.len())) + } else { + None + } + }) + }) +} + +pub type LruDiskCache = DiskCache>; +pub type LruDiskCacheHolder = Arc>; + +pub struct LruDiskCacheBuilder; +impl LruDiskCacheBuilder { + pub fn new_disk_cache(path: &str, disk_cache_bytes_size: u64) -> Result { + let external_cache = DiskCache::new(path, disk_cache_bytes_size) + .map_err(|e| ErrorCode::StorageOther(format!("create disk cache failed, {e}")))?; + Ok(Arc::new(RwLock::new(external_cache))) + } +} diff --git a/src/query/storages/common/cache/src/providers/memory_cache.rs b/src/query/storages/common/cache/src/providers/memory_cache.rs index d2e04a07c1986..ab5709fc1a5cd 100644 --- a/src/query/storages/common/cache/src/providers/memory_cache.rs +++ b/src/query/storages/common/cache/src/providers/memory_cache.rs @@ -58,7 +58,6 @@ impl InMemoryCacheBuilder { } } -// TODO move this to super // default impls mod impls { use std::sync::Arc; diff --git a/src/query/storages/common/cache/src/providers/mod.rs b/src/query/storages/common/cache/src/providers/mod.rs index 1f2c828a65da7..fa4c362bad6ca 100644 --- a/src/query/storages/common/cache/src/providers/mod.rs +++ b/src/query/storages/common/cache/src/providers/mod.rs @@ -15,9 +15,13 @@ mod disk_cache; mod memory_cache; mod table_data_cache; -pub use disk_cache::DiskBytesCache; +pub use disk_cache::result::Error as DiskCacheError; +pub use disk_cache::result::Result as DiskCacheResult; pub use disk_cache::DiskCache; -pub use disk_cache::DiskCacheBuilder; +pub use disk_cache::DiskCacheKey; +pub use disk_cache::LruDiskCache; +pub use disk_cache::LruDiskCacheBuilder; +pub use disk_cache::LruDiskCacheHolder; pub use memory_cache::BytesCache; pub use memory_cache::ImMemoryCache; pub use 
memory_cache::InMemoryBytesCacheHolder; diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index 8fc39a1e405aa..95606001d80ce 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -17,7 +17,6 @@ use std::thread::JoinHandle; use common_cache::Count; use common_cache::DefaultHashBuilder; -pub use common_cache::LruDiskCache as DiskCache; use common_exception::ErrorCode; use common_exception::Result; use crossbeam_channel::TrySendError; @@ -30,9 +29,9 @@ use crate::metrics_inc_cache_hit_count; use crate::metrics_inc_cache_miss_count; use crate::metrics_inc_cache_population_overflow_count; use crate::metrics_inc_cache_population_pending_count; +use crate::providers::LruDiskCacheHolder; use crate::CacheAccessor; -use crate::DiskBytesCache; -use crate::DiskCacheBuilder; +use crate::LruDiskCacheBuilder; struct CacheItem { key: String, @@ -68,7 +67,7 @@ impl AsRef for TableDataColumnCacheKey { /// - a bounded channel that keep the references of items being cached /// - a disk or redis based external cache #[derive(Clone)] -pub struct TableDataCache { +pub struct TableDataCache { external_cache: T, population_queue: crossbeam_channel::Sender, _cache_populator: DiskCachePopulator, @@ -82,8 +81,8 @@ impl TableDataCacheBuilder { path: &str, population_queue_size: u32, disk_cache_bytes_size: u64, - ) -> Result> { - let disk_cache = DiskCacheBuilder::new_disk_cache(path, disk_cache_bytes_size)?; + ) -> Result> { + let disk_cache = LruDiskCacheBuilder::new_disk_cache(path, disk_cache_bytes_size)?; let (rx, tx) = crossbeam_channel::bounded(population_queue_size as usize); let num_population_thread = 1; Ok(TableDataCache { diff --git a/src/query/storages/common/cache/tests/it/main.rs b/src/query/storages/common/cache/tests/it/main.rs index ea0ed57e60edf..39aa524049506 100644 --- 
a/src/query/storages/common/cache/tests/it/main.rs +++ b/src/query/storages/common/cache/tests/it/main.rs @@ -11,3 +11,5 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + +mod providers; diff --git a/src/common/cache/tests/it/lru_disk_cache.rs b/src/query/storages/common/cache/tests/it/providers/disk_cache.rs similarity index 84% rename from src/common/cache/tests/it/lru_disk_cache.rs rename to src/query/storages/common/cache/tests/it/providers/disk_cache.rs index ddf0da2bca583..2bb3da98fd764 100644 --- a/src/common/cache/tests/it/lru_disk_cache.rs +++ b/src/query/storages/common/cache/tests/it/providers/disk_cache.rs @@ -21,10 +21,10 @@ use std::io::Write; use std::path::Path; use std::path::PathBuf; -use common_cache::CacheKey; -use common_cache::DiskCacheError; -use common_cache::DiskCacheResult; -use common_cache::LruDiskCache; +use storages_common_cache::DiskCacheError; +use storages_common_cache::DiskCacheKey; +use storages_common_cache::DiskCacheResult; +use storages_common_cache::LruDiskCache as DiskCache; use tempfile::TempDir; struct TestFixture { @@ -37,7 +37,7 @@ trait InsertSingleSlice { fn insert_single_slice(&mut self, key: &str, bytes: &[u8]) -> DiskCacheResult<()>; } -impl InsertSingleSlice for LruDiskCache { +impl InsertSingleSlice for DiskCache { fn insert_single_slice(&mut self, key: &str, bytes: &[u8]) -> DiskCacheResult<()> { self.insert_bytes(key, &[bytes]) } @@ -86,13 +86,13 @@ impl TestFixture { #[test] fn test_empty_dir() { let f = TestFixture::new(); - LruDiskCache::new(f.tmp(), 1024).unwrap(); + DiskCache::new(f.tmp(), 1024).unwrap(); } #[test] fn test_missing_root() { let f = TestFixture::new(); - LruDiskCache::new(f.tmp().join("not-here"), 1024).unwrap(); + DiskCache::new(f.tmp().join("not-here"), 1024).unwrap(); } #[test] @@ -103,12 +103,12 @@ fn test_some_existing_files() { let total_bytes: u64 = 
sizes.clone().sum(); for i in sizes { let file_name = format!("file-{i}"); - let test_key = CacheKey::from(file_name.as_str()); + let test_key = DiskCacheKey::from(file_name.as_str()); let test_path = PathBuf::from(&test_key); f.create_file(test_path, i as usize); } - let c = LruDiskCache::new(f.tmp(), total_bytes).unwrap(); + let c = DiskCache::new(f.tmp(), total_bytes).unwrap(); assert_eq!(c.size(), total_bytes); assert_eq!(c.len(), items); } @@ -123,11 +123,11 @@ fn test_existing_file_too_large() { let sizes = (0..).take(items_count); for i in sizes { let file_name = format!("file-{i}"); - let test_key = CacheKey::from(file_name.as_str()); + let test_key = DiskCacheKey::from(file_name.as_str()); let test_path = PathBuf::from(&test_key); f.create_file(test_path, item_size); } - let c = LruDiskCache::new(f.tmp(), capacity as u64).unwrap(); + let c = DiskCache::new(f.tmp(), capacity as u64).unwrap(); assert_eq!(c.size(), capacity as u64); assert_eq!(c.len(), items_count_shall_be_kept); @@ -140,7 +140,7 @@ fn test_existing_file_too_large() { #[test] fn test_insert_bytes() { let f = TestFixture::new(); - let mut c = LruDiskCache::new(f.tmp(), 25).unwrap(); + let mut c = DiskCache::new(f.tmp(), 25).unwrap(); c.insert_single_slice("a/b/c", &[0; 10]).unwrap(); assert!(c.contains_key("a/b/c")); c.insert_single_slice("a/b/d", &[0; 10]).unwrap(); @@ -151,7 +151,7 @@ fn test_insert_bytes() { // The least-recently-used file should have been removed. assert!(!c.contains_key("a/b/c")); - let evicted_file_path = PathBuf::from(&CacheKey::from("a/b/c")); + let evicted_file_path = PathBuf::from(&DiskCacheKey::from("a/b/c")); assert!(!f.tmp().join(evicted_file_path).exists()); } @@ -159,7 +159,7 @@ fn test_insert_bytes() { fn test_insert_bytes_exact() { // Test that files adding up to exactly the size limit works. 
let f = TestFixture::new(); - let mut c = LruDiskCache::new(f.tmp(), 20).unwrap(); + let mut c = DiskCache::new(f.tmp(), 20).unwrap(); c.insert_single_slice("file1", &[1; 10]).unwrap(); c.insert_single_slice("file2", &[2; 10]).unwrap(); assert_eq!(c.size(), 20); @@ -172,7 +172,7 @@ fn test_insert_bytes_exact() { fn test_add_get_lru() { let f = TestFixture::new(); { - let mut c = LruDiskCache::new(f.tmp(), 25).unwrap(); + let mut c = DiskCache::new(f.tmp(), 25).unwrap(); c.insert_single_slice("file1", &[1; 10]).unwrap(); c.insert_single_slice("file2", &[2; 10]).unwrap(); // Get the file to bump its LRU status. @@ -191,9 +191,9 @@ fn test_add_get_lru() { #[test] fn test_insert_bytes_too_large() { let f = TestFixture::new(); - let mut c = LruDiskCache::new(f.tmp(), 1).unwrap(); + let mut c = DiskCache::new(f.tmp(), 1).unwrap(); match c.insert_single_slice("a/b/c", &[0; 2]) { Err(DiskCacheError::FileTooLarge) => {} - x => panic!("Unexpected result: {:?}", x), + x => panic!("Unexpected result: {x:?}"), } } diff --git a/src/query/storages/common/cache/tests/it/providers/mod.rs b/src/query/storages/common/cache/tests/it/providers/mod.rs new file mode 100644 index 0000000000000..ad150d5b016b8 --- /dev/null +++ b/src/query/storages/common/cache/tests/it/providers/mod.rs @@ -0,0 +1,16 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +mod disk_cache; From ccd1ada41dbaa5793cc4dc24cd8577908de26384 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sun, 12 Feb 2023 21:21:56 +0800 Subject: [PATCH 53/80] refacot: rename setting and metrics --- src/query/config/src/inner.rs | 22 ++++++++++++++----- src/query/config/src/outer_v0.rs | 19 +++++++++++----- .../common/cache-manager/src/cache_manager.rs | 20 ++++++++--------- .../storages/common/cache/src/metrics.rs | 2 +- 4 files changed, 40 insertions(+), 23 deletions(-) diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 84cafe43e3746..fba3758ee41a9 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -151,7 +151,8 @@ pub struct QueryConfig { pub table_meta_cache_enabled: bool, /// Max number of cached table block meta pub table_cache_block_meta_count: u64, - /// Table memory cache size (MB) + /// Table memory cache size (MB), + /// @deprecated pub table_memory_cache_mb_size: u64, /// Max number of cached table snapshot pub table_cache_snapshot_count: u64, @@ -164,14 +165,25 @@ pub struct QueryConfig { /// Max number of cached bloom index filters pub table_cache_bloom_index_filter_count: u64, /// Max size of in memory table column object cache - pub table_cache_column_mb_size: u64, + /// + /// The cache items are deserialized object, may take a lot of memory. + /// Please set it to zero to disable it. + pub table_data_cache_in_memory_column_mb_size: u64, /// Indicates if table data cache is enabled pub table_data_cache_enabled: bool, /// Table disk cache folder root pub table_disk_cache_root: String, /// Max size of external cache population queue length - /// TODO explain this, how it effect the memory usage - /// the item being queued are typically referencing items that inside in-memory cached data cache + /// + /// the items being queued reference table column row data, which are + /// un-deserialized and usually compressed (depends on table compression options). 
+ /// + /// - please monitor the 'table_data_cache_population_pending_count' metric + /// if it is too high, and takes too much memory, please consider decrease this value + /// + /// - please monitor the 'population_overflow_count' metric + /// if it keeps increasing, and disk cache hits rate is not as expected. please consider + /// increase this value. pub table_data_cache_population_queue_size: u32, /// Table disk cache size (MB) pub table_disk_cache_mb_size: u64, @@ -228,7 +240,7 @@ impl Default for QueryConfig { table_cache_segment_count: 10240, table_cache_bloom_index_meta_count: 3000, table_cache_bloom_index_filter_count: 1024 * 1024, - table_cache_column_mb_size: 10 * 1024, + table_data_cache_in_memory_column_mb_size: 0, table_data_cache_enabled: false, table_data_cache_population_queue_size: 65536, table_disk_cache_root: "_cache".to_string(), diff --git a/src/query/config/src/outer_v0.rs b/src/query/config/src/outer_v0.rs index fca760af6daf6..80ceefd88229d 100644 --- a/src/query/config/src/outer_v0.rs +++ b/src/query/config/src/outer_v0.rs @@ -1301,10 +1301,15 @@ pub struct QueryConfig { #[clap(long, default_value = "1048576")] pub table_cache_bloom_index_filter_count: u64, - /// Max size of in memory table column object cache, default value is 10 GiB - /// To disable this cache , jus set it to 0 - #[clap(long, default_value = "10240")] - pub table_cache_column_mb_size: u64, + /// Max size of in memory table column object cache + /// + /// The cache items are deserialized table column objects, may take a lot of memory. + /// + /// Default value is 0, which disable this cache. If query nodes have plenty of un-utilized + /// memory, the working set can be fitted into, and the access pattern will benefit from + /// caching, consider enabled this cache. 
+ #[clap(long, default_value = "0")] + pub table_data_cache_in_memory_column_mb_size: u64, /// Indicates if table data cached is enabled, default false #[clap(long)] @@ -1399,7 +1404,8 @@ impl TryInto for QueryConfig { table_cache_segment_count: self.table_cache_segment_count, table_cache_bloom_index_meta_count: self.table_cache_bloom_index_meta_count, table_cache_bloom_index_filter_count: self.table_cache_bloom_index_filter_count, - table_cache_column_mb_size: self.table_cache_column_mb_size, + table_data_cache_in_memory_column_mb_size: self + .table_data_cache_in_memory_column_mb_size, table_data_cache_enabled: self.table_data_cache_enabled, table_data_cache_population_queue_size: self.table_data_cache_population_queue_size, table_disk_cache_root: self.table_disk_cache_root, @@ -1469,7 +1475,8 @@ impl From for QueryConfig { table_cache_segment_count: inner.table_cache_segment_count, table_cache_bloom_index_meta_count: inner.table_cache_bloom_index_meta_count, table_cache_bloom_index_filter_count: inner.table_cache_bloom_index_filter_count, - table_cache_column_mb_size: inner.table_cache_column_mb_size, + table_data_cache_in_memory_column_mb_size: inner + .table_data_cache_in_memory_column_mb_size, table_data_cache_enabled: inner.table_data_cache_enabled, table_data_cache_population_queue_size: inner.table_data_cache_population_queue_size, management_mode: inner.management_mode, diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index ae8b509f4c362..739e7d27ddbaa 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -66,9 +66,9 @@ impl CacheManager { // setup in-memory table column cache let table_column_array_cache = Self::new_in_memory_cache( - config.table_cache_column_mb_size * 1024 * 1024, + config.table_data_cache_in_memory_column_mb_size * 1024 * 1024, ColumnArrayMeter, - 
"table_data_cache_column_array", + "table_data_column_array", ); // setup in-memory table meta cache @@ -85,23 +85,21 @@ impl CacheManager { })); } else { let table_snapshot_cache = - Self::new_item_cache(config.table_cache_snapshot_count, "table_snapshot_cache"); + Self::new_item_cache(config.table_cache_snapshot_count, "table_snapshot"); let table_statistic_cache = - Self::new_item_cache(config.table_cache_statistic_count, "table_statistics_cache"); + Self::new_item_cache(config.table_cache_statistic_count, "table_statistics"); let segment_info_cache = - Self::new_item_cache(config.table_cache_segment_count, "segment_info_cache"); + Self::new_item_cache(config.table_cache_segment_count, "segment_info"); let bloom_index_filter_cache = Self::new_item_cache( config.table_cache_bloom_index_filter_count, - "bloom_index_filter_cache", + "bloom_index_filter", ); let bloom_index_meta_cache = Self::new_item_cache( config.table_cache_bloom_index_meta_count, - "bloom_index_file_meta_data_cache", - ); - let file_meta_data_cache = Self::new_item_cache( - DEFAULT_FILE_META_DATA_CACHE_ITEMS, - "parquet_file_meta_cache", + "bloom_index_file_meta_data", ); + let file_meta_data_cache = + Self::new_item_cache(DEFAULT_FILE_META_DATA_CACHE_ITEMS, "parquet_file_meta"); GlobalInstance::set(Arc::new(Self { table_snapshot_cache, segment_info_cache, diff --git a/src/query/storages/common/cache/src/metrics.rs b/src/query/storages/common/cache/src/metrics.rs index 2bcb69642196b..9fd88c75a9613 100644 --- a/src/query/storages/common/cache/src/metrics.rs +++ b/src/query/storages/common/cache/src/metrics.rs @@ -34,7 +34,7 @@ pub fn metrics_inc_cache_miss_load_millisecond(c: u64, cache_name: &str) { } pub fn metrics_inc_cache_hit_count(c: u64, cache_name: &str) { - increment_gauge!(key_str(cache_name, "memory_hit_count"), c as f64); + increment_gauge!(key_str(cache_name, "hit_count"), c as f64); } pub fn metrics_inc_cache_population_pending_count(c: i64, cache_name: &str) { From 
65e07ab0b8d7f87bac3e1a4bea22f284a1108932 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sun, 12 Feb 2023 21:49:16 +0800 Subject: [PATCH 54/80] update doc --- src/query/storages/common/cache-manager/src/caches.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/storages/common/cache-manager/src/caches.rs b/src/query/storages/common/cache-manager/src/caches.rs index c91b59c4cec21..41ba7b42455f1 100644 --- a/src/query/storages/common/cache-manager/src/caches.rs +++ b/src/query/storages/common/cache-manager/src/caches.rs @@ -43,7 +43,7 @@ pub type BloomIndexMetaCache = NamedCache>; -/// In memory object cache of parquet FileMetaData of external parquet files +/// In memory object cache of table column array pub type ColumnArrayCache = NamedCache>; pub type ArrayRawDataUncompressedSize = usize; From be4c69d01c345ebbac3e708198557f4fe7143397 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sun, 12 Feb 2023 22:23:16 +0800 Subject: [PATCH 55/80] refactor: renaming cache setting names and units --- src/query/config/src/inner.rs | 14 +++++------ src/query/config/src/outer_v0.rs | 25 +++++++++---------- src/query/service/tests/it/configs.rs | 8 +++--- .../storages/testdata/configs_table_basic.txt | 4 +-- .../common/cache-manager/src/cache_manager.rs | 4 +-- 5 files changed, 27 insertions(+), 28 deletions(-) diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index fba3758ee41a9..310e9b9c2a598 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -164,18 +164,18 @@ pub struct QueryConfig { pub table_cache_bloom_index_meta_count: u64, /// Max number of cached bloom index filters pub table_cache_bloom_index_filter_count: u64, - /// Max size of in memory table column object cache + /// Max size(bytes) of in memory table column object cache /// /// The cache items are deserialized object, may take a lot of memory. /// Please set it to zero to disable it. 
- pub table_data_cache_in_memory_column_mb_size: u64, + pub table_data_cache_in_memory_max_size: u64, /// Indicates if table data cache is enabled pub table_data_cache_enabled: bool, /// Table disk cache folder root pub table_disk_cache_root: String, /// Max size of external cache population queue length /// - /// the items being queued reference table column row data, which are + /// the items being queued reference table column raw data, which are /// un-deserialized and usually compressed (depends on table compression options). /// /// - please monitor the 'table_data_cache_population_pending_count' metric @@ -185,8 +185,8 @@ pub struct QueryConfig { /// if it keeps increasing, and disk cache hits rate is not as expected. please consider /// increase this value. pub table_data_cache_population_queue_size: u32, - /// Table disk cache size (MB) - pub table_disk_cache_mb_size: u64, + /// Table disk cache size (bytes), default value is 21474836480 + pub table_disk_cache_max_size: u64, /// If in management mode, only can do some meta level operations(database/table/user/stage etc.) with metasrv. 
pub management_mode: bool, pub jwt_key_file: String, @@ -240,11 +240,11 @@ impl Default for QueryConfig { table_cache_segment_count: 10240, table_cache_bloom_index_meta_count: 3000, table_cache_bloom_index_filter_count: 1024 * 1024, - table_data_cache_in_memory_column_mb_size: 0, + table_data_cache_in_memory_max_size: 0, table_data_cache_enabled: false, table_data_cache_population_queue_size: 65536, table_disk_cache_root: "_cache".to_string(), - table_disk_cache_mb_size: 20 * 1024, + table_disk_cache_max_size: 20 * 1024 * 1024 * 1024, management_mode: false, jwt_key_file: "".to_string(), async_insert_max_data_size: 10000, diff --git a/src/query/config/src/outer_v0.rs b/src/query/config/src/outer_v0.rs index 80ceefd88229d..c1fbbd7e627e0 100644 --- a/src/query/config/src/outer_v0.rs +++ b/src/query/config/src/outer_v0.rs @@ -1273,9 +1273,9 @@ pub struct QueryConfig { #[clap(long, default_value = "_cache")] pub table_disk_cache_root: String, - /// Table disk cache size (mb) - #[clap(long, default_value = "20480")] - pub table_disk_cache_mb_size: u64, + /// Table disk cache size (bytes), default values is 20GB + #[clap(long, default_value = "21474836480")] + pub table_disk_cache_max_size: u64, /// Max number of cached table snapshot #[clap(long, default_value = "256")] @@ -1309,15 +1309,16 @@ pub struct QueryConfig { /// memory, the working set can be fitted into, and the access pattern will benefit from /// caching, consider enabled this cache. #[clap(long, default_value = "0")] - pub table_data_cache_in_memory_column_mb_size: u64, + pub table_data_cache_in_memory_max_size: u64, /// Indicates if table data cached is enabled, default false #[clap(long)] pub table_data_cache_enabled: bool, - /// Max item that could be pending in the external cache population queue - /// default value 65536 items. Increase this value if it takes too much times - /// to fully populate the disk cache. 
+ /// Max number of items that could be pending in the table data cache population queue + /// default value 65536 items. + /// Increase this value if it takes too much times to fully populate the disk cache. + /// Decrease this value if it takes too much memory to queue the items being cached. #[clap(long, default_value = "65536")] pub table_data_cache_population_queue_size: u32, @@ -1404,12 +1405,11 @@ impl TryInto for QueryConfig { table_cache_segment_count: self.table_cache_segment_count, table_cache_bloom_index_meta_count: self.table_cache_bloom_index_meta_count, table_cache_bloom_index_filter_count: self.table_cache_bloom_index_filter_count, - table_data_cache_in_memory_column_mb_size: self - .table_data_cache_in_memory_column_mb_size, + table_data_cache_in_memory_max_size: self.table_data_cache_in_memory_max_size, table_data_cache_enabled: self.table_data_cache_enabled, table_data_cache_population_queue_size: self.table_data_cache_population_queue_size, table_disk_cache_root: self.table_disk_cache_root, - table_disk_cache_mb_size: self.table_disk_cache_mb_size, + table_disk_cache_max_size: self.table_disk_cache_max_size, management_mode: self.management_mode, jwt_key_file: self.jwt_key_file, async_insert_max_data_size: self.async_insert_max_data_size, @@ -1469,14 +1469,13 @@ impl From for QueryConfig { table_cache_block_meta_count: inner.table_cache_block_meta_count, table_memory_cache_mb_size: inner.table_memory_cache_mb_size, table_disk_cache_root: inner.table_disk_cache_root, - table_disk_cache_mb_size: inner.table_disk_cache_mb_size, + table_disk_cache_max_size: inner.table_disk_cache_max_size, table_cache_snapshot_count: inner.table_cache_snapshot_count, table_cache_statistic_count: inner.table_cache_statistic_count, table_cache_segment_count: inner.table_cache_segment_count, table_cache_bloom_index_meta_count: inner.table_cache_bloom_index_meta_count, table_cache_bloom_index_filter_count: inner.table_cache_bloom_index_filter_count, - 
table_data_cache_in_memory_column_mb_size: inner - .table_data_cache_in_memory_column_mb_size, + table_data_cache_in_memory_max_size: inner.table_data_cache_in_memory_max_size, table_data_cache_enabled: inner.table_data_cache_enabled, table_data_cache_population_queue_size: inner.table_data_cache_population_queue_size, management_mode: inner.management_mode, diff --git a/src/query/service/tests/it/configs.rs b/src/query/service/tests/it/configs.rs index 1624061a55e21..50c660fc8c043 100644 --- a/src/query/service/tests/it/configs.rs +++ b/src/query/service/tests/it/configs.rs @@ -248,7 +248,7 @@ fn test_env_config_fs() -> Result<()> { assert!(configured.query.table_meta_cache_enabled); assert_eq!(512, configured.query.table_memory_cache_mb_size); assert_eq!("_cache_env", configured.query.table_disk_cache_root); - assert_eq!(512, configured.query.table_disk_cache_mb_size); + assert_eq!(21474836480, configured.query.table_disk_cache_max_size); assert_eq!(10240, configured.query.table_cache_segment_count); assert_eq!(256, configured.query.table_cache_snapshot_count); assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count); @@ -374,7 +374,7 @@ fn test_env_config_gcs() -> Result<()> { assert!(configured.query.table_meta_cache_enabled); assert_eq!(512, configured.query.table_memory_cache_mb_size); assert_eq!("_cache_env", configured.query.table_disk_cache_root); - assert_eq!(512, configured.query.table_disk_cache_mb_size); + assert_eq!(21474836480, configured.query.table_disk_cache_max_size); assert_eq!(10240, configured.query.table_cache_segment_count); assert_eq!(256, configured.query.table_cache_snapshot_count); assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count); @@ -507,7 +507,7 @@ fn test_env_config_oss() -> Result<()> { assert!(configured.query.table_meta_cache_enabled); assert_eq!(512, configured.query.table_memory_cache_mb_size); assert_eq!("_cache_env", configured.query.table_disk_cache_root); - assert_eq!(512, 
configured.query.table_disk_cache_mb_size); + assert_eq!(21474836480, configured.query.table_disk_cache_max_size); assert_eq!(10240, configured.query.table_cache_segment_count); assert_eq!(256, configured.query.table_cache_snapshot_count); assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count); @@ -567,7 +567,7 @@ table_cache_segment_count = 10240 table_cache_block_meta_count = 102400 table_memory_cache_mb_size = 256 table_disk_cache_root = "_cache" -table_disk_cache_mb_size = 1024 +table_disk_cache_max_size = 1024 table_cache_bloom_index_meta_count = 3000 table_cache_bloom_index_filter_count = 1048576 management_mode = false diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index c7eaaa2bc7255..7656d2218bb95 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -63,13 +63,13 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo | "query" | "table_cache_block_meta_count" | "102400" | "" | | "query" | "table_cache_bloom_index_filter_count" | "1048576" | "" | | "query" | "table_cache_bloom_index_meta_count" | "3000" | "" | -| "query" | "table_cache_column_mb_size" | "10240" | "" | | "query" | "table_cache_segment_count" | "10240" | "" | | "query" | "table_cache_snapshot_count" | "256" | "" | | "query" | "table_cache_statistic_count" | "256" | "" | | "query" | "table_data_cache_enabled" | "false" | "" | +| "query" | "table_data_cache_in_memory_max_size" | "0" | "" | | "query" | "table_data_cache_population_queue_size" | "65536" | "" | -| "query" | "table_disk_cache_mb_size" | "20480" | "" | +| "query" | "table_disk_cache_max_size" | "21474836480" | "" | | "query" | "table_disk_cache_root" | "_cache" | "" | | "query" | "table_engine_memory_enabled" | "true" | "" | | "query" | "table_memory_cache_mb_size" | "256" | "" | 
diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index 739e7d27ddbaa..ecac32a185129 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -58,7 +58,7 @@ impl CacheManager { Self::new_block_data_cache( &config.table_disk_cache_root, config.table_data_cache_population_queue_size, - config.table_disk_cache_mb_size * 1024 * 1024, + config.table_disk_cache_max_size, )? } else { None @@ -66,7 +66,7 @@ impl CacheManager { // setup in-memory table column cache let table_column_array_cache = Self::new_in_memory_cache( - config.table_data_cache_in_memory_column_mb_size * 1024 * 1024, + config.table_data_cache_in_memory_max_size, ColumnArrayMeter, "table_data_column_array", ); From b73055807fb3e9f83e81dc9f2b059cdbb8bd2484 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Sun, 12 Feb 2023 23:20:27 +0800 Subject: [PATCH 56/80] refactor: disk cache open file without holding lock --- .../common/cache/src/providers/disk_cache.rs | 74 +++++++++---------- .../cache/tests/it/providers/disk_cache.rs | 8 +- 2 files changed, 40 insertions(+), 42 deletions(-) diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index e1c33441d65b8..a3698e4f29d9a 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -227,15 +227,12 @@ where C: Cache self.cache.contains(&cache_key.0) } - /// Get an opened `File` for `key`, if one exists and can be opened. Updates the Cache state - /// of the file if present. Avoid using this method if at all possible, prefer `.get`. 
- pub fn get_file(&mut self, key: &str) -> self::result::Result { + pub fn get_cache_path(&mut self, key: &str) -> Option { let cache_key = self.cache_key(key); - let path = self.abs_path_of_cache_key(&cache_key); self.cache .get(&cache_key.0) - .ok_or(Error::FileNotInCache) - .and_then(|_len| File::open(path).map_err(Into::into)) + .map(|_| ()) // release the &mut self + .map(|_| self.abs_path_of_cache_key(&cache_key)) } /// Remove the given key from the cache. @@ -265,8 +262,6 @@ pub mod result { /// The file was too large to fit in the cache. FileTooLarge, /// The file was not in the cache. - FileNotInCache, - /// The file was not in the cache. MalformedPath, /// An IO Error occurred. Io(io::Error), @@ -276,7 +271,6 @@ pub mod result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Error::FileTooLarge => write!(f, "File too large"), - Error::FileNotInCache => write!(f, "File not in cache"), Error::MalformedPath => write!(f, "Malformed catch file path"), Error::Io(ref e) => write!(f, "{e}"), } @@ -309,40 +303,44 @@ impl CacheAccessor, common_cache::DefaultHashBuilder, Count> { fn get>(&self, k: Q) -> Option>> { let k = k.as_ref(); - // check disk cache - let read_file = || { - let mut file = { - let mut cache = self.write(); - cache.get_file(k)? 
+ { + let mut cache = self.write(); + cache.get_cache_path(k) + } + .and_then(|cache_file_path| { + // check disk cache + let get_cache_content = || { + let mut v = vec![]; + let mut file = File::open(cache_file_path)?; + file.read_to_end(&mut v)?; + Ok::<_, Box>(v) }; - let mut v = vec![]; - file.read_to_end(&mut v)?; - Ok::<_, Box>(v) - }; - match read_file() { - Ok(mut bytes) => { - if let Err(e) = validate_checksum(bytes.as_slice()) { - error!("data cache, of key {k}, crc validation failure: {e}"); - { - // remove the invalid cache, error of removal ignored - let mut cache = self.write(); - let _ = cache.remove(k); + + match get_cache_content() { + Ok(mut bytes) => { + if let Err(e) = validate_checksum(bytes.as_slice()) { + error!("data cache, of key {k}, crc validation failure: {e}"); + { + // remove the invalid cache, error of removal ignored + let mut cache = self.write(); + let _ = cache.remove(k); + } + None + } else { + // trim the checksum bytes and return + let total_len = bytes.len(); + let body_len = total_len - 4; + bytes.truncate(body_len); + let item = Arc::new(bytes); + Some(item) } + } + Err(e) => { + error!("get disk cache item failed, cache_key {k}. 
{e}"); None - } else { - // trim the checksum bytes and return - let total_len = bytes.len(); - let body_len = total_len - 4; - bytes.truncate(body_len); - let item = Arc::new(bytes); - Some(item) } } - Err(e) => { - error!("get disk cache item failed, {}", e); - None - } - } + }) } fn put(&self, key: String, value: Arc>) { diff --git a/src/query/storages/common/cache/tests/it/providers/disk_cache.rs b/src/query/storages/common/cache/tests/it/providers/disk_cache.rs index 2bb3da98fd764..42347d8d31b8f 100644 --- a/src/query/storages/common/cache/tests/it/providers/disk_cache.rs +++ b/src/query/storages/common/cache/tests/it/providers/disk_cache.rs @@ -176,10 +176,10 @@ fn test_add_get_lru() { c.insert_single_slice("file1", &[1; 10]).unwrap(); c.insert_single_slice("file2", &[2; 10]).unwrap(); // Get the file to bump its LRU status. - assert_eq!(read_all(&mut c.get_file("file1").unwrap()).unwrap(), vec![ - 1u8; - 10 - ]); + assert_eq!( + read_all(&mut File::open(c.get_cache_path("file1").unwrap()).unwrap()).unwrap(), + vec![1u8; 10] + ); // Adding this third file should put the cache above the limit. 
c.insert_single_slice("file3", &[3; 10]).unwrap(); assert_eq!(c.size(), 20); From 9e9c6e5c3286d076b19c0445f179fe18d07e0533 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 13 Feb 2023 00:11:44 +0800 Subject: [PATCH 57/80] remove obsolete setting from ci configs --- scripts/ci/deploy/config/databend-query-node-1.toml | 1 - scripts/ci/deploy/config/databend-query-node-2.toml | 1 - scripts/ci/deploy/config/databend-query-node-3.toml | 1 - src/query/expression/src/block.rs | 5 ----- .../storages/common/cache/src/providers/table_data_cache.rs | 6 ++---- 5 files changed, 2 insertions(+), 12 deletions(-) diff --git a/scripts/ci/deploy/config/databend-query-node-1.toml b/scripts/ci/deploy/config/databend-query-node-1.toml index c147f99d18f42..914c6ce565899 100644 --- a/scripts/ci/deploy/config/databend-query-node-1.toml +++ b/scripts/ci/deploy/config/databend-query-node-1.toml @@ -36,7 +36,6 @@ database_engine_github_enabled = true table_meta_cache_enabled = true table_memory_cache_mb_size = 1024 table_disk_cache_root = "_cache" -table_disk_cache_mb_size = 10240 table_cache_bloom_index_meta_count=3000 table_cache_bloom_index_filter_count=1048576 diff --git a/scripts/ci/deploy/config/databend-query-node-2.toml b/scripts/ci/deploy/config/databend-query-node-2.toml index b1318b85c45d5..57af9541aeeda 100644 --- a/scripts/ci/deploy/config/databend-query-node-2.toml +++ b/scripts/ci/deploy/config/databend-query-node-2.toml @@ -36,7 +36,6 @@ database_engine_github_enabled = true table_meta_cache_enabled = true table_memory_cache_mb_size = 1024 table_disk_cache_root = "./.databend/cache" -table_disk_cache_mb_size = 10240 table_cache_bloom_index_meta_count=3000 table_cache_bloom_index_filter_count=1048576 diff --git a/scripts/ci/deploy/config/databend-query-node-3.toml b/scripts/ci/deploy/config/databend-query-node-3.toml index 5045053e8778d..52ce664de0577 100644 --- a/scripts/ci/deploy/config/databend-query-node-3.toml +++ b/scripts/ci/deploy/config/databend-query-node-3.toml 
@@ -37,7 +37,6 @@ database_engine_github_enabled = true table_meta_cache_enabled = true table_memory_cache_mb_size = 1024 table_disk_cache_root = "./.databend/cache" -table_disk_cache_mb_size = 10240 table_cache_bloom_index_meta_count=3000 table_cache_bloom_index_filter_count=1048576 diff --git a/src/query/expression/src/block.rs b/src/query/expression/src/block.rs index cdedb8db464e0..f7aeaf830f657 100644 --- a/src/query/expression/src/block.rs +++ b/src/query/expression/src/block.rs @@ -359,13 +359,11 @@ impl DataBlock { let mut chunk_idx: usize = 0; let schema_fields = schema.fields(); let chunk_columns = chuck.arrays(); - eprintln!("chunk_column len {}", chunk_columns.len()); let mut columns = Vec::with_capacity(default_vals.len()); for (i, default_val) in default_vals.iter().enumerate() { let field = &schema_fields[i]; let data_type = field.data_type(); - eprintln!("data type {}", data_type); let column = match default_val { Some(default_val) => BlockEntry { @@ -373,15 +371,12 @@ impl DataBlock { value: Value::Scalar(default_val.to_owned()), }, None => { - eprintln!("chunk idx {}", chunk_idx); let chunk_column = chunk_columns.get(chunk_idx).unwrap(); - eprintln!("get column"); chunk_idx += 1; let entry = BlockEntry { data_type: data_type.clone(), value: Value::Column(Column::from_arrow(chunk_column.as_ref(), data_type)), }; - eprintln!("build entry"); entry } }; diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index 95606001d80ce..3ac6465f79aff 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -22,7 +22,6 @@ use common_exception::Result; use crossbeam_channel::TrySendError; use tracing::error; use tracing::info; -use tracing::warn; use crate::metrics_inc_cache_access_count; use crate::metrics_inc_cache_hit_count; @@ -73,7 +72,7 @@ pub struct TableDataCache { 
_cache_populator: DiskCachePopulator, } -const TABLE_DATA_CACHE_NAME: &str = "table_data_cache"; +const TABLE_DATA_CACHE_NAME: &str = "table_data"; pub struct TableDataCacheBuilder; impl TableDataCacheBuilder { @@ -118,7 +117,6 @@ impl CacheAccessor, DefaultHashBuilder, Count> for TableDataCach Err(TrySendError::Full(_)) => { metrics_inc_cache_population_pending_count(-1, TABLE_DATA_CACHE_NAME); metrics_inc_cache_population_overflow_count(-1, TABLE_DATA_CACHE_NAME); - warn!("table data cache population queue is full"); } Err(TrySendError::Disconnected(_)) => { error!("table data cache population thread is down"); @@ -157,7 +155,7 @@ where T: CacheAccessor, DefaultHashBuilder, Count> + Send + Sync metrics_inc_cache_population_pending_count(-1, TABLE_DATA_CACHE_NAME); } Err(_) => { - info!("cache work shutdown"); + info!("table data cache worker shutdown"); break; } } From 01486b25757115da9b545af3e24bbe213a534cfe Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 13 Feb 2023 00:26:58 +0800 Subject: [PATCH 58/80] fix: revert DataBlock --- src/query/expression/src/block.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/query/expression/src/block.rs b/src/query/expression/src/block.rs index f7aeaf830f657..5a48a31a2549a 100644 --- a/src/query/expression/src/block.rs +++ b/src/query/expression/src/block.rs @@ -371,13 +371,12 @@ impl DataBlock { value: Value::Scalar(default_val.to_owned()), }, None => { - let chunk_column = chunk_columns.get(chunk_idx).unwrap(); + let chunk_column = &chunk_columns[chunk_idx]; chunk_idx += 1; - let entry = BlockEntry { + BlockEntry { data_type: data_type.clone(), value: Value::Column(Column::from_arrow(chunk_column.as_ref(), data_type)), - }; - entry + } } }; From 06c59061c6fa6e38d505262b3cfffc7640a20bce Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 13 Feb 2023 00:53:49 +0800 Subject: [PATCH 59/80] fix: metric of cache population overflow should be increased --- 
.../storages/common/cache/src/providers/table_data_cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index 3ac6465f79aff..5488c8ed60a59 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -116,7 +116,7 @@ impl CacheAccessor, DefaultHashBuilder, Count> for TableDataCach } Err(TrySendError::Full(_)) => { metrics_inc_cache_population_pending_count(-1, TABLE_DATA_CACHE_NAME); - metrics_inc_cache_population_overflow_count(-1, TABLE_DATA_CACHE_NAME); + metrics_inc_cache_population_overflow_count(1, TABLE_DATA_CACHE_NAME); } Err(TrySendError::Disconnected(_)) => { error!("table data cache population thread is down"); From 3954137e9f3c677414c9f35f21dbfa3bff09db67 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 13 Feb 2023 01:13:21 +0800 Subject: [PATCH 60/80] adjust doc --- .../storages/common/cache/src/providers/table_data_cache.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index 5488c8ed60a59..6a19b093695ac 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -62,9 +62,6 @@ impl AsRef for TableDataColumnCacheKey { } } -/// Tiered cache which consist of -/// - a bounded channel that keep the references of items being cached -/// - a disk or redis based external cache #[derive(Clone)] pub struct TableDataCache { external_cache: T, From 31d1f443ec83df0832fef03160374e7b1930fbf4 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 13 Feb 2023 01:38:48 +0800 Subject: [PATCH 61/80] remove obsolete setting --- scripts/ci/deploy/config/databend-query-node-shared.toml | 1 - 1 file 
changed, 1 deletion(-) diff --git a/scripts/ci/deploy/config/databend-query-node-shared.toml b/scripts/ci/deploy/config/databend-query-node-shared.toml index ac27fea74587c..eddbd03329d41 100644 --- a/scripts/ci/deploy/config/databend-query-node-shared.toml +++ b/scripts/ci/deploy/config/databend-query-node-shared.toml @@ -36,7 +36,6 @@ database_engine_github_enabled = true table_meta_cache_enabled = true table_memory_cache_mb_size = 1024 table_disk_cache_root = "_cache" -table_disk_cache_mb_size = 10240 table_cache_bloom_index_meta_count=3000 table_cache_bloom_index_filter_count=1048576 From d7be32ba4dc711679f752446b898733485b00d91 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 13 Feb 2023 12:14:56 +0800 Subject: [PATCH 62/80] add warning log for failure of invalid disk cache item removal --- .../storages/common/cache/src/providers/disk_cache.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index a3698e4f29d9a..101cea2c40689 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -33,6 +33,7 @@ use parking_lot::RwLock; use siphasher::sip128; use siphasher::sip128::Hasher128; use tracing::error; +use tracing::warn; use crate::CacheAccessor; @@ -322,8 +323,13 @@ impl CacheAccessor, common_cache::DefaultHashBuilder, Count> error!("data cache, of key {k}, crc validation failure: {e}"); { // remove the invalid cache, error of removal ignored - let mut cache = self.write(); - let _ = cache.remove(k); + let r = { + let mut cache = self.write(); + cache.remove(k) + }; + if let Err(e) = r { + warn!("failed to remove invalid cache item, key {k}. 
{e}"); + } } None } else { From c17edbb7f43fc6ae1d3222e5cc06f166c018a6a3 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 13 Feb 2023 12:15:47 +0800 Subject: [PATCH 63/80] remove unnecessary clone --- .../fuse/src/io/read/block/block_reader_parquet.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index 5ecf06dea5e03..af28a2f08ce5b 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; - use common_exception::Result; use common_expression::DataBlock; use storages_common_table_meta::meta::BlockMeta; @@ -29,12 +27,7 @@ impl BlockReader { settings: &ReadSettings, meta: &BlockMeta, ) -> Result { - // Build columns meta. - let columns_meta = meta - .col_metas - .iter() - .map(|(column_id, meta)| (*column_id, meta.clone())) - .collect::>(); + let columns_meta = &meta.col_metas; // Get the merged IO read result. let fetched = self From 88b2e3d8b88cad6dbf362b14b2995632e008a2c9 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 13 Feb 2023 13:54:03 +0800 Subject: [PATCH 64/80] fix lint --- .../fuse/src/io/read/block/block_reader_parquet.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index af28a2f08ce5b..cc484c929514c 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -30,12 +30,12 @@ impl BlockReader { let columns_meta = &meta.col_metas; // Get the merged IO read result. 
- let fetched = self - .read_columns_data_by_merge_io(settings, &meta.location.0, &columns_meta) + let merge_io_read_result = self + .read_columns_data_by_merge_io(settings, &meta.location.0, columns_meta) .await?; // Get the columns chunk. - let column_chunks = fetched.columns_chunks()?; + let column_chunks = merge_io_read_result.columns_chunks()?; let num_rows = meta.row_count as usize; @@ -43,7 +43,7 @@ impl BlockReader { &meta.location.0, num_rows, &meta.compression, - &columns_meta, + columns_meta, column_chunks, None, ) From 8afb9811fe285dea4d6f933c674436fa7928ccf6 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 13 Feb 2023 15:49:35 +0800 Subject: [PATCH 65/80] refactor: TableDataColumnCacheKey -> TableDataCacheKey --- src/query/storages/common/cache/src/lib.rs | 2 +- src/query/storages/common/cache/src/providers/mod.rs | 2 +- .../common/cache/src/providers/table_data_cache.rs | 10 +++++----- .../fuse/src/io/read/block/block_reader_merge_io.rs | 4 ++-- .../src/io/read/block/block_reader_merge_io_async.rs | 6 +++--- .../src/io/read/block/block_reader_merge_io_sync.rs | 6 +++--- .../io/read/block/block_reader_parquet_deserialize.rs | 4 ++-- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/query/storages/common/cache/src/lib.rs b/src/query/storages/common/cache/src/lib.rs index cb889efceedf8..af50223ff5dd4 100644 --- a/src/query/storages/common/cache/src/lib.rs +++ b/src/query/storages/common/cache/src/lib.rs @@ -33,7 +33,7 @@ pub use providers::LruDiskCacheBuilder; pub use providers::LruDiskCacheHolder; pub use providers::TableDataCache; pub use providers::TableDataCacheBuilder; -pub use providers::TableDataColumnCacheKey; +pub use providers::TableDataCacheKey; pub use read::CacheKey; pub use read::CachedReader; pub use read::InMemoryBytesCacheReader; diff --git a/src/query/storages/common/cache/src/providers/mod.rs b/src/query/storages/common/cache/src/providers/mod.rs index fa4c362bad6ca..083a40dcd2749 100644 --- 
a/src/query/storages/common/cache/src/providers/mod.rs +++ b/src/query/storages/common/cache/src/providers/mod.rs @@ -29,4 +29,4 @@ pub use memory_cache::InMemoryCacheBuilder; pub use memory_cache::InMemoryItemCacheHolder; pub use table_data_cache::TableDataCache; pub use table_data_cache::TableDataCacheBuilder; -pub use table_data_cache::TableDataColumnCacheKey; +pub use table_data_cache::TableDataCacheKey; diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index 6a19b093695ac..b524af567290f 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -38,11 +38,11 @@ struct CacheItem { } #[derive(Clone)] -pub struct TableDataColumnCacheKey { +pub struct TableDataCacheKey { cache_key: String, } -impl TableDataColumnCacheKey { +impl TableDataCacheKey { pub fn new(block_path: &str, column_id: u32) -> Self { Self { cache_key: format!("{block_path}-{column_id}"), @@ -50,13 +50,13 @@ impl TableDataColumnCacheKey { } } -impl From for String { - fn from(value: TableDataColumnCacheKey) -> Self { +impl From for String { + fn from(value: TableDataCacheKey) -> Self { value.cache_key } } -impl AsRef for TableDataColumnCacheKey { +impl AsRef for TableDataCacheKey { fn as_ref(&self) -> &str { &self.cache_key } diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs index 41a8255a2a2e1..8a5e93c7fdf5a 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs @@ -21,7 +21,7 @@ use common_exception::ErrorCode; use common_exception::Result; use storages_common_cache::CacheAccessor; use storages_common_cache::TableDataCache; -use storages_common_cache::TableDataColumnCacheKey; +use 
storages_common_cache::TableDataCacheKey; use storages_common_cache_manager::SizedColumnArray; pub struct OwnerMemory { @@ -113,7 +113,7 @@ impl MergeIOReadResult { if let Some(table_data_cache) = &self.table_data_cache { // populate raw column data cache (compressed raw bytes) if let Ok(chunk_data) = self.get_chunk(chunk_index, &self.block_path) { - let cache_key = TableDataColumnCacheKey::new(&self.block_path, column_id); + let cache_key = TableDataCacheKey::new(&self.block_path, column_id); let data = &chunk_data[range.clone()]; table_data_cache.put(cache_key.as_ref().to_owned(), Arc::new(data.to_vec())); } diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs index e21949ed4dfa7..c7f5380574344 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs @@ -24,7 +24,7 @@ use common_exception::Result; use futures::future::try_join_all; use opendal::Object; use storages_common_cache::CacheAccessor; -use storages_common_cache::TableDataColumnCacheKey; +use storages_common_cache::TableDataCacheKey; use storages_common_cache_manager::CacheManager; use storages_common_table_meta::meta::ColumnMeta; @@ -137,13 +137,13 @@ impl BlockReader { } let mut ranges = vec![]; - // for async read, always try using table data cache (if enabled in settings) + // for async read, try using table data cache (if enabled in settings) let column_data_cache = CacheManager::instance().get_table_data_cache(); let column_array_cache = CacheManager::instance().get_table_data_array_cache(); let mut cached_column_data = vec![]; let mut cached_column_array = vec![]; for (_index, (column_id, ..)) in self.project_indices.iter() { - let column_cache_key = TableDataColumnCacheKey::new(location, *column_id); + let column_cache_key = TableDataCacheKey::new(location, *column_id); // first, check 
column array object cache if let Some(cache_array) = column_array_cache.get(&column_cache_key) { diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs index 29820b42f99ef..90b665ea643a7 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs @@ -21,7 +21,7 @@ use common_exception::ErrorCode; use common_exception::Result; use opendal::Object; use storages_common_cache::CacheAccessor; -use storages_common_cache::TableDataColumnCacheKey; +use storages_common_cache::TableDataCacheKey; use storages_common_cache_manager::CacheManager; use crate::fuse_part::FusePartInfo; @@ -63,7 +63,7 @@ impl BlockReader { let owner_memory = OwnerMemory::create(io_res); - // for sync read, we disable table *data* cache + // for sync read, we disable table data cache let table_data_cache = None; let mut read_res = MergeIOReadResult::create( owner_memory, @@ -107,7 +107,7 @@ impl BlockReader { for (_index, (column_id, ..)) in self.project_indices.iter() { // first, check column array object cache let block_path = &part.location; - let column_cache_key = TableDataColumnCacheKey::new(block_path, *column_id); + let column_cache_key = TableDataCacheKey::new(block_path, *column_id); if let Some(cache_array) = column_array_cache.get(&column_cache_key) { cached_column_array.push((*column_id, cache_array)); continue; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs index 993f8ecc6b3ad..ac765d38bda12 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs @@ -32,7 +32,7 @@ use common_exception::Result; use common_expression::DataBlock; use 
common_storage::ColumnNode; use storages_common_cache::CacheAccessor; -use storages_common_cache::TableDataColumnCacheKey; +use storages_common_cache::TableDataCacheKey; use storages_common_cache_manager::CacheManager; use storages_common_cache_manager::SizedColumnArray; use storages_common_table_meta::meta::ColumnMeta; @@ -179,7 +179,7 @@ impl BlockReader { // populate array cache items for item in deserialized_column_arrays.into_iter() { if let DeserializedArray::Deserialized((column_id, array, size)) = item { - let key = TableDataColumnCacheKey::new(block_path, column_id); + let key = TableDataCacheKey::new(block_path, column_id); cache.put(key.into(), Arc::new((array, size))) } } From b6872c6a7b445076597f8d7c47eb4ab44d0af8d5 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 13 Feb 2023 16:18:45 +0800 Subject: [PATCH 66/80] resolve conflict --- .../storages/testdata/configs_table_basic.txt | 251 +++++++++--------- 1 file changed, 127 insertions(+), 124 deletions(-) diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index a0ae3f622b0a6..4b7585da5fb2f 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -1,129 +1,132 @@ ---------- TABLE INFO ------------ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemConfigs -------- TABLE CONTENTS ---------- -+-----------+----------------------------------------+----------------------------------+----------+ -| Column 0 | Column 1 | Column 2 | Column 3 | -+-----------+----------------------------------------+----------------------------------+----------+ -| "log" | "dir" | "./.databend/logs" | "" | -| "log" | "file.dir" | "./.databend/logs" | "" | -| "log" | "file.format" | "text" | "" | -| "log" | "file.level" | "DEBUG" | "" | -| "log" | "file.on" | "true" | "" | -| "log" | "level" | 
"DEBUG" | "" | -| "log" | "query_enabled" | "false" | "" | -| "log" | "stderr.format" | "text" | "" | -| "log" | "stderr.level" | "INFO" | "" | -| "log" | "stderr.on" | "true" | "" | -| "meta" | "auto_sync_interval" | "0" | "" | -| "meta" | "client_timeout_in_second" | "10" | "" | -| "meta" | "embedded_dir" | "" | "" | -| "meta" | "endpoints" | "" | "" | -| "meta" | "password" | "" | "" | -| "meta" | "rpc_tls_meta_server_root_ca_cert" | "" | "" | -| "meta" | "rpc_tls_meta_service_domain_name" | "localhost" | "" | -| "meta" | "username" | "root" | "" | -| "query" | "admin_api_address" | "127.0.0.1:8080" | "" | -| "query" | "api_tls_server_cert" | "" | "" | -| "query" | "api_tls_server_key" | "" | "" | -| "query" | "api_tls_server_root_ca_cert" | "" | "" | -| "query" | "async_insert_busy_timeout" | "200" | "" | -| "query" | "async_insert_max_data_size" | "10000" | "" | -| "query" | "async_insert_stale_timeout" | "0" | "" | -| "query" | "clickhouse_handler_host" | "127.0.0.1" | "" | -| "query" | "clickhouse_handler_port" | "9000" | "" | -| "query" | "clickhouse_http_handler_host" | "127.0.0.1" | "" | -| "query" | "clickhouse_http_handler_port" | "8124" | "" | -| "query" | "cluster_id" | "" | "" | -| "query" | "database_engine_github_enabled" | "true" | "" | -| "query" | "flight_api_address" | "127.0.0.1:9090" | "" | -| "query" | "http_handler_host" | "127.0.0.1" | "" | -| "query" | "http_handler_port" | "8000" | "" | -| "query" | "http_handler_result_timeout_secs" | "60" | "" | -| "query" | "http_handler_tls_server_cert" | "" | "" | -| "query" | "http_handler_tls_server_key" | "" | "" | -| "query" | "http_handler_tls_server_root_ca_cert" | "" | "" | -| "query" | "internal_enable_sandbox_tenant" | "false" | "" | -| "query" | "jwt_key_file" | "" | "" | -| "query" | "jwt_key_files" | "" | "" | -| "query" | "management_mode" | "false" | "" | -| "query" | "max_active_sessions" | "256" | "" | -| "query" | "max_memory_limit_enabled" | "false" | "" | -| "query" | 
"max_query_log_size" | "10000" | "" | -| "query" | "max_server_memory_usage" | "0" | "" | -| "query" | "metric_api_address" | "127.0.0.1:7070" | "" | -| "query" | "mysql_handler_host" | "127.0.0.1" | "" | -| "query" | "mysql_handler_port" | "3307" | "" | -| "query" | "num_cpus" | "0" | "" | -| "query" | "quota" | "null" | "" | -| "query" | "rpc_tls_query_server_root_ca_cert" | "" | "" | -| "query" | "rpc_tls_query_service_domain_name" | "localhost" | "" | -| "query" | "rpc_tls_server_cert" | "" | "" | -| "query" | "rpc_tls_server_key" | "" | "" | -| "query" | "share_endpoint_address" | "" | "" | -| "query" | "share_endpoint_auth_token_file" | "" | "" | -| "query" | "table_cache_block_meta_count" | "102400" | "" | -| "query" | "table_cache_bloom_index_filter_count" | "1048576" | "" | -| "query" | "table_cache_bloom_index_meta_count" | "3000" | "" | -| "query" | "table_cache_segment_count" | "10240" | "" | -| "query" | "table_cache_snapshot_count" | "256" | "" | -| "query" | "table_cache_statistic_count" | "256" | "" | -| "query" | "table_disk_cache_mb_size" | "1024" | "" | -| "query" | "table_disk_cache_root" | "_cache" | "" | -| "query" | "table_engine_memory_enabled" | "true" | "" | -| "query" | "table_memory_cache_mb_size" | "256" | "" | -| "query" | "table_meta_cache_enabled" | "true" | "" | -| "query" | "tenant_id" | "test" | "" | -| "query" | "users" | "" | "" | -| "query" | "wait_timeout_mills" | "5000" | "" | -| "storage" | "allow_insecure" | "false" | "" | -| "storage" | "azblob.account_key" | "" | "" | -| "storage" | "azblob.account_name" | "" | "" | -| "storage" | "azblob.container" | "" | "" | -| "storage" | "azblob.endpoint_url" | "" | "" | -| "storage" | "azblob.root" | "" | "" | -| "storage" | "cache.fs.data_path" | "_data" | "" | -| "storage" | "cache.moka.max_capacity" | "1073741824" | "" | -| "storage" | "cache.moka.time_to_idle" | "600" | "" | -| "storage" | "cache.moka.time_to_live" | "3600" | "" | -| "storage" | "cache.num_cpus" | "0" | "" | -| 
"storage" | "cache.redis.db" | "0" | "" | -| "storage" | "cache.redis.default_ttl" | "0" | "" | -| "storage" | "cache.redis.endpoint_url" | "" | "" | -| "storage" | "cache.redis.password" | "" | "" | -| "storage" | "cache.redis.root" | "" | "" | -| "storage" | "cache.redis.username" | "" | "" | -| "storage" | "cache.type" | "none" | "" | -| "storage" | "fs.data_path" | "_data" | "" | -| "storage" | "gcs.bucket" | "" | "" | -| "storage" | "gcs.credential" | "" | "" | -| "storage" | "gcs.endpoint_url" | "https://storage.googleapis.com" | "" | -| "storage" | "gcs.root" | "" | "" | -| "storage" | "hdfs.name_node" | "" | "" | -| "storage" | "hdfs.root" | "" | "" | -| "storage" | "num_cpus" | "0" | "" | -| "storage" | "obs.access_key_id" | "" | "" | -| "storage" | "obs.bucket" | "" | "" | -| "storage" | "obs.endpoint_url" | "" | "" | -| "storage" | "obs.root" | "" | "" | -| "storage" | "obs.secret_access_key" | "" | "" | -| "storage" | "oss.access_key_id" | "" | "" | -| "storage" | "oss.access_key_secret" | "" | "" | -| "storage" | "oss.bucket" | "" | "" | -| "storage" | "oss.endpoint_url" | "" | "" | -| "storage" | "oss.presign_endpoint_url" | "" | "" | -| "storage" | "oss.root" | "" | "" | -| "storage" | "s3.access_key_id" | "" | "" | -| "storage" | "s3.bucket" | "" | "" | -| "storage" | "s3.enable_virtual_host_style" | "false" | "" | -| "storage" | "s3.endpoint_url" | "https://s3.amazonaws.com" | "" | -| "storage" | "s3.external_id" | "" | "" | -| "storage" | "s3.master_key" | "" | "" | -| "storage" | "s3.region" | "" | "" | -| "storage" | "s3.role_arn" | "" | "" | -| "storage" | "s3.root" | "" | "" | -| "storage" | "s3.secret_access_key" | "" | "" | -| "storage" | "s3.security_token" | "" | "" | -| "storage" | "type" | "fs" | "" | -+-----------+----------------------------------------+----------------------------------+----------+ ++-----------+------------------------------------------+----------------------------------+----------+ +| Column 0 | Column 1 | Column 2 
| Column 3 | ++-----------+------------------------------------------+----------------------------------+----------+ +| "log" | "dir" | "./.databend/logs" | "" | +| "log" | "file.dir" | "./.databend/logs" | "" | +| "log" | "file.format" | "text" | "" | +| "log" | "file.level" | "DEBUG" | "" | +| "log" | "file.on" | "true" | "" | +| "log" | "level" | "DEBUG" | "" | +| "log" | "query_enabled" | "false" | "" | +| "log" | "stderr.format" | "text" | "" | +| "log" | "stderr.level" | "INFO" | "" | +| "log" | "stderr.on" | "true" | "" | +| "meta" | "auto_sync_interval" | "0" | "" | +| "meta" | "client_timeout_in_second" | "10" | "" | +| "meta" | "embedded_dir" | "" | "" | +| "meta" | "endpoints" | "" | "" | +| "meta" | "password" | "" | "" | +| "meta" | "rpc_tls_meta_server_root_ca_cert" | "" | "" | +| "meta" | "rpc_tls_meta_service_domain_name" | "localhost" | "" | +| "meta" | "username" | "root" | "" | +| "query" | "admin_api_address" | "127.0.0.1:8080" | "" | +| "query" | "api_tls_server_cert" | "" | "" | +| "query" | "api_tls_server_key" | "" | "" | +| "query" | "api_tls_server_root_ca_cert" | "" | "" | +| "query" | "async_insert_busy_timeout" | "200" | "" | +| "query" | "async_insert_max_data_size" | "10000" | "" | +| "query" | "async_insert_stale_timeout" | "0" | "" | +| "query" | "clickhouse_handler_host" | "127.0.0.1" | "" | +| "query" | "clickhouse_handler_port" | "9000" | "" | +| "query" | "clickhouse_http_handler_host" | "127.0.0.1" | "" | +| "query" | "clickhouse_http_handler_port" | "8124" | "" | +| "query" | "cluster_id" | "" | "" | +| "query" | "database_engine_github_enabled" | "true" | "" | +| "query" | "flight_api_address" | "127.0.0.1:9090" | "" | +| "query" | "http_handler_host" | "127.0.0.1" | "" | +| "query" | "http_handler_port" | "8000" | "" | +| "query" | "http_handler_result_timeout_secs" | "60" | "" | +| "query" | "http_handler_tls_server_cert" | "" | "" | +| "query" | "http_handler_tls_server_key" | "" | "" | +| "query" | 
"http_handler_tls_server_root_ca_cert" | "" | "" | +| "query" | "internal_enable_sandbox_tenant" | "false" | "" | +| "query" | "jwt_key_file" | "" | "" | +| "query" | "jwt_key_files" | "" | "" | +| "query" | "management_mode" | "false" | "" | +| "query" | "max_active_sessions" | "256" | "" | +| "query" | "max_memory_limit_enabled" | "false" | "" | +| "query" | "max_query_log_size" | "10000" | "" | +| "query" | "max_server_memory_usage" | "0" | "" | +| "query" | "metric_api_address" | "127.0.0.1:7070" | "" | +| "query" | "mysql_handler_host" | "127.0.0.1" | "" | +| "query" | "mysql_handler_port" | "3307" | "" | +| "query" | "num_cpus" | "0" | "" | +| "query" | "quota" | "null" | "" | +| "query" | "rpc_tls_query_server_root_ca_cert" | "" | "" | +| "query" | "rpc_tls_query_service_domain_name" | "localhost" | "" | +| "query" | "rpc_tls_server_cert" | "" | "" | +| "query" | "rpc_tls_server_key" | "" | "" | +| "query" | "share_endpoint_address" | "" | "" | +| "query" | "share_endpoint_auth_token_file" | "" | "" | +| "query" | "table_cache_block_meta_count" | "102400" | "" | +| "query" | "table_cache_bloom_index_filter_count" | "1048576" | "" | +| "query" | "table_cache_bloom_index_meta_count" | "3000" | "" | +| "query" | "table_cache_segment_count" | "10240" | "" | +| "query" | "table_cache_snapshot_count" | "256" | "" | +| "query" | "table_cache_statistic_count" | "256" | "" | +| "query" | "table_data_cache_enabled" | "false" | "" | +| "query" | "table_data_cache_in_memory_max_size" | "0" | "" | +| "query" | "table_data_cache_population_queue_size" | "65536" | "" | +| "query" | "table_disk_cache_max_size" | "21474836480" | "" | +| "query" | "table_disk_cache_root" | "_cache" | "" | +| "query" | "table_engine_memory_enabled" | "true" | "" | +| "query" | "table_memory_cache_mb_size" | "256" | "" | +| "query" | "table_meta_cache_enabled" | "true" | "" | +| "query" | "tenant_id" | "test" | "" | +| "query" | "users" | "" | "" | +| "query" | "wait_timeout_mills" | "5000" | 
"" | +| "storage" | "allow_insecure" | "false" | "" | +| "storage" | "azblob.account_key" | "" | "" | +| "storage" | "azblob.account_name" | "" | "" | +| "storage" | "azblob.container" | "" | "" | +| "storage" | "azblob.endpoint_url" | "" | "" | +| "storage" | "azblob.root" | "" | "" | +| "storage" | "cache.fs.data_path" | "_data" | "" | +| "storage" | "cache.moka.max_capacity" | "1073741824" | "" | +| "storage" | "cache.moka.time_to_idle" | "600" | "" | +| "storage" | "cache.moka.time_to_live" | "3600" | "" | +| "storage" | "cache.num_cpus" | "0" | "" | +| "storage" | "cache.redis.db" | "0" | "" | +| "storage" | "cache.redis.default_ttl" | "0" | "" | +| "storage" | "cache.redis.endpoint_url" | "" | "" | +| "storage" | "cache.redis.password" | "" | "" | +| "storage" | "cache.redis.root" | "" | "" | +| "storage" | "cache.redis.username" | "" | "" | +| "storage" | "cache.type" | "none" | "" | +| "storage" | "fs.data_path" | "_data" | "" | +| "storage" | "gcs.bucket" | "" | "" | +| "storage" | "gcs.credential" | "" | "" | +| "storage" | "gcs.endpoint_url" | "https://storage.googleapis.com" | "" | +| "storage" | "gcs.root" | "" | "" | +| "storage" | "hdfs.name_node" | "" | "" | +| "storage" | "hdfs.root" | "" | "" | +| "storage" | "num_cpus" | "0" | "" | +| "storage" | "obs.access_key_id" | "" | "" | +| "storage" | "obs.bucket" | "" | "" | +| "storage" | "obs.endpoint_url" | "" | "" | +| "storage" | "obs.root" | "" | "" | +| "storage" | "obs.secret_access_key" | "" | "" | +| "storage" | "oss.access_key_id" | "" | "" | +| "storage" | "oss.access_key_secret" | "" | "" | +| "storage" | "oss.bucket" | "" | "" | +| "storage" | "oss.endpoint_url" | "" | "" | +| "storage" | "oss.presign_endpoint_url" | "" | "" | +| "storage" | "oss.root" | "" | "" | +| "storage" | "s3.access_key_id" | "" | "" | +| "storage" | "s3.bucket" | "" | "" | +| "storage" | "s3.enable_virtual_host_style" | "false" | "" | +| "storage" | "s3.endpoint_url" | "https://s3.amazonaws.com" | "" | +| "storage" 
| "s3.external_id" | "" | "" | +| "storage" | "s3.master_key" | "" | "" | +| "storage" | "s3.region" | "" | "" | +| "storage" | "s3.role_arn" | "" | "" | +| "storage" | "s3.root" | "" | "" | +| "storage" | "s3.secret_access_key" | "" | "" | +| "storage" | "s3.security_token" | "" | "" | +| "storage" | "type" | "fs" | "" | ++-----------+------------------------------------------+----------------------------------+----------+ From f81f25aba9b54d053003d0b4c812d46e06d8d7f7 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 13 Feb 2023 20:03:50 +0800 Subject: [PATCH 67/80] tenant disk cache path isolation --- .../storages/common/cache-manager/src/cache_manager.rs | 8 ++++++-- .../storages/common/cache/src/providers/disk_cache.rs | 5 ++++- .../common/cache/src/providers/table_data_cache.rs | 3 ++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index ecac32a185129..20522e37b540c 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -13,6 +13,7 @@ // limitations under the License. // +use std::path::PathBuf; use std::sync::Arc; use common_base::base::GlobalInstance; @@ -55,8 +56,11 @@ impl CacheManager { pub fn init(config: &QueryConfig) -> Result<()> { // setup table data cache let table_data_cache = if config.table_data_cache_enabled { + let real_disk_cache_root = PathBuf::from(&config.table_disk_cache_root) + .join(&config.tenant_id) + .join("v1"); Self::new_block_data_cache( - &config.table_disk_cache_root, + &real_disk_cache_root, config.table_data_cache_population_queue_size, config.table_disk_cache_max_size, )? 
@@ -183,7 +187,7 @@ impl CacheManager { } fn new_block_data_cache( - path: &str, + path: &PathBuf, population_queue_size: u32, disk_cache_bytes_size: u64, ) -> Result> { diff --git a/src/query/storages/common/cache/src/providers/disk_cache.rs b/src/query/storages/common/cache/src/providers/disk_cache.rs index 101cea2c40689..22649b9eec1e5 100644 --- a/src/query/storages/common/cache/src/providers/disk_cache.rs +++ b/src/query/storages/common/cache/src/providers/disk_cache.rs @@ -421,7 +421,10 @@ pub type LruDiskCacheHolder = Arc>; pub struct LruDiskCacheBuilder; impl LruDiskCacheBuilder { - pub fn new_disk_cache(path: &str, disk_cache_bytes_size: u64) -> Result { + pub fn new_disk_cache( + path: &PathBuf, + disk_cache_bytes_size: u64, + ) -> Result { let external_cache = DiskCache::new(path, disk_cache_bytes_size) .map_err(|e| ErrorCode::StorageOther(format!("create disk cache failed, {e}")))?; Ok(Arc::new(RwLock::new(external_cache))) diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index b524af567290f..2ec6b129b1364 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::path::PathBuf; use std::sync::Arc; use std::thread::JoinHandle; @@ -74,7 +75,7 @@ const TABLE_DATA_CACHE_NAME: &str = "table_data"; pub struct TableDataCacheBuilder; impl TableDataCacheBuilder { pub fn new_table_data_disk_cache( - path: &str, + path: &PathBuf, population_queue_size: u32, disk_cache_bytes_size: u64, ) -> Result> { From 5b3315bd947c83bfc653910f9b9d6fe6fc6a16f1 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 13 Feb 2023 22:17:24 +0800 Subject: [PATCH 68/80] resovle conflicts --- .../fuse/src/io/read/block/block_reader_merge_io.rs | 2 +- .../fuse/src/io/read/block/block_reader_merge_io_async.rs | 2 +- .../fuse/src/io/read/block/block_reader_merge_io_sync.rs | 2 +- .../fuse/src/io/read/block/block_reader_parquet.rs | 2 -- .../src/io/read/block/block_reader_parquet_deserialize.rs | 8 ++++---- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs index 8a5e93c7fdf5a..97ded3a7a9c80 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io.rs @@ -16,9 +16,9 @@ use std::collections::HashMap; use std::ops::Range; use std::sync::Arc; -use common_catalog::table::ColumnId; use common_exception::ErrorCode; use common_exception::Result; +use common_expression::ColumnId; use storages_common_cache::CacheAccessor; use storages_common_cache::TableDataCache; use storages_common_cache::TableDataCacheKey; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs index c7f5380574344..f640c1b7ba89a 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_async.rs @@ -18,9 +18,9 @@ use std::time::Instant; use 
common_base::rangemap::RangeMerger; use common_base::runtime::UnlimitedFuture; -use common_catalog::table::ColumnId; use common_exception::ErrorCode; use common_exception::Result; +use common_expression::ColumnId; use futures::future::try_join_all; use opendal::Object; use storages_common_cache::CacheAccessor; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs index 90b665ea643a7..d87a3504d8bd6 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_merge_io_sync.rs @@ -16,9 +16,9 @@ use std::ops::Range; use common_base::rangemap::RangeMerger; use common_catalog::plan::PartInfoPtr; -use common_catalog::table::ColumnId; use common_exception::ErrorCode; use common_exception::Result; +use common_expression::ColumnId; use opendal::Object; use storages_common_cache::CacheAccessor; use storages_common_cache::TableDataCacheKey; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs index b1297cf692375..cc484c929514c 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet.rs @@ -13,9 +13,7 @@ // limitations under the License. 
use common_exception::Result; -use common_expression::ColumnId; use common_expression::DataBlock; -use common_expression::FieldIndex; use storages_common_table_meta::meta::BlockMeta; use crate::io::read::ReadSettings; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs index ac765d38bda12..b3c6d617a90d9 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs @@ -26,9 +26,9 @@ use common_arrow::parquet::metadata::ColumnDescriptor; use common_arrow::parquet::read::PageMetaData; use common_arrow::parquet::read::PageReader; use common_catalog::plan::PartInfoPtr; -use common_catalog::table::ColumnId; use common_exception::ErrorCode; use common_exception::Result; +use common_expression::ColumnId; use common_expression::DataBlock; use common_storage::ColumnNode; use storages_common_cache::CacheAccessor; @@ -243,7 +243,7 @@ impl BlockReader { column: &ColumnNode, is_nested_column: bool, ) -> Result>> { - let indices = &column.leaf_ids; + let indices = &column.leaf_indices; let column_chunks = deserialization_context.column_chunks; let compression = deserialization_context.compression; let uncompressed_buffer = deserialization_context.uncompressed_buffer; @@ -257,14 +257,14 @@ impl BlockReader { let mut field_uncompressed_size = 0; let mut column_id = 0; - for (i, leaf_column_id) in indices.iter().enumerate() { + for (i, leaf_index) in indices.iter().enumerate() { column_id = column.leaf_column_ids[i]; if let Some(column_meta) = deserialization_context.column_metas.get(&column_id) { if let Some(chunk) = column_chunks.get(&column_id) { match chunk { DataItem::RawData(data) => { let column_descriptor = - &self.parquet_schema_descriptor.columns()[*leaf_column_id]; + &self.parquet_schema_descriptor.columns()[*leaf_index]; 
field_column_metas.push(column_meta); field_column_data.push(*data); field_column_descriptors.push(column_descriptor); From 5b7a5655fe338eeb6777a8653b41a716638f8d04 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Mon, 13 Feb 2023 23:28:20 +0800 Subject: [PATCH 69/80] minor code gc --- .../src/io/read/block/block_reader_parquet_deserialize.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs index b3c6d617a90d9..623eab247379d 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_parquet_deserialize.rs @@ -256,9 +256,8 @@ impl BlockReader { let mut field_column_descriptors = Vec::with_capacity(estimated_cap); let mut field_uncompressed_size = 0; - let mut column_id = 0; for (i, leaf_index) in indices.iter().enumerate() { - column_id = column.leaf_column_ids[i]; + let column_id = column.leaf_column_ids[i]; if let Some(column_meta) = deserialization_context.column_metas.get(&column_id) { if let Some(chunk) = column_chunks.get(&column_id) { match chunk { @@ -319,7 +318,7 @@ impl BlockReader { Ok(Some(DeserializedArray::NoNeedToCache(array))) } else { // the array is deserialized from raw bytes, should be cached - // let column_id = column.leaf_column_ids[indices[0]]; + let column_id = column.leaf_column_ids[0]; Ok(Some(DeserializedArray::Deserialized(( column_id, array, From 4c29d6776461ba0722e54bdeb386fa0b6b74e217 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Tue, 14 Feb 2023 18:12:51 +0800 Subject: [PATCH 70/80] refactor cache configuration --- .../deploy/config/databend-query-node-1.toml | 24 ++++ src/query/config/src/inner.rs | 89 ++++++++++++ src/query/config/src/outer_v0.rs | 136 +++++++++++++++++- .../storages/system/src/configs_table.rs | 11 ++ 4 files changed, 255 insertions(+), 5 
deletions(-) diff --git a/scripts/ci/deploy/config/databend-query-node-1.toml b/scripts/ci/deploy/config/databend-query-node-1.toml index 914c6ce565899..baeafd6e1d45f 100644 --- a/scripts/ci/deploy/config/databend-query-node-1.toml +++ b/scripts/ci/deploy/config/databend-query-node-1.toml @@ -109,3 +109,27 @@ data_path = "./.databend/stateless_test_data" # endpoint_url = "" # access_key_id = "" # access_key_secret = "" + + +[cache] + +# in-memory cache of .... +table_meta_enabled = true +table_meta_snapshot_item = 1024 +table_meta_segment_item = 1024 + +table_index_bloom_enabled = true +table_index_bloom_meta = 1024 +table_index_bloom_filter = 1024 + +table_data_enabled = true +table_data_bytes = 1024 +table_deserialized_data = 1024 + +[cache.disk] +path = "_cache" + +[cache.redis] +path = "_cache" +endpoint = "..." + diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 27a83ca50fea9..6205f83b58a74 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -58,6 +58,9 @@ pub struct Config { // - Later, catalog information SHOULD be kept in KV Service // - currently only supports HIVE (via hive meta store) pub catalogs: HashMap, + + // Cache Config + pub cache: TheCache, } impl Config { @@ -443,3 +446,89 @@ impl Default for LocalConfig { } } } + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct TheCache { + /// Enable table meta cache. Default is enabled. Set it to false to disable all the table meta caches + pub enable_table_meta_caches: bool, + + /// Max number of cached table snapshot + pub table_meta_snapshot_count: u64, + + /// Max number of cached table segment + pub table_meta_segment_count: u64, + + /// Enable bloom index cache. Default is enabled. Set it to false to disable all the bloom index caches + pub enable_table_index_bloom: bool, + + /// Max number of cached bloom index meta objects. Set it to 0 to disable it. 
+ pub table_bloom_index_meta_count: u64, + + /// Max number of cached bloom index filters. Set it to 0 to disable it. + // One bloom index filter per column of data block being indexed will be generated if necessary. + // + // For example, a table of 1024 columns, with 800 data blocks, a query that triggers a full + // table filter on 2 columns, might populate 2 * 800 bloom index filter cache items (at most) + pub table_bloom_index_filter_count: u64, + + /// Max number of cached bloom index filters. Set it to 0 to disable it. + pub data_cache_storage: TableDataExternalCache, + + /// Storage that hold the raw data caches + pub disk_cache_config: DiskCacheConfig, + + /// Max size of in memory table column object cache. By default it is 0 (disabled) + /// + /// CAUTION: The cache items are deserialized table column objects, may take a lot of memory. + /// + /// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into, + /// and the access pattern will benefit from caching, consider enabled this cache. + pub table_data_deserialized_data_bytes: u64, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum TableDataExternalCache { + None, + Disk, + // Redis, +} + +impl Default for TableDataExternalCache { + fn default() -> Self { + Self::None + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct DiskCacheConfig { + /// Max bytes of cached raw table data. Default 20GB, set it to 0 to disable it. 
+ pub max_size: u64, + + /// Table disk cache root path + pub path: String, +} + +impl Default for DiskCacheConfig { + fn default() -> Self { + Self { + max_size: 21474836480, + path: "./.databend/_cache".to_owned(), + } + } +} + +impl Default for TheCache { + fn default() -> Self { + Self { + enable_table_meta_caches: true, + table_meta_snapshot_count: 256, + table_meta_segment_count: 10240, + enable_table_index_bloom: true, + table_bloom_index_meta_count: 3000, + table_bloom_index_filter_count: 1048576, + data_cache_storage: Default::default(), + disk_cache_config: Default::default(), + table_data_deserialized_data_bytes: 0, + } + } +} diff --git a/src/query/config/src/outer_v0.rs b/src/query/config/src/outer_v0.rs index a23a2d246ff2a..e47116d21c8b1 100644 --- a/src/query/config/src/outer_v0.rs +++ b/src/query/config/src/outer_v0.rs @@ -21,6 +21,7 @@ use std::str::FromStr; use clap::Args; use clap::Parser; +use clap::ValueEnum; use common_base::base::mask_string; use common_exception::ErrorCode; use common_exception::Result; @@ -50,6 +51,7 @@ use serfig::collectors::from_file; use serfig::collectors::from_self; use serfig::parsers::Toml; +use super::inner; use super::inner::CatalogConfig as InnerCatalogConfig; use super::inner::CatalogHiveConfig as InnerCatalogHiveConfig; use super::inner::Config as InnerConfig; @@ -111,6 +113,10 @@ pub struct Config { #[clap(flatten)] pub local: LocalConfig, + // cache configs + #[clap(flatten)] + pub cache: CacheConfig, + /// external catalog config. 
/// /// - Later, catalog information SHOULD be kept in KV Service @@ -195,6 +201,7 @@ impl From for Config { .into_iter() .map(|(k, v)| (k, v.into())) .collect(), + cache: Default::default(), // TODO } } } @@ -226,6 +233,7 @@ impl TryInto for Config { storage: self.storage.try_into()?, local: self.local.try_into()?, catalogs, + cache: self.cache.try_into()?, }) } } @@ -291,7 +299,7 @@ pub struct StorageConfig { pub oss: OssStorageConfig, #[clap(skip)] - pub cache: CacheConfig, + pub cache: StorageCacheConfig, } impl Default for StorageConfig { @@ -490,7 +498,7 @@ impl From for HiveCatalogConfig { #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(default)] -pub struct CacheConfig { +pub struct StorageCacheConfig { #[serde(rename = "type")] pub cache_type: String, @@ -507,13 +515,13 @@ pub struct CacheConfig { pub redis: RedisStorageConfig, } -impl Default for CacheConfig { +impl Default for StorageCacheConfig { fn default() -> Self { InnerCacheConfig::default().into() } } -impl From for CacheConfig { +impl From for StorageCacheConfig { fn from(inner: InnerCacheConfig) -> Self { let mut cfg = Self { cache_num_cpus: inner.num_cpus, @@ -546,7 +554,7 @@ impl From for CacheConfig { } } -impl TryInto for CacheConfig { +impl TryInto for StorageCacheConfig { type Error = ErrorCode; fn try_into(self) -> Result { Ok(InnerCacheConfig { @@ -1886,3 +1894,121 @@ impl TryInto for LocalConfig { }) } } + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Args, Default)] +#[serde(default, deny_unknown_fields)] +pub struct CacheConfig { + /// Enable table meta cache. Default is enabled. 
Set it to false to disable all the table meta caches + #[clap(long = "cache-enable-table-meta-caches", default_value = "true")] + pub enable_table_meta_caches: bool, + + /// Max number of cached table snapshot + #[clap(long = "cache-table-meta-snapshot-count", default_value = "256")] + pub table_meta_snapshot_count: u64, + + /// Max number of cached table segment + #[clap(long = "cache-table-meta-segment-count", default_value = "10240")] + pub table_meta_segment_count: u64, + + /// Enable bloom index cache. Default is enabled. Set it to false to disable all the bloom index caches + #[clap(long = "cache-enable-table-bloom-index-caches", default_value = "true")] + pub enable_table_index_bloom: bool, + + /// Max number of cached bloom index meta objects. Set it to 0 to disable it. + #[clap(long = "cache-table-bloom-index-meta-count", default_value = "3000")] + pub table_bloom_index_meta_count: u64, + + /// Max number of cached bloom index filters. Set it to 0 to disable it. + // One bloom index filter per column of data block being indexed will be generated if necessary. + // + // For example, a table of 1024 columns, with 800 data blocks, a query that triggers a full + // table filter on 2 columns, might populate 2 * 800 bloom index filter cache items (at most) + #[clap( + long = "cache-table-bloom-index-filter-count", + default_value = "1048576" + )] + pub table_bloom_index_filter_count: u64, + + /// Type of data cache storage + #[clap(long = "cache-data-cache-storage", value_enum, default_value_t)] + pub data_cache_storage: ExternalStorageType, + + /// Storage that hold the raw data caches + #[clap(flatten)] + #[serde(alias = "disk")] + pub disk_cache_config: DiskCacheConfig, + + /// Max size of in memory table column object cache. By default it is 0 (disabled) + /// + /// CAUTION: The cache items are deserialized table column objects, may take a lot of memory. 
+ /// + /// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into, + /// and the access pattern will benefit from caching, consider enabled this cache. + #[clap(long = "cache-table-data-deserialized-data-bytes", default_value = "0")] + pub table_data_deserialized_data_bytes: u64, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, ValueEnum)] +pub enum ExternalStorageType { + #[serde(alias = "none")] + None, + #[serde(alias = "disk")] + Disk, + // Redis, +} + +impl Default for ExternalStorageType { + fn default() -> Self { + Self::None + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Args, Default)] +#[serde(default, deny_unknown_fields)] +pub struct DiskCacheConfig { + /// Max bytes of cached raw table data. Default 20GB, set it to 0 to disable it. + #[clap(long = "cache-disk-max-bytes", default_value = "21474836480")] + pub max_bytes: u64, + + /// Table disk cache root path + #[clap(long = "cache-disk-path", default_value = "./.databend/_cache")] + pub path: String, +} + +impl TryFrom for inner::TheCache { + type Error = ErrorCode; + + fn try_from(value: CacheConfig) -> std::result::Result { + Ok(Self { + enable_table_meta_caches: value.enable_table_meta_caches, + table_meta_snapshot_count: value.table_meta_snapshot_count, + table_meta_segment_count: value.table_meta_segment_count, + enable_table_index_bloom: value.enable_table_index_bloom, + table_bloom_index_meta_count: value.table_bloom_index_meta_count, + table_bloom_index_filter_count: value.table_bloom_index_filter_count, + data_cache_storage: value.data_cache_storage.try_into()?, + disk_cache_config: value.disk_cache_config.try_into()?, + table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes, + }) + } +} + +impl TryFrom for inner::DiskCacheConfig { + type Error = ErrorCode; + fn try_from(value: DiskCacheConfig) -> std::result::Result { + Ok(Self { + max_size: value.max_bytes, + path: value.path, + }) + } +} + 
+impl TryFrom for inner::TableDataExternalCache { + type Error = ErrorCode; + fn try_from(value: ExternalStorageType) -> std::result::Result { + Ok(match value { + ExternalStorageType::None => inner::TableDataExternalCache::None, + ExternalStorageType::Disk => inner::TableDataExternalCache::Disk, + }) + } +} diff --git a/src/query/storages/system/src/configs_table.rs b/src/query/storages/system/src/configs_table.rs index d1d06181f839a..9413756db9c64 100644 --- a/src/query/storages/system/src/configs_table.rs +++ b/src/query/storages/system/src/configs_table.rs @@ -85,6 +85,17 @@ impl SyncSystemTable for ConfigsTable { meta_config_value, ); + let cache_config = config.cache; + let cache_config_value = serde_json::to_value(cache_config)?; + ConfigsTable::extract_config( + &mut names, + &mut values, + &mut groups, + &mut descs, + "cache".to_string(), + cache_config_value, + ); + // Clone storage config to avoid change it's value. // // TODO(xuanwo): From 9e051868aeb066effe37517b4b4366b71e8cd515 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 15 Feb 2023 11:44:53 +0800 Subject: [PATCH 71/80] the converters --- src/query/config/src/inner.rs | 16 +++++----- src/query/config/src/outer_v0.rs | 54 ++++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 6205f83b58a74..6f096a81f35d5 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -60,7 +60,7 @@ pub struct Config { pub catalogs: HashMap, // Cache Config - pub cache: TheCache, + pub cache: CacheConfig, } impl Config { @@ -448,7 +448,7 @@ impl Default for LocalConfig { } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct TheCache { +pub struct CacheConfig { /// Enable table meta cache. Default is enabled. 
Set it to false to disable all the table meta caches pub enable_table_meta_caches: bool, @@ -472,7 +472,7 @@ pub struct TheCache { pub table_bloom_index_filter_count: u64, /// Max number of cached bloom index filters. Set it to 0 to disable it. - pub data_cache_storage: TableDataExternalCache, + pub data_cache_storage: ExternalCacheStorageType, /// Storage that hold the raw data caches pub disk_cache_config: DiskCacheConfig, @@ -487,13 +487,13 @@ pub struct TheCache { } #[derive(Clone, Debug, PartialEq, Eq)] -pub enum TableDataExternalCache { +pub enum ExternalCacheStorageType { None, Disk, // Redis, } -impl Default for TableDataExternalCache { +impl Default for ExternalCacheStorageType { fn default() -> Self { Self::None } @@ -502,7 +502,7 @@ impl Default for TableDataExternalCache { #[derive(Clone, Debug, PartialEq, Eq)] pub struct DiskCacheConfig { /// Max bytes of cached raw table data. Default 20GB, set it to 0 to disable it. - pub max_size: u64, + pub max_bytes: u64, /// Table disk cache root path pub path: String, @@ -511,13 +511,13 @@ pub struct DiskCacheConfig { impl Default for DiskCacheConfig { fn default() -> Self { Self { - max_size: 21474836480, + max_bytes: 21474836480, path: "./.databend/_cache".to_owned(), } } } -impl Default for TheCache { +impl Default for CacheConfig { fn default() -> Self { Self { enable_table_meta_caches: true, diff --git a/src/query/config/src/outer_v0.rs b/src/query/config/src/outer_v0.rs index e47116d21c8b1..d446d3aba2905 100644 --- a/src/query/config/src/outer_v0.rs +++ b/src/query/config/src/outer_v0.rs @@ -201,7 +201,7 @@ impl From for Config { .into_iter() .map(|(k, v)| (k, v.into())) .collect(), - cache: Default::default(), // TODO + cache: inner.cache.into(), } } } @@ -1931,7 +1931,7 @@ pub struct CacheConfig { /// Type of data cache storage #[clap(long = "cache-data-cache-storage", value_enum, default_value_t)] - pub data_cache_storage: ExternalStorageType, + pub data_cache_storage: ExternalCacheStorageType, /// 
Storage that hold the raw data caches #[clap(flatten)] @@ -1949,7 +1949,7 @@ pub struct CacheConfig { } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, ValueEnum)] -pub enum ExternalStorageType { +pub enum ExternalCacheStorageType { #[serde(alias = "none")] None, #[serde(alias = "disk")] @@ -1957,7 +1957,7 @@ pub enum ExternalStorageType { // Redis, } -impl Default for ExternalStorageType { +impl Default for ExternalCacheStorageType { fn default() -> Self { Self::None } @@ -1975,7 +1975,7 @@ pub struct DiskCacheConfig { pub path: String, } -impl TryFrom for inner::TheCache { +impl TryFrom for inner::CacheConfig { type Error = ErrorCode; fn try_from(value: CacheConfig) -> std::result::Result { @@ -1993,22 +1993,56 @@ impl TryFrom for inner::TheCache { } } +impl From for CacheConfig { + fn from(value: inner::CacheConfig) -> Self { + Self { + enable_table_meta_caches: value.enable_table_meta_caches, + table_meta_snapshot_count: value.table_meta_snapshot_count, + table_meta_segment_count: value.table_meta_segment_count, + enable_table_index_bloom: value.enable_table_index_bloom, + table_bloom_index_meta_count: value.table_bloom_index_meta_count, + table_bloom_index_filter_count: value.table_bloom_index_filter_count, + data_cache_storage: value.data_cache_storage.into(), + disk_cache_config: value.disk_cache_config.into(), + table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes, + } + } +} + impl TryFrom for inner::DiskCacheConfig { type Error = ErrorCode; fn try_from(value: DiskCacheConfig) -> std::result::Result { Ok(Self { - max_size: value.max_bytes, + max_bytes: value.max_bytes, path: value.path, }) } } -impl TryFrom for inner::TableDataExternalCache { +impl From for DiskCacheConfig { + fn from(value: inner::DiskCacheConfig) -> Self { + Self { + max_bytes: value.max_bytes, + path: value.path, + } + } +} + +impl TryFrom for inner::ExternalCacheStorageType { type Error = ErrorCode; - fn try_from(value: ExternalStorageType) -> 
std::result::Result { + fn try_from(value: ExternalCacheStorageType) -> std::result::Result { Ok(match value { - ExternalStorageType::None => inner::TableDataExternalCache::None, - ExternalStorageType::Disk => inner::TableDataExternalCache::Disk, + ExternalCacheStorageType::None => inner::ExternalCacheStorageType::None, + ExternalCacheStorageType::Disk => inner::ExternalCacheStorageType::Disk, }) } } + +impl From for ExternalCacheStorageType { + fn from(value: inner::ExternalCacheStorageType) -> Self { + match value { + inner::ExternalCacheStorageType::None => ExternalCacheStorageType::None, + inner::ExternalCacheStorageType::Disk => ExternalCacheStorageType::Disk, + } + } +} From 0eaf74795ce5ec0379644c186e115c1932d6cc28 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 15 Feb 2023 21:33:17 +0800 Subject: [PATCH 72/80] cache configs --- .../deploy/config/databend-query-node-1.toml | 54 +- .../deploy/config/databend-query-node-2.toml | 47 +- .../deploy/config/databend-query-node-3.toml | 47 +- src/binaries/query/local.rs | 4 +- src/binaries/query/main.rs | 6 +- .../config/src/{outer_v0.rs => config.rs} | 609 +++++++++++------- src/query/config/src/global.rs | 10 +- src/query/config/src/lib.rs | 36 +- src/query/config/src/{inner.rs => setting.rs} | 120 ++-- src/query/config/tests/main.rs | 12 +- src/query/service/src/api/http/v1/config.rs | 6 +- .../service/src/api/http/v1/tenant_tables.rs | 4 +- src/query/service/src/api/http_service.rs | 8 +- .../src/api/rpc/exchange/exchange_manager.rs | 6 +- .../service/src/api/rpc/packets/packet.rs | 8 +- .../src/api/rpc/packets/packet_execute.rs | 4 +- .../src/api/rpc/packets/packet_executor.rs | 4 +- .../src/api/rpc/packets/packet_publisher.rs | 4 +- src/query/service/src/api/rpc_service.rs | 8 +- src/query/service/src/auth.rs | 4 +- .../service/src/catalogs/catalog_manager.rs | 22 +- .../src/catalogs/default/database_catalog.rs | 4 +- .../src/catalogs/default/immutable_catalog.rs | 4 +- 
.../src/catalogs/default/mutable_catalog.rs | 4 +- src/query/service/src/clusters/cluster.rs | 20 +- .../service/src/databases/database_factory.rs | 4 +- .../src/databases/system/system_database.rs | 4 +- src/query/service/src/global_services.rs | 12 +- .../access/management_mode_access.rs | 4 +- .../src/interpreters/interpreter_metrics.rs | 4 +- .../src/interpreters/interpreter_query_log.rs | 8 +- .../src/interpreters/interpreter_unsetting.rs | 6 +- src/query/service/src/procedures/procedure.rs | 4 +- .../service/src/servers/http/http_services.rs | 14 +- .../http/v1/query/http_query_manager.rs | 4 +- .../service/src/sessions/query_ctx_shared.rs | 4 +- src/query/service/src/sessions/session.rs | 4 +- src/query/service/src/sessions/session_ctx.rs | 4 +- src/query/service/src/sessions/session_mgr.rs | 12 +- src/query/service/tests/it/configs.rs | 229 ++++--- .../it/servers/http/clickhouse_handler.rs | 4 +- .../it/storages/fuse/table_test_fixture.rs | 4 +- .../storages/testdata/configs_table_basic.txt | 25 +- src/query/service/tests/it/tests/config.rs | 10 +- src/query/service/tests/it/tests/context.rs | 4 +- src/query/service/tests/it/tests/sessions.rs | 4 +- src/query/settings/src/lib.rs | 10 +- src/query/sharing/src/signer.rs | 4 +- src/query/sql/src/planner/binder/copy.rs | 8 +- src/query/sql/src/planner/binder/location.rs | 4 +- src/query/sql/src/planner/binder/table.rs | 4 +- .../sql/src/planner/expression_parser.rs | 4 +- src/query/sql/tests/location.rs | 6 +- .../common/cache-manager/src/cache_manager.rs | 48 +- .../storages/factory/src/storage_factory.rs | 4 +- .../storages/system/src/configs_table.rs | 22 +- .../storages/system/src/tracing_table.rs | 6 +- 57 files changed, 913 insertions(+), 630 deletions(-) rename src/query/config/src/{outer_v0.rs => config.rs} (79%) rename src/query/config/src/{inner.rs => setting.rs} (85%) diff --git a/scripts/ci/deploy/config/databend-query-node-1.toml b/scripts/ci/deploy/config/databend-query-node-1.toml index 
baeafd6e1d45f..bd429c8ecd6a7 100644 --- a/scripts/ci/deploy/config/databend-query-node-1.toml +++ b/scripts/ci/deploy/config/databend-query-node-1.toml @@ -33,12 +33,6 @@ cluster_id = "test_cluster" table_engine_memory_enabled = true database_engine_github_enabled = true -table_meta_cache_enabled = true -table_memory_cache_mb_size = 1024 -table_disk_cache_root = "_cache" -table_cache_bloom_index_meta_count=3000 -table_cache_bloom_index_filter_count=1048576 - # [[query.users]] # name = "admin" # auth_type = "no_password" @@ -113,23 +107,41 @@ data_path = "./.databend/stateless_test_data" [cache] -# in-memory cache of .... -table_meta_enabled = true -table_meta_snapshot_item = 1024 -table_meta_segment_item = 1024 - +### table meta caches ### +# Enable table meta cache. Default is true. +# Set it to false wll disable all the table meta caches +enable_table_meta_caches = true +# Max number of cached table snapshot. Set it to 0 to disable it. +table_meta_snapshot_count = 256 +# Max number of cached table segment. Set it to 0 to disable it. +table_meta_segment_count = 10240 +# Max number of cached table statistic meta. Set it to 0 to disable it. +table_meta_statistic_count = 256 + +### table bloom index caches ### +# Enable bloom index cache. Default is true +# Set it to false will disable all the bloom index caches table_index_bloom_enabled = true -table_index_bloom_meta = 1024 -table_index_bloom_filter = 1024 +# Max number of cached bloom index meta objects. Set it to 0 to disable it. +table_bloom_index_meta_count = 3000 +# Max number of cached bloom index filters. Set it to 0 to disable it. 
+table_bloom_index_filter_count = 1048576 -table_data_enabled = true -table_data_bytes = 1024 -table_deserialized_data = 1024 +### table data caches ### -[cache.disk] -path = "_cache" +# Type of storage to keep the table data cache +# +# available options: [none|disk] +# default is "none", which disable table data cache +# use "disk" to enabled disk cache +data_cache_storage = "none" -[cache.redis] -path = "_cache" -endpoint = "..." +# Max size of external cache population queue length +table_data_cache_population_queue_size = 65535 + +[cache.disk] +# cache path +path = "./databend/_cache" +# max bytes of cached data 20G +max_bytes = 21474836480 diff --git a/scripts/ci/deploy/config/databend-query-node-2.toml b/scripts/ci/deploy/config/databend-query-node-2.toml index 57af9541aeeda..95d266187d526 100644 --- a/scripts/ci/deploy/config/databend-query-node-2.toml +++ b/scripts/ci/deploy/config/databend-query-node-2.toml @@ -33,12 +33,6 @@ cluster_id = "test_cluster" table_engine_memory_enabled = true database_engine_github_enabled = true -table_meta_cache_enabled = true -table_memory_cache_mb_size = 1024 -table_disk_cache_root = "./.databend/cache" -table_cache_bloom_index_meta_count=3000 -table_cache_bloom_index_filter_count=1048576 - [log] [log.file] @@ -63,3 +57,44 @@ type = "fs" # Comment out this block if you're NOT using local file system as storage. [storage.fs] data_path = "./.databend/stateless_test_data" + +] + +### table meta caches ### +# Enable table meta cache. Default is true. +# Set it to false wll disable all the table meta caches +enable_table_meta_caches = true +# Max number of cached table snapshot. Set it to 0 to disable it. +table_meta_snapshot_count = 256 +# Max number of cached table segment. Set it to 0 to disable it. +table_meta_segment_count = 10240 +# Max number of cached table statistic meta. Set it to 0 to disable it. +table_meta_statistic_count = 256 + +### table bloom index caches ### +# Enable bloom index cache. 
Default is true +# Set it to false will disable all the bloom index caches +table_index_bloom_enabled = true +# Max number of cached bloom index meta objects. Set it to 0 to disable it. +table_bloom_index_meta_count = 3000 +# Max number of cached bloom index filters. Set it to 0 to disable it. +table_bloom_index_filter_count = 1048576 + +### table data caches ### + +# Type of storage to keep the table data cache +# +# available options: [none|disk] +# default is "none", which disables table data cache +# use "disk" to enable disk cache +data_cache_storage = "none" + +# Max size of external cache population queue length +table_data_cache_population_queue_size = 65535 + + +[cache.disk] +# cache path +path = "./databend/_cache" +# max bytes of cached data 20G +max_bytes = 21474836480 diff --git a/scripts/ci/deploy/config/databend-query-node-3.toml b/scripts/ci/deploy/config/databend-query-node-3.toml index 52ce664de0577..fc56f78684de9 100644 --- a/scripts/ci/deploy/config/databend-query-node-3.toml +++ b/scripts/ci/deploy/config/databend-query-node-3.toml @@ -34,12 +34,6 @@ cluster_id = "test_cluster" table_engine_memory_enabled = true database_engine_github_enabled = true -table_meta_cache_enabled = true -table_memory_cache_mb_size = 1024 -table_disk_cache_root = "./.databend/cache" -table_cache_bloom_index_meta_count=3000 -table_cache_bloom_index_filter_count=1048576 - [log] [log.file] @@ -64,3 +58,44 @@ type = "fs" # Comment out this block if you're NOT using local file system as storage. [storage.fs] data_path = "./.databend/stateless_test_data" + +[cache] + +### table meta caches ### +# Enable table meta cache. Default is true. +# Set it to false will disable all the table meta caches +enable_table_meta_caches = true +# Max number of cached table snapshot. Set it to 0 to disable it. +table_meta_snapshot_count = 256 +# Max number of cached table segment. Set it to 0 to disable it. +table_meta_segment_count = 10240 +# Max number of cached table statistic meta. 
Set it to 0 to disable it. +table_meta_statistic_count = 256 + +### table bloom index caches ### +# Enable bloom index cache. Default is true +# Set it to false will disable all the bloom index caches +table_index_bloom_enabled = true +# Max number of cached bloom index meta objects. Set it to 0 to disable it. +table_bloom_index_meta_count = 3000 +# Max number of cached bloom index filters. Set it to 0 to disable it. +table_bloom_index_filter_count = 1048576 + +### table data caches ### + +# Type of storage to keep the table data cache +# +# available options: [none|disk] +# default is "none", which disables table data cache +# use "disk" to enable disk cache +data_cache_storage = "none" + +# Max size of external cache population queue length +table_data_cache_population_queue_size = 65535 + + +[cache.disk] +# cache path +path = "./databend/_cache" +# max bytes of cached data 20G +max_bytes = 21474836480 diff --git a/src/binaries/query/local.rs b/src/binaries/query/local.rs index 29a540fd35e9e..21e87dc9682ea 100644 --- a/src/binaries/query/local.rs +++ b/src/binaries/query/local.rs @@ -16,7 +16,7 @@ use std::time::Instant; use comfy_table::Cell; use comfy_table::Table; -use common_config::Config; +use common_config::Setting; use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataBlock; @@ -28,7 +28,7 @@ use databend_query::sql::Planner; use databend_query::GlobalServices; use tokio_stream::StreamExt; -pub async fn query_local(conf: &Config) -> Result<()> { +pub async fn query_local(conf: &Setting) -> Result<()> { let mut conf = conf.clone(); conf.storage.allow_insecure = true; let local_conf = conf.local.clone(); diff --git a/src/binaries/query/main.rs b/src/binaries/query/main.rs index a73c41253ad27..af6515a0d98ed 100644 --- a/src/binaries/query/main.rs +++ b/src/binaries/query/main.rs @@ -23,7 +23,7 @@ use common_base::mem_allocator::GlobalAllocator; use common_base::runtime::Runtime; use common_base::runtime::GLOBAL_MEM_STAT; 
use common_base::set_alloc_error_hook; -use common_config::Config; +use common_config::Setting; use common_config::DATABEND_COMMIT_VERSION; use common_config::QUERY_SEMVER; use common_exception::Result; @@ -62,7 +62,7 @@ fn main() { } async fn main_entrypoint() -> Result<()> { - let conf: Config = Config::load()?; + let conf: Setting = Setting::load()?; if run_cmd(&conf).await? { return Ok(()); @@ -310,7 +310,7 @@ async fn main_entrypoint() -> Result<()> { Ok(()) } -async fn run_cmd(conf: &Config) -> Result { +async fn run_cmd(conf: &Setting) -> Result { if conf.cmd.is_empty() { return Ok(false); } diff --git a/src/query/config/src/outer_v0.rs b/src/query/config/src/config.rs similarity index 79% rename from src/query/config/src/outer_v0.rs rename to src/query/config/src/config.rs index d446d3aba2905..6cf820b5068c9 100644 --- a/src/query/config/src/outer_v0.rs +++ b/src/query/config/src/config.rs @@ -51,18 +51,21 @@ use serfig::collectors::from_file; use serfig::collectors::from_self; use serfig::parsers::Toml; -use super::inner; -use super::inner::CatalogConfig as InnerCatalogConfig; -use super::inner::CatalogHiveConfig as InnerCatalogHiveConfig; -use super::inner::Config as InnerConfig; -use super::inner::LocalConfig as InnerLocalConfig; -use super::inner::MetaConfig as InnerMetaConfig; -use super::inner::QueryConfig as InnerQueryConfig; +use super::setting; +use super::setting::CatalogHiveSetting as InnerCatalogHiveConfig; +use super::setting::CatalogSetting as InnerCatalogConfig; +use super::setting::LocalConfig as InnerLocalConfig; +use super::setting::MetaConfig as InnerMetaConfig; +use super::setting::QuerySetting as InnerQueryConfig; +use super::setting::Setting as InnerConfig; +use crate::Setting; use crate::DATABEND_COMMIT_VERSION; +// FIXME: too much boilerplate here + const CATALOG_HIVE: &str = "hive"; -/// Outer config for `query`. +/// Config for `query`. 
/// /// We will use this config to handle /// @@ -131,7 +134,7 @@ pub struct Config { impl Default for Config { fn default() -> Self { - InnerConfig::default().into_outer() + InnerConfig::default().into_config() } } @@ -184,60 +187,6 @@ impl Config { } } -impl From for Config { - fn from(inner: InnerConfig) -> Self { - Self { - cmd: inner.cmd, - config_file: inner.config_file, - query: inner.query.into(), - log: inner.log.into(), - meta: inner.meta.into(), - storage: inner.storage.into(), - catalog: HiveCatalogConfig::default(), - local: inner.local.into(), - - catalogs: inner - .catalogs - .into_iter() - .map(|(k, v)| (k, v.into())) - .collect(), - cache: inner.cache.into(), - } - } -} - -impl TryInto for Config { - type Error = ErrorCode; - - fn try_into(self) -> Result { - let mut catalogs = HashMap::new(); - for (k, v) in self.catalogs.into_iter() { - let catalog = v.try_into()?; - catalogs.insert(k, catalog); - } - if !self.catalog.meta_store_address.is_empty() || !self.catalog.protocol.is_empty() { - tracing::warn!( - "`catalog` is planned to be deprecated, please add catalog in `catalogs` instead" - ); - let hive = self.catalog.try_into()?; - let catalog = InnerCatalogConfig::Hive(hive); - catalogs.insert(CATALOG_HIVE.to_string(), catalog); - } - - Ok(InnerConfig { - cmd: self.cmd, - config_file: self.config_file, - query: self.query.try_into()?, - log: self.log.try_into()?, - meta: self.meta.try_into()?, - storage: self.storage.try_into()?, - local: self.local.try_into()?, - catalogs, - cache: self.cache.try_into()?, - }) - } -} - /// Storage config group. 
/// /// # TODO(xuanwo) @@ -1265,71 +1214,6 @@ pub struct QueryConfig { #[clap(long, default_value = "10000")] pub max_query_log_size: usize, - /// Table Meta Cached enabled - #[clap(long, default_value = "true")] - pub table_meta_cache_enabled: bool, - - /// Max number of cached table block meta - #[clap(long, default_value = "102400")] - pub table_cache_block_meta_count: u64, - - /// Table memory cache size (mb) - #[clap(long, default_value = "256")] - pub table_memory_cache_mb_size: u64, - - /// Table disk cache folder root - #[clap(long, default_value = "_cache")] - pub table_disk_cache_root: String, - - /// Table disk cache size (bytes), default values is 20GB - #[clap(long, default_value = "21474836480")] - pub table_disk_cache_max_size: u64, - - /// Max number of cached table snapshot - #[clap(long, default_value = "256")] - pub table_cache_snapshot_count: u64, - - /// Max number of cached table snapshot statistics - #[clap(long, default_value = "256")] - pub table_cache_statistic_count: u64, - - /// Max number of cached table segment - #[clap(long, default_value = "10240")] - pub table_cache_segment_count: u64, - - /// Max number of cached bloom index meta objects - #[clap(long, default_value = "3000")] - pub table_cache_bloom_index_meta_count: u64, - - /// Max number of cached bloom index filters, default value is 1024 * 1024 items. - /// One bloom index filter per column of data block being indexed will be generated if necessary. - /// - /// For example, a table of 1024 columns, with 800 data blocks, a query that triggers a full - /// table filter on 2 columns, might populate 2 * 800 bloom index filter cache items (at most) - #[clap(long, default_value = "1048576")] - pub table_cache_bloom_index_filter_count: u64, - - /// Max size of in memory table column object cache - /// - /// The cache items are deserialized table column objects, may take a lot of memory. - /// - /// Default value is 0, which disable this cache. 
If query nodes have plenty of un-utilized - /// memory, the working set can be fitted into, and the access pattern will benefit from - /// caching, consider enabled this cache. - #[clap(long, default_value = "0")] - pub table_data_cache_in_memory_max_size: u64, - - /// Indicates if table data cached is enabled, default false - #[clap(long)] - pub table_data_cache_enabled: bool, - - /// Max number of items that could be pending in the table data cache population queue - /// default value 65536 items. - /// Increase this value if it takes too much times to fully populate the disk cache. - /// Decrease this value if it takes too much memory to queue the items being cached. - #[clap(long, default_value = "65536")] - pub table_data_cache_population_queue_size: u32, - /// If in management mode, only can do some meta level operations(database/table/user/stage etc.) with metasrv. #[clap(long)] pub management_mode: bool, @@ -1368,6 +1252,53 @@ pub struct QueryConfig { #[clap(long)] pub internal_enable_sandbox_tenant: bool, + + // ----- the following options/args are all deprecated ---- + // ----- and turned into Option, to help user migrate the configs ---- + /// OBSOLETED: Table disk cache size (mb). 
+ #[clap(long)] + pub table_disk_cache_mb_size: Option, + + /// OBSOLETED: Table Meta Cached enabled + #[clap(long)] + pub table_meta_cache_enabled: Option, + + /// OBSOLETED: Max number of cached table block meta + #[clap(long)] + pub table_cache_block_meta_count: Option, + + /// OBSOLETED: Table memory cache size (mb) + #[clap(long)] + pub table_memory_cache_mb_size: Option, + + /// OBSOLETED: Table disk cache folder root + #[clap(long)] + pub table_disk_cache_root: Option, + + /// OBSOLETED: Max number of cached table snapshot + #[clap(long)] + pub table_cache_snapshot_count: Option, + + /// OBSOLETED: Max number of cached table snapshot statistics + #[clap(long)] + pub table_cache_statistic_count: Option, + + /// OBSOLETED: Max number of cached table segment + #[clap(long)] + pub table_cache_segment_count: Option, + + /// OBSOLETED: Max number of cached bloom index meta objects + #[clap(long)] + pub table_cache_bloom_index_meta_count: Option, + + /// OBSOLETED: + /// Max number of cached bloom index filters, default value is 1024 * 1024 items. + /// One bloom index filter per column of data block being indexed will be generated if necessary. 
+ /// + /// For example, a table of 1024 columns, with 800 data blocks, a query that triggers a full + /// table filter on 2 columns, might populate 2 * 800 bloom index filter cache items (at most) + #[clap(long)] + pub table_cache_bloom_index_filter_count: Option, } impl Default for QueryConfig { @@ -1380,6 +1311,7 @@ impl TryInto for QueryConfig { type Error = ErrorCode; fn try_into(self) -> Result { + self.check_obsoleted()?; Ok(InnerQueryConfig { tenant_id: self.tenant_id, cluster_id: self.cluster_id, @@ -1410,19 +1342,6 @@ impl TryInto for QueryConfig { table_engine_memory_enabled: self.table_engine_memory_enabled, wait_timeout_mills: self.wait_timeout_mills, max_query_log_size: self.max_query_log_size, - table_meta_cache_enabled: self.table_meta_cache_enabled, - table_cache_block_meta_count: self.table_cache_block_meta_count, - table_memory_cache_mb_size: self.table_memory_cache_mb_size, - table_cache_snapshot_count: self.table_cache_snapshot_count, - table_cache_statistic_count: self.table_cache_statistic_count, - table_cache_segment_count: self.table_cache_segment_count, - table_cache_bloom_index_meta_count: self.table_cache_bloom_index_meta_count, - table_cache_bloom_index_filter_count: self.table_cache_bloom_index_filter_count, - table_data_cache_in_memory_max_size: self.table_data_cache_in_memory_max_size, - table_data_cache_enabled: self.table_data_cache_enabled, - table_data_cache_population_queue_size: self.table_data_cache_population_queue_size, - table_disk_cache_root: self.table_disk_cache_root, - table_disk_cache_max_size: self.table_disk_cache_max_size, management_mode: self.management_mode, jwt_key_file: self.jwt_key_file, jwt_key_files: self.jwt_key_files, @@ -1479,19 +1398,6 @@ impl From for QueryConfig { database_engine_github_enabled: true, wait_timeout_mills: inner.wait_timeout_mills, max_query_log_size: inner.max_query_log_size, - table_meta_cache_enabled: inner.table_meta_cache_enabled, - table_cache_block_meta_count: 
inner.table_cache_block_meta_count, - table_memory_cache_mb_size: inner.table_memory_cache_mb_size, - table_disk_cache_root: inner.table_disk_cache_root, - table_disk_cache_max_size: inner.table_disk_cache_max_size, - table_cache_snapshot_count: inner.table_cache_snapshot_count, - table_cache_statistic_count: inner.table_cache_statistic_count, - table_cache_segment_count: inner.table_cache_segment_count, - table_cache_bloom_index_meta_count: inner.table_cache_bloom_index_meta_count, - table_cache_bloom_index_filter_count: inner.table_cache_bloom_index_filter_count, - table_data_cache_in_memory_max_size: inner.table_data_cache_in_memory_max_size, - table_data_cache_enabled: inner.table_data_cache_enabled, - table_data_cache_population_queue_size: inner.table_data_cache_population_queue_size, management_mode: inner.management_mode, jwt_key_file: inner.jwt_key_file, jwt_key_files: inner.jwt_key_files, @@ -1503,6 +1409,17 @@ impl From for QueryConfig { share_endpoint_auth_token_file: inner.share_endpoint_auth_token_file, quota: inner.tenant_quota, internal_enable_sandbox_tenant: inner.internal_enable_sandbox_tenant, + // obsoleted config entries + table_disk_cache_mb_size: None, + table_meta_cache_enabled: None, + table_cache_block_meta_count: None, + table_memory_cache_mb_size: None, + table_disk_cache_root: None, + table_cache_snapshot_count: None, + table_cache_statistic_count: None, + table_cache_segment_count: None, + table_cache_bloom_index_meta_count: None, + table_cache_bloom_index_filter_count: None, } } } @@ -1900,6 +1817,7 @@ impl TryInto for LocalConfig { pub struct CacheConfig { /// Enable table meta cache. Default is enabled. 
Set it to false to disable all the table meta caches #[clap(long = "cache-enable-table-meta-caches", default_value = "true")] + #[serde(default = "bool_true")] pub enable_table_meta_caches: bool, /// Max number of cached table snapshot @@ -1910,8 +1828,13 @@ pub struct CacheConfig { #[clap(long = "cache-table-meta-segment-count", default_value = "10240")] pub table_meta_segment_count: u64, + /// Max number of cached table statistic meta + #[clap(long = "cache-table-meta-statistic-count", default_value = "256")] + pub table_meta_statistic_count: u64, + /// Enable bloom index cache. Default is enabled. Set it to false to disable all the bloom index caches #[clap(long = "cache-enable-table-bloom-index-caches", default_value = "true")] + #[serde(default = "bool_true")] pub enable_table_index_bloom: bool, /// Max number of cached bloom index meta objects. Set it to 0 to disable it. @@ -1931,16 +1854,33 @@ pub struct CacheConfig { /// Type of data cache storage #[clap(long = "cache-data-cache-storage", value_enum, default_value_t)] - pub data_cache_storage: ExternalCacheStorageType, + pub data_cache_storage: ExternalCacheStorageTypeConfig, - /// Storage that hold the raw data caches + /// Max size of external cache population queue length + /// + /// the items being queued reference table column raw data, which are + /// un-deserialized and usually compressed (depends on table compression options). + /// + /// - please monitor the 'table_data_cache_population_pending_count' metric + /// if it is too high, and takes too much memory, please consider decrease this value + /// + /// - please monitor the 'population_overflow_count' metric + /// if it keeps increasing, and disk cache hits rate is not as expected. please consider + /// increase this value. 
+ #[clap( + long = "cache-data-cache-population-queue-size", + default_value = "65536" + )] + pub table_data_cache_population_queue_size: u32, + + /// Storage that hold the data caches #[clap(flatten)] - #[serde(alias = "disk")] + #[serde(rename = "disk")] pub disk_cache_config: DiskCacheConfig, /// Max size of in memory table column object cache. By default it is 0 (disabled) /// - /// CAUTION: The cache items are deserialized table column objects, may take a lot of memory. + /// CAUTION: The cached items are deserialized table column objects, may take a lot of memory. /// /// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into, /// and the access pattern will benefit from caching, consider enabled this cache. @@ -1948,16 +1888,20 @@ pub struct CacheConfig { pub table_data_deserialized_data_bytes: u64, } +#[inline] +fn bool_true() -> bool { + true +} + #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, ValueEnum)] -pub enum ExternalCacheStorageType { - #[serde(alias = "none")] +#[serde(rename_all = "lowercase")] +pub enum ExternalCacheStorageTypeConfig { None, - #[serde(alias = "disk")] Disk, // Redis, } -impl Default for ExternalCacheStorageType { +impl Default for ExternalCacheStorageTypeConfig { fn default() -> Self { Self::None } @@ -1975,74 +1919,307 @@ pub struct DiskCacheConfig { pub path: String, } -impl TryFrom for inner::CacheConfig { - type Error = ErrorCode; +mod cache_config_converters { + use super::*; + + impl From for Config { + fn from(inner: Setting) -> Self { + Self { + cmd: inner.cmd, + config_file: inner.config_file, + query: inner.query.into(), + log: inner.log.into(), + meta: inner.meta.into(), + storage: inner.storage.into(), + catalog: HiveCatalogConfig::default(), + local: inner.local.into(), + + catalogs: inner + .catalogs + .into_iter() + .map(|(k, v)| (k, v.into())) + .collect(), + cache: inner.cache.into(), + } + } + } - fn try_from(value: CacheConfig) -> std::result::Result { - Ok(Self 
{ - enable_table_meta_caches: value.enable_table_meta_caches, - table_meta_snapshot_count: value.table_meta_snapshot_count, - table_meta_segment_count: value.table_meta_segment_count, - enable_table_index_bloom: value.enable_table_index_bloom, - table_bloom_index_meta_count: value.table_bloom_index_meta_count, - table_bloom_index_filter_count: value.table_bloom_index_filter_count, - data_cache_storage: value.data_cache_storage.try_into()?, - disk_cache_config: value.disk_cache_config.try_into()?, - table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes, - }) + impl TryInto for Config { + type Error = ErrorCode; + + fn try_into(self) -> Result { + let mut catalogs = HashMap::new(); + for (k, v) in self.catalogs.into_iter() { + let catalog = v.try_into()?; + catalogs.insert(k, catalog); + } + if !self.catalog.meta_store_address.is_empty() || !self.catalog.protocol.is_empty() { + tracing::warn!( + "`catalog` is planned to be deprecated, please add catalog in `catalogs` instead" + ); + let hive = self.catalog.try_into()?; + let catalog = InnerCatalogConfig::Hive(hive); + catalogs.insert(CATALOG_HIVE.to_string(), catalog); + } + + Ok(InnerConfig { + cmd: self.cmd, + config_file: self.config_file, + query: self.query.try_into()?, + log: self.log.try_into()?, + meta: self.meta.try_into()?, + storage: self.storage.try_into()?, + local: self.local.try_into()?, + catalogs, + cache: self.cache.try_into()?, + }) + } } -} -impl From for CacheConfig { - fn from(value: inner::CacheConfig) -> Self { - Self { - enable_table_meta_caches: value.enable_table_meta_caches, - table_meta_snapshot_count: value.table_meta_snapshot_count, - table_meta_segment_count: value.table_meta_segment_count, - enable_table_index_bloom: value.enable_table_index_bloom, - table_bloom_index_meta_count: value.table_bloom_index_meta_count, - table_bloom_index_filter_count: value.table_bloom_index_filter_count, - data_cache_storage: value.data_cache_storage.into(), - disk_cache_config: 
value.disk_cache_config.into(), - table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes, + impl TryFrom for setting::CacheSetting { + type Error = ErrorCode; + + fn try_from(value: CacheConfig) -> std::result::Result { + Ok(Self { + enable_table_meta_caches: value.enable_table_meta_caches, + table_meta_snapshot_count: value.table_meta_snapshot_count, + table_meta_segment_count: value.table_meta_segment_count, + table_meta_statistic_count: value.table_meta_statistic_count, + enable_table_index_bloom: value.enable_table_index_bloom, + table_bloom_index_meta_count: value.table_bloom_index_meta_count, + table_bloom_index_filter_count: value.table_bloom_index_filter_count, + data_cache_storage: value.data_cache_storage.try_into()?, + table_data_cache_population_queue_size: value + .table_data_cache_population_queue_size, + disk_cache_config: value.disk_cache_config.try_into()?, + table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes, + }) } } -} -impl TryFrom for inner::DiskCacheConfig { - type Error = ErrorCode; - fn try_from(value: DiskCacheConfig) -> std::result::Result { - Ok(Self { - max_bytes: value.max_bytes, - path: value.path, - }) + impl From for CacheConfig { + fn from(value: setting::CacheSetting) -> Self { + Self { + enable_table_meta_caches: value.enable_table_meta_caches, + table_meta_snapshot_count: value.table_meta_snapshot_count, + table_meta_segment_count: value.table_meta_segment_count, + table_meta_statistic_count: value.table_meta_statistic_count, + enable_table_index_bloom: value.enable_table_index_bloom, + table_bloom_index_meta_count: value.table_bloom_index_meta_count, + table_bloom_index_filter_count: value.table_bloom_index_filter_count, + data_cache_storage: value.data_cache_storage.into(), + table_data_cache_population_queue_size: value + .table_data_cache_population_queue_size, + disk_cache_config: value.disk_cache_config.into(), + table_data_deserialized_data_bytes: 
value.table_data_deserialized_data_bytes, + } + } } -} -impl From for DiskCacheConfig { - fn from(value: inner::DiskCacheConfig) -> Self { - Self { - max_bytes: value.max_bytes, - path: value.path, + impl TryFrom for setting::DiskCacheConfig { + type Error = ErrorCode; + fn try_from(value: DiskCacheConfig) -> std::result::Result { + Ok(Self { + max_bytes: value.max_bytes, + path: value.path, + }) } } -} -impl TryFrom for inner::ExternalCacheStorageType { - type Error = ErrorCode; - fn try_from(value: ExternalCacheStorageType) -> std::result::Result { - Ok(match value { - ExternalCacheStorageType::None => inner::ExternalCacheStorageType::None, - ExternalCacheStorageType::Disk => inner::ExternalCacheStorageType::Disk, - }) + impl From for DiskCacheConfig { + fn from(value: setting::DiskCacheConfig) -> Self { + Self { + max_bytes: value.max_bytes, + path: value.path, + } + } + } + + impl TryFrom for setting::ExternalCacheStorageTypeSetting { + type Error = ErrorCode; + fn try_from( + value: ExternalCacheStorageTypeConfig, + ) -> std::result::Result { + Ok(match value { + ExternalCacheStorageTypeConfig::None => { + setting::ExternalCacheStorageTypeSetting::None + } + ExternalCacheStorageTypeConfig::Disk => { + setting::ExternalCacheStorageTypeSetting::Disk + } + }) + } + } + + impl From for ExternalCacheStorageTypeConfig { + fn from(value: setting::ExternalCacheStorageTypeSetting) -> Self { + match value { + setting::ExternalCacheStorageTypeSetting::None => { + ExternalCacheStorageTypeConfig::None + } + setting::ExternalCacheStorageTypeSetting::Disk => { + ExternalCacheStorageTypeConfig::Disk + } + } + } } } -impl From for ExternalCacheStorageType { - fn from(value: inner::ExternalCacheStorageType) -> Self { - match value { - inner::ExternalCacheStorageType::None => ExternalCacheStorageType::None, - inner::ExternalCacheStorageType::Disk => ExternalCacheStorageType::Disk, +// Obsoleted configuration entries checking +// +// The following code should be removed from the 
release after the next release. +// Just give user errors without any detail explanation and migration suggestions. +impl QueryConfig { + fn check( + target: &Option, + cli_arg_name: &str, + alternative_cli_arg: &str, + alternative_toml_config: &str, + alternative_env_var: &str, + ) -> Option { + target.as_ref().map(|_| { + format!( + " + -------------------------------------------------------------- + *** {cli_arg_name} *** is obsoleted : + -------------------------------------------------------------- + alternative command-line options : {alternative_cli_arg} + alternative environment variable : {alternative_env_var} + alternative toml config : {alternative_toml_config} + -------------------------------------------------------------- +" + ) + }) + } + fn check_obsoleted(&self) -> Result<()> { + let check_results = vec![ + Self::check( + &self.table_disk_cache_mb_size, + "table-disk-cache-mb-size", + "cache-disk-max-bytes", + r#" + [cache] + ... + data_cache_storage = "disk" + ... + [cache.disk] + max_bytes = [MAX_BYTES] + ... + "#, + "CACHE_DISK_MAX_BYTES", + ), + Self::check( + &self.table_meta_cache_enabled, + "table-meta-cache-enabled", + "cache-enable-table-meta-caches", + r#" + [cache] + enable_table_meta_caches = [true|false] + "#, + "CACHE_ENABLE_TABLE_META_CACHES", + ), + Self::check( + &self.table_cache_block_meta_count, + "table-cache-block-meta-count", + "N/A", + "N/A", + "N/A", + ), + Self::check( + &self.table_memory_cache_mb_size, + "table-memory-cache-mb-size", + "N/A", + "N/A", + "N/A", + ), + Self::check( + &self.table_disk_cache_root, + "table-disk-cache-root", + "cache-disk-path", + r#" + [cache] + ... + data_cache_storage = "disk" + ... + [cache.disk] + max_bytes = [MAX_BYTES] + path = [PATH] + ... 
+ "#, + "CACHE_DISK_PATH", + ), + Self::check( + &self.table_cache_snapshot_count, + "table-cache-snapshot-count", + "cache-table-meta-snapshot-count", + r#" + [cache] + table_meta_snapshot_count = [COUNT] + "#, + "CACHE_TABLE_META_SNAPSHOT_COUNT", + ), + Self::check( + &self.table_cache_statistic_count, + "table-cache-statistic-count", + "cache-table-meta-statistic-count", + r#" + [cache] + table_meta_statistic_count = [COUNT] + "#, + "CACHE_TABLE_META_STATISTIC_COUNT", + ), + Self::check( + &self.table_cache_segment_count, + "table-cache-segment-count", + "cache-table-meta-segment-count", + r#" + [cache] + table_meta_segment_count = [COUNT] + "#, + "CACHE_TABLE_META_SEGMENT_COUNT", + ), + Self::check( + &self.table_cache_bloom_index_meta_count, + "table-cache-bloom-index-meta-count", + "cache-table-bloom-index-meta-count", + r#" + [cache] + table_bloom_index_meta_count = [COUNT] + "#, + "CACHE_TABLE_BLOOM_INDEX_META_COUNT", + ), + Self::check( + &self.table_cache_bloom_index_filter_count, + "table-cache-bloom-index-filter-count", + "cache-table-bloom-index-filter-count", + r#" + [cache] + table_bloom_index_filter_count = [COUNT] + "#, + "CACHE_TABLE_BLOOM_INDEX_FILTER_COUNT", + ), + ]; + let messages = check_results.into_iter().flatten().collect::>(); + if !messages.is_empty() { + let errors = messages.join("\n"); + Err(ErrorCode::InvalidConfig(format!("\n{errors}"))) + } else { + Ok(()) } } + + pub const fn obsoleted_option_keys() -> &'static [&'static str; 10] { + &[ + "table_disk_cache_mb_size", + "table_meta_cache_enabled", + "table_cache_block_meta_count", + "table_memory_cache_mb_size", + "table_disk_cache_root", + "table_cache_snapshot_count", + "table_cache_statistic_count", + "table_cache_segment_count", + "table_cache_bloom_index_meta_count", + "table_cache_bloom_index_filter_count", + ] + } } diff --git a/src/query/config/src/global.rs b/src/query/config/src/global.rs index ebabd55375e3c..04ba0dc98ef59 100644 --- a/src/query/config/src/global.rs +++ 
b/src/query/config/src/global.rs @@ -17,17 +17,17 @@ use std::sync::Arc; use common_base::base::GlobalInstance; use common_exception::Result; -use crate::Config; +use crate::Setting; -pub struct GlobalConfig; +pub struct GlobalSetting; -impl GlobalConfig { - pub fn init(config: Config) -> Result<()> { +impl GlobalSetting { + pub fn init(config: Setting) -> Result<()> { GlobalInstance::set(Arc::new(config)); Ok(()) } - pub fn instance() -> Arc { + pub fn instance() -> Arc { GlobalInstance::get() } } diff --git a/src/query/config/src/lib.rs b/src/query/config/src/lib.rs index 9684d9b5e0f72..d1bfa02b4dc21 100644 --- a/src/query/config/src/lib.rs +++ b/src/query/config/src/lib.rs @@ -15,30 +15,34 @@ #![allow(clippy::uninlined_format_args)] #![feature(no_sanitize)] +mod config; /// Config mods provide config support. /// /// We are providing two config types: /// -/// - [`inner::Config`] which will be exposed as [`crate::Config`] will be used in all business logic. -/// - [`outer_v0::Config`] is the outer config for [`inner::Config`] which will be exposed to end-users. -/// - [`global::GlobalConfig`] is a global config singleton of [`crate::Config`]. +/// - [`config::Config`] represents the options from command line , configuration files or environment vars. +/// - [`setting::Setting`] "internal representation" of application settings . +/// - [`global::GlobalSetting`] A global singleton of [`crate::Setting`]. 
/// -/// It's safe to refactor [`inner::Config`] in anyway, as long as it satisfied the following traits +/// It's safe to refactor [`setting::Setting`] in any way, as long as it satisfies the following traits /// -/// - `TryInto for outer_v0::Config` -/// - `From for outer_v0::Config` +/// - `TryInto for config::Config` +/// - `From for config::Config` mod global; -mod inner; -mod outer_v0; +mod setting; mod version; -pub use global::GlobalConfig; -pub use inner::CatalogConfig; -pub use inner::CatalogHiveConfig; -pub use inner::Config; -pub use inner::QueryConfig; -pub use inner::ThriftProtocol; -pub use outer_v0::Config as OuterConfig; -pub use outer_v0::StorageConfig; +pub use config::Config; +pub use config::ExternalCacheStorageTypeConfig; +pub use config::QueryConfig; +pub use config::StorageConfig; +pub use global::GlobalSetting; +pub use setting::CacheSetting; +pub use setting::CatalogHiveSetting; +pub use setting::CatalogSetting; +pub use setting::ExternalCacheStorageTypeSetting; +pub use setting::QuerySetting; +pub use setting::Setting; +pub use setting::ThriftProtocol; pub use version::DATABEND_COMMIT_VERSION; pub use version::QUERY_SEMVER; diff --git a/src/query/config/src/inner.rs b/src/query/config/src/setting.rs similarity index 85% rename from src/query/config/src/inner.rs rename to src/query/config/src/setting.rs index 6f096a81f35d5..eaaad4e5ebc4b 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/setting.rs @@ -30,18 +30,18 @@ use common_storage::StorageConfig; use common_tracing::Config as LogConfig; use common_users::idm_config::IDMConfig; -use super::outer_v0::Config as OuterV0Config; +use super::config::Config; /// Inner config for query. /// /// All function should implement based on this Config. #[derive(Clone, Default, Debug, PartialEq, Eq)] -pub struct Config { +pub struct Setting { pub cmd: String, pub config_file: String, // Query engine config. 
- pub query: QueryConfig, + pub query: QuerySetting, pub log: LogConfig, @@ -57,18 +57,18 @@ pub struct Config { // external catalog config. // - Later, catalog information SHOULD be kept in KV Service // - currently only supports HIVE (via hive meta store) - pub catalogs: HashMap, + pub catalogs: HashMap, // Cache Config - pub cache: CacheConfig, + pub cache: CacheSetting, } -impl Config { - /// As requires by [RFC: Config Backward Compatibility](https://github.com/datafuselabs/databend/pull/5324), we will load user's config via wrapper [`ConfigV0`] and then convert from [`ConfigV0`] to [`Config`]. +impl Setting { + /// As requires by [RFC: Config Backward Compatibility](https://github.com/datafuselabs/databend/pull/5324), we will load user's config via wrapper [`ConfigV0`] and then convert from [`ConfigV0`] to [`Setting`]. /// /// In the future, we could have `ConfigV1` and `ConfigV2`. pub fn load() -> Result { - let cfg: Self = OuterV0Config::load(true)?.try_into()?; + let cfg: Self = Config::load(true)?.try_into()?; // Only check meta config when cmd is empty. if cfg.cmd.is_empty() { @@ -81,7 +81,7 @@ impl Config { /// /// This function is served for tests only. pub fn load_for_test() -> Result { - let cfg: Self = OuterV0Config::load(false)?.try_into()?; + let cfg: Self = Config::load(false)?.try_into()?; Ok(cfg) } @@ -99,7 +99,7 @@ impl Config { !self.query.rpc_tls_server_key.is_empty() && !self.query.rpc_tls_server_cert.is_empty() } - /// Transform config into the outer style. + /// Transform Setting into the Config. /// /// This function should only be used for end-users. /// @@ -108,13 +108,13 @@ impl Config { /// - system config table /// - HTTP Handler /// - tests - pub fn into_outer(self) -> OuterV0Config { - OuterV0Config::from(self) + pub fn into_config(self) -> Config { + Config::from(self) } } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct QueryConfig { +pub struct QuerySetting { /// Tenant id for get the information from the MetaSrv. 
pub tenant_id: String, /// ID for construct the cluster. @@ -150,46 +150,6 @@ pub struct QueryConfig { pub table_engine_memory_enabled: bool, pub wait_timeout_mills: u64, pub max_query_log_size: usize, - /// Table Meta Cached enabled - pub table_meta_cache_enabled: bool, - /// Max number of cached table block meta - pub table_cache_block_meta_count: u64, - /// Table memory cache size (MB), - /// @deprecated - pub table_memory_cache_mb_size: u64, - /// Max number of cached table snapshot - pub table_cache_snapshot_count: u64, - /// Max number of cached table statistic - pub table_cache_statistic_count: u64, - /// Max number of cached table segment - pub table_cache_segment_count: u64, - /// Max number of cached bloom index meta objects - pub table_cache_bloom_index_meta_count: u64, - /// Max number of cached bloom index filters - pub table_cache_bloom_index_filter_count: u64, - /// Max size(bytes) of in memory table column object cache - /// - /// The cache items are deserialized object, may take a lot of memory. - /// Please set it to zero to disable it. - pub table_data_cache_in_memory_max_size: u64, - /// Indicates if table data cache is enabled - pub table_data_cache_enabled: bool, - /// Table disk cache folder root - pub table_disk_cache_root: String, - /// Max size of external cache population queue length - /// - /// the items being queued reference table column raw data, which are - /// un-deserialized and usually compressed (depends on table compression options). - /// - /// - please monitor the 'table_data_cache_population_pending_count' metric - /// if it is too high, and takes too much memory, please consider decrease this value - /// - /// - please monitor the 'population_overflow_count' metric - /// if it keeps increasing, and disk cache hits rate is not as expected. please consider - /// increase this value. 
- pub table_data_cache_population_queue_size: u32, - /// Table disk cache size (bytes), default value is 21474836480 - pub table_disk_cache_max_size: u64, /// If in management mode, only can do some meta level operations(database/table/user/stage etc.) with metasrv. pub management_mode: bool, pub jwt_key_file: String, @@ -204,7 +164,7 @@ pub struct QueryConfig { pub internal_enable_sandbox_tenant: bool, } -impl Default for QueryConfig { +impl Default for QuerySetting { fn default() -> Self { Self { tenant_id: "admin".to_string(), @@ -236,19 +196,6 @@ impl Default for QueryConfig { table_engine_memory_enabled: true, wait_timeout_mills: 5000, max_query_log_size: 10_000, - table_meta_cache_enabled: true, - table_cache_block_meta_count: 102400, - table_memory_cache_mb_size: 256, - table_cache_snapshot_count: 256, - table_cache_statistic_count: 256, - table_cache_segment_count: 10240, - table_cache_bloom_index_meta_count: 3000, - table_cache_bloom_index_filter_count: 1024 * 1024, - table_data_cache_in_memory_max_size: 0, - table_data_cache_enabled: false, - table_data_cache_population_queue_size: 65536, - table_disk_cache_root: "_cache".to_string(), - table_disk_cache_max_size: 20 * 1024 * 1024 * 1024, management_mode: false, jwt_key_file: "".to_string(), jwt_key_files: Vec::new(), @@ -264,7 +211,7 @@ impl Default for QueryConfig { } } -impl QueryConfig { +impl QuerySetting { pub fn to_rpc_client_tls_config(&self) -> RpcClientTlsConfig { RpcClientTlsConfig { rpc_tls_server_root_ca_cert: self.rpc_tls_query_server_root_ca_cert.clone(), @@ -386,8 +333,8 @@ impl Debug for MetaConfig { } #[derive(Clone, Debug, PartialEq, Eq)] -pub enum CatalogConfig { - Hive(CatalogHiveConfig), +pub enum CatalogSetting { + Hive(CatalogHiveSetting), } // TODO: add compat protocol support @@ -417,12 +364,12 @@ impl Display for ThriftProtocol { } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct CatalogHiveConfig { +pub struct CatalogHiveSetting { pub address: String, pub protocol: 
ThriftProtocol, } -impl Default for CatalogHiveConfig { +impl Default for CatalogHiveSetting { fn default() -> Self { Self { address: "127.0.0.1:9083".to_string(), @@ -448,7 +395,7 @@ impl Default for LocalConfig { } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct CacheConfig { +pub struct CacheSetting { /// Enable table meta cache. Default is enabled. Set it to false to disable all the table meta caches pub enable_table_meta_caches: bool, @@ -458,6 +405,9 @@ pub struct CacheConfig { /// Max number of cached table segment pub table_meta_segment_count: u64, + /// Max number of cached table statistic + pub table_meta_statistic_count: u64, + /// Enable bloom index cache. Default is enabled. Set it to false to disable all the bloom index caches pub enable_table_index_bloom: bool, @@ -471,8 +421,20 @@ pub struct CacheConfig { // table filter on 2 columns, might populate 2 * 800 bloom index filter cache items (at most) pub table_bloom_index_filter_count: u64, - /// Max number of cached bloom index filters. Set it to 0 to disable it. - pub data_cache_storage: ExternalCacheStorageType, + pub data_cache_storage: ExternalCacheStorageTypeSetting, + + /// Max size of external cache population queue length + /// + /// the items being queued reference table column raw data, which are + /// un-deserialized and usually compressed (depends on table compression options). + /// + /// - please monitor the 'table_data_cache_population_pending_count' metric + /// if it is too high, and takes too much memory, please consider decrease this value + /// + /// - please monitor the 'population_overflow_count' metric + /// if it keeps increasing, and disk cache hits rate is not as expected. please consider + /// increase this value. 
+ pub table_data_cache_population_queue_size: u32, /// Storage that hold the raw data caches pub disk_cache_config: DiskCacheConfig, @@ -487,13 +449,13 @@ pub struct CacheConfig { } #[derive(Clone, Debug, PartialEq, Eq)] -pub enum ExternalCacheStorageType { +pub enum ExternalCacheStorageTypeSetting { None, Disk, // Redis, } -impl Default for ExternalCacheStorageType { +impl Default for ExternalCacheStorageTypeSetting { fn default() -> Self { Self::None } @@ -517,16 +479,18 @@ impl Default for DiskCacheConfig { } } -impl Default for CacheConfig { +impl Default for CacheSetting { fn default() -> Self { Self { enable_table_meta_caches: true, table_meta_snapshot_count: 256, table_meta_segment_count: 10240, + table_meta_statistic_count: 256, enable_table_index_bloom: true, table_bloom_index_meta_count: 3000, table_bloom_index_filter_count: 1048576, data_cache_storage: Default::default(), + table_data_cache_population_queue_size: 65536, disk_cache_config: Default::default(), table_data_deserialized_data_bytes: 0, } diff --git a/src/query/config/tests/main.rs b/src/query/config/tests/main.rs index 811d8fbf30fbe..f9a051b6aa492 100644 --- a/src/query/config/tests/main.rs +++ b/src/query/config/tests/main.rs @@ -16,19 +16,19 @@ use std::ffi::OsString; use clap::Parser; use common_config::Config; -use common_config::OuterConfig; +use common_config::Setting; use pretty_assertions::assert_eq; -/// It's required to make sure config's default value is the same with clap. +/// It's required to make sure setting's default value is the same with clap. 
#[test] fn test_config_default() { - let type_default = Config::default(); - let args_default: Config = OuterConfig::parse_from(Vec::::new()) + let setting_default = Setting::default(); + let config_default: Setting = Config::parse_from(Vec::::new()) .try_into() .expect("parse from args must succeed"); assert_eq!( - type_default, args_default, - "inner config's default value is different from args, please check again" + setting_default, config_default, + "default setting is different from default config, please check again" ) } diff --git a/src/query/service/src/api/http/v1/config.rs b/src/query/service/src/api/http/v1/config.rs index 0e9ebb1cac42e..5e29f6b6a99ff 100644 --- a/src/query/service/src/api/http/v1/config.rs +++ b/src/query/service/src/api/http/v1/config.rs @@ -12,11 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_config::GlobalConfig; +use common_config::GlobalSetting; use poem::web::Json; use poem::IntoResponse; #[poem::handler] pub async fn config_handler() -> poem::Result { - Ok(Json(GlobalConfig::instance().as_ref().clone().into_outer())) + Ok(Json( + GlobalSetting::instance().as_ref().clone().into_config(), + )) } diff --git a/src/query/service/src/api/http/v1/tenant_tables.rs b/src/query/service/src/api/http/v1/tenant_tables.rs index 3e66c9ed92d23..af9d50ff37c8a 100644 --- a/src/query/service/src/api/http/v1/tenant_tables.rs +++ b/src/query/service/src/api/http/v1/tenant_tables.rs @@ -16,7 +16,7 @@ use chrono::DateTime; use chrono::Utc; use common_catalog::catalog::CatalogManager; use common_catalog::catalog_kind::CATALOG_DEFAULT; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::Result; use poem::web::Json; use poem::web::Path; @@ -81,7 +81,7 @@ pub async fn list_tenant_tables_handler( // This handler returns the statistics about the tables of the current tenant. 
#[poem::handler] pub async fn list_tables_handler() -> poem::Result { - let tenant = &GlobalConfig::instance().query.tenant_id; + let tenant = &GlobalSetting::instance().query.tenant_id; if tenant.is_empty() { return Ok(Json(TenantTablesResponse { tables: vec![] })); } diff --git a/src/query/service/src/api/http_service.rs b/src/query/service/src/api/http_service.rs index 3468af269cc23..252896d6563a4 100644 --- a/src/query/service/src/api/http_service.rs +++ b/src/query/service/src/api/http_service.rs @@ -15,7 +15,7 @@ use std::net::SocketAddr; use std::path::Path; -use common_config::Config; +use common_config::Setting; use common_exception::Result; use common_http::health_handler; use common_http::home::debug_home_handler; @@ -34,12 +34,12 @@ use tracing::warn; use crate::servers::Server; pub struct HttpService { - config: Config, + config: Setting, shutdown_handler: HttpShutdownHandler, } impl HttpService { - pub fn create(config: &Config) -> Result> { + pub fn create(config: &Setting) -> Result> { Ok(Box::new(HttpService { config: config.clone(), shutdown_handler: HttpShutdownHandler::create("http api".to_string()), @@ -90,7 +90,7 @@ impl HttpService { route } - fn build_tls(config: &Config) -> Result { + fn build_tls(config: &Setting) -> Result { let certificate = RustlsCertificate::new() .cert(std::fs::read(config.query.api_tls_server_cert.as_str())?) 
.key(std::fs::read(config.query.api_tls_server_key.as_str())?); diff --git a/src/query/service/src/api/rpc/exchange/exchange_manager.rs b/src/query/service/src/api/rpc/exchange/exchange_manager.rs index 2185502e91b7b..1d91ce9c3b1fe 100644 --- a/src/query/service/src/api/rpc/exchange/exchange_manager.rs +++ b/src/query/service/src/api/rpc/exchange/exchange_manager.rs @@ -23,7 +23,7 @@ use common_base::base::GlobalInstance; use common_base::runtime::GlobalIORuntime; use common_base::runtime::Thread; use common_base::runtime::TrySpawn; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::ErrorCode; use common_exception::Result; use common_grpc::ConnectionFactory; @@ -119,7 +119,7 @@ impl DataExchangeManager { } pub async fn create_client(address: &str) -> Result { - let config = GlobalConfig::instance(); + let config = GlobalSetting::instance(); let address = address.to_string(); GlobalIORuntime::instance() @@ -237,7 +237,7 @@ impl DataExchangeManager { let settings = ctx.get_settings(); let timeout = settings.get_flight_client_timeout()?; let root_actions = actions.get_root_actions()?; - let conf = GlobalConfig::instance(); + let conf = GlobalSetting::instance(); // Initialize channels between cluster nodes actions diff --git a/src/query/service/src/api/rpc/packets/packet.rs b/src/query/service/src/api/rpc/packets/packet.rs index d86562c91c0a2..76d78cf9935ca 100644 --- a/src/query/service/src/api/rpc/packets/packet.rs +++ b/src/query/service/src/api/rpc/packets/packet.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use common_arrow::arrow_format::flight::service::flight_service_client::FlightServiceClient; -use common_config::Config; +use common_config::Setting; use common_exception::Result; use common_grpc::ConnectionFactory; @@ -21,12 +21,12 @@ use crate::api::FlightClient; #[async_trait::async_trait] pub trait Packet: Send + Sync { - async fn commit(&self, config: &Config, timeout: u64) -> Result<()>; + async fn commit(&self, config: &Setting, timeout: u64) -> Result<()>; } #[async_trait::async_trait] impl Packet for Vec { - async fn commit(&self, config: &Config, timeout: u64) -> Result<()> { + async fn commit(&self, config: &Setting, timeout: u64) -> Result<()> { for packet in self.iter() { packet.commit(config, timeout).await?; } @@ -35,7 +35,7 @@ impl Packet for Vec { } } -pub async fn create_client(config: &Config, address: &str) -> Result { +pub async fn create_client(config: &Setting, address: &str) -> Result { match config.tls_query_cli_enabled() { true => Ok(FlightClient::new(FlightServiceClient::new( ConnectionFactory::create_rpc_channel( diff --git a/src/query/service/src/api/rpc/packets/packet_execute.rs b/src/query/service/src/api/rpc/packets/packet_execute.rs index 35000e6ae38f2..56e9eb5b18a63 100644 --- a/src/query/service/src/api/rpc/packets/packet_execute.rs +++ b/src/query/service/src/api/rpc/packets/packet_execute.rs @@ -15,7 +15,7 @@ use std::collections::HashMap; use std::sync::Arc; -use common_config::Config; +use common_config::Setting; use common_exception::ErrorCode; use common_exception::Result; use common_meta_types::NodeInfo; @@ -49,7 +49,7 @@ impl ExecutePartialQueryPacket { #[async_trait::async_trait] impl Packet for ExecutePartialQueryPacket { - async fn commit(&self, config: &Config, timeout: u64) -> Result<()> { + async fn commit(&self, config: &Setting, timeout: u64) -> Result<()> { if !self.executors_info.contains_key(&self.executor) { return Err(ErrorCode::ClusterUnknownNode(format!( "Not found {} node in cluster", diff --git 
a/src/query/service/src/api/rpc/packets/packet_executor.rs b/src/query/service/src/api/rpc/packets/packet_executor.rs index 1fef1e582f3da..68175630f537e 100644 --- a/src/query/service/src/api/rpc/packets/packet_executor.rs +++ b/src/query/service/src/api/rpc/packets/packet_executor.rs @@ -15,7 +15,7 @@ use std::collections::HashMap; use std::sync::Arc; -use common_config::Config; +use common_config::Setting; use common_exception::ErrorCode; use common_exception::Result; use common_meta_types::NodeInfo; @@ -56,7 +56,7 @@ impl QueryFragmentsPlanPacket { #[async_trait::async_trait] impl Packet for QueryFragmentsPlanPacket { - async fn commit(&self, config: &Config, timeout: u64) -> Result<()> { + async fn commit(&self, config: &Setting, timeout: u64) -> Result<()> { if !self.executors_info.contains_key(&self.executor) { return Err(ErrorCode::Internal(format!( "Not found {} node in cluster", diff --git a/src/query/service/src/api/rpc/packets/packet_publisher.rs b/src/query/service/src/api/rpc/packets/packet_publisher.rs index 1a22c6119bb00..cf7e648054793 100644 --- a/src/query/service/src/api/rpc/packets/packet_publisher.rs +++ b/src/query/service/src/api/rpc/packets/packet_publisher.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::Config; +use common_config::Setting; use common_exception::Result; use common_meta_types::NodeInfo; @@ -53,7 +53,7 @@ impl InitNodesChannelPacket { #[async_trait::async_trait] impl Packet for InitNodesChannelPacket { - async fn commit(&self, config: &Config, timeout: u64) -> Result<()> { + async fn commit(&self, config: &Setting, timeout: u64) -> Result<()> { let executor_info = &self.executor; let mut conn = create_client(config, &executor_info.flight_address).await?; let action = FlightAction::InitNodesChannel(InitNodesChannel { diff --git a/src/query/service/src/api/rpc_service.rs b/src/query/service/src/api/rpc_service.rs index 350d8e45b31be..dcc62e3cf59c9 100644 --- a/src/query/service/src/api/rpc_service.rs +++ 
b/src/query/service/src/api/rpc_service.rs @@ -20,7 +20,7 @@ use common_arrow::arrow_format::flight::service::flight_service_server::FlightSe use common_base::base::tokio; use common_base::base::tokio::net::TcpListener; use common_base::base::tokio::sync::Notify; -use common_config::Config; +use common_config::Setting; use common_exception::ErrorCode; use common_exception::Result; use tokio_stream::wrappers::TcpListenerStream; @@ -33,12 +33,12 @@ use crate::api::rpc::DatabendQueryFlightService; use crate::servers::Server as DatabendQueryServer; pub struct RpcService { - pub config: Config, + pub config: Setting, pub abort_notify: Arc, } impl RpcService { - pub fn create(config: Config) -> Result> { + pub fn create(config: Setting) -> Result> { Ok(Box::new(Self { config, abort_notify: Arc::new(Notify::new()), @@ -60,7 +60,7 @@ impl RpcService { } } - async fn server_tls_config(conf: &Config) -> Result { + async fn server_tls_config(conf: &Setting) -> Result { let cert = tokio::fs::read(conf.query.rpc_tls_server_cert.as_str()).await?; let key = tokio::fs::read(conf.query.rpc_tls_server_key.as_str()).await?; let server_identity = Identity::from_pem(cert, key); diff --git a/src/query/service/src/auth.rs b/src/query/service/src/auth.rs index 319cb43424b35..1d342fd13b505 100644 --- a/src/query/service/src/auth.rs +++ b/src/query/service/src/auth.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -pub use common_config::Config; +pub use common_config::Setting; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::principal::AuthInfo; @@ -41,7 +41,7 @@ pub enum Credential { } impl AuthMgr { - pub fn create(cfg: &Config) -> Result> { + pub fn create(cfg: &Setting) -> Result> { Ok(Arc::new(AuthMgr { jwt_auth: JwtAuthenticator::try_create( cfg.query.jwt_key_file.clone(), diff --git a/src/query/service/src/catalogs/catalog_manager.rs b/src/query/service/src/catalogs/catalog_manager.rs index 5663163fe1bc1..2b5cac1bff604 100644 --- 
a/src/query/service/src/catalogs/catalog_manager.rs +++ b/src/query/service/src/catalogs/catalog_manager.rs @@ -18,8 +18,8 @@ use common_base::base::GlobalInstance; use common_catalog::catalog::Catalog; pub use common_catalog::catalog::CatalogManager; use common_catalog::catalog_kind::CATALOG_DEFAULT; -use common_config::CatalogConfig; -use common_config::Config; +use common_config::CatalogSetting; +use common_config::Setting; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::CatalogType; @@ -33,13 +33,13 @@ use crate::catalogs::DatabaseCatalog; #[async_trait::async_trait] pub trait CatalogManagerHelper { - async fn init(conf: &Config) -> Result<()>; + async fn init(conf: &Setting) -> Result<()>; - async fn try_create(conf: &Config) -> Result>; + async fn try_create(conf: &Setting) -> Result>; - async fn register_build_in_catalogs(&self, conf: &Config) -> Result<()>; + async fn register_build_in_catalogs(&self, conf: &Setting) -> Result<()>; - fn register_external_catalogs(&self, conf: &Config) -> Result<()>; + fn register_external_catalogs(&self, conf: &Setting) -> Result<()>; fn create_user_defined_catalog(&self, req: CreateCatalogReq) -> Result<()>; @@ -48,13 +48,13 @@ pub trait CatalogManagerHelper { #[async_trait::async_trait] impl CatalogManagerHelper for CatalogManager { - async fn init(conf: &Config) -> Result<()> { + async fn init(conf: &Setting) -> Result<()> { GlobalInstance::set(Self::try_create(conf).await?); Ok(()) } - async fn try_create(conf: &Config) -> Result> { + async fn try_create(conf: &Setting) -> Result> { let catalog_manager = CatalogManager { catalogs: DashMap::new(), }; @@ -69,7 +69,7 @@ impl CatalogManagerHelper for CatalogManager { Ok(Arc::new(catalog_manager)) } - async fn register_build_in_catalogs(&self, conf: &Config) -> Result<()> { + async fn register_build_in_catalogs(&self, conf: &Setting) -> Result<()> { let default_catalog: Arc = 
Arc::new(DatabaseCatalog::try_create_with_config(conf.clone()).await?); self.catalogs @@ -77,14 +77,14 @@ impl CatalogManagerHelper for CatalogManager { Ok(()) } - fn register_external_catalogs(&self, conf: &Config) -> Result<()> { + fn register_external_catalogs(&self, conf: &Setting) -> Result<()> { // currently, if the `hive` feature is not enabled // the loop will quit after the first iteration. // this is expected. #[allow(clippy::never_loop)] for (name, ctl) in conf.catalogs.iter() { match ctl { - CatalogConfig::Hive(ctl) => { + CatalogSetting::Hive(ctl) => { // register hive catalog #[cfg(not(feature = "hive"))] { diff --git a/src/query/service/src/catalogs/default/database_catalog.rs b/src/query/service/src/catalogs/default/database_catalog.rs index 1c0ab50caa20e..66ae364d1360f 100644 --- a/src/query/service/src/catalogs/default/database_catalog.rs +++ b/src/query/service/src/catalogs/default/database_catalog.rs @@ -16,7 +16,7 @@ use std::any::Any; use std::sync::Arc; use common_catalog::table_args::TableArgs; -use common_config::Config; +use common_config::Setting; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::CountTablesReply; @@ -87,7 +87,7 @@ impl DatabaseCatalog { } } - pub async fn try_create_with_config(conf: Config) -> Result { + pub async fn try_create_with_config(conf: Setting) -> Result { let immutable_catalog = ImmutableCatalog::try_create_with_config(&conf).await?; let mutable_catalog = MutableCatalog::try_create_with_config(conf).await?; let table_function_factory = TableFunctionFactory::create(); diff --git a/src/query/service/src/catalogs/default/immutable_catalog.rs b/src/query/service/src/catalogs/default/immutable_catalog.rs index 1dda37632ee56..2fce94247419f 100644 --- a/src/query/service/src/catalogs/default/immutable_catalog.rs +++ b/src/query/service/src/catalogs/default/immutable_catalog.rs @@ -15,7 +15,7 @@ use std::any::Any; use std::sync::Arc; -use common_config::Config; +use 
common_config::Setting; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::CountTablesReply; @@ -70,7 +70,7 @@ pub struct ImmutableCatalog { } impl ImmutableCatalog { - pub async fn try_create_with_config(conf: &Config) -> Result { + pub async fn try_create_with_config(conf: &Setting) -> Result { // The global db meta. let mut sys_db_meta = InMemoryMetas::create(SYS_DB_ID_BEGIN, SYS_TBL_ID_BEGIN); sys_db_meta.init_db("system"); diff --git a/src/query/service/src/catalogs/default/mutable_catalog.rs b/src/query/service/src/catalogs/default/mutable_catalog.rs index 475b4e58e108c..44d0825a49d4d 100644 --- a/src/query/service/src/catalogs/default/mutable_catalog.rs +++ b/src/query/service/src/catalogs/default/mutable_catalog.rs @@ -15,7 +15,7 @@ use std::any::Any; use std::sync::Arc; -use common_config::Config; +use common_config::Setting; use common_exception::Result; use common_meta_api::SchemaApi; use common_meta_app::schema::CountTablesReply; @@ -91,7 +91,7 @@ impl MutableCatalog { /// /// MetaEmbedded /// ``` - pub async fn try_create_with_config(conf: Config) -> Result { + pub async fn try_create_with_config(conf: Setting) -> Result { let meta = { let provider = Arc::new(MetaStoreProvider::new(conf.meta.to_meta_grpc_client_conf())); diff --git a/src/query/service/src/clusters/cluster.rs b/src/query/service/src/clusters/cluster.rs index 27206766ccf16..bf77eac8eca31 100644 --- a/src/query/service/src/clusters/cluster.rs +++ b/src/query/service/src/clusters/cluster.rs @@ -32,7 +32,7 @@ use common_base::base::GlobalUniqName; use common_base::base::SignalStream; use common_base::base::SignalType; pub use common_catalog::cluster_info::Cluster; -use common_config::Config; +use common_config::Setting; use common_config::DATABEND_COMMIT_VERSION; use common_exception::ErrorCode; use common_exception::Result; @@ -73,7 +73,7 @@ pub trait ClusterHelper { fn is_empty(&self) -> bool; fn is_local(&self, node: &NodeInfo) -> bool; fn 
local_id(&self) -> String; - async fn create_node_conn(&self, name: &str, config: &Config) -> Result; + async fn create_node_conn(&self, name: &str, config: &Setting) -> Result; fn get_nodes(&self) -> Vec>; } @@ -102,7 +102,7 @@ impl ClusterHelper for Cluster { self.local_id.clone() } - async fn create_node_conn(&self, name: &str, config: &Config) -> Result { + async fn create_node_conn(&self, name: &str, config: &Setting) -> Result { for node in &self.nodes { if node.id == name { return match config.tls_query_cli_enabled() { @@ -140,7 +140,7 @@ impl ClusterHelper for Cluster { impl ClusterDiscovery { const METRIC_LABEL_FUNCTION: &'static str = "function"; - pub async fn create_meta_client(cfg: &Config) -> Result { + pub async fn create_meta_client(cfg: &Setting) -> Result { let meta_api_provider = MetaStoreProvider::new(cfg.meta.to_meta_grpc_client_conf()); match meta_api_provider.create_meta_store().await { Ok(meta_store) => Ok(meta_store), @@ -150,14 +150,14 @@ impl ClusterDiscovery { } } - pub async fn init(cfg: Config) -> Result<()> { + pub async fn init(cfg: Setting) -> Result<()> { let metastore = ClusterDiscovery::create_meta_client(&cfg).await?; GlobalInstance::set(Self::try_create(&cfg, metastore).await?); Ok(()) } - pub async fn try_create(cfg: &Config, metastore: MetaStore) -> Result> { + pub async fn try_create(cfg: &Setting, metastore: MetaStore) -> Result> { let (lift_time, provider) = Self::create_provider(cfg, metastore)?; Ok(Arc::new(ClusterDiscovery { @@ -180,7 +180,7 @@ impl ClusterDiscovery { } fn create_provider( - cfg: &Config, + cfg: &Setting, metastore: MetaStore, ) -> Result<(Duration, Arc)> { // TODO: generate if tenant or cluster id is empty @@ -192,7 +192,7 @@ impl ClusterDiscovery { Ok((lift_time, Arc::new(cluster_manager))) } - pub async fn discover(&self, config: &Config) -> Result> { + pub async fn discover(&self, config: &Setting) -> Result> { match self.api_provider.get_nodes().await { Err(cause) => { 
label_counter_with_val_and_labels( @@ -351,7 +351,7 @@ impl ClusterDiscovery { }; } - pub async fn register_to_metastore(self: &Arc, cfg: &Config) -> Result<()> { + pub async fn register_to_metastore(self: &Arc, cfg: &Setting) -> Result<()> { let cpus = cfg.query.num_cpus; let mut address = cfg.query.flight_api_address.clone(); @@ -503,7 +503,7 @@ impl ClusterHeartbeat { } } -pub async fn create_client(config: &Config, address: &str) -> Result { +pub async fn create_client(config: &Setting, address: &str) -> Result { match config.tls_query_cli_enabled() { true => Ok(FlightClient::new(FlightServiceClient::new( ConnectionFactory::create_rpc_channel( diff --git a/src/query/service/src/databases/database_factory.rs b/src/query/service/src/databases/database_factory.rs index 6e56e574bb7ac..41d7030d95be3 100644 --- a/src/query/service/src/databases/database_factory.rs +++ b/src/query/service/src/databases/database_factory.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::Config; +use common_config::Setting; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::DatabaseInfo; @@ -45,7 +45,7 @@ pub struct DatabaseFactory { } impl DatabaseFactory { - pub fn create(_: Config) -> Self { + pub fn create(_: Setting) -> Self { let creators: DashMap> = DashMap::new(); creators.insert( DefaultDatabase::NAME.to_string(), diff --git a/src/query/service/src/databases/system/system_database.rs b/src/query/service/src/databases/system/system_database.rs index fbca5e70f5ac0..f1fe1f2e7e569 100644 --- a/src/query/service/src/databases/system/system_database.rs +++ b/src/query/service/src/databases/system/system_database.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::Config; +use common_config::Setting; use common_meta_app::schema::DatabaseIdent; use common_meta_app::schema::DatabaseInfo; use common_meta_app::schema::DatabaseMeta; @@ -54,7 +54,7 @@ pub struct SystemDatabase { } impl SystemDatabase { - pub fn create(sys_db_meta: 
&mut InMemoryMetas, config: &Config) -> Self { + pub fn create(sys_db_meta: &mut InMemoryMetas, config: &Setting) -> Self { let table_list: Vec> = vec![ OneTable::create(sys_db_meta.next_table_id()), FunctionsTable::create(sys_db_meta.next_table_id()), diff --git a/src/query/service/src/global_services.rs b/src/query/service/src/global_services.rs index 6e65ebc036916..2dc2c86550d24 100644 --- a/src/query/service/src/global_services.rs +++ b/src/query/service/src/global_services.rs @@ -15,8 +15,8 @@ use common_base::base::GlobalInstance; use common_base::runtime::GlobalIORuntime; use common_catalog::catalog::CatalogManager; -use common_config::Config; -use common_config::GlobalConfig; +use common_config::GlobalSetting; +use common_config::Setting; use common_exception::Result; use common_storage::CacheOperator; use common_storage::DataOperator; @@ -35,14 +35,14 @@ use crate::sessions::SessionManager; pub struct GlobalServices; impl GlobalServices { - pub async fn init(config: Config) -> Result<()> { + pub async fn init(config: Setting) -> Result<()> { GlobalInstance::init_production(); GlobalServices::init_with(config).await } - pub async fn init_with(config: Config) -> Result<()> { + pub async fn init_with(config: Setting) -> Result<()> { // The order of initialization is very important - GlobalConfig::init(config.clone())?; + GlobalSetting::init(config.clone())?; let app_name_shuffle = format!("{}-{}", config.query.tenant_id, config.query.cluster_id); @@ -61,7 +61,7 @@ impl GlobalServices { config.query.tenant_id.clone(), )?; - CacheManager::init(&config.query)?; + CacheManager::init(&config.cache, &config.query.tenant_id)?; CatalogManager::init(&config).await?; HttpQueryManager::init(&config).await?; DataExchangeManager::init()?; diff --git a/src/query/service/src/interpreters/access/management_mode_access.rs b/src/query/service/src/interpreters/access/management_mode_access.rs index c202e450f0143..46d147d1c7efe 100644 --- 
a/src/query/service/src/interpreters/access/management_mode_access.rs +++ b/src/query/service/src/interpreters/access/management_mode_access.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::ErrorCode; use common_exception::Result; @@ -32,7 +32,7 @@ impl AccessChecker for ManagementModeAccess { // Check what we can do if in management mode. async fn check(&self, plan: &Plan) -> Result<()> { // Allows for management-mode. - if GlobalConfig::instance().query.management_mode { + if GlobalSetting::instance().query.management_mode { let ok = match plan { Plan::Query {rewrite_kind, .. } => { use common_sql::plans::RewriteKind; diff --git a/src/query/service/src/interpreters/interpreter_metrics.rs b/src/query/service/src/interpreters/interpreter_metrics.rs index d42d9af0bc0a0..267701431d3ba 100644 --- a/src/query/service/src/interpreters/interpreter_metrics.rs +++ b/src/query/service/src/interpreters/interpreter_metrics.rs @@ -16,7 +16,7 @@ use std::time::Duration; use std::time::SystemTime; use std::time::UNIX_EPOCH; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::ErrorCode; use common_metrics::label_counter_with_val_and_labels; use common_metrics::label_histogram_with_val; @@ -56,7 +56,7 @@ impl InterpreterMetrics { let handler_type = ctx.get_current_session().get_type().to_string(); let query_kind = ctx.get_query_kind(); let tenant_id = ctx.get_tenant(); - let cluster_id = GlobalConfig::instance().query.cluster_id.clone(); + let cluster_id = GlobalSetting::instance().query.cluster_id.clone(); vec![ (LABEL_HANDLER, handler_type), diff --git a/src/query/service/src/interpreters/interpreter_query_log.rs b/src/query/service/src/interpreters/interpreter_query_log.rs index 995f306e1c4f2..85a70b7c47875 100644 --- 
a/src/query/service/src/interpreters/interpreter_query_log.rs +++ b/src/query/service/src/interpreters/interpreter_query_log.rs @@ -18,7 +18,7 @@ use std::time::Duration; use std::time::SystemTime; use std::time::UNIX_EPOCH; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::ErrorCode; use common_exception::Result; use common_storages_system::LogType; @@ -81,7 +81,7 @@ impl InterpreterQueryLog { // User. let handler_type = ctx.get_current_session().get_type().to_string(); let tenant_id = ctx.get_tenant(); - let cluster_id = GlobalConfig::instance().query.cluster_id.clone(); + let cluster_id = GlobalSetting::instance().query.cluster_id.clone(); let user = ctx.get_current_user()?; let sql_user = user.name; let sql_user_quota = format!("{:?}", user.quota); @@ -185,8 +185,8 @@ impl InterpreterQueryLog { pub fn log_finish(ctx: &QueryContext, now: SystemTime, err: Option) -> Result<()> { // User. let handler_type = ctx.get_current_session().get_type().to_string(); - let tenant_id = GlobalConfig::instance().query.tenant_id.clone(); - let cluster_id = GlobalConfig::instance().query.cluster_id.clone(); + let tenant_id = GlobalSetting::instance().query.tenant_id.clone(); + let cluster_id = GlobalSetting::instance().query.cluster_id.clone(); let user = ctx.get_current_user()?; let sql_user = user.name; let sql_user_quota = format!("{:?}", user.quota); diff --git a/src/query/service/src/interpreters/interpreter_unsetting.rs b/src/query/service/src/interpreters/interpreter_unsetting.rs index 7c0407d5e1c0b..8bdc36a40ac05 100644 --- a/src/query/service/src/interpreters/interpreter_unsetting.rs +++ b/src/query/service/src/interpreters/interpreter_unsetting.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::Result; use common_settings::ScopeLevel::Global; use common_sql::plans::UnSettingPlan; @@ -58,14 +58,14 @@ impl Interpreter for UnSettingInterpreter { } let 
default_val = { if setting == "max_memory_usage" { - let conf = GlobalConfig::instance(); + let conf = GlobalSetting::instance(); if conf.query.max_server_memory_usage == 0 { settings.check_and_get_default_value(setting)?.to_string() } else { conf.query.max_server_memory_usage.to_string() } } else if setting == "max_threads" { - let conf = GlobalConfig::instance(); + let conf = GlobalSetting::instance(); if conf.query.num_cpus == 0 { settings.check_and_get_default_value(setting)?.to_string() } else { diff --git a/src/query/service/src/procedures/procedure.rs b/src/query/service/src/procedures/procedure.rs index ce9b7ea94b281..2e285d39074cc 100644 --- a/src/query/service/src/procedures/procedure.rs +++ b/src/query/service/src/procedures/procedure.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataBlock; @@ -45,7 +45,7 @@ pub trait Procedure: Sync + Send { features.num_arguments, )?; - if features.management_mode_required && !GlobalConfig::instance().query.management_mode { + if features.management_mode_required && !GlobalSetting::instance().query.management_mode { return Err(ErrorCode::ManagementModePermissionDenied(format!( "Access denied: '{}' only used in management-mode", self.name() diff --git a/src/query/service/src/servers/http/http_services.rs b/src/query/service/src/servers/http/http_services.rs index a69d67de3942d..284dbc1277dd2 100644 --- a/src/query/service/src/servers/http/http_services.rs +++ b/src/query/service/src/servers/http/http_services.rs @@ -15,8 +15,8 @@ use std::net::SocketAddr; use std::path::Path; -use common_config::Config; -use common_config::GlobalConfig; +use common_config::GlobalSetting; +use common_config::Setting; use common_exception::Result; use common_http::HttpShutdownHandler; use poem::get; @@ -80,7 +80,7 @@ impl HttpHandler { })) } - async fn build_router(&self, config: &Config, 
sock: SocketAddr) -> Result { + async fn build_router(&self, config: &Setting, sock: SocketAddr) -> Result { let ep = match self.kind { HttpHandlerKind::Query => Route::new() .at( @@ -105,7 +105,7 @@ impl HttpHandler { .boxed()) } - fn build_tls(config: &Config) -> Result { + fn build_tls(config: &Setting) -> Result { let certificate = RustlsCertificate::new() .cert(std::fs::read( config.query.http_handler_tls_server_cert.as_str(), @@ -125,7 +125,7 @@ impl HttpHandler { async fn start_with_tls(&mut self, listening: SocketAddr) -> Result { info!("Http Handler TLS enabled"); - let config = GlobalConfig::instance(); + let config = GlobalSetting::instance(); let tls_config = Self::build_tls(config.as_ref())?; let router = self.build_router(config.as_ref(), listening).await?; @@ -136,7 +136,7 @@ impl HttpHandler { async fn start_without_tls(&mut self, listening: SocketAddr) -> Result { let router = self - .build_router(GlobalConfig::instance().as_ref(), listening) + .build_router(GlobalSetting::instance().as_ref(), listening) .await?; self.shutdown_handler .start_service(listening, None, router, None) @@ -151,7 +151,7 @@ impl Server for HttpHandler { } async fn start(&mut self, listening: SocketAddr) -> Result { - let config = GlobalConfig::instance(); + let config = GlobalSetting::instance(); match config.query.http_handler_tls_server_key.is_empty() || config.query.http_handler_tls_server_cert.is_empty() { diff --git a/src/query/service/src/servers/http/v1/query/http_query_manager.rs b/src/query/service/src/servers/http/v1/query/http_query_manager.rs index 772aae01ddca0..24d12eb707fd9 100644 --- a/src/query/service/src/servers/http/v1/query/http_query_manager.rs +++ b/src/query/service/src/servers/http/v1/query/http_query_manager.rs @@ -21,7 +21,7 @@ use common_base::base::tokio::time::sleep; use common_base::base::GlobalInstance; use common_base::runtime::GlobalIORuntime; use common_base::runtime::TrySpawn; -use common_config::Config; +use common_config::Setting; use 
common_exception::Result; use parking_lot::Mutex; use tracing::warn; @@ -48,7 +48,7 @@ pub struct HttpQueryManager { } impl HttpQueryManager { - pub async fn init(cfg: &Config) -> Result<()> { + pub async fn init(cfg: &Setting) -> Result<()> { GlobalInstance::set(Arc::new(HttpQueryManager { queries: Arc::new(RwLock::new(HashMap::new())), sessions: Mutex::new(ExpiringMap::default()), diff --git a/src/query/service/src/sessions/query_ctx_shared.rs b/src/query/service/src/sessions/query_ctx_shared.rs index 542561588644f..e4b21df1abefc 100644 --- a/src/query/service/src/sessions/query_ctx_shared.rs +++ b/src/query/service/src/sessions/query_ctx_shared.rs @@ -23,7 +23,7 @@ use std::time::SystemTime; use common_base::base::Progress; use common_base::runtime::Runtime; use common_catalog::table_context::StageAttachment; -use common_config::Config; +use common_config::Setting; use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataBlock; @@ -84,7 +84,7 @@ pub struct QueryContextShared { impl QueryContextShared { pub fn try_create( - config: &Config, + config: &Setting, session: Arc, cluster_cache: Arc, ) -> Result> { diff --git a/src/query/service/src/sessions/session.rs b/src/query/service/src/sessions/session.rs index ad9b90e1c7e83..9b2f1a2f50069 100644 --- a/src/query/service/src/sessions/session.rs +++ b/src/query/service/src/sessions/session.rs @@ -16,7 +16,7 @@ use std::net::SocketAddr; use std::sync::Arc; use chrono_tz::Tz; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::ErrorCode; use common_exception::Result; use common_io::prelude::FormatSettings; @@ -126,7 +126,7 @@ impl Session { /// For a query, execution environment(e.g cluster) should be immutable. /// We can bind the environment to the context in create_context method. 
pub async fn create_query_context(self: &Arc) -> Result> { - let config = GlobalConfig::instance(); + let config = GlobalSetting::instance(); let session = self.clone(); let cluster = ClusterDiscovery::instance().discover(&config).await?; let shared = QueryContextShared::try_create(&config, session, cluster)?; diff --git a/src/query/service/src/sessions/session_ctx.rs b/src/query/service/src/sessions/session_ctx.rs index 8dc9c5278bfe2..f88e58d5a791e 100644 --- a/src/query/service/src/sessions/session_ctx.rs +++ b/src/query/service/src/sessions/session_ctx.rs @@ -18,7 +18,7 @@ use std::sync::atomic::Ordering; use std::sync::Arc; use std::sync::Weak; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::Result; use common_meta_app::principal::RoleInfo; use common_meta_app::principal::UserInfo; @@ -129,7 +129,7 @@ impl SessionContext { } pub fn get_current_tenant(&self) -> String { - let conf = GlobalConfig::instance(); + let conf = GlobalSetting::instance(); if conf.query.internal_enable_sandbox_tenant { let sandbox_tenant = self.settings.get_sandbox_tenant().unwrap_or_default(); diff --git a/src/query/service/src/sessions/session_mgr.rs b/src/query/service/src/sessions/session_mgr.rs index c6999557a720a..a1377818d06e9 100644 --- a/src/query/service/src/sessions/session_mgr.rs +++ b/src/query/service/src/sessions/session_mgr.rs @@ -24,8 +24,8 @@ use std::time::Duration; use common_base::base::tokio; use common_base::base::GlobalInstance; use common_base::base::SignalStream; -use common_config::Config; -use common_config::GlobalConfig; +use common_config::GlobalSetting; +use common_config::Setting; use common_exception::ErrorCode; use common_exception::Result; use common_metrics::label_counter; @@ -59,13 +59,13 @@ pub struct SessionManager { } impl SessionManager { - pub fn init(conf: &Config) -> Result<()> { + pub fn init(conf: &Setting) -> Result<()> { GlobalInstance::set(Self::create(conf)); Ok(()) } - pub fn create(conf: 
&Config) -> Arc { + pub fn create(conf: &Setting) -> Arc { let max_sessions = conf.query.max_active_sessions as usize; Arc::new(SessionManager { max_sessions, @@ -82,7 +82,7 @@ impl SessionManager { pub async fn create_session(&self, typ: SessionType) -> Result> { // TODO: maybe deadlock - let config = GlobalConfig::instance(); + let config = GlobalSetting::instance(); { let sessions = self.active_sessions.read(); self.validate_max_active_sessions(sessions.len(), "active sessions")?; @@ -149,7 +149,7 @@ impl SessionManager { } pub fn destroy_session(&self, session_id: &String) { - let config = GlobalConfig::instance(); + let config = GlobalSetting::instance(); label_counter( METRIC_SESSION_CLOSE_NUMBERS, &config.query.tenant_id, diff --git a/src/query/service/tests/it/configs.rs b/src/query/service/tests/it/configs.rs index 50c660fc8c043..b5b8eea2521a3 100644 --- a/src/query/service/tests/it/configs.rs +++ b/src/query/service/tests/it/configs.rs @@ -17,10 +17,12 @@ use std::env::temp_dir; use std::fs; use std::io::Write; -use common_config::CatalogConfig; -use common_config::CatalogHiveConfig; -use common_config::Config; +use common_config::CatalogHiveSetting; +use common_config::CatalogSetting; +use common_config::ExternalCacheStorageTypeConfig; +use common_config::Setting; use common_config::ThriftProtocol; +use common_exception::ErrorCode; use common_exception::Result; use pretty_assertions::assert_eq; @@ -44,16 +46,15 @@ fn test_env_config_s3() -> Result<()> { ("QUERY_FLIGHT_API_ADDRESS", Some("1.2.3.4:9091")), ("QUERY_ADMIN_API_ADDRESS", Some("1.2.3.4:8081")), ("QUERY_METRIC_API_ADDRESS", Some("1.2.3.4:7071")), - ("QUERY_TABLE_META_CACHE_ENABLED", Some("true")), - ("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_DISK_CACHE_ROOT", Some("_cache_env")), - ("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("256")), - ("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("10240")), + ("CACHE_ENABLE_TABLE_META_CACHES", 
Some("true")), + ("CACHE_DISK_PATH", Some("_cache_env")), + ("CACHE_DISK_MAX_BYTES", Some("512")), + ("CACHE_TABLE_META_SNAPSHOT_COUNT", Some("256")), + ("CACHE_TABLE_META_SEGMENT_COUNT", Some("10240")), ("META_ENDPOINTS", Some("0.0.0.0:9191")), - ("TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("3000")), + ("CACHE_TABLE_BLOOM_INDEX_META_COUNT", Some("3000")), ( - "TABLE_CACHE_BLOOM_INDEX_DATA_BYTES", + "CACHE_TABLE_BLOOM_INDEX_FILTER_COUNT", Some(format!("{}", 1024 * 1024 * 1024).as_str()), ), ("STORAGE_TYPE", Some("s3")), @@ -83,7 +84,9 @@ fn test_env_config_s3() -> Result<()> { ("CONFIG_FILE", None), ], || { - let configured = Config::load_for_test().expect("must success").into_outer(); + let configured = Setting::load_for_test() + .expect("must success") + .into_config(); assert_eq!("DEBUG", configured.log.level); @@ -125,15 +128,14 @@ fn test_env_config_s3() -> Result<()> { assert_eq!("us.key", configured.storage.s3.secret_access_key); assert_eq!("us.bucket", configured.storage.s3.bucket); - assert!(configured.query.table_engine_memory_enabled); - - assert!(configured.query.table_meta_cache_enabled); - assert_eq!(10240, configured.query.table_cache_segment_count); - assert_eq!(256, configured.query.table_cache_snapshot_count); - assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count); + assert!(configured.cache.enable_table_meta_caches); + assert!(configured.cache.enable_table_index_bloom); + assert_eq!(10240, configured.cache.table_meta_segment_count); + assert_eq!(256, configured.cache.table_meta_snapshot_count); + assert_eq!(3000, configured.cache.table_bloom_index_meta_count); assert_eq!( - 1024 * 1024, - configured.query.table_cache_bloom_index_filter_count + 1024 * 1024 * 1024, + configured.cache.table_bloom_index_filter_count ); assert_eq!(HashMap::new(), configured.catalogs); }, @@ -162,16 +164,15 @@ fn test_env_config_fs() -> Result<()> { ("QUERY_FLIGHT_API_ADDRESS", Some("1.2.3.4:9091")), ("QUERY_ADMIN_API_ADDRESS", Some("1.2.3.4:8081")), 
("QUERY_METRIC_API_ADDRESS", Some("1.2.3.4:7071")), - ("QUERY_TABLE_META_CACHE_ENABLED", Some("true")), - ("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_DISK_CACHE_ROOT", Some("_cache_env")), - ("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("256")), - ("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("10240")), + ("CACHE_ENABLE_TABLE_META_CACHE", Some("true")), + ("CACHE_DISK_PATH", Some("_cache_env")), + ("CACHE_DISK_MAX_BYTES", Some("512")), + ("CACHE_TABLE_META_SNAPSHOT_COUNT", Some("256")), + ("CACHE_TABLE_META_SEGMENT_COUNT", Some("10240")), ("META_ENDPOINTS", Some("0.0.0.0:9191")), - ("TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("3000")), + ("CACHE_TABLE_BLOOM_INDEX_META_COUNT", Some("3000")), ( - "TABLE_CACHE_BLOOM_INDEX_DATA_BYTES", + "CACHE_TABLE_BLOOM_INDEX_FILTER_COUNT", Some(format!("{}", 1024 * 1024 * 1024).as_str()), ), ("STORAGE_TYPE", Some("fs")), @@ -201,7 +202,9 @@ fn test_env_config_fs() -> Result<()> { ("CONFIG_FILE", None), ], || { - let configured = Config::load_for_test().expect("must success").into_outer(); + let configured = Setting::load_for_test() + .expect("must success") + .into_config(); assert_eq!("DEBUG", configured.log.level); @@ -243,18 +246,16 @@ fn test_env_config_fs() -> Result<()> { assert_eq!("", configured.storage.gcs.gcs_root); assert_eq!("", configured.storage.gcs.credential); - assert!(configured.query.table_engine_memory_enabled); - - assert!(configured.query.table_meta_cache_enabled); - assert_eq!(512, configured.query.table_memory_cache_mb_size); - assert_eq!("_cache_env", configured.query.table_disk_cache_root); - assert_eq!(21474836480, configured.query.table_disk_cache_max_size); - assert_eq!(10240, configured.query.table_cache_segment_count); - assert_eq!(256, configured.query.table_cache_snapshot_count); - assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count); + assert!(configured.cache.enable_table_index_bloom); + 
assert!(configured.cache.enable_table_meta_caches); + assert_eq!("_cache_env", configured.cache.disk_cache_config.path); + assert_eq!(512, configured.cache.disk_cache_config.max_bytes); + assert_eq!(10240, configured.cache.table_meta_segment_count); + assert_eq!(256, configured.cache.table_meta_snapshot_count); + assert_eq!(3000, configured.cache.table_bloom_index_meta_count); assert_eq!( - 1024 * 1024, - configured.query.table_cache_bloom_index_filter_count + 1024 * 1024 * 1024, + configured.cache.table_bloom_index_filter_count ); }, ); @@ -281,16 +282,15 @@ fn test_env_config_gcs() -> Result<()> { ("QUERY_FLIGHT_API_ADDRESS", Some("1.2.3.4:9091")), ("QUERY_ADMIN_API_ADDRESS", Some("1.2.3.4:8081")), ("QUERY_METRIC_API_ADDRESS", Some("1.2.3.4:7071")), - ("QUERY_TABLE_META_CACHE_ENABLED", Some("true")), - ("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_DISK_CACHE_ROOT", Some("_cache_env")), - ("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("256")), - ("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("10240")), + ("CACHE_ENABLE_TABLE_META_CACHE", Some("true")), + ("CACHE_DISK_PATH", Some("_cache_env")), + ("CACHE_DISK_MAX_BYTES", Some("512")), + ("CACHE_TABLE_META_SNAPSHOT_COUNT", Some("256")), + ("CACHE_TABLE_META_SEGMENT_COUNT", Some("10240")), ("META_ENDPOINTS", Some("0.0.0.0:9191")), - ("TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("3000")), + ("CACHE_TABLE_BLOOM_INDEX_META_COUNT", Some("3000")), ( - "TABLE_CACHE_BLOOM_INDEX_DATA_BYTES", + "CACHE_TABLE_BLOOM_INDEX_FILTER_COUNT", Some(format!("{}", 1024 * 1024 * 1024).as_str()), ), ("STORAGE_TYPE", Some("gcs")), @@ -320,7 +320,9 @@ fn test_env_config_gcs() -> Result<()> { ("CONFIG_FILE", None), ], || { - let configured = Config::load_for_test().expect("must success").into_outer(); + let configured = Setting::load_for_test() + .expect("must success") + .into_config(); assert_eq!("DEBUG", configured.log.level); @@ -369,18 +371,16 @@ fn test_env_config_gcs() -> 
Result<()> { assert_eq!("", configured.storage.oss.oss_access_key_id); assert_eq!("", configured.storage.oss.oss_access_key_secret); - assert!(configured.query.table_engine_memory_enabled); - - assert!(configured.query.table_meta_cache_enabled); - assert_eq!(512, configured.query.table_memory_cache_mb_size); - assert_eq!("_cache_env", configured.query.table_disk_cache_root); - assert_eq!(21474836480, configured.query.table_disk_cache_max_size); - assert_eq!(10240, configured.query.table_cache_segment_count); - assert_eq!(256, configured.query.table_cache_snapshot_count); - assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count); + assert!(configured.cache.enable_table_meta_caches); + assert!(configured.cache.enable_table_index_bloom); + assert_eq!("_cache_env", configured.cache.disk_cache_config.path); + assert_eq!(512, configured.cache.disk_cache_config.max_bytes); + assert_eq!(10240, configured.cache.table_meta_segment_count); + assert_eq!(256, configured.cache.table_meta_snapshot_count); + assert_eq!(3000, configured.cache.table_bloom_index_meta_count); assert_eq!( - 1024 * 1024, - configured.query.table_cache_bloom_index_filter_count + 1024 * 1024 * 1024, + configured.cache.table_bloom_index_filter_count ); }, ); @@ -407,16 +407,17 @@ fn test_env_config_oss() -> Result<()> { ("QUERY_FLIGHT_API_ADDRESS", Some("1.2.3.4:9091")), ("QUERY_ADMIN_API_ADDRESS", Some("1.2.3.4:8081")), ("QUERY_METRIC_API_ADDRESS", Some("1.2.3.4:7071")), - ("QUERY_TABLE_META_CACHE_ENABLED", Some("true")), - ("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_DISK_CACHE_ROOT", Some("_cache_env")), - ("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("512")), - ("QUERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("256")), - ("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("10240")), + ("CACHE_ENABLE_TABLE_META_CACHES", Some("true")), + ("CACHE_DATA_CACHE_STORAGE", Some("disk")), + ("TABLE_CACHE_BLOOM_INDEX_FILTER_COUNT", Some("1")), + ("CACHE_DISK_PATH", Some("_cache_env")), + 
("CACHE_DISK_MAX_BYTES", Some("512")), + ("CACHE_TABLE_META_SNAPSHOT_COUNT", Some("256")), + ("CACHE_TABLE_META_SEGMENT_COUNT", Some("10240")), ("META_ENDPOINTS", Some("0.0.0.0:9191")), - ("TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("3000")), + ("CACHE_TABLE_BLOOM_INDEX_META_COUNT", Some("3000")), ( - "TABLE_CACHE_BLOOM_INDEX_DATA_BYTES", + "CACHE_TABLE_BLOOM_INDEX_FILTER_COUNT", Some(format!("{}", 1024 * 1024 * 1024).as_str()), ), ("STORAGE_TYPE", Some("oss")), @@ -446,7 +447,9 @@ fn test_env_config_oss() -> Result<()> { ("CONFIG_FILE", None), ], || { - let configured = Config::load_for_test().expect("must success").into_outer(); + let configured = Setting::load_for_test() + .expect("must success") + .into_config(); assert_eq!("DEBUG", configured.log.level); @@ -502,18 +505,15 @@ fn test_env_config_oss() -> Result<()> { assert_eq!("", configured.storage.gcs.gcs_root); assert_eq!("", configured.storage.gcs.credential); - assert!(configured.query.table_engine_memory_enabled); - - assert!(configured.query.table_meta_cache_enabled); - assert_eq!(512, configured.query.table_memory_cache_mb_size); - assert_eq!("_cache_env", configured.query.table_disk_cache_root); - assert_eq!(21474836480, configured.query.table_disk_cache_max_size); - assert_eq!(10240, configured.query.table_cache_segment_count); - assert_eq!(256, configured.query.table_cache_snapshot_count); - assert_eq!(3000, configured.query.table_cache_bloom_index_meta_count); + assert!(configured.cache.enable_table_meta_caches); + assert_eq!("_cache_env", configured.cache.disk_cache_config.path); + assert_eq!(512, configured.cache.disk_cache_config.max_bytes); + assert_eq!(10240, configured.cache.table_meta_segment_count); + assert_eq!(256, configured.cache.table_meta_snapshot_count); + assert_eq!(3000, configured.cache.table_bloom_index_meta_count); assert_eq!( - 1024 * 1024, - configured.query.table_cache_bloom_index_filter_count + 1024 * 1024 * 1024, + configured.cache.table_bloom_index_filter_count ); }, ); @@ 
-561,15 +561,6 @@ table_engine_memory_enabled = true database_engine_github_enabled = true wait_timeout_mills = 5000 max_query_log_size = 10000 -table_meta_cache_enabled = false -table_cache_snapshot_count = 256 -table_cache_segment_count = 10240 -table_cache_block_meta_count = 102400 -table_memory_cache_mb_size = 256 -table_disk_cache_root = "_cache" -table_disk_cache_max_size = 1024 -table_cache_bloom_index_meta_count = 3000 -table_cache_bloom_index_filter_count = 1048576 management_mode = false jwt_key_file = "" async_insert_max_data_size = 10000 @@ -640,6 +631,19 @@ protocol = "binary" type = "hive" address = "127.0.0.1:9083" protocol = "binary" + +[cache] + +enable_table_meta_caches = false +table_meta_snapshot_count = 256 +table_meta_segment_count = 10240 +table_bloom_index_meta_count = 3000 +table_bloom_index_filter_count = 1048576 + +data_cache_storage = "disk" + +[cache.disk] +path = "_cache" "# .as_bytes(), )?; @@ -655,14 +659,21 @@ protocol = "binary" ("STORAGE_TYPE", None), ], || { - let cfg = Config::load_for_test() + let cfg = Setting::load_for_test() .expect("config load success") - .into_outer(); + .into_config(); assert_eq!("tenant_id_from_env", cfg.query.tenant_id); assert_eq!("access_key_id_from_env", cfg.storage.s3.access_key_id); assert_eq!("s3", cfg.storage.storage_type); + let cache_config = &cfg.cache; + assert_eq!( + cache_config.data_cache_storage, + ExternalCacheStorageTypeConfig::Disk + ); + assert_eq!(cache_config.disk_cache_config.path, "_cache"); + // NOTE: // // after the config conversion procedure: @@ -680,7 +691,7 @@ protocol = "binary" assert!(inner.is_ok(), "casting must success"); let cfg = inner.unwrap(); match cfg { - CatalogConfig::Hive(cfg) => { + CatalogSetting::Hive(cfg) => { assert_eq!("127.0.0.1:9083", cfg.address, "address incorrect"); assert_eq!("binary", cfg.protocol.to_string(), "protocol incorrect"); } @@ -715,11 +726,11 @@ protocol = "binary" temp_env::with_vars( vec![("CONFIG_FILE", 
Some(file_path.to_string_lossy().as_ref()))], || { - let cfg = Config::load_for_test().expect("config load success"); + let cfg = Setting::load_for_test().expect("config load success"); assert_eq!( cfg.catalogs["hive"], - CatalogConfig::Hive(CatalogHiveConfig { + CatalogSetting::Hive(CatalogHiveSetting { address: "1.1.1.1:10000".to_string(), protocol: ThriftProtocol::Binary, }) @@ -755,11 +766,11 @@ protocol = "binary" temp_env::with_vars( vec![("CONFIG_FILE", Some(file_path.to_string_lossy().as_ref()))], || { - let cfg = Config::load_for_test().expect("config load success"); + let cfg = Setting::load_for_test().expect("config load success"); assert_eq!( cfg.catalogs["my_hive"], - CatalogConfig::Hive(CatalogHiveConfig { + CatalogSetting::Hive(CatalogHiveSetting { address: "1.1.1.1:12000".to_string(), protocol: ThriftProtocol::Binary, }) @@ -772,3 +783,29 @@ protocol = "binary" Ok(()) } + +#[test] +fn test_env_config_obsoleted() -> Result<()> { + let obsoleted = vec![ + ("QUERY_TABLE_DISK_CACHE_MB_SIZE", Some("1")), + ("QUERY_TABLE_META_CACHE_ENABLED", Some("true")), + ("QUERY_TABLE_CACHE_BLOCK_META_COUNT", Some("1")), + ("QUERY_TABLE_MEMORY_CACHE_MB_SIZE", Some("1")), + ("QUERY_TABLE_DISK_CACHE_ROOT", Some("1")), + ("QUERY_TABLE_CACHE_SNAPSHOT_COUNT", Some("1")), + ("QUERY_TABLE_CACHE_STATISTIC_COUNT", Some("1")), + ("QUERY_TABLE_CACHE_SEGMENT_COUNT", Some("1")), + ("QUERY_TABLE_CACHE_BLOOM_INDEX_META_COUNT", Some("1")), + ("QUERY_TABLE_CACHE_BLOOM_INDEX_FILTER_COUNT", Some("1")), + ]; + + for env_var in obsoleted { + temp_env::with_vars(vec![env_var], || { + let r = Setting::load_for_test(); + assert!(r.is_err(), "expecting `Err`, but got `Ok`"); + assert_eq!(r.unwrap_err().code(), ErrorCode::INVALID_CONFIG) + }); + } + + Ok(()) +} diff --git a/src/query/service/tests/it/servers/http/clickhouse_handler.rs b/src/query/service/tests/it/servers/http/clickhouse_handler.rs index b18eba150b5c0..d74158912ec65 100644 --- 
a/src/query/service/tests/it/servers/http/clickhouse_handler.rs +++ b/src/query/service/tests/it/servers/http/clickhouse_handler.rs @@ -15,7 +15,7 @@ use std::collections::HashMap; use common_base::base::tokio; -use common_config::Config; +use common_config::Setting; use common_exception::Result; use databend_query::auth::AuthMgr; use databend_query::servers::http::middleware::HTTPSessionEndpoint; @@ -422,7 +422,7 @@ struct Server { } impl Server { - pub async fn new(config: &Config) -> Result { + pub async fn new(config: &Setting) -> Result { let session_middleware = HTTPSessionMiddleware::create(HttpHandlerKind::Clickhouse, AuthMgr::create(config)?); let endpoint = Route::new() diff --git a/src/query/service/tests/it/storages/fuse/table_test_fixture.rs b/src/query/service/tests/it/storages/fuse/table_test_fixture.rs index b42bdab35c7df..e7f6ac3081ea4 100644 --- a/src/query/service/tests/it/storages/fuse/table_test_fixture.rs +++ b/src/query/service/tests/it/storages/fuse/table_test_fixture.rs @@ -19,7 +19,7 @@ use std::sync::Arc; use common_ast::ast::Engine; use common_catalog::catalog_kind::CATALOG_DEFAULT; use common_catalog::table::AppendMode; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::Result; use common_expression::block_debug::assert_blocks_sorted_eq_with_name; use common_expression::infer_table_schema; @@ -426,7 +426,7 @@ pub async fn check_data_dir( check_last_snapshot: Option<()>, check_table_statistic_file: Option<()>, ) -> Result<()> { - let data_path = match &GlobalConfig::instance().storage.params { + let data_path = match &GlobalSetting::instance().storage.params { StorageParams::Fs(v) => v.root.clone(), _ => panic!("storage type is not fs"), }; diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index 4b7585da5fb2f..30510a4b0c0be 100644 --- 
a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -4,6 +4,18 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo +-----------+------------------------------------------+----------------------------------+----------+ | Column 0 | Column 1 | Column 2 | Column 3 | +-----------+------------------------------------------+----------------------------------+----------+ +| "cache" | "data_cache_storage" | "none" | "" | +| "cache" | "disk.max_bytes" | "21474836480" | "" | +| "cache" | "disk.path" | "./.databend/_cache" | "" | +| "cache" | "enable_table_index_bloom" | "true" | "" | +| "cache" | "enable_table_meta_caches" | "true" | "" | +| "cache" | "table_bloom_index_filter_count" | "1048576" | "" | +| "cache" | "table_bloom_index_meta_count" | "3000" | "" | +| "cache" | "table_data_cache_population_queue_size" | "65536" | "" | +| "cache" | "table_data_deserialized_data_bytes" | "0" | "" | +| "cache" | "table_meta_segment_count" | "10240" | "" | +| "cache" | "table_meta_snapshot_count" | "256" | "" | +| "cache" | "table_meta_statistic_count" | "256" | "" | | "log" | "dir" | "./.databend/logs" | "" | | "log" | "file.dir" | "./.databend/logs" | "" | | "log" | "file.format" | "text" | "" | @@ -61,20 +73,7 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo | "query" | "rpc_tls_server_key" | "" | "" | | "query" | "share_endpoint_address" | "" | "" | | "query" | "share_endpoint_auth_token_file" | "" | "" | -| "query" | "table_cache_block_meta_count" | "102400" | "" | -| "query" | "table_cache_bloom_index_filter_count" | "1048576" | "" | -| "query" | "table_cache_bloom_index_meta_count" | "3000" | "" | -| "query" | "table_cache_segment_count" | "10240" | "" | -| "query" | "table_cache_snapshot_count" | "256" | "" | -| "query" | "table_cache_statistic_count" | "256" | "" | -| "query" | "table_data_cache_enabled" | 
"false" | "" | -| "query" | "table_data_cache_in_memory_max_size" | "0" | "" | -| "query" | "table_data_cache_population_queue_size" | "65536" | "" | -| "query" | "table_disk_cache_max_size" | "21474836480" | "" | -| "query" | "table_disk_cache_root" | "_cache" | "" | | "query" | "table_engine_memory_enabled" | "true" | "" | -| "query" | "table_memory_cache_mb_size" | "256" | "" | -| "query" | "table_meta_cache_enabled" | "true" | "" | | "query" | "tenant_id" | "test" | "" | | "query" | "users" | "" | "" | | "query" | "wait_timeout_mills" | "5000" | "" | diff --git a/src/query/service/tests/it/tests/config.rs b/src/query/service/tests/it/tests/config.rs index 5a5b2b5546503..17834d16efcf2 100644 --- a/src/query/service/tests/it/tests/config.rs +++ b/src/query/service/tests/it/tests/config.rs @@ -14,17 +14,17 @@ use std::collections::HashMap; -use common_config::Config; +use common_config::Setting; use common_meta_app::principal::AuthInfo; use common_users::idm_config::IDMConfig; pub struct ConfigBuilder { - conf: Config, + conf: Setting, } impl ConfigBuilder { pub fn create() -> ConfigBuilder { - let mut conf = Config::default(); + let mut conf = Setting::default(); conf.query.tenant_id = "test".to_string(); conf.log = common_tracing::Config::new_testing(); @@ -128,11 +128,11 @@ impl ConfigBuilder { self } - pub fn build(self) -> Config { + pub fn build(self) -> Setting { self.conf } - pub fn config(&self) -> Config { + pub fn config(&self) -> Setting { self.conf.clone() } } diff --git a/src/query/service/tests/it/tests/context.rs b/src/query/service/tests/it/tests/context.rs index 35714080abf6c..f01bb9fcf6f85 100644 --- a/src/query/service/tests/it/tests/context.rs +++ b/src/query/service/tests/it/tests/context.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::Config; +use common_config::Setting; use common_config::DATABEND_COMMIT_VERSION; use common_exception::Result; use common_meta_app::principal::AuthInfo; @@ -69,7 +69,7 @@ pub async fn 
create_query_context_with_session( } pub async fn create_query_context_with_config( - config: Config, + config: Setting, mut current_user: Option, ) -> Result<(TestGuard, Arc)> { let guard = TestGlobalServices::setup(config).await?; diff --git a/src/query/service/tests/it/tests/sessions.rs b/src/query/service/tests/it/tests/sessions.rs index f24e3c7d7c7c0..d82f90e291597 100644 --- a/src/query/service/tests/it/tests/sessions.rs +++ b/src/query/service/tests/it/tests/sessions.rs @@ -13,7 +13,7 @@ // limitations under the License. use common_base::base::GlobalInstance; -use common_config::Config; +use common_config::Setting; use common_exception::Result; use common_tracing::set_panic_hook; use databend_query::clusters::ClusterDiscovery; @@ -27,7 +27,7 @@ unsafe impl Send for TestGlobalServices {} unsafe impl Sync for TestGlobalServices {} impl TestGlobalServices { - pub async fn setup(config: Config) -> Result { + pub async fn setup(config: Setting) -> Result { set_panic_hook(); std::env::set_var("UNIT_TEST", "TRUE"); diff --git a/src/query/settings/src/lib.rs b/src/query/settings/src/lib.rs index 738de0cc18f46..a31e890ce767e 100644 --- a/src/query/settings/src/lib.rs +++ b/src/query/settings/src/lib.rs @@ -25,8 +25,8 @@ use std::sync::Arc; use common_ast::Dialect; use common_base::runtime::GlobalIORuntime; use common_base::runtime::TrySpawn; -use common_config::Config; -use common_config::GlobalConfig; +use common_config::GlobalSetting; +use common_config::Setting; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::principal::UserSetting; @@ -80,7 +80,7 @@ impl Settings { user_api: Arc, tenant: String, ) -> Result> { - let config = GlobalConfig::instance(); + let config = GlobalSetting::instance(); let settings = Self::default_settings(&tenant, config)?; let ret = { @@ -107,7 +107,7 @@ impl Settings { Ok(ret) } - pub fn default_settings(tenant: &str, conf: Arc) -> Result> { + pub fn default_settings(tenant: &str, conf: Arc) -> Result> 
{ let memory_info = sys_info::mem_info().map_err(ErrorCode::from_std_error)?; let mut num_cpus = num_cpus::get() as u64; if conf.query.num_cpus != 0 { @@ -484,7 +484,7 @@ impl Settings { // Only used for testings pub fn default_test_settings() -> Result> { - Self::default_settings("default", Arc::new(Config::default())) + Self::default_settings("default", Arc::new(Setting::default())) } // Get max_block_size. diff --git a/src/query/sharing/src/signer.rs b/src/query/sharing/src/signer.rs index 1cd1018e36e5b..cfaf461429cea 100644 --- a/src/query/sharing/src/signer.rs +++ b/src/query/sharing/src/signer.rs @@ -22,7 +22,7 @@ use anyhow::anyhow; use anyhow::Result; use bytes::Bytes; use common_auth::RefreshableToken; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use http::header::AUTHORIZATION; use http::header::CONTENT_LENGTH; use http::Method; @@ -152,7 +152,7 @@ impl SharedSigner { .collect(); let bs = Bytes::from(serde_json::to_vec(&reqs)?); let auth = self.token.to_header().await?; - let requester = GlobalConfig::instance().as_ref().query.tenant_id.clone(); + let requester = GlobalSetting::instance().as_ref().query.tenant_id.clone(); let req = Request::builder() .method(Method::POST) .uri(&self.endpoint) diff --git a/src/query/sql/src/planner/binder/copy.rs b/src/query/sql/src/planner/binder/copy.rs index bdd8253fac1d7..476bb2cefbb0f 100644 --- a/src/query/sql/src/planner/binder/copy.rs +++ b/src/query/sql/src/planner/binder/copy.rs @@ -29,7 +29,7 @@ use common_catalog::plan::DataSourcePlan; use common_catalog::plan::Partitions; use common_catalog::plan::StageTableInfo; use common_catalog::table_context::TableContext; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::principal::FileFormatOptions; @@ -287,7 +287,7 @@ impl<'a> Binder { .await?; let (storage_params, path) = parse_uri_location(src_uri_location)?; - if !storage_params.is_secure() 
&& !GlobalConfig::instance().storage.allow_insecure { + if !storage_params.is_secure() && !GlobalSetting::instance().storage.allow_insecure { return Err(ErrorCode::StorageInsecure( "copy from insecure storage is not allowed", )); @@ -405,7 +405,7 @@ impl<'a> Binder { .map_err(ErrorCode::SyntaxException)?; let (storage_params, path) = parse_uri_location(dst_uri_location)?; - if !storage_params.is_secure() && !GlobalConfig::instance().storage.allow_insecure { + if !storage_params.is_secure() && !GlobalSetting::instance().storage.allow_insecure { return Err(ErrorCode::StorageInsecure( "copy into insecure storage is not allowed", )); @@ -469,7 +469,7 @@ impl<'a> Binder { .map_err(ErrorCode::SyntaxException)?; let (storage_params, path) = parse_uri_location(dst_uri_location)?; - if !storage_params.is_secure() && !GlobalConfig::instance().storage.allow_insecure { + if !storage_params.is_secure() && !GlobalSetting::instance().storage.allow_insecure { return Err(ErrorCode::StorageInsecure( "copy into insecure storage is not allowed", )); diff --git a/src/query/sql/src/planner/binder/location.rs b/src/query/sql/src/planner/binder/location.rs index b24b5ef1553a8..36e3a3df0a962 100644 --- a/src/query/sql/src/planner/binder/location.rs +++ b/src/query/sql/src/planner/binder/location.rs @@ -18,7 +18,7 @@ use std::io::Result; use anyhow::anyhow; use common_ast::ast::UriLocation; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_meta_app::storage::StorageAzblobConfig; use common_meta_app::storage::StorageFsConfig; use common_meta_app::storage::StorageGcsConfig; @@ -164,7 +164,7 @@ fn parse_s3_params(l: &mut UriLocation, root: String) -> Result { root, // Disable credential load by default. // TODO(xuanwo): we should support AssumeRole. 
- disable_credential_loader: !GlobalConfig::instance().storage.allow_insecure, + disable_credential_loader: !GlobalSetting::instance().storage.allow_insecure, enable_virtual_host_style, role_arn, external_id, diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index 3d47e3aedf2bd..156b337c2aa08 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -37,7 +37,7 @@ use common_catalog::table::ColumnStatistics; use common_catalog::table::NavigationPoint; use common_catalog::table::Table; use common_catalog::table_function::TableFunction; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::ErrorCode; use common_exception::Result; use common_expression::types::DataType; @@ -298,7 +298,7 @@ impl Binder { FileLocation::Uri(mut l) => { let (storage_params, path) = parse_uri_location(&mut l)?; if !storage_params.is_secure() - && !GlobalConfig::instance().storage.allow_insecure + && !GlobalSetting::instance().storage.allow_insecure { return Err(ErrorCode::StorageInsecure( "copy from insecure storage is not allowed", diff --git a/src/query/sql/src/planner/expression_parser.rs b/src/query/sql/src/planner/expression_parser.rs index 00d54ac5f3cdb..a39d89e548a62 100644 --- a/src/query/sql/src/planner/expression_parser.rs +++ b/src/query/sql/src/planner/expression_parser.rs @@ -23,7 +23,7 @@ use common_base::base::tokio::task::block_in_place; use common_catalog::catalog::CATALOG_DEFAULT; use common_catalog::table::Table; use common_catalog::table_context::TableContext; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::ErrorCode; use common_exception::Result; use common_expression::types::DataType; @@ -54,7 +54,7 @@ pub fn parse_exprs( unwrap_tuple: bool, sql: &str, ) -> Result> { - let settings = Settings::default_settings("", GlobalConfig::instance())?; + let settings = Settings::default_settings("", 
GlobalSetting::instance())?; let mut bind_context = BindContext::new(); let metadata = Arc::new(RwLock::new(Metadata::default())); let table_index = metadata.write().add_table( diff --git a/src/query/sql/tests/location.rs b/src/query/sql/tests/location.rs index b38e36913be5d..5e6da6835c4e3 100644 --- a/src/query/sql/tests/location.rs +++ b/src/query/sql/tests/location.rs @@ -21,8 +21,8 @@ use std::collections::BTreeMap; use anyhow::Result; use common_ast::ast::UriLocation; use common_base::base::GlobalInstance; -use common_config::Config; -use common_config::GlobalConfig; +use common_config::GlobalSetting; +use common_config::Setting; use common_meta_app::storage::StorageFsConfig; // use common_storage::StorageFtpConfig; use common_meta_app::storage::StorageGcsConfig; @@ -44,7 +44,7 @@ fn test_parse_uri_location() -> Result<()> { }; GlobalInstance::init_testing(&thread_name); - GlobalConfig::init(Config::default())?; + GlobalSetting::init(Setting::default())?; let cases = vec![ ( diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index 20522e37b540c..15282f8ebc8d2 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -19,7 +19,8 @@ use std::sync::Arc; use common_base::base::GlobalInstance; use common_cache::CountableMeter; use common_cache::DefaultHashBuilder; -use common_config::QueryConfig; +use common_config::CacheSetting; +use common_config::ExternalCacheStorageTypeSetting; use common_exception::Result; use storages_common_cache::InMemoryCacheBuilder; use storages_common_cache::InMemoryItemCacheHolder; @@ -53,30 +54,33 @@ pub struct CacheManager { impl CacheManager { /// Initialize the caches according to the relevant configurations. 
- pub fn init(config: &QueryConfig) -> Result<()> { + pub fn init(config: &CacheSetting, tenant_id: impl Into) -> Result<()> { // setup table data cache - let table_data_cache = if config.table_data_cache_enabled { - let real_disk_cache_root = PathBuf::from(&config.table_disk_cache_root) - .join(&config.tenant_id) - .join("v1"); - Self::new_block_data_cache( - &real_disk_cache_root, - config.table_data_cache_population_queue_size, - config.table_disk_cache_max_size, - )? - } else { - None + let table_data_cache = { + match config.data_cache_storage { + ExternalCacheStorageTypeSetting::None => None, + ExternalCacheStorageTypeSetting::Disk => { + let real_disk_cache_root = PathBuf::from(&config.disk_cache_config.path) + .join(tenant_id.into()) + .join("v1"); + Self::new_block_data_cache( + &real_disk_cache_root, + config.table_data_cache_population_queue_size, + config.disk_cache_config.max_bytes, + )? + } + } }; // setup in-memory table column cache let table_column_array_cache = Self::new_in_memory_cache( - config.table_data_cache_in_memory_max_size, + config.table_data_deserialized_data_bytes, ColumnArrayMeter, "table_data_column_array", ); // setup in-memory table meta cache - if !config.table_meta_cache_enabled { + if !config.enable_table_meta_caches { GlobalInstance::set(Arc::new(Self { table_snapshot_cache: None, segment_info_cache: None, @@ -89,17 +93,15 @@ impl CacheManager { })); } else { let table_snapshot_cache = - Self::new_item_cache(config.table_cache_snapshot_count, "table_snapshot"); + Self::new_item_cache(config.table_meta_snapshot_count, "table_snapshot"); let table_statistic_cache = - Self::new_item_cache(config.table_cache_statistic_count, "table_statistics"); + Self::new_item_cache(config.table_meta_statistic_count, "table_statistics"); let segment_info_cache = - Self::new_item_cache(config.table_cache_segment_count, "segment_info"); - let bloom_index_filter_cache = Self::new_item_cache( - config.table_cache_bloom_index_filter_count, - 
"bloom_index_filter", - ); + Self::new_item_cache(config.table_meta_segment_count, "segment_info"); + let bloom_index_filter_cache = + Self::new_item_cache(config.table_bloom_index_filter_count, "bloom_index_filter"); let bloom_index_meta_cache = Self::new_item_cache( - config.table_cache_bloom_index_meta_count, + config.table_bloom_index_meta_count, "bloom_index_file_meta_data", ); let file_meta_data_cache = diff --git a/src/query/storages/factory/src/storage_factory.rs b/src/query/storages/factory/src/storage_factory.rs index a69dc27db9658..b6938150b6525 100644 --- a/src/query/storages/factory/src/storage_factory.rs +++ b/src/query/storages/factory/src/storage_factory.rs @@ -15,7 +15,7 @@ use std::sync::Arc; pub use common_catalog::catalog::StorageDescription; -use common_config::Config; +use common_config::Setting; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::TableInfo; @@ -67,7 +67,7 @@ pub struct StorageFactory { } impl StorageFactory { - pub fn create(conf: Config) -> Self { + pub fn create(conf: Setting) -> Self { let creators: DashMap = Default::default(); // Register memory table engine. 
diff --git a/src/query/storages/system/src/configs_table.rs b/src/query/storages/system/src/configs_table.rs index 9413756db9c64..c96b33ee87cd0 100644 --- a/src/query/storages/system/src/configs_table.rs +++ b/src/query/storages/system/src/configs_table.rs @@ -17,7 +17,8 @@ use std::sync::Arc; use common_base::base::mask_string; use common_catalog::table::Table; use common_catalog::table_context::TableContext; -use common_config::GlobalConfig; +use common_config::GlobalSetting; +use common_config::QueryConfig; use common_exception::Result; use common_expression::types::StringType; use common_expression::utils::FromData; @@ -30,6 +31,7 @@ use common_meta_app::schema::TableInfo; use common_meta_app::schema::TableMeta; use itertools::Itertools; use serde_json::Value as JsonValue; +use serde_json::Value; use crate::SyncOneBlockSystemTable; use crate::SyncSystemTable; @@ -46,14 +48,16 @@ impl SyncSystemTable for ConfigsTable { } fn get_full_data(&self, _ctx: Arc) -> Result { - let config = GlobalConfig::instance().as_ref().clone().into_outer(); + let config = GlobalSetting::instance().as_ref().clone().into_config(); let mut names: Vec = vec![]; let mut values: Vec = vec![]; let mut groups: Vec = vec![]; let mut descs: Vec = vec![]; let query_config = config.query; - let query_config_value = serde_json::to_value(query_config)?; + let query_config_value = + Self::remove_obsolete_query_configs(serde_json::to_value(query_config)?); + ConfigsTable::extract_config( &mut names, &mut values, @@ -277,4 +281,16 @@ impl ConfigsTable { groups.push(group); descs.push(desc); } + + fn remove_obsolete_query_configs(config_json: JsonValue) -> JsonValue { + match config_json { + Value::Object(mut config_json_obj) => { + for key in QueryConfig::obsoleted_option_keys().iter() { + config_json_obj.remove(*key); + } + JsonValue::Object(config_json_obj) + } + _ => config_json, + } + } } diff --git a/src/query/storages/system/src/tracing_table.rs b/src/query/storages/system/src/tracing_table.rs 
index e90a4d2f67bfa..de664d7e4e369 100644 --- a/src/query/storages/system/src/tracing_table.rs +++ b/src/query/storages/system/src/tracing_table.rs @@ -25,7 +25,7 @@ use common_catalog::plan::Partitions; use common_catalog::plan::PushDownInfo; use common_catalog::table::Table; use common_catalog::table_context::TableContext; -use common_config::GlobalConfig; +use common_config::GlobalSetting; use common_exception::ErrorCode; use common_exception::Result; use common_expression::types::DataType; @@ -77,9 +77,9 @@ impl TracingTable { fn log_files() -> Result> { debug!( "list log files from {:?}", - std::fs::canonicalize(GlobalConfig::instance().log.file.dir.as_str()) + std::fs::canonicalize(GlobalSetting::instance().log.file.dir.as_str()) ); - WalkDir::new(GlobalConfig::instance().log.file.dir.as_str()) + WalkDir::new(GlobalSetting::instance().log.file.dir.as_str()) // NOTE:(everpcpc) ignore log files in subdir with different format .max_depth(1) .sort_by_key(|file| file.file_name().to_owned()) From 86fbcbe0a75a804356a1777faca92f32e5a32028 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 15 Feb 2023 21:51:53 +0800 Subject: [PATCH 73/80] fix typo --- scripts/ci/deploy/config/databend-query-node-2.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci/deploy/config/databend-query-node-2.toml b/scripts/ci/deploy/config/databend-query-node-2.toml index 95d266187d526..c8d5216b395f8 100644 --- a/scripts/ci/deploy/config/databend-query-node-2.toml +++ b/scripts/ci/deploy/config/databend-query-node-2.toml @@ -58,7 +58,7 @@ type = "fs" [storage.fs] data_path = "./.databend/stateless_test_data" -] +[cache] ### table meta caches ### # Enable table meta cache. Default is true. 
From 4f1a00672606ab64b904d1f4a007d5a473c42e24 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 15 Feb 2023 22:18:40 +0800 Subject: [PATCH 74/80] fix typos --- .../deploy/config/databend-query-node-1.toml | 2 +- .../deploy/config/databend-query-node-2.toml | 2 +- .../deploy/config/databend-query-node-3.toml | 2 +- .../config/databend-query-node-shared.toml | 47 ++++++++++++++++--- src/query/config/src/config.rs | 2 +- 5 files changed, 45 insertions(+), 10 deletions(-) diff --git a/scripts/ci/deploy/config/databend-query-node-1.toml b/scripts/ci/deploy/config/databend-query-node-1.toml index bd429c8ecd6a7..f7991a81acd43 100644 --- a/scripts/ci/deploy/config/databend-query-node-1.toml +++ b/scripts/ci/deploy/config/databend-query-node-1.toml @@ -121,7 +121,7 @@ table_meta_statistic_count = 256 ### table bloom index caches ### # Enable bloom index cache. Default is true # Set it to false will disable all the bloom index caches -table_index_bloom_enabled = true +enable_table_bloom_index_caches = true # Max number of cached bloom index meta objects. Set it to 0 to disable it. table_bloom_index_meta_count = 3000 # Max number of cached bloom index filters. Set it to 0 to disable it. diff --git a/scripts/ci/deploy/config/databend-query-node-2.toml b/scripts/ci/deploy/config/databend-query-node-2.toml index c8d5216b395f8..8f2849693d872 100644 --- a/scripts/ci/deploy/config/databend-query-node-2.toml +++ b/scripts/ci/deploy/config/databend-query-node-2.toml @@ -74,7 +74,7 @@ table_meta_statistic_count = 256 ### table bloom index caches ### # Enable bloom index cache. Default is true # Set it to false will disable all the bloom index caches -table_index_bloom_enabled = true +enable_table_bloom_index_caches = true # Max number of cached bloom index meta objects. Set it to 0 to disable it. table_bloom_index_meta_count = 3000 # Max number of cached bloom index filters. Set it to 0 to disable it. 
diff --git a/scripts/ci/deploy/config/databend-query-node-3.toml b/scripts/ci/deploy/config/databend-query-node-3.toml index fc56f78684de9..9721d42fecd02 100644 --- a/scripts/ci/deploy/config/databend-query-node-3.toml +++ b/scripts/ci/deploy/config/databend-query-node-3.toml @@ -75,7 +75,7 @@ table_meta_statistic_count = 256 ### table bloom index caches ### # Enable bloom index cache. Default is true # Set it to false will disable all the bloom index caches -table_index_bloom_enabled = true +enable_table_bloom_index_caches = true # Max number of cached bloom index meta objects. Set it to 0 to disable it. table_bloom_index_meta_count = 3000 # Max number of cached bloom index filters. Set it to 0 to disable it. diff --git a/scripts/ci/deploy/config/databend-query-node-shared.toml b/scripts/ci/deploy/config/databend-query-node-shared.toml index eddbd03329d41..f3eeffbe8c8c1 100644 --- a/scripts/ci/deploy/config/databend-query-node-shared.toml +++ b/scripts/ci/deploy/config/databend-query-node-shared.toml @@ -33,12 +33,6 @@ cluster_id = "test_cluster" table_engine_memory_enabled = true database_engine_github_enabled = true -table_meta_cache_enabled = true -table_memory_cache_mb_size = 1024 -table_disk_cache_root = "_cache" -table_cache_bloom_index_meta_count=3000 -table_cache_bloom_index_filter_count=1048576 - share_endpoint_address = "127.0.0.1:33003" # receive shared information from open sharing # [[query.users]] # name = "admin" @@ -109,3 +103,44 @@ data_path = "./.databend/stateless_test_data" # endpoint_url = "" # access_key_id = "" # access_key_secret = "" + +[cache] + +### table meta caches ### +# Enable table meta cache. Default is true. +# Set it to false will disable all the table meta caches +enable_table_meta_caches = true +# Max number of cached table snapshot. Set it to 0 to disable it. +table_meta_snapshot_count = 256 +# Max number of cached table segment. Set it to 0 to disable it. 
+table_meta_segment_count = 10240 +# Max number of cached table statistic meta. Set it to 0 to disable it. +table_meta_statistic_count = 256 + +### table bloom index caches ### +# Enable bloom index cache. Default is true +# Set it to false will disable all the bloom index caches +enable_table_bloom_index_caches = true +# Max number of cached bloom index meta objects. Set it to 0 to disable it. +table_bloom_index_meta_count = 3000 +# Max number of cached bloom index filters. Set it to 0 to disable it. +table_bloom_index_filter_count = 1048576 + +### table data caches ### + +# Type of storage to keep the table data cache +# +# available options: [none|disk] +# default is "none", which disable table data cache +# use "disk" to enabled disk cache +data_cache_storage = "none" + +# Max size of external cache population queue length +table_data_cache_population_queue_size = 65535 + + +[cache.disk] +# cache path +path = "./databend/_cache" +# max bytes of cached data 20G +max_bytes = 21474836480 diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index 7a0dabfcdc1f0..9b0433e1566c8 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -2111,7 +2111,7 @@ impl QueryConfig { Self::check( &self.table_meta_cache_enabled, "table-meta-cache-enabled", - "cache-enable-table-meta-cache", + "cache-enable-table-meta-caches", r#" [cache] table-meta-cache-enabled=["true"|"false"] From 4c72dc590d0d4943c62156f5398c4d78ee21071f Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 15 Feb 2023 22:57:02 +0800 Subject: [PATCH 75/80] revert `common_config::inner` namespace to avoid naming collision of `Setting` (with `common-settings::Settings`) --- src/binaries/query/local.rs | 4 +- src/binaries/query/main.rs | 6 +- src/query/config/src/config.rs | 65 ++++++++----------- src/query/config/src/global.rs | 10 +-- src/query/config/src/{setting.rs => inner.rs} | 38 +++++------ src/query/config/src/lib.rs | 31 +++++---- 
src/query/config/tests/main.rs | 6 +- src/query/service/src/api/http/v1/config.rs | 4 +- .../service/src/api/http/v1/tenant_tables.rs | 4 +- src/query/service/src/api/http_service.rs | 8 +-- .../src/api/rpc/exchange/exchange_manager.rs | 6 +- .../service/src/api/rpc/packets/packet.rs | 8 +-- .../src/api/rpc/packets/packet_execute.rs | 4 +- .../src/api/rpc/packets/packet_executor.rs | 4 +- .../src/api/rpc/packets/packet_publisher.rs | 4 +- src/query/service/src/api/rpc_service.rs | 8 +-- src/query/service/src/auth.rs | 4 +- .../service/src/catalogs/catalog_manager.rs | 22 +++---- .../src/catalogs/default/database_catalog.rs | 4 +- .../src/catalogs/default/immutable_catalog.rs | 4 +- .../src/catalogs/default/mutable_catalog.rs | 4 +- src/query/service/src/clusters/cluster.rs | 23 ++++--- .../service/src/databases/database_factory.rs | 4 +- .../src/databases/system/system_database.rs | 4 +- src/query/service/src/global_services.rs | 10 +-- .../access/management_mode_access.rs | 4 +- .../src/interpreters/interpreter_metrics.rs | 4 +- .../src/interpreters/interpreter_query_log.rs | 8 +-- .../src/interpreters/interpreter_unsetting.rs | 6 +- src/query/service/src/procedures/procedure.rs | 4 +- .../service/src/servers/http/http_services.rs | 14 ++-- .../http/v1/query/http_query_manager.rs | 4 +- .../service/src/sessions/query_ctx_shared.rs | 4 +- src/query/service/src/sessions/session.rs | 4 +- src/query/service/src/sessions/session_ctx.rs | 4 +- src/query/service/src/sessions/session_mgr.rs | 12 ++-- src/query/service/tests/it/configs.rs | 32 ++++----- .../it/servers/http/clickhouse_handler.rs | 4 +- .../it/storages/fuse/table_test_fixture.rs | 4 +- src/query/service/tests/it/tests/config.rs | 10 +-- src/query/service/tests/it/tests/context.rs | 4 +- src/query/service/tests/it/tests/sessions.rs | 4 +- src/query/settings/src/lib.rs | 10 +-- src/query/sharing/src/signer.rs | 4 +- src/query/sql/src/planner/binder/copy.rs | 8 +-- src/query/sql/src/planner/binder/location.rs | 
4 +- src/query/sql/src/planner/binder/table.rs | 4 +- .../sql/src/planner/expression_parser.rs | 4 +- src/query/sql/tests/location.rs | 6 +- .../common/cache-manager/src/cache_manager.rs | 10 +-- .../storages/factory/src/storage_factory.rs | 4 +- .../storages/system/src/configs_table.rs | 4 +- .../storages/system/src/tracing_table.rs | 6 +- 53 files changed, 235 insertions(+), 244 deletions(-) rename src/query/config/src/{setting.rs => inner.rs} (96%) diff --git a/src/binaries/query/local.rs b/src/binaries/query/local.rs index 21e87dc9682ea..00715f5c5330d 100644 --- a/src/binaries/query/local.rs +++ b/src/binaries/query/local.rs @@ -16,7 +16,7 @@ use std::time::Instant; use comfy_table::Cell; use comfy_table::Table; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataBlock; @@ -28,7 +28,7 @@ use databend_query::sql::Planner; use databend_query::GlobalServices; use tokio_stream::StreamExt; -pub async fn query_local(conf: &Setting) -> Result<()> { +pub async fn query_local(conf: &InnerConfig) -> Result<()> { let mut conf = conf.clone(); conf.storage.allow_insecure = true; let local_conf = conf.local.clone(); diff --git a/src/binaries/query/main.rs b/src/binaries/query/main.rs index 5e17f99ec8bd1..c14f8d11445c4 100644 --- a/src/binaries/query/main.rs +++ b/src/binaries/query/main.rs @@ -23,7 +23,7 @@ use common_base::mem_allocator::GlobalAllocator; use common_base::runtime::Runtime; use common_base::runtime::GLOBAL_MEM_STAT; use common_base::set_alloc_error_hook; -use common_config::Setting; +use common_config::InnerConfig; use common_config::DATABEND_COMMIT_VERSION; use common_config::QUERY_SEMVER; use common_exception::Result; @@ -62,7 +62,7 @@ fn main() { } async fn main_entrypoint() -> Result<()> { - let conf: Setting = Setting::load()?; + let conf: InnerConfig = InnerConfig::load()?; if run_cmd(&conf).await? 
{ return Ok(()); @@ -310,7 +310,7 @@ async fn main_entrypoint() -> Result<()> { Ok(()) } -async fn run_cmd(conf: &Setting) -> Result { +async fn run_cmd(conf: &InnerConfig) -> Result { if conf.cmd.is_empty() { return Ok(false); } diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index 9b0433e1566c8..e2c181ef46f4a 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -51,14 +51,13 @@ use serfig::collectors::from_file; use serfig::collectors::from_self; use serfig::parsers::Toml; -use super::setting; -use super::setting::CatalogHiveSetting as InnerCatalogHiveConfig; -use super::setting::CatalogSetting as InnerCatalogConfig; -use super::setting::LocalConfig as InnerLocalConfig; -use super::setting::MetaConfig as InnerMetaConfig; -use super::setting::QuerySetting as InnerQueryConfig; -use super::setting::Setting as InnerConfig; -use crate::Setting; +use super::inner; +use super::inner::CatalogConfig as InnerCatalogConfig; +use super::inner::CatalogHiveConfig as InnerCatalogHiveConfig; +use super::inner::InnerConfig; +use super::inner::LocalConfig as InnerLocalConfig; +use super::inner::MetaConfig as InnerMetaConfig; +use super::inner::QueryConfig as InnerQueryConfig; use crate::DATABEND_COMMIT_VERSION; // FIXME: too much boilerplate here @@ -1854,7 +1853,7 @@ pub struct CacheConfig { /// Type of data cache storage #[clap(long = "cache-data-cache-storage", value_enum, default_value_t)] - pub data_cache_storage: ExternalCacheStorageTypeConfig, + pub data_cache_storage: CacheStorageTypeConfig, /// Max size of external cache population queue length /// @@ -1895,13 +1894,13 @@ fn bool_true() -> bool { #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, ValueEnum)] #[serde(rename_all = "lowercase")] -pub enum ExternalCacheStorageTypeConfig { +pub enum CacheStorageTypeConfig { None, Disk, // Redis, } -impl Default for ExternalCacheStorageTypeConfig { +impl Default for CacheStorageTypeConfig { fn default() -> 
Self { Self::None } @@ -1922,8 +1921,8 @@ pub struct DiskCacheConfig { mod cache_config_converters { use super::*; - impl From for Config { - fn from(inner: Setting) -> Self { + impl From for Config { + fn from(inner: InnerConfig) -> Self { Self { cmd: inner.cmd, config_file: inner.config_file, @@ -1944,7 +1943,7 @@ mod cache_config_converters { } } - impl TryInto for Config { + impl TryInto for Config { type Error = ErrorCode; fn try_into(self) -> Result { @@ -1976,7 +1975,7 @@ mod cache_config_converters { } } - impl TryFrom for setting::CacheSetting { + impl TryFrom for inner::CacheConfig { type Error = ErrorCode; fn try_from(value: CacheConfig) -> std::result::Result { @@ -1997,8 +1996,8 @@ mod cache_config_converters { } } - impl From for CacheConfig { - fn from(value: setting::CacheSetting) -> Self { + impl From for CacheConfig { + fn from(value: inner::CacheConfig) -> Self { Self { enable_table_meta_caches: value.enable_table_meta_caches, table_meta_snapshot_count: value.table_meta_snapshot_count, @@ -2016,7 +2015,7 @@ mod cache_config_converters { } } - impl TryFrom for setting::DiskCacheConfig { + impl TryFrom for inner::DiskCacheConfig { type Error = ErrorCode; fn try_from(value: DiskCacheConfig) -> std::result::Result { Ok(Self { @@ -2026,8 +2025,8 @@ mod cache_config_converters { } } - impl From for DiskCacheConfig { - fn from(value: setting::DiskCacheConfig) -> Self { + impl From for DiskCacheConfig { + fn from(value: inner::DiskCacheConfig) -> Self { Self { max_bytes: value.max_bytes, path: value.path, @@ -2035,31 +2034,21 @@ mod cache_config_converters { } } - impl TryFrom for setting::ExternalCacheStorageTypeSetting { + impl TryFrom for inner::CacheStorageTypeConfig { type Error = ErrorCode; - fn try_from( - value: ExternalCacheStorageTypeConfig, - ) -> std::result::Result { + fn try_from(value: CacheStorageTypeConfig) -> std::result::Result { Ok(match value { - ExternalCacheStorageTypeConfig::None => { - 
setting::ExternalCacheStorageTypeSetting::None - } - ExternalCacheStorageTypeConfig::Disk => { - setting::ExternalCacheStorageTypeSetting::Disk - } + CacheStorageTypeConfig::None => inner::CacheStorageTypeConfig::None, + CacheStorageTypeConfig::Disk => inner::CacheStorageTypeConfig::Disk, }) } } - impl From for ExternalCacheStorageTypeConfig { - fn from(value: setting::ExternalCacheStorageTypeSetting) -> Self { + impl From for CacheStorageTypeConfig { + fn from(value: inner::CacheStorageTypeConfig) -> Self { match value { - setting::ExternalCacheStorageTypeSetting::None => { - ExternalCacheStorageTypeConfig::None - } - setting::ExternalCacheStorageTypeSetting::Disk => { - ExternalCacheStorageTypeConfig::Disk - } + inner::CacheStorageTypeConfig::None => CacheStorageTypeConfig::None, + inner::CacheStorageTypeConfig::Disk => CacheStorageTypeConfig::Disk, } } } diff --git a/src/query/config/src/global.rs b/src/query/config/src/global.rs index 04ba0dc98ef59..f14da60210b85 100644 --- a/src/query/config/src/global.rs +++ b/src/query/config/src/global.rs @@ -17,17 +17,17 @@ use std::sync::Arc; use common_base::base::GlobalInstance; use common_exception::Result; -use crate::Setting; +use crate::InnerConfig; -pub struct GlobalSetting; +pub struct GlobalConfig; -impl GlobalSetting { - pub fn init(config: Setting) -> Result<()> { +impl GlobalConfig { + pub fn init(config: InnerConfig) -> Result<()> { GlobalInstance::set(Arc::new(config)); Ok(()) } - pub fn instance() -> Arc { + pub fn instance() -> Arc { GlobalInstance::get() } } diff --git a/src/query/config/src/setting.rs b/src/query/config/src/inner.rs similarity index 96% rename from src/query/config/src/setting.rs rename to src/query/config/src/inner.rs index 74e3e3fc9d959..2ae7806144cb4 100644 --- a/src/query/config/src/setting.rs +++ b/src/query/config/src/inner.rs @@ -36,12 +36,12 @@ use super::config::Config; /// /// All function should implement based on this Config. 
#[derive(Clone, Default, Debug, PartialEq, Eq)] -pub struct Setting { +pub struct InnerConfig { pub cmd: String, pub config_file: String, // Query engine config. - pub query: QuerySetting, + pub query: QueryConfig, pub log: LogConfig, @@ -57,14 +57,14 @@ pub struct Setting { // external catalog config. // - Later, catalog information SHOULD be kept in KV Service // - currently only supports HIVE (via hive meta store) - pub catalogs: HashMap, + pub catalogs: HashMap, // Cache Config - pub cache: CacheSetting, + pub cache: CacheConfig, } -impl Setting { - /// As requires by [RFC: Config Backward Compatibility](https://github.com/datafuselabs/databend/pull/5324), we will load user's config via wrapper [`ConfigV0`] and then convert from [`ConfigV0`] to [`Setting`]. +impl InnerConfig { + /// As required by [RFC: Config Backward Compatibility](https://github.com/datafuselabs/databend/pull/5324), we will load user's config via wrapper [`ConfigV0`] and then convert from [`ConfigV0`] to [`InnerConfig`]. /// /// In the future, we could have `ConfigV1` and `ConfigV2`. pub fn load() -> Result { @@ -99,7 +99,7 @@ impl Setting { !self.query.rpc_tls_server_key.is_empty() && !self.query.rpc_tls_server_cert.is_empty() } - /// Transform Setting into the Config. + /// Transform inner::Config into the Config. /// /// This function should only be used for end-users. /// @@ -114,7 +114,7 @@ impl Setting { } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct QuerySetting { +pub struct QueryConfig { /// Tenant id for get the information from the MetaSrv. pub tenant_id: String, /// ID for construct the cluster. 
@@ -164,7 +164,7 @@ pub struct QuerySetting { pub internal_enable_sandbox_tenant: bool, } -impl Default for QuerySetting { +impl Default for QueryConfig { fn default() -> Self { Self { tenant_id: "admin".to_string(), @@ -211,7 +211,7 @@ impl Default for QuerySetting { } } -impl QuerySetting { +impl QueryConfig { pub fn to_rpc_client_tls_config(&self) -> RpcClientTlsConfig { RpcClientTlsConfig { rpc_tls_server_root_ca_cert: self.rpc_tls_query_server_root_ca_cert.clone(), @@ -333,8 +333,8 @@ impl Debug for MetaConfig { } #[derive(Clone, Debug, PartialEq, Eq)] -pub enum CatalogSetting { - Hive(CatalogHiveSetting), +pub enum CatalogConfig { + Hive(CatalogHiveConfig), } // TODO: add compat protocol support @@ -364,12 +364,12 @@ impl Display for ThriftProtocol { } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct CatalogHiveSetting { +pub struct CatalogHiveConfig { pub address: String, pub protocol: ThriftProtocol, } -impl Default for CatalogHiveSetting { +impl Default for CatalogHiveConfig { fn default() -> Self { Self { address: "127.0.0.1:9083".to_string(), @@ -395,7 +395,7 @@ impl Default for LocalConfig { } #[derive(Clone, Debug, PartialEq, Eq)] -pub struct CacheSetting { +pub struct CacheConfig { /// Enable table meta cache. Default is enabled. 
Set it to false to disable all the table meta caches pub enable_table_meta_caches: bool, @@ -421,7 +421,7 @@ pub struct CacheSetting { // table filter on 2 columns, might populate 2 * 800 bloom index filter cache items (at most) pub table_bloom_index_filter_count: u64, - pub data_cache_storage: ExternalCacheStorageTypeSetting, + pub data_cache_storage: CacheStorageTypeConfig, /// Max size of external cache population queue length /// @@ -449,13 +449,13 @@ pub struct CacheSetting { } #[derive(Clone, Debug, PartialEq, Eq)] -pub enum ExternalCacheStorageTypeSetting { +pub enum CacheStorageTypeConfig { None, Disk, // Redis, } -impl Default for ExternalCacheStorageTypeSetting { +impl Default for CacheStorageTypeConfig { fn default() -> Self { Self::None } @@ -479,7 +479,7 @@ impl Default for DiskCacheConfig { } } -impl Default for CacheSetting { +impl Default for CacheConfig { fn default() -> Self { Self { enable_table_meta_caches: true, diff --git a/src/query/config/src/lib.rs b/src/query/config/src/lib.rs index d1bfa02b4dc21..157f625907bd2 100644 --- a/src/query/config/src/lib.rs +++ b/src/query/config/src/lib.rs @@ -15,34 +15,33 @@ #![allow(clippy::uninlined_format_args)] #![feature(no_sanitize)] -mod config; /// Config mods provide config support. /// /// We are providing two config types: /// /// - [`config::Config`] represents the options from command line , configuration files or environment vars. -/// - [`setting::Setting`] "internal representation" of application settings . -/// - [`global::GlobalSetting`] A global singleton of [`crate::Setting`]. +/// - [`inner::InnerConfig`] "internal representation" of application settings, built from Config. +/// - [`global::GlobalConfig`] A global singleton of [`crate::InnerConfig`]. 
/// -/// It's safe to refactor [`setting::Setting`] in anyway, as long as it satisfied the following traits +/// It's safe to refactor [`inner::InnerConfig`] in anyway, as long as it satisfied the following traits /// -/// - `TryInto for config::Config` -/// - `From for config::Config` +/// - `TryInto for config::Config` +/// - `From for config::Config` +mod config; mod global; -mod setting; +mod inner; mod version; +pub use config::CacheStorageTypeConfig; pub use config::Config; -pub use config::ExternalCacheStorageTypeConfig; pub use config::QueryConfig; pub use config::StorageConfig; -pub use global::GlobalSetting; -pub use setting::CacheSetting; -pub use setting::CatalogHiveSetting; -pub use setting::CatalogSetting; -pub use setting::ExternalCacheStorageTypeSetting; -pub use setting::QuerySetting; -pub use setting::Setting; -pub use setting::ThriftProtocol; +pub use global::GlobalConfig; +pub use inner::CacheConfig; +pub use inner::CacheStorageTypeConfig as CacheStorageTypeInnerConfig; +pub use inner::CatalogConfig; +pub use inner::CatalogHiveConfig; +pub use inner::InnerConfig; +pub use inner::ThriftProtocol; pub use version::DATABEND_COMMIT_VERSION; pub use version::QUERY_SEMVER; diff --git a/src/query/config/tests/main.rs b/src/query/config/tests/main.rs index f9a051b6aa492..7769d6b02edce 100644 --- a/src/query/config/tests/main.rs +++ b/src/query/config/tests/main.rs @@ -16,14 +16,14 @@ use std::ffi::OsString; use clap::Parser; use common_config::Config; -use common_config::Setting; +use common_config::InnerConfig; use pretty_assertions::assert_eq; /// It's required to make sure setting's default value is the same with clap. 
#[test] fn test_config_default() { - let setting_default = Setting::default(); - let config_default: Setting = Config::parse_from(Vec::::new()) + let setting_default = InnerConfig::default(); + let config_default: InnerConfig = Config::parse_from(Vec::::new()) .try_into() .expect("parse from args must succeed"); diff --git a/src/query/service/src/api/http/v1/config.rs b/src/query/service/src/api/http/v1/config.rs index 5e29f6b6a99ff..da255fa93fc50 100644 --- a/src/query/service/src/api/http/v1/config.rs +++ b/src/query/service/src/api/http/v1/config.rs @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_config::GlobalSetting; +use common_config::GlobalConfig; use poem::web::Json; use poem::IntoResponse; #[poem::handler] pub async fn config_handler() -> poem::Result { Ok(Json( - GlobalSetting::instance().as_ref().clone().into_config(), + GlobalConfig::instance().as_ref().clone().into_config(), )) } diff --git a/src/query/service/src/api/http/v1/tenant_tables.rs b/src/query/service/src/api/http/v1/tenant_tables.rs index af9d50ff37c8a..3e66c9ed92d23 100644 --- a/src/query/service/src/api/http/v1/tenant_tables.rs +++ b/src/query/service/src/api/http/v1/tenant_tables.rs @@ -16,7 +16,7 @@ use chrono::DateTime; use chrono::Utc; use common_catalog::catalog::CatalogManager; use common_catalog::catalog_kind::CATALOG_DEFAULT; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_exception::Result; use poem::web::Json; use poem::web::Path; @@ -81,7 +81,7 @@ pub async fn list_tenant_tables_handler( // This handler returns the statistics about the tables of the current tenant. 
#[poem::handler] pub async fn list_tables_handler() -> poem::Result { - let tenant = &GlobalSetting::instance().query.tenant_id; + let tenant = &GlobalConfig::instance().query.tenant_id; if tenant.is_empty() { return Ok(Json(TenantTablesResponse { tables: vec![] })); } diff --git a/src/query/service/src/api/http_service.rs b/src/query/service/src/api/http_service.rs index 24802f24d5e09..7997ed7714aae 100644 --- a/src/query/service/src/api/http_service.rs +++ b/src/query/service/src/api/http_service.rs @@ -15,7 +15,7 @@ use std::net::SocketAddr; use std::path::Path; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_http::health_handler; use common_http::home::debug_home_handler; @@ -36,12 +36,12 @@ use tracing::warn; use crate::servers::Server; pub struct HttpService { - config: Setting, + config: InnerConfig, shutdown_handler: HttpShutdownHandler, } impl HttpService { - pub fn create(config: &Setting) -> Box { + pub fn create(config: &InnerConfig) -> Box { Box::new(HttpService { config: config.clone(), shutdown_handler: HttpShutdownHandler::create("http api".to_string()), @@ -92,7 +92,7 @@ impl HttpService { route } - fn build_tls(config: &Setting) -> Result { + fn build_tls(config: &InnerConfig) -> Result { let certificate = RustlsCertificate::new() .cert(std::fs::read(config.query.api_tls_server_cert.as_str())?) 
.key(std::fs::read(config.query.api_tls_server_key.as_str())?); diff --git a/src/query/service/src/api/rpc/exchange/exchange_manager.rs b/src/query/service/src/api/rpc/exchange/exchange_manager.rs index 1d91ce9c3b1fe..2185502e91b7b 100644 --- a/src/query/service/src/api/rpc/exchange/exchange_manager.rs +++ b/src/query/service/src/api/rpc/exchange/exchange_manager.rs @@ -23,7 +23,7 @@ use common_base::base::GlobalInstance; use common_base::runtime::GlobalIORuntime; use common_base::runtime::Thread; use common_base::runtime::TrySpawn; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_exception::ErrorCode; use common_exception::Result; use common_grpc::ConnectionFactory; @@ -119,7 +119,7 @@ impl DataExchangeManager { } pub async fn create_client(address: &str) -> Result { - let config = GlobalSetting::instance(); + let config = GlobalConfig::instance(); let address = address.to_string(); GlobalIORuntime::instance() @@ -237,7 +237,7 @@ impl DataExchangeManager { let settings = ctx.get_settings(); let timeout = settings.get_flight_client_timeout()?; let root_actions = actions.get_root_actions()?; - let conf = GlobalSetting::instance(); + let conf = GlobalConfig::instance(); // Initialize channels between cluster nodes actions diff --git a/src/query/service/src/api/rpc/packets/packet.rs b/src/query/service/src/api/rpc/packets/packet.rs index 76d78cf9935ca..25d9eb7c1f6b0 100644 --- a/src/query/service/src/api/rpc/packets/packet.rs +++ b/src/query/service/src/api/rpc/packets/packet.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use common_arrow::arrow_format::flight::service::flight_service_client::FlightServiceClient; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::Result; use common_grpc::ConnectionFactory; @@ -21,12 +21,12 @@ use crate::api::FlightClient; #[async_trait::async_trait] pub trait Packet: Send + Sync { - async fn commit(&self, config: &Setting, timeout: u64) -> Result<()>; + async fn commit(&self, config: &InnerConfig, timeout: u64) -> Result<()>; } #[async_trait::async_trait] impl Packet for Vec { - async fn commit(&self, config: &Setting, timeout: u64) -> Result<()> { + async fn commit(&self, config: &InnerConfig, timeout: u64) -> Result<()> { for packet in self.iter() { packet.commit(config, timeout).await?; } @@ -35,7 +35,7 @@ impl Packet for Vec { } } -pub async fn create_client(config: &Setting, address: &str) -> Result { +pub async fn create_client(config: &InnerConfig, address: &str) -> Result { match config.tls_query_cli_enabled() { true => Ok(FlightClient::new(FlightServiceClient::new( ConnectionFactory::create_rpc_channel( diff --git a/src/query/service/src/api/rpc/packets/packet_execute.rs b/src/query/service/src/api/rpc/packets/packet_execute.rs index 56e9eb5b18a63..dcfe5f828c533 100644 --- a/src/query/service/src/api/rpc/packets/packet_execute.rs +++ b/src/query/service/src/api/rpc/packets/packet_execute.rs @@ -15,7 +15,7 @@ use std::collections::HashMap; use std::sync::Arc; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_types::NodeInfo; @@ -49,7 +49,7 @@ impl ExecutePartialQueryPacket { #[async_trait::async_trait] impl Packet for ExecutePartialQueryPacket { - async fn commit(&self, config: &Setting, timeout: u64) -> Result<()> { + async fn commit(&self, config: &InnerConfig, timeout: u64) -> Result<()> { if !self.executors_info.contains_key(&self.executor) { return Err(ErrorCode::ClusterUnknownNode(format!( "Not found {} node in 
cluster", diff --git a/src/query/service/src/api/rpc/packets/packet_executor.rs b/src/query/service/src/api/rpc/packets/packet_executor.rs index 68175630f537e..5e5f29d45d1d8 100644 --- a/src/query/service/src/api/rpc/packets/packet_executor.rs +++ b/src/query/service/src/api/rpc/packets/packet_executor.rs @@ -15,7 +15,7 @@ use std::collections::HashMap; use std::sync::Arc; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_types::NodeInfo; @@ -56,7 +56,7 @@ impl QueryFragmentsPlanPacket { #[async_trait::async_trait] impl Packet for QueryFragmentsPlanPacket { - async fn commit(&self, config: &Setting, timeout: u64) -> Result<()> { + async fn commit(&self, config: &InnerConfig, timeout: u64) -> Result<()> { if !self.executors_info.contains_key(&self.executor) { return Err(ErrorCode::Internal(format!( "Not found {} node in cluster", diff --git a/src/query/service/src/api/rpc/packets/packet_publisher.rs b/src/query/service/src/api/rpc/packets/packet_publisher.rs index cf7e648054793..c148bb1b4829c 100644 --- a/src/query/service/src/api/rpc/packets/packet_publisher.rs +++ b/src/query/service/src/api/rpc/packets/packet_publisher.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::Result; use common_meta_types::NodeInfo; @@ -53,7 +53,7 @@ impl InitNodesChannelPacket { #[async_trait::async_trait] impl Packet for InitNodesChannelPacket { - async fn commit(&self, config: &Setting, timeout: u64) -> Result<()> { + async fn commit(&self, config: &InnerConfig, timeout: u64) -> Result<()> { let executor_info = &self.executor; let mut conn = create_client(config, &executor_info.flight_address).await?; let action = FlightAction::InitNodesChannel(InitNodesChannel { diff --git a/src/query/service/src/api/rpc_service.rs b/src/query/service/src/api/rpc_service.rs index dcc62e3cf59c9..a53c528fd5878 100644 --- 
a/src/query/service/src/api/rpc_service.rs +++ b/src/query/service/src/api/rpc_service.rs @@ -20,7 +20,7 @@ use common_arrow::arrow_format::flight::service::flight_service_server::FlightSe use common_base::base::tokio; use common_base::base::tokio::net::TcpListener; use common_base::base::tokio::sync::Notify; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use tokio_stream::wrappers::TcpListenerStream; @@ -33,12 +33,12 @@ use crate::api::rpc::DatabendQueryFlightService; use crate::servers::Server as DatabendQueryServer; pub struct RpcService { - pub config: Setting, + pub config: InnerConfig, pub abort_notify: Arc, } impl RpcService { - pub fn create(config: Setting) -> Result> { + pub fn create(config: InnerConfig) -> Result> { Ok(Box::new(Self { config, abort_notify: Arc::new(Notify::new()), @@ -60,7 +60,7 @@ impl RpcService { } } - async fn server_tls_config(conf: &Setting) -> Result { + async fn server_tls_config(conf: &InnerConfig) -> Result { let cert = tokio::fs::read(conf.query.rpc_tls_server_cert.as_str()).await?; let key = tokio::fs::read(conf.query.rpc_tls_server_key.as_str()).await?; let server_identity = Identity::from_pem(cert, key); diff --git a/src/query/service/src/auth.rs b/src/query/service/src/auth.rs index 0512bf8851d0d..fb4ef331b268e 100644 --- a/src/query/service/src/auth.rs +++ b/src/query/service/src/auth.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -pub use common_config::Setting; +pub use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::principal::AuthInfo; @@ -41,7 +41,7 @@ pub enum Credential { } impl AuthMgr { - pub fn create(cfg: &Setting) -> Arc { + pub fn create(cfg: &InnerConfig) -> Arc { Arc::new(AuthMgr { jwt_auth: JwtAuthenticator::create( cfg.query.jwt_key_file.clone(), diff --git a/src/query/service/src/catalogs/catalog_manager.rs b/src/query/service/src/catalogs/catalog_manager.rs index 
2b5cac1bff604..94f7168824032 100644 --- a/src/query/service/src/catalogs/catalog_manager.rs +++ b/src/query/service/src/catalogs/catalog_manager.rs @@ -18,8 +18,8 @@ use common_base::base::GlobalInstance; use common_catalog::catalog::Catalog; pub use common_catalog::catalog::CatalogManager; use common_catalog::catalog_kind::CATALOG_DEFAULT; -use common_config::CatalogSetting; -use common_config::Setting; +use common_config::CatalogConfig; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::CatalogType; @@ -33,13 +33,13 @@ use crate::catalogs::DatabaseCatalog; #[async_trait::async_trait] pub trait CatalogManagerHelper { - async fn init(conf: &Setting) -> Result<()>; + async fn init(conf: &InnerConfig) -> Result<()>; - async fn try_create(conf: &Setting) -> Result>; + async fn try_create(conf: &InnerConfig) -> Result>; - async fn register_build_in_catalogs(&self, conf: &Setting) -> Result<()>; + async fn register_build_in_catalogs(&self, conf: &InnerConfig) -> Result<()>; - fn register_external_catalogs(&self, conf: &Setting) -> Result<()>; + fn register_external_catalogs(&self, conf: &InnerConfig) -> Result<()>; fn create_user_defined_catalog(&self, req: CreateCatalogReq) -> Result<()>; @@ -48,13 +48,13 @@ pub trait CatalogManagerHelper { #[async_trait::async_trait] impl CatalogManagerHelper for CatalogManager { - async fn init(conf: &Setting) -> Result<()> { + async fn init(conf: &InnerConfig) -> Result<()> { GlobalInstance::set(Self::try_create(conf).await?); Ok(()) } - async fn try_create(conf: &Setting) -> Result> { + async fn try_create(conf: &InnerConfig) -> Result> { let catalog_manager = CatalogManager { catalogs: DashMap::new(), }; @@ -69,7 +69,7 @@ impl CatalogManagerHelper for CatalogManager { Ok(Arc::new(catalog_manager)) } - async fn register_build_in_catalogs(&self, conf: &Setting) -> Result<()> { + async fn register_build_in_catalogs(&self, conf: &InnerConfig) -> Result<()> { let 
default_catalog: Arc = Arc::new(DatabaseCatalog::try_create_with_config(conf.clone()).await?); self.catalogs @@ -77,14 +77,14 @@ impl CatalogManagerHelper for CatalogManager { Ok(()) } - fn register_external_catalogs(&self, conf: &Setting) -> Result<()> { + fn register_external_catalogs(&self, conf: &InnerConfig) -> Result<()> { // currently, if the `hive` feature is not enabled // the loop will quit after the first iteration. // this is expected. #[allow(clippy::never_loop)] for (name, ctl) in conf.catalogs.iter() { match ctl { - CatalogSetting::Hive(ctl) => { + CatalogConfig::Hive(ctl) => { // register hive catalog #[cfg(not(feature = "hive"))] { diff --git a/src/query/service/src/catalogs/default/database_catalog.rs b/src/query/service/src/catalogs/default/database_catalog.rs index 66ae364d1360f..b65265116a6b7 100644 --- a/src/query/service/src/catalogs/default/database_catalog.rs +++ b/src/query/service/src/catalogs/default/database_catalog.rs @@ -16,7 +16,7 @@ use std::any::Any; use std::sync::Arc; use common_catalog::table_args::TableArgs; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::CountTablesReply; @@ -87,7 +87,7 @@ impl DatabaseCatalog { } } - pub async fn try_create_with_config(conf: Setting) -> Result { + pub async fn try_create_with_config(conf: InnerConfig) -> Result { let immutable_catalog = ImmutableCatalog::try_create_with_config(&conf).await?; let mutable_catalog = MutableCatalog::try_create_with_config(conf).await?; let table_function_factory = TableFunctionFactory::create(); diff --git a/src/query/service/src/catalogs/default/immutable_catalog.rs b/src/query/service/src/catalogs/default/immutable_catalog.rs index 2fce94247419f..5ee134d46da48 100644 --- a/src/query/service/src/catalogs/default/immutable_catalog.rs +++ b/src/query/service/src/catalogs/default/immutable_catalog.rs @@ -15,7 +15,7 @@ use std::any::Any; use std::sync::Arc; -use 
common_config::Setting; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::CountTablesReply; @@ -70,7 +70,7 @@ pub struct ImmutableCatalog { } impl ImmutableCatalog { - pub async fn try_create_with_config(conf: &Setting) -> Result { + pub async fn try_create_with_config(conf: &InnerConfig) -> Result { // The global db meta. let mut sys_db_meta = InMemoryMetas::create(SYS_DB_ID_BEGIN, SYS_TBL_ID_BEGIN); sys_db_meta.init_db("system"); diff --git a/src/query/service/src/catalogs/default/mutable_catalog.rs b/src/query/service/src/catalogs/default/mutable_catalog.rs index 44d0825a49d4d..78a4b74823dba 100644 --- a/src/query/service/src/catalogs/default/mutable_catalog.rs +++ b/src/query/service/src/catalogs/default/mutable_catalog.rs @@ -15,7 +15,7 @@ use std::any::Any; use std::sync::Arc; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::Result; use common_meta_api::SchemaApi; use common_meta_app::schema::CountTablesReply; @@ -91,7 +91,7 @@ impl MutableCatalog { /// /// MetaEmbedded /// ``` - pub async fn try_create_with_config(conf: Setting) -> Result { + pub async fn try_create_with_config(conf: InnerConfig) -> Result { let meta = { let provider = Arc::new(MetaStoreProvider::new(conf.meta.to_meta_grpc_client_conf())); diff --git a/src/query/service/src/clusters/cluster.rs b/src/query/service/src/clusters/cluster.rs index bf77eac8eca31..afd9da7e08072 100644 --- a/src/query/service/src/clusters/cluster.rs +++ b/src/query/service/src/clusters/cluster.rs @@ -32,7 +32,7 @@ use common_base::base::GlobalUniqName; use common_base::base::SignalStream; use common_base::base::SignalType; pub use common_catalog::cluster_info::Cluster; -use common_config::Setting; +use common_config::InnerConfig; use common_config::DATABEND_COMMIT_VERSION; use common_exception::ErrorCode; use common_exception::Result; @@ -73,7 +73,7 @@ pub trait ClusterHelper { fn is_empty(&self) -> bool; fn 
is_local(&self, node: &NodeInfo) -> bool; fn local_id(&self) -> String; - async fn create_node_conn(&self, name: &str, config: &Setting) -> Result; + async fn create_node_conn(&self, name: &str, config: &InnerConfig) -> Result; fn get_nodes(&self) -> Vec>; } @@ -102,7 +102,7 @@ impl ClusterHelper for Cluster { self.local_id.clone() } - async fn create_node_conn(&self, name: &str, config: &Setting) -> Result { + async fn create_node_conn(&self, name: &str, config: &InnerConfig) -> Result { for node in &self.nodes { if node.id == name { return match config.tls_query_cli_enabled() { @@ -140,7 +140,7 @@ impl ClusterHelper for Cluster { impl ClusterDiscovery { const METRIC_LABEL_FUNCTION: &'static str = "function"; - pub async fn create_meta_client(cfg: &Setting) -> Result { + pub async fn create_meta_client(cfg: &InnerConfig) -> Result { let meta_api_provider = MetaStoreProvider::new(cfg.meta.to_meta_grpc_client_conf()); match meta_api_provider.create_meta_store().await { Ok(meta_store) => Ok(meta_store), @@ -150,14 +150,17 @@ impl ClusterDiscovery { } } - pub async fn init(cfg: Setting) -> Result<()> { + pub async fn init(cfg: InnerConfig) -> Result<()> { let metastore = ClusterDiscovery::create_meta_client(&cfg).await?; GlobalInstance::set(Self::try_create(&cfg, metastore).await?); Ok(()) } - pub async fn try_create(cfg: &Setting, metastore: MetaStore) -> Result> { + pub async fn try_create( + cfg: &InnerConfig, + metastore: MetaStore, + ) -> Result> { let (lift_time, provider) = Self::create_provider(cfg, metastore)?; Ok(Arc::new(ClusterDiscovery { @@ -180,7 +183,7 @@ impl ClusterDiscovery { } fn create_provider( - cfg: &Setting, + cfg: &InnerConfig, metastore: MetaStore, ) -> Result<(Duration, Arc)> { // TODO: generate if tenant or cluster id is empty @@ -192,7 +195,7 @@ impl ClusterDiscovery { Ok((lift_time, Arc::new(cluster_manager))) } - pub async fn discover(&self, config: &Setting) -> Result> { + pub async fn discover(&self, config: &InnerConfig) -> Result> { 
match self.api_provider.get_nodes().await { Err(cause) => { label_counter_with_val_and_labels( @@ -351,7 +354,7 @@ impl ClusterDiscovery { }; } - pub async fn register_to_metastore(self: &Arc, cfg: &Setting) -> Result<()> { + pub async fn register_to_metastore(self: &Arc, cfg: &InnerConfig) -> Result<()> { let cpus = cfg.query.num_cpus; let mut address = cfg.query.flight_api_address.clone(); @@ -503,7 +506,7 @@ impl ClusterHeartbeat { } } -pub async fn create_client(config: &Setting, address: &str) -> Result { +pub async fn create_client(config: &InnerConfig, address: &str) -> Result { match config.tls_query_cli_enabled() { true => Ok(FlightClient::new(FlightServiceClient::new( ConnectionFactory::create_rpc_channel( diff --git a/src/query/service/src/databases/database_factory.rs b/src/query/service/src/databases/database_factory.rs index 41d7030d95be3..91e2390aed618 100644 --- a/src/query/service/src/databases/database_factory.rs +++ b/src/query/service/src/databases/database_factory.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::DatabaseInfo; @@ -45,7 +45,7 @@ pub struct DatabaseFactory { } impl DatabaseFactory { - pub fn create(_: Setting) -> Self { + pub fn create(_: InnerConfig) -> Self { let creators: DashMap> = DashMap::new(); creators.insert( DefaultDatabase::NAME.to_string(), diff --git a/src/query/service/src/databases/system/system_database.rs b/src/query/service/src/databases/system/system_database.rs index f1fe1f2e7e569..a614f81080883 100644 --- a/src/query/service/src/databases/system/system_database.rs +++ b/src/query/service/src/databases/system/system_database.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::Setting; +use common_config::InnerConfig; use common_meta_app::schema::DatabaseIdent; use common_meta_app::schema::DatabaseInfo; use common_meta_app::schema::DatabaseMeta; @@ -54,7 +54,7 
@@ pub struct SystemDatabase { } impl SystemDatabase { - pub fn create(sys_db_meta: &mut InMemoryMetas, config: &Setting) -> Self { + pub fn create(sys_db_meta: &mut InMemoryMetas, config: &InnerConfig) -> Self { let table_list: Vec> = vec![ OneTable::create(sys_db_meta.next_table_id()), FunctionsTable::create(sys_db_meta.next_table_id()), diff --git a/src/query/service/src/global_services.rs b/src/query/service/src/global_services.rs index 2dc2c86550d24..48edc92382e84 100644 --- a/src/query/service/src/global_services.rs +++ b/src/query/service/src/global_services.rs @@ -15,8 +15,8 @@ use common_base::base::GlobalInstance; use common_base::runtime::GlobalIORuntime; use common_catalog::catalog::CatalogManager; -use common_config::GlobalSetting; -use common_config::Setting; +use common_config::GlobalConfig; +use common_config::InnerConfig; use common_exception::Result; use common_storage::CacheOperator; use common_storage::DataOperator; @@ -35,14 +35,14 @@ use crate::sessions::SessionManager; pub struct GlobalServices; impl GlobalServices { - pub async fn init(config: Setting) -> Result<()> { + pub async fn init(config: InnerConfig) -> Result<()> { GlobalInstance::init_production(); GlobalServices::init_with(config).await } - pub async fn init_with(config: Setting) -> Result<()> { + pub async fn init_with(config: InnerConfig) -> Result<()> { // The order of initialization is very important - GlobalSetting::init(config.clone())?; + GlobalConfig::init(config.clone())?; let app_name_shuffle = format!("{}-{}", config.query.tenant_id, config.query.cluster_id); diff --git a/src/query/service/src/interpreters/access/management_mode_access.rs b/src/query/service/src/interpreters/access/management_mode_access.rs index 46d147d1c7efe..c202e450f0143 100644 --- a/src/query/service/src/interpreters/access/management_mode_access.rs +++ b/src/query/service/src/interpreters/access/management_mode_access.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing 
permissions and // limitations under the License. -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_exception::ErrorCode; use common_exception::Result; @@ -32,7 +32,7 @@ impl AccessChecker for ManagementModeAccess { // Check what we can do if in management mode. async fn check(&self, plan: &Plan) -> Result<()> { // Allows for management-mode. - if GlobalSetting::instance().query.management_mode { + if GlobalConfig::instance().query.management_mode { let ok = match plan { Plan::Query {rewrite_kind, .. } => { use common_sql::plans::RewriteKind; diff --git a/src/query/service/src/interpreters/interpreter_metrics.rs b/src/query/service/src/interpreters/interpreter_metrics.rs index 267701431d3ba..d42d9af0bc0a0 100644 --- a/src/query/service/src/interpreters/interpreter_metrics.rs +++ b/src/query/service/src/interpreters/interpreter_metrics.rs @@ -16,7 +16,7 @@ use std::time::Duration; use std::time::SystemTime; use std::time::UNIX_EPOCH; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_exception::ErrorCode; use common_metrics::label_counter_with_val_and_labels; use common_metrics::label_histogram_with_val; @@ -56,7 +56,7 @@ impl InterpreterMetrics { let handler_type = ctx.get_current_session().get_type().to_string(); let query_kind = ctx.get_query_kind(); let tenant_id = ctx.get_tenant(); - let cluster_id = GlobalSetting::instance().query.cluster_id.clone(); + let cluster_id = GlobalConfig::instance().query.cluster_id.clone(); vec![ (LABEL_HANDLER, handler_type), diff --git a/src/query/service/src/interpreters/interpreter_query_log.rs b/src/query/service/src/interpreters/interpreter_query_log.rs index 85a70b7c47875..995f306e1c4f2 100644 --- a/src/query/service/src/interpreters/interpreter_query_log.rs +++ b/src/query/service/src/interpreters/interpreter_query_log.rs @@ -18,7 +18,7 @@ use std::time::Duration; use std::time::SystemTime; use std::time::UNIX_EPOCH; -use common_config::GlobalSetting; +use 
common_config::GlobalConfig; use common_exception::ErrorCode; use common_exception::Result; use common_storages_system::LogType; @@ -81,7 +81,7 @@ impl InterpreterQueryLog { // User. let handler_type = ctx.get_current_session().get_type().to_string(); let tenant_id = ctx.get_tenant(); - let cluster_id = GlobalSetting::instance().query.cluster_id.clone(); + let cluster_id = GlobalConfig::instance().query.cluster_id.clone(); let user = ctx.get_current_user()?; let sql_user = user.name; let sql_user_quota = format!("{:?}", user.quota); @@ -185,8 +185,8 @@ impl InterpreterQueryLog { pub fn log_finish(ctx: &QueryContext, now: SystemTime, err: Option) -> Result<()> { // User. let handler_type = ctx.get_current_session().get_type().to_string(); - let tenant_id = GlobalSetting::instance().query.tenant_id.clone(); - let cluster_id = GlobalSetting::instance().query.cluster_id.clone(); + let tenant_id = GlobalConfig::instance().query.tenant_id.clone(); + let cluster_id = GlobalConfig::instance().query.cluster_id.clone(); let user = ctx.get_current_user()?; let sql_user = user.name; let sql_user_quota = format!("{:?}", user.quota); diff --git a/src/query/service/src/interpreters/interpreter_unsetting.rs b/src/query/service/src/interpreters/interpreter_unsetting.rs index 8bdc36a40ac05..7c0407d5e1c0b 100644 --- a/src/query/service/src/interpreters/interpreter_unsetting.rs +++ b/src/query/service/src/interpreters/interpreter_unsetting.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_exception::Result; use common_settings::ScopeLevel::Global; use common_sql::plans::UnSettingPlan; @@ -58,14 +58,14 @@ impl Interpreter for UnSettingInterpreter { } let default_val = { if setting == "max_memory_usage" { - let conf = GlobalSetting::instance(); + let conf = GlobalConfig::instance(); if conf.query.max_server_memory_usage == 0 { settings.check_and_get_default_value(setting)?.to_string() } else { 
conf.query.max_server_memory_usage.to_string() } } else if setting == "max_threads" { - let conf = GlobalSetting::instance(); + let conf = GlobalConfig::instance(); if conf.query.num_cpus == 0 { settings.check_and_get_default_value(setting)?.to_string() } else { diff --git a/src/query/service/src/procedures/procedure.rs b/src/query/service/src/procedures/procedure.rs index 2e285d39074cc..ce9b7ea94b281 100644 --- a/src/query/service/src/procedures/procedure.rs +++ b/src/query/service/src/procedures/procedure.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataBlock; @@ -45,7 +45,7 @@ pub trait Procedure: Sync + Send { features.num_arguments, )?; - if features.management_mode_required && !GlobalSetting::instance().query.management_mode { + if features.management_mode_required && !GlobalConfig::instance().query.management_mode { return Err(ErrorCode::ManagementModePermissionDenied(format!( "Access denied: '{}' only used in management-mode", self.name() diff --git a/src/query/service/src/servers/http/http_services.rs b/src/query/service/src/servers/http/http_services.rs index 32f998d59e194..dcbc2bf25614f 100644 --- a/src/query/service/src/servers/http/http_services.rs +++ b/src/query/service/src/servers/http/http_services.rs @@ -15,8 +15,8 @@ use std::net::SocketAddr; use std::path::Path; -use common_config::GlobalSetting; -use common_config::Setting; +use common_config::GlobalConfig; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_http::HttpError; use common_http::HttpShutdownHandler; @@ -82,7 +82,7 @@ impl HttpHandler { }) } - async fn build_router(&self, config: &Setting, sock: SocketAddr) -> impl Endpoint { + async fn build_router(&self, config: &InnerConfig, sock: SocketAddr) -> impl Endpoint { let ep = match self.kind { HttpHandlerKind::Query => Route::new() .at( @@ -107,7 +107,7 @@ impl 
HttpHandler { .boxed() } - fn build_tls(config: &Setting) -> Result { + fn build_tls(config: &InnerConfig) -> Result { let certificate = RustlsCertificate::new() .cert(std::fs::read( config.query.http_handler_tls_server_cert.as_str(), @@ -127,7 +127,7 @@ impl HttpHandler { async fn start_with_tls(&mut self, listening: SocketAddr) -> Result { info!("Http Handler TLS enabled"); - let config = GlobalSetting::instance(); + let config = GlobalConfig::instance(); let tls_config = Self::build_tls(config.as_ref()) .map_err(|e: std::io::Error| HttpError::TlsConfigError(AnyError::new(&e)))?; @@ -140,7 +140,7 @@ impl HttpHandler { async fn start_without_tls(&mut self, listening: SocketAddr) -> Result { let router = self - .build_router(GlobalSetting::instance().as_ref(), listening) + .build_router(GlobalConfig::instance().as_ref(), listening) .await; self.shutdown_handler .start_service(listening, None, router, None) @@ -155,7 +155,7 @@ impl Server for HttpHandler { } async fn start(&mut self, listening: SocketAddr) -> Result { - let config = GlobalSetting::instance(); + let config = GlobalConfig::instance(); let res = match config.query.http_handler_tls_server_key.is_empty() || config.query.http_handler_tls_server_cert.is_empty() diff --git a/src/query/service/src/servers/http/v1/query/http_query_manager.rs b/src/query/service/src/servers/http/v1/query/http_query_manager.rs index 24d12eb707fd9..66a06380ac719 100644 --- a/src/query/service/src/servers/http/v1/query/http_query_manager.rs +++ b/src/query/service/src/servers/http/v1/query/http_query_manager.rs @@ -21,7 +21,7 @@ use common_base::base::tokio::time::sleep; use common_base::base::GlobalInstance; use common_base::runtime::GlobalIORuntime; use common_base::runtime::TrySpawn; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::Result; use parking_lot::Mutex; use tracing::warn; @@ -48,7 +48,7 @@ pub struct HttpQueryManager { } impl HttpQueryManager { - pub async fn init(cfg: &Setting) -> 
Result<()> { + pub async fn init(cfg: &InnerConfig) -> Result<()> { GlobalInstance::set(Arc::new(HttpQueryManager { queries: Arc::new(RwLock::new(HashMap::new())), sessions: Mutex::new(ExpiringMap::default()), diff --git a/src/query/service/src/sessions/query_ctx_shared.rs b/src/query/service/src/sessions/query_ctx_shared.rs index d9b0d29c92666..7ebf724eac09b 100644 --- a/src/query/service/src/sessions/query_ctx_shared.rs +++ b/src/query/service/src/sessions/query_ctx_shared.rs @@ -23,7 +23,7 @@ use std::time::SystemTime; use common_base::base::Progress; use common_base::runtime::Runtime; use common_catalog::table_context::StageAttachment; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_expression::DataBlock; @@ -84,7 +84,7 @@ pub struct QueryContextShared { impl QueryContextShared { pub fn try_create( - config: &Setting, + config: &InnerConfig, session: Arc, cluster_cache: Arc, ) -> Result> { diff --git a/src/query/service/src/sessions/session.rs b/src/query/service/src/sessions/session.rs index 9b2f1a2f50069..ad9b90e1c7e83 100644 --- a/src/query/service/src/sessions/session.rs +++ b/src/query/service/src/sessions/session.rs @@ -16,7 +16,7 @@ use std::net::SocketAddr; use std::sync::Arc; use chrono_tz::Tz; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_exception::ErrorCode; use common_exception::Result; use common_io::prelude::FormatSettings; @@ -126,7 +126,7 @@ impl Session { /// For a query, execution environment(e.g cluster) should be immutable. /// We can bind the environment to the context in create_context method. 
pub async fn create_query_context(self: &Arc) -> Result> { - let config = GlobalSetting::instance(); + let config = GlobalConfig::instance(); let session = self.clone(); let cluster = ClusterDiscovery::instance().discover(&config).await?; let shared = QueryContextShared::try_create(&config, session, cluster)?; diff --git a/src/query/service/src/sessions/session_ctx.rs b/src/query/service/src/sessions/session_ctx.rs index f88e58d5a791e..8dc9c5278bfe2 100644 --- a/src/query/service/src/sessions/session_ctx.rs +++ b/src/query/service/src/sessions/session_ctx.rs @@ -18,7 +18,7 @@ use std::sync::atomic::Ordering; use std::sync::Arc; use std::sync::Weak; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_exception::Result; use common_meta_app::principal::RoleInfo; use common_meta_app::principal::UserInfo; @@ -129,7 +129,7 @@ impl SessionContext { } pub fn get_current_tenant(&self) -> String { - let conf = GlobalSetting::instance(); + let conf = GlobalConfig::instance(); if conf.query.internal_enable_sandbox_tenant { let sandbox_tenant = self.settings.get_sandbox_tenant().unwrap_or_default(); diff --git a/src/query/service/src/sessions/session_mgr.rs b/src/query/service/src/sessions/session_mgr.rs index a1377818d06e9..cbcaeaf0e50fe 100644 --- a/src/query/service/src/sessions/session_mgr.rs +++ b/src/query/service/src/sessions/session_mgr.rs @@ -24,8 +24,8 @@ use std::time::Duration; use common_base::base::tokio; use common_base::base::GlobalInstance; use common_base::base::SignalStream; -use common_config::GlobalSetting; -use common_config::Setting; +use common_config::GlobalConfig; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_metrics::label_counter; @@ -59,13 +59,13 @@ pub struct SessionManager { } impl SessionManager { - pub fn init(conf: &Setting) -> Result<()> { + pub fn init(conf: &InnerConfig) -> Result<()> { GlobalInstance::set(Self::create(conf)); Ok(()) } - pub fn 
create(conf: &Setting) -> Arc { + pub fn create(conf: &InnerConfig) -> Arc { let max_sessions = conf.query.max_active_sessions as usize; Arc::new(SessionManager { max_sessions, @@ -82,7 +82,7 @@ impl SessionManager { pub async fn create_session(&self, typ: SessionType) -> Result> { // TODO: maybe deadlock - let config = GlobalSetting::instance(); + let config = GlobalConfig::instance(); { let sessions = self.active_sessions.read(); self.validate_max_active_sessions(sessions.len(), "active sessions")?; @@ -149,7 +149,7 @@ impl SessionManager { } pub fn destroy_session(&self, session_id: &String) { - let config = GlobalSetting::instance(); + let config = GlobalConfig::instance(); label_counter( METRIC_SESSION_CLOSE_NUMBERS, &config.query.tenant_id, diff --git a/src/query/service/tests/it/configs.rs b/src/query/service/tests/it/configs.rs index b5b8eea2521a3..e30a22875c728 100644 --- a/src/query/service/tests/it/configs.rs +++ b/src/query/service/tests/it/configs.rs @@ -17,10 +17,10 @@ use std::env::temp_dir; use std::fs; use std::io::Write; -use common_config::CatalogHiveSetting; -use common_config::CatalogSetting; -use common_config::ExternalCacheStorageTypeConfig; -use common_config::Setting; +use common_config::CacheStorageTypeConfig; +use common_config::CatalogConfig; +use common_config::CatalogHiveConfig; +use common_config::InnerConfig; use common_config::ThriftProtocol; use common_exception::ErrorCode; use common_exception::Result; @@ -84,7 +84,7 @@ fn test_env_config_s3() -> Result<()> { ("CONFIG_FILE", None), ], || { - let configured = Setting::load_for_test() + let configured = InnerConfig::load_for_test() .expect("must success") .into_config(); @@ -202,7 +202,7 @@ fn test_env_config_fs() -> Result<()> { ("CONFIG_FILE", None), ], || { - let configured = Setting::load_for_test() + let configured = InnerConfig::load_for_test() .expect("must success") .into_config(); @@ -320,7 +320,7 @@ fn test_env_config_gcs() -> Result<()> { ("CONFIG_FILE", None), ], || { - 
let configured = Setting::load_for_test() + let configured = InnerConfig::load_for_test() .expect("must success") .into_config(); @@ -447,7 +447,7 @@ fn test_env_config_oss() -> Result<()> { ("CONFIG_FILE", None), ], || { - let configured = Setting::load_for_test() + let configured = InnerConfig::load_for_test() .expect("must success") .into_config(); @@ -659,7 +659,7 @@ path = "_cache" ("STORAGE_TYPE", None), ], || { - let cfg = Setting::load_for_test() + let cfg = InnerConfig::load_for_test() .expect("config load success") .into_config(); @@ -670,7 +670,7 @@ path = "_cache" let cache_config = &cfg.cache; assert_eq!( cache_config.data_cache_storage, - ExternalCacheStorageTypeConfig::Disk + CacheStorageTypeConfig::Disk ); assert_eq!(cache_config.disk_cache_config.path, "_cache"); @@ -691,7 +691,7 @@ path = "_cache" assert!(inner.is_ok(), "casting must success"); let cfg = inner.unwrap(); match cfg { - CatalogSetting::Hive(cfg) => { + CatalogConfig::Hive(cfg) => { assert_eq!("127.0.0.1:9083", cfg.address, "address incorrect"); assert_eq!("binary", cfg.protocol.to_string(), "protocol incorrect"); } @@ -726,11 +726,11 @@ protocol = "binary" temp_env::with_vars( vec![("CONFIG_FILE", Some(file_path.to_string_lossy().as_ref()))], || { - let cfg = Setting::load_for_test().expect("config load success"); + let cfg = InnerConfig::load_for_test().expect("config load success"); assert_eq!( cfg.catalogs["hive"], - CatalogSetting::Hive(CatalogHiveSetting { + CatalogConfig::Hive(CatalogHiveConfig { address: "1.1.1.1:10000".to_string(), protocol: ThriftProtocol::Binary, }) @@ -766,11 +766,11 @@ protocol = "binary" temp_env::with_vars( vec![("CONFIG_FILE", Some(file_path.to_string_lossy().as_ref()))], || { - let cfg = Setting::load_for_test().expect("config load success"); + let cfg = InnerConfig::load_for_test().expect("config load success"); assert_eq!( cfg.catalogs["my_hive"], - CatalogSetting::Hive(CatalogHiveSetting { + CatalogConfig::Hive(CatalogHiveConfig { address: 
"1.1.1.1:12000".to_string(), protocol: ThriftProtocol::Binary, }) @@ -801,7 +801,7 @@ fn test_env_config_obsoleted() -> Result<()> { for env_var in obsoleted { temp_env::with_vars(vec![env_var], || { - let r = Setting::load_for_test(); + let r = InnerConfig::load_for_test(); assert!(r.is_err(), "expecting `Err`, but got `Ok`"); assert_eq!(r.unwrap_err().code(), ErrorCode::INVALID_CONFIG) }); diff --git a/src/query/service/tests/it/servers/http/clickhouse_handler.rs b/src/query/service/tests/it/servers/http/clickhouse_handler.rs index 7f71b524184f2..ef00f659c7959 100644 --- a/src/query/service/tests/it/servers/http/clickhouse_handler.rs +++ b/src/query/service/tests/it/servers/http/clickhouse_handler.rs @@ -15,7 +15,7 @@ use std::collections::HashMap; use common_base::base::tokio; -use common_config::Setting; +use common_config::InnerConfig; use databend_query::auth::AuthMgr; use databend_query::servers::http::middleware::HTTPSessionEndpoint; use databend_query::servers::http::middleware::HTTPSessionMiddleware; @@ -421,7 +421,7 @@ struct Server { } impl Server { - pub async fn new(config: &Setting) -> Self { + pub async fn new(config: &InnerConfig) -> Self { let session_middleware = HTTPSessionMiddleware::create(HttpHandlerKind::Clickhouse, AuthMgr::create(config)); let endpoint = Route::new() diff --git a/src/query/service/tests/it/storages/fuse/table_test_fixture.rs b/src/query/service/tests/it/storages/fuse/table_test_fixture.rs index e7f6ac3081ea4..b42bdab35c7df 100644 --- a/src/query/service/tests/it/storages/fuse/table_test_fixture.rs +++ b/src/query/service/tests/it/storages/fuse/table_test_fixture.rs @@ -19,7 +19,7 @@ use std::sync::Arc; use common_ast::ast::Engine; use common_catalog::catalog_kind::CATALOG_DEFAULT; use common_catalog::table::AppendMode; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_exception::Result; use common_expression::block_debug::assert_blocks_sorted_eq_with_name; use 
common_expression::infer_table_schema; @@ -426,7 +426,7 @@ pub async fn check_data_dir( check_last_snapshot: Option<()>, check_table_statistic_file: Option<()>, ) -> Result<()> { - let data_path = match &GlobalSetting::instance().storage.params { + let data_path = match &GlobalConfig::instance().storage.params { StorageParams::Fs(v) => v.root.clone(), _ => panic!("storage type is not fs"), }; diff --git a/src/query/service/tests/it/tests/config.rs b/src/query/service/tests/it/tests/config.rs index 17834d16efcf2..b66de9347c8f3 100644 --- a/src/query/service/tests/it/tests/config.rs +++ b/src/query/service/tests/it/tests/config.rs @@ -14,17 +14,17 @@ use std::collections::HashMap; -use common_config::Setting; +use common_config::InnerConfig; use common_meta_app::principal::AuthInfo; use common_users::idm_config::IDMConfig; pub struct ConfigBuilder { - conf: Setting, + conf: InnerConfig, } impl ConfigBuilder { pub fn create() -> ConfigBuilder { - let mut conf = Setting::default(); + let mut conf = InnerConfig::default(); conf.query.tenant_id = "test".to_string(); conf.log = common_tracing::Config::new_testing(); @@ -128,11 +128,11 @@ impl ConfigBuilder { self } - pub fn build(self) -> Setting { + pub fn build(self) -> InnerConfig { self.conf } - pub fn config(&self) -> Setting { + pub fn config(&self) -> InnerConfig { self.conf.clone() } } diff --git a/src/query/service/tests/it/tests/context.rs b/src/query/service/tests/it/tests/context.rs index f01bb9fcf6f85..7674613fbd023 100644 --- a/src/query/service/tests/it/tests/context.rs +++ b/src/query/service/tests/it/tests/context.rs @@ -14,7 +14,7 @@ use std::sync::Arc; -use common_config::Setting; +use common_config::InnerConfig; use common_config::DATABEND_COMMIT_VERSION; use common_exception::Result; use common_meta_app::principal::AuthInfo; @@ -69,7 +69,7 @@ pub async fn create_query_context_with_session( } pub async fn create_query_context_with_config( - config: Setting, + config: InnerConfig, mut current_user: 
Option, ) -> Result<(TestGuard, Arc)> { let guard = TestGlobalServices::setup(config).await?; diff --git a/src/query/service/tests/it/tests/sessions.rs b/src/query/service/tests/it/tests/sessions.rs index d82f90e291597..ea5f15379d5ce 100644 --- a/src/query/service/tests/it/tests/sessions.rs +++ b/src/query/service/tests/it/tests/sessions.rs @@ -13,7 +13,7 @@ // limitations under the License. use common_base::base::GlobalInstance; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::Result; use common_tracing::set_panic_hook; use databend_query::clusters::ClusterDiscovery; @@ -27,7 +27,7 @@ unsafe impl Send for TestGlobalServices {} unsafe impl Sync for TestGlobalServices {} impl TestGlobalServices { - pub async fn setup(config: Setting) -> Result { + pub async fn setup(config: InnerConfig) -> Result { set_panic_hook(); std::env::set_var("UNIT_TEST", "TRUE"); diff --git a/src/query/settings/src/lib.rs b/src/query/settings/src/lib.rs index a31e890ce767e..04ae7786239d1 100644 --- a/src/query/settings/src/lib.rs +++ b/src/query/settings/src/lib.rs @@ -25,8 +25,8 @@ use std::sync::Arc; use common_ast::Dialect; use common_base::runtime::GlobalIORuntime; use common_base::runtime::TrySpawn; -use common_config::GlobalSetting; -use common_config::Setting; +use common_config::GlobalConfig; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::principal::UserSetting; @@ -80,7 +80,7 @@ impl Settings { user_api: Arc, tenant: String, ) -> Result> { - let config = GlobalSetting::instance(); + let config = GlobalConfig::instance(); let settings = Self::default_settings(&tenant, config)?; let ret = { @@ -107,7 +107,7 @@ impl Settings { Ok(ret) } - pub fn default_settings(tenant: &str, conf: Arc) -> Result> { + pub fn default_settings(tenant: &str, conf: Arc) -> Result> { let memory_info = sys_info::mem_info().map_err(ErrorCode::from_std_error)?; let mut num_cpus = num_cpus::get() as u64; if 
conf.query.num_cpus != 0 { @@ -484,7 +484,7 @@ impl Settings { // Only used for testings pub fn default_test_settings() -> Result> { - Self::default_settings("default", Arc::new(Setting::default())) + Self::default_settings("default", Arc::new(InnerConfig::default())) } // Get max_block_size. diff --git a/src/query/sharing/src/signer.rs b/src/query/sharing/src/signer.rs index cfaf461429cea..1cd1018e36e5b 100644 --- a/src/query/sharing/src/signer.rs +++ b/src/query/sharing/src/signer.rs @@ -22,7 +22,7 @@ use anyhow::anyhow; use anyhow::Result; use bytes::Bytes; use common_auth::RefreshableToken; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use http::header::AUTHORIZATION; use http::header::CONTENT_LENGTH; use http::Method; @@ -152,7 +152,7 @@ impl SharedSigner { .collect(); let bs = Bytes::from(serde_json::to_vec(&reqs)?); let auth = self.token.to_header().await?; - let requester = GlobalSetting::instance().as_ref().query.tenant_id.clone(); + let requester = GlobalConfig::instance().as_ref().query.tenant_id.clone(); let req = Request::builder() .method(Method::POST) .uri(&self.endpoint) diff --git a/src/query/sql/src/planner/binder/copy.rs b/src/query/sql/src/planner/binder/copy.rs index 476bb2cefbb0f..bdd8253fac1d7 100644 --- a/src/query/sql/src/planner/binder/copy.rs +++ b/src/query/sql/src/planner/binder/copy.rs @@ -29,7 +29,7 @@ use common_catalog::plan::DataSourcePlan; use common_catalog::plan::Partitions; use common_catalog::plan::StageTableInfo; use common_catalog::table_context::TableContext; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::principal::FileFormatOptions; @@ -287,7 +287,7 @@ impl<'a> Binder { .await?; let (storage_params, path) = parse_uri_location(src_uri_location)?; - if !storage_params.is_secure() && !GlobalSetting::instance().storage.allow_insecure { + if !storage_params.is_secure() && 
!GlobalConfig::instance().storage.allow_insecure { return Err(ErrorCode::StorageInsecure( "copy from insecure storage is not allowed", )); @@ -405,7 +405,7 @@ impl<'a> Binder { .map_err(ErrorCode::SyntaxException)?; let (storage_params, path) = parse_uri_location(dst_uri_location)?; - if !storage_params.is_secure() && !GlobalSetting::instance().storage.allow_insecure { + if !storage_params.is_secure() && !GlobalConfig::instance().storage.allow_insecure { return Err(ErrorCode::StorageInsecure( "copy into insecure storage is not allowed", )); @@ -469,7 +469,7 @@ impl<'a> Binder { .map_err(ErrorCode::SyntaxException)?; let (storage_params, path) = parse_uri_location(dst_uri_location)?; - if !storage_params.is_secure() && !GlobalSetting::instance().storage.allow_insecure { + if !storage_params.is_secure() && !GlobalConfig::instance().storage.allow_insecure { return Err(ErrorCode::StorageInsecure( "copy into insecure storage is not allowed", )); diff --git a/src/query/sql/src/planner/binder/location.rs b/src/query/sql/src/planner/binder/location.rs index 36e3a3df0a962..b24b5ef1553a8 100644 --- a/src/query/sql/src/planner/binder/location.rs +++ b/src/query/sql/src/planner/binder/location.rs @@ -18,7 +18,7 @@ use std::io::Result; use anyhow::anyhow; use common_ast::ast::UriLocation; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_meta_app::storage::StorageAzblobConfig; use common_meta_app::storage::StorageFsConfig; use common_meta_app::storage::StorageGcsConfig; @@ -164,7 +164,7 @@ fn parse_s3_params(l: &mut UriLocation, root: String) -> Result { root, // Disable credential load by default. // TODO(xuanwo): we should support AssumeRole. 
- disable_credential_loader: !GlobalSetting::instance().storage.allow_insecure, + disable_credential_loader: !GlobalConfig::instance().storage.allow_insecure, enable_virtual_host_style, role_arn, external_id, diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index 156b337c2aa08..3d47e3aedf2bd 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -37,7 +37,7 @@ use common_catalog::table::ColumnStatistics; use common_catalog::table::NavigationPoint; use common_catalog::table::Table; use common_catalog::table_function::TableFunction; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_exception::ErrorCode; use common_exception::Result; use common_expression::types::DataType; @@ -298,7 +298,7 @@ impl Binder { FileLocation::Uri(mut l) => { let (storage_params, path) = parse_uri_location(&mut l)?; if !storage_params.is_secure() - && !GlobalSetting::instance().storage.allow_insecure + && !GlobalConfig::instance().storage.allow_insecure { return Err(ErrorCode::StorageInsecure( "copy from insecure storage is not allowed", diff --git a/src/query/sql/src/planner/expression_parser.rs b/src/query/sql/src/planner/expression_parser.rs index a39d89e548a62..00d54ac5f3cdb 100644 --- a/src/query/sql/src/planner/expression_parser.rs +++ b/src/query/sql/src/planner/expression_parser.rs @@ -23,7 +23,7 @@ use common_base::base::tokio::task::block_in_place; use common_catalog::catalog::CATALOG_DEFAULT; use common_catalog::table::Table; use common_catalog::table_context::TableContext; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_exception::ErrorCode; use common_exception::Result; use common_expression::types::DataType; @@ -54,7 +54,7 @@ pub fn parse_exprs( unwrap_tuple: bool, sql: &str, ) -> Result> { - let settings = Settings::default_settings("", GlobalSetting::instance())?; + let settings = Settings::default_settings("", 
GlobalConfig::instance())?; let mut bind_context = BindContext::new(); let metadata = Arc::new(RwLock::new(Metadata::default())); let table_index = metadata.write().add_table( diff --git a/src/query/sql/tests/location.rs b/src/query/sql/tests/location.rs index 5e6da6835c4e3..4e0f36d6515fb 100644 --- a/src/query/sql/tests/location.rs +++ b/src/query/sql/tests/location.rs @@ -21,8 +21,8 @@ use std::collections::BTreeMap; use anyhow::Result; use common_ast::ast::UriLocation; use common_base::base::GlobalInstance; -use common_config::GlobalSetting; -use common_config::Setting; +use common_config::GlobalConfig; +use common_config::InnerConfig; use common_meta_app::storage::StorageFsConfig; // use common_storage::StorageFtpConfig; use common_meta_app::storage::StorageGcsConfig; @@ -44,7 +44,7 @@ fn test_parse_uri_location() -> Result<()> { }; GlobalInstance::init_testing(&thread_name); - GlobalSetting::init(Setting::default())?; + GlobalConfig::init(InnerConfig::default())?; let cases = vec![ ( diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index 15282f8ebc8d2..7daafff5b4bc8 100644 --- a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -19,8 +19,8 @@ use std::sync::Arc; use common_base::base::GlobalInstance; use common_cache::CountableMeter; use common_cache::DefaultHashBuilder; -use common_config::CacheSetting; -use common_config::ExternalCacheStorageTypeSetting; +use common_config::CacheConfig; +use common_config::CacheStorageTypeInnerConfig; use common_exception::Result; use storages_common_cache::InMemoryCacheBuilder; use storages_common_cache::InMemoryItemCacheHolder; @@ -54,12 +54,12 @@ pub struct CacheManager { impl CacheManager { /// Initialize the caches according to the relevant configurations. 
- pub fn init(config: &CacheSetting, tenant_id: impl Into) -> Result<()> { + pub fn init(config: &CacheConfig, tenant_id: impl Into) -> Result<()> { // setup table data cache let table_data_cache = { match config.data_cache_storage { - ExternalCacheStorageTypeSetting::None => None, - ExternalCacheStorageTypeSetting::Disk => { + CacheStorageTypeInnerConfig::None => None, + CacheStorageTypeInnerConfig::Disk => { let real_disk_cache_root = PathBuf::from(&config.disk_cache_config.path) .join(tenant_id.into()) .join("v1"); diff --git a/src/query/storages/factory/src/storage_factory.rs b/src/query/storages/factory/src/storage_factory.rs index b6938150b6525..0105aab999cfb 100644 --- a/src/query/storages/factory/src/storage_factory.rs +++ b/src/query/storages/factory/src/storage_factory.rs @@ -15,7 +15,7 @@ use std::sync::Arc; pub use common_catalog::catalog::StorageDescription; -use common_config::Setting; +use common_config::InnerConfig; use common_exception::ErrorCode; use common_exception::Result; use common_meta_app::schema::TableInfo; @@ -67,7 +67,7 @@ pub struct StorageFactory { } impl StorageFactory { - pub fn create(conf: Setting) -> Self { + pub fn create(conf: InnerConfig) -> Self { let creators: DashMap = Default::default(); // Register memory table engine. 
diff --git a/src/query/storages/system/src/configs_table.rs b/src/query/storages/system/src/configs_table.rs index c96b33ee87cd0..9173c255ed56e 100644 --- a/src/query/storages/system/src/configs_table.rs +++ b/src/query/storages/system/src/configs_table.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use common_base::base::mask_string; use common_catalog::table::Table; use common_catalog::table_context::TableContext; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_config::QueryConfig; use common_exception::Result; use common_expression::types::StringType; @@ -48,7 +48,7 @@ impl SyncSystemTable for ConfigsTable { } fn get_full_data(&self, _ctx: Arc) -> Result { - let config = GlobalSetting::instance().as_ref().clone().into_config(); + let config = GlobalConfig::instance().as_ref().clone().into_config(); let mut names: Vec = vec![]; let mut values: Vec = vec![]; let mut groups: Vec = vec![]; diff --git a/src/query/storages/system/src/tracing_table.rs b/src/query/storages/system/src/tracing_table.rs index de664d7e4e369..e90a4d2f67bfa 100644 --- a/src/query/storages/system/src/tracing_table.rs +++ b/src/query/storages/system/src/tracing_table.rs @@ -25,7 +25,7 @@ use common_catalog::plan::Partitions; use common_catalog::plan::PushDownInfo; use common_catalog::table::Table; use common_catalog::table_context::TableContext; -use common_config::GlobalSetting; +use common_config::GlobalConfig; use common_exception::ErrorCode; use common_exception::Result; use common_expression::types::DataType; @@ -77,9 +77,9 @@ impl TracingTable { fn log_files() -> Result> { debug!( "list log files from {:?}", - std::fs::canonicalize(GlobalSetting::instance().log.file.dir.as_str()) + std::fs::canonicalize(GlobalConfig::instance().log.file.dir.as_str()) ); - WalkDir::new(GlobalSetting::instance().log.file.dir.as_str()) + WalkDir::new(GlobalConfig::instance().log.file.dir.as_str()) // NOTE:(everpcpc) ignore log files in subdir with different format .max_depth(1) 
.sort_by_key(|file| file.file_name().to_owned()) From 8e147202279bde2539a7a02de1f6219ec62cefba Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 15 Feb 2023 23:00:21 +0800 Subject: [PATCH 76/80] fix typo --- src/query/config/src/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index e2c181ef46f4a..30e20237f8854 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -2103,7 +2103,7 @@ impl QueryConfig { "cache-enable-table-meta-caches", r#" [cache] - table-meta-cache-enabled=["true"|"false"] + table-meta-cache-enabled=[true|false] "#, "CACHE_ENABLE_TABLE_META_CACHE", ), From 46def0bcc93ccda95cf6058866d2c2260a9d10f2 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 15 Feb 2023 23:15:48 +0800 Subject: [PATCH 77/80] update doc: 70-system-tables/system-configs.md --- .../70-system-tables/system-configs.md | 173 +++++++++++++----- 1 file changed, 127 insertions(+), 46 deletions(-) diff --git a/docs/doc/13-sql-reference/70-system-tables/system-configs.md b/docs/doc/13-sql-reference/70-system-tables/system-configs.md index 24e01fc96a887..69e6ffc95dc40 100644 --- a/docs/doc/13-sql-reference/70-system-tables/system-configs.md +++ b/docs/doc/13-sql-reference/70-system-tables/system-configs.md @@ -5,50 +5,131 @@ title: system.configs Contains information about Databend server configs. 
```sql -SELECT * FROM system.configs; -+--------------------------------------+------------------+-------+-------------+ -| name | value | group | description | -+--------------------------------------+------------------+-------+-------------+ -| tenant_id | | query | | -| cluster_id | | query | | -| num_cpus | 16 | query | | -| mysql_handler_host | 127.0.0.1 | query | | -| mysql_handler_port | 3307 | query | | -| max_active_sessions | 256 | query | | -| max_memory_usage | 0 | query | | -| clickhouse_handler_host | 127.0.0.1 | query | | -| clickhouse_handler_port | 9000 | query | | -| http_handler_host | 127.0.0.1 | query | | -| http_handler_port | 8000 | query | | -| flight_api_address | 127.0.0.1:9090 | query | | -| admin_api_address | 127.0.0.1:8080 | query | | -| metric_api_address | 127.0.0.1:7070 | query | | -| http_handler_tls_server_cert | | query | | -| http_handler_tls_server_key | | query | | -| http_handler_tls_server_root_ca_cert | | query | | -| api_tls_server_cert | | query | | -| api_tls_server_key | | query | | -| api_tls_server_root_ca_cert | | query | | -| rpc_tls_server_cert | | query | | -| rpc_tls_server_key | | query | | -| rpc_tls_query_server_root_ca_cert | | query | | -| rpc_tls_query_service_domain_name | localhost | query | | -| table_engine_memory_enabled | true | query | | -| database_engine_github_enabled | true | query | | -| wait_timeout_mills | 5000 | query | | -| max_query_log_size | 10000 | query | | -| table_cache_enabled | false | query | | -| table_memory_cache_mb_size | 256 | query | | -| table_disk_cache_root | _cache | query | | -| table_disk_cache_mb_size | 1024 | query | | -| log_level | INFO | log | | -| log_dir | ./_logs | log | | -| meta_embedded_dir | | meta | | -| meta_address | | meta | | -| meta_username | | meta | | -| meta_password | | meta | | -| meta_client_timeout_in_second | 10 | meta | | -| rpc_tls_meta_server_root_ca_cert | | meta | | -| rpc_tls_meta_service_domain_name | localhost | meta | | 
-+--------------------------------------+------------------+-------+-------------+ +mysql> SELECT * FROM system.configs; ++---------+----------------------------------------+--------------------------------+-------------+ +| group | name | value | description | ++---------+----------------------------------------+--------------------------------+-------------+ +| query | tenant_id | admin | | +| query | cluster_id | | | +| query | num_cpus | 0 | | +| query | mysql_handler_host | 127.0.0.1 | | +| query | mysql_handler_port | 3307 | | +| query | max_active_sessions | 256 | | +| query | max_server_memory_usage | 0 | | +| query | max_memory_limit_enabled | false | | +| query | clickhouse_handler_host | 127.0.0.1 | | +| query | clickhouse_handler_port | 9000 | | +| query | clickhouse_http_handler_host | 127.0.0.1 | | +| query | clickhouse_http_handler_port | 8124 | | +| query | http_handler_host | 127.0.0.1 | | +| query | http_handler_port | 8000 | | +| query | http_handler_result_timeout_secs | 60 | | +| query | flight_api_address | 127.0.0.1:9090 | | +| query | admin_api_address | 127.0.0.1:8080 | | +| query | metric_api_address | 127.0.0.1:7070 | | +| query | http_handler_tls_server_cert | | | +| query | http_handler_tls_server_key | | | +| query | http_handler_tls_server_root_ca_cert | | | +| query | api_tls_server_cert | | | +| query | api_tls_server_key | | | +| query | api_tls_server_root_ca_cert | | | +| query | rpc_tls_server_cert | | | +| query | rpc_tls_server_key | | | +| query | rpc_tls_query_server_root_ca_cert | | | +| query | rpc_tls_query_service_domain_name | localhost | | +| query | table_engine_memory_enabled | true | | +| query | database_engine_github_enabled | true | | +| query | wait_timeout_mills | 5000 | | +| query | max_query_log_size | 10000 | | +| query | management_mode | false | | +| query | jwt_key_file | | | +| query | jwt_key_files | | | +| query | async_insert_max_data_size | 10000 | | +| query | async_insert_busy_timeout | 200 | | +| 
query | async_insert_stale_timeout | 0 | | +| query | users | | | +| query | share_endpoint_address | | | +| query | share_endpoint_auth_token_file | | | +| query | quota | null | | +| query | internal_enable_sandbox_tenant | false | | +| log | level | INFO | | +| log | dir | ./.databend/logs | | +| log | query_enabled | false | | +| log | file.on | true | | +| log | file.level | INFO | | +| log | file.dir | ./.databend/logs | | +| log | file.format | json | | +| log | stderr.on | false | | +| log | stderr.level | INFO | | +| log | stderr.format | text | | +| meta | embedded_dir | .databend/meta | | +| meta | endpoints | | | +| meta | username | root | | +| meta | password | | | +| meta | client_timeout_in_second | 10 | | +| meta | auto_sync_interval | 0 | | +| meta | rpc_tls_meta_server_root_ca_cert | | | +| meta | rpc_tls_meta_service_domain_name | localhost | | +| cache | enable_table_meta_caches | true | | +| cache | table_meta_snapshot_count | 256 | | +| cache | table_meta_segment_count | 10240 | | +| cache | table_meta_statistic_count | 256 | | +| cache | enable_table_index_bloom | true | | +| cache | table_bloom_index_meta_count | 3000 | | +| cache | table_bloom_index_filter_count | 1048576 | | +| cache | data_cache_storage | none | | +| cache | table_data_cache_population_queue_size | 65536 | | +| cache | disk.max_bytes | 21474836480 | | +| cache | disk.path | ./.databend/_cache | | +| cache | table_data_deserialized_data_bytes | 0 | | +| storage | type | fs | | +| storage | num_cpus | 0 | | +| storage | allow_insecure | false | | +| storage | fs.data_path | _data | | +| storage | gcs.endpoint_url | https://storage.googleapis.com | | +| storage | gcs.bucket | | | +| storage | gcs.root | | | +| storage | gcs.credential | | | +| storage | s3.region | | | +| storage | s3.endpoint_url | https://s3.amazonaws.com | | +| storage | s3.access_key_id | | | +| storage | s3.secret_access_key | | | +| storage | s3.security_token | | | +| storage | s3.bucket | | | +| 
storage | s3.root | | | +| storage | s3.master_key | | | +| storage | s3.enable_virtual_host_style | false | | +| storage | s3.role_arn | | | +| storage | s3.external_id | | | +| storage | azblob.account_name | | | +| storage | azblob.account_key | | | +| storage | azblob.container | | | +| storage | azblob.endpoint_url | | | +| storage | azblob.root | | | +| storage | hdfs.name_node | | | +| storage | hdfs.root | | | +| storage | obs.access_key_id | | | +| storage | obs.secret_access_key | | | +| storage | obs.bucket | | | +| storage | obs.endpoint_url | | | +| storage | obs.root | | | +| storage | oss.access_key_id | | | +| storage | oss.access_key_secret | | | +| storage | oss.bucket | | | +| storage | oss.endpoint_url | | | +| storage | oss.presign_endpoint_url | | | +| storage | oss.root | | | +| storage | cache.type | none | | +| storage | cache.num_cpus | 0 | | +| storage | cache.fs.data_path | _data | | +| storage | cache.moka.max_capacity | 1073741824 | | +| storage | cache.moka.time_to_live | 3600 | | +| storage | cache.moka.time_to_idle | 600 | | +| storage | cache.redis.endpoint_url | | | +| storage | cache.redis.username | | | +| storage | cache.redis.password | | | +| storage | cache.redis.root | | | +| storage | cache.redis.db | 0 | | +| storage | cache.redis.default_ttl | 0 | | ++---------+----------------------------------------+--------------------------------+-------------+ ``` From 49b30ff62d42322f3825b45da0402fd03dd3f534 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 15 Feb 2023 23:31:01 +0800 Subject: [PATCH 78/80] fix: incorrect serde name --- src/query/config/src/config.rs | 6 +++--- src/query/service/tests/it/configs.rs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index 30e20237f8854..deba34704d2fd 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -1834,7 +1834,7 @@ pub struct CacheConfig { /// Enable bloom 
index cache. Default is enabled. Set it to false to disable all the bloom index caches #[clap(long = "cache-enable-table-bloom-index-caches", default_value = "true")] #[serde(default = "bool_true")] - pub enable_table_index_bloom: bool, + pub enable_table_bloom_index_caches: bool, /// Max number of cached bloom index meta objects. Set it to 0 to disable it. #[clap(long = "cache-table-bloom-index-meta-count", default_value = "3000")] @@ -1984,7 +1984,7 @@ mod cache_config_converters { table_meta_snapshot_count: value.table_meta_snapshot_count, table_meta_segment_count: value.table_meta_segment_count, table_meta_statistic_count: value.table_meta_statistic_count, - enable_table_index_bloom: value.enable_table_index_bloom, + enable_table_index_bloom: value.enable_table_bloom_index_caches, table_bloom_index_meta_count: value.table_bloom_index_meta_count, table_bloom_index_filter_count: value.table_bloom_index_filter_count, data_cache_storage: value.data_cache_storage.try_into()?, @@ -2003,7 +2003,7 @@ mod cache_config_converters { table_meta_snapshot_count: value.table_meta_snapshot_count, table_meta_segment_count: value.table_meta_segment_count, table_meta_statistic_count: value.table_meta_statistic_count, - enable_table_index_bloom: value.enable_table_index_bloom, + enable_table_bloom_index_caches: value.enable_table_index_bloom, table_bloom_index_meta_count: value.table_bloom_index_meta_count, table_bloom_index_filter_count: value.table_bloom_index_filter_count, data_cache_storage: value.data_cache_storage.into(), diff --git a/src/query/service/tests/it/configs.rs b/src/query/service/tests/it/configs.rs index e30a22875c728..275761e6b0cea 100644 --- a/src/query/service/tests/it/configs.rs +++ b/src/query/service/tests/it/configs.rs @@ -129,7 +129,7 @@ fn test_env_config_s3() -> Result<()> { assert_eq!("us.bucket", configured.storage.s3.bucket); assert!(configured.cache.enable_table_meta_caches); - assert!(configured.cache.enable_table_index_bloom); + 
assert!(configured.cache.enable_table_bloom_index_caches); assert_eq!(10240, configured.cache.table_meta_segment_count); assert_eq!(256, configured.cache.table_meta_snapshot_count); assert_eq!(3000, configured.cache.table_bloom_index_meta_count); @@ -246,7 +246,7 @@ fn test_env_config_fs() -> Result<()> { assert_eq!("", configured.storage.gcs.gcs_root); assert_eq!("", configured.storage.gcs.credential); - assert!(configured.cache.enable_table_index_bloom); + assert!(configured.cache.enable_table_bloom_index_caches); assert!(configured.cache.enable_table_meta_caches); assert_eq!("_cache_env", configured.cache.disk_cache_config.path); assert_eq!(512, configured.cache.disk_cache_config.max_bytes); @@ -372,7 +372,7 @@ fn test_env_config_gcs() -> Result<()> { assert_eq!("", configured.storage.oss.oss_access_key_secret); assert!(configured.cache.enable_table_meta_caches); - assert!(configured.cache.enable_table_index_bloom); + assert!(configured.cache.enable_table_bloom_index_caches); assert_eq!("_cache_env", configured.cache.disk_cache_config.path); assert_eq!(512, configured.cache.disk_cache_config.max_bytes); assert_eq!(10240, configured.cache.table_meta_segment_count); From 05c5a2974e63e6887ed4a92d10b4d7224f62d9b9 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Wed, 15 Feb 2023 23:50:27 +0800 Subject: [PATCH 79/80] update golden file --- .../service/tests/it/storages/testdata/configs_table_basic.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index 30510a4b0c0be..c30b43012e83a 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -7,7 +7,7 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo | "cache" | "data_cache_storage" | "none" | "" | | "cache" | "disk.max_bytes" | 
"21474836480" | "" | | "cache" | "disk.path" | "./.databend/_cache" | "" | -| "cache" | "enable_table_index_bloom" | "true" | "" | +| "cache" | "enable_table_bloom_index_caches" | "true" | "" | | "cache" | "enable_table_meta_caches" | "true" | "" | | "cache" | "table_bloom_index_filter_count" | "1048576" | "" | | "cache" | "table_bloom_index_meta_count" | "3000" | "" | From 355d9f1a27afd624ccdeb490828c741608494b66 Mon Sep 17 00:00:00 2001 From: BohuTANG Date: Thu, 16 Feb 2023 08:38:06 +0800 Subject: [PATCH 80/80] enable_table_meta_caches -> enable_table_meta_cache, enable_table_bloom_index_caches -> enable_table_bloom_index_cache --- .../70-system-tables/system-configs.md | 2 +- .../deploy/config/databend-query-node-1.toml | 4 ++-- .../deploy/config/databend-query-node-2.toml | 4 ++-- .../deploy/config/databend-query-node-3.toml | 4 ++-- .../config/databend-query-node-shared.toml | 4 ++-- src/query/config/src/config.rs | 20 +++++++++---------- src/query/config/src/inner.rs | 4 ++-- src/query/service/tests/it/configs.rs | 20 +++++++++---------- .../storages/testdata/configs_table_basic.txt | 4 ++-- .../common/cache-manager/src/cache_manager.rs | 2 +- 10 files changed, 34 insertions(+), 34 deletions(-) diff --git a/docs/doc/13-sql-reference/70-system-tables/system-configs.md b/docs/doc/13-sql-reference/70-system-tables/system-configs.md index 69e6ffc95dc40..ab12f0a1d6a57 100644 --- a/docs/doc/13-sql-reference/70-system-tables/system-configs.md +++ b/docs/doc/13-sql-reference/70-system-tables/system-configs.md @@ -70,7 +70,7 @@ mysql> SELECT * FROM system.configs; | meta | auto_sync_interval | 0 | | | meta | rpc_tls_meta_server_root_ca_cert | | | | meta | rpc_tls_meta_service_domain_name | localhost | | -| cache | enable_table_meta_caches | true | | +| cache | enable_table_meta_cache | true | | | cache | table_meta_snapshot_count | 256 | | | cache | table_meta_segment_count | 10240 | | | cache | table_meta_statistic_count | 256 | | diff --git 
a/scripts/ci/deploy/config/databend-query-node-1.toml b/scripts/ci/deploy/config/databend-query-node-1.toml index f7991a81acd43..5e4e65229ac6f 100644 --- a/scripts/ci/deploy/config/databend-query-node-1.toml +++ b/scripts/ci/deploy/config/databend-query-node-1.toml @@ -110,7 +110,7 @@ data_path = "./.databend/stateless_test_data" ### table meta caches ### # Enable table meta cache. Default is true. # Set it to false wll disable all the table meta caches -enable_table_meta_caches = true +enable_table_meta_cache = true # Max number of cached table snapshot. Set it to 0 to disable it. table_meta_snapshot_count = 256 # Max number of cached table segment. Set it to 0 to disable it. @@ -121,7 +121,7 @@ table_meta_statistic_count = 256 ### table bloom index caches ### # Enable bloom index cache. Default is true # Set it to false will disable all the bloom index caches -enable_table_bloom_index_caches = true +enable_table_bloom_index_cache = true # Max number of cached bloom index meta objects. Set it to 0 to disable it. table_bloom_index_meta_count = 3000 # Max number of cached bloom index filters. Set it to 0 to disable it. diff --git a/scripts/ci/deploy/config/databend-query-node-2.toml b/scripts/ci/deploy/config/databend-query-node-2.toml index 8f2849693d872..8f66c5fdaa80e 100644 --- a/scripts/ci/deploy/config/databend-query-node-2.toml +++ b/scripts/ci/deploy/config/databend-query-node-2.toml @@ -63,7 +63,7 @@ data_path = "./.databend/stateless_test_data" ### table meta caches ### # Enable table meta cache. Default is true. # Set it to false wll disable all the table meta caches -enable_table_meta_caches = true +enable_table_meta_cache = true # Max number of cached table snapshot. Set it to 0 to disable it. table_meta_snapshot_count = 256 # Max number of cached table segment. Set it to 0 to disable it. @@ -74,7 +74,7 @@ table_meta_statistic_count = 256 ### table bloom index caches ### # Enable bloom index cache. 
Default is true # Set it to false will disable all the bloom index caches -enable_table_bloom_index_caches = true +enable_table_bloom_index_cache = true # Max number of cached bloom index meta objects. Set it to 0 to disable it. table_bloom_index_meta_count = 3000 # Max number of cached bloom index filters. Set it to 0 to disable it. diff --git a/scripts/ci/deploy/config/databend-query-node-3.toml b/scripts/ci/deploy/config/databend-query-node-3.toml index 9721d42fecd02..594e5338a0027 100644 --- a/scripts/ci/deploy/config/databend-query-node-3.toml +++ b/scripts/ci/deploy/config/databend-query-node-3.toml @@ -64,7 +64,7 @@ data_path = "./.databend/stateless_test_data" ### table meta caches ### # Enable table meta cache. Default is true. # Set it to false wll disable all the table meta caches -enable_table_meta_caches = true +enable_table_meta_cache = true # Max number of cached table snapshot. Set it to 0 to disable it. table_meta_snapshot_count = 256 # Max number of cached table segment. Set it to 0 to disable it. @@ -75,7 +75,7 @@ table_meta_statistic_count = 256 ### table bloom index caches ### # Enable bloom index cache. Default is true # Set it to false will disable all the bloom index caches -enable_table_bloom_index_caches = true +enable_table_bloom_index_cache = true # Max number of cached bloom index meta objects. Set it to 0 to disable it. table_bloom_index_meta_count = 3000 # Max number of cached bloom index filters. Set it to 0 to disable it. diff --git a/scripts/ci/deploy/config/databend-query-node-shared.toml b/scripts/ci/deploy/config/databend-query-node-shared.toml index f3eeffbe8c8c1..9d42539fa08ef 100644 --- a/scripts/ci/deploy/config/databend-query-node-shared.toml +++ b/scripts/ci/deploy/config/databend-query-node-shared.toml @@ -109,7 +109,7 @@ data_path = "./.databend/stateless_test_data" ### table meta caches ### # Enable table meta cache. Default is true. 
# Set it to false wll disable all the table meta caches -enable_table_meta_caches = true +enable_table_meta_cache = true # Max number of cached table snapshot. Set it to 0 to disable it. table_meta_snapshot_count = 256 # Max number of cached table segment. Set it to 0 to disable it. @@ -120,7 +120,7 @@ table_meta_statistic_count = 256 ### table bloom index caches ### # Enable bloom index cache. Default is true # Set it to false will disable all the bloom index caches -enable_table_bloom_index_caches = true +enable_table_bloom_index_cache = true # Max number of cached bloom index meta objects. Set it to 0 to disable it. table_bloom_index_meta_count = 3000 # Max number of cached bloom index filters. Set it to 0 to disable it. diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index deba34704d2fd..6f1020d2477d6 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -1815,9 +1815,9 @@ impl TryInto for LocalConfig { #[serde(default, deny_unknown_fields)] pub struct CacheConfig { /// Enable table meta cache. Default is enabled. Set it to false to disable all the table meta caches - #[clap(long = "cache-enable-table-meta-caches", default_value = "true")] + #[clap(long = "cache-enable-table-meta-cache", default_value = "true")] #[serde(default = "bool_true")] - pub enable_table_meta_caches: bool, + pub enable_table_meta_cache: bool, /// Max number of cached table snapshot #[clap(long = "cache-table-meta-snapshot-count", default_value = "256")] @@ -1832,9 +1832,9 @@ pub struct CacheConfig { pub table_meta_statistic_count: u64, /// Enable bloom index cache. Default is enabled. 
Set it to false to disable all the bloom index caches - #[clap(long = "cache-enable-table-bloom-index-caches", default_value = "true")] + #[clap(long = "cache-enable-table-bloom-index-cache", default_value = "true")] #[serde(default = "bool_true")] - pub enable_table_bloom_index_caches: bool, + pub enable_table_bloom_index_cache: bool, /// Max number of cached bloom index meta objects. Set it to 0 to disable it. #[clap(long = "cache-table-bloom-index-meta-count", default_value = "3000")] @@ -1980,11 +1980,11 @@ mod cache_config_converters { fn try_from(value: CacheConfig) -> std::result::Result { Ok(Self { - enable_table_meta_caches: value.enable_table_meta_caches, + enable_table_meta_cache: value.enable_table_meta_cache, table_meta_snapshot_count: value.table_meta_snapshot_count, table_meta_segment_count: value.table_meta_segment_count, table_meta_statistic_count: value.table_meta_statistic_count, - enable_table_index_bloom: value.enable_table_bloom_index_caches, + enable_table_index_bloom: value.enable_table_bloom_index_cache, table_bloom_index_meta_count: value.table_bloom_index_meta_count, table_bloom_index_filter_count: value.table_bloom_index_filter_count, data_cache_storage: value.data_cache_storage.try_into()?, @@ -1999,11 +1999,11 @@ mod cache_config_converters { impl From for CacheConfig { fn from(value: inner::CacheConfig) -> Self { Self { - enable_table_meta_caches: value.enable_table_meta_caches, + enable_table_meta_cache: value.enable_table_meta_cache, table_meta_snapshot_count: value.table_meta_snapshot_count, table_meta_segment_count: value.table_meta_segment_count, table_meta_statistic_count: value.table_meta_statistic_count, - enable_table_bloom_index_caches: value.enable_table_index_bloom, + enable_table_bloom_index_cache: value.enable_table_index_bloom, table_bloom_index_meta_count: value.table_bloom_index_meta_count, table_bloom_index_filter_count: value.table_bloom_index_filter_count, data_cache_storage: value.data_cache_storage.into(), @@ 
-2100,10 +2100,10 @@ impl QueryConfig { Self::check( &self.table_meta_cache_enabled, "table-meta-cache-enabled", - "cache-enable-table-meta-caches", + "cache-enable-table-meta-cache", r#" [cache] - table-meta-cache-enabled=[true|false] + enable_table_meta_cache=[true|false] "#, "CACHE_ENABLE_TABLE_META_CACHE", ), diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 2ae7806144cb4..f9cf2eb359591 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -397,7 +397,7 @@ impl Default for LocalConfig { #[derive(Clone, Debug, PartialEq, Eq)] pub struct CacheConfig { /// Enable table meta cache. Default is enabled. Set it to false to disable all the table meta caches - pub enable_table_meta_caches: bool, + pub enable_table_meta_cache: bool, /// Max number of cached table snapshot pub table_meta_snapshot_count: u64, @@ -482,7 +482,7 @@ impl Default for DiskCacheConfig { impl Default for CacheConfig { fn default() -> Self { Self { - enable_table_meta_caches: true, + enable_table_meta_cache: true, table_meta_snapshot_count: 256, table_meta_segment_count: 10240, table_meta_statistic_count: 256, diff --git a/src/query/service/tests/it/configs.rs b/src/query/service/tests/it/configs.rs index 275761e6b0cea..e1c8c6b64fc74 100644 --- a/src/query/service/tests/it/configs.rs +++ b/src/query/service/tests/it/configs.rs @@ -46,7 +46,7 @@ fn test_env_config_s3() -> Result<()> { ("QUERY_FLIGHT_API_ADDRESS", Some("1.2.3.4:9091")), ("QUERY_ADMIN_API_ADDRESS", Some("1.2.3.4:8081")), ("QUERY_METRIC_API_ADDRESS", Some("1.2.3.4:7071")), - ("CACHE_ENABLE_TABLE_META_CACHES", Some("true")), + ("CACHE_ENABLE_TABLE_META_CACHE", Some("true")), ("CACHE_DISK_PATH", Some("_cache_env")), ("CACHE_DISK_MAX_BYES", Some("512")), ("CACHE_TABLE_META_SNAPSHOT_COUNT", Some("256")), @@ -128,8 +128,8 @@ fn test_env_config_s3() -> Result<()> { assert_eq!("us.key", configured.storage.s3.secret_access_key); assert_eq!("us.bucket", configured.storage.s3.bucket); - 
assert!(configured.cache.enable_table_meta_caches); - assert!(configured.cache.enable_table_bloom_index_caches); + assert!(configured.cache.enable_table_meta_cache); + assert!(configured.cache.enable_table_bloom_index_cache); assert_eq!(10240, configured.cache.table_meta_segment_count); assert_eq!(256, configured.cache.table_meta_snapshot_count); assert_eq!(3000, configured.cache.table_bloom_index_meta_count); @@ -246,8 +246,8 @@ fn test_env_config_fs() -> Result<()> { assert_eq!("", configured.storage.gcs.gcs_root); assert_eq!("", configured.storage.gcs.credential); - assert!(configured.cache.enable_table_bloom_index_caches); - assert!(configured.cache.enable_table_meta_caches); + assert!(configured.cache.enable_table_bloom_index_cache); + assert!(configured.cache.enable_table_meta_cache); assert_eq!("_cache_env", configured.cache.disk_cache_config.path); assert_eq!(512, configured.cache.disk_cache_config.max_bytes); assert_eq!(10240, configured.cache.table_meta_segment_count); @@ -371,8 +371,8 @@ fn test_env_config_gcs() -> Result<()> { assert_eq!("", configured.storage.oss.oss_access_key_id); assert_eq!("", configured.storage.oss.oss_access_key_secret); - assert!(configured.cache.enable_table_meta_caches); - assert!(configured.cache.enable_table_bloom_index_caches); + assert!(configured.cache.enable_table_meta_cache); + assert!(configured.cache.enable_table_bloom_index_cache); assert_eq!("_cache_env", configured.cache.disk_cache_config.path); assert_eq!(512, configured.cache.disk_cache_config.max_bytes); assert_eq!(10240, configured.cache.table_meta_segment_count); @@ -407,7 +407,7 @@ fn test_env_config_oss() -> Result<()> { ("QUERY_FLIGHT_API_ADDRESS", Some("1.2.3.4:9091")), ("QUERY_ADMIN_API_ADDRESS", Some("1.2.3.4:8081")), ("QUERY_METRIC_API_ADDRESS", Some("1.2.3.4:7071")), - ("CACHE_ENABLE_TABLE_META_CACHES", Some("true")), + ("CACHE_ENABLE_TABLE_META_CACHE", Some("true")), ("CACHE_DATA_CACHE_STORAGE", Some("disk")), ("TABLE_CACHE_BLOOM_INDEX_FILTER_COUNT", 
Some("1")), ("CACHE_DISK_PATH", Some("_cache_env")), @@ -505,7 +505,7 @@ fn test_env_config_oss() -> Result<()> { assert_eq!("", configured.storage.gcs.gcs_root); assert_eq!("", configured.storage.gcs.credential); - assert!(configured.cache.enable_table_meta_caches); + assert!(configured.cache.enable_table_meta_cache); assert_eq!("_cache_env", configured.cache.disk_cache_config.path); assert_eq!(512, configured.cache.disk_cache_config.max_bytes); assert_eq!(10240, configured.cache.table_meta_segment_count); @@ -634,7 +634,7 @@ protocol = "binary" [cache] -enable_table_meta_caches = false +enable_table_meta_cache = false table_meta_snapshot_count = 256 table_meta_segment_count = 10240 table_bloom_index_meta_count = 3000 diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index c30b43012e83a..f4ccbed3a45ad 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -7,8 +7,8 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo | "cache" | "data_cache_storage" | "none" | "" | | "cache" | "disk.max_bytes" | "21474836480" | "" | | "cache" | "disk.path" | "./.databend/_cache" | "" | -| "cache" | "enable_table_bloom_index_caches" | "true" | "" | -| "cache" | "enable_table_meta_caches" | "true" | "" | +| "cache" | "enable_table_bloom_index_cache" | "true" | "" | +| "cache" | "enable_table_meta_cache" | "true" | "" | | "cache" | "table_bloom_index_filter_count" | "1048576" | "" | | "cache" | "table_bloom_index_meta_count" | "3000" | "" | | "cache" | "table_data_cache_population_queue_size" | "65536" | "" | diff --git a/src/query/storages/common/cache-manager/src/cache_manager.rs b/src/query/storages/common/cache-manager/src/cache_manager.rs index 7daafff5b4bc8..ebe7ed61558a3 100644 --- 
a/src/query/storages/common/cache-manager/src/cache_manager.rs +++ b/src/query/storages/common/cache-manager/src/cache_manager.rs @@ -80,7 +80,7 @@ impl CacheManager { ); // setup in-memory table meta cache - if !config.enable_table_meta_caches { + if !config.enable_table_meta_cache { GlobalInstance::set(Arc::new(Self { table_snapshot_cache: None, segment_info_cache: None,