diff --git a/Cargo.lock b/Cargo.lock index e3d254ebd23ae..d5c75c96cc914 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -111,7 +111,7 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" [[package]] name = "arrow-flight" version = "0.1.0" -source = "git+https://github.com/zhyass/arrow2?rev=2e3de5a#2e3de5a753dcadc31195de4d62e039c13e1e69d0" +source = "git+https://github.com/zhyass/arrow2?rev=23682f0#23682f033d6e473a8292e441414795e843b47188" dependencies = [ "arrow2", "bytes", @@ -125,7 +125,7 @@ dependencies = [ [[package]] name = "arrow2" version = "0.4.0" -source = "git+https://github.com/zhyass/arrow2?rev=2e3de5a#2e3de5a753dcadc31195de4d62e039c13e1e69d0" +source = "git+https://github.com/zhyass/arrow2?rev=23682f0#23682f033d6e473a8292e441414795e843b47188" dependencies = [ "ahash 0.7.4", "base64", diff --git a/common/arrow/Cargo.toml b/common/arrow/Cargo.toml index fde0270659039..d3f38513de93a 100644 --- a/common/arrow/Cargo.toml +++ b/common/arrow/Cargo.toml @@ -15,8 +15,8 @@ simd = ["arrow/simd"] # Workspace dependencies # Github dependencies -arrow = { package = "arrow2", git="https://github.com/zhyass/arrow2", rev = "2e3de5a" } -arrow-flight = { git="https://github.com/zhyass/arrow2", rev = "2e3de5a" } +arrow = { package = "arrow2", git="https://github.com/zhyass/arrow2", rev = "23682f0" } +arrow-flight = { git="https://github.com/zhyass/arrow2", rev = "23682f0" } parquet = {package = "parquet2", git = "https://github.com/datafuse-extras/parquet2", rev = "d28330f"} # Crates.io dependencies diff --git a/common/datablocks/src/kernels/data_block_concat_test.rs b/common/datablocks/src/kernels/data_block_concat_test.rs index 20d9b0f65f14f..2d6176ae0c223 100644 --- a/common/datablocks/src/kernels/data_block_concat_test.rs +++ b/common/datablocks/src/kernels/data_block_concat_test.rs @@ -23,7 +23,7 @@ use crate::*; fn test_data_block_concat() -> Result<()> { let schema = DataSchemaRefExt::create(vec![ DataField::new("a", DataType::Int64, false), - DataField::new("b", DataType::Utf8, false), + DataField::new("b", DataType::String, false), ]); let blocks = vec![ diff --git a/common/datablocks/src/kernels/data_block_group_by_hash_test.rs b/common/datablocks/src/kernels/data_block_group_by_hash_test.rs index 246a926c7da9f..2f75f39f8367b 100644 --- a/common/datablocks/src/kernels/data_block_group_by_hash_test.rs +++ b/common/datablocks/src/kernels/data_block_group_by_hash_test.rs @@ -23,7 +23,7 @@ fn test_data_block_group_by_hash() -> Result<()> { DataField::new("a", DataType::Int8, false), DataField::new("b", DataType::Int8, false), DataField::new("c", DataType::Int8, false), - DataField::new("x", DataType::Utf8, false), + DataField::new("x", DataType::String, false), ]); let block = DataBlock::create_by_array(schema.clone(), vec![ diff --git a/common/datablocks/src/kernels/data_block_group_by_test.rs b/common/datablocks/src/kernels/data_block_group_by_test.rs index 9b1570fb0a61a..eabf260109ea7 100644 --- a/common/datablocks/src/kernels/data_block_group_by_test.rs +++ b/common/datablocks/src/kernels/data_block_group_by_test.rs @@ -21,7 +21,7 @@ use crate::*; fn test_data_block_group_by() -> Result<()> { let schema = DataSchemaRefExt::create(vec![ DataField::new("a", DataType::Int8, false), - DataField::new("b", DataType::Utf8, false), + DataField::new("b", DataType::String, false), ]); let block = DataBlock::create_by_array(schema.clone(), vec![ diff --git a/common/datablocks/src/kernels/data_block_sort_test.rs b/common/datablocks/src/kernels/data_block_sort_test.rs index 94021cbf95051..7208841756e84 100644 --- a/common/datablocks/src/kernels/data_block_sort_test.rs +++ b/common/datablocks/src/kernels/data_block_sort_test.rs @@ -21,7 +21,7 @@ use crate::*; fn test_data_block_sort() -> Result<()> { let schema = DataSchemaRefExt::create(vec![ DataField::new("a", DataType::Int64, false), - DataField::new("b", DataType::Utf8, false), + DataField::new("b", DataType::String, false), ]); let raw = DataBlock::create_by_array(schema.clone(), vec![ @@ -77,7 +77,7 @@ fn test_data_block_sort() -> Result<()> { fn test_data_block_merge_sort() -> Result<()> { let schema = DataSchemaRefExt::create(vec![ DataField::new("a", DataType::Int64, false), - DataField::new("b", DataType::Utf8, false), + DataField::new("b", DataType::String, false), ]); let raw1 = DataBlock::create_by_array(schema.clone(), vec![ diff --git a/common/datablocks/src/kernels/data_block_take_test.rs b/common/datablocks/src/kernels/data_block_take_test.rs index be32f3929a25c..a9df94fa258f2 100644 --- a/common/datablocks/src/kernels/data_block_take_test.rs +++ b/common/datablocks/src/kernels/data_block_take_test.rs @@ -21,7 +21,7 @@ use crate::*; fn test_data_block_take() -> Result<()> { let schema = DataSchemaRefExt::create(vec![ DataField::new("a", DataType::Int64, false), - DataField::new("b", DataType::Utf8, false), + DataField::new("b", DataType::String, false), ]); let raw = DataBlock::create_by_array(schema.clone(), vec![ diff --git a/common/datavalues/src/arrays/ops/agg_test.rs b/common/datavalues/src/arrays/ops/agg_test.rs index 62affdb97a4d0..511e29b299541 100644 --- a/common/datavalues/src/arrays/ops/agg_test.rs +++ b/common/datavalues/src/arrays/ops/agg_test.rs @@ -76,8 +76,8 @@ fn test_boolean_array_agg() -> Result<()> { } #[test] -fn test_utf8_array_agg() -> Result<()> { - let array = DFUtf8Array::new_from_slice(&vec!["h", "e", "l", "o"]); +fn test_string_array_agg() -> Result<()> { + let array = DFStringArray::new_from_slice(&vec!["h", "e", "l", "o"]); let value = [ array.max()?, @@ -87,15 +87,15 @@ fn test_utf8_array_agg() -> Result<()> { ]; let expected = [ - DataValue::Utf8(Some("o".to_string())), - DataValue::Utf8(Some("e".to_string())), + DataValue::String(Some("o".as_bytes().to_vec())), + DataValue::String(Some("e".as_bytes().to_vec())), DataValue::Struct(vec![ DataValue::UInt64(Some(1)), - DataValue::Utf8(Some("e".to_string())), + DataValue::String(Some("e".as_bytes().to_vec())), ]), DataValue::Struct(vec![ DataValue::UInt64(Some(3)), - DataValue::Utf8(Some("o".to_string())), + DataValue::String(Some("o".as_bytes().to_vec())), ]), ]; let len = value.len(); diff --git a/common/datavalues/src/arrays/ops/apply_test.rs b/common/datavalues/src/arrays/ops/apply_test.rs index 9a20feb50cbc1..bc5087214ddb0 100644 --- a/common/datavalues/src/arrays/ops/apply_test.rs +++ b/common/datavalues/src/arrays/ops/apply_test.rs @@ -46,8 +46,8 @@ fn new_test_boolean_array(cap: usize, begin: i32, end: i32) -> DFBooleanArray { builder.finish() } -fn new_test_utf8_array(cap: usize, begin: i32, end: i32) -> DFUtf8Array { - let mut builder = Utf8ArrayBuilder::with_capacity(cap); +fn new_test_string_array(cap: usize, begin: i32, end: i32) -> DFStringArray { + let mut builder = StringArrayBuilder::with_capacity(cap); let s = vec!["ax", "by", "cz", "dm", "13"]; (begin..end).for_each(|index| { @@ -164,15 +164,15 @@ fn test_boolean_array_apply() -> Result<()> { } #[test] -fn test_utf8_array_apply() -> Result<()> { +fn test_string_array_apply() -> Result<()> { // array=[null, "by", "cz", null, "13"] - let array = new_test_utf8_array(5, 0, 5); + let array = new_test_string_array(5, 0, 5); let arrays = vec![ array.apply(|arr| Cow::from(&arr[1..])), array.apply_with_idx(|(_, arr)| Cow::from(&arr[..1])), array.apply_with_idx_on_opt(|(_, arr)| match arr { Some(v) => Some(Cow::from(&v[0..])), - None => Some(Cow::from("ff")), + None => Some(Cow::from("ff".as_bytes())), }), ]; @@ -190,26 +190,26 @@ fn test_utf8_array_apply() -> Result<()> { assert_eq!(2, values[0].null_count()); assert_eq!(true, values[0].is_null(0)); - assert_eq!("y", values[0].value(1)); - assert_eq!("z", values[0].value(2)); + assert_eq!(b"y", values[0].value(1)); + assert_eq!(b"z", values[0].value(2)); assert_eq!(true, values[0].is_null(3)); - assert_eq!("3", values[0].value(4)); + assert_eq!(b"3", values[0].value(4)); assert_eq!(true, values[0].is_null(3)); assert_eq!(2, values[1].null_count()); assert_eq!(true, values[1].is_null(0)); - assert_eq!("b", values[1].value(1)); - assert_eq!("c", values[1].value(2)); + assert_eq!(b"b", values[1].value(1)); + assert_eq!(b"c", values[1].value(2)); assert_eq!(true, values[1].is_null(3)); - assert_eq!("1", values[1].value(4)); + assert_eq!(b"1", values[1].value(4)); assert_eq!(true, values[1].is_null(3)); assert_eq!(0, values[2].null_count()); - assert_eq!("ff", values[2].value(0)); - assert_eq!("by", values[2].value(1)); - assert_eq!("cz", values[2].value(2)); - assert_eq!("ff", values[2].value(3)); - assert_eq!("13", values[2].value(4)); + assert_eq!(b"ff", values[2].value(0)); + assert_eq!(b"by", values[2].value(1)); + assert_eq!(b"cz", values[2].value(2)); + assert_eq!(b"ff", values[2].value(3)); + assert_eq!(b"13", values[2].value(4)); assert_eq!(2, cast_values[0].null_count()); assert_eq!(true, cast_values[0].is_null(0)); diff --git a/common/datavalues/src/arrays/ops/contain_test.rs b/common/datavalues/src/arrays/ops/contain_test.rs index 9b5ae390f0c37..6fd7e8db0b36d 100644 --- a/common/datavalues/src/arrays/ops/contain_test.rs +++ b/common/datavalues/src/arrays/ops/contain_test.rs @@ -33,22 +33,22 @@ fn test_contain() -> Result<()> { let values = boolean?.collect_values(); assert_eq!(&[Some(true), Some(false), Some(true)], values.as_slice()); - // Test DFUtf8Array - let mut utf8_builder = Utf8ArrayBuilder::with_capacity(3); - utf8_builder.append_value("1a"); - utf8_builder.append_value("2b"); - utf8_builder.append_value("3c"); - utf8_builder.append_value("4d"); - let df_utf8_array = utf8_builder.finish(); - - let mut builder = get_list_builder(&DataType::Utf8, 12, 1); + // Test DFStringArray + let mut string_builder = StringArrayBuilder::with_capacity(3); + string_builder.append_value("1a"); + string_builder.append_value("2b"); + string_builder.append_value("3c"); + string_builder.append_value("4d"); + let df_string_array = string_builder.finish(); + + let mut builder = get_list_builder(&DataType::String, 12, 1); builder.append_series(&Series::new(vec!["2b", "4d"])); builder.append_series(&Series::new(vec!["2b", "4d"])); builder.append_series(&Series::new(vec!["2b", "4d"])); builder.append_series(&Series::new(vec!["2b", "4d"])); let df_list = builder.finish(); - let boolean = df_utf8_array.contain(&df_list); + let boolean = df_string_array.contain(&df_list); let values = boolean?.collect_values(); assert_eq!( &[Some(false), Some(true), Some(false), Some(true)], diff --git a/common/datavalues/src/arrays/ops/fill_test.rs b/common/datavalues/src/arrays/ops/fill_test.rs index c9dfe20307d82..aa4fe1a5c9fe6 100644 --- a/common/datavalues/src/arrays/ops/fill_test.rs +++ b/common/datavalues/src/arrays/ops/fill_test.rs @@ -42,22 +42,22 @@ fn test_array_fill() -> Result<()> { assert_eq!(true, df_boolean_array.is_null(1)); assert_eq!(true, df_boolean_array.is_null(2)); - // Test full for Utf8Array - let mut df_utf8_array = DFUtf8Array::full("ab", 3); - assert_eq!(0, df_utf8_array.null_count()); - assert_eq!(false, df_utf8_array.is_null(0)); - assert_eq!(false, df_utf8_array.is_null(1)); - assert_eq!(false, df_utf8_array.is_null(2)); - assert_eq!("ab", df_utf8_array.inner().value(0)); - assert_eq!("ab", df_utf8_array.inner().value(1)); - assert_eq!("ab", df_utf8_array.inner().value(2)); - - // Test full_null for Utf8Array - df_utf8_array = DFUtf8Array::full_null(3); - assert_eq!(3, df_utf8_array.null_count()); - assert_eq!(true, df_utf8_array.is_null(0)); - assert_eq!(true, df_utf8_array.is_null(1)); - assert_eq!(true, df_utf8_array.is_null(2)); + // Test full for StringArray + let mut df_string_array = DFStringArray::full("ab".as_bytes(), 3); + assert_eq!(0, df_string_array.null_count()); + assert_eq!(false, df_string_array.is_null(0)); + assert_eq!(false, df_string_array.is_null(1)); + assert_eq!(false, df_string_array.is_null(2)); + assert_eq!("ab".as_bytes(), df_string_array.inner().value(0)); + assert_eq!("ab".as_bytes(), df_string_array.inner().value(1)); + assert_eq!("ab".as_bytes(), df_string_array.inner().value(2)); + + // Test full_null for StringArray + df_string_array = DFStringArray::full_null(3); + assert_eq!(3, df_string_array.null_count()); + assert_eq!(true, df_string_array.is_null(0)); + assert_eq!(true, df_string_array.is_null(1)); + assert_eq!(true, df_string_array.is_null(2)); Ok(()) } diff --git a/common/datavalues/src/arrays/ops/if_test.rs b/common/datavalues/src/arrays/ops/if_test.rs index 30303ae75bd15..e076c2f56dff0 100644 --- a/common/datavalues/src/arrays/ops/if_test.rs +++ b/common/datavalues/src/arrays/ops/if_test.rs @@ -17,6 +17,7 @@ use common_arrow::arrow::array::NullArray; use common_arrow::arrow::array::UInt64Array; use common_arrow::arrow::compute::comparison::compare_scalar; use common_arrow::arrow::compute::comparison::Operator; +use common_arrow::arrow::datatypes::DataType as ArrowType; use common_arrow::arrow::scalar::PrimitiveScalar; use common_exception::Result; @@ -58,19 +59,19 @@ fn test_array_if() -> Result<()> { assert_eq!(true, res.inner().value(1)); assert_eq!(true, res.inner().value(2)); - // DFUtf8Array. - let lhs = DFUtf8Array::new_from_slice(&["a"]); - let rhs = DFUtf8Array::new_from_slice(&["b"]); + // DFStringArray. + let lhs = DFStringArray::new_from_slice(&["a"]); + let rhs = DFStringArray::new_from_slice(&["b"]); let res = lhs.if_then_else(&rhs, &conds[0])?; assert_eq!(3, res.len()); - assert_eq!("a", res.inner().value(0)); - assert_eq!("b", res.inner().value(1)); - assert_eq!("a", res.inner().value(2)); + assert_eq!(b"a", res.inner().value(0)); + assert_eq!(b"b", res.inner().value(1)); + assert_eq!(b"a", res.inner().value(2)); // DFNullArray. - let lhs = NullArray::new_null(2); + let lhs = NullArray::new_null(ArrowType::Null, 2); let lhs: DFNullArray = lhs.into(); - let rhs = NullArray::new_null(1); + let rhs = NullArray::new_null(ArrowType::Null, 1); let rhs: DFNullArray = rhs.into(); let res = lhs.if_then_else(&rhs, &conds[0])?; assert_eq!(2, res.len()); diff --git a/common/datavalues/src/arrays/ops/scatter_test.rs b/common/datavalues/src/arrays/ops/scatter_test.rs index 9bbe1fa554eb5..964646ebf2e3a 100644 --- a/common/datavalues/src/arrays/ops/scatter_test.rs +++ b/common/datavalues/src/arrays/ops/scatter_test.rs @@ -33,16 +33,16 @@ fn test_scatter() -> Result<()> { assert_eq!(&[2u16, 6], array_vec[2].inner().values().as_slice()); assert_eq!(&[3u16, 5, 8], array_vec[3].inner().values().as_slice()); - // Test DFUint16Array - let df_utf8_array = DFUtf8Array::new_from_slice(&["a", "b", "c", "d"]); + // Test DFStringArray + let df_string_array = DFStringArray::new_from_slice(&["a", "b", "c", "d"]); let indices = vec![1, 0, 1, 1]; - assert_eq!(df_utf8_array.len(), indices.len()); + assert_eq!(df_string_array.len(), indices.len()); - let array_vec = unsafe { df_utf8_array.scatter_unchecked(&mut indices.into_iter(), 2)? }; - let v1: Vec<&str> = array_vec[0].into_no_null_iter().collect(); - let v2: Vec<&str> = array_vec[1].into_no_null_iter().collect(); - assert_eq!(vec!["b"], v1); - assert_eq!(vec!["a", "c", "d"], v2); + let array_vec = unsafe { df_string_array.scatter_unchecked(&mut indices.into_iter(), 2)? }; + let v1: Vec<&[u8]> = array_vec[0].into_no_null_iter().collect(); + let v2: Vec<&[u8]> = array_vec[1].into_no_null_iter().collect(); + assert_eq!(vec![b"b"], v1); + assert_eq!(vec![b"a", b"c", b"d"], v2); // Test BooleanArray let df_bool_array = DFBooleanArray::new_from_slice(&[true, false, true, false]); diff --git a/common/datavalues/src/arrays/ops/take_random_test.rs b/common/datavalues/src/arrays/ops/take_random_test.rs index 0cf501ce633c0..2370edec643fa 100644 --- a/common/datavalues/src/arrays/ops/take_random_test.rs +++ b/common/datavalues/src/arrays/ops/take_random_test.rs @@ -52,18 +52,18 @@ fn test_take_random() -> Result<()> { let expected = Series::new(vec![1_u16, 2, 3]); assert!(result.series_equal(&expected)); - // Test DFUtf8Array - let mut utf8_builder = Utf8ArrayBuilder::with_capacity(3); - utf8_builder.append_value("1a"); - utf8_builder.append_value("2b"); - utf8_builder.append_value("3c"); - let df_utf8_array = &utf8_builder.finish(); + // Test DFStringArray + let mut string_builder = StringArrayBuilder::with_capacity(3); + string_builder.append_value("1a"); + string_builder.append_value("2b"); + string_builder.append_value("3c"); + let df_string_array = &string_builder.finish(); // Create TakeRandBranch for the array - let taker = df_utf8_array.take_rand(); - assert_eq!(Some("1a"), taker.get(0)); + let taker = df_string_array.take_rand(); + assert_eq!(Some("1a".as_bytes()), taker.get(0)); // Test get_unchecked let result = unsafe { taker.get_unchecked(1) }; - assert_eq!("2b", result); + assert_eq!(b"2b", result); Ok(()) } diff --git a/common/datavalues/src/arrays/ops/take_test.rs b/common/datavalues/src/arrays/ops/take_test.rs index 546f55c8e1ad7..d6091b08ef9fe 100644 --- a/common/datavalues/src/arrays/ops/take_test.rs +++ b/common/datavalues/src/arrays/ops/take_test.rs @@ -60,21 +60,21 @@ fn test_take() -> Result<()> { let expected = Series::new(vec![7_u16, 8, 9]); assert!(vs[0].series_equal(&expected)); - // Test DFUtf8Array - let mut utf8_builder = Utf8ArrayBuilder::with_capacity(3); - utf8_builder.append_value("1a"); - utf8_builder.append_value("2b"); - utf8_builder.append_value("3c"); - let df_utf8_array = &utf8_builder.finish(); + // Test DFStringArray + let mut string_builder = StringArrayBuilder::with_capacity(3); + string_builder.append_value("1a"); + string_builder.append_value("2b"); + string_builder.append_value("3c"); + let df_string_array = &string_builder.finish(); let index = TakeIdx::from(vec![0, 1].into_iter()); - let take_res = df_utf8_array.take(index)?; + let take_res = df_string_array.take(index)?; let vs: Vec<_> = take_res.into_no_null_iter().collect(); - assert_eq!(&vs, &["1a", "2b"]); + assert_eq!(&vs, &[b"1a", b"2b"]); let index = TakeIdx::from(vec![2, 1].into_iter()); - let take_res = unsafe { df_utf8_array.take_unchecked(index)? }; + let take_res = unsafe { df_string_array.take_unchecked(index)? }; let vs: Vec<_> = take_res.into_no_null_iter().collect(); - assert_eq!(&vs, &["3c", "2b"]); + assert_eq!(&vs, &[b"3c", b"2b"]); Ok(()) } diff --git a/common/datavalues/src/arrays/primitive/mod.rs b/common/datavalues/src/arrays/primitive/mod.rs index 96dedc6b6831f..9954154b431ff 100644 --- a/common/datavalues/src/arrays/primitive/mod.rs +++ b/common/datavalues/src/arrays/primitive/mod.rs @@ -12,15 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_arrow::arrow::array::Array; -use common_arrow::arrow::array::PrimitiveArray; -use common_arrow::arrow::bitmap::Bitmap; -use common_arrow::arrow::buffer::Buffer; -use common_exception::ErrorCode; -use common_exception::Result; - -use crate::prelude::*; - mod builder; mod iterator; @@ -28,8 +19,16 @@ mod iterator; mod builder_test; pub use builder::*; +use common_arrow::arrow::array::Array; +use common_arrow::arrow::array::PrimitiveArray; +use common_arrow::arrow::bitmap::Bitmap; +use common_arrow::arrow::buffer::Buffer; +use common_exception::ErrorCode; +use common_exception::Result; pub use iterator::*; +use crate::prelude::*; + /// DFPrimitiveArray is generic struct which wrapped arrow's PrimitiveArray #[derive(Debug, Clone)] pub struct DFPrimitiveArray { diff --git a/common/datavalues/src/arrays/string/builder.rs b/common/datavalues/src/arrays/string/builder.rs index c77b53f24c02c..9ed4c6930b68e 100644 --- a/common/datavalues/src/arrays/string/builder.rs +++ b/common/datavalues/src/arrays/string/builder.rs @@ -60,14 +60,14 @@ impl ArrayDeserializer for StringArrayBuilder { let offset: u64 = reader.read_uvarint()?; let mut values: Vec = Vec::with_capacity(offset as usize); reader.read_exact(&mut values)?; - self.append_value(reader.clone()); + self.append_value(reader); Ok(()) } fn de_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { for row in 0..rows { let reader = &reader[step * row..]; - self.append_value(reader.clone()); + self.append_value(reader); } Ok(()) } diff --git a/common/datavalues/src/arrays/utf8/builder_test.rs b/common/datavalues/src/arrays/string/builder_test.rs similarity index 74% rename from common/datavalues/src/arrays/utf8/builder_test.rs rename to common/datavalues/src/arrays/string/builder_test.rs index 27e8f148aba99..504d04226bd1d 100644 --- a/common/datavalues/src/arrays/utf8/builder_test.rs +++ b/common/datavalues/src/arrays/string/builder_test.rs @@ -16,15 +16,15 @@ use crate::prelude::*; #[test] fn test_empty_array() { - let mut builder = Utf8ArrayBuilder::with_capacity(16); + let mut builder = StringArrayBuilder::with_capacity(16); let data_array = builder.finish(); assert_eq!(true, data_array.is_empty()); - assert_eq!(&DataType::Utf8, data_array.data_type()); + assert_eq!(&DataType::String, data_array.data_type()); } #[test] fn test_fill_data() { - let mut builder = Utf8ArrayBuilder::with_capacity(16); + let mut builder = StringArrayBuilder::with_capacity(16); builder.append_value("你好"); builder.append_option(Some("\u{1F378}")); builder.append_null(); @@ -33,19 +33,19 @@ fn test_fill_data() { let mut iter = data_array.into_iter(); assert_eq!(3, data_array.len()); - assert_eq!(Some(Some("你好")), iter.next()); - assert_eq!(Some(Some("🍸")), iter.next()); + assert_eq!(Some(Some("你好".as_bytes())), iter.next()); + assert_eq!(Some(Some("🍸".as_bytes())), iter.next()); assert_eq!(Some(None), iter.next()); assert_eq!(None, iter.next()); } #[test] fn test_new_from_opt_slice() { - let data_array = DFUtf8Array::new_from_opt_slice(&[Some("你好"), None]); + let data_array = DFStringArray::new_from_opt_slice(&[Some("你好"), None]); let mut iter = data_array.into_iter(); assert_eq!(2, data_array.len()); - assert_eq!(Some(Some("你好")), iter.next()); + assert_eq!(Some(Some("你好".as_bytes())), iter.next()); assert_eq!(Some(None), iter.next()); assert_eq!(None, iter.next()); } @@ -55,11 +55,11 @@ fn test_new_from_opt_iter() { let v = vec![None, Some("你好"), None]; let mut iter = v.into_iter(); iter.next(); // move iterator and create data array from second element - let data_array = DFUtf8Array::new_from_opt_iter(iter); + let data_array = DFStringArray::new_from_opt_iter(iter); let mut iter = data_array.into_iter(); assert_eq!(2, data_array.len()); - assert_eq!(Some(Some("你好")), iter.next()); + assert_eq!(Some(Some("你好".as_bytes())), iter.next()); assert_eq!(Some(None), iter.next()); assert_eq!(None, iter.next()); } diff --git a/common/datavalues/src/arrays/string/mod.rs b/common/datavalues/src/arrays/string/mod.rs index 7e6670410ecc9..3869424d1ef87 100644 --- a/common/datavalues/src/arrays/string/mod.rs +++ b/common/datavalues/src/arrays/string/mod.rs @@ -15,6 +15,9 @@ mod builder; mod iterator; +#[cfg(test)] +mod builder_test; + pub use builder::*; use common_arrow::arrow::array::*; use common_arrow::arrow::bitmap::Bitmap; diff --git a/common/datavalues/src/arrays/utf8/builder.rs b/common/datavalues/src/arrays/utf8/builder.rs deleted file mode 100644 index 80a974badc3a0..0000000000000 --- a/common/datavalues/src/arrays/utf8/builder.rs +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2020 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use common_arrow::arrow::array::*; -use common_exception::Result; -use common_io::prelude::BinaryRead; - -use crate::prelude::*; -use crate::utils::get_iter_capacity; - -pub struct Utf8ArrayBuilder { - pub builder: MutableUtf8Array, -} - -impl Utf8ArrayBuilder { - /// Create a new UtfArrayBuilder - /// - /// # Arguments - /// - /// * `capacity` - Number of string elements in the final array. - pub fn with_capacity(bytes_capacity: usize) -> Self { - Utf8ArrayBuilder { - builder: MutableUtf8Array::with_capacity(bytes_capacity), - } - } - - /// Appends a value of type `T` into the builder - #[inline] - pub fn append_value>(&mut self, v: S) { - self.builder.push(Some(v)) - } - - /// Appends a null slot into the builder - #[inline] - pub fn append_null(&mut self) { - self.builder.push_null(); - } - - #[inline] - pub fn append_option>(&mut self, opt: Option) { - match opt { - Some(s) => self.append_value(s.as_ref()), - None => self.append_null(), - } - } - - pub fn finish(&mut self) -> DFUtf8Array { - let array = self.builder.as_arc(); - DFUtf8Array::from_arrow_array(array.as_ref()) - } -} - -impl ArrayDeserializer for Utf8ArrayBuilder { - fn de(&mut self, reader: &mut &[u8]) -> Result<()> { - let value: String = reader.read_string()?; - self.append_value(value); - Ok(()) - } - - fn de_batch(&mut self, reader: &[u8], step: usize, rows: usize) -> Result<()> { - for row in 0..rows { - let mut reader = &reader[step * row..]; - let value: String = reader.read_string()?; - self.append_value(&value); - } - Ok(()) - } - - fn finish_to_series(&mut self) -> Series { - self.finish().into_series() - } - - fn de_text(&mut self, reader: &[u8]) { - match std::str::from_utf8(reader) { - Ok(v) => self.append_value(v), - Err(_) => self.append_null(), - } - } - - fn de_null(&mut self) { - self.append_null() - } -} - -impl NewDataArray for DFUtf8Array -where S: AsRef -{ - fn new_from_slice(v: &[S]) -> Self { - let values_size = v.iter().fold(0, |acc, s| acc + s.as_ref().len()); - let mut builder = Utf8ArrayBuilder::with_capacity(values_size); - v.iter().for_each(|val| { - builder.append_value(val.as_ref()); - }); - - builder.finish() - } - - fn new_from_opt_slice(opt_v: &[Option]) -> Self { - let values_size = opt_v.iter().fold(0, |acc, s| match s { - Some(s) => acc + s.as_ref().len(), - None => acc, - }); - let mut builder = Utf8ArrayBuilder::with_capacity(values_size); - opt_v.iter().for_each(|opt| match opt { - Some(v) => builder.append_value(v.as_ref()), - None => builder.append_null(), - }); - builder.finish() - } - - fn new_from_opt_iter(it: impl Iterator>) -> Self { - let cap = get_iter_capacity(&it); - let mut builder = Utf8ArrayBuilder::with_capacity(cap * 5); - it.for_each(|opt| builder.append_option(opt)); - builder.finish() - } - - /// Create a new DataArray from an iterator. - fn new_from_iter(it: impl Iterator) -> Self { - let cap = get_iter_capacity(&it); - let mut builder = Utf8ArrayBuilder::with_capacity(cap * 5); - it.for_each(|v| builder.append_value(v)); - builder.finish() - } -} diff --git a/common/datavalues/src/arrays/utf8/iterator.rs b/common/datavalues/src/arrays/utf8/iterator.rs deleted file mode 100644 index 0f637b7da0310..0000000000000 --- a/common/datavalues/src/arrays/utf8/iterator.rs +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2020 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use common_arrow::arrow::array::*; -use common_arrow::arrow::bitmap::utils::ZipValidity; -use common_arrow::arrow::trusted_len::TrustedLen; - -use crate::prelude::*; - -impl<'a> IntoIterator for &'a DFUtf8Array { - type Item = Option<&'a str>; - type IntoIter = ZipValidity<'a, &'a str, Utf8ValuesIter<'a, i64>>; - fn into_iter(self) -> Self::IntoIter { - self.array.iter() - } -} - -/// all arrays have known size. -impl<'a> ExactSizeIterator for Utf8IterNoNull<'a> {} -unsafe impl<'a> TrustedLen for Utf8IterNoNull<'a> {} - -pub struct Utf8IterNoNull<'a> { - array: &'a LargeUtf8Array, - current: usize, - current_end: usize, -} - -impl<'a> Utf8IterNoNull<'a> { - /// create a new iterator - pub fn new(array: &'a LargeUtf8Array) -> Self { - Utf8IterNoNull { - array, - current: 0, - current_end: array.len(), - } - } -} - -impl<'a> Iterator for Utf8IterNoNull<'a> { - type Item = &'a str; - - fn next(&mut self) -> Option { - if self.current == self.current_end { - None - } else { - let old = self.current; - self.current += 1; - unsafe { Some(self.array.value_unchecked(old)) } - } - } - - fn size_hint(&self) -> (usize, Option) { - ( - self.array.len() - self.current, - Some(self.array.len() - self.current), - ) - } -} - -impl DFUtf8Array { - pub fn into_no_null_iter<'a>(&'a self) -> impl TrustedLen + '_ + Send + Sync { - Utf8IterNoNull::new(self.inner()) - } -} diff --git a/common/datavalues/src/arrays/utf8/mod.rs b/common/datavalues/src/arrays/utf8/mod.rs deleted file mode 100644 index 19935f9fd77d0..0000000000000 --- a/common/datavalues/src/arrays/utf8/mod.rs +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright 2020 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use common_arrow::arrow::array::*; -use common_arrow::arrow::bitmap::Bitmap; -use common_exception::ErrorCode; -use common_exception::Result; - -use crate::prelude::*; - -mod builder; -mod iterator; - -#[cfg(test)] -mod builder_test; - -pub use builder::*; -pub use iterator::*; - -#[derive(Debug, Clone)] -pub struct DFUtf8Array { - pub(crate) array: LargeUtf8Array, -} - -impl From for DFUtf8Array { - fn from(array: LargeUtf8Array) -> Self { - Self { array } - } -} - -impl DFUtf8Array { - pub fn new(array: LargeUtf8Array) -> Self { - Self { array } - } - - pub fn from_arrow_array(array: &dyn Array) -> Self { - Self::new( - array - .as_any() - .downcast_ref::() - .unwrap() - .clone(), - ) - } - - pub fn data_type(&self) -> &DataType { - &DataType::Utf8 - } - - pub fn inner(&self) -> &LargeUtf8Array { - &self.array - } - - /// # Safety - /// Note this doesn't do any bound checking, for performance reason. - pub unsafe fn try_get(&self, index: usize) -> Result { - let v = match self.array.is_null(index) { - true => None, - false => Some(self.array.value_unchecked(index)), - }; - Ok(v.into()) - } - - pub fn len(&self) -> usize { - self.array.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - #[inline] - pub fn null_count(&self) -> usize { - self.array.null_count() - } - - #[inline] - pub fn is_null(&self, i: usize) -> bool { - self.array.is_null(i) - } - - #[inline] - pub fn all_is_null(&self) -> bool { - self.null_count() == self.len() - } - - #[inline] - /// Get the null count and the buffer of bits representing null values - pub fn null_bits(&self) -> (usize, &Option) { - (self.array.null_count(), self.array.validity()) - } - - /// Take a view of top n elements - pub fn limit(&self, num_elements: usize) -> Self { - self.slice(0, num_elements) - } - - pub fn slice(&self, offset: usize, length: usize) -> Self { - let array = self.array.slice(offset, length); - Self::new(array) - } - - /// Unpack a array to the same physical type. - /// - /// # Safety - /// - /// This is unsafe as the data_type may be uncorrect and - /// is assumed to be correct in other unsafe code. - pub unsafe fn unpack(&self, array: &Series) -> Result<&Self> { - let array_trait = &**array; - if self.data_type() == array.data_type() { - let ca = &*(array_trait as *const dyn SeriesTrait as *const Self); - Ok(ca) - } else { - Err(ErrorCode::IllegalDataType(format!( - "cannot unpack array {:?} into matching type {:?}", - array, - self.data_type() - ))) - } - } - - pub fn collect_values(&self) -> Vec> { - self.inner().iter().collect() - } -} - -/// # Safety -/// Note this doesn't do any bound checking, for performance reason. -pub unsafe fn take_utf8_iter_unchecked>( - arr: &LargeUtf8Array, - indices: I, -) -> LargeUtf8Array { - match arr.null_count() { - 0 => { - let iter = indices - .into_iter() - .map(|idx| Some(arr.value_unchecked(idx))); - LargeUtf8Array::from_trusted_len_iter_unchecked(iter) - } - _ => { - let iter = indices.into_iter().map(|idx| { - if arr.is_null(idx) { - None - } else { - Some(arr.value_unchecked(idx)) - } - }); - LargeUtf8Array::from_trusted_len_iter_unchecked(iter) - } - } -} - -/// # Safety -/// Note this doesn't do any bound checking, for performance reason. -pub unsafe fn take_utf8_opt_iter_unchecked>>( - arr: &LargeUtf8Array, - indices: I, -) -> LargeUtf8Array { - match arr.null_count() { - 0 => { - let iter = indices - .into_iter() - .map(|opt_idx| opt_idx.map(|idx| arr.value_unchecked(idx))); - - LargeUtf8Array::from_trusted_len_iter_unchecked(iter) - } - _ => { - let iter = indices.into_iter().map(|opt_idx| { - opt_idx.and_then(|idx| { - if arr.is_null(idx) { - None - } else { - Some(arr.value_unchecked(idx)) - } - }) - }); - - LargeUtf8Array::from_trusted_len_iter_unchecked(iter) - } - } -} diff --git a/common/datavalues/src/data_group_value.rs b/common/datavalues/src/data_group_value.rs index b9a487eef31bb..5aedd58772b58 100644 --- a/common/datavalues/src/data_group_value.rs +++ b/common/datavalues/src/data_group_value.rs @@ -34,7 +34,7 @@ pub enum DataGroupValue { Int16(i16), Int32(i32), Int64(i64), - String(Box>), + String(Vec), Boolean(bool), } @@ -54,7 +54,7 @@ impl TryFrom<&DataValue> for DataGroupValue { DataValue::UInt16(Some(v)) => DataGroupValue::UInt16(*v), DataValue::UInt32(Some(v)) => DataGroupValue::UInt32(*v), DataValue::UInt64(Some(v)) => DataGroupValue::UInt64(*v), - DataValue::String(Some(v)) => DataGroupValue::String(Box::new(v.clone())), + DataValue::String(Some(v)) => DataGroupValue::String(v.clone()), DataValue::Float32(None) | DataValue::Float64(None) diff --git a/common/datavalues/src/data_value.rs b/common/datavalues/src/data_value.rs index 70a6afb922dfe..5ce499ca6e485 100644 --- a/common/datavalues/src/data_value.rs +++ b/common/datavalues/src/data_value.rs @@ -367,13 +367,15 @@ impl fmt::Display for DataValue { DataValue::UInt32(v) => format_data_value_with_option!(f, v), DataValue::UInt64(v) => format_data_value_with_option!(f, v), DataValue::String(None) => write!(f, "NULL"), - DataValue::String(Some(v)) => { - for c in v { - write!(f, "{:02x}", c)?; + DataValue::String(Some(v)) => match std::str::from_utf8(v) { + Ok(v) => write!(f, "{}", v), + Err(_e) => { + for c in v { + write!(f, "{:02x}", c)?; + } + Ok(()) } - Ok(()) - } - + }, DataValue::List(None, ..) => write!(f, "NULL"), DataValue::List(Some(v), ..) => { write!( diff --git a/common/datavalues/src/series/arithmetic_test.rs b/common/datavalues/src/series/arithmetic_test.rs index 9588ccaaa7389..99c7dff41ea3f 100644 --- a/common/datavalues/src/series/arithmetic_test.rs +++ b/common/datavalues/src/series/arithmetic_test.rs @@ -108,7 +108,9 @@ fn test_arithmetic_series() { Series::new(vec![5.0f64, 5.0, 5.0, 5.0]), Series::new(vec![5.0f64, 5.0, 5.0, 5.0]), ], - error: vec!["Code: 10, displayText = DataValue Error: Unsupported (Utf8) plus (Utf8)."], + error: vec![ + "Code: 10, displayText = DataValue Error: Unsupported (String) plus (String).", + ], }, ArrayTest { name: "minus-passed", @@ -149,7 +151,7 @@ fn test_arithmetic_series() { Series::new(vec![3.0f64, 1.0, -1.0, -3.0]), ], error: vec![ - "Code: 10, displayText = DataValue Error: Unsupported (Utf8) minus (Utf8).", + "Code: 10, displayText = DataValue Error: Unsupported (String) minus (String).", ], }, ArrayTest { @@ -189,7 +191,7 @@ fn test_arithmetic_series() { Series::new(vec![4.0f64, 6.0, 6.0, 4.0]), ], error: vec![ - "Code: 10, displayText = DataValue Error: Unsupported (Utf8) multiply (Utf8).", + "Code: 10, displayText = DataValue Error: Unsupported (String) multiply (String).", ], }, ArrayTest { @@ -253,7 +255,7 @@ fn test_arithmetic_series() { Series::new(vec![4.0, 1.5, 0.6666666666666666, 0.25]), ], error: vec![ - "Code: 10, displayText = DataValue Error: Unsupported (Utf8) divide (Utf8).", + "Code: 10, displayText = DataValue Error: Unsupported (String) divide (String).", ], }, ArrayTest { @@ -274,7 +276,7 @@ fn test_arithmetic_series() { Series::new(vec![0i64, 0, 0, 0]), ], error: vec![ - "Code: 10, displayText = DataValue Error: Unsupported (Utf8) modulo (Utf8).", + "Code: 10, displayText = DataValue Error: Unsupported (String) modulo (String).", ], }, ]; diff --git a/common/datavalues/src/series/series_impl.rs b/common/datavalues/src/series/series_impl.rs index 254bec6ebbd66..f80e047d4e96b 100644 --- a/common/datavalues/src/series/series_impl.rs +++ b/common/datavalues/src/series/series_impl.rs @@ -214,6 +214,18 @@ macro_rules! impl_from { }; } +impl<'a, T: AsRef<[&'a str]>> SeriesFrom for Series { + fn new(v: T) -> Self { + DFStringArray::new_from_slice(v.as_ref()).into_series() + } +} + +impl<'a, T: AsRef<[Option<&'a str>]>> SeriesFrom]> for Series { + fn new(v: T) -> Self { + DFStringArray::new_from_opt_slice(v.as_ref()).into_series() + } +} + impl<'a, T: AsRef<[&'a [u8]]>> SeriesFrom for Series { fn new(v: T) -> Self { DFStringArray::new_from_slice(v.as_ref()).into_series() diff --git a/common/functions/src/scalars/expressions/cast_test.rs b/common/functions/src/scalars/expressions/cast_test.rs index 75c6d0e2d0606..362321c1d1b80 100644 --- a/common/functions/src/scalars/expressions/cast_test.rs +++ b/common/functions/src/scalars/expressions/cast_test.rs @@ -88,7 +88,7 @@ fn test_cast_function() -> Result<()> { }, ]; - let dummy = DataField::new("dummy", DataType::Utf8, false); + let dummy = DataField::new("dummy", DataType::String, false); for t in tests { let rows = t.columns[0].len(); diff --git a/common/functions/src/scalars/strings/substring_test.rs b/common/functions/src/scalars/strings/substring_test.rs index b1ea07abdd1c0..4e8ea3333c642 100644 --- a/common/functions/src/scalars/strings/substring_test.rs +++ b/common/functions/src/scalars/strings/substring_test.rs @@ -34,7 +34,7 @@ fn test_substring_function() -> Result<()> { } let schema = DataSchemaRefExt::create(vec![ - DataField::new("a", DataType::Utf8, false), + DataField::new("a", DataType::String, false), DataField::new("b", DataType::Int64, false), DataField::new("c", DataType::UInt64, false), ]); diff --git a/common/functions/src/scalars/udfs/database_test.rs b/common/functions/src/scalars/udfs/database_test.rs index 16862302baf06..aa84cdd8c366f 100644 --- a/common/functions/src/scalars/udfs/database_test.rs +++ b/common/functions/src/scalars/udfs/database_test.rs @@ -30,7 +30,7 @@ fn test_database_function() -> Result<()> { error: &'static str, func: Box, } - let dummy = DataField::new("dummy", DataType::Utf8, false); + let dummy = DataField::new("dummy", DataType::String, false); let tests = vec![Test { name: "database-function-passed", diff --git a/common/functions/src/scalars/udfs/version_test.rs b/common/functions/src/scalars/udfs/version_test.rs index 5856b90b22119..18a081acd3f57 100644 --- a/common/functions/src/scalars/udfs/version_test.rs +++ b/common/functions/src/scalars/udfs/version_test.rs @@ -47,7 +47,7 @@ fn test_version_function() -> Result<()> { error: "", }]; - let dummy = DataField::new("dummy", DataType::Utf8, false); + let dummy = DataField::new("dummy", DataType::String, false); for t in tests { let rows = t.columns[0].len(); let func = t.func; diff --git a/common/indexing/src/index_min_max_test.rs b/common/indexing/src/index_min_max_test.rs index 53ff1e644990a..7cf33792d990d 100644 --- a/common/indexing/src/index_min_max_test.rs +++ b/common/indexing/src/index_min_max_test.rs @@ -31,7 +31,7 @@ use crate::MinMaxIndex; #[test] fn test_min_max_index() -> Result<()> { let schema = DataSchemaRefExt::create(vec![ - DataField::new("name", DataType::Utf8, true), + DataField::new("name", DataType::String, true), DataField::new("age", DataType::Int32, false), ]); @@ -48,8 +48,8 @@ fn test_min_max_index() -> Result<()> { let idx_slice = vec![ MinMaxIndex { col: "name".to_string(), - min: DataValue::Utf8(Some("jack".to_string())), - max: DataValue::Utf8(Some("xbohu".to_string())), + min: DataValue::String(Some("jack".as_bytes().to_vec())), + max: DataValue::String(Some("xbohu".as_bytes().to_vec())), version: IndexSchemaVersion::V1, }, MinMaxIndex { diff --git a/common/indexing/src/index_partition_test.rs b/common/indexing/src/index_partition_test.rs index c9d6d7051c942..eadb091d53f32 100644 --- a/common/indexing/src/index_partition_test.rs +++ b/common/indexing/src/index_partition_test.rs @@ -25,8 +25,8 @@ use crate::PartitionIndex; fn test_partition_index() -> Result<()> { // Apply index. { - let partition_value = DataValue::Utf8(Some("datafuse".to_string())); - let expr = col("name").eq(lit("bohu")); + let partition_value = DataValue::String(Some("datafuse".as_bytes().to_vec())); + let expr = col("name").eq(lit("bohu".as_bytes())); let actual = PartitionIndex::apply_index(partition_value, &expr)?; let expected = true; assert_eq!(actual, expected); diff --git a/common/indexing/src/index_sparse_test.rs b/common/indexing/src/index_sparse_test.rs index 1c86b1dc5e13f..4c71fb9702196 100644 --- a/common/indexing/src/index_sparse_test.rs +++ b/common/indexing/src/index_sparse_test.rs @@ -32,7 +32,7 @@ use crate::SparseIndexValue; #[test] fn test_sparse_index() -> Result<()> { let schema = DataSchemaRefExt::create(vec![ - DataField::new("name", DataType::Utf8, true), + DataField::new("name", DataType::String, true), DataField::new("age", DataType::Int32, false), ]); @@ -51,13 +51,13 @@ fn test_sparse_index() -> Result<()> { col: "name".to_string(), values: vec![ SparseIndexValue { - min: DataValue::Utf8(Some("jack".to_string())), - max: DataValue::Utf8(Some("bohu".to_string())), + min: DataValue::String(Some("jack".as_bytes().to_vec())), + max: DataValue::String(Some("bohu".as_bytes().to_vec())), page_no: 0, }, SparseIndexValue { - min: DataValue::Utf8(Some("xjack".to_string())), - max: DataValue::Utf8(Some("xbohu".to_string())), + min: DataValue::String(Some("xjack".as_bytes().to_vec())), + max: DataValue::String(Some("xbohu".as_bytes().to_vec())), page_no: 1, }, ], diff --git a/common/planners/src/plan_describe_table_test.rs b/common/planners/src/plan_describe_table_test.rs index 8a5592945e25c..814dc2ed1ac40 100644 --- a/common/planners/src/plan_describe_table_test.rs +++ b/common/planners/src/plan_describe_table_test.rs @@ -23,9 +23,9 @@ use crate::*; #[test] fn test_describe_table_plan() -> Result<()> { let schema = DataSchemaRefExt::create(vec![ - DataField::new("Field", DataType::Utf8, false), - DataField::new("Type", DataType::Utf8, false), - DataField::new("Null", DataType::Utf8, false), + DataField::new("Field", DataType::String, false), + DataField::new("Type", DataType::String, false), + DataField::new("Null", DataType::String, false), ]); let describe = PlanNode::DescribeTable(DescribeTablePlan { @@ -36,9 +36,9 @@ fn test_describe_table_plan() -> Result<()> { let expect = "\ DataSchema { fields: [\ - DataField { name: \"Field\", data_type: Utf8, nullable: false }, \ - DataField { name: \"Type\", data_type: Utf8, nullable: false }, \ - DataField { name: \"Null\", data_type: Utf8, nullable: false }\ + DataField { name: \"Field\", data_type: String, nullable: false }, \ + DataField { name: \"Type\", data_type: String, nullable: false }, \ + DataField { name: \"Null\", data_type: String, nullable: false }\ ] }"; let actual = format!("{:?}", describe.schema()); assert_eq!(expect, actual); diff --git a/common/planners/src/plan_explain_test.rs b/common/planners/src/plan_explain_test.rs index fda6357462a52..fd4bca8c53bf9 100644 --- a/common/planners/src/plan_explain_test.rs +++ b/common/planners/src/plan_explain_test.rs @@ -43,7 +43,7 @@ fn test_explain_plan() -> Result<()> { assert_eq!(expect, actual); assert_eq!(explain.schema().fields().clone(), vec![DataField::new( "explain", - DataType::Utf8, + DataType::String, false )]); diff --git a/common/planners/src/plan_expression_test.rs b/common/planners/src/plan_expression_test.rs index 531e171472435..b358e3b394a3d 100644 --- a/common/planners/src/plan_expression_test.rs +++ b/common/planners/src/plan_expression_test.rs @@ -25,7 +25,7 @@ use crate::*; fn test_expression_plan_format() -> Result<()> { use pretty_assertions::assert_eq; - let schema = DataSchemaRefExt::create(vec![DataField::new("a", DataType::Utf8, false)]); + let schema = DataSchemaRefExt::create(vec![DataField::new("a", DataType::String, false)]); let empty_plan = EmptyPlan::create_with_schema(schema.clone()); let expression = PlanNode::Expression(ExpressionPlan { @@ -35,7 +35,7 @@ fn test_expression_plan_format() -> Result<()> { desc: "".to_string(), }); let _ = expression.schema(); - let expect = "Expression: a:Utf8 ()"; + let expect = "Expression: a:String ()"; let actual = format!("{:?}", expression); assert_eq!(expect, actual); Ok(()) diff --git a/common/planners/src/plan_projection_test.rs b/common/planners/src/plan_projection_test.rs index 8463e8d39029a..d7c59b0aea849 100644 --- a/common/planners/src/plan_projection_test.rs +++ b/common/planners/src/plan_projection_test.rs @@ -23,16 +23,16 @@ use crate::*; fn test_projection_plan() -> Result<()> { use pretty_assertions::assert_eq; - let schema = DataSchemaRefExt::create(vec![DataField::new("a", DataType::Utf8, false)]); + let schema = DataSchemaRefExt::create(vec![DataField::new("a", DataType::String, false)]); let empty_plan = EmptyPlan::create_with_schema(schema.clone()); let projection = PlanNode::Projection(ProjectionPlan { expr: vec![col("a")], - schema: DataSchemaRefExt::create(vec![DataField::new("a", DataType::Utf8, false)]), + schema: DataSchemaRefExt::create(vec![DataField::new("a", DataType::String, false)]), input: Arc::from(PlanBuilder::from(&PlanNode::Empty(empty_plan)).build()?), }); let _ = projection.schema(); - let expect = "Projection: a:Utf8"; + let expect = "Projection: a:String"; let actual = format!("{:?}", projection); assert_eq!(expect, actual); Ok(()) diff --git a/common/planners/src/plan_scan_test.rs b/common/planners/src/plan_scan_test.rs index 4b6bb1ece894e..b74f891bb92be 100644 --- a/common/planners/src/plan_scan_test.rs +++ b/common/planners/src/plan_scan_test.rs @@ -25,11 +25,11 @@ fn test_scan_plan() -> Result<()> { schema_name: "scan_test".to_string(), table_id: 0, table_version: None, - table_schema: DataSchemaRefExt::create(vec![DataField::new("a", DataType::Utf8, false)]), + table_schema: DataSchemaRefExt::create(vec![DataField::new("a", DataType::String, false)]), table_args: None, projected_schema: DataSchemaRefExt::create(vec![DataField::new( "a", - DataType::Utf8, + DataType::String, false, )]), push_downs: Extras::default(), diff --git a/common/planners/src/plan_select_test.rs b/common/planners/src/plan_select_test.rs index 9acb5695e4330..9e59fad82d73f 100644 --- a/common/planners/src/plan_select_test.rs +++ b/common/planners/src/plan_select_test.rs @@ -23,12 +23,12 @@ use crate::*; fn test_select_wildcard_plan() -> Result<()> { use pretty_assertions::assert_eq; - let schema = DataSchemaRefExt::create(vec![DataField::new("a", DataType::Utf8, false)]); + let schema = DataSchemaRefExt::create(vec![DataField::new("a", DataType::String, false)]); let plan = PlanBuilder::create(schema).project(&[col("a")])?.build()?; let select = PlanNode::Select(SelectPlan { input: Arc::new(plan), }); - let expect = "Projection: a:Utf8"; + let expect = "Projection: a:String"; let actual = format!("{:?}", select); assert_eq!(expect, actual); diff --git a/common/streams/src/sources/source_test.rs b/common/streams/src/sources/source_test.rs index 3e081b55d5e77..c97af785a566b 100644 --- a/common/streams/src/sources/source_test.rs +++ b/common/streams/src/sources/source_test.rs @@ -28,7 +28,7 @@ fn test_parse_values() { let schema = DataSchemaRefExt::create(vec![ DataField::new("a", DataType::Int8, false), - DataField::new("b", DataType::Utf8, false), + DataField::new("b", DataType::String, false), DataField::new("c", DataType::Float64, false), ]); let mut values_source = ValueSource::new(buffer.as_bytes(), schema, 10); @@ -57,7 +57,7 @@ fn test_parse_csvs() { let schema = DataSchemaRefExt::create(vec![ DataField::new("a", DataType::Int8, false), - DataField::new("b", DataType::Utf8, false), + DataField::new("b", DataType::String, false), DataField::new("c", DataType::Float64, false), ]); let mut values_source = CsvSource::new(buffer.as_bytes(), schema, 10); diff --git a/common/streams/src/stream_datablock_test.rs b/common/streams/src/stream_datablock_test.rs index 63e74904e3109..dee165b7e606e 100644 --- a/common/streams/src/stream_datablock_test.rs +++ b/common/streams/src/stream_datablock_test.rs @@ -23,7 +23,7 @@ use crate::*; async fn test_datablock_stream() { let schema = DataSchemaRefExt::create(vec![ DataField::new("name", DataType::Int32, false), - DataField::new("age", DataType::Utf8, false), + DataField::new("age", DataType::String, false), ]); let data_blocks = vec![ diff --git a/common/streams/src/stream_limit_by.rs b/common/streams/src/stream_limit_by.rs index 924c3d0f0978e..61833667dab89 100644 --- a/common/streams/src/stream_limit_by.rs +++ b/common/streams/src/stream_limit_by.rs @@ -19,8 +19,8 @@ use std::task::Poll; use common_arrow::arrow; use common_arrow::arrow::array::BooleanArray; -use common_arrow::arrow::datatypes::DataType as ArrowType; use common_arrow::arrow::bitmap::MutableBitmap; +use common_arrow::arrow::datatypes::DataType as ArrowType; use common_datablocks::DataBlock; use common_datablocks::HashMethod; use common_datablocks::HashMethodSerializer; @@ -71,7 +71,7 @@ impl LimitByStream { } } - let array = BooleanArray::from_data(ArrowType::Boolean,filter_vec.into(), None); + let array = BooleanArray::from_data(ArrowType::Boolean, filter.into(), None); let batch = block.clone().try_into()?; let batch = arrow::compute::filter::filter_record_batch(&batch, &array)?; Some(batch.try_into()).transpose() diff --git a/common/streams/src/stream_limit_by_test.rs b/common/streams/src/stream_limit_by_test.rs index 618edd75482f4..5169d280a7ef9 100644 --- a/common/streams/src/stream_limit_by_test.rs +++ b/common/streams/src/stream_limit_by_test.rs @@ -24,7 +24,7 @@ use crate::*; async fn test_limitby_stream() -> Result<()> { let schema = DataSchemaRefExt::create(vec![ DataField::new("id", DataType::UInt8, false), - DataField::new("name", DataType::Utf8, false), + DataField::new("name", DataType::String, false), ]); let ids = vec![2u8, 2, 2, 2, 3, 3, 3]; diff --git a/common/streams/src/stream_skip_test.rs b/common/streams/src/stream_skip_test.rs index 157ed1c88e759..54db094cc8fc7 100644 --- a/common/streams/src/stream_skip_test.rs +++ b/common/streams/src/stream_skip_test.rs @@ -23,22 +23,28 @@ use crate::*; async fn test_skipstream() { let schema = DataSchemaRefExt::create(vec![ DataField::new("id", DataType::Int32, false), - DataField::new("name", DataType::Utf8, false), + DataField::new("name", DataType::String, false), ]); // create a data block with 'id' from 0 to 20 let ids = (0..20).collect::>(); let names = (0..20) - .map(|n| format!("Alice-{}", n)) - .collect::>(); + .map(|n| { + let str = format!("Alice-{}", n); + str.into_bytes() + }) + .collect::>>(); let block0 = DataBlock::create_by_array(schema.clone(), vec![Series::new(ids), Series::new(names)]); // create a data block with 'id' from 20 to 40 let ids = (20..40).collect::>(); let names = (20..40) - .map(|n| format!("Bob-{}", n)) - .collect::>(); + .map(|n| { + let str = format!("Bob-{}", n); + str.into_bytes() + }) + .collect::>>(); let block1 = DataBlock::create_by_array(schema.clone(), vec![Series::new(ids), Series::new(names)]); diff --git a/query/src/interpreters/interpreter_describe_table_test.rs b/query/src/interpreters/interpreter_describe_table_test.rs index 95e98429d1347..52567a8184ccc 100644 --- a/query/src/interpreters/interpreter_describe_table_test.rs +++ b/query/src/interpreters/interpreter_describe_table_test.rs @@ -51,7 +51,7 @@ async fn interpreter_describe_table_test() -> Result<()> { "+-------+--------+------+", "| a | Int64 | NO |", "| b | Int32 | NO |", - "| c | Utf8 | NO |", + "| c | String | NO |", "| d | Int16 | NO |", "| e | Date16 | NO |", "+-------+--------+------+", diff --git a/query/src/interpreters/interpreter_show_create_table_test.rs b/query/src/interpreters/interpreter_show_create_table_test.rs index 2a2e4625479eb..15debd3aa1a7e 100644 --- a/query/src/interpreters/interpreter_show_create_table_test.rs +++ b/query/src/interpreters/interpreter_show_create_table_test.rs @@ -51,7 +51,7 @@ async fn interpreter_show_create_table_test() -> Result<()> { "| a | CREATE TABLE `a` ( |", "| | `a` Int64, |", "| | `b` Int32, |", - "| | `c` Utf8, |", + "| | `c` String, |", "| | `d` Int16, |", "| | `e` Date16, |", "| | ) ENGINE=Null |", diff --git a/query/src/interpreters/interpreter_table_create_test.rs b/query/src/interpreters/interpreter_table_create_test.rs index 8df2190f224f3..39e8d95cadf02 100644 --- a/query/src/interpreters/interpreter_table_create_test.rs +++ b/query/src/interpreters/interpreter_table_create_test.rs @@ -34,7 +34,7 @@ async fn test_create_table_interpreter() -> Result<()> { assert_eq!(plan.schema().field_with_name("a")?.data_type(), &DataType::Int64); assert_eq!(plan.schema().field_with_name("b")?.data_type(), &DataType::Int32); - assert_eq!(plan.schema().field_with_name("c")?.data_type(), &DataType::Utf8); + assert_eq!(plan.schema().field_with_name("c")?.data_type(), &DataType::String); assert_eq!(plan.schema().field_with_name("d")?.data_type(), &DataType::Int16); assert_eq!(plan.schema().field_with_name("e")?.data_type(), &DataType::Date16); diff --git a/query/src/optimizers/optimizer_constant_folding_test.rs b/query/src/optimizers/optimizer_constant_folding_test.rs index 087e5ed614959..54cc42778c1c4 100644 --- a/query/src/optimizers/optimizer_constant_folding_test.rs +++ b/query/src/optimizers/optimizer_constant_folding_test.rs @@ -81,7 +81,7 @@ mod tests { query: "SELECT sipHash('test_string')", expect: "\ Projection: sipHash('test_string'):UInt64\ - \n Expression: 17123704338732264132:UInt64 (Before Projection)\ + \n Expression: 15735157695654173841:UInt64 (Before Projection)\ \n ReadDataSource: scan partitions: [1], scan schema: [dummy:UInt8], statistics: [read_rows: 1, read_bytes: 1]", }, Test { @@ -96,16 +96,16 @@ mod tests { name: "Projection strings const recursion", query: "SELECT SUBSTRING('1234567890' FROM 3 FOR 3)", expect: "\ - Projection: substring('1234567890', 3, 3):Utf8\ - \n Expression: 345:Utf8 (Before Projection)\ + Projection: substring('1234567890', 3, 3):String\ + \n Expression: 345:String (Before Projection)\ \n ReadDataSource: scan partitions: [1], scan schema: [dummy:UInt8], statistics: [read_rows: 1, read_bytes: 1]", }, Test { name: "Projection to type name const recursion", query: "SELECT toTypeName('1234567890')", expect: "\ - Projection: toTypeName('1234567890'):Utf8\ - \n Expression: Utf8:Utf8 (Before Projection)\ + Projection: toTypeName('1234567890'):String\ + \n Expression: String:String (Before Projection)\ \n ReadDataSource: scan partitions: [1], scan schema: [dummy:UInt8], statistics: [read_rows: 1, read_bytes: 1]", }, ]; diff --git a/query/src/optimizers/optimizer_projection_push_down_test.rs b/query/src/optimizers/optimizer_projection_push_down_test.rs index 49968d00bca09..92acb70fe8263 100644 --- a/query/src/optimizers/optimizer_projection_push_down_test.rs +++ b/query/src/optimizers/optimizer_projection_push_down_test.rs @@ -29,16 +29,16 @@ fn test_projection_push_down_optimizer_1() -> Result<()> { let ctx = crate::tests::try_create_context()?; let schema = DataSchemaRefExt::create(vec![ - DataField::new("a", DataType::Utf8, false), - DataField::new("b", DataType::Utf8, false), - DataField::new("c", DataType::Utf8, false), - DataField::new("d", DataType::Utf8, false), + DataField::new("a", DataType::String, false), + DataField::new("b", DataType::String, false), + DataField::new("c", DataType::String, false), + DataField::new("d", DataType::String, false), ]); let output_schema = DataSchemaRefExt::create(vec![ - DataField::new("a", DataType::Utf8, false), - DataField::new("b", DataType::Utf8, false), - DataField::new("c", DataType::Utf8, false), + DataField::new("a", DataType::String, false), + DataField::new("b", DataType::String, false), + DataField::new("c", DataType::String, false), ]); let plan = PlanNode::Projection(ProjectionPlan { @@ -53,7 +53,7 @@ fn test_projection_push_down_optimizer_1() -> Result<()> { let optimized = projection_push_down.optimize(&plan)?; let expect = "\ - Projection: a:Utf8, b:Utf8, c:Utf8"; + Projection: a:String, b:String, c:String"; let actual = format!("{:?}", optimized); assert_eq!(expect, actual); @@ -72,10 +72,10 @@ fn test_projection_push_down_optimizer_group_by() -> Result<()> { let optimized = project_push_down.optimize(&plan)?; let expect = "\ - Projection: max(value) as c1:Utf8, name as c2:Utf8\ + Projection: max(value) as c1:String, name as c2:String\ \n AggregatorFinal: groupBy=[[name]], aggr=[[max(value)]]\ \n AggregatorPartial: groupBy=[[name]], aggr=[[max(value)]]\ - \n ReadDataSource: scan partitions: [1], scan schema: [name:Utf8, value:Utf8], statistics: [read_rows: 0, read_bytes: 0]"; + \n ReadDataSource: scan partitions: [1], scan schema: [name:String, value:String], statistics: [read_rows: 0, read_bytes: 0]"; let actual = format!("{:?}", optimized); assert_eq!(expect, actual); @@ -96,9 +96,9 @@ fn test_projection_push_down_optimizer_2() -> Result<()> { table_id: 0, table_version: None, schema: DataSchemaRefExt::create(vec![ - DataField::new("a", DataType::Utf8, false), - DataField::new("b", DataType::Utf8, false), - DataField::new("c", DataType::Utf8, false), + DataField::new("a", DataType::String, false), + DataField::new("b", DataType::String, false), + DataField::new("c", DataType::String, false), ]), parts: generate_partitions(8, total as u64), statistics: statistics.clone(), @@ -118,7 +118,7 @@ fn test_projection_push_down_optimizer_2() -> Result<()> { let plan = PlanNode::Projection(ProjectionPlan { expr: vec![col("a")], - schema: DataSchemaRefExt::create(vec![DataField::new("a", DataType::Utf8, false)]), + schema: DataSchemaRefExt::create(vec![DataField::new("a", DataType::String, false)]), input: Arc::from(filter_plan), }); @@ -126,9 +126,9 @@ fn test_projection_push_down_optimizer_2() -> Result<()> { let optimized = projection_push_down.optimize(&plan)?; let expect = "\ - Projection: a:Utf8\ + Projection: a:String\ \n Filter: ((a > 6) and (b <= 10))\ - \n ReadDataSource: scan partitions: [8], scan schema: [a:Utf8, b:Utf8], statistics: [read_rows: 10000, read_bytes: 80000]"; + \n ReadDataSource: scan partitions: [8], scan schema: [a:String, b:String], statistics: [read_rows: 10000, read_bytes: 80000]"; let actual = format!("{:?}", optimized); assert_eq!(expect, actual); @@ -149,13 +149,13 @@ fn test_projection_push_down_optimizer_3() -> Result<()> { table_id: 0, table_version: None, schema: DataSchemaRefExt::create(vec![ - DataField::new("a", DataType::Utf8, false), - DataField::new("b", DataType::Utf8, false), - DataField::new("c", DataType::Utf8, false), - DataField::new("d", DataType::Utf8, false), - DataField::new("e", DataType::Utf8, false), - DataField::new("f", DataType::Utf8, false), - DataField::new("g", DataType::Utf8, false), + DataField::new("a", DataType::String, false), + DataField::new("b", DataType::String, false), + DataField::new("c", DataType::String, false), + DataField::new("d", DataType::String, false), + DataField::new("e", DataType::String, false), + DataField::new("f", DataType::String, false), + DataField::new("g", DataType::String, false), ]), parts: generate_partitions(8, total as u64), statistics: statistics.clone(), @@ -186,14 +186,14 @@ fn test_projection_push_down_optimizer_3() -> Result<()> { let optimized = projection_push_down.optimize(&plan)?; let expect = "\ - Projection: a:Utf8\ + Projection: a:String\ \n Limit: 10\ - \n Sort: c:Utf8\ + \n Sort: c:String\ \n Having: (a < 10)\ \n AggregatorFinal: groupBy=[[a, c]], aggr=[[]]\ \n AggregatorPartial: groupBy=[[a, c]], aggr=[[]]\ \n Filter: (b = 10)\ - \n ReadDataSource: scan partitions: [8], scan schema: [a:Utf8, b:Utf8, c:Utf8], statistics: [read_rows: 10000, read_bytes: 80000]"; + \n ReadDataSource: scan partitions: [8], scan schema: [a:String, b:String, c:String], statistics: [read_rows: 10000, read_bytes: 80000]"; let actual = format!("{:?}", optimized); assert_eq!(expect, actual); @@ -211,9 +211,9 @@ fn test_projection_push_down_optimizer_4() -> Result<()> { let mut project_push_down = ProjectionPushDownOptimizer::create(ctx); let optimized = project_push_down.optimize(&plan)?; - let expect = "Projection: substring(value, 1, 3) as c1:Utf8\ - \n Expression: substring(value, 1, 3):Utf8 (Before Projection)\ - \n ReadDataSource: scan partitions: [1], scan schema: [value:Utf8], statistics: [read_rows: 0, read_bytes: 0]"; + let expect = "Projection: substring(value, 1, 3) as c1:String\ + \n Expression: substring(value, 1, 3):String (Before Projection)\ + \n ReadDataSource: scan partitions: [1], scan schema: [value:String], statistics: [read_rows: 0, read_bytes: 0]"; let actual = format!("{:?}", optimized); assert_eq!(expect, actual); diff --git a/query/src/optimizers/optimizer_statistics_exact_test.rs b/query/src/optimizers/optimizer_statistics_exact_test.rs index b5a62d2ca5a68..4df90f723fdf6 100644 --- a/query/src/optimizers/optimizer_statistics_exact_test.rs +++ b/query/src/optimizers/optimizer_statistics_exact_test.rs @@ -39,9 +39,9 @@ mod tests { table_id: 0, table_version: None, schema: DataSchemaRefExt::create(vec![ - DataField::new("a", DataType::Utf8, false), - DataField::new("b", DataType::Utf8, false), - DataField::new("c", DataType::Utf8, false), + DataField::new("a", DataType::String, false), + DataField::new("b", DataType::String, false), + DataField::new("c", DataType::String, false), ]), parts: generate_partitions(8, total as u64), statistics: statistics.clone(), diff --git a/query/src/pipelines/transforms/transform_group_by_partial_test.rs b/query/src/pipelines/transforms/transform_group_by_partial_test.rs index 39fdb90fcba73..554012e40574c 100644 --- a/query/src/pipelines/transforms/transform_group_by_partial_test.rs +++ b/query/src/pipelines/transforms/transform_group_by_partial_test.rs @@ -61,15 +61,15 @@ async fn test_transform_partial_group_by() -> Result<()> { // SELECT SUM(number), AVG(number), number ... GROUP BY number; // binary-state let expected = vec![ - "+--------------------+----------------------------------+---------------+", - "| sum(number) | avg(number) | _group_by_key |", - "+--------------------+----------------------------------+---------------+", - "| 010000000000000000 | 00000000000000000100000000000000 | 0 |", - "| 010100000000000000 | 01000000000000000100000000000000 | 1 |", - "| 010200000000000000 | 02000000000000000100000000000000 | 2 |", - "| 010300000000000000 | 03000000000000000100000000000000 | 3 |", - "| 010400000000000000 | 04000000000000000100000000000000 | 4 |", - "+--------------------+----------------------------------+---------------+", + "+-------------+-------------+---------------+", + "| sum(number) | avg(number) | _group_by_key |", + "+-------------+-------------+---------------+", + "| \u{1}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0} | \u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{1}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0} | 0 |", + "| \u{1}\u{1}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0} | \u{1}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{1}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0} | 1 |", + "| \u{1}\u{2}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0} | \u{2}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{1}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0} | 2 |", + "| \u{1}\u{3}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0} | \u{3}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{1}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0} | 3 |", + "| \u{1}\u{4}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0} | \u{4}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{1}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0}\u{0} | 4 |", + "+-------------+-------------+---------------+", ]; common_datablocks::assert_blocks_sorted_eq(expected, result.as_slice()); diff --git a/query/src/sessions/settings.rs b/query/src/sessions/settings.rs index 53e5d00e54abf..c75d321d2a416 100644 --- a/query/src/sessions/settings.rs +++ b/query/src/sessions/settings.rs @@ -27,11 +27,11 @@ pub struct Settings { impl Settings { apply_macros! { apply_getter_setter_settings, apply_initial_settings, apply_update_settings, - ("max_block_size", u64, 10000, "Maximum block size for reading".as_bytes().to_vec()), - ("max_threads", u64, 16, "The maximum number of threads to execute the request. By default, it is determined automatically.".as_bytes().to_vec()), - ("flight_client_timeout", u64, 60, "Max duration the flight client request is allowed to take in seconds. By default, it is 60 seconds".as_bytes().to_vec()), - ("min_distributed_rows", u64, 100000000, "Minimum distributed read rows. In cluster mode, when read rows exceeds this value, the local table converted to distributed query.".as_bytes().to_vec()), - ("min_distributed_bytes", u64, 500 * 1024 * 1024, "Minimum distributed read bytes. In cluster mode, when read bytes exceeds this value, the local table converted to distributed query.".as_bytes().to_vec()) + ("max_block_size", u64, 10000, "Maximum block size for reading"), + ("max_threads", u64, 16, "The maximum number of threads to execute the request. By default, it is determined automatically."), + ("flight_client_timeout", u64, 60, "Max duration the flight client request is allowed to take in seconds. By default, it is 60 seconds"), + ("min_distributed_rows", u64, 100000000, "Minimum distributed read rows. In cluster mode, when read rows exceeds this value, the local table converted to distributed query."), + ("min_distributed_bytes", u64, 500 * 1024 * 1024, "Minimum distributed read bytes. In cluster mode, when read bytes exceeds this value, the local table converted to distributed query.") } pub fn try_create() -> Result> { @@ -68,12 +68,12 @@ impl SettingsBase { // TODO, to use macro generate this codes #[allow(unused)] - pub fn try_set_u64(&self, key: &'static str, val: u64, desc: Vec) -> Result<()> { + pub fn try_set_u64(&self, key: &'static str, val: u64, desc: &str) -> Result<()> { let mut settings = self.settings.write(); let setting_val = DataValue::Struct(vec![ DataValue::UInt64(Some(val)), DataValue::UInt64(Some(val)), - DataValue::String(Some(desc)), + DataValue::String(Some(desc.as_bytes().to_vec())), ]); settings.insert(key, setting_val); Ok(()) @@ -117,12 +117,12 @@ impl SettingsBase { } #[allow(unused)] - pub fn try_set_i64(&self, key: &'static str, val: i64, desc: Vec) -> Result<()> { + pub fn try_set_i64(&self, key: &'static str, val: i64, desc: &str) -> Result<()> { let mut settings = self.settings.write(); let setting_val = DataValue::Struct(vec![ DataValue::Int64(Some(val)), DataValue::Int64(Some(val)), - DataValue::String(Some(desc)), + DataValue::String(Some(desc.as_bytes().to_vec())), ]); settings.insert(key, setting_val); Ok(()) @@ -166,12 +166,12 @@ impl SettingsBase { } #[allow(unused)] - pub fn try_set_f64(&self, key: &'static str, val: f64, desc: Vec) -> Result<()> { + pub fn try_set_f64(&self, key: &'static str, val: f64, desc: &str) -> Result<()> { let mut settings = self.settings.write(); let setting_val = DataValue::Struct(vec![ DataValue::Float64(Some(val)), DataValue::Float64(Some(val)), - DataValue::String(Some(desc)), + DataValue::String(Some(desc.as_bytes().to_vec())), ]); settings.insert(key, setting_val); Ok(()) @@ -215,20 +215,20 @@ impl SettingsBase { } #[allow(unused)] - pub fn try_set_string(&self, key: &'static str, val: Vec, desc: Vec) -> Result<()> { + pub fn try_set_string(&self, key: &'static str, val: &str, desc: &str) -> Result<()> { let mut settings = self.settings.write(); - let default_value = val.clone(); + let default_value = val; let setting_val = DataValue::Struct(vec![ - DataValue::String(Some(val)), - DataValue::String(Some(default_value)), - DataValue::String(Some(desc)), + DataValue::String(Some(val.as_bytes().to_vec())), + DataValue::String(Some(default_value.as_bytes().to_vec())), + DataValue::String(Some(desc.as_bytes().to_vec())), ]); settings.insert(key, setting_val); Ok(()) } #[allow(unused)] - pub fn try_update_string(&self, key: &'static str, val: Vec) -> Result<()> { + pub fn try_update_string(&self, key: &'static str, val: &str) -> Result<()> { let mut settings = self.settings.write(); let setting_val = settings .get(key) @@ -236,7 +236,7 @@ impl SettingsBase { if let DataValue::Struct(values) = setting_val { let v = DataValue::Struct(vec![ - DataValue::String(Some(val)), + DataValue::String(Some(val.as_bytes().to_vec())), values[1].clone(), values[2].clone(), ]); diff --git a/query/src/sql/plan_parser_test.rs b/query/src/sql/plan_parser_test.rs index ab5c07f1a0527..de8ce735567b0 100644 --- a/query/src/sql/plan_parser_test.rs +++ b/query/src/sql/plan_parser_test.rs @@ -54,13 +54,13 @@ fn test_plan_parser() -> Result<()> { Test { name: "create-table-passed", sql: "CREATE TABLE t(c1 int, c2 bigint, c3 varchar(255) ) ENGINE = Parquet location = 'foo.parquet' ", - expect: "Create table default.t DataField { name: \"c1\", data_type: Int32, nullable: false }, DataField { name: \"c2\", data_type: Int64, nullable: false }, DataField { name: \"c3\", data_type: Utf8, nullable: false }, engine: Parquet, if_not_exists:false, option: {\"location\": \"foo.parquet\"}", + expect: "Create table default.t DataField { name: \"c1\", data_type: Int32, nullable: false }, DataField { name: \"c2\", data_type: Int64, nullable: false }, DataField { name: \"c3\", data_type: String, nullable: false }, engine: Parquet, if_not_exists:false, option: {\"location\": \"foo.parquet\"}", error: "", }, Test { name: "create-table-if-not-exists-passed", sql: "CREATE TABLE IF NOT EXISTS t(c1 int, c2 bigint, c3 varchar(255) ) ENGINE = Parquet location = 'foo.parquet' ", - expect: "Create table default.t DataField { name: \"c1\", data_type: Int32, nullable: false }, DataField { name: \"c2\", data_type: Int64, nullable: false }, DataField { name: \"c3\", data_type: Utf8, nullable: false }, engine: Parquet, if_not_exists:true, option: {\"location\": \"foo.parquet\"}", + expect: "Create table default.t DataField { name: \"c1\", data_type: Int32, nullable: false }, DataField { name: \"c2\", data_type: Int64, nullable: false }, DataField { name: \"c3\", data_type: String, nullable: false }, engine: Parquet, if_not_exists:true, option: {\"location\": \"foo.parquet\"}", error: "", }, Test { @@ -108,7 +108,7 @@ fn test_plan_parser() -> Result<()> { Test { name: "database-passed", sql: "select database()", - expect: "Projection: database():Utf8\n Expression: database(default):Utf8 (Before Projection)\n ReadDataSource: scan partitions: [1], scan schema: [dummy:UInt8], statistics: [read_rows: 1, read_bytes: 1]", + expect: "Projection: database():String\n Expression: database(default):String (Before Projection)\n ReadDataSource: scan partitions: [1], scan schema: [dummy:UInt8], statistics: [read_rows: 1, read_bytes: 1]", error: "", }, Test { diff --git a/store/src/api/rpc/flight_service_test.rs b/store/src/api/rpc/flight_service_test.rs index 89dd64cfd3eb6..00447056354cf 100644 --- a/store/src/api/rpc/flight_service_test.rs +++ b/store/src/api/rpc/flight_service_test.rs @@ -474,7 +474,7 @@ async fn test_do_append() -> anyhow::Result<()> { let schema = Arc::new(DataSchema::new(vec![ DataField::new("col_i", DataType::Int64, false), - DataField::new("col_s", DataType::Utf8, false), + DataField::new("col_s", DataType::String, false), ])); let db_name = "test_db"; let tbl_name = "test_tbl"; @@ -546,7 +546,7 @@ async fn test_scan_partition() -> anyhow::Result<()> { let schema = Arc::new(DataSchema::new(vec![ DataField::new("col_i", DataType::Int64, false), - DataField::new("col_s", DataType::Utf8, false), + DataField::new("col_s", DataType::String, false), ])); let db_name = "test_db"; let tbl_name = "test_tbl"; diff --git a/store/src/data_part/appender_test.rs b/store/src/data_part/appender_test.rs index 4bdf6282a718c..288231741680a 100644 --- a/store/src/data_part/appender_test.rs +++ b/store/src/data_part/appender_test.rs @@ -37,7 +37,7 @@ mod test { fn test_in_memory_write() -> anyhow::Result<()> { let schema = Arc::new(DataSchema::new(vec![ DataField::new("col_i", DataType::Int64, false), - DataField::new("col_s", DataType::Utf8, false), + DataField::new("col_s", DataType::String, false), ])); let col0 = Series::new(vec![0 as i64, 1, 2]); @@ -64,7 +64,7 @@ mod test { #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn test_append() -> anyhow::Result<()> { let col0: ArrayRef = Arc::new(Int64Array::from_values(vec![0, 1, 2])); - let col1: ArrayRef = Arc::new(LargeUtf8Array::from_iter_values( + let col1: ArrayRef = Arc::new(LargeBinaryArray::from_iter_values( vec!["str1", "str2", "str3"].iter(), )); diff --git a/tests/suites/0_stateless/02_0009_function_siphash64.result b/tests/suites/0_stateless/02_0009_function_siphash64.result index cf934d83124d4..30c1fe855859c 100644 --- a/tests/suites/0_stateless/02_0009_function_siphash64.result +++ b/tests/suites/0_stateless/02_0009_function_siphash64.result @@ -1,6 +1,6 @@ -9027491583908826579 +5091324831805182738 4952851536318644461 2854037594257667269 -9027491583908826579 +5091324831805182738 4952851536318644461 2854037594257667269 diff --git a/tests/suites/0_stateless/08_0000_optimizer_cluster.result b/tests/suites/0_stateless/08_0000_optimizer_cluster.result index 7d84397f96d74..28e145b3ff07e 100644 --- a/tests/suites/0_stateless/08_0000_optimizer_cluster.result +++ b/tests/suites/0_stateless/08_0000_optimizer_cluster.result @@ -8,6 +8,6 @@ RedistributeStage[expr: 0] Expression: ((number % 3) + 1):UInt16, (number + 1):UInt64 (Before GroupBy) ReadDataSource: scan partitions: [16], scan schema: [number:UInt64], statistics: [read_rows: 10000, read_bytes: 80000] projection push down: push (name and value) to read datasource -Projection: name:Utf8 +Projection: name:String Filter: (value > 10) - ReadDataSource: scan partitions: [1], scan schema: [name:Utf8, value:Utf8], statistics: [read_rows: 0, read_bytes: 0] + ReadDataSource: scan partitions: [1], scan schema: [name:String, value:String], statistics: [read_rows: 0, read_bytes: 0] diff --git a/website/datafuse/docs/sqlstatement/conversion-functions/cast.md b/website/datafuse/docs/sqlstatement/conversion-functions/cast.md index e0f7026d6567b..c7808783a7845 100644 --- a/website/datafuse/docs/sqlstatement/conversion-functions/cast.md +++ b/website/datafuse/docs/sqlstatement/conversion-functions/cast.md @@ -26,11 +26,11 @@ Converted value. ``` mysql> SELECT CAST(1 AS VARCHAR); -+-----------------+ -| cast(1 as Utf8) | -+-----------------+ -| 1 | -+-----------------+ ++-------------------+ +| cast(1 as String) | ++-------------------+ +| 1 | ++-------------------+ mysql> SELECT CAST(1 AS UInt64); +-------------------+