diff --git a/Cargo.toml b/Cargo.toml index e779422396a..7175283b7cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ bench = false num = "^0.4" chrono = "^0.4" # To efficiently cast numbers to strings -lexical-core = "^0.7" +lexical-core = { version = "0.7", optional = true } # We need to Hash values before sending them to an hasher. This # crate provides HashMap that assumes pre-hashed values. hash_hasher = "^2.0.3" @@ -50,8 +50,6 @@ base64 = { version = "0.13.0", optional = true } packed_simd = { version = "0.3.4", optional = true, package = "packed_simd_2" } -multiversion = "0.6.1" - # to write to parquet as a stream futures = { version = "0.3", optional = true } @@ -62,6 +60,7 @@ parquet2 = { version = "0.3", optional = true, default_features = false, feature # for division/remainder optimization at runtime strength_reduce = { version = "0.2", optional = true } +multiversion = { version = "0.6.1", optional = true } [dev-dependencies] rand = "0.8" @@ -87,7 +86,7 @@ default = [ "compute", ] merge_sort = ["itertools"] -io_csv = ["csv", "lazy_static", "regex"] +io_csv = ["csv", "lazy_static", "regex", "lexical-core"] io_json = ["serde", "serde_json", "indexmap"] io_ipc = ["flatbuffers"] io_ipc_compression = ["lz4", "zstd"] @@ -103,7 +102,7 @@ io_parquet_compression = [ io_json_integration = ["io_json", "serde_derive", "hex"] io_print = ["comfy-table"] # the compute kernels. Disabling this significantly reduces compile time. -compute = ["strength_reduce"] +compute = ["strength_reduce", "multiversion", "lexical-core"] # base64 + io_ipc because arrow schemas are stored as base64-encoded ipc format. 
io_parquet = ["parquet2", "io_ipc", "base64", "futures"] benchmarks = ["rand"] diff --git a/src/util/lexical.rs b/src/util/lexical.rs new file mode 100644 index 00000000000..bbeeed6c15a --- /dev/null +++ b/src/util/lexical.rs @@ -0,0 +1,23 @@ +/// Converts numeric type to a `String` +#[inline] +pub fn lexical_to_bytes<N: lexical_core::ToLexical>(n: N) -> Vec<u8> { + let mut buf = Vec::<u8>::with_capacity(N::FORMATTED_SIZE_DECIMAL); + unsafe { + // JUSTIFICATION + // Benefit + // Allows using the faster serializer lexical core and convert to string + // Soundness + // Length of buf is set as written length afterwards. lexical_core + // creates a valid string, so doesn't need to be checked. + let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity()); + let len = lexical_core::write(n, slice).len(); + buf.set_len(len); + } + buf +} + +/// Converts numeric type to a `String` +#[inline] +pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String { + unsafe { String::from_utf8_unchecked(lexical_to_bytes(n)) } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index 53b658b8416..71e30dfea7f 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -15,29 +15,10 @@ // specific language governing permissions and limitations // under the License. -/// Converts numeric type to a `String` -#[inline] -pub fn lexical_to_bytes<N: lexical_core::ToLexical>(n: N) -> Vec<u8> { - let mut buf = Vec::<u8>::with_capacity(N::FORMATTED_SIZE_DECIMAL); - unsafe { - // JUSTIFICATION - // Benefit - // Allows using the faster serializer lexical core and convert to string - // Soundness - // Length of buf is set as written length afterwards. lexical_core - // creates a valid string, so doesn't need to be checked. 
- let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity()); - let len = lexical_core::write(n, slice).len(); - buf.set_len(len); - } - buf -} - -/// Converts numeric type to a `String` -#[inline] -pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String { - unsafe { String::from_utf8_unchecked(lexical_to_bytes(n)) } -} +#[cfg(any(feature = "compute", feature = "io_csv"))] +mod lexical; +#[cfg(any(feature = "compute", feature = "io_csv"))] +pub use lexical::*; #[cfg(feature = "benchmarks")] pub mod bench_util;