From cd5bb8bd8a5a2d1dacd8dab188e64b744269291d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 21 Jun 2021 16:40:24 -0400 Subject: [PATCH 1/9] update rand --- arrow/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 9875262c31bc..79dc28148c95 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -40,7 +40,7 @@ serde = { version = "1.0", features = ["rc"] } serde_derive = "1.0" serde_json = { version = "1.0", features = ["preserve_order"] } indexmap = "1.6" -rand = "0.7" +rand = "0.8" num = "0.4" csv_crate = { version = "1.1", optional = true, package="csv" } regex = "1.3" From 578f77021e8fb76af73d415bbfe63f3fe4bfe5d3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 22 Jun 2021 07:04:32 -0400 Subject: [PATCH 2/9] Update code for new rand interface --- arrow/src/util/bench_util.rs | 5 +++-- arrow/src/util/bit_util.rs | 2 +- arrow/src/util/data_gen.rs | 2 +- arrow/src/util/test_util.rs | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs index fd0ece830a16..40340336882b 100644 --- a/arrow/src/util/bench_util.rs +++ b/arrow/src/util/bench_util.rs @@ -102,7 +102,8 @@ pub fn create_string_array( if rng.gen::() < null_density { None } else { - let value = rng.sample_iter(&Alphanumeric).take(4).collect::(); + let value = rng.sample_iter(&Alphanumeric).take(4).collect(); + let value = String::from_utf8(value).unwrap(); Some(value) } }) @@ -124,7 +125,7 @@ pub fn create_binary_array( } else { let value = rng .sample_iter::(Standard) - .take(range_rng.gen_range(0, 8)) + .take(range_rng.gen_range(0..8)) .collect::>(); Some(value) } diff --git a/arrow/src/util/bit_util.rs b/arrow/src/util/bit_util.rs index c11e10081243..d046f781f053 100644 --- a/arrow/src/util/bit_util.rs +++ b/arrow/src/util/bit_util.rs @@ -271,7 +271,7 @@ mod tests { let mut v = HashSet::new(); let mut rng = seedable_rng(); for _ in 0..NUM_SETS { - let offset = rng.gen_range(0, 8 * NUM_BYTES); + let offset = rng.gen_range(0..8 * NUM_BYTES); v.insert(offset); set_bit(&mut buffer[..], offset); } diff --git a/arrow/src/util/data_gen.rs b/arrow/src/util/data_gen.rs index cd1f25efea06..08624ae86ee8 100644 --- a/arrow/src/util/data_gen.rs +++ b/arrow/src/util/data_gen.rs @@ -218,7 +218,7 @@ fn create_random_offsets( offsets.push(current_offset); (0..size).for_each(|_| { - current_offset += rng.gen_range(min, max); + current_offset += rng.gen_range(min..max); offsets.push(current_offset); }); diff --git a/arrow/src/util/test_util.rs b/arrow/src/util/test_util.rs index 261b2456dc6d..4b193f774178 100644 --- a/arrow/src/util/test_util.rs +++ b/arrow/src/util/test_util.rs @@ -25,7 +25,7 @@ pub fn random_bytes(n: usize) -> Vec { let mut result = vec![]; let mut rng = seedable_rng(); for _ in 0..n { - result.push(rng.gen_range(0, 255)); + result.push(rng.gen_range(0..255)); } result } From 4dd809065a4cebfd0afe35e061b59cfa7e9fd36c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 21 Jun 2021 16:44:08 -0400 Subject: [PATCH 3/9] update zstd --- parquet/Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index d66ee237eeca..120b81d1a3f8 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -38,7 +38,7 @@ snap = { version = "1.0", optional = true } brotli = { version = "3.3", optional = true } flate2 = { version = "1.0", optional = true } lz4 = { version = "1.23", optional = true } -zstd = { version = "0.8", optional = true } +zstd = { version = "0.9", optional = true } chrono = "0.4" num-bigint = "0.4" arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT", optional = true } @@ -54,7 +54,6 @@ snap = "1.0" brotli = "3.3" flate2 = "1.0" lz4 = "1.23" -zstd = "0.8" arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT" } serde_json = { version = "1.0", features = ["preserve_order"] } From 24801d78b4a71c1aeed731d8cfd3c3a12186fade Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 21 Jun 2021 16:45:36 -0400 Subject: [PATCH 4/9] update proc-macro2 --- arrow-flight/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-flight/Cargo.toml b/arrow-flight/Cargo.toml index 04a1a937d082..941cc2bdbd53 100644 --- a/arrow-flight/Cargo.toml +++ b/arrow-flight/Cargo.toml @@ -39,7 +39,7 @@ futures = { version = "0.3", default-features = false, features = ["alloc"]} tonic-build = "0.4" # Pin specific version of the tonic-build dependencies to avoid auto-generated # (and checked in) arrow.flight.protocol.rs from changing -proc-macro2 = "=1.0.24" +proc-macro2 = "=1.0.27" #[lib] #name = "flight" From efec71cdbd0d252fd599492220cdc9a0414c112d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 22 Jun 2021 07:07:04 -0400 Subject: [PATCH 5/9] another --- arrow/benches/mutable_array.rs | 4 ++-- arrow/benches/take_kernels.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arrow/benches/mutable_array.rs b/arrow/benches/mutable_array.rs index 52da38a1d543..3a42ec1be3c3 100644 --- a/arrow/benches/mutable_array.rs +++ b/arrow/benches/mutable_array.rs @@ -31,8 +31,8 @@ fn create_slices(size: usize) -> Vec<(usize, usize)> { (0..size) .map(|_| { - let start = rng.gen_range(0, size / 2); - let end = rng.gen_range(start + 1, size); + let start = rng.gen_range(0..size / 2); + let end = rng.gen_range(start + 1..size); (start, end) }) .collect() diff --git a/arrow/benches/take_kernels.rs b/arrow/benches/take_kernels.rs index b1d03d70c9a7..5de198b3f5ae 100644 --- a/arrow/benches/take_kernels.rs +++ b/arrow/benches/take_kernels.rs @@ -35,7 +35,7 @@ fn create_random_index(size: usize, null_density: f32) -> UInt32Array { if rng.gen::() < null_density { builder.append_null().unwrap() } else { - let value = rng.gen_range::(0u32, size as u32); + let value = rng.gen_range::(0u32..size as u32); builder.append_value(value).unwrap(); } } From 1d228ccb9b41c35bddbeeb5c8714d54e8c89fcfd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 22 Jun 2021 15:14:41 -0400 Subject: [PATCH 6/9] Specify js feature of getrandom --- arrow/Cargo.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 79dc28148c95..5a14f1f0fafb 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -40,7 +40,10 @@ serde = { version = "1.0", features = ["rc"] } serde_derive = "1.0" serde_json = { version = "1.0", features = ["preserve_order"] } indexmap = "1.6" -rand = "0.8" +rand = { version = "0.8", default-features = false } +# getrandom is a dependency of rand, not (directly) of arrow +# need to specify `js` feature to build on wasm +getrandom = { version = "0.2", features = ["js"] } num = "0.4" csv_crate = { version = "1.1", optional = true, package="csv" } regex = "1.3" From e07ca053f51afc409325fc4accc799fbd85e0150 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 22 Jun 2021 16:34:52 -0400 Subject: [PATCH 7/9] enable rng std stuff --- arrow-pyarrow-integration-testing/Cargo.toml | 2 +- arrow/Cargo.toml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml index 34d243591724..d5b44023cf77 100644 --- a/arrow-pyarrow-integration-testing/Cargo.toml +++ b/arrow-pyarrow-integration-testing/Cargo.toml @@ -31,7 +31,7 @@ name = "arrow_pyarrow_integration_testing" crate-type = ["cdylib"] [dependencies] -arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT" } +arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT", features=["std"] } pyo3 = { version = "0.12.1", features = ["extension-module"] } [package.metadata.maturin] diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 5a14f1f0fafb..478f336787c1 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -64,6 +64,8 @@ csv = ["csv_crate"] ipc = ["flatbuffers"] simd = ["packed_simd"] prettyprint = ["prettytable-rs"] +# enable features that rely on std +std = ["rand/std", "rand/std_rng"] # this is only intended to be used in single-threaded programs: it verifies that # all allocated memory is being released (no memory leaks). # See README for details From e349b96a1b6d313e3c36aaae1f040f46e7dc1181 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 6 Jul 2021 14:29:14 -0400 Subject: [PATCH 8/9] checkpoint --- arrow-pyarrow-integration-testing/Cargo.toml | 2 +- arrow/Cargo.toml | 5 ++--- arrow/src/util/bit_util.rs | 3 ++- arrow/src/util/mod.rs | 3 +++ 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml index d5b44023cf77..34d243591724 100644 --- a/arrow-pyarrow-integration-testing/Cargo.toml +++ b/arrow-pyarrow-integration-testing/Cargo.toml @@ -31,7 +31,7 @@ name = "arrow_pyarrow_integration_testing" crate-type = ["cdylib"] [dependencies] -arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT", features=["std"] } +arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT" } pyo3 = { version = "0.12.1", features = ["extension-module"] } [package.metadata.maturin] diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 478f336787c1..09ecaa93f224 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -58,14 +58,13 @@ multiversion = "0.6.1" bitflags = "1.2.1" [features] -default = ["csv", "ipc"] +default = ["csv", "ipc", "randstd"] avx512 = [] csv = ["csv_crate"] ipc = ["flatbuffers"] simd = ["packed_simd"] prettyprint = ["prettytable-rs"] -# enable features that rely on std -std = ["rand/std", "rand/std_rng"] +randstd = ["rand/std", "rand/std_rng"] # this is only intended to be used in single-threaded programs: it verifies that # all allocated memory is being released (no memory leaks). # See README for details diff --git a/arrow/src/util/bit_util.rs b/arrow/src/util/bit_util.rs index d046f781f053..11ba9d2b576b 100644 --- a/arrow/src/util/bit_util.rs +++ b/arrow/src/util/bit_util.rs @@ -125,7 +125,8 @@ where simd_result.write_to_slice_unaligned_unchecked(result); } -#[cfg(test)] +// seednable_rng is not available when not build with randstd support +#[cfg(all(test, randstd))] mod tests { use std::collections::HashSet; diff --git a/arrow/src/util/mod.rs b/arrow/src/util/mod.rs index b2fd4f786616..e616f9ba2bec 100644 --- a/arrow/src/util/mod.rs +++ b/arrow/src/util/mod.rs @@ -15,9 +15,11 @@ // specific language governing permissions and limitations // under the License. +#[cfg(feature = "randstd")] pub mod bench_util; pub mod bit_chunk_iterator; pub mod bit_util; +#[cfg(feature = "randstd")] pub mod data_gen; pub mod display; pub mod integration_util; @@ -25,6 +27,7 @@ pub mod integration_util; pub mod pretty; pub(crate) mod serialization; pub mod string_writer; +#[cfg(feature = "randstd")] pub mod test_util; mod trusted_len; From 6ccb25a6d7c2a4d58c48bf6e0f341129184f696d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 6 Jul 2021 14:32:05 -0400 Subject: [PATCH 9/9] cleanup --- arrow-pyarrow-integration-testing/Cargo.toml | 2 ++ arrow/Cargo.toml | 3 +-- arrow/src/util/bit_util.rs | 3 +-- arrow/src/util/mod.rs | 3 --- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml index 34d243591724..55bbd77c8ab0 100644 --- a/arrow-pyarrow-integration-testing/Cargo.toml +++ b/arrow-pyarrow-integration-testing/Cargo.toml @@ -31,6 +31,8 @@ name = "arrow_pyarrow_integration_testing" crate-type = ["cdylib"] [dependencies] +# ensure we get the std version of rand so arrow builds without default features +rand = { version = "0.8" } arrow = { path = "../arrow", version = "5.0.0-SNAPSHOT" } pyo3 = { version = "0.12.1", features = ["extension-module"] } diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 09ecaa93f224..5a14f1f0fafb 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -58,13 +58,12 @@ multiversion = "0.6.1" bitflags = "1.2.1" [features] -default = ["csv", "ipc", "randstd"] +default = ["csv", "ipc"] avx512 = [] csv = ["csv_crate"] ipc = ["flatbuffers"] simd = ["packed_simd"] prettyprint = ["prettytable-rs"] -randstd = ["rand/std", "rand/std_rng"] # this is only intended to be used in single-threaded programs: it verifies that # all allocated memory is being released (no memory leaks). # See README for details diff --git a/arrow/src/util/bit_util.rs b/arrow/src/util/bit_util.rs index 11ba9d2b576b..d046f781f053 100644 --- a/arrow/src/util/bit_util.rs +++ b/arrow/src/util/bit_util.rs @@ -125,8 +125,7 @@ where simd_result.write_to_slice_unaligned_unchecked(result); } -// seednable_rng is not available when not build with randstd support -#[cfg(all(test, randstd))] +#[cfg(test)] mod tests { use std::collections::HashSet; diff --git a/arrow/src/util/mod.rs b/arrow/src/util/mod.rs index e616f9ba2bec..b2fd4f786616 100644 --- a/arrow/src/util/mod.rs +++ b/arrow/src/util/mod.rs @@ -15,11 +15,9 @@ // specific language governing permissions and limitations // under the License. -#[cfg(feature = "randstd")] pub mod bench_util; pub mod bit_chunk_iterator; pub mod bit_util; -#[cfg(feature = "randstd")] pub mod data_gen; pub mod display; pub mod integration_util; @@ -27,7 +25,6 @@ pub mod integration_util; pub mod pretty; pub(crate) mod serialization; pub mod string_writer; -#[cfg(feature = "randstd")] pub mod test_util; mod trusted_len;