
Mitigated RUSTSEC-2021-0122 #595

Merged 1 commit on Nov 10, 2021
4 changes: 4 additions & 0 deletions .cargo/audit.toml
@@ -10,4 +10,8 @@ ignore = [
# Therefore, this advisory does not affect us.
"RUSTSEC-2020-0071",
"RUSTSEC-2020-0159", # same as previous

# this cannot be addressed, only mitigated.
# See [.github/workflows/security.yml] for details on how we mitigate this.
"RUSTSEC-2021-0122",
]
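
For reference, this is the shape `cargo-audit` expects in `.cargo/audit.toml`. A minimal sketch, assuming the `[advisories]` header sits above the elided lines of this hunk:

```toml
[advisories]
ignore = [
    "RUSTSEC-2020-0071",
    "RUSTSEC-2020-0159", # same as previous
    "RUSTSEC-2021-0122", # cannot be fixed here, only mitigated (see security.yml)
]
```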
24 changes: 24 additions & 0 deletions .github/workflows/security.yml
@@ -8,3 +8,27 @@ jobs:
- uses: actions-rs/audit-check@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}

  # mitigation for RUSTSEC-2021-0122:
  # flatbuffers' use of `unsafe` is problematic and a risk.
  # This job performs a round-trip over IPC (which uses flatbuffers) for some arrow types
  # under miri, which exercises much of the `flatbuffers` usage in this crate.
miri-checks:
name: RUSTSEC-2021-0122 mitigation
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
          submodules: true # needed to test IPC; the test files are located in a submodule
- uses: actions-rs/toolchain@v1
with:
toolchain: nightly-2021-10-24
override: true
- uses: Swatinem/rust-cache@v1
- name: Install Miri
run: |
rustup component add miri
cargo miri setup
- name: Run
run: MIRIFLAGS="-Zmiri-disable-stacked-borrows -Zmiri-disable-isolation" cargo miri test --tests --features io_ipc,io_ipc_compression,io_json_integration io::ipc::write::file::write_100_nested
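
The same command the workflow runs can reproduce the mitigation locally, assuming the pinned nightly toolchain is installed:

```sh
rustup toolchain install nightly-2021-10-24
rustup component add miri --toolchain nightly-2021-10-24
cargo +nightly-2021-10-24 miri setup
MIRIFLAGS="-Zmiri-disable-stacked-borrows -Zmiri-disable-isolation" \
  cargo +nightly-2021-10-24 miri test --tests \
  --features io_ipc,io_ipc_compression,io_json_integration \
  io::ipc::write::file::write_100_nested
```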
89 changes: 76 additions & 13 deletions tests/it/io/ipc/write/file.rs
@@ -14,7 +14,7 @@ fn round_trip(batch: RecordBatch) -> Result<()> {
// write IPC version 5
let written_result = {
let options = WriteOptions {
compression: Some(Compression::ZSTD),
compression: Some(Compression::LZ4),
};
let mut writer = FileWriter::try_new(result, batch.schema(), options)?;
writer.write(&batch)?;
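
For context, the write half of `round_trip` uses the crate's IPC `FileWriter` as shown in this hunk. A minimal sketch assembled from the lines above; the trailing `finish` call is an assumption, since the rest of the function is elided here:

```rust
use arrow2::io::ipc::write::{Compression, FileWriter, WriteOptions};

// Serialize one batch into an IPC file, compressing buffers with LZ4.
let options = WriteOptions {
    compression: Some(Compression::LZ4),
};
let mut writer = FileWriter::try_new(result, batch.schema(), options)?;
writer.write(&batch)?;
writer.finish()?; // assumed: writes the IPC footer (elided from this hunk)
```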
@@ -85,39 +85,64 @@ fn test_file(version: &str, file_name: &str, compressed: bool) -> Result<()> {
#[test]
fn write_100_primitive() -> Result<()> {
test_file("1.0.0-littleendian", "generated_primitive", false)?;
test_file("1.0.0-bigendian", "generated_primitive", false)?;
test_file("1.0.0-bigendian", "generated_primitive", false)
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_100_compressed_primitive() -> Result<()> {
test_file("1.0.0-littleendian", "generated_primitive", true)?;
test_file("1.0.0-bigendian", "generated_primitive", true)
}
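
The `#[cfg_attr(miri, ignore)]` pattern used throughout this file relies on miri compiling tests with `cfg(miri)` set, so the attribute expands to `#[ignore]` under `cargo miri test` and to nothing under a plain `cargo test`. A minimal sketch (the test name here is hypothetical):

```rust
#[test]
#[cfg_attr(miri, ignore)] // compression goes through FFI, which miri cannot execute
fn hypothetical_compressed_round_trip() -> Result<()> {
    test_file("1.0.0-littleendian", "generated_primitive", true)
}
```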

#[test]
fn write_100_datetime() -> Result<()> {
test_file("1.0.0-littleendian", "generated_datetime", false)?;
test_file("1.0.0-bigendian", "generated_datetime", false)?;
test_file("1.0.0-bigendian", "generated_datetime", false)
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_100_compressed_datetime() -> Result<()> {
test_file("1.0.0-littleendian", "generated_datetime", true)?;
test_file("1.0.0-bigendian", "generated_datetime", true)
}

#[test]
fn write_100_dictionary_unsigned() -> Result<()> {
test_file("1.0.0-littleendian", "generated_dictionary_unsigned", false)?;
test_file("1.0.0-bigendian", "generated_dictionary_unsigned", false)?;
test_file("1.0.0-bigendian", "generated_dictionary_unsigned", false)
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_100_compressed_dictionary_unsigned() -> Result<()> {
test_file("1.0.0-littleendian", "generated_dictionary_unsigned", true)?;
test_file("1.0.0-bigendian", "generated_dictionary_unsigned", true)
}

#[test]
fn write_100_dictionary() -> Result<()> {
test_file("1.0.0-littleendian", "generated_dictionary", false)?;
test_file("1.0.0-bigendian", "generated_dictionary", false)?;
test_file("1.0.0-bigendian", "generated_dictionary", false)
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_100_compressed_dictionary() -> Result<()> {
test_file("1.0.0-littleendian", "generated_dictionary", true)?;
test_file("1.0.0-bigendian", "generated_dictionary", true)
}

#[test]
fn write_100_interval() -> Result<()> {
test_file("1.0.0-littleendian", "generated_interval", false)?;
test_file("1.0.0-bigendian", "generated_interval", false)?;
test_file("1.0.0-bigendian", "generated_interval", false)
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_100_compressed_interval() -> Result<()> {
test_file("1.0.0-littleendian", "generated_interval", true)?;
test_file("1.0.0-bigendian", "generated_interval", true)
}
@@ -132,7 +157,12 @@ fn write_100_large_batch() -> Result<()> {
#[test]
fn write_100_nested() -> Result<()> {
test_file("1.0.0-littleendian", "generated_nested", false)?;
test_file("1.0.0-bigendian", "generated_nested", false)?;
test_file("1.0.0-bigendian", "generated_nested", false)
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_100_compressed_nested() -> Result<()> {
test_file("1.0.0-littleendian", "generated_nested", true)?;
test_file("1.0.0-bigendian", "generated_nested", true)
}
@@ -144,23 +174,38 @@ fn write_100_nested_large_offsets() -> Result<()> {
"generated_nested_large_offsets",
false,
)?;
test_file("1.0.0-bigendian", "generated_nested_large_offsets", false)?;
test_file("1.0.0-bigendian", "generated_nested_large_offsets", false)
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_100_compressed_nested_large_offsets() -> Result<()> {
test_file("1.0.0-littleendian", "generated_nested_large_offsets", true)?;
test_file("1.0.0-bigendian", "generated_nested_large_offsets", true)
}

#[test]
fn write_100_null_trivial() -> Result<()> {
test_file("1.0.0-littleendian", "generated_null_trivial", false)?;
test_file("1.0.0-bigendian", "generated_null_trivial", false)?;
test_file("1.0.0-bigendian", "generated_null_trivial", false)
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_100_compressed_null_trivial() -> Result<()> {
test_file("1.0.0-littleendian", "generated_null_trivial", true)?;
test_file("1.0.0-bigendian", "generated_null_trivial", true)
}

#[test]
fn write_100_null() -> Result<()> {
test_file("1.0.0-littleendian", "generated_null", false)?;
test_file("1.0.0-bigendian", "generated_null", false)?;
test_file("1.0.0-bigendian", "generated_null", false)
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_100_compressed_null() -> Result<()> {
test_file("1.0.0-littleendian", "generated_null", true)?;
test_file("1.0.0-bigendian", "generated_null", true)
}
@@ -176,7 +221,12 @@ fn write_100_primitive_large_offsets() -> Result<()> {
"1.0.0-bigendian",
"generated_primitive_large_offsets",
false,
)?;
)
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_100_compressed_primitive_large_offsets() -> Result<()> {
test_file(
"1.0.0-littleendian",
"generated_primitive_large_offsets",
@@ -192,7 +242,12 @@ fn write_100_primitive_no_batches() -> Result<()> {
"generated_primitive_no_batches",
false,
)?;
test_file("1.0.0-bigendian", "generated_primitive_no_batches", false)?;
test_file("1.0.0-bigendian", "generated_primitive_no_batches", false)
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_100_compressed_primitive_no_batches() -> Result<()> {
test_file("1.0.0-littleendian", "generated_primitive_no_batches", true)?;
test_file("1.0.0-bigendian", "generated_primitive_no_batches", true)
}
@@ -204,7 +259,12 @@ fn write_100_primitive_zerolength() -> Result<()> {
"generated_primitive_zerolength",
false,
)?;
test_file("1.0.0-bigendian", "generated_primitive_zerolength", false)?;
test_file("1.0.0-bigendian", "generated_primitive_zerolength", false)
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_100_compressed_primitive_zerolength() -> Result<()> {
test_file("1.0.0-littleendian", "generated_primitive_zerolength", true)?;
test_file("1.0.0-bigendian", "generated_primitive_zerolength", true)
}
@@ -262,6 +322,7 @@ fn write_generated_017_union() -> Result<()> {
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_boolean() -> Result<()> {
use std::sync::Arc;
let array = Arc::new(BooleanArray::from([
@@ -275,6 +336,7 @@ fn write_boolean() -> Result<()> {
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_sliced_utf8() -> Result<()> {
use std::sync::Arc;
let array = Arc::new(Utf8Array::<i32>::from_slice(["aa", "bb"]).slice(1, 1)) as Arc<dyn Array>;
@@ -283,6 +345,7 @@ fn write_sliced_utf8() -> Result<()> {
}

#[test]
#[cfg_attr(miri, ignore)] // compression uses FFI, which miri does not support
fn write_sliced_list() -> Result<()> {
let data = vec![
Some(vec![Some(1i32), Some(2), Some(3)]),
88 changes: 1 addition & 87 deletions tests/it/test_util.rs
@@ -1,89 +1,3 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Utils to make testing easier
use std::{env, error::Error, path::PathBuf};

/// Returns the arrow test data directory, which is by default stored
/// in a git submodule rooted at `arrow/testing/data`.
///
/// The default can be overridden by the optional environment
/// variable `ARROW_TEST_DATA`
///
/// panics when the directory can not be found.
///
/// Example:
/// ```
/// let testdata = arrow::util::test_util::arrow_test_data();
/// let csvdata = format!("{}/csv/aggregate_test_100.csv", testdata);
/// assert!(std::path::PathBuf::from(csvdata).exists());
/// ```
pub fn arrow_test_data() -> String {
match get_data_dir("ARROW_TEST_DATA", "testing/arrow-testing/data") {
Ok(pb) => pb.display().to_string(),
Err(err) => panic!("failed to get arrow data dir: {}", err),
}
}

/// Returns a directory path for finding test data.
///
/// udf_env: name of an environment variable
///
/// submodule_dir: fallback path (relative to CARGO_MANIFEST_DIR)
///
/// Returns either:
/// The path referred to in `udf_env` if that variable is set and refers to a directory
/// The submodule_data directory relative to CARGO_MANIFEST_PATH
fn get_data_dir(udf_env: &str, submodule_data: &str) -> Result<PathBuf, Box<dyn Error>> {
// Try user defined env.
if let Ok(dir) = env::var(udf_env) {
let trimmed = dir.trim().to_string();
if !trimmed.is_empty() {
let pb = PathBuf::from(trimmed);
if pb.is_dir() {
return Ok(pb);
} else {
return Err(format!(
"the data dir `{}` defined by env {} not found",
pb.display().to_string(),
udf_env
)
.into());
}
}
}

// The env is undefined or its value is trimmed to empty, let's try default dir.

// env "CARGO_MANIFEST_DIR" is "the directory containing the manifest of your package",
// set by `cargo run` or `cargo test`, see:
// https://doc.rust-lang.org/cargo/reference/environment-variables.html
let dir = env!("CARGO_MANIFEST_DIR");

let pb = PathBuf::from(dir).join(submodule_data);
if pb.is_dir() {
Ok(pb)
} else {
Err(format!(
"env `{}` is undefined or has empty value, and the pre-defined data dir `{}` not found\n\
HINT: try running `git submodule update --init`",
udf_env,
pb.display().to_string(),
).into())
}
"testing/arrow-testing/data".to_string()
}
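
After this change, the helper no longer consults `ARROW_TEST_DATA` or verifies that the directory exists; reassembled from the retained lines above, the entire file reduces to:

```rust
pub fn arrow_test_data() -> String {
    "testing/arrow-testing/data".to_string()
}
```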