Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: add arrow-rs features for buffer and array interop #100

Merged
merged 31 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
4d0d333
Setup `arrow-buffer` interop
mbrobbel Aug 1, 2023
3cfe877
Some fixes and tests
mbrobbel Aug 1, 2023
7b5dab9
Use `BufferBuilder` abstraction
mbrobbel Aug 4, 2023
26e746c
Add `arrow-array` feature for zero-copy array interop
mbrobbel Aug 4, 2023
e2b40f1
Make conversion generic over buffer type
mbrobbel Aug 5, 2023
44e3567
Add `BufferType` implementation for `arrow_buffer::ScalarBuffer`
mbrobbel Aug 5, 2023
bdb17bc
Merge branch 'main' into arrow-array
mbrobbel Aug 5, 2023
47de9f0
Bump MSRV to `1.70.0`
mbrobbel Aug 5, 2023
9494ce2
Implicit conversion to `ArrowBuffer` from `VecBuffer` is now supported
mbrobbel Aug 7, 2023
42ba214
Merge branch 'main' into arrow-array
mbrobbel Aug 7, 2023
c5c09f0
Fix warning
mbrobbel Aug 7, 2023
e39ad3b
Move `arrow-rs` interop to `arrow` module
mbrobbel Aug 7, 2023
afbb962
Rename feature to `arrow-rs`
mbrobbel Aug 7, 2023
21547b1
Add `BooleanArray` conversion
mbrobbel Aug 7, 2023
8ddf760
Merge branch 'main' into arrow-array
mbrobbel Aug 7, 2023
8ee0a2c
Merge branch 'main' into arrow-array
mbrobbel Dec 6, 2023
1c7ee08
Update MSRV in README.md
mbrobbel Dec 6, 2023
6c43438
Change interaction with `ArrowNativeType`
mbrobbel Dec 7, 2023
ef98d35
Add `cargo-expand` to added `minimal` job
mbrobbel Dec 7, 2023
e8f94b1
And also to `msrv` job
mbrobbel Dec 7, 2023
25c7367
Fix some clippy warnings
mbrobbel Dec 7, 2023
12dd332
Fix miri job
mbrobbel Dec 7, 2023
4b24ad9
Don't run miri on expand tests
mbrobbel Dec 7, 2023
ea99db7
Split out implementations and add more tests
mbrobbel Dec 11, 2023
66b4970
Generalize stringarray extend impl
mbrobbel Dec 11, 2023
bc0f459
Generalize more string array methods
mbrobbel Dec 11, 2023
094f3a0
Add parquet example
mbrobbel Dec 11, 2023
d9c076f
Remove a comment
mbrobbel Dec 11, 2023
fefa1bc
Fix build issues
mbrobbel Dec 12, 2023
8f0c5b2
Add direct `RecordBatch` conversion for `StructArray`
mbrobbel Dec 12, 2023
eb7edbb
Use `apache/arrow-rs` instead of fork and update default features
mbrobbel Dec 12, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-bench-
${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-
${{ runner.os }}-cargo-
- run: cargo bench --bench narrow -- --output-format=bencher | tee output.txt
- run: cargo bench --bench narrow --all-features -- --output-format=bencher | tee output.txt
- uses: actions/upload-artifact@v3
with:
name: benchmark-results
Expand Down
49 changes: 41 additions & 8 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,44 @@ permissions:
contents: read

jobs:
# Job `minimal`: checks, tests (targets and doc tests) and lints the workspace
# with default features disabled; every cargo step in this job passes
# `--no-default-features`, and clippy treats warnings as errors (`-Dwarnings`).
minimal:
name: Minimal
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
# Stable toolchain; the step id lets the cache key below read this step's
# `cachekey` output.
- uses: dtolnay/rust-toolchain@stable
id: rust-toolchain
# Installs the `cargo-expand` crate.
# NOTE(review): presumably required by macro-expansion tests - confirm.
- uses: dtolnay/install@master
with:
crate: cargo-expand
# Caches the cargo bin/registry/git directories and `target/`. The key combines
# the runner OS, the toolchain cachekey, the job name and a hash of every
# Cargo.toml; the restore keys fall back to progressively shorter prefixes.
- uses: actions/cache@v3
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
target/
key: ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-minimal-${{ hashFiles('**/Cargo.toml') }}
restore-keys: |
${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-minimal-
${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-
${{ runner.os }}-cargo-
- run: cargo check --workspace --all-targets --no-default-features
- run: cargo test --workspace --all-targets --no-default-features
- run: cargo test --workspace --doc --no-default-features
- run: cargo clippy --workspace --all-targets --no-default-features -- -Dwarnings

msrv:
name: Minimum supported Rust version
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@1.67.0
- uses: dtolnay/rust-toolchain@1.70.0
id: rust-toolchain
- uses: dtolnay/install@master
with:
crate: cargo-expand
- uses: actions/cache@v3
with:
path: |
Expand All @@ -27,8 +58,9 @@ jobs:
${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-msrv-
${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-
${{ runner.os }}-cargo-
- run: cargo check --all --all-features

- run: cargo check --workspace --all-targets --all-features
- run: cargo test --all --all-targets --all-features
- run: cargo test --all --doc --all-features
check:
name: Check
runs-on: ubuntu-latest
Expand Down Expand Up @@ -75,8 +107,8 @@ jobs:
- uses: dtolnay/install@master
with:
crate: cargo-expand
- run: cargo test --all --all-targets --all-features
- run: cargo test --all --doc --all-features
- run: cargo test --workspace --all-targets --all-features
- run: cargo test --workspace --doc --all-features

rustfmt:
name: Rustfmt
Expand Down Expand Up @@ -110,7 +142,7 @@ jobs:
${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-clippy-
${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-
${{ runner.os }}-cargo-
- run: cargo clippy --all --all-targets --all-features -- -Dwarnings
- run: cargo clippy --workspace --all-targets --all-features -- -Dwarnings

miri:
name: Miri
Expand All @@ -133,6 +165,7 @@ jobs:
${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-
${{ runner.os }}-cargo-
- run: cargo miri setup
- run: cargo miri test --no-default-features
- run: cargo miri test --all-features

coverage:
Expand Down Expand Up @@ -163,8 +196,8 @@ jobs:
- uses: dtolnay/install@master
with:
crate: cargo-expand
- run: cargo build --all --all-targets --all-features
- run: cargo test --all --all-targets --all-features
- run: cargo build --workspace --all-targets --all-features
- run: cargo test --workspace --all-targets --all-features
env:
LLVM_PROFILE_FILE: "narrow-%p-%m.profraw"
- name: Install grcov
Expand Down
13 changes: 12 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[workspace.package]
authors = ["Matthijs Brobbel <[email protected]>"]
edition = "2021"
rust-version = "1.67.0"
rust-version = "1.70.0"
description = "An implementation of Apache Arrow"
readme = "README.md"
repository = "https://github.com/mbrobbel/narrow"
Expand Down Expand Up @@ -29,14 +29,21 @@ categories.workspace = true

[features]
default = []
arrow-rs = ["dep:arrow-array", "dep:arrow-buffer", "dep:arrow-schema", "narrow-derive?/arrow-rs"]
derive = ["dep:narrow-derive"]

[dependencies]
arrow-array = { git = "https://github.com/apache/arrow-rs", rev = "7fd2d42", optional = true }
arrow-buffer = { git = "https://github.com/apache/arrow-rs", rev = "7fd2d42", optional = true }
arrow-schema = { git = "https://github.com/apache/arrow-rs", rev = "7fd2d42", optional = true }
narrow-derive = { path = "narrow-derive", version = "^0.3.4", optional = true }

[dev-dependencies]
arrow-cast = { git = "https://github.com/apache/arrow-rs", rev = "7fd2d42", default-features = false, features = ["prettyprint"] }
bytes = "1.5.0"
criterion = { version = "0.5.1", default-features = false }
rand = { version = "0.8.5", default-features = false, features = ["small_rng"] }
parquet = { git = "https://github.com/apache/arrow-rs", rev = "7fd2d42", features = ["arrow"] }

[profile.bench]
lto = true
Expand All @@ -45,3 +52,7 @@ codegen-units = 1
[[bench]]
name = "narrow"
harness = false

[[example]]
name = "parquet"
required-features = ["arrow-rs", "derive"]
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ This crate provides types to support reading and writing instances of abstract d

## Minimum supported Rust version

The minimum supported Rust version for this crate is Rust 1.67.0.
The minimum supported Rust version for this crate is Rust 1.70.0.

## License

Expand Down
55 changes: 55 additions & 0 deletions examples/parquet.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/// Round-trips a derived `narrow` struct array through an in-memory Parquet file.
///
/// The example builds two `Foo` rows, collects them into a `StructArray`,
/// converts that array into an arrow `RecordBatch`, writes the batch to a
/// `Vec` with `ArrowWriter`, reads it back with `ParquetRecordBatchReader`,
/// prints both batches and asserts that they are equal.
///
/// Every fallible step is unwrapped, so any failure panics.
fn main() {
    use arrow_array::RecordBatch;
    use arrow_cast::pretty;
    use bytes::Bytes;
    use narrow::{array::StructArray, arrow::buffer_builder::ArrowBufferBuilder, ArrayType};
    use parquet::arrow::{arrow_reader::ParquetRecordBatchReader, ArrowWriter};

    // Tuple struct wrapping an optional boolean; used as a nested field of `Foo`.
    #[derive(ArrayType, Default)]
    struct Bar(Option<bool>);

    // Row type mixing required, optional, string, list and nested struct fields.
    #[derive(ArrayType, Default)]
    struct Foo {
        a: u32,
        b: Option<u8>,
        c: bool,
        d: String,
        e: Option<Vec<Option<bool>>>,
        f: Bar,
    }

    // First row: every optional field is populated.
    let populated = Foo {
        a: 1,
        b: Some(2),
        c: true,
        d: String::from("hello world!"),
        e: Some(vec![Some(true), None]),
        f: Bar(Some(true)),
    };
    // Second row: every optional field is `None`.
    let sparse = Foo {
        a: 42,
        b: None,
        c: false,
        d: String::from("narrow"),
        e: None,
        f: Bar(None),
    };

    // Collect the rows into a narrow struct array built with `ArrowBufferBuilder`.
    // NOTE(review): the `false` const argument presumably marks the array as
    // non-nullable - confirm against `StructArray`'s definition.
    let array: StructArray<Foo, false, ArrowBufferBuilder> =
        [populated, sparse].into_iter().collect();

    // Convert into an arrow-rs record batch and show it.
    let batch: RecordBatch = array.into();
    pretty::print_batches(&[batch.clone()]).unwrap();

    // Encode the batch as Parquet into an in-memory byte vector.
    let mut encoded = Vec::new();
    let mut writer = ArrowWriter::try_new(&mut encoded, batch.schema(), None).unwrap();
    writer.write(&batch).unwrap();
    writer.close().unwrap();

    // Decode the first batch again.
    // NOTE(review): 1024 is presumably the reader's batch size - confirm.
    let mut reader = ParquetRecordBatchReader::try_new(Bytes::from(encoded), 1024).unwrap();
    let decoded = reader.next().unwrap().unwrap();
    pretty::print_batches(&[decoded.clone()]).unwrap();

    // The decoded batch must equal the one that was written.
    assert_eq!(batch, decoded);
}

Check warning on line 55 in examples/parquet.rs

View check run for this annotation

Codecov / codecov/patch

examples/parquet.rs#L1-L55

Added lines #L1 - L55 were not covered by tests
4 changes: 4 additions & 0 deletions narrow-derive/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ license.workspace = true
keywords.workspace = true
categories.workspace = true

[features]
default = []
arrow-rs = []

[lib]
proc-macro = true

Expand Down
3 changes: 2 additions & 1 deletion narrow-derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ const CRATE: &str = "narrow";

static NARROW: Lazy<String> = Lazy::new(|| match proc_macro_crate::crate_name(CRATE) {
Ok(found) => match found {
FoundCrate::Itself => "crate".to_string(),
// Requires `extern crate self as narrow`
FoundCrate::Itself => CRATE.to_string(),
FoundCrate::Name(name) => name,
},
_ => CRATE.to_string(),
Expand Down
Loading