Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Split "compute" feature #634

Merged
merged 1 commit into from
Nov 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ jobs:
with:
use-cross: true
command: check
args: --features=merge_sort,io_ipc,io_csv,io_print,io_json,io_parquet --target ${{ matrix.target }}
args: --features=compute_merge_sort,io_ipc,io_csv,io_print,io_json,io_parquet --target ${{ matrix.target }}

linux-simd-test:
name: SIMD
Expand Down
86 changes: 52 additions & 34 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,10 @@ full = [
"io_avro_compression",
"io_avro_async",
"regex",
"merge_sort",
"compute",
# parses timezones used in timestamp conversions
"chrono-tz"
]
merge_sort = ["itertools"]
io_csv = ["io_csv_read", "io_csv_write"]
io_csv_async = ["io_csv_read_async"]
io_csv_read = ["csv", "lexical-core"]
Expand Down Expand Up @@ -149,7 +147,56 @@ io_avro_async = ["io_avro", "futures"]
io_json_integration = ["io_json", "serde_derive", "hex"]
io_print = ["comfy-table"]
# the compute kernels. Disabling this significantly reduces compile time.
compute = ["strength_reduce", "multiversion", "lexical-core", "ahash"]
compute_aggregate = ["multiversion"]
compute_arithmetics = ["strength_reduce"]
compute_bitwise = []
compute_boolean = []
compute_boolean_kleene = []
compute_cast = ["lexical-core"]
compute_comparison = []
compute_concatenate = []
compute_contains = []
compute_filter = []
compute_hash = ["multiversion", "ahash"]
compute_if_then_else = []
compute_length = []
compute_like = ["regex"]
compute_limit = []
compute_merge_sort = ["itertools", "compute_sort"]
compute_nullif = ["compute_comparison"]
compute_partition = ["compute_sort"]
compute_regex_match = ["regex"]
compute_sort = ["compute_take"]
compute_substring = []
compute_take = []
compute_temporal = []
compute_window = ["compute_concatenate"]
compute = [
"compute_aggregate",
"compute_arithmetics",
"compute_bitwise",
"compute_boolean",
"compute_boolean_kleene",
"compute_cast",
"compute_comparison",
"compute_concatenate",
"compute_contains",
"compute_filter",
"compute_hash",
"compute_if_then_else",
"compute_length",
"compute_like",
"compute_limit",
"compute_merge_sort",
"compute_nullif",
"compute_partition",
"compute_regex_match",
"compute_sort",
"compute_substring",
"compute_take",
"compute_temporal",
"compute_window",
]
# base64 + io_ipc because arrow schemas are stored as base64-encoded ipc format.
io_parquet = ["parquet2", "io_ipc", "base64", "futures"]
benchmarks = ["rand"]
Expand All @@ -159,36 +206,7 @@ simd = ["packed_simd"]
cache_aligned = []

[package.metadata.cargo-all-features]
skip_feature_sets = [
# full is tested independently and is not to be used with other features.
["full"],
# very small scope with no API changes.
["ahash"],
["benchmarks"],
["merge_sort"],
# io are additive APIs and do not interact
["io_csv"],
["io_csv_read"],
["io_csv_write"],
["io_csv_async"],
["io_csv_read_async"],
["io_avro"],
["io_avro_async"],
["io_avro_compression"],
["io_json"],
["io_flight"],
["io_ipc"],
["io_ipc_write_async"],
["io_parquet"],
["io_json_integration"],
# this does not change the public API
["io_parquet_compression"],
["io_ipc_compression"],
# tested in separate
["simd"],
]

skip_optional_dependencies = true
allowlist = ["compute", "compute_sort", "compute_hash", "compute_nullif"]

[[bench]]
name = "take_kernels"
Expand Down Expand Up @@ -251,7 +269,7 @@ name = "bitmap"
harness = false

[[bench]]
name = "concat"
name = "concatenate"
harness = false

[[bench]]
Expand Down
10 changes: 5 additions & 5 deletions benches/concat.rs → benches/concatenate.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
extern crate arrow2;

use arrow2::{
compute::concat,
compute::concatenate::concatenate,
datatypes::DataType,
util::bench_util::{create_boolean_array, create_primitive_array},
};
Expand All @@ -17,15 +17,15 @@ fn add_benchmark(c: &mut Criterion) {

c.bench_function(&format!("int32 concat aligned 2^{}", log2_size), |b| {
b.iter(|| {
let _ = concat::concatenate(&[&array1, &array2]);
let _ = concatenate(&[&array1, &array2]);
})
});

let array1 = create_primitive_array::<i32>(9, DataType::Int32, 0.5);

c.bench_function(&format!("int32 concat unaligned 2^{}", log2_size), |b| {
b.iter(|| {
let _ = concat::concatenate(&[&array1, &array2]);
let _ = concatenate(&[&array1, &array2]);
})
});

Expand All @@ -34,15 +34,15 @@ fn add_benchmark(c: &mut Criterion) {

c.bench_function(&format!("boolean concat aligned 2^{}", log2_size), |b| {
b.iter(|| {
let _ = concat::concatenate(&[&array1, &array2]);
let _ = concatenate(&[&array1, &array2]);
})
});

let array1 = create_boolean_array(9, 0.5, 0.5);

c.bench_function(&format!("boolean concat unaligned 2^{}", log2_size), |b| {
b.iter(|| {
let _ = concat::concatenate(&[&array1, &array2]);
let _ = concatenate(&[&array1, &array2]);
})
});
});
Expand Down
31 changes: 0 additions & 31 deletions src/compute/arithmetics/basic/common.rs

This file was deleted.

2 changes: 1 addition & 1 deletion src/compute/arithmetics/basic/div.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ use std::ops::Div;

use num_traits::{CheckedDiv, NumCast, Zero};

use crate::compute::arithmetics::basic::check_same_len;
use crate::datatypes::DataType;
use crate::{
array::{Array, PrimitiveArray},
compute::{
arithmetics::{ArrayCheckedDiv, ArrayDiv, NativeArithmetics},
arity::{binary, binary_checked, unary, unary_checked},
utils::check_same_len,
},
types::NativeType,
};
Expand Down
3 changes: 0 additions & 3 deletions src/compute/arithmetics/basic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@ pub use rem::*;
mod sub;
pub use sub::*;

mod common;
pub(crate) use common::*;

use std::ops::Neg;

use num_traits::{CheckedNeg, WrappingNeg};
Expand Down
3 changes: 1 addition & 2 deletions src/compute/arithmetics/decimal/add.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
//! Defines the addition arithmetic kernels for [`PrimitiveArray`] representing decimals.
use crate::compute::arithmetics::basic::check_same_len;
use crate::{
array::{Array, PrimitiveArray},
buffer::Buffer,
compute::{
arithmetics::{ArrayAdd, ArrayCheckedAdd, ArraySaturatingAdd},
arity::{binary, binary_checked},
utils::combine_validities,
utils::{check_same_len, combine_validities},
},
};
use crate::{
Expand Down
3 changes: 1 addition & 2 deletions src/compute/arithmetics/decimal/div.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
//! Defines the division arithmetic kernels for Decimal
//! `PrimitiveArrays`.

use crate::compute::arithmetics::basic::check_same_len;
use crate::{
array::{Array, PrimitiveArray},
buffer::Buffer,
compute::{
arithmetics::{ArrayCheckedDiv, ArrayDiv},
arity::{binary, binary_checked},
utils::combine_validities,
utils::{check_same_len, combine_validities},
},
datatypes::DataType,
error::{ArrowError, Result},
Expand Down
3 changes: 1 addition & 2 deletions src/compute/arithmetics/decimal/mul.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
//! Defines the multiplication arithmetic kernels for Decimal
//! `PrimitiveArrays`.

use crate::compute::arithmetics::basic::check_same_len;
use crate::{
array::{Array, PrimitiveArray},
buffer::Buffer,
compute::{
arithmetics::{ArrayCheckedMul, ArrayMul, ArraySaturatingMul},
arity::{binary, binary_checked},
utils::combine_validities,
utils::{check_same_len, combine_validities},
},
datatypes::DataType,
error::{ArrowError, Result},
Expand Down
3 changes: 1 addition & 2 deletions src/compute/arithmetics/decimal/sub.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
//! Defines the subtract arithmetic kernels for Decimal `PrimitiveArrays`.

use crate::compute::arithmetics::basic::check_same_len;
use crate::{
array::{Array, PrimitiveArray},
buffer::Buffer,
compute::{
arithmetics::{ArrayCheckedSub, ArraySaturatingSub, ArraySub},
arity::{binary, binary_checked},
utils::combine_validities,
utils::{check_same_len, combine_validities},
},
datatypes::DataType,
error::{ArrowError, Result},
Expand Down
3 changes: 1 addition & 2 deletions src/compute/arity.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
//! Defines kernels suitable to perform operations to primitive arrays.
use super::utils::combine_validities;
use crate::compute::arithmetics::basic::check_same_len;
use super::utils::{check_same_len, combine_validities};
use crate::{
array::PrimitiveArray,
bitmap::{Bitmap, MutableBitmap},
Expand Down
23 changes: 3 additions & 20 deletions src/compute/concat.rs → src/compute/concatenate.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,14 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Contains the concatenate kernel
//!
//! Example:
//!
//! ```
//! use arrow2::array::Utf8Array;
//! use arrow2::compute::concat::concatenate;
//! use arrow2::compute::concatenate::concatenate;
//!
//! let arr = concatenate(&[
//! &Utf8Array::<i32>::from_slice(vec!["hello", "world"]),
//! &Utf8Array::<i32>::from_slice(vec!["!"]),
//! &Utf8Array::<i32>::from_slice(["hello", "world"]),
//! &Utf8Array::<i32>::from_slice(["!"]),
//! ]).unwrap();
//! assert_eq!(arr.len(), 3);
//! ```
Expand Down
4 changes: 1 addition & 3 deletions src/compute/contains.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
//! Declares the [`contains`] operator

use crate::types::NativeType;
use crate::{
array::{Array, BinaryArray, BooleanArray, ListArray, Offset, PrimitiveArray, Utf8Array},
bitmap::Bitmap,
};
use crate::{
datatypes::DataType,
error::{ArrowError, Result},
types::NativeType,
};

use super::utils::combine_validities;
Expand Down
4 changes: 2 additions & 2 deletions src/compute/if_then_else.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Contains the operator [`if_then_else`].
use crate::array::growable;
use crate::array::{growable, Array, BooleanArray};
use crate::bitmap::utils::SlicesIterator;
use crate::error::{ArrowError, Result};
use crate::{array::*, bitmap::utils::SlicesIterator};

/// Returns the values from `lhs` if the predicate is `true` or from the `rhs` if the predicate is `false`.
/// Returns `None` if the predicate is `None`.
Expand Down
5 changes: 3 additions & 2 deletions src/compute/like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ use std::collections::HashMap;
use regex::bytes::Regex as BytesRegex;
use regex::Regex;

use crate::datatypes::DataType;
use crate::{array::*, bitmap::Bitmap};
use crate::{
array::{BinaryArray, BooleanArray, Offset, Utf8Array},
bitmap::Bitmap,
compute::utils::combine_validities,
datatypes::DataType,
error::{ArrowError, Result},
};

Expand Down
Loading