Fix Clippy for the Rust 1.80 release (#6116)
* Fix clippy lints in arrow-data

* Fix clippy errors in arrow-array

* Fix clippy in concat

* Fix clippy in arrow-string

* Remove unnecessary feature in arrow-array

* Fix clippy in arrow-cast

* Fix clippy in parquet crate

* Fix clippy in arrow-flight
alamb authored Jul 25, 2024
1 parent fa2fbfd commit 3ebb033
Showing 21 changed files with 108 additions and 147 deletions.
1 change: 1 addition & 0 deletions arrow-array/Cargo.toml
@@ -52,6 +52,7 @@ hashbrown = { version = "0.14.2", default-features = false }

[features]
ffi = ["arrow-schema/ffi", "arrow-data/ffi"]
force_validate = []

[dev-dependencies]
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
2 changes: 1 addition & 1 deletion arrow-array/src/array/byte_view_array.rs
@@ -66,7 +66,7 @@ use super::ByteArrayType;
/// * Strings with length <= 12 are stored directly in the view.
///
/// * Strings with length > 12: The first four bytes are stored inline in the
-/// view and the entire string is stored in one of the buffers.
+///   view and the entire string is stored in one of the buffers.
///
/// Unlike [`GenericByteArray`], there are no constraints on the offsets other
/// than that they must point into a valid buffer. However, they can be out of order,
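As an editor's aside, the view layout described above can be exercised through `StringViewArray` (the string instantiation of this layout); a minimal sketch with hypothetical values:

```rust
use arrow_array::{Array, StringViewArray};

fn main() {
    let array = StringViewArray::from(vec![
        "short str",                      // <= 12 bytes: stored inline in the view
        "a string longer than 12 bytes",  // > 12 bytes: 4-byte prefix + buffer offset
    ]);

    // Both values round-trip regardless of where they are stored
    assert_eq!(array.value(0), "short str");
    assert_eq!(array.value(1), "a string longer than 12 bytes");
    assert_eq!(array.len(), 2);
}
```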
64 changes: 0 additions & 64 deletions arrow-array/src/array/string_array.rs
@@ -376,70 +376,6 @@ mod tests {
.expect("All null array has valid array data");
}

#[cfg(feature = "test_utils")]
#[test]
fn bad_size_collect_string() {
use crate::util::test_util::BadIterator;
let data = vec![Some("foo"), None, Some("bar")];
let expected: StringArray = data.clone().into_iter().collect();

// Iterator reports too many items
let arr: StringArray = BadIterator::new(3, 10, data.clone()).collect();
assert_eq!(expected, arr);

// Iterator reports too few items
let arr: StringArray = BadIterator::new(3, 1, data.clone()).collect();
assert_eq!(expected, arr);
}

#[cfg(feature = "test_utils")]
#[test]
fn bad_size_collect_large_string() {
use crate::util::test_util::BadIterator;
let data = vec![Some("foo"), None, Some("bar")];
let expected: LargeStringArray = data.clone().into_iter().collect();

// Iterator reports too many items
let arr: LargeStringArray = BadIterator::new(3, 10, data.clone()).collect();
assert_eq!(expected, arr);

// Iterator reports too few items
let arr: LargeStringArray = BadIterator::new(3, 1, data.clone()).collect();
assert_eq!(expected, arr);
}

#[cfg(feature = "test_utils")]
#[test]
fn bad_size_iter_values_string() {
use crate::util::test_util::BadIterator;
let data = vec!["foo", "bar", "baz"];
let expected: StringArray = data.clone().into_iter().map(Some).collect();

// Iterator reports too many items
let arr = StringArray::from_iter_values(BadIterator::new(3, 10, data.clone()));
assert_eq!(expected, arr);

// Iterator reports too few items
let arr = StringArray::from_iter_values(BadIterator::new(3, 1, data.clone()));
assert_eq!(expected, arr);
}

#[cfg(feature = "test_utils")]
#[test]
fn bad_size_iter_values_large_string() {
use crate::util::test_util::BadIterator;
let data = vec!["foo", "bar", "baz"];
let expected: LargeStringArray = data.clone().into_iter().map(Some).collect();

// Iterator reports too many items
let arr = LargeStringArray::from_iter_values(BadIterator::new(3, 10, data.clone()));
assert_eq!(expected, arr);

// Iterator reports too few items
let arr = LargeStringArray::from_iter_values(BadIterator::new(3, 1, data.clone()));
assert_eq!(expected, arr);
}

fn _test_generic_string_array_from_list_array<O: OffsetSizeTrait>() {
let values = b"HelloArrowAndParquet";
// "ArrowAndParquet"
1 change: 1 addition & 0 deletions arrow-cast/Cargo.toml
@@ -38,6 +38,7 @@ features = ["prettyprint"]

[features]
prettyprint = ["comfy-table"]
force_validate = []

[dependencies]
arrow-array = { workspace = true }
33 changes: 16 additions & 17 deletions arrow-cast/src/cast/mod.rs
@@ -568,28 +568,27 @@ fn timestamp_to_date32<T: ArrowTimestampType>(
/// Accepts [`CastOptions`] to specify cast behavior. See also [`cast()`].
///
/// # Behavior
-/// * Boolean to Utf8: `true` => '1', `false` => `0`
-/// * Utf8 to boolean: `true`, `yes`, `on`, `1` => `true`, `false`, `no`, `off`, `0` => `false`,
+/// * `Boolean` to `Utf8`: `true` => '1', `false` => `0`
+/// * `Utf8` to `Boolean`: `true`, `yes`, `on`, `1` => `true`, `false`, `no`, `off`, `0` => `false`,
/// short variants are accepted, other strings return null or error
-/// * Utf8 to numeric: strings that can't be parsed to numbers return null, float strings
+/// * `Utf8` to Numeric: strings that can't be parsed to numbers return null, float strings
/// in integer casts return null
-/// * Numeric to boolean: 0 returns `false`, any other value returns `true`
-/// * List to List: the underlying data type is cast
-/// * List to FixedSizeList: the underlying data type is cast. If safe is true and a list element
-/// has the wrong length it will be replaced with NULL, otherwise an error will be returned
-/// * Primitive to List: a list array with 1 value per slot is created
-/// * Date32 and Date64: precision lost when going to higher interval
-/// * Time32 and Time64: precision lost when going to higher interval
-/// * Timestamp and Date{32|64}: precision lost when going to higher interval
-/// * Temporal to/from backing primitive: zero-copy with data type change
-/// * Casting from `float32/float64` to `Decimal(precision, scale)` rounds to the `scale` decimals
-/// (i.e. casting `6.4999` to Decimal(10, 1) becomes `6.5`). Prior to version `26.0.0`,
-/// casting would truncate instead (i.e. outputs `6.4` instead)
+/// * Numeric to `Boolean`: 0 returns `false`, any other value returns `true`
+/// * `List` to `List`: the underlying data type is cast
+/// * `List` to `FixedSizeList`: the underlying data type is cast. If safe is true and a list element
+///   has the wrong length it will be replaced with NULL, otherwise an error will be returned
+/// * Primitive to `List`: a list array with 1 value per slot is created
+/// * `Date32` and `Date64`: precision lost when going to higher interval
+/// * `Time32` and `Time64`: precision lost when going to higher interval
+/// * `Timestamp` and `Date{32|64}`: precision lost when going to higher interval
+/// * Temporal to/from backing Primitive: zero-copy with data type change
+/// * `Float32/Float64` to `Decimal(precision, scale)` rounds to the `scale` decimals
+///   (i.e. casting `6.4999` to `Decimal(10, 1)` becomes `6.5`).
///
/// Unsupported Casts (check with `can_cast_types` before calling):
/// * To or from `StructArray`
-/// * List to primitive
-/// * Interval and duration
+/// * `List` to `Primitive`
+/// * `Interval` and `Duration`
///
/// # Timestamps and Timezones
///
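To make the `Utf8` to Numeric rule concrete, a short sketch (not part of the diff) using the crate's public `cast_with_options` and `CastOptions`:

```rust
use std::sync::Arc;
use arrow_array::{cast::AsArray, types::Int32Type, ArrayRef, StringArray};
use arrow_cast::cast::{cast_with_options, CastOptions};
use arrow_schema::DataType;

fn main() {
    let input: ArrayRef = Arc::new(StringArray::from(vec!["1", "abc", "3"]));

    // CastOptions defaults to safe casting, so unparseable strings
    // become null instead of returning an error.
    let options = CastOptions::default();
    let result = cast_with_options(&input, &DataType::Int32, &options).unwrap();

    let ints = result.as_primitive::<Int32Type>();
    assert_eq!(ints.value(0), 1);
    assert!(ints.is_null(1)); // "abc" could not be parsed
    assert_eq!(ints.value(2), 3);
}
```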
6 changes: 3 additions & 3 deletions arrow-cast/src/parse.rs
@@ -285,12 +285,12 @@ fn to_timestamp_nanos(dt: NaiveDateTime) -> Result<i64, ArrowError> {
/// variants and converts it to nanoseconds since midnight.
///
/// Examples of accepted inputs:
///
/// * `09:26:56.123 AM`
/// * `23:59:59`
/// * `6:00 pm`
-//
-/// Internally, this function uses the `chrono` library for the
-/// time parsing
+///
+/// Internally, this function uses the `chrono` library for the time parsing
///
/// ## Timezone / Offset Handling
///
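For illustration, the accepted forms can be parsed through the crate's public `Parser` trait (assumed here to expose this logic for `Time64NanosecondType`):

```rust
use arrow_array::types::Time64NanosecondType;
use arrow_cast::parse::Parser;

fn main() {
    // Result is nanoseconds since midnight
    let nanos = Time64NanosecondType::parse("23:59:59").unwrap();
    assert_eq!(nanos, (23 * 3600 + 59 * 60 + 59) * 1_000_000_000);

    // 12-hour clock and fractional seconds are accepted too
    assert!(Time64NanosecondType::parse("6:00 pm").is_some());
    assert!(Time64NanosecondType::parse("09:26:56.123 AM").is_some());

    // Unparseable input yields None rather than panicking
    assert!(Time64NanosecondType::parse("not a time").is_none());
}
```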
6 changes: 4 additions & 2 deletions arrow-data/src/data.rs
@@ -1179,8 +1179,10 @@ impl ArrayData {
///
/// Does not (yet) check
/// 1. Union type_ids are valid see [#85](https://github.com/apache/arrow-rs/issues/85)
-/// Validates the the null count is correct and that any
-/// nullability requirements of its children are correct
+/// 2. the null count is correct and that any
+/// 3. nullability requirements of its children are correct
+///
+/// [#85]: https://github.com/apache/arrow-rs/issues/85
pub fn validate_nulls(&self) -> Result<(), ArrowError> {
if let Some(nulls) = &self.nulls {
let actual = nulls.len() - nulls.inner().count_set_bits();
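As a sketch of what this validation enforces (illustrative only, building the `ArrayData` via `arrow-array`):

```rust
use arrow_array::{Array, Int32Array};

fn main() {
    let array = Int32Array::from(vec![Some(1), None, Some(3)]);
    let data = array.to_data();

    // The recorded null count must match the unset bits in the
    // validity bitmap; children are checked for their own requirements.
    assert_eq!(data.null_count(), 1);
    assert!(data.validate_nulls().is_ok());
}
```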
13 changes: 10 additions & 3 deletions arrow-data/src/equal/mod.rs
@@ -138,17 +138,24 @@ fn equal_range(
}

/// Logically compares two [ArrayData].
+///
/// Two arrays are logically equal if and only if:
/// * their data types are equal
/// * their lengths are equal
/// * their null counts are equal
/// * their null bitmaps are equal
/// * each of their items are equal
-/// two items are equal when their in-memory representation is physically equal (i.e. same bit content).
+///
+/// Two items are equal when their in-memory representation is physically equal
+/// (i.e. has the same bit content).
+///
/// The physical comparison depends on the data type.
+///
/// # Panics
-/// This function may panic whenever any of the [ArrayData] does not follow the Arrow specification.
-/// (e.g. wrong number of buffers, buffer `len` does not correspond to the declared `len`)
+///
+/// This function may panic whenever any of the [ArrayData] does not follow the
+/// Arrow specification. (e.g. wrong number of buffers, buffer `len` does not
+/// correspond to the declared `len`)
pub fn equal(lhs: &ArrayData, rhs: &ArrayData) -> bool {
utils::base_equal(lhs, rhs)
&& lhs.null_count() == rhs.null_count()
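A sketch of this contract (assuming `equal` is re-exported at the `arrow_data` crate root; the exact path may differ by version):

```rust
use arrow_array::{Array, Int32Array};

fn main() {
    let a = Int32Array::from(vec![Some(1), None, Some(3)]);
    let b = Int32Array::from(vec![Some(1), None, Some(3)]);
    let c = Int32Array::from(vec![Some(1), None, Some(4)]);

    // Same data type, length, null count, null bitmap and values
    assert!(arrow_data::equal(&a.to_data(), &b.to_data()));

    // A single differing value breaks logical equality
    assert!(!arrow_data::equal(&a.to_data(), &c.to_data()));
}
```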
4 changes: 2 additions & 2 deletions arrow-flight/src/decode.rs
@@ -225,8 +225,8 @@ impl futures::Stream for FlightRecordBatchStream {
/// Example use cases
///
/// 1. Using this low level stream it is possible to receive a stream
-/// of RecordBatches in FlightData that have different schemas by
-/// handling multiple schema messages separately.
+///    of RecordBatches in FlightData that have different schemas by
+///    handling multiple schema messages separately.
pub struct FlightDataDecoder {
/// Underlying data stream
response: BoxStream<'static, Result<FlightData>>,
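A sketch of that use case (illustrative; assumes a `FlightDataDecoder` already constructed from some `FlightData` stream):

```rust
use arrow_flight::decode::{DecodedPayload, FlightDataDecoder};
use arrow_flight::error::FlightError;
use futures::StreamExt;

// Handle each low-level message explicitly, so a new schema arriving
// mid-stream can be processed instead of treated as an error.
async fn inspect(mut decoder: FlightDataDecoder) -> Result<(), FlightError> {
    while let Some(msg) = decoder.next().await {
        match msg?.payload {
            DecodedPayload::Schema(schema) => println!("new schema: {schema:?}"),
            DecodedPayload::RecordBatch(batch) => println!("{} rows", batch.num_rows()),
            DecodedPayload::None => {} // e.g. metadata-only message
        }
    }
    Ok(())
}
```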
13 changes: 8 additions & 5 deletions arrow-flight/src/encode.rs
@@ -38,11 +38,14 @@ use futures::{ready, stream::BoxStream, Stream, StreamExt};
/// several have already been successfully produced.
///
/// # Caveats
-/// 1. When [`DictionaryHandling`] is [`DictionaryHandling::Hydrate`], [`DictionaryArray`](arrow_array::array::DictionaryArray)s
-/// are converted to their underlying types prior to transport.
-/// When [`DictionaryHandling`] is [`DictionaryHandling::Resend`], Dictionary [`FlightData`] is sent with every
-/// [`RecordBatch`] that contains a [`DictionaryArray`](arrow_array::array::DictionaryArray).
-/// See <https://github.com/apache/arrow-rs/issues/3389>.
+/// 1. When [`DictionaryHandling`] is [`DictionaryHandling::Hydrate`],
+///    [`DictionaryArray`]s are converted to their underlying types prior to
+///    transport.
+///    When [`DictionaryHandling`] is [`DictionaryHandling::Resend`], Dictionary [`FlightData`] is sent with every
+///    [`RecordBatch`] that contains a [`DictionaryArray`](arrow_array::array::DictionaryArray).
+///    See <https://github.com/apache/arrow-rs/issues/3389>.
+///
+/// [`DictionaryArray`]: arrow_array::array::DictionaryArray
///
/// # Example
/// ```no_run
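Configuring this behavior might look like the following sketch (the `with_dictionary_handling` builder method is assumed from the current encoder API):

```rust
use std::sync::Arc;
use arrow_array::{types::Int32Type, ArrayRef, DictionaryArray, RecordBatch};
use arrow_flight::encode::{DictionaryHandling, FlightDataEncoderBuilder};
use arrow_flight::error::FlightError;
use futures::stream;

fn main() {
    // A batch containing a dictionary-encoded column
    let dict: DictionaryArray<Int32Type> = vec!["a", "b", "a"].into_iter().collect();
    let batch = RecordBatch::try_from_iter([("tags", Arc::new(dict) as ArrayRef)]).unwrap();

    // Resend dictionary FlightData with every batch rather than hydrating;
    // the resulting stream is then polled by the transport layer.
    let _flight_data = FlightDataEncoderBuilder::new()
        .with_dictionary_handling(DictionaryHandling::Resend)
        .build(stream::iter([Ok::<_, FlightError>(batch)]));
}
```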
8 changes: 4 additions & 4 deletions arrow-flight/src/lib.rs
@@ -26,14 +26,14 @@
//! This crate contains:
//!
//! 1. Low level [prost] generated structs
-//! for Flight gRPC protobuf messages, such as [`FlightData`], [`FlightInfo`],
-//! [`Location`] and [`Ticket`].
+//!    for Flight gRPC protobuf messages, such as [`FlightData`], [`FlightInfo`],
+//!    [`Location`] and [`Ticket`].
//!
//! 2. Low level [tonic] generated [`flight_service_client`] and
-//! [`flight_service_server`].
+//!    [`flight_service_server`].
//!
//! 3. Experimental support for [Flight SQL] in [`sql`]. Requires the
-//! `flight-sql-experimental` feature of this crate to be activated.
+//!    `flight-sql-experimental` feature of this crate to be activated.
//!
//! [Flight SQL]: https://arrow.apache.org/docs/format/FlightSql.html
#![allow(rustdoc::invalid_html_tags)]
4 changes: 2 additions & 2 deletions arrow-select/src/concat.rs
@@ -612,13 +612,13 @@ mod tests {
fn test_string_dictionary_merge() {
let mut builder = StringDictionaryBuilder::<Int32Type>::new();
for i in 0..20 {
-builder.append(&i.to_string()).unwrap();
+builder.append(i.to_string()).unwrap();
}
let input_1 = builder.finish();

let mut builder = StringDictionaryBuilder::<Int32Type>::new();
for i in 0..30 {
-builder.append(&i.to_string()).unwrap();
+builder.append(i.to_string()).unwrap();
}
let input_2 = builder.finish();

34 changes: 20 additions & 14 deletions arrow-string/src/substring.rs
@@ -34,11 +34,11 @@ use std::sync::Arc;
/// # Arguments
///
/// * `start` - The start index of all substrings.
-/// If `start >= 0`, then count from the start of the string,
-/// otherwise count from the end of the string.
+///   If `start >= 0`, then count from the start of the string,
+///   otherwise count from the end of the string.
///
/// * `length`(option) - The length of all substrings.
-/// If `length` is [None], then the substring is from `start` to the end of the string.
+///   If `length` is [None], then the substring is from `start` to the end of the string.
///
/// Attention: Both `start` and `length` are counted by byte, not by char.
///
@@ -53,10 +53,13 @@ use std::sync::Arc;
/// ```
///
/// # Error
-/// - The function errors when the passed array is not a [`GenericStringArray`], [`GenericBinaryArray`], [`FixedSizeBinaryArray`]
-/// or [`DictionaryArray`] with supported array type as its value type.
-/// - The function errors if the offset of a substring in the input array is at invalid char boundary (only for \[Large\]String array).
-/// It is recommended to use [`substring_by_char`] if the input array may contain non-ASCII chars.
+/// - The function errors when the passed array is not a [`GenericStringArray`],
+///   [`GenericBinaryArray`], [`FixedSizeBinaryArray`] or [`DictionaryArray`]
+///   with supported array type as its value type.
+/// - The function errors if the offset of a substring in the input array is
+///   at invalid char boundary (only for \[Large\]String array).
+///   It is recommended to use [`substring_by_char`] if the input array may
+///   contain non-ASCII chars.
///
/// ## Example of trying to get an invalid UTF-8 substring
/// ```
@@ -155,22 +158,25 @@ pub fn substring(
}
}
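Ahead of the per-character variant documented below, a combined sketch of both kernels on hypothetical data:

```rust
use arrow_array::{cast::AsArray, StringArray};
use arrow_string::substring::{substring, substring_by_char};

fn main() {
    // Byte-based: 3 bytes starting at byte offset 1 (safe here: ASCII only)
    let array = StringArray::from(vec!["arrow", "parquet"]);
    let by_byte = substring(&array, 1, Some(3)).unwrap();
    assert_eq!(by_byte.as_string::<i32>().value(0), "rro");

    // Char-based: slower, but correct for non-ASCII values
    let utf8 = StringArray::from(vec!["héllo"]);
    let by_char = substring_by_char(&utf8, 1, Some(3)).unwrap();
    assert_eq!(by_char.value(0), "éll");
}
```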

/// Substrings based on character index
///
/// # Arguments
/// * `array` - The input string array
///
/// * `start` - The start index of all substrings.
-/// If `start >= 0`, then count from the start of the string,
-/// otherwise count from the end of the string.
+///   If `start >= 0`, then count from the start of the string,
+///   otherwise count from the end of the string.
///
/// * `length`(option) - The length of all substrings.
-/// If `length` is `None`, then the substring is from `start` to the end of the string.
+///   If `length` is `None`, then the substring is from `start` to the end of the string.
///
/// Attention: Both `start` and `length` are counted by char.
///
/// # Performance
-/// This function is slower than [substring].
-/// Theoretically, the time complexity is `O(n)` where `n` is the length of the value buffer.
-/// It is recommended to use [substring] if the input array only contains ASCII chars.
+///
+/// This function is slower than [substring]. Theoretically, the time complexity
+/// is `O(n)` where `n` is the length of the value buffer. It is recommended to
+/// use [substring] if the input array only contains ASCII chars.
///
/// # Basic usage
/// ```
@@ -396,7 +402,7 @@ mod tests {
/// A helper macro to test the substring functions.
/// # Arguments
/// * `cases` - The test cases, a vector of `(input, start, len, result)`.
-/// Please look at [`gen_test_cases`] to find how to generate it.
+///   Please look at [`gen_test_cases`] to find how to generate it.
/// * `array_ty` - The array type.
/// * `substring_fn` - Either [`substring`] or [`substring_by_char`].
macro_rules! do_test {
2 changes: 1 addition & 1 deletion arrow/Cargo.toml
@@ -75,7 +75,7 @@ pyarrow = ["pyo3", "ffi"]
# force_validate runs full data validation for all arrays that are created
# this is not enabled by default as it is too computationally expensive
# but is run as part of our CI checks
force_validate = ["arrow-data/force_validate"]
force_validate = ["arrow-array/force_validate", "arrow-data/force_validate"]
# Enable ffi support
ffi = ["arrow-schema/ffi", "arrow-data/ffi", "arrow-array/ffi"]
chrono-tz = ["arrow-array/chrono-tz"]
10 changes: 5 additions & 5 deletions parquet/src/arrow/arrow_reader/mod.rs
@@ -353,11 +353,11 @@ impl ArrowReaderOptions {
/// This structure allows
///
/// 1. Loading metadata for a file once and then using that same metadata to
-/// construct multiple separate readers, for example, to distribute readers
-/// across multiple threads
+///    construct multiple separate readers, for example, to distribute readers
+///    across multiple threads
///
/// 2. Using a cached copy of the [`ParquetMetadata`] rather than reading it
-/// from the file each time a reader is constructed.
+///    from the file each time a reader is constructed.
///
/// [`ParquetMetadata`]: crate::file::metadata::ParquetMetaData
#[derive(Debug, Clone)]
@@ -553,10 +553,10 @@ impl<T: ChunkReader + 'static> ParquetRecordBatchReaderBuilder<T> {
/// This interface allows:
///
/// 1. Loading metadata once and using it to create multiple builders with
-/// potentially different settings or run on different threads
+///    potentially different settings or run on different threads
///
/// 2. Using a cached copy of the metadata rather than re-reading it from the
/// file each time a reader is constructed.
/// file each time a reader is constructed.
///
/// See the docs on [`ArrowReaderMetadata`] for more details
///
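A sketch of the load-once pattern described in both docs (the file path is hypothetical):

```rust
use parquet::arrow::arrow_reader::{
    ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder,
};
use std::fs::File;

fn main() {
    // Read and parse the footer once...
    let file = File::open("data.parquet").unwrap();
    let metadata = ArrowReaderMetadata::load(&file, ArrowReaderOptions::new()).unwrap();

    // ...then build any number of readers from cheap clones of it,
    // e.g. one per thread, without touching the footer again.
    for _ in 0..2 {
        let reader = ParquetRecordBatchReaderBuilder::new_with_metadata(
            file.try_clone().unwrap(),
            metadata.clone(),
        )
        .build()
        .unwrap();
        for batch in reader {
            println!("{} rows", batch.unwrap().num_rows());
        }
    }
}
```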