This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Improved documentation #860

Merged: 1 commit, merged on Feb 23, 2022
9 changes: 4 additions & 5 deletions benches/avro_read.rs
@@ -24,12 +24,11 @@ fn schema() -> AvroSchema {
 fn write(size: usize, has_codec: bool) -> Result<Vec<u8>> {
     let avro = schema();
     // a writer needs a schema and something to write to
-    let mut writer: Writer<Vec<u8>>;
-    if has_codec {
-        writer = Writer::with_codec(&avro, Vec::new(), Codec::Deflate);
+    let mut writer = if has_codec {
+        Writer::with_codec(&avro, Vec::new(), Codec::Deflate)
     } else {
-        writer = Writer::new(&avro, Vec::new());
-    }
+        Writer::new(&avro, Vec::new())
+    };
 
     (0..size).for_each(|_| {
         let mut record = Record::new(writer.schema()).unwrap();
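
The refactor replaces a forward-declared binding assigned in each branch with a single `if` expression, which guarantees initialization at the type level. A minimal standalone sketch of the pattern, with hypothetical names (not from this PR):

    fn make_greeting(compressed: bool) -> String {
        // the whole `if` is an expression; both branches must yield the same type,
        // so the binding is provably initialized on every path
        let greeting = if compressed {
            String::from("hi")
        } else {
            String::from("hello, world")
        };
        greeting
    }

    fn main() {
        println!("{}", make_greeting(true));
    }
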
3 changes: 1 addition & 2 deletions benches/write_parquet.rs
@@ -1,4 +1,3 @@
-use std::io::Cursor;
 use std::sync::Arc;
 
 use criterion::{criterion_group, criterion_main, Criterion};
@@ -29,7 +28,7 @@ fn write(array: &dyn Array, encoding: Encoding) -> Result<()> {
         vec![encoding],
     )?;
 
-    let mut writer = vec![];
+    let writer = vec![];
 
     let mut writer = FileWriter::try_new(writer, schema, options)?;
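
The first `writer` binding needed no `mut`: the `Vec` is only moved into `FileWriter::try_new`, which shadows the name. A small sketch of the move-then-shadow idiom, with hypothetical names standing in for the parquet types:

    // hypothetical helper standing in for `FileWriter::try_new`
    fn wrap(bytes: Vec<u8>) -> String {
        String::from_utf8(bytes).unwrap()
    }

    fn main() {
        let buffer = Vec::new();       // no `mut`: never mutated through this name
        let mut buffer = wrap(buffer); // shadowed; mutation happens via the wrapper
        buffer.push_str("hello");
        println!("{}", buffer);
    }
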
19 changes: 5 additions & 14 deletions examples/ffi.rs
@@ -9,6 +9,7 @@ unsafe fn export(
     array_ptr: *mut ffi::ArrowArray,
     schema_ptr: *mut ffi::ArrowSchema,
 ) {
+    // exporting an array requires an associated field so that the consumer knows its datatype
     let field = Field::new("a", array.data_type().clone(), true);
     ffi::export_array_to_c(array, array_ptr);
     ffi::export_field_to_c(&field, schema_ptr);
@@ -25,23 +26,13 @@ fn main() -> Result<()> {
 
     // the goal is to export this array and import it back via FFI.
     // to import, we initialize the structs that will receive the data
-    let array_ptr = Box::new(ffi::ArrowArray::empty());
-    let schema_ptr = Box::new(ffi::ArrowSchema::empty());
-
-    // since FFIs work in raw pointers, let's temporarily relinquish ownership so that producers
-    // can write into it in a thread-safe manner
-    let array_ptr = Box::into_raw(array_ptr);
-    let schema_ptr = Box::into_raw(schema_ptr);
+    let mut array_ptr = Box::new(ffi::ArrowArray::empty());
+    let mut schema_ptr = Box::new(ffi::ArrowSchema::empty());
 
     // this is where a producer (in this case also us ^_^) writes to the pointers' location.
     // `array` here could be anything or not even be available, if this was e.g. from Python.
-    // Safety: we just allocated the pointers correctly.
-    unsafe { export(array.clone(), array_ptr, schema_ptr) };
-
-    // we can now take ownership back, since we are responsible for deallocating this memory.
-    // Safety: we just into_raw them.
-    let array_ptr = unsafe { Box::from_raw(array_ptr) };
-    let schema_ptr = unsafe { Box::from_raw(schema_ptr) };
+    // Safety: we just allocated the pointers
+    unsafe { export(array.clone(), &mut *array_ptr, &mut *schema_ptr) };
 
     // and finally interpret the written memory into a new array.
     // Safety: we used `export`, which is a valid exporter to the C data interface
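
The change drops the `Box::into_raw`/`Box::from_raw` round-trip: a `&mut T` borrowed from the `Box` coerces to `*mut T` at the call site, so the `Box` keeps ownership (and its destructor) throughout. A minimal sketch of that coercion, independent of arrow2:

    // a mutable reference coerces to a raw pointer at an FFI-style boundary,
    // so no ownership transfer via Box::into_raw is needed
    unsafe fn write_through(ptr: *mut u64) {
        // Safety: caller guarantees `ptr` is valid and exclusive
        *ptr = 42;
    }

    fn main() {
        let mut value = Box::new(0u64);
        // `&mut *value` reborrows the boxed value and coerces to `*mut u64`
        unsafe { write_through(&mut *value) };
        assert_eq!(*value, 42); // the Box still owns and frees the allocation
    }
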
2 changes: 0 additions & 2 deletions examples/parquet_read_async.rs
@@ -2,8 +2,6 @@ use std::sync::Arc;
 use std::time::SystemTime;
 
 use futures::future::BoxFuture;
-use futures::FutureExt;
-use tokio;
 use tokio::fs::File;
 use tokio::io::BufReader;
 use tokio_util::compat::*;
3 changes: 2 additions & 1 deletion src/bitmap/bitmap_ops.rs
@@ -7,6 +7,7 @@ use super::{
     Bitmap,
 };
 
+/// Creates a [Vec<u8>] from an [`Iterator`] of [`BitChunk`].
 /// # Safety
 /// The iterator must be [`TrustedLen`].
 pub unsafe fn from_chunk_iter_unchecked<T: BitChunk, I: Iterator<Item = T>>(
@@ -35,7 +36,7 @@ pub unsafe fn from_chunk_iter_unchecked<T: BitChunk, I: Iterator<Item = T>>(
     buffer
 }
 
-/// Creates a Vec<u8> from a [`TrustedLen`] of [`BitChunk`],
+/// Creates a [`Vec<u8>`] from a [`TrustedLen`] of [`BitChunk`].
 pub fn chunk_iter_to_vec<T: BitChunk, I: TrustedLen<Item = T>>(iter: I) -> Vec<u8> {
     unsafe { from_chunk_iter_unchecked(iter) }
 }
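
As an aside on the doc-comment syntax above: Rust doc comments are Markdown, square brackets create intra-doc links, and backticks inside the brackets render the linked path as code. A minimal sketch with a hypothetical function (not from this crate):

    /// Packs booleans into a [`Vec<u8>`], one bit per flag (least significant first).
    pub fn flags_to_bytes(flags: &[bool]) -> Vec<u8> {
        flags
            .chunks(8)
            .map(|chunk| {
                chunk
                    .iter()
                    .enumerate()
                    .fold(0u8, |byte, (i, &bit)| byte | ((bit as u8) << i))
            })
            .collect()
    }

    fn main() {
        assert_eq!(flags_to_bytes(&[true, false, true]), vec![0b101]);
    }
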
4 changes: 2 additions & 2 deletions src/bitmap/mutable.rs
@@ -58,7 +58,7 @@ impl MutableBitmap {
         }
     }
 
-    /// Initializes an a pre-allocated [`MutableBitmap`] with capacity for `capacity` bits.
+    /// Initializes a pre-allocated [`MutableBitmap`] with capacity for `capacity` bits.
     #[inline]
     pub fn with_capacity(capacity: usize) -> Self {
         Self {
@@ -67,7 +67,7 @@ impl MutableBitmap {
         }
     }
 
-    /// Initializes an a pre-allocated [`MutableBitmap`] with capacity for `capacity` bits.
+    /// Reserves `additional` bits in the [`MutableBitmap`], potentially re-allocating its buffer.
     #[inline(always)]
     pub fn reserve(&mut self, additional: usize) {
         self.buffer
16 changes: 9 additions & 7 deletions src/compute/like.rs
@@ -17,6 +17,10 @@ fn is_like_pattern(c: char) -> bool {
     c == '%' || c == '_'
 }
 
+fn replace_pattern(pattern: &str) -> String {
+    pattern.replace('%', ".*").replace('_', ".")
+}
+
 #[inline]
 fn a_like_utf8<O: Offset, F: Fn(bool) -> bool>(
     lhs: &Utf8Array<O>,
@@ -40,7 +44,7 @@ fn a_like_utf8<O: Offset, F: Fn(bool) -> bool>(
     let pattern = if let Some(pattern) = map.get(pattern) {
         pattern
     } else {
-        let re_pattern = pattern.replace("%", ".*").replace("_", ".");
+        let re_pattern = replace_pattern(pattern);
         let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
             ArrowError::InvalidArgumentError(format!(
                 "Unable to build regex from LIKE pattern: {}",
@@ -113,7 +117,7 @@ fn a_like_utf8_scalar<O: Offset, F: Fn(bool) -> bool>(
         let ends_with = &rhs[1..];
         Bitmap::from_trusted_len_iter(lhs.values_iter().map(|x| op(x.ends_with(ends_with))))
     } else {
-        let re_pattern = rhs.replace("%", ".*").replace("_", ".");
+        let re_pattern = replace_pattern(rhs);
         let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
             ArrowError::InvalidArgumentError(format!(
                 "Unable to build regex from LIKE pattern: {}",
@@ -187,10 +191,8 @@ fn a_like_binary<O: Offset, F: Fn(bool) -> bool>(
     let pattern = if let Some(pattern) = map.get(pattern) {
         pattern
     } else {
-        let re_pattern = simdutf8::basic::from_utf8(pattern)
-            .unwrap()
-            .replace("%", ".*")
-            .replace("_", ".");
+        let re_pattern = simdutf8::basic::from_utf8(pattern).unwrap();
+        let re_pattern = replace_pattern(re_pattern);
         let re = BytesRegex::new(&format!("^{}$", re_pattern)).map_err(|e| {
             ArrowError::InvalidArgumentError(format!(
                 "Unable to build regex from LIKE pattern: {}",
@@ -270,7 +272,7 @@ fn a_like_binary_scalar<O: Offset, F: Fn(bool) -> bool>(
         let ends_with = &rhs[1..];
         Bitmap::from_trusted_len_iter(lhs.values_iter().map(|x| op(x.ends_with(ends_with))))
     } else {
-        let re_pattern = pattern.replace("%", ".*").replace("_", ".");
+        let re_pattern = replace_pattern(pattern);
         let re = BytesRegex::new(&format!("^{}$", re_pattern)).map_err(|e| {
             ArrowError::InvalidArgumentError(format!(
                 "Unable to build regex from LIKE pattern: {}",
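
The new `replace_pattern` helper centralizes the SQL LIKE to regex translation that was previously duplicated across four call sites: `%` (any run of characters) becomes `.*` and `_` (any single character) becomes `.`. A minimal sketch of the same translation in isolation, assuming the `regex` crate is available (note it does not escape regex metacharacters in the input, matching the shape of the code above):

    use regex::Regex;

    /// Translates a SQL LIKE pattern into a regex pattern: `%` -> `.*`, `_` -> `.`.
    fn replace_pattern(pattern: &str) -> String {
        pattern.replace('%', ".*").replace('_', ".")
    }

    fn main() {
        // anchor with ^...$ so the whole string must match, as the kernel does
        let re = Regex::new(&format!("^{}$", replace_pattern("ba_a%"))).unwrap();
        assert!(re.is_match("banana"));
        assert!(!re.is_match("abanana"));
    }
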
2 changes: 1 addition & 1 deletion src/compute/sort/mod.rs
@@ -355,7 +355,7 @@ where
         values.chain(null_indices.into_iter()).collect::<Vec<I>>()
     };
 
-    values.truncate(limit.unwrap_or_else(|| values.len()));
+    values.truncate(limit.unwrap_or(values.len()));
 
     let data_type = I::PRIMITIVE.into();
     PrimitiveArray::<I>::from_data(data_type, values.into(), None)
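
`unwrap_or` takes a plain default value, while `unwrap_or_else` takes a closure; clippy's `unnecessary_lazy_evaluations` lint prefers the eager form when the default is cheap to compute, as a `Vec::len` call is. A small sketch of the difference, with hypothetical data:

    fn main() {
        let limit: Option<usize> = None;
        let mut values = vec![3, 1, 2, 5, 4];

        // cheap default: eager `unwrap_or` is clearer
        let n = limit.unwrap_or(values.len());

        // expensive default: `unwrap_or_else` skips the work when `limit` is Some
        let _m = limit.unwrap_or_else(|| values.iter().copied().max().unwrap_or(0));

        values.truncate(n);
        assert_eq!(values.len(), 5);
    }
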
3 changes: 1 addition & 2 deletions src/io/ipc/read/common.rs
@@ -121,8 +121,7 @@ pub fn read_record_batch<R: Read + Seek>(
                 Ok(None)
             }
         })
-        .map(|x| x.transpose())
-        .flatten()
+        .filter_map(|x| x.transpose())
        .collect::<Result<Vec<_>>>()?
     } else {
         fields
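
`filter_map(|x| x.transpose())` fuses the `map` + `flatten` pair flagged by clippy's `map_flatten` lint: each `Result<Option<T>>` is transposed into `Option<Result<T>>`, the `None`s are dropped, and collecting into `Result<Vec<_>>` stops at the first error. A standalone sketch of the pattern, with hypothetical inputs:

    fn main() {
        // each item: Ok(Some(value)) = keep, Ok(None) = skip, Err = fail
        let items: Vec<Result<Option<i32>, String>> =
            vec![Ok(Some(1)), Ok(None), Ok(Some(2))];

        let kept = items
            .into_iter()
            // Result<Option<T>, E> -> Option<Result<T, E>>; None disappears here
            .filter_map(|x| x.transpose())
            // Option<Result> items collect into a single Result<Vec<_>, _>
            .collect::<Result<Vec<_>, String>>();

        assert_eq!(kept, Ok(vec![1, 2]));
    }
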
3 changes: 1 addition & 2 deletions src/io/json/read/infer_schema.rs
@@ -125,8 +125,7 @@ pub fn infer_rows(rows: &[Value]) -> Result<DataType> {
     // discard None values and deduplicate entries
     let types = types
         .into_iter()
-        .map(|x| x.transpose())
-        .flatten()
+        .filter_map(|x| x.transpose())
         .collect::<Result<HashSet<_>>>()?;
 
     Ok(if !types.is_empty() {
9 changes: 3 additions & 6 deletions src/io/json_integration/read/array.rs
@@ -170,8 +170,7 @@ fn to_binary<O: Offset>(json_col: &ArrowJsonColumn, data_type: DataType) -> Arc<
         .as_ref()
         .unwrap()
         .iter()
-        .map(|value| value.as_str().map(|x| hex::decode(x).unwrap()).unwrap())
-        .flatten()
+        .flat_map(|value| value.as_str().map(|x| hex::decode(x).unwrap()).unwrap())
         .collect();
     Arc::new(BinaryArray::from_data(data_type, offsets, values, validity))
 }
@@ -184,8 +183,7 @@ fn to_utf8<O: Offset>(json_col: &ArrowJsonColumn, data_type: DataType) -> Arc<dy
         .as_ref()
         .unwrap()
         .iter()
-        .map(|value| value.as_str().unwrap().as_bytes().to_vec())
-        .flatten()
+        .flat_map(|value| value.as_str().unwrap().as_bytes().to_vec())
         .collect();
     Arc::new(Utf8Array::from_data(data_type, offsets, values, validity))
 }
@@ -309,8 +307,7 @@ pub fn to_array(
         .as_ref()
         .unwrap()
         .iter()
-        .map(|value| value.as_str().map(|x| hex::decode(x).unwrap()).unwrap())
-        .flatten()
+        .flat_map(|value| value.as_str().map(|x| hex::decode(x).unwrap()).unwrap())
         .collect();
     Ok(Arc::new(FixedSizeBinaryArray::from_data(
         data_type, values, validity,
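
`flat_map` replaces the `map` + `flatten` pair here as well: each JSON string is decoded to a `Vec<u8>`, and the byte vectors are concatenated into one flat values buffer. A standalone sketch of the idea, using plain byte conversion instead of `hex::decode` to stay dependency-free:

    fn main() {
        let column = vec!["ab", "cd", "ef"];

        // flat_map yields each string's bytes and concatenates them,
        // producing the flat values buffer backing a variable-length array
        let values: Vec<u8> = column
            .iter()
            .flat_map(|s| s.as_bytes().to_vec())
            .collect();

        assert_eq!(values, b"abcdef".to_vec());
    }
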
8 changes: 2 additions & 6 deletions src/io/parquet/read/schema/convert.rs
@@ -12,7 +12,7 @@ use crate::datatypes::{DataType, Field, IntervalUnit, TimeUnit};
 /// Converts [`ParquetType`]s to a [`Field`], ignoring parquet fields that do not contain
 /// any physical column.
 pub fn parquet_to_arrow_schema(fields: &[ParquetType]) -> Vec<Field> {
-    fields.iter().map(to_field).flatten().collect::<Vec<_>>()
+    fields.iter().filter_map(to_field).collect::<Vec<_>>()
 }
 
 fn from_int32(
@@ -224,11 +224,7 @@ fn non_repeated_group(
 /// Converts a parquet group type to an arrow [`DataType::Struct`].
 /// Returns [`None`] if all its fields are empty
 fn to_struct(fields: &[ParquetType]) -> Option<DataType> {
-    let fields = fields
-        .iter()
-        .map(to_field)
-        .flatten()
-        .collect::<Vec<Field>>();
+    let fields = fields.iter().filter_map(to_field).collect::<Vec<Field>>();
     if fields.is_empty() {
         None
     } else {
2 changes: 1 addition & 1 deletion src/io/parquet/read/statistics/mod.rs
@@ -101,7 +101,7 @@ fn get_fields(field: &Field) -> Vec<&Field> {
     match field.data_type.to_logical_type() {
         DataType::List(inner) => get_fields(inner),
         DataType::LargeList(inner) => get_fields(inner),
-        DataType::Struct(fields) => fields.iter().map(get_fields).flatten().collect(),
+        DataType::Struct(fields) => fields.iter().flat_map(get_fields).collect(),
         _ => vec![field],
     }
 }
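
`get_fields` recursively flattens a nested schema into its leaf fields; with `flat_map`, each child's `Vec` of leaves is spliced directly into the parent's result. A self-contained sketch over a toy tree type (hypothetical, standing in for `Field`/`DataType`):

    // toy stand-in for a nested schema: a node is either a leaf or a struct of children
    enum Node {
        Leaf(&'static str),
        Struct(Vec<Node>),
    }

    // collects all leaf names, depth-first, like `get_fields`
    fn leaves(node: &Node) -> Vec<&'static str> {
        match node {
            Node::Leaf(name) => vec![*name],
            // flat_map splices each child's leaf list into one Vec
            Node::Struct(children) => children.iter().flat_map(leaves).collect(),
        }
    }

    fn main() {
        let schema = Node::Struct(vec![
            Node::Leaf("a"),
            Node::Struct(vec![Node::Leaf("b"), Node::Leaf("c")]),
        ]);
        assert_eq!(leaves(&schema), vec!["a", "b", "c"]);
    }
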
22 changes: 9 additions & 13 deletions src/io/parquet/write/dictionary.rs
@@ -60,19 +60,15 @@ fn encode_keys<K: DictionaryKey>(
     // encode indices
     // compute the required number of bits
     if let Some(validity) = validity {
-        let keys = array
-            .iter()
-            .flatten()
-            .map(|x| {
-                let index = x.to_usize().unwrap();
-                // discard indices whose values are null, since they are part of the def levels.
-                if validity.get_bit(index) {
-                    Some(index as u32)
-                } else {
-                    None
-                }
-            })
-            .flatten();
+        let keys = array.iter().flatten().filter_map(|x| {
+            let index = x.to_usize().unwrap();
+            // discard indices whose values are null, since they are part of the def levels.
+            if validity.get_bit(index) {
+                Some(index as u32)
+            } else {
+                None
+            }
+        });
         let num_bits = utils::get_bit_width(keys.clone().max().unwrap_or(0) as u64) as u8;
 
         let keys = utils::ExactSizedIter::new(keys, array.len() - null_count);
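
Note that `keys` stays lazy: cloning the iterator is cheap (it only copies the adapter state), which allows one pass to find the maximum for the bit width and a second pass to encode. A minimal sketch of the two-pass idiom, with hypothetical data:

    fn main() {
        let data = vec![Some(3u32), None, Some(1), Some(7)];

        // a lazy pipeline; nothing is computed yet
        let keys = data.iter().filter_map(|x| x.as_ref().copied());

        // first pass on a clone: find the max to size the encoding
        let max = keys.clone().max().unwrap_or(0);
        let num_bits = 32 - max.leading_zeros(); // bits needed to represent `max`

        // second pass: consume the original iterator
        let encoded: Vec<u32> = keys.collect();

        assert_eq!((num_bits, encoded), (3, vec![3, 1, 7]));
    }
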