This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Made crate deny(missing_docs) #808

Merged
merged 1 commit into from Feb 5, 2022
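The whole diff is one mechanical change: `src/lib.rs` gains a crate-level `#![deny(missing_docs)]`, the per-module copies of the attribute become redundant and are deleted, and every public item that the module-level lints had not yet covered gets a doc comment. A minimal sketch of what the crate-level lint enforces, using a hypothetical toy crate rather than arrow2 itself:

```rust
// src/lib.rs of a toy crate.
#![deny(missing_docs)] // one crate-wide attribute instead of one per module

//! Demonstrates how `deny(missing_docs)` propagates to every public item.

/// A documented public function: accepted by the lint.
pub fn documented() {}

pub mod inner {
    //! Submodules inherit the crate-level lint, so no per-module attribute is needed.

    /// A documented public constant.
    pub const ANSWER: i32 = 42;
}

// Uncommenting this undocumented item fails the build with
// `error: missing documentation for a function`:
// pub fn undocumented() {}
```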
1 change: 0 additions & 1 deletion src/array/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
//! Contains the [`Array`] and [`MutableArray`] trait objects declaring arrays,
//! as well as concrete arrays (such as [`Utf8Array`] and [`MutableUtf8Array`]).
//!
1 change: 0 additions & 1 deletion src/bitmap/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
//! contains [`Bitmap`] and [`MutableBitmap`], containers of `bool`.
mod immutable;
pub use immutable::*;
1 change: 0 additions & 1 deletion src/buffer/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
//! Contains [`Buffer`], an immutable container for all Arrow physical types (e.g. i32, f64).

mod immutable;
1 change: 0 additions & 1 deletion src/compute/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
//! contains a wide range of compute operations (e.g.
//! [`arithmetics`], [`aggregate`],
//! [`filter`], [`comparison`], and [`sort`])
1 change: 0 additions & 1 deletion src/datatypes/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
#![forbid(unsafe_code)]
//! Contains all metadata, such as [`PhysicalType`], [`DataType`], [`Field`] and [`Schema`].

1 change: 0 additions & 1 deletion src/ffi/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
//! contains FFI bindings to import and export [`Array`](crate::array::Array) via
//! Arrow's [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html)
mod array;
1 change: 0 additions & 1 deletion src/io/avro/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
//! Read and write from and to Apache Avro

pub mod read;
1 change: 0 additions & 1 deletion src/io/csv/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
//! Convert data between Arrow and CSV (comma-separated values).

use crate::error::ArrowError;
1 change: 1 addition & 0 deletions src/io/flight/mod.rs
@@ -1,3 +1,4 @@
+//! Serialization and deserialization to Arrow's flight protocol
use std::sync::Arc;

use arrow_format::flight::data::{FlightData, SchemaResult};
7 changes: 5 additions & 2 deletions src/io/ipc/mod.rs
@@ -91,15 +91,18 @@ const CONTINUATION_MARKER: [u8; 4] = [0xff; 4];
/// to specify the dictionary ids of the IPC fields when writing to IPC.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct IpcField {
-    // optional children
+    /// optional children
    pub fields: Vec<IpcField>,
-    // dictionary id
+    /// dictionary id
    pub dictionary_id: Option<i64>,
}

+/// Struct containing fields and whether the file is written in little or big endian.
#[derive(Debug, Clone, PartialEq)]
pub struct IpcSchema {
+    /// The fields in the schema
    pub fields: Vec<IpcField>,
+    /// Endianness of the file
    pub is_little_endian: bool,
}
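The two structs above are the writer's view of dictionary tracking. A small sketch, with hypothetical ids and grounded only in the definitions shown in this hunk, of describing a schema whose single field has one dictionary-encoded child:

```rust
// One top-level field whose only child is dictionary-encoded
// under the (hypothetical) dictionary id 42.
let field = IpcField {
    fields: vec![IpcField {
        fields: vec![],
        dictionary_id: Some(42),
    }],
    dictionary_id: None, // the parent itself is not dictionary-encoded
};

let ipc_schema = IpcSchema {
    fields: vec![field],
    is_little_endian: true,
};
assert_eq!(ipc_schema.fields[0].fields[0].dictionary_id, Some(42));
```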
2 changes: 1 addition & 1 deletion src/io/ipc/read/mod.rs
@@ -22,7 +22,7 @@ pub use reader::{read_file_metadata, FileMetadata, FileReader};
pub use schema::deserialize_schema;
pub use stream::{read_stream_metadata, StreamMetadata, StreamReader, StreamState};

-// how dictionaries are tracked in this crate
+/// how dictionaries are tracked in this crate
pub type Dictionaries = HashMap<i64, Arc<dyn Array>>;

pub(crate) type Node<'a> = arrow_format::ipc::FieldNodeRef<'a>;
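`Dictionaries` maps a dictionary id to its deserialized values. A hedged sketch of populating it while reading; the id is hypothetical, and `Int32Array` with its `from_slice` constructor (from `crate::array`) is assumed to be in scope alongside this module's imports:

```rust
use std::{collections::HashMap, sync::Arc};

// Track the values of the dictionary with (hypothetical) id 0.
let values = Int32Array::from_slice([1, 2, 3]);
let mut dictionaries: Dictionaries = HashMap::new();
dictionaries.insert(0, Arc::new(values) as Arc<dyn Array>);
assert!(dictionaries.contains_key(&0));
```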
1 change: 1 addition & 0 deletions src/io/ipc/read/reader.rs
@@ -14,6 +14,7 @@ use super::schema::fb_to_schema;
use super::Dictionaries;
use arrow_format::ipc::planus::{ReadAsRoot, Vector};

+/// Metadata of an Arrow IPC file, written in the footer of the file.
#[derive(Debug, Clone)]
pub struct FileMetadata {
    /// The schema that is read from the file footer
3 changes: 3 additions & 0 deletions src/io/ipc/read/stream.rs
@@ -15,13 +15,16 @@ use super::common::*;
use super::schema::fb_to_schema;
use super::Dictionaries;

+/// Metadata of an Arrow IPC stream, written at the start of the stream
#[derive(Debug, Clone)]
pub struct StreamMetadata {
    /// The schema that is read from the stream's first message
    pub schema: Schema,

+    /// The IPC version of the stream
    pub version: arrow_format::ipc::MetadataVersion,

+    /// The IPC fields tracking dictionaries
    pub ipc_schema: IpcSchema,
}

2 changes: 2 additions & 0 deletions src/io/ipc/write/serialize.rs
@@ -458,6 +458,7 @@ pub fn _write_dictionary<K: DictionaryKey>(
    }
}

+/// Writes a dictionary array
#[allow(clippy::too_many_arguments)]
pub fn write_dictionary(
    array: &dyn Array,
@@ -488,6 +489,7 @@ pub fn write_dictionary(
    }
}

+/// Writes an [`Array`] to `arrow_data`
pub fn write(
    array: &dyn Array,
    buffers: &mut Vec<ipc::Buffer>,
1 change: 1 addition & 0 deletions src/io/ipc/write/writer.rs
@@ -74,6 +74,7 @@ impl<W: Write> FileWriter<W> {
        })
    }

+    /// Consumes itself into the inner writer
    pub fn into_inner(self) -> W {
        self.writer
    }
1 change: 0 additions & 1 deletion src/io/json/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
//! Convert data between the Arrow memory format and JSON line-delimited records.

pub mod read;
30 changes: 29 additions & 1 deletion src/io/json_integration/mod.rs
@@ -11,8 +11,11 @@ pub mod write;
/// A struct that represents an Arrow file with a schema and record batches
#[derive(Deserialize, Serialize, Debug)]
pub struct ArrowJson {
+    /// The schema
    pub schema: ArrowJsonSchema,
+    /// The batches
    pub batches: Vec<ArrowJsonBatch>,
+    /// The dictionaries
    #[serde(skip_serializing_if = "Option::is_none")]
    pub dictionaries: Option<Vec<ArrowJsonDictionaryBatch>>,
}
@@ -22,39 +25,55 @@ pub struct ArrowJson {
/// Fields are left as JSON `Value` as they vary by `DataType`
#[derive(Deserialize, Serialize, Debug)]
pub struct ArrowJsonSchema {
+    /// The fields
    pub fields: Vec<ArrowJsonField>,
+    /// The metadata
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
}

/// Fields are left as JSON `Value` as they vary by `DataType`
#[derive(Deserialize, Serialize, Debug)]
pub struct ArrowJsonField {
+    /// The name
    pub name: String,
+    /// The type
    #[serde(rename = "type")]
    pub field_type: Value,
+    /// whether it is nullable
    pub nullable: bool,
+    /// the children
    pub children: Vec<ArrowJsonField>,
+    /// the dictionary
    #[serde(skip_serializing_if = "Option::is_none")]
    pub dictionary: Option<ArrowJsonFieldDictionary>,
+    /// the fields' metadata
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
}

+/// Dictionary metadata
#[derive(Deserialize, Serialize, Debug)]
pub struct ArrowJsonFieldDictionary {
+    /// the dictionary id
    pub id: i64,
+    /// the index type
    #[serde(rename = "indexType")]
    pub index_type: IntegerType,
+    /// whether it is ordered
    #[serde(rename = "isOrdered")]
    pub is_ordered: bool,
}

+/// the type of the integer in the dictionary
#[derive(Deserialize, Serialize, Debug)]
pub struct IntegerType {
+    /// its name
    pub name: String,
+    /// whether it is signed
    #[serde(rename = "isSigned")]
    pub is_signed: bool,
+    /// the bit width
    #[serde(rename = "bitWidth")]
    pub bit_width: i64,
}
@@ -63,29 +82,38 @@ pub struct IntegerType {
#[derive(Deserialize, Serialize, Debug)]
pub struct ArrowJsonBatch {
    count: usize,
+    /// the columns
    pub columns: Vec<ArrowJsonColumn>,
}

/// A struct that partially reads the Arrow JSON dictionary batch
#[derive(Deserialize, Serialize, Debug)]
#[allow(non_snake_case)]
pub struct ArrowJsonDictionaryBatch {
+    /// the id
    pub id: i64,
+    /// the dictionary batch
    pub data: ArrowJsonBatch,
}

/// A struct that partially reads the Arrow JSON column/array
#[derive(Deserialize, Serialize, Clone, Debug)]
pub struct ArrowJsonColumn {
    name: String,
+    /// the number of elements
    pub count: usize,
+    /// the validity bitmap
    #[serde(rename = "VALIDITY")]
    pub validity: Option<Vec<u8>>,
+    /// the data
    #[serde(rename = "DATA")]
    pub data: Option<Vec<Value>>,
+    /// the offsets
    #[serde(rename = "OFFSET")]
    pub offset: Option<Vec<Value>>, // leaving as Value as 64-bit offsets are strings
+    /// the type id for union types
    #[serde(rename = "TYPE_ID")]
-    pub type_id: Option<Vec<Value>>, // for union types
+    pub type_id: Option<Vec<Value>>,
+    /// the children
    pub children: Option<Vec<ArrowJsonColumn>>,
}
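These serde structs mirror the JSON emitted by Arrow's integration suite; the upper-case keys come from the `rename` attributes above. A hedged sketch of deserializing one column, assuming `serde_json` is available and `ArrowJsonColumn` is in scope (the values are hypothetical):

```rust
use serde_json::json;

// An Int32 column with logical values [1, null, 3]: a 0 in VALIDITY
// marks a null, and DATA keeps a placeholder at the null slot.
let column = json!({
    "name": "c1",
    "count": 3,
    "VALIDITY": [1, 0, 1],
    "DATA": [1, 0, 3],
});
let parsed: ArrowJsonColumn = serde_json::from_value(column).unwrap();
assert_eq!(parsed.count, 3);
```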
1 change: 1 addition & 0 deletions src/io/json_integration/read/array.rs
@@ -410,6 +410,7 @@ pub fn to_array(
    }
}

+/// Deserializes an [`ArrowJsonBatch`] to a [`Chunk`]
pub fn deserialize_chunk(
    schema: &Schema,
    ipc_fields: &[IpcField],
1 change: 1 addition & 0 deletions src/io/json_integration/read/mod.rs
@@ -1,3 +1,4 @@
+//! API to read from the Arrow JSON integration format
mod array;
pub use array::*;
mod schema;
1 change: 1 addition & 0 deletions src/io/json_integration/write/mod.rs
@@ -1,3 +1,4 @@
+//! API to write to the Arrow JSON integration format
mod array;
pub use array::*;
mod schema;
1 change: 1 addition & 0 deletions src/io/parquet/read/file.rs
@@ -184,6 +184,7 @@ pub struct RowGroupReader<R: Read + Seek> {
}

impl<R: Read + Seek> RowGroupReader<R> {
+    /// Returns a new [`RowGroupReader`]
    pub fn new(
        reader: R,
        schema: Schema,
1 change: 1 addition & 0 deletions src/io/parquet/read/mod.rs
@@ -59,6 +59,7 @@ pub use schema::{get_schema, FileMetaData};
use self::nested_utils::{InitNested, NestedArrayIter, NestedState};
use deserialize::page_iter_to_arrays;

+/// Trait describing a [`FallibleStreamingIterator`] of [`DataPage`]
pub trait DataPages:
    FallibleStreamingIterator<Item = DataPage, Error = ParquetError> + Send + Sync
{
9 changes: 9 additions & 0 deletions src/io/parquet/read/statistics/binary.rs
@@ -10,9 +10,13 @@ use crate::error::{ArrowError, Result};
/// Represents a `Binary` or `LargeBinary`
#[derive(Debug, Clone, PartialEq)]
pub struct BinaryStatistics {
+    /// number of nulls
    pub null_count: Option<i64>,
+    /// number of distinct values
    pub distinct_count: Option<i64>,
+    /// Minimum
    pub min_value: Option<Vec<u8>>,
+    /// Maximum
    pub max_value: Option<Vec<u8>>,
}

@@ -41,11 +45,16 @@ impl From<&ParquetByteArrayStatistics> for BinaryStatistics {
    }
}

+/// Statistics of a string parquet column
#[derive(Debug, Clone, PartialEq)]
pub struct Utf8Statistics {
+    /// number of nulls
    pub null_count: Option<i64>,
+    /// number of distinct values
    pub distinct_count: Option<i64>,
+    /// Minimum
    pub min_value: Option<String>,
+    /// Maximum
    pub max_value: Option<String>,
}

5 changes: 5 additions & 0 deletions src/io/parquet/read/statistics/boolean.rs
@@ -4,11 +4,16 @@ use std::any::Any;

use super::Statistics;

+/// Statistics of a boolean parquet column
#[derive(Debug, Clone, PartialEq)]
pub struct BooleanStatistics {
+    /// number of nulls
    pub null_count: Option<i64>,
+    /// number of distinct values
    pub distinct_count: Option<i64>,
+    /// Minimum
    pub min_value: Option<bool>,
+    /// Maximum
    pub max_value: Option<bool>,
}

6 changes: 6 additions & 0 deletions src/io/parquet/read/statistics/fixlen.rs
@@ -15,12 +15,18 @@ use parquet2::{

use super::Statistics;

+/// Arrow-deserialized parquet Statistics of a fixed-len binary
#[derive(Debug, Clone, PartialEq)]
pub struct FixedLenStatistics {
+    /// number of nulls
    pub null_count: Option<i64>,
+    /// number of distinct values
    pub distinct_count: Option<i64>,
+    /// Minimum
    pub min_value: Option<Vec<u8>>,
+    /// Maximum
    pub max_value: Option<Vec<u8>>,
+    /// data type
    pub data_type: DataType,
}

6 changes: 6 additions & 0 deletions src/io/parquet/read/statistics/primitive.rs
@@ -10,12 +10,18 @@ use std::any::Any;
use super::Statistics;
use crate::error::Result;

+/// Arrow-deserialized parquet Statistics of a primitive type
#[derive(Debug, Clone, PartialEq)]
pub struct PrimitiveStatistics<T: NativeType> {
+    /// the data type
    pub data_type: DataType,
+    /// number of nulls
    pub null_count: Option<i64>,
+    /// number of distinct values
    pub distinct_count: Option<i64>,
+    /// Minimum
    pub min_value: Option<T>,
+    /// Maximum
    pub max_value: Option<T>,
}
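All of these statistics structs share the same shape: optional counts plus optional min/max. A sketch with hypothetical values of what `PrimitiveStatistics` would carry for an `Int32` column containing `[1, null, 3]`:

```rust
// Hypothetical statistics for an Int32 column [1, null, 3].
let stats = PrimitiveStatistics::<i32> {
    data_type: DataType::Int32,
    null_count: Some(1),
    distinct_count: None, // writers may omit distinct counts
    min_value: Some(1),
    max_value: Some(3),
};
assert_eq!((stats.min_value, stats.max_value), (Some(1), Some(3)));
```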

1 change: 1 addition & 0 deletions src/io/parquet/write/file.rs
@@ -22,6 +22,7 @@ pub fn add_arrow_schema(
        .or_else(|| Some(vec![schema_to_metadata_key(schema)]))
}

+/// An interface to write a parquet file to a [`Write`]
pub struct FileWriter<W: Write> {
    writer: parquet2::write::FileWriter<W>,
    schema: Schema,
1 change: 1 addition & 0 deletions src/lib.rs
@@ -1,4 +1,5 @@
#![doc = include_str!("doc/lib.md")]
+#![deny(missing_docs)]
// So that we have more control over what is `unsafe` inside an `unsafe` block
#![allow(unused_unsafe)]
//
1 change: 0 additions & 1 deletion src/scalar/mod.rs
@@ -1,4 +1,3 @@
-#![warn(missing_docs)]
//! contains the [`Scalar`] trait object representing individual items of [`Array`](crate::array::Array)s,
//! as well as concrete implementations such as [`BooleanScalar`].
use std::any::Any;
1 change: 0 additions & 1 deletion src/types/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
//! Sealed traits and implementations to handle all _physical types_ used in this crate.
//!
//! Most physical types used in this crate are native Rust types, such as `i32`.