This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit 362ebf9

Added deny(missing_docs) (#808)

jorgecarleitao authored Feb 5, 2022
1 parent 19c07ab
Showing 30 changed files with 76 additions and 15 deletions.
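For context: `#![deny(missing_docs)]` turns rustc's `missing_docs` lint into a hard compile error, so every public, reachable item must carry a doc comment. This commit moves the lint from individual modules to the crate root and documents the items that were previously exempt. A minimal sketch of the effect (not part of this commit; the item names are invented):

```rust
// Crate root (lib.rs). With the lint denied, rustc rejects any public,
// reachable item that lacks a doc comment.
#![deny(missing_docs)]
//! An example crate-level doc comment (the crate itself also needs docs).

/// A documented public struct: accepted by the lint.
pub struct Documented;

// An undocumented public struct would abort the build with
// `error: missing documentation for a struct`:
// pub struct Undocumented;
```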
1 change: 0 additions & 1 deletion src/array/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
 //! Contains the [`Array`] and [`MutableArray`] trait objects declaring arrays,
 //! as well as concrete arrays (such as [`Utf8Array`] and [`MutableUtf8Array`]).
 //!
1 change: 0 additions & 1 deletion src/bitmap/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
 //! contains [`Bitmap`] and [`MutableBitmap`], containers of `bool`.
 mod immutable;
 pub use immutable::*;
1 change: 0 additions & 1 deletion src/buffer/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
 //! Contains [`Buffer`], an immutable container for all Arrow physical types (e.g. i32, f64).

 mod immutable;
1 change: 0 additions & 1 deletion src/compute/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
 //! contains a wide range of compute operations (e.g.
 //! [`arithmetics`], [`aggregate`],
 //! [`filter`], [`comparison`], and [`sort`])
1 change: 0 additions & 1 deletion src/datatypes/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
 #![forbid(unsafe_code)]
 //! Contains all metadata, such as [`PhysicalType`], [`DataType`], [`Field`] and [`Schema`].

1 change: 0 additions & 1 deletion src/ffi/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
 //! contains FFI bindings to import and export [`Array`](crate::array::Array) via
 //! Arrow's [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html)
 mod array;
1 change: 0 additions & 1 deletion src/io/avro/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
 //! Read and write from and to Apache Avro

 pub mod read;
1 change: 0 additions & 1 deletion src/io/csv/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
 //! Convert data between the Arrow memory format and CSV (comma-separated values).

 use crate::error::ArrowError;
1 change: 1 addition & 0 deletions src/io/flight/mod.rs
@@ -1,3 +1,4 @@
+//! Serialization and deserialization to Arrow's flight protocol
 use std::sync::Arc;
 
 use arrow_format::flight::data::{FlightData, SchemaResult};
7 changes: 5 additions & 2 deletions src/io/ipc/mod.rs
@@ -91,15 +91,18 @@ const CONTINUATION_MARKER: [u8; 4] = [0xff; 4];
 /// to specify the dictionary ids of the IPC fields when writing to IPC.
 #[derive(Debug, Clone, PartialEq, Default)]
 pub struct IpcField {
-    // optional children
+    /// optional children
     pub fields: Vec<IpcField>,
-    // dictionary id
+    /// dictionary id
     pub dictionary_id: Option<i64>,
 }
 
+/// Struct containing fields and whether the file is written in little or big endian.
 #[derive(Debug, Clone, PartialEq)]
 pub struct IpcSchema {
+    /// The fields in the schema
     pub fields: Vec<IpcField>,
+    /// Endianness of the file
    pub is_little_endian: bool,
 }
 
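A hedged sketch of how the two structs above compose; the values are invented and `arrow2::io::ipc` is assumed to re-export both types:

```rust
use arrow2::io::ipc::{IpcField, IpcSchema};

fn main() {
    // A top-level field whose single child is dictionary-encoded with id 0.
    let field = IpcField {
        fields: vec![IpcField { fields: vec![], dictionary_id: Some(0) }],
        dictionary_id: None,
    };
    // A little-endian IPC schema holding that one field.
    let schema = IpcSchema { fields: vec![field], is_little_endian: true };
    assert!(schema.is_little_endian);
}
```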
2 changes: 1 addition & 1 deletion src/io/ipc/read/mod.rs
@@ -22,7 +22,7 @@ pub use reader::{read_file_metadata, FileMetadata, FileReader};
 pub use schema::deserialize_schema;
 pub use stream::{read_stream_metadata, StreamMetadata, StreamReader, StreamState};
 
-// how dictionaries are tracked in this crate
+/// how dictionaries are tracked in this crate
 pub type Dictionaries = HashMap<i64, Arc<dyn Array>>;
 
 pub(crate) type Node<'a> = arrow_format::ipc::FieldNodeRef<'a>;
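A sketch of the alias in use; the dictionary contents are invented, and `Utf8Array::from_slice` is assumed available in this crate's array API:

```rust
use std::collections::HashMap;
use std::sync::Arc;

use arrow2::array::{Array, Utf8Array};
use arrow2::io::ipc::read::Dictionaries;

fn main() {
    // Map a dictionary id (as written in the IPC message) to its values array.
    let mut dictionaries: Dictionaries = HashMap::new();
    let values = Utf8Array::<i32>::from_slice(["a", "b", "c"]);
    dictionaries.insert(0, Arc::new(values) as Arc<dyn Array>);
    assert_eq!(dictionaries[&0].len(), 3);
}
```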
1 change: 1 addition & 0 deletions src/io/ipc/read/reader.rs
@@ -14,6 +14,7 @@ use super::schema::fb_to_schema;
 use super::Dictionaries;
 use arrow_format::ipc::planus::{ReadAsRoot, Vector};
 
+/// Metadata of an Arrow IPC file, written in the footer of the file.
 #[derive(Debug, Clone)]
 pub struct FileMetadata {
     /// The schema that is read from the file footer
3 changes: 3 additions & 0 deletions src/io/ipc/read/stream.rs
@@ -15,13 +15,16 @@ use super::common::*;
 use super::schema::fb_to_schema;
 use super::Dictionaries;
 
+/// Metadata of an Arrow IPC stream, written at the start of the stream
 #[derive(Debug, Clone)]
 pub struct StreamMetadata {
     /// The schema that is read from the stream's first message
     pub schema: Schema,
 
+    /// The IPC version of the stream
     pub version: arrow_format::ipc::MetadataVersion,
 
+    /// The IPC fields tracking dictionaries
     pub ipc_schema: IpcSchema,
 }
 
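A hedged usage sketch: `StreamMetadata` is produced by `read_stream_metadata` (exported from the same module), which decodes the message at the head of the stream; the file name is invented:

```rust
use std::fs::File;

use arrow2::error::Result;
use arrow2::io::ipc::read::read_stream_metadata;

fn main() -> Result<()> {
    let mut file = File::open("data.arrows")?; // hypothetical stream file
    let metadata = read_stream_metadata(&mut file)?;
    println!("version: {:?}", metadata.version);
    println!("fields: {}", metadata.schema.fields.len());
    Ok(())
}
```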
2 changes: 2 additions & 0 deletions src/io/ipc/write/serialize.rs
@@ -458,6 +458,7 @@ pub fn _write_dictionary<K: DictionaryKey>(
     }
 }
 
+/// Writes a dictionary array
 #[allow(clippy::too_many_arguments)]
 pub fn write_dictionary(
     array: &dyn Array,

@@ -488,6 +489,7 @@ pub fn write_dictionary(
     }
 }
 
+/// Writes an [`Array`] to `arrow_data`
 pub fn write(
     array: &dyn Array,
     buffers: &mut Vec<ipc::Buffer>,
1 change: 1 addition & 0 deletions src/io/ipc/write/writer.rs
@@ -74,6 +74,7 @@ impl<W: Write> FileWriter<W> {
         })
     }
 
+    /// Consumes itself into the inner writer
     pub fn into_inner(self) -> W {
         self.writer
     }
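A small sketch of why `into_inner` is useful, assuming an in-memory `Cursor` as the sink; `finished_writer` stands for a hypothetical `FileWriter` whose footer has already been written:

```rust
use std::io::Cursor;

use arrow2::io::ipc::write::FileWriter;

// Hypothetical helper: recover the encoded IPC bytes from a finished writer.
fn encoded_bytes(finished_writer: FileWriter<Cursor<Vec<u8>>>) -> Vec<u8> {
    // The first `into_inner` consumes the FileWriter; the second unwraps the Cursor.
    finished_writer.into_inner().into_inner()
}
```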
1 change: 0 additions & 1 deletion src/io/json/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
 //! Convert data between the Arrow memory format and JSON line-delimited records.

 pub mod read;
30 changes: 29 additions & 1 deletion src/io/json_integration/mod.rs
@@ -11,8 +11,11 @@ pub mod write;
 /// A struct that represents an Arrow file with a schema and record batches
 #[derive(Deserialize, Serialize, Debug)]
 pub struct ArrowJson {
+    /// The schema
     pub schema: ArrowJsonSchema,
+    /// The batches
     pub batches: Vec<ArrowJsonBatch>,
+    /// The dictionaries
     #[serde(skip_serializing_if = "Option::is_none")]
     pub dictionaries: Option<Vec<ArrowJsonDictionaryBatch>>,
 }

@@ -22,39 +25,55 @@ pub struct ArrowJson {
 /// Fields are left as JSON `Value` as they vary by `DataType`
 #[derive(Deserialize, Serialize, Debug)]
 pub struct ArrowJsonSchema {
+    /// The fields
     pub fields: Vec<ArrowJsonField>,
+    /// The metadata
     #[serde(skip_serializing_if = "Option::is_none")]
     pub metadata: Option<Value>,
 }
 
 /// Fields are left as JSON `Value` as they vary by `DataType`
 #[derive(Deserialize, Serialize, Debug)]
 pub struct ArrowJsonField {
+    /// The name
     pub name: String,
+    /// The type
     #[serde(rename = "type")]
     pub field_type: Value,
+    /// whether it is nullable
     pub nullable: bool,
+    /// the children
     pub children: Vec<ArrowJsonField>,
+    /// the dictionary
     #[serde(skip_serializing_if = "Option::is_none")]
     pub dictionary: Option<ArrowJsonFieldDictionary>,
+    /// the fields' metadata
     #[serde(skip_serializing_if = "Option::is_none")]
     pub metadata: Option<Value>,
 }
 
+/// Dictionary metadata
 #[derive(Deserialize, Serialize, Debug)]
 pub struct ArrowJsonFieldDictionary {
+    /// the dictionary id
     pub id: i64,
+    /// the index type
     #[serde(rename = "indexType")]
     pub index_type: IntegerType,
+    /// whether it is ordered
     #[serde(rename = "isOrdered")]
     pub is_ordered: bool,
 }
 
+/// the type of the integer in the dictionary
 #[derive(Deserialize, Serialize, Debug)]
 pub struct IntegerType {
+    /// its name
     pub name: String,
+    /// whether it is signed
     #[serde(rename = "isSigned")]
     pub is_signed: bool,
+    /// the bit width
     #[serde(rename = "bitWidth")]
     pub bit_width: i64,
 }

@@ -63,29 +82,38 @@ pub struct IntegerType {
 #[derive(Deserialize, Serialize, Debug)]
 pub struct ArrowJsonBatch {
     count: usize,
+    /// the columns
     pub columns: Vec<ArrowJsonColumn>,
 }
 
 /// A struct that partially reads the Arrow JSON dictionary batch
 #[derive(Deserialize, Serialize, Debug)]
 #[allow(non_snake_case)]
 pub struct ArrowJsonDictionaryBatch {
+    /// the id
     pub id: i64,
+    /// the dictionary batch
     pub data: ArrowJsonBatch,
 }
 
 /// A struct that partially reads the Arrow JSON column/array
 #[derive(Deserialize, Serialize, Clone, Debug)]
 pub struct ArrowJsonColumn {
     name: String,
+    /// the number of elements
     pub count: usize,
+    /// the validity bitmap
     #[serde(rename = "VALIDITY")]
     pub validity: Option<Vec<u8>>,
+    /// the data
     #[serde(rename = "DATA")]
     pub data: Option<Vec<Value>>,
+    /// the offsets
     #[serde(rename = "OFFSET")]
     pub offset: Option<Vec<Value>>, // leaving as Value as 64-bit offsets are strings
+    /// the type id for union types
     #[serde(rename = "TYPE_ID")]
-    pub type_id: Option<Vec<Value>>, // for union types
+    pub type_id: Option<Vec<Value>>,
+    /// the children
     pub children: Option<Vec<ArrowJsonColumn>>,
 }
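The serde renames above map the Rust snake_case fields onto the camelCase keys used by the integration JSON. A sketch of the serialization, assuming `serde_json` is available and `IntegerType` is re-exported from `arrow2::io::json_integration`:

```rust
use arrow2::io::json_integration::IntegerType;

fn main() {
    let ty = IntegerType {
        name: "int".to_string(),
        is_signed: true,
        bit_width: 32,
    };
    // `is_signed` and `bit_width` serialize under their renamed keys.
    let json = serde_json::to_string(&ty).unwrap();
    assert_eq!(json, r#"{"name":"int","isSigned":true,"bitWidth":32}"#);
}
```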
1 change: 1 addition & 0 deletions src/io/json_integration/read/array.rs
@@ -410,6 +410,7 @@ pub fn to_array(
     }
 }
 
+/// Deserializes a [`ArrowJsonBatch`] to a [`Chunk`]
 pub fn deserialize_chunk(
     schema: &Schema,
     ipc_fields: &[IpcField],
1 change: 1 addition & 0 deletions src/io/json_integration/read/mod.rs
@@ -1,3 +1,4 @@
+//! API to read from Arrow JSON integration format
 mod array;
 pub use array::*;
 mod schema;
1 change: 1 addition & 0 deletions src/io/json_integration/write/mod.rs
@@ -1,3 +1,4 @@
+//! API to write to Arrow JSON integration format
 mod array;
 pub use array::*;
 mod schema;
1 change: 1 addition & 0 deletions src/io/parquet/read/file.rs
@@ -184,6 +184,7 @@ pub struct RowGroupReader<R: Read + Seek> {
 }
 
 impl<R: Read + Seek> RowGroupReader<R> {
+    /// Returns a new [`RowGroupReader`]
     pub fn new(
         reader: R,
         schema: Schema,
1 change: 1 addition & 0 deletions src/io/parquet/read/mod.rs
@@ -59,6 +59,7 @@ pub use schema::{get_schema, FileMetaData};
 use self::nested_utils::{InitNested, NestedArrayIter, NestedState};
 use deserialize::page_iter_to_arrays;
 
+/// Trait describing a [`FallibleStreamingIterator`] of [`DataPage`]
 pub trait DataPages:
     FallibleStreamingIterator<Item = DataPage, Error = ParquetError> + Send + Sync
 {
9 changes: 9 additions & 0 deletions src/io/parquet/read/statistics/binary.rs
@@ -10,9 +10,13 @@ use crate::error::{ArrowError, Result};
 /// Represents a `Binary` or `LargeBinary`
 #[derive(Debug, Clone, PartialEq)]
 pub struct BinaryStatistics {
+    /// number of nulls
     pub null_count: Option<i64>,
+    /// number of distinct values
     pub distinct_count: Option<i64>,
+    /// Minimum
     pub min_value: Option<Vec<u8>>,
+    /// Maximum
     pub max_value: Option<Vec<u8>>,
 }

@@ -41,11 +45,16 @@ impl From<&ParquetByteArrayStatistics> for BinaryStatistics {
     }
 }
 
+/// Statistics of a string parquet column
 #[derive(Debug, Clone, PartialEq)]
 pub struct Utf8Statistics {
+    /// number of nulls
     pub null_count: Option<i64>,
+    /// number of distinct values
     pub distinct_count: Option<i64>,
+    /// Minimum
     pub min_value: Option<String>,
+    /// Maximum
     pub max_value: Option<String>,
 }
 
5 changes: 5 additions & 0 deletions src/io/parquet/read/statistics/boolean.rs
@@ -4,11 +4,16 @@ use std::any::Any;
 
 use super::Statistics;
 
+/// Statistics of a boolean parquet column
 #[derive(Debug, Clone, PartialEq)]
 pub struct BooleanStatistics {
+    /// number of nulls
     pub null_count: Option<i64>,
+    /// number of distinct values
     pub distinct_count: Option<i64>,
+    /// Minimum
     pub min_value: Option<bool>,
+    /// Maximum
     pub max_value: Option<bool>,
 }
 
6 changes: 6 additions & 0 deletions src/io/parquet/read/statistics/fixlen.rs
@@ -15,12 +15,18 @@ use parquet2::{
 
 use super::Statistics;
 
+/// Arrow-deserialized parquet Statistics of a fixed-len binary
 #[derive(Debug, Clone, PartialEq)]
 pub struct FixedLenStatistics {
+    /// number of nulls
     pub null_count: Option<i64>,
+    /// number of distinct values
     pub distinct_count: Option<i64>,
+    /// Minimum
     pub min_value: Option<Vec<u8>>,
+    /// Maximum
     pub max_value: Option<Vec<u8>>,
+    /// data type
     pub data_type: DataType,
 }
 
6 changes: 6 additions & 0 deletions src/io/parquet/read/statistics/primitive.rs
@@ -10,12 +10,18 @@ use std::any::Any;
 use super::Statistics;
 use crate::error::Result;
 
+/// Arrow-deserialized parquet Statistics of a primitive type
 #[derive(Debug, Clone, PartialEq)]
 pub struct PrimitiveStatistics<T: NativeType> {
+    /// the data type
     pub data_type: DataType,
+    /// number of nulls
     pub null_count: Option<i64>,
+    /// number of distinct values
     pub distinct_count: Option<i64>,
+    /// Minimum
     pub min_value: Option<T>,
+    /// Maximum
     pub max_value: Option<T>,
 }
 
1 change: 1 addition & 0 deletions src/io/parquet/write/file.rs
@@ -22,6 +22,7 @@ pub fn add_arrow_schema(
         .or_else(|| Some(vec![schema_to_metadata_key(schema)]))
 }
 
+/// An interface to write a parquet to a [`Write`]
 pub struct FileWriter<W: Write> {
     writer: parquet2::write::FileWriter<W>,
     schema: Schema,
1 change: 1 addition & 0 deletions src/lib.rs
@@ -1,4 +1,5 @@
 #![doc = include_str!("doc/lib.md")]
+#![deny(missing_docs)]
 // So that we have more control over what is `unsafe` inside an `unsafe` block
 #![allow(unused_unsafe)]
 //
1 change: 0 additions & 1 deletion src/scalar/mod.rs
@@ -1,4 +1,3 @@
-#![warn(missing_docs)]
 //! contains the [`Scalar`] trait object representing individual items of [`Array`](crate::array::Array)s,
 //! as well as concrete implementations such as [`BooleanScalar`].
 use std::any::Any;
1 change: 0 additions & 1 deletion src/types/mod.rs
@@ -1,4 +1,3 @@
-#![deny(missing_docs)]
 //! Sealed traits and implementations to handle all _physical types_ used in this crate.
 //!
 //! Most physical types used in this crate are native Rust types, such as `i32`.
