Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Removed Option from fields' metadata #715

Merged
merged 1 commit into from
Dec 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/metadata.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::collections::{BTreeMap, HashMap};

use arrow2::datatypes::{DataType, Field, Schema};
use arrow2::datatypes::{DataType, Field, Metadata, Schema};

fn main() {
// two data types (logical types)
Expand All @@ -12,7 +12,7 @@ fn main() {
let field2 = Field::new("c2", type2_, true);

// which can contain extra metadata:
let mut metadata = BTreeMap::new();
let mut metadata = Metadata::new();
metadata.insert(
"Office Space".to_string(),
"Deals with real issues in the workplace.".to_string(),
Expand Down
102 changes: 22 additions & 80 deletions src/datatypes/field.rs
Original file line number Diff line number Diff line change
@@ -1,56 +1,18 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use std::collections::BTreeMap;

use crate::error::{ArrowError, Result};

use super::DataType;
use super::{DataType, Metadata};

/// A logical [`DataType`] and its associated metadata per
/// [Arrow specification](https://arrow.apache.org/docs/cpp/api/datatype.html)
#[derive(Debug, Clone, Eq)]
/// Represents the metadata of a "column".
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Field {
/// Its name
pub name: String,
/// Its logical [`DataType`]
pub data_type: DataType,
/// Whether its values can be null or not
/// Its nullability
pub nullable: bool,
/// A map of key-value pairs containing additional custom meta data.
pub metadata: Option<BTreeMap<String, String>>,
}

impl std::hash::Hash for Field {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.name.hash(state);
self.data_type.hash(state);
self.nullable.hash(state);
self.metadata.hash(state);
}
}

impl PartialEq for Field {
fn eq(&self, other: &Self) -> bool {
self.name == other.name
&& self.data_type == other.data_type
&& self.nullable == other.nullable
&& self.metadata == other.metadata
}
/// Additional custom (opaque) metadata.
pub metadata: Metadata,
}

impl Field {
Expand All @@ -60,36 +22,24 @@ impl Field {
name: name.into(),
data_type,
nullable,
metadata: None,
metadata: Default::default(),
}
}

/// Creates a new [`Field`] with metadata.
#[inline]
pub fn with_metadata(self, metadata: BTreeMap<String, String>) -> Self {
pub fn with_metadata(self, metadata: Metadata) -> Self {
Self {
name: self.name,
data_type: self.data_type,
nullable: self.nullable,
metadata: Some(metadata),
metadata,
}
}

/// Sets the [`Field`]'s optional metadata.
/// The metadata is set to `None` when the given map is empty.
/// Returns the [`Field`]'s metadata.
#[inline]
pub fn set_metadata(&mut self, metadata: Option<BTreeMap<String, String>>) {
self.metadata = None;
if let Some(v) = metadata {
if !v.is_empty() {
self.metadata = Some(v);
}
}
}

/// Returns the [`Field`]'s optional custom metadata.
#[inline]
pub const fn metadata(&self) -> &Option<BTreeMap<String, String>> {
pub const fn metadata(&self) -> &Metadata {
&self.metadata
}

Expand All @@ -105,7 +55,7 @@ impl Field {
&self.data_type
}

/// Returns the [`Field`] nullability.
/// Returns whether the [`Field`] may contain null values.
#[inline]
pub const fn is_nullable(&self) -> bool {
self.nullable
Expand All @@ -125,27 +75,19 @@ impl Field {
/// ```
pub fn try_merge(&mut self, from: &Field) -> Result<()> {
// merge metadata
match (self.metadata(), from.metadata()) {
(Some(self_metadata), Some(from_metadata)) => {
let mut merged = self_metadata.clone();
for (key, from_value) in from_metadata {
if let Some(self_value) = self_metadata.get(key) {
if self_value != from_value {
return Err(ArrowError::InvalidArgumentError(format!(
"Fail to merge field due to conflicting metadata data value for key {}", key),
));
}
} else {
merged.insert(key.clone(), from_value.clone());
}
for (key, from_value) in from.metadata() {
if let Some(self_value) = self.metadata.get(key) {
if self_value != from_value {
return Err(ArrowError::InvalidArgumentError(format!(
"Fail to merge field due to conflicting metadata data value for key {}",
key
)));
}
self.set_metadata(Some(merged));
} else {
self.metadata.insert(key.clone(), from_value.clone());
}
(None, Some(from_metadata)) => {
self.set_metadata(Some(from_metadata.clone()));
}
_ => {}
}

match &mut self.data_type {
DataType::Struct(nested_fields) => match &from.data_type {
DataType::Struct(from_nested_fields) => {
Expand Down
44 changes: 18 additions & 26 deletions src/datatypes/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#![deny(missing_docs)]
#![forbid(unsafe_code)]
//! Contains all metadata, such as [`PhysicalType`], [`DataType`], [`Field`] and [`Schema`].

mod field;
mod physical_type;
mod schema;
Expand All @@ -8,6 +10,14 @@ pub use field::Field;
pub use physical_type::*;
pub use schema::Schema;

use std::collections::BTreeMap;
use std::sync::Arc;

/// typedef for [BTreeMap<String, String>] denoting a [`Field`]'s metadata.
pub type Metadata = BTreeMap<String, String>;
/// typedef for [Option<(String, Option<String>)>] denoting an extension's name and optional metadata.
pub(crate) type Extension = Option<(String, Option<String>)>;

/// The set of supported logical types.
/// Each variant uniquely identifies a logical type, which defines specific semantics for the data (e.g. how it should be represented).
/// Each variant has a corresponding [`PhysicalType`], obtained via [`DataType::to_physical_type`],
Expand Down Expand Up @@ -185,17 +195,12 @@ pub enum TimeUnit {
/// Interval units defined in Arrow
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IntervalUnit {
/// Indicates the number of elapsed whole months, stored as 4-byte integers.
/// The number of elapsed whole months.
YearMonth,
/// Indicates the number of elapsed days and milliseconds,
/// stored as 2 contiguous 32-bit integers (8-bytes in total).
/// The number of elapsed days and milliseconds,
/// stored as 2 contiguous `i32`
DayTime,
/// The values are stored contiguously in 16 byte blocks. Months and
/// days are encoded as 32 bit integers and nanoseconds is encoded as a
/// 64 bit integer. All integers are signed. Each field is independent
/// (e.g. there is no constraint that nanoseconds have the same sign
/// as days or that the quantity of nanoseconds represents less
/// than a day's worth of time).
/// The number of elapsed months (i32), days (i32) and nanoseconds (i64).
MonthDayNano,
}

Expand Down Expand Up @@ -313,27 +318,14 @@ impl From<PrimitiveType> for DataType {
}
}

// backward compatibility
use std::collections::BTreeMap;
use std::sync::Arc;

/// typedef for [`Arc<Schema>`].
pub type SchemaRef = Arc<Schema>;

/// typedef for [Option<BTreeMap<String, String>>].
pub type Metadata = Option<BTreeMap<String, String>>;
/// typedef for [Option<(String, Option<String>)>].
pub type Extension = Option<(String, Option<String>)>;

/// Returns the extension (its name and optional metadata) stored under the `ARROW:extension:*` keys of `metadata`, if any.
pub fn get_extension(metadata: &Option<BTreeMap<String, String>>) -> Extension {
if let Some(metadata) = metadata {
if let Some(name) = metadata.get("ARROW:extension:name") {
let metadata = metadata.get("ARROW:extension:metadata").cloned();
Some((name.clone(), metadata))
} else {
None
}
pub fn get_extension(metadata: &Metadata) -> Extension {
if let Some(name) = metadata.get("ARROW:extension:name") {
let metadata = metadata.get("ARROW:extension:metadata").cloned();
Some((name.clone(), metadata))
} else {
None
}
Expand Down
14 changes: 7 additions & 7 deletions src/ffi/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ impl Ffi_ArrowSchema {

let metadata = if let DataType::Extension(name, _, extension_metadata) = field.data_type() {
// append extension information.
let mut metadata = metadata.clone().unwrap_or_default();
let mut metadata = metadata.clone();

// metadata
if let Some(extension_metadata) = extension_metadata {
Expand All @@ -118,8 +118,10 @@ impl Ffi_ArrowSchema {
metadata.insert("ARROW:extension:name".to_string(), name.clone());

Some(metadata_to_bytes(&metadata))
} else if !metadata.is_empty() {
Some(metadata_to_bytes(metadata))
} else {
metadata.as_ref().map(metadata_to_bytes)
None
};

let name = CString::new(name).unwrap();
Expand Down Expand Up @@ -227,9 +229,7 @@ pub(crate) unsafe fn to_field(schema: &Ffi_ArrowSchema) -> Result<Field> {
data_type
};

let mut field = Field::new(schema.name(), data_type, schema.nullable());
field.set_metadata(metadata);
Ok(field)
Ok(Field::new(schema.name(), data_type, schema.nullable()).with_metadata(metadata))
}

fn to_integer_type(format: &str) -> Result<IntegerType> {
Expand Down Expand Up @@ -494,7 +494,7 @@ unsafe fn read_bytes(ptr: *const u8, len: usize) -> &'static str {
unsafe fn metadata_from_bytes(data: *const ::std::os::raw::c_char) -> (Metadata, Extension) {
let mut data = data as *const u8; // u8 = i8
if data.is_null() {
return (None, None);
return (Metadata::default(), None);
};
let len = read_ne_i32(data);
data = data.add(4);
Expand Down Expand Up @@ -524,5 +524,5 @@ unsafe fn metadata_from_bytes(data: *const ::std::os::raw::c_char) -> (Metadata,
};
}
let extension = extension_name.map(|name| (name, extension_metadata));
(Some(result), extension)
(result, extension)
}
24 changes: 10 additions & 14 deletions src/io/avro/read/schema.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::collections::BTreeMap;

use avro_schema::{Enum, Fixed, Record, Schema as AvroSchema};

use crate::datatypes::*;
Expand All @@ -19,8 +17,8 @@ fn aliased(name: &str, namespace: Option<&str>, default_namespace: Option<&str>)
}
}

fn external_props(schema: &AvroSchema) -> BTreeMap<String, String> {
let mut props = BTreeMap::new();
fn external_props(schema: &AvroSchema) -> Metadata {
let mut props = Metadata::new();
match &schema {
AvroSchema::Record(Record {
doc: Some(ref doc), ..
Expand Down Expand Up @@ -66,7 +64,7 @@ pub fn convert_schema(schema: &AvroSchema) -> Result<Schema> {
&field.schema,
Some(&field.name),
false,
Some(external_props(&field.schema)),
external_props(&field.schema),
)?)
}
}
Expand All @@ -84,7 +82,7 @@ fn schema_to_field(
schema: &AvroSchema,
name: Option<&str>,
mut nullable: bool,
props: Option<BTreeMap<String, String>>,
props: Metadata,
) -> Result<Field> {
let data_type = match schema {
AvroSchema::Null => DataType::Null,
Expand Down Expand Up @@ -129,7 +127,7 @@ fn schema_to_field(
item_schema,
Some("item"), // default name for list items
false,
None,
Metadata::default(),
)?)),
AvroSchema::Map(_) => todo!("Avro maps are mapped to MapArrays"),
AvroSchema::Union(schemas) => {
Expand All @@ -141,7 +139,7 @@ fn schema_to_field(
.iter()
.find(|&schema| !matches!(schema, AvroSchema::Null))
{
schema_to_field(schema, None, has_nullable, None)?
schema_to_field(schema, None, has_nullable, Metadata::default())?
.data_type()
.clone()
} else {
Expand All @@ -153,7 +151,7 @@ fn schema_to_field(
} else {
let fields = schemas
.iter()
.map(|s| schema_to_field(s, None, has_nullable, None))
.map(|s| schema_to_field(s, None, has_nullable, Metadata::default()))
.collect::<Result<Vec<Field>>>()?;
DataType::Union(fields, None, UnionMode::Dense)
}
Expand All @@ -162,7 +160,7 @@ fn schema_to_field(
let fields: Result<Vec<Field>> = fields
.iter()
.map(|field| {
let mut props = BTreeMap::new();
let mut props = Metadata::new();
if let Some(doc) = &field.doc {
props.insert("avro::doc".to_string(), doc.clone());
}
Expand All @@ -173,7 +171,7 @@ fn schema_to_field(
&field.schema,
Some(&format!("{}.{}", name, field.name)),
false,
Some(props),
props,
)
})
.collect();
Expand Down Expand Up @@ -201,7 +199,5 @@ fn schema_to_field(

let name = name.unwrap_or_default();

let mut field = Field::new(name, data_type, nullable);
field.set_metadata(props);
Ok(field)
Ok(Field::new(name, data_type, nullable).with_metadata(props))
}
Loading