Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Removed Option from Field's metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Dec 27, 2021
1 parent ccc7c8a commit 5986bd1
Show file tree
Hide file tree
Showing 9 changed files with 93 additions and 170 deletions.
4 changes: 2 additions & 2 deletions examples/metadata.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::collections::{BTreeMap, HashMap};

use arrow2::datatypes::{DataType, Field, Schema};
use arrow2::datatypes::{DataType, Field, Metadata, Schema};

fn main() {
// two data types (logical types)
Expand All @@ -12,7 +12,7 @@ fn main() {
let field2 = Field::new("c2", type2_, true);

// which can contain extra metadata:
let mut metadata = BTreeMap::new();
let mut metadata = Metadata::new();
metadata.insert(
"Office Space".to_string(),
"Deals with real issues in the workplace.".to_string(),
Expand Down
102 changes: 22 additions & 80 deletions src/datatypes/field.rs
Original file line number Diff line number Diff line change
@@ -1,56 +1,18 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use std::collections::BTreeMap;

use crate::error::{ArrowError, Result};

use super::DataType;
use super::{DataType, Metadata};

/// A logical [`DataType`] and its associated metadata per
/// [Arrow specification](https://arrow.apache.org/docs/cpp/api/datatype.html)
#[derive(Debug, Clone, Eq)]
/// Represents the metadata of a "column".
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Field {
/// Its name
pub name: String,
/// Its logical [`DataType`]
pub data_type: DataType,
/// Whether its values can be null or not
/// Its nullability
pub nullable: bool,
/// A map of key-value pairs containing additional custom meta data.
pub metadata: Option<BTreeMap<String, String>>,
}

impl std::hash::Hash for Field {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.name.hash(state);
self.data_type.hash(state);
self.nullable.hash(state);
self.metadata.hash(state);
}
}

impl PartialEq for Field {
fn eq(&self, other: &Self) -> bool {
self.name == other.name
&& self.data_type == other.data_type
&& self.nullable == other.nullable
&& self.metadata == other.metadata
}
/// Additional custom (opaque) metadata.
pub metadata: Metadata,
}

impl Field {
Expand All @@ -60,36 +22,24 @@ impl Field {
name: name.into(),
data_type,
nullable,
metadata: None,
metadata: Default::default(),
}
}

/// Creates a new [`Field`] with metadata.
#[inline]
pub fn with_metadata(self, metadata: BTreeMap<String, String>) -> Self {
pub fn with_metadata(self, metadata: Metadata) -> Self {
Self {
name: self.name,
data_type: self.data_type,
nullable: self.nullable,
metadata: Some(metadata),
metadata,
}
}

/// Sets the [`Field`]'s optional metadata.
/// The metadata is set as `None` for empty map.
/// Returns the [`Field`]'s metadata.
#[inline]
pub fn set_metadata(&mut self, metadata: Option<BTreeMap<String, String>>) {
self.metadata = None;
if let Some(v) = metadata {
if !v.is_empty() {
self.metadata = Some(v);
}
}
}

/// Returns the [`Field`]'s optional custom metadata.
#[inline]
pub const fn metadata(&self) -> &Option<BTreeMap<String, String>> {
pub const fn metadata(&self) -> &Metadata {
&self.metadata
}

Expand All @@ -105,7 +55,7 @@ impl Field {
&self.data_type
}

/// Returns the [`Field`] nullability.
/// Returns whether the [`Field`] may contain null values.
#[inline]
pub const fn is_nullable(&self) -> bool {
self.nullable
Expand All @@ -125,27 +75,19 @@ impl Field {
/// ```
pub fn try_merge(&mut self, from: &Field) -> Result<()> {
// merge metadata
match (self.metadata(), from.metadata()) {
(Some(self_metadata), Some(from_metadata)) => {
let mut merged = self_metadata.clone();
for (key, from_value) in from_metadata {
if let Some(self_value) = self_metadata.get(key) {
if self_value != from_value {
return Err(ArrowError::InvalidArgumentError(format!(
"Fail to merge field due to conflicting metadata data value for key {}", key),
));
}
} else {
merged.insert(key.clone(), from_value.clone());
}
for (key, from_value) in from.metadata() {
if let Some(self_value) = self.metadata.get(key) {
if self_value != from_value {
return Err(ArrowError::InvalidArgumentError(format!(
"Fail to merge field due to conflicting metadata data value for key {}",
key
)));
}
self.set_metadata(Some(merged));
} else {
self.metadata.insert(key.clone(), from_value.clone());
}
(None, Some(from_metadata)) => {
self.set_metadata(Some(from_metadata.clone()));
}
_ => {}
}

match &mut self.data_type {
DataType::Struct(nested_fields) => match &from.data_type {
DataType::Struct(from_nested_fields) => {
Expand Down
44 changes: 18 additions & 26 deletions src/datatypes/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#![deny(missing_docs)]
#![forbid(unsafe_code)]
//! Contains all metadata, such as [`PhysicalType`], [`DataType`], [`Field`] and [`Schema`].
mod field;
mod physical_type;
mod schema;
Expand All @@ -8,6 +10,14 @@ pub use field::Field;
pub use physical_type::*;
pub use schema::Schema;

use std::collections::BTreeMap;
use std::sync::Arc;

/// typedef for [BTreeMap<String, String>] denoting a [`Field`]'s metadata.
pub type Metadata = BTreeMap<String, String>;
/// typedef for [Option<(String, Option<String>)>] describing an extension's name and its optional metadata.
pub(crate) type Extension = Option<(String, Option<String>)>;

/// The set of supported logical types.
/// Each variant uniquely identifies a logical type, which define specific semantics to the data (e.g. how it should be represented).
/// Each variant has a corresponding [`PhysicalType`], obtained via [`DataType::to_physical_type`],
Expand Down Expand Up @@ -185,17 +195,12 @@ pub enum TimeUnit {
/// Interval units defined in Arrow
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IntervalUnit {
/// Indicates the number of elapsed whole months, stored as 4-byte integers.
/// The number of elapsed whole months.
YearMonth,
/// Indicates the number of elapsed days and milliseconds,
/// stored as 2 contiguous 32-bit integers (8-bytes in total).
/// The number of elapsed days and milliseconds,
/// stored as 2 contiguous `i32`
DayTime,
/// The values are stored contiguously in 16 byte blocks. Months and
/// days are encoded as 32 bit integers and nanoseconds is encoded as a
/// 64 bit integer. All integers are signed. Each field is independent
/// (e.g. there is no constraint that nanoseconds have the same sign
/// as days or that the quantity of nanoseconds represents less
/// than a day's worth of time).
/// The number of elapsed months (i32), days (i32) and nanoseconds (i64).
MonthDayNano,
}

Expand Down Expand Up @@ -313,27 +318,14 @@ impl From<PrimitiveType> for DataType {
}
}

// backward compatibility
use std::collections::BTreeMap;
use std::sync::Arc;

/// typedef for [`Arc<Schema>`].
pub type SchemaRef = Arc<Schema>;

/// typedef for [Option<BTreeMap<String, String>>].
pub type Metadata = Option<BTreeMap<String, String>>;
/// typedef for [Option<(String, Option<String>)>].
pub type Extension = Option<(String, Option<String>)>;

/// Returns the extension (its name and optional metadata) stored under the `ARROW:extension:*` keys of `metadata`, if any.
pub fn get_extension(metadata: &Option<BTreeMap<String, String>>) -> Extension {
if let Some(metadata) = metadata {
if let Some(name) = metadata.get("ARROW:extension:name") {
let metadata = metadata.get("ARROW:extension:metadata").cloned();
Some((name.clone(), metadata))
} else {
None
}
pub fn get_extension(metadata: &Metadata) -> Extension {
if let Some(name) = metadata.get("ARROW:extension:name") {
let metadata = metadata.get("ARROW:extension:metadata").cloned();
Some((name.clone(), metadata))
} else {
None
}
Expand Down
14 changes: 7 additions & 7 deletions src/ffi/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ impl Ffi_ArrowSchema {

let metadata = if let DataType::Extension(name, _, extension_metadata) = field.data_type() {
// append extension information.
let mut metadata = metadata.clone().unwrap_or_default();
let mut metadata = metadata.clone();

// metadata
if let Some(extension_metadata) = extension_metadata {
Expand All @@ -118,8 +118,10 @@ impl Ffi_ArrowSchema {
metadata.insert("ARROW:extension:name".to_string(), name.clone());

Some(metadata_to_bytes(&metadata))
} else if !metadata.is_empty() {
Some(metadata_to_bytes(metadata))
} else {
metadata.as_ref().map(metadata_to_bytes)
None
};

let name = CString::new(name).unwrap();
Expand Down Expand Up @@ -227,9 +229,7 @@ pub(crate) unsafe fn to_field(schema: &Ffi_ArrowSchema) -> Result<Field> {
data_type
};

let mut field = Field::new(schema.name(), data_type, schema.nullable());
field.set_metadata(metadata);
Ok(field)
Ok(Field::new(schema.name(), data_type, schema.nullable()).with_metadata(metadata))
}

fn to_integer_type(format: &str) -> Result<IntegerType> {
Expand Down Expand Up @@ -494,7 +494,7 @@ unsafe fn read_bytes(ptr: *const u8, len: usize) -> &'static str {
unsafe fn metadata_from_bytes(data: *const ::std::os::raw::c_char) -> (Metadata, Extension) {
let mut data = data as *const u8; // u8 = i8
if data.is_null() {
return (None, None);
return (Metadata::default(), None);
};
let len = read_ne_i32(data);
data = data.add(4);
Expand Down Expand Up @@ -524,5 +524,5 @@ unsafe fn metadata_from_bytes(data: *const ::std::os::raw::c_char) -> (Metadata,
};
}
let extension = extension_name.map(|name| (name, extension_metadata));
(Some(result), extension)
(result, extension)
}
24 changes: 10 additions & 14 deletions src/io/avro/read/schema.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::collections::BTreeMap;

use avro_schema::{Enum, Fixed, Record, Schema as AvroSchema};

use crate::datatypes::*;
Expand All @@ -19,8 +17,8 @@ fn aliased(name: &str, namespace: Option<&str>, default_namespace: Option<&str>)
}
}

fn external_props(schema: &AvroSchema) -> BTreeMap<String, String> {
let mut props = BTreeMap::new();
fn external_props(schema: &AvroSchema) -> Metadata {
let mut props = Metadata::new();
match &schema {
AvroSchema::Record(Record {
doc: Some(ref doc), ..
Expand Down Expand Up @@ -66,7 +64,7 @@ pub fn convert_schema(schema: &AvroSchema) -> Result<Schema> {
&field.schema,
Some(&field.name),
false,
Some(external_props(&field.schema)),
external_props(&field.schema),
)?)
}
}
Expand All @@ -84,7 +82,7 @@ fn schema_to_field(
schema: &AvroSchema,
name: Option<&str>,
mut nullable: bool,
props: Option<BTreeMap<String, String>>,
props: Metadata,
) -> Result<Field> {
let data_type = match schema {
AvroSchema::Null => DataType::Null,
Expand Down Expand Up @@ -129,7 +127,7 @@ fn schema_to_field(
item_schema,
Some("item"), // default name for list items
false,
None,
Metadata::default(),
)?)),
AvroSchema::Map(_) => todo!("Avro maps are mapped to MapArrays"),
AvroSchema::Union(schemas) => {
Expand All @@ -141,7 +139,7 @@ fn schema_to_field(
.iter()
.find(|&schema| !matches!(schema, AvroSchema::Null))
{
schema_to_field(schema, None, has_nullable, None)?
schema_to_field(schema, None, has_nullable, Metadata::default())?
.data_type()
.clone()
} else {
Expand All @@ -153,7 +151,7 @@ fn schema_to_field(
} else {
let fields = schemas
.iter()
.map(|s| schema_to_field(s, None, has_nullable, None))
.map(|s| schema_to_field(s, None, has_nullable, Metadata::default()))
.collect::<Result<Vec<Field>>>()?;
DataType::Union(fields, None, UnionMode::Dense)
}
Expand All @@ -162,7 +160,7 @@ fn schema_to_field(
let fields: Result<Vec<Field>> = fields
.iter()
.map(|field| {
let mut props = BTreeMap::new();
let mut props = Metadata::new();
if let Some(doc) = &field.doc {
props.insert("avro::doc".to_string(), doc.clone());
}
Expand All @@ -173,7 +171,7 @@ fn schema_to_field(
&field.schema,
Some(&format!("{}.{}", name, field.name)),
false,
Some(props),
props,
)
})
.collect();
Expand Down Expand Up @@ -201,7 +199,5 @@ fn schema_to_field(

let name = name.unwrap_or_default();

let mut field = Field::new(name, data_type, nullable);
field.set_metadata(props);
Ok(field)
Ok(Field::new(name, data_type, nullable).with_metadata(props))
}
Loading

0 comments on commit 5986bd1

Please sign in to comment.