Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added example and guide entry.
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Aug 27, 2021
1 parent dcbb8db commit a82714e
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 22 deletions.
51 changes: 51 additions & 0 deletions examples/extension.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
use std::io::{Cursor, Seek, Write};
use std::sync::Arc;

use arrow2::array::*;
use arrow2::datatypes::*;
use arrow2::error::Result;
use arrow2::io::ipc::read;
use arrow2::io::ipc::write;
use arrow2::record_batch::RecordBatch;

/// Writes `array` to `writer` in the Arrow IPC file format.
///
/// The array becomes the only column (named `"a"`, declared non-nullable) of a
/// single record batch; the schema is derived from the array's own data type.
///
/// # Errors
///
/// Propagates any error returned while creating the IPC file writer, while
/// building the record batch, or while writing the batch.
fn write_ipc<W: Write + Seek>(writer: &mut W, array: impl Array + 'static) -> Result<()> {
    // the schema mirrors the array: one field carrying the array's data type
    let field = Field::new("a", array.data_type().clone(), false);
    let schema = Schema::new(vec![field]);

    let mut file_writer = write::FileWriter::try_new(writer, &schema)?;

    // a batch holding just that one column
    let columns: Vec<Arc<dyn Array>> = vec![Arc::new(array)];
    let batch = RecordBatch::try_new(Arc::new(schema), columns)?;

    file_writer.write(&batch)
}

/// Reads the first record batch from an in-memory Arrow IPC file.
///
/// # Errors
///
/// Propagates any error from reading the file metadata or the first batch.
///
/// # Panics
///
/// Panics if the file reader yields no batch at all.
fn read_ipc(reader: &[u8]) -> Result<RecordBatch> {
    let mut cursor = Cursor::new(reader);
    let metadata = read::read_file_metadata(&mut cursor)?;
    // `None`: no projection argument is supplied to the reader
    let mut batches = read::FileReader::new(&mut cursor, metadata, None);
    batches.next().unwrap()
}

/// Round-trips an extension-typed array through the Arrow IPC file format and
/// checks that the extension data type survives unchanged.
fn main() -> Result<()> {
    // declare the extension type "date16", backed by `UInt16` and without metadata
    let extension_type =
        DataType::Extension("date16".to_string(), Box::new(DataType::UInt16), None);

    // wrap a plain `UInt16` array as an array of that extension type
    let values = UInt16Array::from_slice([1, 2]);
    let extension_array = ExtensionArray::from_data(extension_type.clone(), Arc::new(values));

    // from this point on, the array is handled like any other array

    // serialize it to an in-memory IPC file
    let mut buffer = Cursor::new(Vec::new());
    write_ipc(&mut buffer, extension_array)?;

    // deserialize the bytes that were just written
    let bytes = buffer.into_inner();
    let batch = read_ipc(&bytes)?;

    // the column read back must still carry the extension data type
    let column = &batch.columns()[0];
    assert_eq!(column.data_type(), &extension_type);

    // see https://arrow.apache.org/docs/format/Columnar.html#extension-types
    // for how other consumers can interpret the type.
    Ok(())
}
1 change: 1 addition & 0 deletions guide/src/SUMMARY.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- [Compute](./compute.md)
- [Metadata](./metadata.md)
- [Foreign interfaces](./ffi.md)
- [Extension](./extension.md)
- [IO](./io/README.md)
- [Read CSV](./io/csv_reader.md)
- [Write CSV](./io/csv_write.md)
Expand Down
9 changes: 9 additions & 0 deletions guide/src/extension.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Extension types

This crate supports Arrow's ["extension type"](https://arrow.apache.org/docs/format/Columnar.html#extension-types),
to declare, use, and share custom logical types. The following example shows how
to declare one:

```rust
{{#include ../../../examples/extension.rs}}
```
47 changes: 25 additions & 22 deletions src/io/ipc/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,8 @@ pub(crate) struct FbFieldType<'b> {
fn write_metadata<'a>(
fbb: &mut FlatBufferBuilder<'a>,
metadata: &BTreeMap<String, String>,
) -> Vec<WIPOffset<ipc::KeyValue<'a>>> {
let mut kv_vec = vec![];
kv_vec: &mut Vec<WIPOffset<ipc::KeyValue<'a>>>,
) {
for (k, v) in metadata {
if k != "ARROW:extension:name" || k != "ARROW:extension:metadata" {
let kv_args = ipc::KeyValueArgs {
Expand All @@ -344,37 +344,40 @@ fn write_metadata<'a>(
kv_vec.push(ipc::KeyValue::create(fbb, &kv_args));
}
}
kv_vec
}

/// Create an IPC Field from an Arrow Field
pub(crate) fn build_field<'a>(
fbb: &mut FlatBufferBuilder<'a>,
field: &Field,
) -> WIPOffset<ipc::Field<'a>> {
// Optional custom metadata.
let mut fb_metadata = None;
// custom metadata.
let mut kv_vec = vec![];
if let DataType::Extension(name, _, metadata) = field.data_type() {
// append extension information.
let kv_args = ipc::KeyValueArgs {
key: Some(fbb.create_string("ARROW:extension:name")),
value: Some(fbb.create_string(name.as_str())),
};
kv_vec.push(ipc::KeyValue::create(fbb, &kv_args));
if let Some(metadata) = metadata {
let kv_args = ipc::KeyValueArgs {
key: Some(fbb.create_string("ARROW:extension:metadata")),
value: Some(fbb.create_string(metadata.as_str())),
};
kv_vec.push(ipc::KeyValue::create(fbb, &kv_args));
}
}
if let Some(metadata) = field.metadata() {
if !metadata.is_empty() {
let mut kv_vec = write_metadata(fbb, metadata);
if let DataType::Extension(name, _, metadata) = field.data_type() {
// append extension information.
let kv_args = ipc::KeyValueArgs {
key: Some(fbb.create_string("ARROW:extension:name")),
value: Some(fbb.create_string(name.as_str())),
};
kv_vec.push(ipc::KeyValue::create(fbb, &kv_args));
if let Some(metadata) = metadata {
let kv_args = ipc::KeyValueArgs {
key: Some(fbb.create_string("ARROW:extension:metadata")),
value: Some(fbb.create_string(metadata.as_str())),
};
kv_vec.push(ipc::KeyValue::create(fbb, &kv_args));
}
}
fb_metadata = Some(fbb.create_vector(&kv_vec));
write_metadata(fbb, metadata, &mut kv_vec);
}
};
let fb_metadata = if !kv_vec.is_empty() {
Some(fbb.create_vector(&kv_vec))
} else {
None
};

let fb_field_name = fbb.create_string(field.name().as_str());
let field_type = get_fb_field_type(field.data_type(), field.is_nullable(), fbb);
Expand Down

0 comments on commit a82714e

Please sign in to comment.