From 982d2596d8cad8fea4f6ff08e6a3814223c085cc Mon Sep 17 00:00:00 2001
From: "Jorge C. Leitao" <jorgecarleitao@gmail.com>
Date: Wed, 19 Jan 2022 19:58:39 +0000
Subject: [PATCH] Added Utf8Sequence

---
 src/array/display.rs                    |   2 +
 src/array/equal/mod.rs                  |  11 ++
 src/array/equal/utf8_sequence.rs        |   5 +
 src/array/ffi.rs                        |   2 +
 src/array/growable/mod.rs               |   1 +
 src/array/mod.rs                        |  10 ++
 src/array/string_sequence/iterator.rs   |  80 +++++++++
 src/array/string_sequence/mod.rs        | 220 ++++++++++++++++++++++++
 src/compute/aggregate/memory.rs         |  13 ++
 src/datatypes/mod.rs                    |   6 +
 src/datatypes/physical_type.rs          |   4 +
 src/ffi/array.rs                        |   2 +
 src/ffi/bridge.rs                       |   2 +
 src/ffi/schema.rs                       |   6 +
 src/io/ipc/read/deserialize.rs          |   8 +-
 src/io/ipc/write/common.rs              |   2 +-
 src/io/ipc/write/schema.rs              |   5 +
 src/io/ipc/write/serialize.rs           |   3 +
 src/io/json_integration/read/array.rs   |   3 +
 src/io/json_integration/write/schema.rs |   3 +
 src/io/parquet/read/mod.rs              |  11 +-
 src/scalar/mod.rs                       |   1 +
 22 files changed, 393 insertions(+), 7 deletions(-)
 create mode 100644 src/array/equal/utf8_sequence.rs
 create mode 100644 src/array/string_sequence/iterator.rs
 create mode 100644 src/array/string_sequence/mod.rs
diff --git a/src/array/display.rs b/src/array/display.rs
index de7314927c8..f15f681a576 100644
--- a/src/array/display.rs
+++ b/src/array/display.rs
@@ -124,6 +124,8 @@ pub fn get_value_display<'a>(array: &'a dyn Array) -> Box<dyn Fn(usize) -> Strin
         }),
         Utf8 => dyn_display!(array, Utf8Array<i32>, |x| x),
         LargeUtf8 => dyn_display!(array, Utf8Array<i64>, |x| x),
+        Utf8Sequence => dyn_display!(array, StringSequenceArray<i32>, |x| x),
+        LargeUtf8Sequence => dyn_display!(array, StringSequenceArray<i64>, |x| x),
         Decimal(_, scale) => {
             // The number 999.99 has a precision of 5 and scale of 2
             let scale = *scale as u32;
diff --git a/src/array/equal/mod.rs b/src/array/equal/mod.rs
index e916aaa0bb6..f00ddbc657b 100644
--- a/src/array/equal/mod.rs
+++ b/src/array/equal/mod.rs
@@ -14,6 +14,7 @@ mod primitive;
 mod struct_;
 mod union;
 mod utf8;
+mod utf8_sequence;
 
 impl PartialEq for dyn Array + '_ {
     fn eq(&self, that: &dyn Array) -> bool {
@@ -201,6 +202,16 @@ pub fn equal(lhs: &dyn Array, rhs: &dyn Array) -> bool {
             let rhs = rhs.as_any().downcast_ref().unwrap();
             utf8::equal::<i64>(lhs, rhs)
         }
+        Utf8Sequence => {
+            let lhs = lhs.as_any().downcast_ref().unwrap();
+            let rhs = rhs.as_any().downcast_ref().unwrap();
+            utf8_sequence::equal::<i32>(lhs, rhs)
+        }
+        LargeUtf8Sequence => {
+            let lhs = lhs.as_any().downcast_ref().unwrap();
+            let rhs = rhs.as_any().downcast_ref().unwrap();
+            utf8_sequence::equal::<i64>(lhs, rhs)
+        }
         Binary => {
             let lhs = lhs.as_any().downcast_ref().unwrap();
             let rhs = rhs.as_any().downcast_ref().unwrap();
diff --git a/src/array/equal/utf8_sequence.rs b/src/array/equal/utf8_sequence.rs
new file mode 100644
index 00000000000..6739fcbb79c
--- /dev/null
+++ b/src/array/equal/utf8_sequence.rs
@@ -0,0 +1,5 @@
+use crate::array::{Offset, StringSequenceArray};
+
+pub(super) fn equal<O: Offset>(lhs: &StringSequenceArray<O>, rhs: &StringSequenceArray<O>) -> bool {
+    lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len() && lhs.iter().eq(rhs.iter())
+}
diff --git a/src/array/ffi.rs b/src/array/ffi.rs
index a9d0a089074..c95435ed014 100644
--- a/src/array/ffi.rs
+++ b/src/array/ffi.rs
@@ -67,6 +67,8 @@ pub fn offset_buffers_children_dictionary(array: &dyn Array) -> BuffersChildren
         FixedSizeBinary => ffi_dyn!(array, FixedSizeBinaryArray),
         Utf8 => ffi_dyn!(array, Utf8Array::<i32>),
         LargeUtf8 => ffi_dyn!(array, Utf8Array::<i64>),
+        Utf8Sequence => todo!("Arrow does not yet support exporting sequence views via FFI"),
+        LargeUtf8Sequence => todo!("Arrow does not yet support exporting sequence views via FFI"),
         List => ffi_dyn!(array, ListArray::<i32>),
         LargeList => ffi_dyn!(array, ListArray::<i64>),
         FixedSizeList => ffi_dyn!(array, FixedSizeListArray),
diff --git a/src/array/growable/mod.rs b/src/array/growable/mod.rs
index c6d45356237..507c779caf7 100644
--- a/src/array/growable/mod.rs
+++ b/src/array/growable/mod.rs
@@ -130,5 +130,6 @@ pub fn make_growable<'a>(
                 ))
             })
         }
+        _ => todo!("Sequence views"),
     }
 }
diff --git a/src/array/mod.rs b/src/array/mod.rs
index ce205f32c89..6413f63a7c5 100644
--- a/src/array/mod.rs
+++ b/src/array/mod.rs
@@ -229,6 +229,8 @@ impl std::fmt::Debug for dyn Array + '_ {
             FixedSizeBinary => fmt_dyn!(self, FixedSizeBinaryArray, f),
             Utf8 => fmt_dyn!(self, Utf8Array::<i32>, f),
             LargeUtf8 => fmt_dyn!(self, Utf8Array::<i64>, f),
+            Utf8Sequence => fmt_dyn!(self, StringSequenceArray::<i32>, f),
+            LargeUtf8Sequence => fmt_dyn!(self, StringSequenceArray::<i64>, f),
             List => fmt_dyn!(self, ListArray::<i32>, f),
             LargeList => fmt_dyn!(self, ListArray::<i64>, f),
             FixedSizeList => fmt_dyn!(self, FixedSizeListArray, f),
@@ -258,6 +260,8 @@ pub fn new_empty_array(data_type: DataType) -> Box<dyn Array> {
         FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_empty(data_type)),
         Utf8 => Box::new(Utf8Array::<i32>::new_empty(data_type)),
         LargeUtf8 => Box::new(Utf8Array::<i64>::new_empty(data_type)),
+        Utf8Sequence => Box::new(StringSequenceArray::<i32>::new_empty(data_type)),
+        LargeUtf8Sequence => Box::new(StringSequenceArray::<i64>::new_empty(data_type)),
         List => Box::new(ListArray::<i32>::new_empty(data_type)),
         LargeList => Box::new(ListArray::<i64>::new_empty(data_type)),
         FixedSizeList => Box::new(FixedSizeListArray::new_empty(data_type)),
@@ -288,6 +292,8 @@ pub fn new_null_array(data_type: DataType, length: usize) -> Box<dyn Array> {
         FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_null(data_type, length)),
         Utf8 => Box::new(Utf8Array::<i32>::new_null(data_type, length)),
         LargeUtf8 => Box::new(Utf8Array::<i64>::new_null(data_type, length)),
+        Utf8Sequence => Box::new(StringSequenceArray::<i32>::new_null(data_type, length)),
+        LargeUtf8Sequence => Box::new(StringSequenceArray::<i64>::new_null(data_type, length)),
         List => Box::new(ListArray::<i32>::new_null(data_type, length)),
         LargeList => Box::new(ListArray::<i64>::new_null(data_type, length)),
         FixedSizeList => Box::new(FixedSizeListArray::new_null(data_type, length)),
@@ -326,6 +332,8 @@ pub fn clone(array: &dyn Array) -> Box<dyn Array> {
         FixedSizeBinary => clone_dyn!(array, FixedSizeBinaryArray),
         Utf8 => clone_dyn!(array, Utf8Array::<i32>),
         LargeUtf8 => clone_dyn!(array, Utf8Array::<i64>),
+        Utf8Sequence => clone_dyn!(array, StringSequenceArray::<i32>),
+        LargeUtf8Sequence => clone_dyn!(array, StringSequenceArray::<i64>),
         List => clone_dyn!(array, ListArray::<i32>),
         LargeList => clone_dyn!(array, ListArray::<i64>),
         FixedSizeList => clone_dyn!(array, FixedSizeListArray),
@@ -359,6 +367,7 @@ mod map;
 mod null;
 mod primitive;
 mod specification;
+mod string_sequence;
 mod struct_;
 mod union;
 mod utf8;
@@ -381,6 +390,7 @@ pub use list::{ListArray, MutableListArray};
 pub use map::MapArray;
 pub use null::NullArray;
 pub use primitive::*;
+pub use string_sequence::StringSequenceArray;
 pub use struct_::StructArray;
 pub use union::UnionArray;
 pub use utf8::{MutableUtf8Array, Utf8Array, Utf8ValuesIter};
diff --git a/src/array/string_sequence/iterator.rs b/src/array/string_sequence/iterator.rs
new file mode 100644
index 00000000000..c0859680311
--- /dev/null
+++ b/src/array/string_sequence/iterator.rs
@@ -0,0 +1,80 @@
+use crate::bitmap::utils::{zip_validity, ZipValidity};
+use crate::{array::Offset, trusted_len::TrustedLen};
+
+use super::StringSequenceArray;
+
+/// Iterator of values of an `Utf8Array`.
+#[derive(Debug, Clone)]
+pub struct StringSequenceValuesIter<'a, O: Offset> {
+    array: &'a StringSequenceArray<O>,
+    index: usize,
+    end: usize,
+}
+
+impl<'a, O: Offset> StringSequenceValuesIter<'a, O> {
+    /// Creates a new [`StringSequenceValuesIter`]
+    pub fn new(array: &'a StringSequenceArray<O>) -> Self {
+        Self {
+            array,
+            index: 0,
+            end: array.len(),
+        }
+    }
+}
+
+impl<'a, O: Offset> Iterator for StringSequenceValuesIter<'a, O> {
+    type Item = &'a str;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.index == self.end {
+            return None;
+        }
+        let old = self.index;
+        self.index += 1;
+        Some(unsafe { self.array.value_unchecked(old) })
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (self.end - self.index, Some(self.end - self.index))
+    }
+}
+
+impl<'a, O: Offset> DoubleEndedIterator for StringSequenceValuesIter<'a, O> {
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.index == self.end {
+            None
+        } else {
+            self.end -= 1;
+            Some(unsafe { self.array.value_unchecked(self.end) })
+        }
+    }
+}
+
+impl<'a, O: Offset> IntoIterator for &'a StringSequenceArray<O> {
+    type Item = Option<&'a str>;
+    type IntoIter = ZipValidity<'a, &'a str, StringSequenceValuesIter<'a, O>>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl<'a, O: Offset> StringSequenceArray<O> {
+    /// Returns an iterator of `Option<&str>`
+    pub fn iter(&'a self) -> ZipValidity<'a, &'a str, StringSequenceValuesIter<'a, O>> {
+        zip_validity(
+            StringSequenceValuesIter::new(self),
+            self.validity.as_ref().map(|x| x.iter()),
+        )
+    }
+
+    /// Returns an iterator of `&str`
+    pub fn values_iter(&'a self) -> StringSequenceValuesIter<'a, O> {
+        StringSequenceValuesIter::new(self)
+    }
+}
+
+unsafe impl<O: Offset> TrustedLen for StringSequenceValuesIter<'_, O> {}
diff --git a/src/array/string_sequence/mod.rs b/src/array/string_sequence/mod.rs
new file mode 100644
index 00000000000..3b261cde0a0
--- /dev/null
+++ b/src/array/string_sequence/mod.rs
@@ -0,0 +1,220 @@
+use crate::{bitmap::Bitmap, buffer::Buffer, datatypes::DataType};
+
+use super::{Array, Offset};
+
+mod iterator;
+
+/// An equivalent representation of [`Vec<Option<String>>`] with the following properties:
+/// * Clone is O(1)
+/// * Slice is O(1)
+/// * row-based operations are `O(N)`
+/// # Safety
+/// The following invariants are uphold:
+/// * offsets.len() == lengths.len()
+/// * offsets[i] + lengths[i] < values.len() for all i
+/// * `&values[offsets[i]..offsets[i] + lengths[i]]` is valid utf8 for all i
+#[derive(Debug, Clone)]
+pub struct StringSequenceArray<O: Offset> {
+    data_type: DataType,
+    validity: Option<Bitmap>,
+    values: Buffer<u8>,
+    offsets: Buffer<O>,
+    lengths: Buffer<O>,
+}
+
+impl<O: Offset> StringSequenceArray<O> {
+    /// returns the [`DataType`] of this [`StringSequenceArray`]
+    #[inline]
+    pub fn data_type(&self) -> &DataType {
+        &self.data_type
+    }
+
+    /// returns the length of this [`StringSequenceArray`]
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.offsets.len()
+    }
+
+    /// The validity
+    pub fn validity(&self) -> Option<&Bitmap> {
+        self.validity.as_ref()
+    }
+
+    /// Returns the offsets that together with `lengths` slice `.values()` of values.
+    #[inline]
+    pub fn offsets(&self) -> &Buffer<O> {
+        &self.offsets
+    }
+
+    /// Returns the offsets that together with `lengths` slice `.values()` of values.
+    #[inline]
+    pub fn lengths(&self) -> &Buffer<O> {
+        &self.lengths
+    }
+
+    /// Returns all values in this array. Use `.offsets()` to slice them.
+    #[inline]
+    pub fn values(&self) -> &Buffer<u8> {
+        &self.values
+    }
+
+    /// Returns the element at index `i`
+    /// # Panics
+    /// iff `i >= self.len()`
+    #[inline]
+    pub fn value(&self, i: usize) -> &str {
+        assert!(i < self.len());
+        // soundness: invariant verified above
+        unsafe { self.value_unchecked(i) }
+    }
+
+    /// Returns the element at index `i`
+    /// # Safety
+    /// Assumes that the `i < self.len`.
+    #[inline]
+    pub unsafe fn value_unchecked(&self, i: usize) -> &str {
+        let start = self.offsets.get_unchecked(i).to_usize();
+        let length = self.lengths.get_unchecked(i).to_usize();
+
+        // soundness: the invariant of the struct
+        let slice = unsafe { self.values.get_unchecked(start..start + length) };
+
+        // soundness: the invariant of the struct
+        std::str::from_utf8_unchecked(slice)
+    }
+}
+
+impl<O: Offset> StringSequenceArray<O> {
+    /// Creates an empty [`StringSequenceArray`], i.e. whose `.len` is zero.
+    pub fn new_empty(data_type: DataType) -> Self {
+        Self {
+            data_type,
+            validity: None,
+            lengths: vec![].into(),
+            offsets: vec![].into(),
+            values: vec![].into(),
+        }
+    }
+
+    /// Creates an null [`BinaryArray`], i.e. whose `.null_count() == .len()`.
+    #[inline]
+    pub fn new_null(data_type: DataType, length: usize) -> Self {
+        Self {
+            data_type,
+            validity: Some(Bitmap::new_zeroed(length)),
+            lengths: vec![O::default(); length].into(),
+            offsets: vec![O::default(); length].into(),
+            values: vec![].into(),
+        }
+    }
+
+    /// Creates a new [`StringSequenceArray`] with no validity
+    pub fn from_values<T: AsRef<str>, I: Iterator<Item = T>>(iter: I) -> Self {
+        let mut values = vec![];
+        let mut offsets = Vec::with_capacity(iter.size_hint().0);
+        let mut lengths = Vec::with_capacity(iter.size_hint().0);
+        let mut length = O::default();
+        for item in iter {
+            let item = item.as_ref();
+            values.extend_from_slice(item.as_bytes());
+
+            let new_length = O::from_usize(item.len()).unwrap();
+            lengths.push(new_length);
+            length += new_length;
+            offsets.push(length);
+        }
+
+        let data_type = if O::is_large() {
+            DataType::LargeUtf8Sequence
+        } else {
+            DataType::Utf8Sequence
+        };
+
+        Self {
+            data_type,
+            validity: None,
+            values: values.into(),
+            offsets: offsets.into(),
+            lengths: lengths.into(),
+        }
+    }
+
+    /// Creates a new [`StringSequenceArray`] by slicing this [`StringSequenceArray`].
+    /// # Implementation
+    /// This function is `O(1)`: all data will be shared between both arrays.
+    /// # Panics
+    /// iff `offset + length > self.len()`.
+    pub fn slice(&self, offset: usize, length: usize) -> Self {
+        assert!(
+            offset + length <= self.len(),
+            "the offset of the new Buffer cannot exceed the existing length"
+        );
+        unsafe { self.slice_unchecked(offset, length) }
+    }
+
+    /// Creates a new [`StringSequenceArray`] by slicing this [`StringSequenceArray`].
+    /// # Implementation
+    /// This function is `O(1)`: all data will be shared between both arrays.
+    /// # Safety
+    /// The caller must ensure that `offset + length <= self.len()`.
+    pub unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Self {
+        let validity = self
+            .validity
+            .clone()
+            .map(|x| x.slice_unchecked(offset, length));
+        let offsets = self.offsets.clone().slice_unchecked(offset, length);
+        let lengths = self.lengths.clone().slice_unchecked(offset, length);
+        Self {
+            data_type: self.data_type.clone(),
+            offsets,
+            lengths,
+            values: self.values.clone(),
+            validity,
+        }
+    }
+
+    /// Clones this [`StringSequenceArray`] with a different validity.
+    /// # Panic
+    /// Panics iff `validity.len() != self.len()`.
+    pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
+        if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
+            panic!("validity's length must be equal to the array's length")
+        }
+        let mut arr = self.clone();
+        arr.validity = validity;
+        arr
+    }
+}
+
+impl<O: Offset> Array for StringSequenceArray<O> {
+    #[inline]
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    #[inline]
+    fn len(&self) -> usize {
+        self.len()
+    }
+
+    #[inline]
+    fn data_type(&self) -> &DataType {
+        &self.data_type
+    }
+
+    fn validity(&self) -> Option<&Bitmap> {
+        self.validity.as_ref()
+    }
+
+    fn slice(&self, offset: usize, length: usize) -> Box<dyn Array> {
+        Box::new(self.slice(offset, length))
+    }
+
+    unsafe fn slice_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
+        Box::new(self.slice_unchecked(offset, length))
+    }
+
+    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
+        Box::new(self.with_validity(validity))
+    }
+}
diff --git a/src/compute/aggregate/memory.rs b/src/compute/aggregate/memory.rs
index 8c8617d936c..b61d6698998 100644
--- a/src/compute/aggregate/memory.rs
+++ b/src/compute/aggregate/memory.rs
@@ -16,6 +16,17 @@ macro_rules! dyn_binary {
     }};
 }
 
+macro_rules! dyn_sequence {
+    ($array:expr, $ty:ty, $o:ty) => {{
+        let array = $array.as_any().downcast_ref::<$ty>().unwrap();
+
+        array.values().len()
+            + array.offsets().len() * std::mem::size_of::<$o>()
+            + array.lengths().len() * std::mem::size_of::<$o>()
+            + validity_size(array.validity())
+    }};
+}
+
 /// Returns the total (heap) allocated size of the array in bytes.
 /// # Implementation
 /// This estimation is the sum of the size of its buffers, validity, including nested arrays.
@@ -54,6 +65,8 @@ pub fn estimated_bytes_size(array: &dyn Array) -> usize {
         LargeBinary => dyn_binary!(array, BinaryArray<i64>, i64),
         Utf8 => dyn_binary!(array, Utf8Array<i32>, i32),
         LargeUtf8 => dyn_binary!(array, Utf8Array<i64>, i64),
+        Utf8Sequence => dyn_sequence!(array, StringSequenceArray<i32>, i32),
+        LargeUtf8Sequence => dyn_sequence!(array, StringSequenceArray<i64>, i64),
         List => {
             let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap();
             estimated_bytes_size(array.values().as_ref())
diff --git a/src/datatypes/mod.rs b/src/datatypes/mod.rs
index 10cdfecf4c0..dc3659e220a 100644
--- a/src/datatypes/mod.rs
+++ b/src/datatypes/mod.rs
@@ -96,6 +96,10 @@ pub enum DataType {
     Utf8,
     /// A variable-length UTF-8 encoded string whose offsets are represented as [`i64`].
     LargeUtf8,
+    /// A another repr of utf8
+    Utf8Sequence,
+    /// A another repr of utf8
+    LargeUtf8Sequence,
     /// A list of some logical data type whose offsets are represented as [`i32`].
     List(Box<Field>),
     /// A list of some logical data type with a fixed number of elements.
@@ -243,6 +247,8 @@ impl DataType {
             LargeBinary => PhysicalType::LargeBinary,
             Utf8 => PhysicalType::Utf8,
             LargeUtf8 => PhysicalType::LargeUtf8,
+            Utf8Sequence => PhysicalType::Utf8Sequence,
+            LargeUtf8Sequence => PhysicalType::LargeUtf8Sequence,
             List(_) => PhysicalType::List,
             FixedSizeList(_, _) => PhysicalType::FixedSizeList,
             LargeList(_) => PhysicalType::LargeList,
diff --git a/src/datatypes/physical_type.rs b/src/datatypes/physical_type.rs
index 7e15cb19629..d226a282f06 100644
--- a/src/datatypes/physical_type.rs
+++ b/src/datatypes/physical_type.rs
@@ -21,6 +21,10 @@ pub enum PhysicalType {
     Utf8,
     /// A variable-length string in Unicode with UFT-8 encoding and 64-bit offsets.
     LargeUtf8,
+    /// A variable-length string in Unicode with UFT-8 encoding and 32-bit offsets.
+    Utf8Sequence,
+    /// A variable-length string in Unicode with UFT-8 encoding and 32-bit offsets.
+    LargeUtf8Sequence,
     /// A list of some data type with variable length.
     List,
     /// A list of some data type with fixed length.
diff --git a/src/ffi/array.rs b/src/ffi/array.rs
index 947a3c4e3df..4b0abac4752 100644
--- a/src/ffi/array.rs
+++ b/src/ffi/array.rs
@@ -19,6 +19,8 @@ pub unsafe fn try_from<A: ArrowArrayRef>(array: A) -> Result<Box<dyn Array>> {
         }),
         Utf8 => Box::new(Utf8Array::<i32>::try_from_ffi(array)?),
         LargeUtf8 => Box::new(Utf8Array::<i64>::try_from_ffi(array)?),
+        Utf8Sequence => todo!("Arrow does not yet support exporting sequence views via FFI"),
+        LargeUtf8Sequence => todo!("Arrow does not yet support exporting sequence views via FFI"),
         Binary => Box::new(BinaryArray::<i32>::try_from_ffi(array)?),
         LargeBinary => Box::new(BinaryArray::<i64>::try_from_ffi(array)?),
         FixedSizeBinary => Box::new(FixedSizeBinaryArray::try_from_ffi(array)?),
diff --git a/src/ffi/bridge.rs b/src/ffi/bridge.rs
index 1a51e56e780..0dff4f0c6e8 100644
--- a/src/ffi/bridge.rs
+++ b/src/ffi/bridge.rs
@@ -26,6 +26,8 @@ pub fn align_to_c_data_interface(array: Arc<dyn Array>) -> Arc<dyn Array> {
         FixedSizeBinary => ffi_dyn!(array, FixedSizeBinaryArray),
         Utf8 => ffi_dyn!(array, Utf8Array::<i32>),
         LargeUtf8 => ffi_dyn!(array, Utf8Array::<i64>),
+        Utf8Sequence => todo!("Arrow does not yet support exporting sequence views via FFI"),
+        LargeUtf8Sequence => todo!("Arrow does not yet support exporting sequence views via FFI"),
         List => ffi_dyn!(array, ListArray::<i32>),
         LargeList => ffi_dyn!(array, ListArray::<i64>),
         FixedSizeList => ffi_dyn!(array, FixedSizeListArray),
diff --git a/src/ffi/schema.rs b/src/ffi/schema.rs
index 38214101879..8aaa1757c7c 100644
--- a/src/ffi/schema.rs
+++ b/src/ffi/schema.rs
@@ -396,6 +396,12 @@ fn to_format(data_type: &DataType) -> String {
         DataType::LargeBinary => "Z".to_string(),
         DataType::Utf8 => "u".to_string(),
         DataType::LargeUtf8 => "U".to_string(),
+        DataType::Utf8Sequence => {
+            todo!("Arrow does not yet support exporting sequence views via FFI")
+        }
+        DataType::LargeUtf8Sequence => {
+            todo!("Arrow does not yet support exporting sequence views via FFI")
+        }
         DataType::Date32 => "tdD".to_string(),
         DataType::Date64 => "tdm".to_string(),
         DataType::Time32(TimeUnit::Second) => "tts".to_string(),
diff --git a/src/io/ipc/read/deserialize.rs b/src/io/ipc/read/deserialize.rs
index 56bd0632128..398ddb649cb 100644
--- a/src/io/ipc/read/deserialize.rs
+++ b/src/io/ipc/read/deserialize.rs
@@ -9,7 +9,7 @@ use arrow_format::ipc::MetadataVersion;
 
 use crate::array::*;
 use crate::datatypes::{DataType, Field, PhysicalType};
-use crate::error::Result;
+use crate::error::{ArrowError, Result};
 use crate::io::ipc::IpcField;
 
 use super::{array::*, Dictionaries};
@@ -115,6 +115,9 @@ pub fn read<R: Read + Seek>(
             )?;
             Ok(Arc::new(array))
         }
+        LargeUtf8Sequence | Utf8Sequence => Err(ArrowError::OutOfSpec(
+            "Arrow does not yet support exporting sequence views via IPC".to_string(),
+        )),
         List => read_list::<i32, _>(
             field_nodes,
             data_type,
@@ -223,6 +226,9 @@ pub fn skip(
         Primitive(_) => skip_primitive(field_nodes, buffers),
         LargeBinary | Binary => skip_binary(field_nodes, buffers),
         LargeUtf8 | Utf8 => skip_utf8(field_nodes, buffers),
+        LargeUtf8Sequence | Utf8Sequence => Err(ArrowError::OutOfSpec(
+            "Arrow does not yet support exporting sequence views via IPC".to_string(),
+        )),
         FixedSizeBinary => skip_fixed_size_binary(field_nodes, buffers),
         List => skip_list::<i32>(field_nodes, data_type, buffers),
         LargeList => skip_list::<i64>(field_nodes, data_type, buffers),
diff --git a/src/io/ipc/write/common.rs b/src/io/ipc/write/common.rs
index 2db05ee4afc..ed95c282fa1 100644
--- a/src/io/ipc/write/common.rs
+++ b/src/io/ipc/write/common.rs
@@ -39,7 +39,7 @@ fn encode_dictionary(
     use PhysicalType::*;
     match array.data_type().to_physical_type() {
         Utf8 | LargeUtf8 | Binary | LargeBinary | Primitive(_) | Boolean | Null
-        | FixedSizeBinary => Ok(()),
+        | FixedSizeBinary | Utf8Sequence | LargeUtf8Sequence => Ok(()),
         Dictionary(key_type) => match_integer_type!(key_type, |$T| {
             let dict_id = field.dictionary_id
                 .ok_or_else(|| ArrowError::InvalidArgumentError("Dictionaries must have an associated id".to_string()))?;
diff --git a/src/io/ipc/write/schema.rs b/src/io/ipc/write/schema.rs
index 0636cb9e6c0..2c30a9db1a8 100644
--- a/src/io/ipc/write/schema.rs
+++ b/src/io/ipc/write/schema.rs
@@ -201,6 +201,9 @@ fn serialize_type(data_type: &DataType) -> arrow_format::ipc::Type {
         LargeBinary => ipc::Type::LargeBinary(Box::new(ipc::LargeBinary {})),
         Utf8 => ipc::Type::Utf8(Box::new(ipc::Utf8 {})),
         LargeUtf8 => ipc::Type::LargeUtf8(Box::new(ipc::LargeUtf8 {})),
+        Utf8Sequence | LargeUtf8Sequence => {
+            todo!("Arrow does not yet support exporting sequence views via IPC")
+        }
         FixedSizeBinary(size) => ipc::Type::FixedSizeBinary(Box::new(ipc::FixedSizeBinary {
             byte_width: *size as i32,
         })),
@@ -281,6 +284,8 @@ fn serialize_children(data_type: &DataType, ipc_field: &IpcField) -> Vec<arrow_f
         | LargeBinary
         | Utf8
         | LargeUtf8
+        | Utf8Sequence
+        | LargeUtf8Sequence
         | Decimal(_, _) => vec![],
         FixedSizeList(inner, _) | LargeList(inner) | List(inner) | Map(inner, _) => {
             vec![serialize_field(inner, &ipc_field.fields[0])]
diff --git a/src/io/ipc/write/serialize.rs b/src/io/ipc/write/serialize.rs
index c8f535c850f..78781db0302 100644
--- a/src/io/ipc/write/serialize.rs
+++ b/src/io/ipc/write/serialize.rs
@@ -555,6 +555,9 @@ pub fn write(
             is_little_endian,
             compression,
         ),
+        Utf8Sequence | LargeUtf8Sequence => {
+            todo!("Arrow does not yet support exporting sequence views via IPC")
+        }
         List => write_list::<i32>(
             array,
             buffers,
diff --git a/src/io/json_integration/read/array.rs b/src/io/json_integration/read/array.rs
index 1118bb1978b..718a1360340 100644
--- a/src/io/json_integration/read/array.rs
+++ b/src/io/json_integration/read/array.rs
@@ -301,6 +301,9 @@ pub fn to_array(
         LargeBinary => Ok(to_binary::<i64>(json_col, data_type)),
         Utf8 => Ok(to_utf8::<i32>(json_col, data_type)),
         LargeUtf8 => Ok(to_utf8::<i64>(json_col, data_type)),
+        LargeUtf8Sequence | Utf8Sequence => Err(ArrowError::OutOfSpec(
+            "Arrow does not yet support exporting sequence views".to_string(),
+        )),
         FixedSizeBinary => {
             let validity = to_validity(&json_col.validity);
 
diff --git a/src/io/json_integration/write/schema.rs b/src/io/json_integration/write/schema.rs
index f4297c3fb0f..86f031775b7 100644
--- a/src/io/json_integration/write/schema.rs
+++ b/src/io/json_integration/write/schema.rs
@@ -23,6 +23,9 @@ fn serialize_data_type(data_type: &DataType) -> Value {
         DataType::Float64 => json!({"name": "floatingpoint", "precision": "DOUBLE"}),
         DataType::Utf8 => json!({"name": "utf8"}),
         DataType::LargeUtf8 => json!({"name": "largeutf8"}),
+        DataType::Utf8Sequence | DataType::LargeUtf8Sequence => {
+            todo!("Arrow does not yet support sequence views")
+        }
         DataType::Binary => json!({"name": "binary"}),
         DataType::LargeBinary => json!({"name": "largebinary"}),
         DataType::FixedSizeBinary(byte_width) => {
diff --git a/src/io/parquet/read/mod.rs b/src/io/parquet/read/mod.rs
index 32c1b941811..25e1ac46dae 100644
--- a/src/io/parquet/read/mod.rs
+++ b/src/io/parquet/read/mod.rs
@@ -211,7 +211,8 @@ fn column_offset(data_type: &DataType) -> usize {
     use crate::datatypes::PhysicalType::*;
     match data_type.to_physical_type() {
         Null | Boolean | Primitive(_) | FixedSizeBinary | Binary | LargeBinary | Utf8
-        | LargeUtf8 | Dictionary(_) | List | LargeList | FixedSizeList => 0,
+        | LargeUtf8 | Dictionary(_) | List | LargeList | FixedSizeList | Utf8Sequence
+        | LargeUtf8Sequence => 0,
         Struct => {
             if let DataType::Struct(v) = data_type.to_logical_type() {
                 v.iter().map(|x| 1 + column_offset(x.data_type())).sum()
@@ -228,7 +229,8 @@ fn column_datatype(data_type: &DataType, column: usize) -> DataType {
     use crate::datatypes::PhysicalType::*;
     match data_type.to_physical_type() {
         Null | Boolean | Primitive(_) | FixedSizeBinary | Binary | LargeBinary | Utf8
-        | LargeUtf8 | Dictionary(_) | List | LargeList | FixedSizeList => data_type.clone(),
+        | LargeUtf8 | Dictionary(_) | List | LargeList | FixedSizeList | Utf8Sequence
+        | LargeUtf8Sequence => data_type.clone(),
         Struct => {
             if let DataType::Struct(fields) = data_type.to_logical_type() {
                 let mut total_chunk = 0;
@@ -395,9 +397,8 @@ fn finish_array(data_type: DataType, arrays: &mut VecDeque<Box<dyn Array>>) -> B
     use crate::datatypes::PhysicalType::*;
     match data_type.to_physical_type() {
         Null | Boolean | Primitive(_) | FixedSizeBinary | Binary | LargeBinary | Utf8
-        | LargeUtf8 | List | LargeList | FixedSizeList | Dictionary(_) => {
-            arrays.pop_front().unwrap()
-        }
+        | LargeUtf8 | List | LargeList | FixedSizeList | Dictionary(_) | Utf8Sequence
+        | LargeUtf8Sequence => arrays.pop_front().unwrap(),
         Struct => {
             if let DataType::Struct(fields) = data_type.to_logical_type() {
                 let values = fields
diff --git a/src/scalar/mod.rs b/src/scalar/mod.rs
index 9a2ca3fecf5..19657a344c8 100644
--- a/src/scalar/mod.rs
+++ b/src/scalar/mod.rs
@@ -103,6 +103,7 @@ pub fn new_scalar(array: &dyn Array, index: usize) -> Box<dyn Scalar> {
         }),
         Utf8 => dyn_new_utf8!(array, index, i32),
         LargeUtf8 => dyn_new_utf8!(array, index, i64),
+        Utf8Sequence | LargeUtf8Sequence => todo!(),
         Binary => dyn_new_binary!(array, index, i32),
         LargeBinary => dyn_new_binary!(array, index, i64),
         List => dyn_new_list!(array, index, i32),