From 8476d3f48001d4bab24129a46b5f9878ad73c4b3 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Sun, 25 Apr 2021 14:02:03 +0000 Subject: [PATCH] WIP. --- Cargo.toml | 9 - README.md | 8 +- datafusion-examples/examples/csv_sql.rs | 2 +- datafusion-examples/examples/dataframe.rs | 2 +- datafusion-examples/examples/flight_client.rs | 2 +- datafusion-examples/examples/flight_server.rs | 2 +- datafusion-examples/examples/parquet_sql.rs | 2 +- datafusion/Cargo.toml | 4 +- datafusion/benches/aggregate_query_sql.rs | 2 +- datafusion/benches/filter_query_sql.rs | 2 +- datafusion/benches/math_query_sql.rs | 2 +- datafusion/benches/sort_limit_query_sql.rs | 7 +- datafusion/src/catalog/information_schema.rs | 212 ++++--- datafusion/src/dataframe.rs | 2 +- datafusion/src/datasource/csv.rs | 23 +- datafusion/src/datasource/datasource.rs | 4 +- datafusion/src/datasource/empty.rs | 4 +- datafusion/src/datasource/memory.rs | 56 +- datafusion/src/datasource/parquet.rs | 22 +- datafusion/src/error.rs | 16 +- datafusion/src/execution/context.rs | 153 +++-- datafusion/src/execution/dataframe_impl.rs | 6 +- datafusion/src/lib.rs | 25 +- datafusion/src/logical_plan/builder.rs | 10 +- datafusion/src/logical_plan/dfschema.rs | 6 +- datafusion/src/logical_plan/display.rs | 6 +- datafusion/src/logical_plan/expr.rs | 18 +- datafusion/src/logical_plan/plan.rs | 12 +- datafusion/src/optimizer/constant_folding.rs | 4 +- datafusion/src/optimizer/filter_push_down.rs | 11 +- .../src/optimizer/hash_build_probe_order.rs | 5 +- .../src/optimizer/projection_push_down.rs | 6 +- datafusion/src/optimizer/utils.rs | 4 +- .../src/physical_optimizer/repartition.rs | 14 +- datafusion/src/physical_plan/aggregates.rs | 2 +- .../src/physical_plan/array_expressions.rs | 100 +--- .../src/physical_plan/coalesce_batches.rs | 20 +- datafusion/src/physical_plan/common.rs | 7 +- datafusion/src/physical_plan/cross_join.rs | 13 +- .../src/physical_plan/crypto_expressions.rs | 16 +- datafusion/src/physical_plan/csv.rs | 79 ++- .../src/physical_plan/datetime_expressions.rs | 159 ++---- .../src/physical_plan/distinct_expressions.rs | 139 ++--- datafusion/src/physical_plan/empty.rs | 11 +- datafusion/src/physical_plan/explain.rs | 19 +- .../src/physical_plan/expressions/average.rs | 33 +- .../src/physical_plan/expressions/binary.rs | 511 ++++++----------- .../src/physical_plan/expressions/case.rs | 253 ++------- .../src/physical_plan/expressions/cast.rs | 96 +--- .../src/physical_plan/expressions/coercion.rs | 8 +- .../src/physical_plan/expressions/column.rs | 2 +- .../src/physical_plan/expressions/count.rs | 33 +- .../src/physical_plan/expressions/in_list.rs | 23 +- .../physical_plan/expressions/is_not_null.rs | 14 +- .../src/physical_plan/expressions/is_null.rs | 14 +- .../src/physical_plan/expressions/literal.rs | 6 +- .../src/physical_plan/expressions/min_max.rs | 131 ++--- .../src/physical_plan/expressions/mod.rs | 14 +- .../src/physical_plan/expressions/negative.rs | 25 +- .../src/physical_plan/expressions/not.rs | 10 +- .../src/physical_plan/expressions/nullif.rs | 90 +-- .../src/physical_plan/expressions/sum.rs | 39 +- .../src/physical_plan/expressions/try_cast.rs | 38 +- datafusion/src/physical_plan/filter.rs | 13 +- datafusion/src/physical_plan/functions.rs | 106 ++-- datafusion/src/physical_plan/group_scalar.rs | 18 +- .../src/physical_plan/hash_aggregate.rs | 213 +++---- datafusion/src/physical_plan/hash_join.rs | 229 +++----- datafusion/src/physical_plan/hash_utils.rs | 2 +- datafusion/src/physical_plan/limit.rs | 18 +- 
.../src/physical_plan/math_expressions.rs | 45 +- datafusion/src/physical_plan/memory.rs | 7 +- datafusion/src/physical_plan/merge.rs | 11 +- datafusion/src/physical_plan/mod.rs | 14 +- datafusion/src/physical_plan/parquet.rs | 428 ++++---------- datafusion/src/physical_plan/planner.rs | 22 +- datafusion/src/physical_plan/projection.rs | 9 +- .../src/physical_plan/regex_expressions.rs | 26 +- datafusion/src/physical_plan/repartition.rs | 44 +- datafusion/src/physical_plan/sort.rs | 73 +-- .../src/physical_plan/string_expressions.rs | 83 ++- datafusion/src/physical_plan/type_coercion.rs | 4 +- datafusion/src/physical_plan/udaf.rs | 2 +- datafusion/src/physical_plan/udf.rs | 2 +- .../src/physical_plan/unicode_expressions.rs | 116 ++-- datafusion/src/physical_plan/union.rs | 5 +- datafusion/src/scalar.rs | 537 +++++------------- datafusion/src/sql/planner.rs | 6 +- datafusion/src/test/exec.rs | 9 +- datafusion/src/test/mod.rs | 138 +++-- datafusion/tests/custom_sources.rs | 8 +- datafusion/tests/dataframe.rs | 4 +- datafusion/tests/provider_filter_pushdown.rs | 6 +- datafusion/tests/sql.rs | 24 +- datafusion/tests/user_defined_plan.rs | 2 +- 95 files changed, 1886 insertions(+), 2906 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 351523d74c36a..ebb3051f3ea05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,13 +18,4 @@ [workspace] members = [ "datafusion", - "datafusion-cli", - "datafusion-examples", - "benchmarks", - "ballista/rust/client", - "ballista/rust/core", - "ballista/rust/executor", - "ballista/rust/scheduler", ] - -exclude = ["python"] diff --git a/README.md b/README.md index f72c73bb80372..ec271b221798f 100644 --- a/README.md +++ b/README.md @@ -69,8 +69,8 @@ Run a SQL query against data stored in a CSV: ```rust use datafusion::prelude::*; -use arrow::util::pretty::print_batches; -use arrow::record_batch::RecordBatch; +use arrow2::util::pretty::print_batches; +use arrow2::record_batch::RecordBatch; #[tokio::main] async fn main() -> datafusion::error::Result<()> { @@ -92,8 +92,8 @@ Use the DataFrame API to process data stored in a CSV: ```rust use datafusion::prelude::*; -use arrow::util::pretty::print_batches; -use arrow::record_batch::RecordBatch; +use arrow2::util::pretty::print_batches; +use arrow2::record_batch::RecordBatch; #[tokio::main] async fn main() -> datafusion::error::Result<()> { diff --git a/datafusion-examples/examples/csv_sql.rs b/datafusion-examples/examples/csv_sql.rs index 76c87960d71d3..95a9afb035eda 100644 --- a/datafusion-examples/examples/csv_sql.rs +++ b/datafusion-examples/examples/csv_sql.rs @@ -27,7 +27,7 @@ async fn main() -> Result<()> { // create local execution context let mut ctx = ExecutionContext::new(); - let testdata = datafusion::arrow::util::test_util::arrow_test_data(); + let testdata = datafusion::crate::test::arrow_test_data(); // register csv file with the execution context ctx.register_csv( diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs index dcf6bc32be6b2..60147748d77a1 100644 --- a/datafusion-examples/examples/dataframe.rs +++ b/datafusion-examples/examples/dataframe.rs @@ -27,7 +27,7 @@ async fn main() -> Result<()> { // create local execution context let mut ctx = ExecutionContext::new(); - let testdata = datafusion::arrow::util::test_util::parquet_test_data(); + let testdata = datafusion::crate::test::parquet_test_data(); let filename = &format!("{}/alltypes_plain.parquet", testdata); diff --git a/datafusion-examples/examples/flight_client.rs 
b/datafusion-examples/examples/flight_client.rs index 53347826ff89c..634652c6d9cb2 100644 --- a/datafusion-examples/examples/flight_client.rs +++ b/datafusion-examples/examples/flight_client.rs @@ -31,7 +31,7 @@ use arrow_flight::{FlightDescriptor, Ticket}; /// This example is run along-side the example `flight_server`. #[tokio::main] async fn main() -> Result<(), Box> { - let testdata = datafusion::arrow::util::test_util::parquet_test_data(); + let testdata = datafusion::crate::test::parquet_test_data(); // Create Flight client let mut client = FlightServiceClient::connect("http://localhost:50051").await?; diff --git a/datafusion-examples/examples/flight_server.rs b/datafusion-examples/examples/flight_server.rs index 8496bcb18914f..06efb04f76e06 100644 --- a/datafusion-examples/examples/flight_server.rs +++ b/datafusion-examples/examples/flight_server.rs @@ -87,7 +87,7 @@ impl FlightService for FlightServiceImpl { // create local execution context let mut ctx = ExecutionContext::new(); - let testdata = datafusion::arrow::util::test_util::parquet_test_data(); + let testdata = datafusion::crate::test::parquet_test_data(); // register parquet file with the execution context ctx.register_parquet( diff --git a/datafusion-examples/examples/parquet_sql.rs b/datafusion-examples/examples/parquet_sql.rs index f679b22ceb904..2a3becf9913f7 100644 --- a/datafusion-examples/examples/parquet_sql.rs +++ b/datafusion-examples/examples/parquet_sql.rs @@ -27,7 +27,7 @@ async fn main() -> Result<()> { // create local execution context let mut ctx = ExecutionContext::new(); - let testdata = datafusion::arrow::util::test_util::parquet_test_data(); + let testdata = datafusion::crate::test::parquet_test_data(); // register parquet file with the execution context ctx.register_parquet( diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml index a127076135f12..878446889762b 100644 --- a/datafusion/Cargo.toml +++ b/datafusion/Cargo.toml @@ -38,7 +38,6 @@ path = "src/lib.rs" [features] default = ["crypto_expressions", "regex_expressions", "unicode_expressions"] -simd = ["arrow/simd"] crypto_expressions = ["md-5", "sha2"] regex_expressions = ["regex", "lazy_static"] unicode_expressions = ["unicode-segmentation"] @@ -46,8 +45,7 @@ unicode_expressions = ["unicode-segmentation"] [dependencies] ahash = "0.7" hashbrown = "0.11" -arrow = { git = "https://github.com/apache/arrow-rs", rev = "4449ee96fe3fd4a0b275da8dd25ce2792699bc98", features = ["prettyprint"] } -parquet = { git = "https://github.com/apache/arrow-rs", rev = "4449ee96fe3fd4a0b275da8dd25ce2792699bc98", features = ["arrow"] } +arrow2 = { git = "https://github.com/jorgecarleitao/arrow2", rev = "b645c6320a119b017fe147ea9edc59201284d4fa" } sqlparser = "0.9.0" paste = "^1.0" num_cpus = "1.13.0" diff --git a/datafusion/benches/aggregate_query_sql.rs b/datafusion/benches/aggregate_query_sql.rs index 8f1a97e198d3b..6f10b03ad4784 100644 --- a/datafusion/benches/aggregate_query_sql.rs +++ b/datafusion/benches/aggregate_query_sql.rs @@ -26,7 +26,7 @@ use tokio::runtime::Runtime; extern crate arrow; extern crate datafusion; -use arrow::{ +use arrow2::{ array::Float32Array, array::Float64Array, array::StringArray, diff --git a/datafusion/benches/filter_query_sql.rs b/datafusion/benches/filter_query_sql.rs index 8600bdc88c6af..c5637b1441fb2 100644 --- a/datafusion/benches/filter_query_sql.rs +++ b/datafusion/benches/filter_query_sql.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use arrow::{ +use arrow2::{ array::{Float32Array, Float64Array}, datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, diff --git a/datafusion/benches/math_query_sql.rs b/datafusion/benches/math_query_sql.rs index 1aaa2d3403cfd..71fc864a5439d 100644 --- a/datafusion/benches/math_query_sql.rs +++ b/datafusion/benches/math_query_sql.rs @@ -26,7 +26,7 @@ use tokio::runtime::Runtime; extern crate arrow; extern crate datafusion; -use arrow::{ +use arrow2::{ array::{Float32Array, Float64Array}, datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, diff --git a/datafusion/benches/sort_limit_query_sql.rs b/datafusion/benches/sort_limit_query_sql.rs index be065f32e0090..50b71e3c0179f 100644 --- a/datafusion/benches/sort_limit_query_sql.rs +++ b/datafusion/benches/sort_limit_query_sql.rs @@ -21,10 +21,7 @@ use criterion::Criterion; use std::sync::{Arc, Mutex}; -extern crate arrow; -extern crate datafusion; - -use arrow::datatypes::{DataType, Field, Schema}; +use arrow2::datatypes::{DataType, Field, Schema}; use datafusion::datasource::{CsvFile, CsvReadOptions, MemTable}; use datafusion::execution::context::ExecutionContext; @@ -57,7 +54,7 @@ fn create_context() -> Arc> { Field::new("c13", DataType::Utf8, false), ])); - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = crate::test::arrow_test_data(); // create CSV data source let csv = CsvFile::try_new( diff --git a/datafusion/src/catalog/information_schema.rs b/datafusion/src/catalog/information_schema.rs index fd7fcb4b901a6..6b588ac1398a6 100644 --- a/datafusion/src/catalog/information_schema.rs +++ b/datafusion/src/catalog/information_schema.rs @@ -21,8 +21,8 @@ use std::{any, sync::Arc}; -use arrow::{ - array::{StringBuilder, UInt64Builder}, +use arrow2::{ + array::*, datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; @@ -189,23 +189,23 @@ impl SchemaProvider for InformationSchemaProvider { /// /// Columns are based on https://www.postgresql.org/docs/current/infoschema-columns.html struct InformationSchemaTablesBuilder { - catalog_names: StringBuilder, - schema_names: StringBuilder, - table_names: StringBuilder, - table_types: StringBuilder, + catalog_names: Utf8Primitive, + schema_names: Utf8Primitive, + table_names: Utf8Primitive, + table_types: Utf8Primitive, } impl InformationSchemaTablesBuilder { fn new() -> Self { - // StringBuilder requires providing an initial capacity, so + // Utf8Primitive requires providing an initial capacity, so // pick 10 here arbitrarily as this is not performance // critical code and the number of tables is unavailable here. let default_capacity = 10; Self { - catalog_names: StringBuilder::new(default_capacity), - schema_names: StringBuilder::new(default_capacity), - table_names: StringBuilder::new(default_capacity), - table_types: StringBuilder::new(default_capacity), + catalog_names: Utf8Primitive::with_capacity(default_capacity), + schema_names: Utf8Primitive::with_capacity(default_capacity), + table_names: Utf8Primitive::with_capacity(default_capacity), + table_types: Utf8Primitive::with_capacity(default_capacity), } } @@ -217,20 +217,24 @@ impl InformationSchemaTablesBuilder { table_type: TableType, ) { // Note: append_value is actually infallable. 
- self.catalog_names - .append_value(catalog_name.as_ref()) - .unwrap(); - self.schema_names - .append_value(schema_name.as_ref()) - .unwrap(); - self.table_names.append_value(table_name.as_ref()).unwrap(); - self.table_types - .append_value(match table_type { - TableType::Base => "BASE TABLE", - TableType::View => "VIEW", - TableType::Temporary => "LOCAL TEMPORARY", - }) - .unwrap(); + self.catalog_names.push(Some(&catalog_name.as_ref())); + self.schema_names.push(Some(&schema_name.as_ref())); + self.table_names.push(Some(&table_name.as_ref())); + self.table_types.push(Some(&match table_type { + TableType::Base => "BASE TABLE", + TableType::View => "VIEW", + TableType::Temporary => "LOCAL TEMPORARY", + })); + } + + fn add_system_table( + &mut self, + catalog_name: impl AsRef, + schema_name: impl AsRef, + table_name: impl AsRef, + ) { + // Note: append_value is actually infallable. + self.catalog_names.push(Some(&catalog_name.as_ref())); } } @@ -254,10 +258,10 @@ impl From for MemTable { let batch = RecordBatch::try_new( schema.clone(), vec![ - Arc::new(catalog_names.finish()), - Arc::new(schema_names.finish()), - Arc::new(table_names.finish()), - Arc::new(table_types.finish()), + Arc::new(catalog_names.to()), + Arc::new(schema_names.to()), + Arc::new(table_names.to()), + Arc::new(table_types.to()), ], ) .unwrap(); @@ -270,45 +274,45 @@ impl From for MemTable { /// /// Columns are based on https://www.postgresql.org/docs/current/infoschema-columns.html struct InformationSchemaColumnsBuilder { - catalog_names: StringBuilder, - schema_names: StringBuilder, - table_names: StringBuilder, - column_names: StringBuilder, - ordinal_positions: UInt64Builder, - column_defaults: StringBuilder, - is_nullables: StringBuilder, - data_types: StringBuilder, - character_maximum_lengths: UInt64Builder, - character_octet_lengths: UInt64Builder, - numeric_precisions: UInt64Builder, - numeric_precision_radixes: UInt64Builder, - numeric_scales: UInt64Builder, - datetime_precisions: UInt64Builder, - interval_types: StringBuilder, + catalog_names: Utf8Primitive, + schema_names: Utf8Primitive, + table_names: Utf8Primitive, + column_names: Utf8Primitive, + ordinal_positions: Primitive, + column_defaults: Utf8Primitive, + is_nullables: Utf8Primitive, + data_types: Utf8Primitive, + character_maximum_lengths: Primitive, + character_octet_lengths: Primitive, + numeric_precisions: Primitive, + numeric_precision_radixes: Primitive, + numeric_scales: Primitive, + datetime_precisions: Primitive, + interval_types: Utf8Primitive, } impl InformationSchemaColumnsBuilder { fn new() -> Self { - // StringBuilder requires providing an initial capacity, so + // Utf8Primitive requires providing an initial capacity, so // pick 10 here arbitrarily as this is not performance // critical code and the number of tables is unavailable here. 
let default_capacity = 10; Self { - catalog_names: StringBuilder::new(default_capacity), - schema_names: StringBuilder::new(default_capacity), - table_names: StringBuilder::new(default_capacity), - column_names: StringBuilder::new(default_capacity), - ordinal_positions: UInt64Builder::new(default_capacity), - column_defaults: StringBuilder::new(default_capacity), - is_nullables: StringBuilder::new(default_capacity), - data_types: StringBuilder::new(default_capacity), - character_maximum_lengths: UInt64Builder::new(default_capacity), - character_octet_lengths: UInt64Builder::new(default_capacity), - numeric_precisions: UInt64Builder::new(default_capacity), - numeric_precision_radixes: UInt64Builder::new(default_capacity), - numeric_scales: UInt64Builder::new(default_capacity), - datetime_precisions: UInt64Builder::new(default_capacity), - interval_types: StringBuilder::new(default_capacity), + catalog_names: Utf8Primitive::::with_capacity(default_capacity), + schema_names: Utf8Primitive::::with_capacity(default_capacity), + table_names: Utf8Primitive::::with_capacity(default_capacity), + column_names: Utf8Primitive::::with_capacity(default_capacity), + ordinal_positions: Primitive::::with_capacity(default_capacity), + column_defaults: Utf8Primitive::::with_capacity(default_capacity), + is_nullables: Utf8Primitive::::with_capacity(default_capacity), + data_types: Utf8Primitive::::with_capacity(default_capacity), + character_maximum_lengths: Primitive::::with_capacity(default_capacity), + character_octet_lengths: Primitive::::with_capacity(default_capacity), + numeric_precisions: Primitive::::with_capacity(default_capacity), + numeric_precision_radixes: Primitive::::with_capacity(default_capacity), + numeric_scales: Primitive::::with_capacity(default_capacity), + datetime_precisions: Primitive::::with_capacity(default_capacity), + interval_types: Utf8Primitive::::with_capacity(default_capacity), } } @@ -326,33 +330,24 @@ impl InformationSchemaColumnsBuilder { use DataType::*; // Note: append_value is actually infallable. - self.catalog_names - .append_value(catalog_name.as_ref()) - .unwrap(); - self.schema_names - .append_value(schema_name.as_ref()) - .unwrap(); - self.table_names.append_value(table_name.as_ref()).unwrap(); - - self.column_names - .append_value(column_name.as_ref()) - .unwrap(); - - self.ordinal_positions - .append_value(column_position as u64) - .unwrap(); + self.catalog_names.push(Some(&catalog_name.as_ref())); + self.schema_names.push(Some(&schema_name.as_ref())); + self.table_names.push(Some(&table_name.as_ref())); + + self.column_names.push(Some(&column_name.as_ref())); + + self.ordinal_positions.push(Some(&(column_position as u64))); // DataFusion does not support column default values, so null - self.column_defaults.append_null().unwrap(); + self.column_defaults.push(None); // "YES if the column is possibly nullable, NO if it is known not nullable. 
" let nullable_str = if is_nullable { "YES" } else { "NO" }; - self.is_nullables.append_value(nullable_str).unwrap(); + self.is_nullables.push(Some(&nullable_str)); // "System supplied type" --> Use debug format of the datatype self.data_types - .append_value(format!("{:?}", data_type)) - .unwrap(); + .push(Some(&format!("{:?}", data_type).as_ref())); // "If data_type identifies a character or bit string type, the // declared maximum length; null for all other data types or @@ -360,9 +355,7 @@ impl InformationSchemaColumnsBuilder { // // Arrow has no equivalent of VARCHAR(20), so we leave this as Null let max_chars = None; - self.character_maximum_lengths - .append_option(max_chars) - .unwrap(); + self.character_maximum_lengths.push(max_chars); // "Maximum length, in bytes, for binary data, character data, // or text and image data." @@ -371,9 +364,7 @@ impl InformationSchemaColumnsBuilder { LargeBinary | LargeUtf8 => Some(i64::MAX as u64), _ => None, }; - self.character_octet_lengths - .append_option(char_len) - .unwrap(); + self.character_octet_lengths.push(char_len.as_ref()); // numeric_precision: "If data_type identifies a numeric type, this column // contains the (declared or implicit) precision of the type @@ -414,16 +405,12 @@ impl InformationSchemaColumnsBuilder { _ => (None, None, None), }; - self.numeric_precisions - .append_option(numeric_precision) - .unwrap(); - self.numeric_precision_radixes - .append_option(numeric_radix) - .unwrap(); - self.numeric_scales.append_option(numeric_scale).unwrap(); + self.numeric_precisions.push(numeric_precision.as_ref()); + self.numeric_precision_radixes.push(numeric_radix.as_ref()); + self.numeric_scales.push(numeric_scale.as_ref()); - self.datetime_precisions.append_option(None).unwrap(); - self.interval_types.append_null().unwrap(); + self.datetime_precisions.push(None); + self.interval_types.push(None); } } @@ -464,26 +451,33 @@ impl From for MemTable { mut datetime_precisions, mut interval_types, } = value; + let ordinal_positions: UInt64Array = ordinal_positions.into(); + let character_maximum_lengths: UInt64Array = character_maximum_lengths.into(); + let character_octet_lengths: UInt64Array = character_octet_lengths.into(); + let numeric_precisions: UInt64Array = numeric_precisions.into(); + let numeric_precision_radixes: UInt64Array = numeric_precision_radixes.into(); + let numeric_scales: UInt64Array = numeric_scales.into(); + let datetime_precisions: UInt64Array = datetime_precisions.into(); let schema = Arc::new(schema); let batch = RecordBatch::try_new( schema.clone(), vec![ - Arc::new(catalog_names.finish()), - Arc::new(schema_names.finish()), - Arc::new(table_names.finish()), - Arc::new(column_names.finish()), - Arc::new(ordinal_positions.finish()), - Arc::new(column_defaults.finish()), - Arc::new(is_nullables.finish()), - Arc::new(data_types.finish()), - Arc::new(character_maximum_lengths.finish()), - Arc::new(character_octet_lengths.finish()), - Arc::new(numeric_precisions.finish()), - Arc::new(numeric_precision_radixes.finish()), - Arc::new(numeric_scales.finish()), - Arc::new(datetime_precisions.finish()), - Arc::new(interval_types.finish()), + Arc::new(catalog_names.to()), + Arc::new(schema_names.to()), + Arc::new(table_names.to()), + Arc::new(column_names.to()), + Arc::new(ordinal_positions), + Arc::new(column_defaults.to()), + Arc::new(is_nullables.to()), + Arc::new(data_types.to()), + Arc::new(character_maximum_lengths), + Arc::new(character_octet_lengths), + Arc::new(numeric_precisions), + 
Arc::new(numeric_precision_radixes), + Arc::new(numeric_scales), + Arc::new(datetime_precisions), + Arc::new(interval_types.to()), ], ) .unwrap(); diff --git a/datafusion/src/dataframe.rs b/datafusion/src/dataframe.rs index 9c7c2ef96d6be..c244b2d1d71ea 100644 --- a/datafusion/src/dataframe.rs +++ b/datafusion/src/dataframe.rs @@ -17,7 +17,7 @@ //! DataFrame API for building and executing query plans. -use crate::arrow::record_batch::RecordBatch; +use crate::arrow2::record_batch::RecordBatch; use crate::error::Result; use crate::logical_plan::{ DFSchema, Expr, FunctionRegistry, JoinType, LogicalPlan, Partitioning, diff --git a/datafusion/src/datasource/csv.rs b/datafusion/src/datasource/csv.rs index 33cbeb12ca6bd..3e8f5726392f3 100644 --- a/datafusion/src/datasource/csv.rs +++ b/datafusion/src/datasource/csv.rs @@ -25,7 +25,7 @@ //! use datafusion::datasource::TableProvider; //! use datafusion::datasource::csv::{CsvFile, CsvReadOptions}; //! -//! let testdata = arrow::util::test_util::arrow_test_data(); +//! let testdata = crate::test::arrow_test_data(); //! let csvdata = CsvFile::try_new( //! &format!("{}/csv/aggregate_test_100.csv", testdata), //! CsvReadOptions::new().delimiter(b'|'), @@ -33,12 +33,14 @@ //! let schema = csvdata.schema(); //! ``` -use arrow::datatypes::SchemaRef; use std::any::Any; use std::io::{Read, Seek}; use std::string::String; use std::sync::{Arc, Mutex}; +use arrow2::datatypes::Schema; +use arrow2::io::csv::read as csv_read; + use crate::datasource::datasource::Statistics; use crate::datasource::TableProvider; use crate::error::{DataFusionError, Result}; @@ -47,6 +49,7 @@ use crate::physical_plan::csv::CsvExec; pub use crate::physical_plan::csv::CsvReadOptions; use crate::physical_plan::{common, ExecutionPlan}; +type SchemaRef = Arc; enum Source { /// Path to a single CSV file or a directory containing one of more CSV files Path(String), @@ -119,21 +122,25 @@ impl CsvFile { /// Attempt to initialize a `CsvRead` from a reader impls `Seek`. The schema can be inferred automatically. 
pub fn try_new_from_reader_infer_schema( - mut reader: R, + reader: R, options: CsvReadOptions, ) -> Result { + let mut reader = csv_read::ReaderBuilder::new() + .delimiter(options.delimiter) + .from_reader(reader); let schema = Arc::new(match options.schema { Some(s) => s.clone(), None => { - let (schema, _) = arrow::csv::reader::infer_file_schema( + let schema = csv_read::infer_schema( &mut reader, - options.delimiter, Some(options.schema_infer_max_records), options.has_header, + &csv_read::infer, )?; schema } }); + let reader = reader.into_inner(); Ok(Self { source: Source::Reader(Mutex::new(Some(Box::new(reader)))), @@ -228,9 +235,11 @@ mod tests { use super::*; use crate::prelude::*; + use arrow2::array::*; + #[tokio::test] async fn csv_file_from_reader() -> Result<()> { - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = crate::test::arrow_test_data(); let filename = "aggregate_test_100.csv"; let path = format!("{}/csv/{}", testdata, filename); let buf = std::fs::read(path).unwrap(); @@ -249,7 +258,7 @@ mod tests { batches[0] .column(0) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap() .value(0), 5 diff --git a/datafusion/src/datasource/datasource.rs b/datafusion/src/datasource/datasource.rs index 0349a49e491ba..0c8065ae6f074 100644 --- a/datafusion/src/datasource/datasource.rs +++ b/datafusion/src/datasource/datasource.rs @@ -23,7 +23,9 @@ use std::sync::Arc; use crate::error::Result; use crate::logical_plan::Expr; use crate::physical_plan::ExecutionPlan; -use crate::{arrow::datatypes::SchemaRef, scalar::ScalarValue}; +use crate::{arrow2::datatypes::Schema, scalar::ScalarValue}; + +type SchemaRef = Arc; /// This table statistics are estimates. /// It can not be used directly in the precise compute diff --git a/datafusion/src/datasource/empty.rs b/datafusion/src/datasource/empty.rs index e6140cdb8de69..e0033f29df2e1 100644 --- a/datafusion/src/datasource/empty.rs +++ b/datafusion/src/datasource/empty.rs @@ -20,7 +20,9 @@ use std::any::Any; use std::sync::Arc; -use arrow::datatypes::*; +use arrow2::datatypes::*; + +type SchemaRef = Arc; use crate::datasource::datasource::Statistics; use crate::datasource::TableProvider; diff --git a/datafusion/src/datasource/memory.rs b/datafusion/src/datasource/memory.rs index af40480870287..02488aee112b1 100644 --- a/datafusion/src/datasource/memory.rs +++ b/datafusion/src/datasource/memory.rs @@ -24,8 +24,10 @@ use log::debug; use std::any::Any; use std::sync::Arc; -use arrow::datatypes::{Field, Schema, SchemaRef}; -use arrow::record_batch::RecordBatch; +use arrow2::datatypes::{Field, Schema}; +use arrow2::record_batch::RecordBatch; + +type SchemaRef = Arc; use crate::datasource::TableProvider; use crate::error::{DataFusionError, Result}; @@ -91,7 +93,7 @@ impl MemTable { if partitions .iter() .flatten() - .all(|batches| schema.contains(&batches.schema())) + .all(|batches| schema.as_ref() == batches.schema().as_ref()) { let statistics = calculate_statistics(&schema, &partitions); debug!("MemTable statistics: {:?}", statistics); @@ -221,8 +223,8 @@ impl TableProvider for MemTable { #[cfg(test)] mod tests { use super::*; - use arrow::array::Int32Array; - use arrow::datatypes::{DataType, Field, Schema}; + use arrow2::array::Int32Array; + use arrow2::datatypes::{DataType, Field, Schema}; use futures::StreamExt; use std::collections::HashMap; @@ -238,10 +240,10 @@ mod tests { let batch = RecordBatch::try_new( schema.clone(), vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(Int32Array::from(vec![4, 5, 6])), 
- Arc::new(Int32Array::from(vec![7, 8, 9])), - Arc::new(Int32Array::from(vec![None, None, Some(9)])), + Arc::new(Int32Array::from_slice(&[1, 2, 3])), + Arc::new(Int32Array::from_slice(&[4, 5, 6])), + Arc::new(Int32Array::from_slice(&[7, 8, 9])), + Arc::new(Int32Array::from(&[None, None, Some(9)])), ], )?; @@ -301,9 +303,9 @@ mod tests { let batch = RecordBatch::try_new( schema.clone(), vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(Int32Array::from(vec![4, 5, 6])), - Arc::new(Int32Array::from(vec![7, 8, 9])), + Arc::new(Int32Array::from_slice(&[1, 2, 3])), + Arc::new(Int32Array::from_slice(&[4, 5, 6])), + Arc::new(Int32Array::from_slice(&[7, 8, 9])), ], )?; @@ -329,9 +331,9 @@ mod tests { let batch = RecordBatch::try_new( schema.clone(), vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(Int32Array::from(vec![4, 5, 6])), - Arc::new(Int32Array::from(vec![7, 8, 9])), + Arc::new(Int32Array::from_slice(&[1, 2, 3])), + Arc::new(Int32Array::from_slice(&[4, 5, 6])), + Arc::new(Int32Array::from_slice(&[7, 8, 9])), ], )?; @@ -366,9 +368,9 @@ mod tests { let batch = RecordBatch::try_new( schema1, vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(Int32Array::from(vec![4, 5, 6])), - Arc::new(Int32Array::from(vec![7, 8, 9])), + Arc::new(Int32Array::from_slice(&[1, 2, 3])), + Arc::new(Int32Array::from_slice(&[4, 5, 6])), + Arc::new(Int32Array::from_slice(&[7, 8, 9])), ], )?; @@ -399,8 +401,8 @@ mod tests { let batch = RecordBatch::try_new( schema1, vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(Int32Array::from(vec![7, 5, 9])), + Arc::new(Int32Array::from_slice(&[1, 2, 3])), + Arc::new(Int32Array::from_slice(&[7, 5, 9])), ], )?; @@ -420,7 +422,7 @@ mod tests { let mut metadata = HashMap::new(); metadata.insert("foo".to_string(), "bar".to_string()); - let schema1 = Schema::new_with_metadata( + let schema1 = Schema::new_from( vec![ Field::new("a", DataType::Int32, false), Field::new("b", DataType::Int32, false), @@ -442,18 +444,18 @@ mod tests { let batch1 = RecordBatch::try_new( Arc::new(schema1), vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(Int32Array::from(vec![4, 5, 6])), - Arc::new(Int32Array::from(vec![7, 8, 9])), + Arc::new(Int32Array::from_slice(&[1, 2, 3])), + Arc::new(Int32Array::from_slice(&[4, 5, 6])), + Arc::new(Int32Array::from_slice(&[7, 8, 9])), ], )?; let batch2 = RecordBatch::try_new( Arc::new(schema2), vec![ - Arc::new(Int32Array::from(vec![1, 2, 3])), - Arc::new(Int32Array::from(vec![4, 5, 6])), - Arc::new(Int32Array::from(vec![7, 8, 9])), + Arc::new(Int32Array::from_slice(&[1, 2, 3])), + Arc::new(Int32Array::from_slice(&[4, 5, 6])), + Arc::new(Int32Array::from_slice(&[7, 8, 9])), ], )?; diff --git a/datafusion/src/datasource/parquet.rs b/datafusion/src/datasource/parquet.rs index 30e47df5f6491..7bea9042458b8 100644 --- a/datafusion/src/datasource/parquet.rs +++ b/datafusion/src/datasource/parquet.rs @@ -21,7 +21,7 @@ use std::any::Any; use std::string::String; use std::sync::Arc; -use arrow::datatypes::*; +use arrow2::datatypes::*; use crate::datasource::datasource::Statistics; use crate::datasource::TableProvider; @@ -32,6 +32,8 @@ use crate::physical_plan::ExecutionPlan; use super::datasource::TableProviderFilterPushDown; +type SchemaRef = Arc; + /// Table-based representation of a `ParquetFile`. pub struct ParquetTable { path: String, @@ -43,7 +45,7 @@ pub struct ParquetTable { impl ParquetTable { /// Attempt to initialize a new `ParquetTable` from a file path. 
pub fn try_new(path: &str, max_concurrency: usize) -> Result { - let parquet_exec = ParquetExec::try_from_path(path, None, None, 0, 1, None)?; + let parquet_exec = ParquetExec::try_from_path(path, None, None, 1, None)?; let schema = parquet_exec.schema(); Ok(Self { path: path.to_string(), @@ -90,9 +92,6 @@ impl TableProvider for ParquetTable { &self.path, projection.clone(), predicate, - limit - .map(|l| std::cmp::min(l, batch_size)) - .unwrap_or(batch_size), self.max_concurrency, limit, )?)) @@ -106,11 +105,8 @@ impl TableProvider for ParquetTable { #[cfg(test)] mod tests { use super::*; - use arrow::array::{ - BinaryArray, BooleanArray, Float32Array, Float64Array, Int32Array, - TimestampNanosecondArray, - }; - use arrow::record_batch::RecordBatch; + use arrow2::array::*; + use arrow2::record_batch::RecordBatch; use futures::StreamExt; #[tokio::test] @@ -234,7 +230,7 @@ mod tests { let array = batch .column(0) .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); let mut values: Vec = vec![]; for i in 0..batch.num_rows() { @@ -312,7 +308,7 @@ mod tests { let array = batch .column(0) .as_any() - .downcast_ref::() + .downcast_ref::>() .unwrap(); let mut values: Vec<&str> = vec![]; for i in 0..batch.num_rows() { @@ -328,7 +324,7 @@ mod tests { } fn load_table(name: &str) -> Result> { - let testdata = arrow::util::test_util::parquet_test_data(); + let testdata = crate::test::parquet_test_data(); let filename = format!("{}/{}", testdata, name); let table = ParquetTable::try_new(&filename, 2)?; Ok(Arc::new(table)) diff --git a/datafusion/src/error.rs b/datafusion/src/error.rs index 903faeabf6954..b7b656733fde0 100644 --- a/datafusion/src/error.rs +++ b/datafusion/src/error.rs @@ -22,8 +22,7 @@ use std::fmt::{Display, Formatter}; use std::io; use std::result; -use arrow::error::ArrowError; -use parquet::errors::ParquetError; +use arrow2::error::ArrowError; use sqlparser::parser::ParserError; /// Result type for operations that could result in an [DataFusionError] @@ -35,8 +34,6 @@ pub type Result = result::Result; pub enum DataFusionError { /// Error returned by arrow. ArrowError(ArrowError), - /// Wraps an error from the Parquet crate - ParquetError(ParquetError), /// Error associated to I/O operations and associated traits. IoError(io::Error), /// Error returned when SQL is syntactically incorrect. @@ -59,7 +56,7 @@ pub enum DataFusionError { } impl DataFusionError { - /// Wraps this [DataFusionError] as an [arrow::error::ArrowError]. + /// Wraps this [DataFusionError] as an [arrow2::error::ArrowError]. 
pub fn into_arrow_external_error(self) -> ArrowError { ArrowError::from_external_error(Box::new(self)) } @@ -77,12 +74,6 @@ impl From for DataFusionError { } } -impl From for DataFusionError { - fn from(e: ParquetError) -> Self { - DataFusionError::ParquetError(e) - } -} - impl From for DataFusionError { fn from(e: ParserError) -> Self { DataFusionError::SQL(e) @@ -93,9 +84,6 @@ impl Display for DataFusionError { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match *self { DataFusionError::ArrowError(ref desc) => write!(f, "Arrow error: {}", desc), - DataFusionError::ParquetError(ref desc) => { - write!(f, "Parquet error: {}", desc) - } DataFusionError::IoError(ref desc) => write!(f, "IO error: {}", desc), DataFusionError::SQL(ref desc) => { write!(f, "SQL error: {:?}", desc) diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 272e75acba6fd..b7b51ca3b71bd 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -39,7 +39,9 @@ use std::{ use futures::{StreamExt, TryStreamExt}; use tokio::task::{self, JoinHandle}; -use arrow::csv; +use arrow2::error::{ArrowError, Result as ArrowResult}; +use arrow2::io::csv::write as csv_write; +use arrow2::io::parquet::write; use crate::catalog::{ catalog::{CatalogProvider, MemoryCatalogProvider}, @@ -75,8 +77,6 @@ use crate::sql::{ use crate::variable::{VarProvider, VarType}; use crate::{dataframe::DataFrame, physical_plan::udaf::AggregateUDF}; use chrono::{DateTime, Utc}; -use parquet::arrow::ArrowWriter; -use parquet::file::properties::WriterProperties; /// ExecutionContext is the main interface for executing queries with DataFusion. The context /// provides the following functionality: @@ -500,12 +500,21 @@ impl ExecutionContext { let plan = plan.clone(); let filename = format!("part-{}.csv", i); let path = fs_path.join(&filename); - let file = fs::File::create(path)?; - let mut writer = csv::Writer::new(file); + + let mut writer = csv_write::WriterBuilder::new() + .from_path(path) + .map_err(ArrowError::from)?; + + csv_write::write_header(&mut writer, plan.schema().as_ref())?; + + let options = csv_write::SerializeOptions::default(); + let stream = plan.execute(i).await?; let handle: JoinHandle> = task::spawn(async move { stream - .map(|batch| writer.write(&batch?)) + .map(|batch| { + csv_write::write_batch(&mut writer, &batch?, &options) + }) .try_collect() .await .map_err(DataFusionError::from) @@ -527,7 +536,6 @@ impl ExecutionContext { &self, plan: Arc, path: String, - writer_properties: Option, ) -> Result<()> { // create directory to contain the Parquet files (one per partition) let fs_path = Path::new(&path); @@ -536,22 +544,45 @@ impl ExecutionContext { let mut tasks = vec![]; for i in 0..plan.output_partitioning().partition_count() { let plan = plan.clone(); + let schema = plan.schema(); let filename = format!("part-{}.parquet", i); let path = fs_path.join(&filename); - let file = fs::File::create(path)?; - let mut writer = ArrowWriter::try_new( - file.try_clone().unwrap(), - plan.schema(), - writer_properties.clone(), - )?; + + let mut file = fs::File::create(path)?; let stream = plan.execute(i).await?; + + let compression = write::CompressionCodec::Uncompressed; + let handle: JoinHandle> = task::spawn(async move { - stream - .map(|batch| writer.write(&batch?)) - .try_collect() - .await - .map_err(DataFusionError::from)?; - writer.close().map_err(DataFusionError::from).map(|_| ()) + let parquet_types = schema + .fields() + .iter() + 
.map(write::to_parquet_type) + .collect::>>()?; + + // do not do this. + let batches = + crate::physical_plan::common::collect(stream).await?; + + let groups = batches.iter().map(|batch| { + Ok(batch.columns().iter().zip(parquet_types.iter()).map( + |(array, type_)| { + Ok(std::iter::once(write::array_to_page( + array.as_ref(), + type_, + compression, + ))) + }, + )) + }); + + Ok(write::write_file( + &mut file, + groups, + schema.as_ref(), + compression, + None, + )?) }); tasks.push(handle); } @@ -905,20 +936,18 @@ mod tests { logical_plan::create_udaf, physical_plan::expressions::AvgAccumulator, }; - use arrow::array::{ - Array, ArrayRef, BinaryArray, DictionaryArray, Float64Array, Int32Array, - Int64Array, LargeBinaryArray, LargeStringArray, StringArray, - TimestampNanosecondArray, - }; - use arrow::compute::add; - use arrow::datatypes::*; - use arrow::record_batch::RecordBatch; + use arrow2::array::*; + use arrow2::datatypes::*; + use arrow2::record_batch::RecordBatch; use std::fs::File; use std::thread::{self, JoinHandle}; use std::{io::prelude::*, sync::Mutex}; use tempfile::TempDir; use test::*; + type ArrayRef = Arc; + type SchemaRef = Arc; + #[tokio::test] async fn parallel_projection() -> Result<()> { let partition_count = 4; @@ -1150,9 +1179,9 @@ mod tests { let partitions = vec![vec![RecordBatch::try_new( schema.clone(), vec![ - Arc::new(Int32Array::from(vec![1, 10, 10, 100])), - Arc::new(Int32Array::from(vec![2, 12, 12, 120])), - Arc::new(Int32Array::from(vec![3, 12, 12, 120])), + Arc::new(Int32Array::from_slice(&[1, 10, 10, 100])), + Arc::new(Int32Array::from_slice(&[2, 12, 12, 120])), + Arc::new(Int32Array::from_slice(&[3, 12, 12, 120])), ], )?]]; @@ -1710,13 +1739,13 @@ mod tests { // C, 1 // A, 1 - let str_array: LargeStringArray = vec!["A", "B", "A", "A", "C", "A"] + let str_array: Utf8Array = vec!["A", "B", "A", "A", "C", "A"] .into_iter() .map(Some) .collect(); let str_array = Arc::new(str_array); - let val_array: Int64Array = vec![1, 2, 2, 4, 1, 1].into(); + let val_array = Int64Array::from_slice(&[1, 2, 2, 4, 1, 1]); let val_array = Arc::new(val_array); let schema = Arc::new(Schema::new(vec![ @@ -1750,7 +1779,7 @@ mod tests { #[tokio::test] async fn group_by_dictionary() { - async fn run_test_case() { + async fn run_test_case() { let mut ctx = ExecutionContext::new(); // input data looks like: @@ -1761,11 +1790,11 @@ mod tests { // C, 1 // A, 1 - let dict_array: DictionaryArray = - vec!["A", "B", "A", "A", "C", "A"].into_iter().collect(); + let dict_array: DictionaryPrimitive, _> = + vec!["A", "B", "A", "A", "C", "A"].iter().collect(); let dict_array = Arc::new(dict_array); - let val_array: Int64Array = vec![1, 2, 2, 4, 1, 1].into(); + let val_array = Int64Array::from_slice(&[1, 2, 2, 4, 1, 1]); let val_array = Arc::new(val_array); let schema = Arc::new(Schema::new(vec![ @@ -1834,14 +1863,14 @@ mod tests { assert_batches_sorted_eq!(expected, &results); } - run_test_case::().await; - run_test_case::().await; - run_test_case::().await; - run_test_case::().await; - run_test_case::().await; - run_test_case::().await; - run_test_case::().await; - run_test_case::().await; + run_test_case::().await; + run_test_case::().await; + run_test_case::().await; + run_test_case::().await; + run_test_case::().await; + run_test_case::().await; + run_test_case::().await; + run_test_case::().await; } async fn run_count_distinct_integers_aggregated_scenario( @@ -2046,7 +2075,7 @@ mod tests { vec![test::make_partition(4)], vec![test::make_partition(5)], ]; - let schema = 
partitions[0][0].schema(); + let schema = partitions[0][0].schema().clone(); let provider = Arc::new(MemTable::try_new(schema, partitions).unwrap()); ctx.register_table("t", provider).unwrap(); @@ -2384,8 +2413,8 @@ mod tests { let batch = RecordBatch::try_new( Arc::new(schema.clone()), vec![ - Arc::new(Int32Array::from(vec![1, 10, 10, 100])), - Arc::new(Int32Array::from(vec![2, 12, 12, 120])), + Arc::new(Int32Array::from_slice(&[1, 10, 10, 100])), + Arc::new(Int32Array::from_slice(&[2, 12, 12, 120])), ], )?; @@ -2403,7 +2432,7 @@ mod tests { .as_any() .downcast_ref::() .expect("cast failed"); - Ok(Arc::new(add(l, r)?) as ArrayRef) + Ok(Arc::new(add::add(l, r)?) as ArrayRef) }; let myfunc = make_scalar_function(myfunc); @@ -2483,11 +2512,11 @@ mod tests { let batch1 = RecordBatch::try_new( Arc::new(schema.clone()), - vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + vec![Arc::new(Int32Array::from_slice(&[1, 2, 3]))], )?; let batch2 = RecordBatch::try_new( Arc::new(schema.clone()), - vec![Arc::new(Int32Array::from(vec![4, 5]))], + vec![Arc::new(Int32Array::from_slice(&[4, 5]))], )?; let mut ctx = ExecutionContext::new(); @@ -2520,11 +2549,11 @@ mod tests { let batch1 = RecordBatch::try_new( Arc::new(schema.clone()), - vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + vec![Arc::new(Int32Array::from_slice(&[1, 2, 3]))], )?; let batch2 = RecordBatch::try_new( Arc::new(schema.clone()), - vec![Arc::new(Int32Array::from(vec![4, 5]))], + vec![Arc::new(Int32Array::from_slice(&[4, 5]))], )?; let mut ctx = ExecutionContext::new(); @@ -2986,16 +3015,16 @@ mod tests { let batch = RecordBatch::try_new( Arc::new(schema.clone()), vec![ - Arc::new(Int32Array::from(vec![1])), - Arc::new(Float64Array::from(vec![1.0])), - Arc::new(StringArray::from(vec![Some("foo")])), - Arc::new(LargeStringArray::from(vec![Some("bar")])), - Arc::new(BinaryArray::from(vec![b"foo" as &[u8]])), - Arc::new(LargeBinaryArray::from(vec![b"foo" as &[u8]])), - Arc::new(TimestampNanosecondArray::from_opt_vec( - vec![Some(123)], - None, - )), + Arc::new(Int32Array::from_slice(&[1])), + Arc::new(Float64Array::from_slice(&[1.0])), + Arc::new(Utf8Array::::from(vec![Some("foo")])), + Arc::new(Utf8Array::::from(vec![Some("bar")])), + Arc::new(BinaryArray::::from_slice(&[b"foo" as &[u8]])), + Arc::new(BinaryArray::::from_slice(&[b"foo" as &[u8]])), + Arc::new( + Primitive::::from(vec![Some(123)]) + .to(DataType::Timestamp(TimeUnit::Nanosecond, None)), + ), ], ) .unwrap(); diff --git a/datafusion/src/execution/dataframe_impl.rs b/datafusion/src/execution/dataframe_impl.rs index fdc75f92f2e75..59af408b38592 100644 --- a/datafusion/src/execution/dataframe_impl.rs +++ b/datafusion/src/execution/dataframe_impl.rs @@ -19,7 +19,7 @@ use std::sync::{Arc, Mutex}; -use crate::arrow::record_batch::RecordBatch; +use crate::arrow2::record_batch::RecordBatch; use crate::error::Result; use crate::execution::context::{ExecutionContext, ExecutionContextState}; use crate::logical_plan::{ @@ -184,7 +184,7 @@ mod tests { use crate::{assert_batches_sorted_eq, execution::context::ExecutionContext}; use crate::{datasource::csv::CsvReadOptions, physical_plan::ColumnarValue}; use crate::{physical_plan::functions::ScalarFunctionImplementation, test}; - use arrow::datatypes::DataType; + use arrow2::datatypes::DataType; #[test] fn select_columns() -> Result<()> { @@ -369,7 +369,7 @@ mod tests { fn register_aggregate_csv(ctx: &mut ExecutionContext) -> Result<()> { let schema = test::aggr_test_schema(); - let testdata = arrow::util::test_util::arrow_test_data(); + let 
testdata = crate::test::arrow_test_data(); ctx.register_csv( "aggregate_test_100", &format!("{}/csv/aggregate_test_100.csv", testdata), diff --git a/datafusion/src/lib.rs b/datafusion/src/lib.rs index b6f64feb70d2a..ced5a9be6ba3c 100644 --- a/datafusion/src/lib.rs +++ b/datafusion/src/lib.rs @@ -39,7 +39,7 @@ //! ```rust //! # use datafusion::prelude::*; //! # use datafusion::error::Result; -//! # use arrow::record_batch::RecordBatch; +//! # use arrow2::record_batch::RecordBatch; //! //! # #[tokio::main] //! # async fn main() -> Result<()> { @@ -57,7 +57,7 @@ //! let results: Vec = df.collect().await?; //! //! // format the results -//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)?; +//! let pretty_results = arrow2::util::pretty::pretty_format_batches(&results)?; //! //! let expected = vec![ //! "+---+--------+", @@ -77,7 +77,7 @@ //! ``` //! # use datafusion::prelude::*; //! # use datafusion::error::Result; -//! # use arrow::record_batch::RecordBatch; +//! # use arrow2::record_batch::RecordBatch; //! //! # #[tokio::main] //! # async fn main() -> Result<()> { @@ -92,7 +92,7 @@ //! let results: Vec = df.collect().await?; //! //! // format the results -//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)?; +//! let pretty_results = arrow2::util::pretty::pretty_format_batches(&results)?; //! //! let expected = vec![ //! "+---+--------+", @@ -130,7 +130,7 @@ //! ### Logical plan //! //! Logical planning yields [`logical plans`](logical_plan::LogicalPlan) and [`logical expressions`](logical_plan::Expr). -//! These are [`Schema`](arrow::datatypes::Schema)-aware traits that represent statements whose result is independent of how it should physically be executed. +//! These are [`Schema`](arrow2::datatypes::Schema)-aware traits that represent statements whose result is independent of how it should physically be executed. //! //! A [`LogicalPlan`](logical_plan::LogicalPlan) is a Direct Asyclic graph of other [`LogicalPlan`s](logical_plan::LogicalPlan) and each node contains logical expressions ([`Expr`s](logical_plan::Expr)). //! All of these are located in [`logical_plan`](logical_plan). @@ -152,12 +152,12 @@ //! Broadly speaking, //! //! * an [`ExecutionPlan`](physical_plan::ExecutionPlan) receives a partition number and asyncronosly returns -//! an iterator over [`RecordBatch`](arrow::record_batch::RecordBatch) -//! (a node-specific struct that implements [`RecordBatchReader`](arrow::record_batch::RecordBatchReader)) -//! * a [`PhysicalExpr`](physical_plan::PhysicalExpr) receives a [`RecordBatch`](arrow::record_batch::RecordBatch) -//! and returns an [`Array`](arrow::array::Array) -//! * an [`AggregateExpr`](physical_plan::AggregateExpr) receives [`RecordBatch`es](arrow::record_batch::RecordBatch) -//! and returns a [`RecordBatch`](arrow::record_batch::RecordBatch) of a single row(*) +//! an iterator over [`RecordBatch`](arrow2::record_batch::RecordBatch) +//! (a node-specific struct that implements [`RecordBatchReader`](arrow2::record_batch::RecordBatchReader)) +//! * a [`PhysicalExpr`](physical_plan::PhysicalExpr) receives a [`RecordBatch`](arrow2::record_batch::RecordBatch) +//! and returns an [`Array`](arrow2::array::Array) +//! * an [`AggregateExpr`](physical_plan::AggregateExpr) receives [`RecordBatch`es](arrow2::record_batch::RecordBatch) +//! and returns a [`RecordBatch`](arrow2::record_batch::RecordBatch) of a single row(*) //! //! 
(*) Technically, it aggregates the results on each partition and then merges the results into a single partition. //! @@ -200,8 +200,7 @@ pub mod sql; pub mod variable; // re-export dependencies from arrow-rs to minimise version maintenance for crate users -pub use arrow; -pub use parquet; +pub use arrow2; #[cfg(test)] pub mod test; diff --git a/datafusion/src/logical_plan/builder.rs b/datafusion/src/logical_plan/builder.rs index 2e69814d2634e..e987fc9c335c2 100644 --- a/datafusion/src/logical_plan/builder.rs +++ b/datafusion/src/logical_plan/builder.rs @@ -19,11 +19,13 @@ use std::{collections::HashMap, sync::Arc}; -use arrow::{ - datatypes::{Schema, SchemaRef}, +use arrow2::{ + datatypes::Schema, record_batch::RecordBatch, }; +type SchemaRef = Arc; + use crate::datasource::TableProvider; use crate::error::{DataFusionError, Result}; use crate::{ @@ -44,7 +46,7 @@ use std::collections::HashSet; /// # use datafusion::prelude::*; /// # use datafusion::logical_plan::LogicalPlanBuilder; /// # use datafusion::error::Result; -/// # use arrow::datatypes::{Schema, DataType, Field}; +/// # use arrow2::datatypes::{Schema, DataType, Field}; /// # /// # fn main() -> Result<()> { /// # @@ -416,7 +418,7 @@ fn validate_unique_names<'a>( #[cfg(test)] mod tests { - use arrow::datatypes::{DataType, Field}; + use arrow2::datatypes::{DataType, Field}; use super::super::{lit, sum}; use super::*; diff --git a/datafusion/src/logical_plan/dfschema.rs b/datafusion/src/logical_plan/dfschema.rs index 9adb22b43d075..651eecb9aa185 100644 --- a/datafusion/src/logical_plan/dfschema.rs +++ b/datafusion/src/logical_plan/dfschema.rs @@ -24,9 +24,11 @@ use std::sync::Arc; use crate::error::{DataFusionError, Result}; -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use arrow2::datatypes::{DataType, Field, Schema}; use std::fmt::{Display, Formatter}; +type SchemaRef = Arc; + /// A reference-counted reference to a `DFSchema`. pub type DFSchemaRef = Arc; @@ -356,7 +358,7 @@ impl DFField { #[cfg(test)] mod tests { use super::*; - use arrow::datatypes::DataType; + use arrow2::datatypes::DataType; #[test] fn from_unqualified_field() { diff --git a/datafusion/src/logical_plan/display.rs b/datafusion/src/logical_plan/display.rs index f285534fdf1b6..8fe96ecf8aeec 100644 --- a/datafusion/src/logical_plan/display.rs +++ b/datafusion/src/logical_plan/display.rs @@ -17,7 +17,7 @@ //! This module provides logic for displaying LogicalPlans in various styles use super::{LogicalPlan, PlanVisitor}; -use arrow::datatypes::Schema; +use arrow2::datatypes::Schema; use std::fmt; /// Formats plans with a single line per node. For example: @@ -81,7 +81,7 @@ impl<'a, 'b> PlanVisitor for IndentVisitor<'a, 'b> { /// `foo:Utf8;N` if `foo` is nullable. 
/// /// ``` -/// use arrow::datatypes::{Field, Schema, DataType}; +/// use arrow2::datatypes::{Field, Schema, DataType}; /// # use datafusion::logical_plan::display_schema; /// let schema = Schema::new(vec![ /// Field::new("id", DataType::Int32, false), @@ -238,7 +238,7 @@ impl<'a, 'b> PlanVisitor for GraphvizVisitor<'a, 'b> { #[cfg(test)] mod tests { - use arrow::datatypes::{DataType, Field}; + use arrow2::datatypes::{DataType, Field}; use super::*; diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs index 3365bf2603234..45b3541883c26 100644 --- a/datafusion/src/logical_plan/expr.rs +++ b/datafusion/src/logical_plan/expr.rs @@ -24,7 +24,7 @@ use std::fmt; use std::sync::Arc; use aggregates::{AccumulatorFunctionImplementation, StateTypeFunction}; -use arrow::{compute::can_cast_types, datatypes::DataType}; +use arrow2::{compute::cast::can_cast_types, datatypes::DataType}; use crate::error::{DataFusionError, Result}; use crate::logical_plan::{DFField, DFSchema}; @@ -39,7 +39,7 @@ use std::collections::HashSet; /// represent logical expressions such as `A + 1`, or `CAST(c1 AS /// int)`. /// -/// An `Expr` can compute its [DataType](arrow::datatypes::DataType) +/// An `Expr` can compute its [DataType](arrow2::datatypes::DataType) /// and nullability, and has functions for building up complex /// expressions. /// @@ -211,11 +211,11 @@ pub enum Expr { } impl Expr { - /// Returns the [arrow::datatypes::DataType] of the expression based on [arrow::datatypes::Schema]. + /// Returns the [arrow2::datatypes::DataType] of the expression based on [arrow2::datatypes::Schema]. /// /// # Errors /// - /// This function errors when it is not possible to compute its [arrow::datatypes::DataType]. + /// This function errors when it is not possible to compute its [arrow2::datatypes::DataType]. /// This happens when e.g. the expression refers to a column that does not exist in the schema, or when /// the expression is incorrectly typed (e.g. `[utf8] + [bool]`). pub fn get_type(&self, schema: &DFSchema) -> Result { @@ -280,7 +280,7 @@ impl Expr { } } - /// Returns the nullability of the expression based on [arrow::datatypes::Schema]. + /// Returns the nullability of the expression based on [arrow2::datatypes::Schema]. /// /// # Errors /// @@ -336,14 +336,14 @@ impl Expr { } } - /// Returns the name of this expression based on [arrow::datatypes::Schema]. + /// Returns the name of this expression based on [arrow2::datatypes::Schema]. /// /// This represents how a column with this expression is named when no alias is chosen pub fn name(&self, input_schema: &DFSchema) -> Result { create_name(self, input_schema) } - /// Returns a [arrow::datatypes::Field] compatible with this expression. + /// Returns a [arrow2::datatypes::Field] compatible with this expression. pub fn to_field(&self, input_schema: &DFSchema) -> Result { Ok(DFField::new( None, //TODO qualifier @@ -353,12 +353,12 @@ impl Expr { )) } - /// Wraps this expression in a cast to a target [arrow::datatypes::DataType]. + /// Wraps this expression in a cast to a target [arrow2::datatypes::DataType]. /// /// # Errors /// /// This function errors when it is impossible to cast the - /// expression to the target [arrow::datatypes::DataType]. + /// expression to the target [arrow2::datatypes::DataType]. 
pub fn cast_to(self, cast_to_type: &DataType, schema: &DFSchema) -> Result { let this_type = self.get_type(schema)?; if this_type == *cast_to_type { diff --git a/datafusion/src/logical_plan/plan.rs b/datafusion/src/logical_plan/plan.rs index 8b9aac9ea73b9..82fcd3d3a55e9 100644 --- a/datafusion/src/logical_plan/plan.rs +++ b/datafusion/src/logical_plan/plan.rs @@ -23,7 +23,7 @@ use std::{ sync::Arc, }; -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use arrow2::datatypes::{DataType, Field, Schema}; use crate::datasource::TableProvider; use crate::sql::parser::FileType; @@ -36,6 +36,8 @@ use super::{ }; use crate::logical_plan::dfschema::DFSchemaRef; +type SchemaRef = Arc; + /// Join type #[derive(Debug, Clone, Copy)] pub enum JoinType { @@ -468,7 +470,7 @@ impl LogicalPlan { /// ``` /// /// ``` - /// use arrow::datatypes::{Field, Schema, DataType}; + /// use arrow2::datatypes::{Field, Schema, DataType}; /// use datafusion::logical_plan::{lit, col, LogicalPlanBuilder}; /// let schema = Schema::new(vec![ /// Field::new("id", DataType::Int32, false), @@ -509,7 +511,7 @@ impl LogicalPlan { /// ``` /// /// ``` - /// use arrow::datatypes::{Field, Schema, DataType}; + /// use arrow2::datatypes::{Field, Schema, DataType}; /// use datafusion::logical_plan::{lit, col, LogicalPlanBuilder}; /// let schema = Schema::new(vec![ /// Field::new("id", DataType::Int32, false), @@ -549,7 +551,7 @@ impl LogicalPlan { /// structure, and one with additional details such as schema. /// /// ``` - /// use arrow::datatypes::{Field, Schema, DataType}; + /// use arrow2::datatypes::{Field, Schema, DataType}; /// use datafusion::logical_plan::{lit, col, LogicalPlanBuilder}; /// let schema = Schema::new(vec![ /// Field::new("id", DataType::Int32, false), @@ -608,7 +610,7 @@ impl LogicalPlan { /// Projection: #id /// ``` /// ``` - /// use arrow::datatypes::{Field, Schema, DataType}; + /// use arrow2::datatypes::{Field, Schema, DataType}; /// use datafusion::logical_plan::{lit, col, LogicalPlanBuilder}; /// let schema = Schema::new(vec![ /// Field::new("id", DataType::Int32, false), diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 51bf0ce1b5054..f85ef44929d10 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -20,7 +20,7 @@ use std::sync::Arc; -use arrow::datatypes::DataType; +use arrow2::datatypes::DataType; use crate::error::Result; use crate::execution::context::ExecutionProps; @@ -232,7 +232,7 @@ mod tests { col, lit, max, min, DFField, DFSchema, LogicalPlanBuilder, }; - use arrow::datatypes::*; + use arrow2::datatypes::*; use chrono::{DateTime, Utc}; fn test_table_scan() -> Result { diff --git a/datafusion/src/optimizer/filter_push_down.rs b/datafusion/src/optimizer/filter_push_down.rs index 4c248e2b6483d..9e73ea9102c8d 100644 --- a/datafusion/src/optimizer/filter_push_down.rs +++ b/datafusion/src/optimizer/filter_push_down.rs @@ -453,7 +453,10 @@ mod tests { use crate::physical_plan::ExecutionPlan; use crate::test::*; use crate::{logical_plan::col, prelude::JoinType}; - use arrow::datatypes::SchemaRef; + use arrow2::datatypes::Schema; + use std::sync::Arc; + + type SchemaRef = Arc; fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) { let rule = FilterPushDown::new(); @@ -958,10 +961,10 @@ mod tests { impl TableProvider for PushDownProvider { fn schema(&self) -> SchemaRef { - Arc::new(arrow::datatypes::Schema::new(vec![ - arrow::datatypes::Field::new( + 
Arc::new(arrow2::datatypes::Schema::new(vec![ + arrow2::datatypes::Field::new( "a", - arrow::datatypes::DataType::Int32, + arrow2::datatypes::DataType::Int32, true, ), ])) diff --git a/datafusion/src/optimizer/hash_build_probe_order.rs b/datafusion/src/optimizer/hash_build_probe_order.rs index 168c4a17edfd0..72a86ed667493 100644 --- a/datafusion/src/optimizer/hash_build_probe_order.rs +++ b/datafusion/src/optimizer/hash_build_probe_order.rs @@ -224,6 +224,9 @@ mod tests { logical_plan::{DFSchema, Expr}, test::*, }; + use arrow2::datatypes::Schema; + + type SchemaRef = Arc; struct TestTableProvider { num_rows: usize, @@ -233,7 +236,7 @@ mod tests { fn as_any(&self) -> &dyn std::any::Any { unimplemented!() } - fn schema(&self) -> arrow::datatypes::SchemaRef { + fn schema(&self) -> SchemaRef { unimplemented!() } diff --git a/datafusion/src/optimizer/projection_push_down.rs b/datafusion/src/optimizer/projection_push_down.rs index 21c9caba3316d..ac49ac9edea74 100644 --- a/datafusion/src/optimizer/projection_push_down.rs +++ b/datafusion/src/optimizer/projection_push_down.rs @@ -23,8 +23,8 @@ use crate::execution::context::ExecutionProps; use crate::logical_plan::{DFField, DFSchema, DFSchemaRef, LogicalPlan, ToDFSchema}; use crate::optimizer::optimizer::OptimizerRule; use crate::optimizer::utils; -use arrow::datatypes::Schema; -use arrow::error::Result as ArrowResult; +use arrow2::datatypes::Schema; +use arrow2::error::Result as ArrowResult; use std::{collections::HashSet, sync::Arc}; use utils::optimize_explain; @@ -324,7 +324,7 @@ mod tests { use crate::logical_plan::{col, lit}; use crate::logical_plan::{max, min, Expr, LogicalPlanBuilder}; use crate::test::*; - use arrow::datatypes::DataType; + use arrow2::datatypes::DataType; #[test] fn aggregate_no_group_by() -> Result<()> { diff --git a/datafusion/src/optimizer/utils.rs b/datafusion/src/optimizer/utils.rs index 9288c65ac4dac..d2c962d38bceb 100644 --- a/datafusion/src/optimizer/utils.rs +++ b/datafusion/src/optimizer/utils.rs @@ -19,7 +19,7 @@ use std::{collections::HashSet, sync::Arc}; -use arrow::datatypes::Schema; +use arrow2::datatypes::Schema; use super::optimizer::OptimizerRule; use crate::execution::context::ExecutionProps; @@ -419,7 +419,7 @@ pub fn rewrite_expression(expr: &Expr, expressions: &[Expr]) -> Result { mod tests { use super::*; use crate::logical_plan::{col, LogicalPlanBuilder}; - use arrow::datatypes::DataType; + use arrow2::datatypes::DataType; use std::collections::HashSet; #[test] diff --git a/datafusion/src/physical_optimizer/repartition.rs b/datafusion/src/physical_optimizer/repartition.rs index fee4b3e11e5d2..1518db37a84a4 100644 --- a/datafusion/src/physical_optimizer/repartition.rs +++ b/datafusion/src/physical_optimizer/repartition.rs @@ -106,7 +106,7 @@ impl PhysicalOptimizerRule for Repartition { } #[cfg(test)] mod tests { - use arrow::datatypes::Schema; + use arrow2::datatypes::Schema; use super::*; use crate::datasource::datasource::Statistics; @@ -119,14 +119,12 @@ mod tests { vec![], Arc::new(ParquetExec::new( vec![ParquetPartition { - filenames: vec!["x".to_string()], + filename: "x".to_string(), statistics: Statistics::default(), }], - Schema::empty(), - None, + Arc::new(Schema::empty()), None, 2048, - None, )), )?; @@ -155,14 +153,12 @@ mod tests { vec![], Arc::new(ParquetExec::new( vec![ParquetPartition { - filenames: vec!["x".to_string()], + filename: "x".to_string(), statistics: Statistics::default(), }], - Schema::empty(), - None, + Arc::new(Schema::empty()), None, 2048, - None, )), )?), )?; diff 
--git a/datafusion/src/physical_plan/aggregates.rs b/datafusion/src/physical_plan/aggregates.rs index 9417c7c8f05a5..94266fc1255d9 100644 --- a/datafusion/src/physical_plan/aggregates.rs +++ b/datafusion/src/physical_plan/aggregates.rs @@ -34,7 +34,7 @@ use super::{ use crate::error::{DataFusionError, Result}; use crate::physical_plan::distinct_expressions; use crate::physical_plan::expressions; -use arrow::datatypes::{DataType, Schema, TimeUnit}; +use arrow2::datatypes::{DataType, Schema, TimeUnit}; use expressions::{avg_return_type, sum_return_type}; use std::{fmt, str::FromStr, sync::Arc}; diff --git a/datafusion/src/physical_plan/array_expressions.rs b/datafusion/src/physical_plan/array_expressions.rs index a7e03b70e5d21..ebc3722cb5f4a 100644 --- a/datafusion/src/physical_plan/array_expressions.rs +++ b/datafusion/src/physical_plan/array_expressions.rs @@ -18,75 +18,30 @@ //! Array expressions use crate::error::{DataFusionError, Result}; -use arrow::array::*; -use arrow::datatypes::DataType; +use arrow2::array::*; +use arrow2::compute::concat; use std::sync::Arc; use super::ColumnarValue; -macro_rules! downcast_vec { - ($ARGS:expr, $ARRAY_TYPE:ident) => {{ - $ARGS - .iter() - .map(|e| match e.as_any().downcast_ref::<$ARRAY_TYPE>() { - Some(array) => Ok(array), - _ => Err(DataFusionError::Internal("failed to downcast".to_string())), - }) - }}; -} +type ArrayRef = Arc; -macro_rules! array { - ($ARGS:expr, $ARRAY_TYPE:ident, $BUILDER_TYPE:ident) => {{ - // downcast all arguments to their common format - let args = - downcast_vec!($ARGS, $ARRAY_TYPE).collect::>>()?; +fn array_array(arrays: &[&dyn Array]) -> Result { + assert!(arrays.len() > 0); + let first = arrays[0]; + assert!(arrays.iter().all(|x| x.len() == first.len())); + assert!(arrays.iter().all(|x| x.data_type() == first.data_type())); - let mut builder = FixedSizeListBuilder::<$BUILDER_TYPE>::new( - <$BUILDER_TYPE>::new(args[0].len()), - args.len() as i32, - ); - // for each entry in the array - for index in 0..args[0].len() { - for arg in &args { - if arg.is_null(index) { - builder.values().append_null()?; - } else { - builder.values().append_value(arg.value(index))?; - } - } - builder.append(true)?; - } - Ok(Arc::new(builder.finish())) - }}; -} + let size = arrays.len(); + let length = first.len(); -fn array_array(args: &[&dyn Array]) -> Result { - // do not accept 0 arguments. 
- if args.is_empty() { - return Err(DataFusionError::Internal( - "array requires at least one argument".to_string(), - )); - } - - match args[0].data_type() { - DataType::Utf8 => array!(args, StringArray, StringBuilder), - DataType::LargeUtf8 => array!(args, LargeStringArray, LargeStringBuilder), - DataType::Boolean => array!(args, BooleanArray, BooleanBuilder), - DataType::Float32 => array!(args, Float32Array, Float32Builder), - DataType::Float64 => array!(args, Float64Array, Float64Builder), - DataType::Int8 => array!(args, Int8Array, Int8Builder), - DataType::Int16 => array!(args, Int16Array, Int16Builder), - DataType::Int32 => array!(args, Int32Array, Int32Builder), - DataType::Int64 => array!(args, Int64Array, Int64Builder), - DataType::UInt8 => array!(args, UInt8Array, UInt8Builder), - DataType::UInt16 => array!(args, UInt16Array, UInt16Builder), - DataType::UInt32 => array!(args, UInt32Array, UInt32Builder), - DataType::UInt64 => array!(args, UInt64Array, UInt64Builder), - data_type => Err(DataFusionError::NotImplemented(format!( - "Array is not implemented for type '{:?}'.", - data_type - ))), - } + let values = concat::concatenate(arrays)?; + let data_type = FixedSizeListArray::default_datatype(first.data_type().clone(), size); + Ok(FixedSizeListArray::from_data( + data_type, + values.into(), + None, + )) } /// put values in an array. @@ -104,24 +59,5 @@ pub fn array(values: &[ColumnarValue]) -> Result { }) .collect::>()?; - Ok(ColumnarValue::Array(array_array(&arrays)?)) + Ok(ColumnarValue::Array(array_array(&arrays).map(Arc::new)?)) } - -/// Currently supported types by the array function. -/// The order of these types correspond to the order on which coercion applies -/// This should thus be from least informative to most informative -pub static SUPPORTED_ARRAY_TYPES: &[DataType] = &[ - DataType::Boolean, - DataType::UInt8, - DataType::UInt16, - DataType::UInt32, - DataType::UInt64, - DataType::Int8, - DataType::Int16, - DataType::Int32, - DataType::Int64, - DataType::Float32, - DataType::Float64, - DataType::Utf8, - DataType::LargeUtf8, -]; diff --git a/datafusion/src/physical_plan/coalesce_batches.rs b/datafusion/src/physical_plan/coalesce_batches.rs index e25412d9d6b8b..bc7a199fd7d3e 100644 --- a/datafusion/src/physical_plan/coalesce_batches.rs +++ b/datafusion/src/physical_plan/coalesce_batches.rs @@ -29,10 +29,11 @@ use crate::physical_plan::{ SendableRecordBatchStream, }; -use arrow::compute::kernels::concat::concat; -use arrow::datatypes::SchemaRef; -use arrow::error::Result as ArrowResult; -use arrow::record_batch::RecordBatch; +use arrow2::compute::concat::concatenate; +use arrow2::datatypes::Schema; +type SchemaRef = Arc; +use arrow2::error::Result as ArrowResult; +use arrow2::record_batch::RecordBatch; use async_trait::async_trait; use futures::stream::{Stream, StreamExt}; use log::debug; @@ -239,12 +240,13 @@ pub fn concat_batches( } let mut arrays = Vec::with_capacity(schema.fields().len()); for i in 0..schema.fields().len() { - let array = concat( + let array = concatenate( &batches .iter() .map(|batch| batch.column(i).as_ref()) .collect::>(), - )?; + )? 
+ .into(); arrays.push(array); } debug!( @@ -259,8 +261,8 @@ pub fn concat_batches( mod tests { use super::*; use crate::physical_plan::{memory::MemoryExec, repartition::RepartitionExec}; - use arrow::array::UInt32Array; - use arrow::datatypes::{DataType, Field, Schema}; + use arrow2::array::UInt32Array; + use arrow2::datatypes::{DataType, Field, Schema}; #[tokio::test(flavor = "multi_thread")] async fn test_concat_batches() -> Result<()> { @@ -299,7 +301,7 @@ mod tests { fn create_batch(schema: &Arc) -> RecordBatch { RecordBatch::try_new( schema.clone(), - vec![Arc::new(UInt32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8]))], + vec![Arc::new(UInt32Array::from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]))], ) .unwrap() } diff --git a/datafusion/src/physical_plan/common.rs b/datafusion/src/physical_plan/common.rs index f1ed3742340b0..761eab48fbc2d 100644 --- a/datafusion/src/physical_plan/common.rs +++ b/datafusion/src/physical_plan/common.rs @@ -25,9 +25,10 @@ use std::task::{Context, Poll}; use super::{RecordBatchStream, SendableRecordBatchStream}; use crate::error::{DataFusionError, Result}; -use arrow::datatypes::SchemaRef; -use arrow::error::Result as ArrowResult; -use arrow::record_batch::RecordBatch; +use arrow2::datatypes::Schema; +type SchemaRef = Arc; +use arrow2::error::Result as ArrowResult; +use arrow2::record_batch::RecordBatch; use futures::{Stream, TryStreamExt}; /// Stream of record batches diff --git a/datafusion/src/physical_plan/cross_join.rs b/datafusion/src/physical_plan/cross_join.rs index f6f5da4cf8db9..06f209858a857 100644 --- a/datafusion/src/physical_plan/cross_join.rs +++ b/datafusion/src/physical_plan/cross_join.rs @@ -21,9 +21,10 @@ use futures::{lock::Mutex, StreamExt}; use std::{any::Any, sync::Arc, task::Poll}; -use arrow::datatypes::{Schema, SchemaRef}; -use arrow::error::Result as ArrowResult; -use arrow::record_batch::RecordBatch; +use crate::physical_plan::memory::MemoryStream; +use arrow2::datatypes::Schema; +use arrow2::error::Result as ArrowResult; +use arrow2::record_batch::RecordBatch; use futures::{Stream, TryStreamExt}; @@ -36,11 +37,13 @@ use async_trait::async_trait; use std::time::Instant; use super::{ - coalesce_batches::concat_batches, memory::MemoryStream, DisplayFormatType, - ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream, + coalesce_batches::concat_batches, DisplayFormatType, ExecutionPlan, Partitioning, + RecordBatchStream, SendableRecordBatchStream, }; use log::debug; +type SchemaRef = Arc; + /// Data of the left side type JoinLeftData = RecordBatch; diff --git a/datafusion/src/physical_plan/crypto_expressions.rs b/datafusion/src/physical_plan/crypto_expressions.rs index 8ad876b24d0ce..07a68d30207ce 100644 --- a/datafusion/src/physical_plan/crypto_expressions.rs +++ b/datafusion/src/physical_plan/crypto_expressions.rs @@ -28,8 +28,8 @@ use crate::{ error::{DataFusionError, Result}, scalar::ScalarValue, }; -use arrow::{ - array::{Array, BinaryArray, GenericStringArray, StringOffsetSizeTrait}, +use arrow2::{ + array::{Array, BinaryArray, Offset, Utf8Array}, datatypes::DataType, }; @@ -60,15 +60,15 @@ fn sha_process(input: &str) -> SHA2DigestOutput { /// # Errors /// This function errors when: /// * the number of arguments is not 1 -/// * the first argument is not castable to a `GenericStringArray` +/// * the first argument is not castable to a `Utf8Array` fn unary_binary_function( args: &[&dyn Array], op: F, name: &str, -) -> Result +) -> Result> where R: AsRef<[u8]>, - T: StringOffsetSizeTrait, + T: Offset, F: Fn(&str) -> R, { if 
args.len() != 1 { @@ -81,7 +81,7 @@ where let array = args[0] .as_any() - .downcast_ref::>() + .downcast_ref::>() .ok_or_else(|| { DataFusionError::Internal("failed to downcast to string".to_string()) })?; @@ -137,9 +137,7 @@ where } } -fn md5_array( - args: &[&dyn Array], -) -> Result> { +fn md5_array(args: &[&dyn Array]) -> Result> { unary_string_function::(args, md5_process, "md5") } diff --git a/datafusion/src/physical_plan/csv.rs b/datafusion/src/physical_plan/csv.rs index 96b24cc33201f..203442150bdab 100644 --- a/datafusion/src/physical_plan/csv.rs +++ b/datafusion/src/physical_plan/csv.rs @@ -19,10 +19,12 @@ use crate::error::{DataFusionError, Result}; use crate::physical_plan::{common, DisplayFormatType, ExecutionPlan, Partitioning}; -use arrow::csv; -use arrow::datatypes::{Schema, SchemaRef}; -use arrow::error::Result as ArrowResult; -use arrow::record_batch::RecordBatch; + +use arrow2::datatypes::Schema; +use arrow2::error::Result as ArrowResult; +use arrow2::io::csv::read as csv_read; +use arrow2::record_batch::RecordBatch; + use futures::Stream; use std::any::Any; use std::fs::File; @@ -32,6 +34,8 @@ use std::sync::Arc; use std::sync::Mutex; use std::task::{Context, Poll}; +type SchemaRef = Arc; + use super::{RecordBatchStream, SendableRecordBatchStream}; use async_trait::async_trait; @@ -200,6 +204,15 @@ pub struct CsvExec { limit: Option, } +fn infer_schema_from_files( + filenames: &[String], + delimiter: u8, + max_records: &Option, + has_header: bool, +) -> Result { + todo!() +} + impl CsvExec { /// Create a new execution plan for reading a set of CSV files pub fn try_new( @@ -331,12 +344,12 @@ impl CsvExec { filenames: &[String], options: &CsvReadOptions, ) -> Result { - Ok(csv::infer_schema_from_files( + infer_schema_from_files( filenames, options.delimiter, - Some(options.schema_infer_max_records), + &Some(options.schema_infer_max_records), options.has_header, - )?) 
+ ) } } @@ -437,8 +450,11 @@ impl ExecutionPlan for CsvExec { /// Iterator over batches struct CsvStream { - /// Arrow CSV reader - reader: csv::Reader, + reader: csv_read::Reader, + schema: SchemaRef, + batch_size: usize, + projection: Option>, + limit: Option, } impl CsvStream { /// Create an iterator for a CSV file @@ -468,20 +484,19 @@ impl CsvStream { batch_size: usize, limit: Option, ) -> Result> { - let start_line = if has_header { 1 } else { 0 }; - let bounds = limit.map(|x| (0, x + start_line)); + let reader = csv_read::ReaderBuilder::new() + .delimiter(delimiter.unwrap_or(b","[0])) + .has_headers(has_header) + .from_reader(reader); - let reader = csv::Reader::new( + let projection = projection.clone(); + Ok(Self { reader, schema, - has_header, - delimiter, batch_size, - bounds, - projection.clone(), - ); - - Ok(Self { reader }) + projection, + limit, + }) } } @@ -492,14 +507,30 @@ impl Stream for CsvStream { mut self: Pin<&mut Self>, _: &mut Context<'_>, ) -> Poll> { - Poll::Ready(self.reader.next()) + let batch_size = self.batch_size; + let maybe_rows = csv_read::read_rows(&mut self.reader, 0, batch_size); + let maybe_batch = maybe_rows.and_then(|rows| { + if rows.is_empty() { + Ok(None) + } else { + csv_read::parse( + &rows, + self.schema.fields(), + self.projection.as_ref().map(|x| x.as_ref()), + 0, + &csv_read::DefaultParser::default(), + ) + .map(Some) + } + }); + Poll::Ready(maybe_batch.transpose()) } } impl RecordBatchStream for CsvStream { /// Get the schema fn schema(&self) -> SchemaRef { - self.reader.schema() + self.schema.clone() } } @@ -512,7 +543,7 @@ mod tests { #[tokio::test] async fn csv_exec_with_projection() -> Result<()> { let schema = aggr_test_schema(); - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = crate::test::arrow_test_data(); let filename = "aggregate_test_100.csv"; let path = format!("{}/csv/{}", testdata, filename); let csv = CsvExec::try_new( @@ -540,7 +571,7 @@ mod tests { #[tokio::test] async fn csv_exec_without_projection() -> Result<()> { let schema = aggr_test_schema(); - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = crate::test::arrow_test_data(); let filename = "aggregate_test_100.csv"; let path = format!("{}/csv/{}", testdata, filename); let csv = CsvExec::try_new( @@ -568,7 +599,7 @@ mod tests { #[tokio::test] async fn csv_exec_with_reader() -> Result<()> { let schema = aggr_test_schema(); - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = crate::test::arrow_test_data(); let filename = "aggregate_test_100.csv"; let path = format!("{}/csv/{}", testdata, filename); let buf = std::fs::read(path).unwrap(); diff --git a/datafusion/src/physical_plan/datetime_expressions.rs b/datafusion/src/physical_plan/datetime_expressions.rs index ec52e6bc4d528..c63e99dfc7429 100644 --- a/datafusion/src/physical_plan/datetime_expressions.rs +++ b/datafusion/src/physical_plan/datetime_expressions.rs @@ -21,24 +21,23 @@ use std::sync::Arc; use super::ColumnarValue; use crate::{ error::{DataFusionError, Result}, - scalar::{ScalarType, ScalarValue}, + scalar::ScalarValue, }; -use arrow::{ - array::{Array, ArrayRef, GenericStringArray, PrimitiveArray, StringOffsetSizeTrait}, - datatypes::{ArrowPrimitiveType, DataType, TimestampNanosecondType}, +use arrow2::{ + array::*, + compute::cast, + datatypes::{DataType, TimeUnit}, + types::NativeType, }; -use arrow::{ - array::{ - Date32Array, Date64Array, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, 
TimestampSecondArray, - }, - compute::kernels::temporal, - datatypes::TimeUnit, - temporal_conversions::timestamp_ns_to_datetime, -}; -use chrono::prelude::*; +use arrow2::{compute::temporal, temporal_conversions::timestamp_ns_to_datetime}; +use chrono::prelude::{DateTime, Local, NaiveDateTime, Utc}; +use chrono::Datelike; use chrono::Duration; use chrono::LocalResult; +use chrono::TimeZone; +use chrono::Timelike; + +type ArrayRef = Arc; #[inline] /// Accepts a string in RFC3339 / ISO8601 standard format and some @@ -185,17 +184,18 @@ fn naive_datetime_to_timestamp(s: &str, datetime: NaiveDateTime) -> Result /// # Errors /// This function errors iff: /// * the number of arguments is not 1 or -/// * the first argument is not castable to a `GenericStringArray` or +/// * the first argument is not castable to a `Utf8Array` or /// * the function `op` errors pub(crate) fn unary_string_to_primitive_function<'a, T, O, F>( args: &[&'a dyn Array], op: F, name: &str, + data_type: DataType, ) -> Result> where - O: ArrowPrimitiveType, - T: StringOffsetSizeTrait, - F: Fn(&'a str) -> Result, + O: NativeType, + T: Offset, + F: Fn(&'a str) -> Result, { if args.len() != 1 { return Err(DataFusionError::Internal(format!( @@ -207,13 +207,17 @@ where let array = args[0] .as_any() - .downcast_ref::>() + .downcast_ref::>() .ok_or_else(|| { DataFusionError::Internal("failed to downcast to string".to_string()) })?; // first map is the iterator, second is for the `Option<_>` - array.iter().map(|x| x.map(|x| op(x)).transpose()).collect() + array + .iter() + .map(|x| x.map(|x| op(x)).transpose()) + .collect::>>() + .map(|x| x.to(data_type)) } // given an function that maps a `&str` to a arrow native type, @@ -223,19 +227,31 @@ fn handle<'a, O, F, S>( args: &'a [ColumnarValue], op: F, name: &str, + data_type: DataType, ) -> Result where - O: ArrowPrimitiveType, - S: ScalarType, - F: Fn(&'a str) -> Result, + O: NativeType, + ScalarValue: From>, + S: NativeType, + F: Fn(&'a str) -> Result, { match &args[0] { ColumnarValue::Array(a) => match a.data_type() { DataType::Utf8 => Ok(ColumnarValue::Array(Arc::new( - unary_string_to_primitive_function::(&[a.as_ref()], op, name)?, + unary_string_to_primitive_function::( + &[a.as_ref()], + op, + name, + data_type, + )?, ))), DataType::LargeUtf8 => Ok(ColumnarValue::Array(Arc::new( - unary_string_to_primitive_function::(&[a.as_ref()], op, name)?, + unary_string_to_primitive_function::( + &[a.as_ref()], + op, + name, + data_type, + )?, ))), other => Err(DataFusionError::Internal(format!( "Unsupported data type {:?} for function {}", @@ -245,11 +261,11 @@ where ColumnarValue::Scalar(scalar) => match scalar { ScalarValue::Utf8(a) => { let result = a.as_ref().map(|x| (op)(x)).transpose()?; - Ok(ColumnarValue::Scalar(S::scalar(result))) + Ok(ColumnarValue::Scalar(result.into())) } ScalarValue::LargeUtf8(a) => { let result = a.as_ref().map(|x| (op)(x)).transpose()?; - Ok(ColumnarValue::Scalar(S::scalar(result))) + Ok(ColumnarValue::Scalar(result.into())) } other => Err(DataFusionError::Internal(format!( "Unsupported data type {:?} for function {}", @@ -261,10 +277,11 @@ where /// to_timestamp SQL function pub fn to_timestamp(args: &[ColumnarValue]) -> Result { - handle::( + handle::( args, string_to_timestamp_nanos, "to_timestamp", + DataType::Timestamp(TimeUnit::Nanosecond, None), ) } @@ -337,12 +354,12 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result { )); }; - let f = |x: Option| x.map(|x| date_trunc_single(granularity, x)).transpose(); + let f = |x: Option<&i64>| x.map(|x| 
date_trunc_single(granularity, *x)).transpose(); Ok(match array { ColumnarValue::Scalar(scalar) => { if let ScalarValue::TimestampNanosecond(v) = scalar { - ColumnarValue::Scalar(ScalarValue::TimestampNanosecond((f)(*v)?)) + ColumnarValue::Scalar(ScalarValue::TimestampNanosecond((f)(v.as_ref())?)) } else { return Err(DataFusionError::Execution( "array of `date_trunc` must be non-null scalar Utf8".to_string(), @@ -352,67 +369,19 @@ pub fn date_trunc(args: &[ColumnarValue]) -> Result { ColumnarValue::Array(array) => { let array = array .as_any() - .downcast_ref::() + .downcast_ref::>() .unwrap(); let array = array .iter() .map(f) - .collect::>()?; + .collect::>>()? + .to(DataType::Int64); ColumnarValue::Array(Arc::new(array)) } }) } -macro_rules! extract_date_part { - ($ARRAY: expr, $FN:expr) => { - match $ARRAY.data_type() { - DataType::Date32 => { - let array = $ARRAY.as_any().downcast_ref::().unwrap(); - Ok($FN(array)?) - } - DataType::Date64 => { - let array = $ARRAY.as_any().downcast_ref::().unwrap(); - Ok($FN(array)?) - } - DataType::Timestamp(time_unit, None) => match time_unit { - TimeUnit::Second => { - let array = $ARRAY - .as_any() - .downcast_ref::() - .unwrap(); - Ok($FN(array)?) - } - TimeUnit::Millisecond => { - let array = $ARRAY - .as_any() - .downcast_ref::() - .unwrap(); - Ok($FN(array)?) - } - TimeUnit::Microsecond => { - let array = $ARRAY - .as_any() - .downcast_ref::() - .unwrap(); - Ok($FN(array)?) - } - TimeUnit::Nanosecond => { - let array = $ARRAY - .as_any() - .downcast_ref::() - .unwrap(); - Ok($FN(array)?) - } - }, - datatype => Err(DataFusionError::Internal(format!( - "Extract does not support datatype {:?}", - datatype - ))), - } - }; -} - /// DATE_PART SQL function pub fn date_part(args: &[ColumnarValue]) -> Result { if args.len() != 2 { @@ -438,8 +407,9 @@ pub fn date_part(args: &[ColumnarValue]) -> Result { }; let arr = match date_part.to_lowercase().as_str() { - "hour" => extract_date_part!(array, temporal::hour), - "year" => extract_date_part!(array, temporal::year), + "hour" => Ok(temporal::hour(array.as_ref()) + .map(|x| cast::primitive_to_primitive::(&x, &DataType::Int32))?), + "year" => Ok(temporal::year(array.as_ref())?), _ => Err(DataFusionError::Execution(format!( "Date part '{}' not supported", date_part @@ -460,7 +430,8 @@ pub fn date_part(args: &[ColumnarValue]) -> Result { mod tests { use std::sync::Arc; - use arrow::array::{ArrayRef, Int64Array, StringBuilder}; + use arrow2::array::*; + use arrow2::datatypes::*; use super::*; @@ -468,18 +439,15 @@ mod tests { fn to_timestamp_arrays_and_nulls() -> Result<()> { // ensure that arrow array implementation is wired up and handles nulls correctly - let mut string_builder = StringBuilder::new(2); - let mut ts_builder = TimestampNanosecondArray::builder(2); + let string_array = + Utf8Array::::from(&vec![Some("2020-09-08T13:42:29.190855Z"), None]); - string_builder.append_value("2020-09-08T13:42:29.190855Z")?; - ts_builder.append_value(1599572549190855000)?; + let ts_array = Primitive::::from(&[Some(1599572549190855000), None]) + .to(DataType::Timestamp(TimeUnit::Nanosecond, None)); - string_builder.append_null()?; - ts_builder.append_null()?; - let expected_timestamps = &ts_builder.finish() as &dyn Array; + let expected_timestamps = &ts_array as &dyn Array; - let string_array = - ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef); + let string_array = ColumnarValue::Array(Arc::new(string_array) as ArrayRef); let parsed_timestamps = to_timestamp(&[string_array]) .expect("that 
to_timestamp parsed values without error"); if let ColumnarValue::Array(parsed_array) = parsed_timestamps { @@ -554,9 +522,8 @@ mod tests { // pass the wrong type of input array to to_timestamp and test // that we get an error. - let mut builder = Int64Array::builder(1); - builder.append_value(1)?; - let int64array = ColumnarValue::Array(Arc::new(builder.finish())); + let array = Int64Array::from_slice(&[1]); + let int64array = ColumnarValue::Array(Arc::new(array)); let expected_err = "Internal error: Unsupported data type Int64 for function to_timestamp"; diff --git a/datafusion/src/physical_plan/distinct_expressions.rs b/datafusion/src/physical_plan/distinct_expressions.rs index f3513c2950e4d..b8605ed94e1d2 100644 --- a/datafusion/src/physical_plan/distinct_expressions.rs +++ b/datafusion/src/physical_plan/distinct_expressions.rs @@ -18,23 +18,24 @@ //! Implementations for DISTINCT expressions, e.g. `COUNT(DISTINCT c)` use std::any::Any; +use std::collections::HashSet; use std::convert::TryFrom; use std::fmt::Debug; -use std::hash::Hash; use std::sync::Arc; -use arrow::datatypes::{DataType, Field}; - use ahash::RandomState; -use std::collections::HashSet; + +use arrow2::array::Array; +use arrow2::datatypes::{DataType, Field}; use crate::error::{DataFusionError, Result}; use crate::physical_plan::group_scalar::GroupByScalar; use crate::physical_plan::{Accumulator, AggregateExpr, PhysicalExpr}; use crate::scalar::ScalarValue; -#[derive(Debug, PartialEq, Eq, Hash, Clone)] -struct DistinctScalarValues(Vec); +type ArrayRef = Arc; + +type DistinctScalarValues = Vec; fn format_state_name(name: &str, state_name: &str) -> String { format!("{}[{}]", name, state_name) @@ -137,12 +138,12 @@ impl Accumulator for DistinctCountAccumulator { fn update(&mut self, values: &[ScalarValue]) -> Result<()> { // If a row has a NULL, it is not included in the final count. if !values.iter().any(|v| v.is_null()) { - self.values.insert(DistinctScalarValues( + self.values.insert( values .iter() .map(GroupByScalar::try_from) .collect::>>()?, - )); + ); } Ok(()) @@ -167,38 +168,28 @@ impl Accumulator for DistinctCountAccumulator { (0..col_values[0].len()).try_for_each(|row_index| { let row_values = col_values .iter() - .map(|col| col[row_index].clone()) - .collect::>(); + .map(|col| ScalarValue::try_from_array(col, row_index)) + .collect::>>()?; self.update(&row_values) }) } fn state(&self) -> Result> { - let mut cols_out = self - .state_data_types + self.values .iter() - .map(|state_data_type| { - ScalarValue::List(Some(Vec::new()), state_data_type.clone()) - }) - .collect::>(); - - let mut cols_vec = cols_out - .iter_mut() - .map(|c| match c { - ScalarValue::List(Some(ref mut v), _) => v, - _ => unreachable!(), + .map(|distinct_values| { + // create an array with all distinct values + let arrays = distinct_values + .iter() + .map(ScalarValue::from) + .map(|x| x.to_array()) + .collect::>(); + let arrays = arrays.iter().map(|x| x.as_ref()).collect::>(); + Ok(arrow2::compute::concat::concatenate(&arrays).map(|x| x.into())?) 
}) - .collect::>(); - - self.values.iter().for_each(|distinct_values| { - distinct_values.0.iter().enumerate().for_each( - |(col_index, distinct_value)| { - cols_vec[col_index].push(ScalarValue::from(distinct_value)); - }, - ) - }); - - Ok(cols_out) + .zip(self.state_data_types.iter()) + .map(|(x, type_)| x.map(|x| ScalarValue::List(Some(x), type_.clone()))) + .collect() } fn evaluate(&self) -> Result { @@ -214,42 +205,12 @@ impl Accumulator for DistinctCountAccumulator { #[cfg(test)] mod tests { - use super::*; - - use arrow::array::{ - ArrayRef, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, - Int64Array, Int8Array, ListArray, UInt16Array, UInt32Array, UInt64Array, - UInt8Array, - }; - use arrow::array::{Int32Builder, ListBuilder, UInt64Builder}; - use arrow::datatypes::DataType; - - macro_rules! build_list { - ($LISTS:expr, $BUILDER_TYPE:ident) => {{ - let mut builder = ListBuilder::new($BUILDER_TYPE::new(0)); - for list in $LISTS.iter() { - match list { - Some(values) => { - for value in values.iter() { - match value { - Some(v) => builder.values().append_value((*v).into())?, - None => builder.values().append_null()?, - } - } - - builder.append(true)?; - } - None => { - builder.append(false)?; - } - } - } + use std::iter::FromIterator; - let array = Arc::new(builder.finish()) as ArrayRef; + use super::*; - Ok(array) as Result - }}; - } + use arrow2::array::*; + use arrow2::datatypes::DataType; macro_rules! state_to_vec { ($LIST:expr, $DATA_TYPE:ident, $PRIM_TY:ty) => {{ @@ -333,7 +294,7 @@ mod tests { let agg = DistinctCount::new( arrays .iter() - .map(|a| a.as_any().downcast_ref::().unwrap()) + .map(|a| a.as_any().downcast_ref::>().unwrap()) .map(|a| a.values().data_type().clone()) .collect::>(), vec![], @@ -516,13 +477,14 @@ mod tests { Ok((state_vec, count)) }; - let zero_count_values = BooleanArray::from(Vec::::new()); + let zero_count_values = BooleanArray::from_slice(&[]); - let one_count_values = BooleanArray::from(vec![false, false]); + let one_count_values = BooleanArray::from_slice(&[false, false]); let one_count_values_with_null = BooleanArray::from(vec![Some(true), Some(true), None, None]); - let two_count_values = BooleanArray::from(vec![true, false, true, false, true]); + let two_count_values = + BooleanArray::from_slice(&[true, false, true, false, true]); let two_count_values_with_null = BooleanArray::from(vec![ Some(true), Some(false), @@ -583,8 +545,8 @@ mod tests { #[test] fn count_distinct_update_batch_multiple_columns() -> Result<()> { - let array_int8: ArrayRef = Arc::new(Int8Array::from(vec![1, 1, 2])); - let array_int16: ArrayRef = Arc::new(Int16Array::from(vec![3, 3, 4])); + let array_int8: ArrayRef = Arc::new(Int8Array::from_slice(&[1, 1, 2])); + let array_int16: ArrayRef = Arc::new(Int16Array::from_slice(&[3, 3, 4])); let arrays = vec![array_int8, array_int16]; let (states, result) = run_update_batch(&arrays)?; @@ -673,23 +635,20 @@ mod tests { #[test] fn count_distinct_merge_batch() -> Result<()> { - let state_in1 = build_list!( - vec![ - Some(vec![Some(-1_i32), Some(-1_i32), Some(-2_i32), Some(-2_i32)]), - Some(vec![Some(-2_i32), Some(-3_i32)]), - ], - Int32Builder - )?; - - let state_in2 = build_list!( - vec![ - Some(vec![Some(5_u64), Some(6_u64), Some(5_u64), Some(7_u64)]), - Some(vec![Some(5_u64), Some(7_u64)]), - ], - UInt64Builder - )?; - - let (states, result) = run_merge_batch(&[state_in1, state_in2])?; + let state_in1 = ListPrimitive::, i32>::from_iter(vec![ + Some(vec![Some(-1_i32), Some(-1_i32), Some(-2_i32), Some(-2_i32)]), + 
Some(vec![Some(-2_i32), Some(-3_i32)]), + ]) + .to(ListArray::default_datatype(DataType::Int32)); + + let state_in2 = ListPrimitive::, u64>::from_iter(vec![ + Some(vec![Some(5_u64), Some(6_u64), Some(5_u64), Some(7_u64)]), + Some(vec![Some(5_u64), Some(7_u64)]), + ]) + .to(ListArray::default_datatype(DataType::UInt64)); + + let (states, result) = + run_merge_batch(&[Arc::new(state_in1), Arc::new(state_in2)])?; let state_out_vec1 = state_to_vec!(&states[0], Int32, i32).unwrap(); let state_out_vec2 = state_to_vec!(&states[1], UInt64, u64).unwrap(); diff --git a/datafusion/src/physical_plan/empty.rs b/datafusion/src/physical_plan/empty.rs index 391a695f45014..1cb57b716d907 100644 --- a/datafusion/src/physical_plan/empty.rs +++ b/datafusion/src/physical_plan/empty.rs @@ -24,14 +24,17 @@ use crate::error::{DataFusionError, Result}; use crate::physical_plan::{ memory::MemoryStream, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, }; -use arrow::array::NullArray; -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use arrow::record_batch::RecordBatch; + +use arrow2::array::NullArray; +use arrow2::datatypes::{DataType, Field, Schema}; +use arrow2::record_batch::RecordBatch; use super::SendableRecordBatchStream; use async_trait::async_trait; +type SchemaRef = Arc; + /// Execution plan for empty relation (produces no rows) #[derive(Debug)] pub struct EmptyExec { @@ -109,7 +112,7 @@ impl ExecutionPlan for EmptyExec { DataType::Null, true, )])), - vec![Arc::new(NullArray::new(1))], + vec![Arc::new(NullArray::from_data(1))], )?] } else { vec![] diff --git a/datafusion/src/physical_plan/explain.rs b/datafusion/src/physical_plan/explain.rs index 3c5ef1af32366..801e72a24ea39 100644 --- a/datafusion/src/physical_plan/explain.rs +++ b/datafusion/src/physical_plan/explain.rs @@ -26,7 +26,9 @@ use crate::{ physical_plan::Partitioning, physical_plan::{common::SizedRecordBatchStream, DisplayFormatType, ExecutionPlan}, }; -use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch}; +use arrow2::{array::*, datatypes::Schema, record_batch::RecordBatch}; + +type SchemaRef = Arc; use super::SendableRecordBatchStream; use async_trait::async_trait; @@ -100,20 +102,19 @@ impl ExecutionPlan for ExplainExec { ))); } - let mut type_builder = StringBuilder::new(self.stringified_plans.len()); - let mut plan_builder = StringBuilder::new(self.stringified_plans.len()); + let mut type_builder = + Utf8Primitive::::with_capacity(self.stringified_plans.len()); + let mut plan_builder = + Utf8Primitive::::with_capacity(self.stringified_plans.len()); for p in &self.stringified_plans { - type_builder.append_value(&String::from(&p.plan_type))?; - plan_builder.append_value(&*p.plan)?; + type_builder.push(Some(&String::from(&p.plan_type).as_ref())); + plan_builder.push(Some(&p.plan.as_ref().as_ref())); } let record_batch = RecordBatch::try_new( self.schema.clone(), - vec![ - Arc::new(type_builder.finish()), - Arc::new(plan_builder.finish()), - ], + vec![Arc::new(type_builder.to()), Arc::new(plan_builder.to())], )?; Ok(Box::pin(SizedRecordBatchStream::new( diff --git a/datafusion/src/physical_plan/expressions/average.rs b/datafusion/src/physical_plan/expressions/average.rs index 6a6332042188f..ca94d344ef95a 100644 --- a/datafusion/src/physical_plan/expressions/average.rs +++ b/datafusion/src/physical_plan/expressions/average.rs @@ -24,13 +24,15 @@ use std::sync::Arc; use crate::error::{DataFusionError, Result}; use crate::physical_plan::{Accumulator, AggregateExpr, PhysicalExpr}; use 
crate::scalar::ScalarValue; -use arrow::compute; -use arrow::datatypes::DataType; -use arrow::{ - array::{ArrayRef, UInt64Array}, +use arrow2::compute; +use arrow2::datatypes::DataType; +use arrow2::{ + array::{Array, UInt64Array}, datatypes::Field, }; +type ArrayRef = Arc; + use super::{format_state_name, sum}; /// AVG aggregate expression @@ -150,7 +152,7 @@ impl Accumulator for AvgAccumulator { fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { let values = &values[0]; - self.count += (values.len() - values.data().null_count()) as u64; + self.count += (values.len() - values.null_count()) as u64; self.sum = sum::sum(&self.sum, &sum::sum_batch(values)?)?; Ok(()) } @@ -172,7 +174,7 @@ impl Accumulator for AvgAccumulator { fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { let counts = states[0].as_any().downcast_ref::().unwrap(); // counts are summed - self.count += compute::sum(counts).unwrap_or(0); + self.count += compute::aggregate::sum(counts).unwrap_or(0); // sums are summed self.sum = sum::sum(&self.sum, &sum::sum_batch(&states[1])?)?; @@ -196,12 +198,12 @@ mod tests { use super::*; use crate::physical_plan::expressions::col; use crate::{error::Result, generic_test_op}; - use arrow::record_batch::RecordBatch; - use arrow::{array::*, datatypes::*}; + use arrow2::record_batch::RecordBatch; + use arrow2::{array::*, datatypes::*}; #[test] fn avg_i32() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])); + let a: ArrayRef = Arc::new(Int32Array::from_slice(&[1, 2, 3, 4, 5])); generic_test_op!( a, DataType::Int32, @@ -243,8 +245,7 @@ mod tests { #[test] fn avg_u32() -> Result<()> { - let a: ArrayRef = - Arc::new(UInt32Array::from(vec![1_u32, 2_u32, 3_u32, 4_u32, 5_u32])); + let a: ArrayRef = Arc::new(UInt32Array::from_slice(&[1, 2, 3, 4, 5])); generic_test_op!( a, DataType::UInt32, @@ -256,8 +257,9 @@ mod tests { #[test] fn avg_f32() -> Result<()> { - let a: ArrayRef = - Arc::new(Float32Array::from(vec![1_f32, 2_f32, 3_f32, 4_f32, 5_f32])); + let a: ArrayRef = Arc::new(Float32Array::from_slice(&[ + 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, + ])); generic_test_op!( a, DataType::Float32, @@ -269,8 +271,9 @@ mod tests { #[test] fn avg_f64() -> Result<()> { - let a: ArrayRef = - Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64, 4_f64, 5_f64])); + let a: ArrayRef = Arc::new(Float64Array::from_slice(&[ + 1_f64, 2_f64, 3_f64, 4_f64, 5_f64, + ])); generic_test_op!( a, DataType::Float64, diff --git a/datafusion/src/physical_plan/expressions/binary.rs b/datafusion/src/physical_plan/expressions/binary.rs index 5c2d9ce02f51f..a8d4d22db4aef 100644 --- a/datafusion/src/physical_plan/expressions/binary.rs +++ b/datafusion/src/physical_plan/expressions/binary.rs @@ -15,27 +15,14 @@ // specific language governing permissions and limitations // under the License. 
-use std::{any::Any, sync::Arc}; - -use arrow::array::*; -use arrow::compute::kernels::arithmetic::{ - add, divide, divide_scalar, multiply, subtract, -}; -use arrow::compute::kernels::boolean::{and_kleene, or_kleene}; -use arrow::compute::kernels::comparison::{eq, gt, gt_eq, lt, lt_eq, neq}; -use arrow::compute::kernels::comparison::{ - eq_scalar, gt_eq_scalar, gt_scalar, lt_eq_scalar, lt_scalar, neq_scalar, -}; -use arrow::compute::kernels::comparison::{ - eq_utf8, gt_eq_utf8, gt_utf8, like_utf8, like_utf8_scalar, lt_eq_utf8, lt_utf8, - neq_utf8, nlike_utf8, nlike_utf8_scalar, -}; -use arrow::compute::kernels::comparison::{ - eq_utf8_scalar, gt_eq_utf8_scalar, gt_utf8_scalar, lt_eq_utf8_scalar, lt_utf8_scalar, - neq_utf8_scalar, -}; -use arrow::datatypes::{DataType, Schema, TimeUnit}; -use arrow::record_batch::RecordBatch; +use std::{any::Any, convert::TryInto, sync::Arc}; + +use arrow2::array::*; +use arrow2::compute; +use arrow2::datatypes::{DataType, Schema}; +use arrow2::record_batch::RecordBatch; + +type StringArray = Utf8Array; use crate::error::{DataFusionError, Result}; use crate::logical_plan::Operator; @@ -85,157 +72,6 @@ impl std::fmt::Display for BinaryExpr { } } -/// Invoke a compute kernel on a pair of binary data arrays -macro_rules! compute_utf8_op { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ - let ll = $LEFT - .as_any() - .downcast_ref::<$DT>() - .expect("compute_op failed to downcast array"); - let rr = $RIGHT - .as_any() - .downcast_ref::<$DT>() - .expect("compute_op failed to downcast array"); - Ok(Arc::new(paste::expr! {[<$OP _utf8>]}(&ll, &rr)?)) - }}; -} - -/// Invoke a compute kernel on a data array and a scalar value -macro_rules! compute_utf8_op_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ - let ll = $LEFT - .as_any() - .downcast_ref::<$DT>() - .expect("compute_op failed to downcast array"); - if let ScalarValue::Utf8(Some(string_value)) = $RIGHT { - Ok(Arc::new(paste::expr! {[<$OP _utf8_scalar>]}( - &ll, - &string_value, - )?)) - } else { - Err(DataFusionError::Internal(format!( - "compute_utf8_op_scalar failed to cast literal value {}", - $RIGHT - ))) - } - }}; -} - -/// Invoke a compute kernel on a data array and a scalar value -macro_rules! compute_op_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ - use std::convert::TryInto; - let ll = $LEFT - .as_any() - .downcast_ref::<$DT>() - .expect("compute_op failed to downcast array"); - // generate the scalar function name, such as lt_scalar, from the $OP parameter - // (which could have a value of lt) and the suffix _scalar - Ok(Arc::new(paste::expr! {[<$OP _scalar>]}( - &ll, - $RIGHT.try_into()?, - )?)) - }}; -} - -/// Invoke a compute kernel on array(s) -macro_rules! compute_op { - // invoke binary operator - ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ - let ll = $LEFT - .as_any() - .downcast_ref::<$DT>() - .expect("compute_op failed to downcast array"); - let rr = $RIGHT - .as_any() - .downcast_ref::<$DT>() - .expect("compute_op failed to downcast array"); - Ok(Arc::new($OP(&ll, &rr)?)) - }}; - // invoke unary operator - ($OPERAND:expr, $OP:ident, $DT:ident) => {{ - let operand = $OPERAND - .as_any() - .downcast_ref::<$DT>() - .expect("compute_op failed to downcast array"); - Ok(Arc::new($OP(&operand)?)) - }}; -} - -macro_rules! 
binary_string_array_op_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ - let result: Result> = match $LEFT.data_type() { - DataType::Utf8 => compute_utf8_op_scalar!($LEFT, $RIGHT, $OP, StringArray), - other => Err(DataFusionError::Internal(format!( - "Data type {:?} not supported for scalar operation on string array", - other - ))), - }; - Some(result) - }}; -} - -macro_rules! binary_string_array_op { - ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ - match $LEFT.data_type() { - DataType::Utf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, StringArray), - other => Err(DataFusionError::Internal(format!( - "Data type {:?} not supported for binary operation on string arrays", - other - ))), - } - }}; -} - -/// Invoke a compute kernel on a pair of arrays -/// The binary_primitive_array_op macro only evaluates for primitive types -/// like integers and floats. -macro_rules! binary_primitive_array_op { - ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ - match $LEFT.data_type() { - DataType::Int8 => compute_op!($LEFT, $RIGHT, $OP, Int8Array), - DataType::Int16 => compute_op!($LEFT, $RIGHT, $OP, Int16Array), - DataType::Int32 => compute_op!($LEFT, $RIGHT, $OP, Int32Array), - DataType::Int64 => compute_op!($LEFT, $RIGHT, $OP, Int64Array), - DataType::UInt8 => compute_op!($LEFT, $RIGHT, $OP, UInt8Array), - DataType::UInt16 => compute_op!($LEFT, $RIGHT, $OP, UInt16Array), - DataType::UInt32 => compute_op!($LEFT, $RIGHT, $OP, UInt32Array), - DataType::UInt64 => compute_op!($LEFT, $RIGHT, $OP, UInt64Array), - DataType::Float32 => compute_op!($LEFT, $RIGHT, $OP, Float32Array), - DataType::Float64 => compute_op!($LEFT, $RIGHT, $OP, Float64Array), - other => Err(DataFusionError::Internal(format!( - "Data type {:?} not supported for binary operation on primitive arrays", - other - ))), - } - }}; -} - -/// Invoke a compute kernel on an array and a scalar -/// The binary_primitive_array_op_scalar macro only evaluates for primitive -/// types like integers and floats. -macro_rules! binary_primitive_array_op_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ - let result: Result> = match $LEFT.data_type() { - DataType::Int8 => compute_op_scalar!($LEFT, $RIGHT, $OP, Int8Array), - DataType::Int16 => compute_op_scalar!($LEFT, $RIGHT, $OP, Int16Array), - DataType::Int32 => compute_op_scalar!($LEFT, $RIGHT, $OP, Int32Array), - DataType::Int64 => compute_op_scalar!($LEFT, $RIGHT, $OP, Int64Array), - DataType::UInt8 => compute_op_scalar!($LEFT, $RIGHT, $OP, UInt8Array), - DataType::UInt16 => compute_op_scalar!($LEFT, $RIGHT, $OP, UInt16Array), - DataType::UInt32 => compute_op_scalar!($LEFT, $RIGHT, $OP, UInt32Array), - DataType::UInt64 => compute_op_scalar!($LEFT, $RIGHT, $OP, UInt64Array), - DataType::Float32 => compute_op_scalar!($LEFT, $RIGHT, $OP, Float32Array), - DataType::Float64 => compute_op_scalar!($LEFT, $RIGHT, $OP, Float64Array), - other => Err(DataFusionError::Internal(format!( - "Data type {:?} not supported for scalar operation on primitive array", - other - ))), - }; - Some(result) - }}; -} - /// The binary_array_op_scalar macro includes types that extend beyond the primitive, /// such as Utf8 strings. #[macro_export] @@ -252,12 +88,12 @@ macro_rules! 
binary_array_op_scalar { DataType::UInt64 => compute_op_scalar!($LEFT, $RIGHT, $OP, UInt64Array), DataType::Float32 => compute_op_scalar!($LEFT, $RIGHT, $OP, Float32Array), DataType::Float64 => compute_op_scalar!($LEFT, $RIGHT, $OP, Float64Array), - DataType::Utf8 => compute_utf8_op_scalar!($LEFT, $RIGHT, $OP, StringArray), + DataType::Utf8 => compute_utf8_op_scalar!($LEFT, $RIGHT, $OP, Utf8Array), DataType::Timestamp(TimeUnit::Nanosecond, None) => { - compute_op_scalar!($LEFT, $RIGHT, $OP, TimestampNanosecondArray) + compute_op_scalar!($LEFT, $RIGHT, $OP, Int64Array) } DataType::Date32 => { - compute_op_scalar!($LEFT, $RIGHT, $OP, Date32Array) + compute_op_scalar!($LEFT, $RIGHT, $OP, Int32Array) } other => Err(DataFusionError::Internal(format!( "Data type {:?} not supported for scalar operation on dyn array", @@ -276,8 +112,12 @@ macro_rules! binary_array_op { match $LEFT.data_type() { DataType::Int8 => compute_op!($LEFT, $RIGHT, $OP, Int8Array), DataType::Int16 => compute_op!($LEFT, $RIGHT, $OP, Int16Array), - DataType::Int32 => compute_op!($LEFT, $RIGHT, $OP, Int32Array), - DataType::Int64 => compute_op!($LEFT, $RIGHT, $OP, Int64Array), + DataType::Int32 | DataType::Date32 => { + compute_op!($LEFT, $RIGHT, $OP, Int32Array) + } + DataType::Int64 | DataType::Timestamp(_, None) | DataType::Date64 => { + compute_op!($LEFT, $RIGHT, $OP, Int64Array) + } DataType::UInt8 => compute_op!($LEFT, $RIGHT, $OP, UInt8Array), DataType::UInt16 => compute_op!($LEFT, $RIGHT, $OP, UInt16Array), DataType::UInt32 => compute_op!($LEFT, $RIGHT, $OP, UInt32Array), @@ -285,15 +125,6 @@ macro_rules! binary_array_op { DataType::Float32 => compute_op!($LEFT, $RIGHT, $OP, Float32Array), DataType::Float64 => compute_op!($LEFT, $RIGHT, $OP, Float64Array), DataType::Utf8 => compute_utf8_op!($LEFT, $RIGHT, $OP, StringArray), - DataType::Timestamp(TimeUnit::Nanosecond, None) => { - compute_op!($LEFT, $RIGHT, $OP, TimestampNanosecondArray) - } - DataType::Date32 => { - compute_op!($LEFT, $RIGHT, $OP, Date32Array) - } - DataType::Date64 => { - compute_op!($LEFT, $RIGHT, $OP, Date64Array) - } other => Err(DataFusionError::Internal(format!( "Data type {:?} not supported for binary operation on dyn arrays", other @@ -304,19 +135,125 @@ macro_rules! binary_array_op { /// Invoke a boolean kernel on a pair of arrays macro_rules! 
boolean_op {
-    ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{
+    ($LEFT:expr, $RIGHT:expr, $OP:expr) => {{
         let ll = $LEFT
             .as_any()
-            .downcast_ref::()
+            .downcast_ref()
             .expect("boolean_op failed to downcast array");
         let rr = $RIGHT
             .as_any()
-            .downcast_ref::()
+            .downcast_ref()
             .expect("boolean_op failed to downcast array");
         Ok(Arc::new($OP(&ll, &rr)?))
     }};
 }
+fn to_arrow_comparison(op: &Operator) -> compute::comparison::Operator {
+    match op {
+        Operator::Eq => compute::comparison::Operator::Eq,
+        Operator::NotEq => compute::comparison::Operator::Neq,
+        Operator::Lt => compute::comparison::Operator::Lt,
+        Operator::LtEq => compute::comparison::Operator::LtEq,
+        Operator::Gt => compute::comparison::Operator::Gt,
+        Operator::GtEq => compute::comparison::Operator::GtEq,
+        _ => unreachable!(),
+    }
+}
+
+fn to_arrow_arithmetics(op: &Operator) -> compute::arithmetics::Operator {
+    match op {
+        Operator::Plus => compute::arithmetics::Operator::Add,
+        Operator::Minus => compute::arithmetics::Operator::Subtract,
+        Operator::Multiply => compute::arithmetics::Operator::Multiply,
+        Operator::Divide => compute::arithmetics::Operator::Divide,
+        _ => unreachable!(),
+    }
+}
+
+fn evaluate(lhs: &dyn Array, op: &Operator, rhs: &dyn Array) -> Result<Arc<dyn Array>> {
+    use Operator::*;
+    if matches!(op, Plus | Minus | Divide | Multiply) {
+        let op = to_arrow_arithmetics(op);
+        Ok(compute::arithmetics::arithmetic(lhs, op, rhs).map(|x| x.into())?)
+    } else if matches!(op, Eq | NotEq | Lt | LtEq | Gt | GtEq) {
+        let op = to_arrow_comparison(op);
+        Ok(compute::comparison::compare(lhs, rhs, op).map(Arc::new)?)
+    } else if matches!(op, Or) {
+        boolean_op!(lhs, rhs, compute::boolean_kleene::or)
+    } else if matches!(op, And) {
+        boolean_op!(lhs, rhs, compute::boolean_kleene::and)
+    } else {
+        //Operator::Like => binary_string_array_op!(left, right, like),
+        //Operator::NotLike => binary_string_array_op!(left, right, nlike),
+        // add remaining:
+        /*
+        Modulus,
+        Like,
+        NotLike,
+        */
+        todo!()
+    }
+}
+
+macro_rules! dyn_scalar {
+    ($lhs:expr, $op:expr, $rhs:expr, $ty:ty) => {{
+        Arc::new(compute::arithmetics::arithmetic_primitive_scalar::<$ty>(
+            $lhs.as_any().downcast_ref().unwrap(),
+            $op,
+            &$rhs.clone().try_into().unwrap(),
+        )?)
+    }};
+}
+
+fn evaluate_scalar(
+    lhs: &dyn Array,
+    op: &Operator,
+    rhs: &ScalarValue,
+) -> Result<Option<Arc<dyn Array>>> {
+    use Operator::*;
+    if matches!(op, Plus | Minus | Divide | Multiply) {
+        let op = to_arrow_arithmetics(op);
+        Ok(Some(match lhs.data_type() {
+            DataType::Int8 => dyn_scalar!(lhs, op, rhs, i8),
+            DataType::Int16 => dyn_scalar!(lhs, op, rhs, i16),
+            DataType::Int32 => dyn_scalar!(lhs, op, rhs, i32),
+            DataType::Int64 => dyn_scalar!(lhs, op, rhs, i64),
+            DataType::UInt8 => dyn_scalar!(lhs, op, rhs, u8),
+            DataType::UInt16 => dyn_scalar!(lhs, op, rhs, u16),
+            DataType::UInt32 => dyn_scalar!(lhs, op, rhs, u32),
+            DataType::UInt64 => dyn_scalar!(lhs, op, rhs, u64),
+            DataType::Float32 => dyn_scalar!(lhs, op, rhs, f32),
+            DataType::Float64 => dyn_scalar!(lhs, op, rhs, f64),
+            _ => {
+                return Err(DataFusionError::NotImplemented(
+                    "This operation is not yet implemented".to_string(),
+                ))
+            }
+        }))
+    } else {
+        Ok(None)
+    }
+}
+
+fn evaluate_inverse_scalar(
+    lhs: &ScalarValue,
+    op: &Operator,
+    rhs: &dyn Array,
+) -> Result<Option<Arc<dyn Array>>> {
+    use Operator::*;
+    // `lhs op rhs` with the scalar on the left is computed as `rhs op' lhs`,
+    // so each comparison is mirrored and the commutative operators are kept.
+    match op {
+        Lt => evaluate_scalar(rhs, &Gt, lhs),
+        Gt => evaluate_scalar(rhs, &Lt, lhs),
+        GtEq => evaluate_scalar(rhs, &LtEq, lhs),
+        LtEq => evaluate_scalar(rhs, &GtEq, lhs),
+        Eq => evaluate_scalar(rhs, &Eq, lhs),
+        NotEq => evaluate_scalar(rhs, &NotEq, lhs),
+        Plus => evaluate_scalar(rhs, &Plus, lhs),
+        Multiply => evaluate_scalar(rhs, &Multiply, lhs),
+        _ => Ok(None),
+    }
+}
+
 /// Coercion rules for all binary operators. Returns the output type
 /// of applying `op` to an argument of `lhs_type` and `rhs_type`.
 fn common_binary_type(
@@ -431,57 +368,16 @@ impl PhysicalExpr for BinaryExpr {
         let scalar_result = match (&left_value, &right_value) {
             (ColumnarValue::Array(array), ColumnarValue::Scalar(scalar)) => {
-                // if left is array and right is literal - use scalar operations
-                match &self.op {
-                    Operator::Lt => binary_array_op_scalar!(array, scalar.clone(), lt),
-                    Operator::LtEq => {
-                        binary_array_op_scalar!(array, scalar.clone(), lt_eq)
-                    }
-                    Operator::Gt => binary_array_op_scalar!(array, scalar.clone(), gt),
-                    Operator::GtEq => {
-                        binary_array_op_scalar!(array, scalar.clone(), gt_eq)
-                    }
-                    Operator::Eq => binary_array_op_scalar!(array, scalar.clone(), eq),
-                    Operator::NotEq => {
-                        binary_array_op_scalar!(array, scalar.clone(), neq)
-                    }
-                    Operator::Like => {
-                        binary_string_array_op_scalar!(array, scalar.clone(), like)
-                    }
-                    Operator::NotLike => {
-                        binary_string_array_op_scalar!(array, scalar.clone(), nlike)
-                    }
-                    Operator::Divide => {
-                        binary_primitive_array_op_scalar!(array, scalar.clone(), divide)
-                    }
-                    // if scalar operation is not supported - fallback to array implementation
-                    _ => None,
-                }
+                evaluate_scalar(array.as_ref(), &self.op, scalar)
             }
             (ColumnarValue::Scalar(scalar), ColumnarValue::Array(array)) => {
-                // if right is literal and left is array - reverse operator and parameters
-                match &self.op {
-                    Operator::Lt => binary_array_op_scalar!(array, scalar.clone(), gt),
-                    Operator::LtEq => {
-                        binary_array_op_scalar!(array, scalar.clone(), gt_eq)
-                    }
-                    Operator::Gt => binary_array_op_scalar!(array, scalar.clone(), lt),
-                    Operator::GtEq => {
-                        binary_array_op_scalar!(array, scalar.clone(), lt_eq)
-                    }
-                    Operator::Eq => binary_array_op_scalar!(array, scalar.clone(), eq),
-                    Operator::NotEq => {
-                        binary_array_op_scalar!(array, scalar.clone(), neq)
-                    }
-                    // if scalar operation is not supported - fallback to array implementation
-                    _ => None,
-                }
+                evaluate_inverse_scalar(scalar, &self.op, array.as_ref())
             }
-            (_,
_) => Ok(None), + }?; if let Some(result) = scalar_result { - return result.map(|a| ColumnarValue::Array(a)); + return Ok(ColumnarValue::Array(result)); } // if both arrays or both literals - extract arrays and continue execution @@ -490,45 +386,7 @@ impl PhysicalExpr for BinaryExpr { right_value.into_array(batch.num_rows()), ); - let result: Result = match &self.op { - Operator::Like => binary_string_array_op!(left, right, like), - Operator::NotLike => binary_string_array_op!(left, right, nlike), - Operator::Lt => binary_array_op!(left, right, lt), - Operator::LtEq => binary_array_op!(left, right, lt_eq), - Operator::Gt => binary_array_op!(left, right, gt), - Operator::GtEq => binary_array_op!(left, right, gt_eq), - Operator::Eq => binary_array_op!(left, right, eq), - Operator::NotEq => binary_array_op!(left, right, neq), - Operator::Plus => binary_primitive_array_op!(left, right, add), - Operator::Minus => binary_primitive_array_op!(left, right, subtract), - Operator::Multiply => binary_primitive_array_op!(left, right, multiply), - Operator::Divide => binary_primitive_array_op!(left, right, divide), - Operator::And => { - if left_data_type == DataType::Boolean { - boolean_op!(left, right, and_kleene) - } else { - return Err(DataFusionError::Internal(format!( - "Cannot evaluate binary expression {:?} with types {:?} and {:?}", - self.op, - left.data_type(), - right.data_type() - ))); - } - } - Operator::Or => { - if left_data_type == DataType::Boolean { - boolean_op!(left, right, or_kleene) - } else { - return Err(DataFusionError::Internal(format!( - "Cannot evaluate binary expression {:?} with types {:?} and {:?}", - self.op, left_data_type, right_data_type - ))); - } - } - Operator::Modulus => Err(DataFusionError::NotImplemented( - "Modulus operator is still not supported".to_string(), - )), - }; + let result = evaluate(left.as_ref(), &self.op, right.as_ref()); result.map(|a| ColumnarValue::Array(a)) } } @@ -567,8 +425,8 @@ pub fn binary( #[cfg(test)] mod tests { - use arrow::datatypes::{ArrowNumericType, Field, Int32Type, SchemaRef}; - use arrow::util::display::array_value_to_string; + use arrow2::datatypes::*; + use arrow2::{array::*, types::NativeType}; use super::*; use crate::error::Result; @@ -590,8 +448,8 @@ mod tests { Field::new("a", DataType::Int32, false), Field::new("b", DataType::Int32, false), ]); - let a = Int32Array::from(vec![1, 2, 3, 4, 5]); - let b = Int32Array::from(vec![1, 2, 4, 8, 16]); + let a = Int32Array::from_slice(&[1, 2, 3, 4, 5]); + let b = Int32Array::from_slice(&[1, 2, 4, 8, 16]); let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])?; @@ -618,8 +476,8 @@ mod tests { Field::new("a", DataType::Int32, false), Field::new("b", DataType::Int32, false), ]); - let a = Int32Array::from(vec![2, 4, 6, 8, 10]); - let b = Int32Array::from(vec![2, 5, 4, 8, 8]); + let a = Int32Array::from_slice(&[2, 4, 6, 8, 10]); + let b = Int32Array::from_slice(&[2, 5, 4, 8, 8]); let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])?; @@ -659,8 +517,8 @@ mod tests { Field::new("a", $A_TYPE, false), Field::new("b", $B_TYPE, false), ]); - let a = $A_ARRAY::from($A_VEC); - let b = $B_ARRAY::from($B_VEC); + let a = $A_ARRAY::from_slice(&$A_VEC); + let b = $B_ARRAY::from_slice(&$B_VEC); let batch = RecordBatch::try_new( Arc::new(schema.clone()), vec![Arc::new(a), Arc::new(b)], @@ -756,7 +614,7 @@ mod tests { StringArray, DataType::Utf8, vec!["1994-12-13", "1995-01-26"], - Date32Array, + Int32Array, DataType::Date32, vec![9112, 
9156], Operator::Eq, @@ -768,7 +626,7 @@ mod tests { StringArray, DataType::Utf8, vec!["1994-12-13", "1995-01-26"], - Date32Array, + Int32Array, DataType::Date32, vec![9113, 9154], Operator::Lt, @@ -780,7 +638,7 @@ mod tests { StringArray, DataType::Utf8, vec!["1994-12-13T12:34:56", "1995-01-26T01:23:45"], - Date64Array, + Int64Array, DataType::Date64, vec![787322096000, 791083425000], Operator::Eq, @@ -792,7 +650,7 @@ mod tests { StringArray, DataType::Utf8, vec!["1994-12-13T12:34:56", "1995-01-26T01:23:45"], - Date64Array, + Int64Array, DataType::Date64, vec![787322096001, 791083424999], Operator::Lt, @@ -815,16 +673,12 @@ mod tests { DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)); let string_type = DataType::Utf8; - // build dictionary - let keys_builder = PrimitiveBuilder::::new(10); - let values_builder = arrow::array::StringBuilder::new(10); - let mut dict_builder = StringDictionaryBuilder::new(keys_builder, values_builder); - - dict_builder.append("one")?; - dict_builder.append_null()?; - dict_builder.append("three")?; - dict_builder.append("four")?; - let dict_array = dict_builder.finish(); + let dict = DictionaryPrimitive::, &str>::with_capacity(0); + dict.push(Some(&"one".as_ref())); + dict.push(None); + dict.push(Some(&"three".as_ref())); + dict.push(Some(&"four".as_ref())); + let dict_array = dict.to(dict_type.clone()); let str_array = StringArray::from(vec![Some("not one"), Some("two"), None, Some("four")]); @@ -839,7 +693,7 @@ mod tests { vec![Arc::new(dict_array), Arc::new(str_array)], )?; - let expected = "false\n\n\ntrue"; + let expected = BooleanArray::from(&[Some(false), None, Some(true)]); // Test 1: dict = str @@ -852,7 +706,7 @@ mod tests { assert_eq!(result.data_type(), &DataType::Boolean); // verify that the result itself is correct - assert_eq!(expected, array_to_string(&result)?); + assert_eq!(expected, result.as_ref()); // Test 2: now test the other direction // str = dict @@ -866,34 +720,25 @@ mod tests { assert_eq!(result.data_type(), &DataType::Boolean); // verify that the result itself is correct - assert_eq!(expected, array_to_string(&result)?); + assert_eq!(expected, result.as_ref()); Ok(()) } - // Convert the array to a newline delimited string of pretty printed values - fn array_to_string(array: &ArrayRef) -> Result { - let s = (0..array.len()) - .map(|i| array_value_to_string(array, i)) - .collect::, arrow::error::ArrowError>>()? 
- .join("\n"); - Ok(s) - } - #[test] fn plus_op() -> Result<()> { let schema = Schema::new(vec![ Field::new("a", DataType::Int32, false), Field::new("b", DataType::Int32, false), ]); - let a = Int32Array::from(vec![1, 2, 3, 4, 5]); - let b = Int32Array::from(vec![1, 2, 4, 8, 16]); + let a = Int32Array::from_slice(&[1, 2, 3, 4, 5]); + let b = Int32Array::from_slice(&[1, 2, 4, 8, 16]); - apply_arithmetic::( + apply_arithmetic::( Arc::new(schema), vec![Arc::new(a), Arc::new(b)], Operator::Plus, - Int32Array::from(vec![2, 4, 7, 12, 21]), + Int32Array::from_slice(&[2, 4, 7, 12, 21]), )?; Ok(()) @@ -905,22 +750,22 @@ mod tests { Field::new("a", DataType::Int32, false), Field::new("b", DataType::Int32, false), ])); - let a = Arc::new(Int32Array::from(vec![1, 2, 4, 8, 16])); - let b = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])); + let a = Arc::new(Int32Array::from_slice(&[1, 2, 4, 8, 16])); + let b = Arc::new(Int32Array::from_slice(&[1, 2, 3, 4, 5])); - apply_arithmetic::( + apply_arithmetic::( schema.clone(), vec![a.clone(), b.clone()], Operator::Minus, - Int32Array::from(vec![0, 0, 1, 4, 11]), + Int32Array::from_slice(&[0, 0, 1, 4, 11]), )?; // should handle have negative values in result (for signed) - apply_arithmetic::( + apply_arithmetic::( schema, vec![b, a], Operator::Minus, - Int32Array::from(vec![0, 0, -1, -4, -11]), + Int32Array::from_slice(&[0, 0, -1, -4, -11]), )?; Ok(()) @@ -932,14 +777,14 @@ mod tests { Field::new("a", DataType::Int32, false), Field::new("b", DataType::Int32, false), ])); - let a = Arc::new(Int32Array::from(vec![4, 8, 16, 32, 64])); - let b = Arc::new(Int32Array::from(vec![2, 4, 8, 16, 32])); + let a = Arc::new(Int32Array::from_slice(&[4, 8, 16, 32, 64])); + let b = Arc::new(Int32Array::from_slice(&[2, 4, 8, 16, 32])); - apply_arithmetic::( + apply_arithmetic::( schema, vec![a, b], Operator::Multiply, - Int32Array::from(vec![8, 32, 128, 512, 2048]), + Int32Array::from_slice(&[8, 32, 128, 512, 2048]), )?; Ok(()) @@ -951,22 +796,22 @@ mod tests { Field::new("a", DataType::Int32, false), Field::new("b", DataType::Int32, false), ])); - let a = Arc::new(Int32Array::from(vec![8, 32, 128, 512, 2048])); - let b = Arc::new(Int32Array::from(vec![2, 4, 8, 16, 32])); + let a = Arc::new(Int32Array::from_slice(&[8, 32, 128, 512, 2048])); + let b = Arc::new(Int32Array::from_slice(&[2, 4, 8, 16, 32])); - apply_arithmetic::( + apply_arithmetic::( schema, vec![a, b], Operator::Divide, - Int32Array::from(vec![4, 8, 16, 32, 64]), + Int32Array::from_slice(&[4, 8, 16, 32, 64]), )?; Ok(()) } - fn apply_arithmetic( - schema: SchemaRef, - data: Vec, + fn apply_arithmetic( + schema: Arc, + data: Vec>, op: Operator, expected: PrimitiveArray, ) -> Result<()> { @@ -974,23 +819,23 @@ mod tests { let batch = RecordBatch::try_new(schema, data)?; let result = arithmetic_op.evaluate(&batch)?.into_array(batch.num_rows()); - assert_eq!(result.as_ref(), &expected); + assert_eq!(expected, result.as_ref()); Ok(()) } fn apply_logic_op( - schema: SchemaRef, + schema: Arc, left: BooleanArray, right: BooleanArray, op: Operator, expected: BooleanArray, ) -> Result<()> { let arithmetic_op = binary_simple(col("a"), op, col("b")); - let data: Vec = vec![Arc::new(left), Arc::new(right)]; + let data: Vec> = vec![Arc::new(left), Arc::new(right)]; let batch = RecordBatch::try_new(schema, data)?; let result = arithmetic_op.evaluate(&batch)?.into_array(batch.num_rows()); - assert_eq!(result.as_ref(), &expected); + assert_eq!(expected, result.as_ref()); Ok(()) } diff --git 
a/datafusion/src/physical_plan/expressions/case.rs b/datafusion/src/physical_plan/expressions/case.rs index 95ae5325af119..91bba0285f23f 100644 --- a/datafusion/src/physical_plan/expressions/case.rs +++ b/datafusion/src/physical_plan/expressions/case.rs @@ -17,12 +17,16 @@ use std::{any::Any, sync::Arc}; +use arrow2::array::*; +use arrow2::compute::comparison; +use arrow2::compute::if_then_else; +use arrow2::datatypes::{DataType, Schema}; +use arrow2::record_batch::RecordBatch; + use crate::error::{DataFusionError, Result}; use crate::physical_plan::{ColumnarValue, PhysicalExpr}; -use arrow::array::{self, *}; -use arrow::compute::{eq, eq_utf8}; -use arrow::datatypes::{DataType, Schema}; -use arrow::record_batch::RecordBatch; + +use super::ArrayRef; /// The CASE expression is similar to a series of nested if/else and there are two forms that /// can be used. The first form consists of a series of boolean "when" expressions with @@ -103,201 +107,6 @@ impl CaseExpr { } } -macro_rules! if_then_else { - ($BUILDER_TYPE:ty, $ARRAY_TYPE:ty, $BOOLS:expr, $TRUE:expr, $FALSE:expr) => {{ - let true_values = $TRUE - .as_ref() - .as_any() - .downcast_ref::<$ARRAY_TYPE>() - .expect("true_values downcast failed"); - - let false_values = $FALSE - .as_ref() - .as_any() - .downcast_ref::<$ARRAY_TYPE>() - .expect("false_values downcast failed"); - - let mut builder = <$BUILDER_TYPE>::new($BOOLS.len()); - for i in 0..$BOOLS.len() { - if $BOOLS.is_null(i) { - if false_values.is_null(i) { - builder.append_null()?; - } else { - builder.append_value(false_values.value(i))?; - } - } else if $BOOLS.value(i) { - if true_values.is_null(i) { - builder.append_null()?; - } else { - builder.append_value(true_values.value(i))?; - } - } else { - if false_values.is_null(i) { - builder.append_null()?; - } else { - builder.append_value(false_values.value(i))?; - } - } - } - Ok(Arc::new(builder.finish())) - }}; -} - -fn if_then_else( - bools: &BooleanArray, - true_values: ArrayRef, - false_values: ArrayRef, - data_type: &DataType, -) -> Result { - match data_type { - DataType::UInt8 => if_then_else!( - array::UInt8Builder, - array::UInt8Array, - bools, - true_values, - false_values - ), - DataType::UInt16 => if_then_else!( - array::UInt16Builder, - array::UInt16Array, - bools, - true_values, - false_values - ), - DataType::UInt32 => if_then_else!( - array::UInt32Builder, - array::UInt32Array, - bools, - true_values, - false_values - ), - DataType::UInt64 => if_then_else!( - array::UInt64Builder, - array::UInt64Array, - bools, - true_values, - false_values - ), - DataType::Int8 => if_then_else!( - array::Int8Builder, - array::Int8Array, - bools, - true_values, - false_values - ), - DataType::Int16 => if_then_else!( - array::Int16Builder, - array::Int16Array, - bools, - true_values, - false_values - ), - DataType::Int32 => if_then_else!( - array::Int32Builder, - array::Int32Array, - bools, - true_values, - false_values - ), - DataType::Int64 => if_then_else!( - array::Int64Builder, - array::Int64Array, - bools, - true_values, - false_values - ), - DataType::Float32 => if_then_else!( - array::Float32Builder, - array::Float32Array, - bools, - true_values, - false_values - ), - DataType::Float64 => if_then_else!( - array::Float64Builder, - array::Float64Array, - bools, - true_values, - false_values - ), - DataType::Utf8 => if_then_else!( - array::StringBuilder, - array::StringArray, - bools, - true_values, - false_values - ), - other => Err(DataFusionError::Execution(format!( - "CASE does not support '{:?}'", - other - ))), - } -} - 
-macro_rules! array_equals { - ($TY:ty, $L:expr, $R:expr, $eq_fn:expr) => {{ - let when_value = $L - .as_ref() - .as_any() - .downcast_ref::<$TY>() - .expect("array_equals downcast failed"); - - let base_value = $R - .as_ref() - .as_any() - .downcast_ref::<$TY>() - .expect("array_equals downcast failed"); - - $eq_fn(when_value, base_value).map_err(DataFusionError::from) - }}; -} - -fn array_equals( - data_type: &DataType, - when_value: ArrayRef, - base_value: ArrayRef, -) -> Result { - match data_type { - DataType::UInt8 => { - array_equals!(array::UInt8Array, when_value, base_value, eq) - } - DataType::UInt16 => { - array_equals!(array::UInt16Array, when_value, base_value, eq) - } - DataType::UInt32 => { - array_equals!(array::UInt32Array, when_value, base_value, eq) - } - DataType::UInt64 => { - array_equals!(array::UInt64Array, when_value, base_value, eq) - } - DataType::Int8 => { - array_equals!(array::Int8Array, when_value, base_value, eq) - } - DataType::Int16 => { - array_equals!(array::Int16Array, when_value, base_value, eq) - } - DataType::Int32 => { - array_equals!(array::Int32Array, when_value, base_value, eq) - } - DataType::Int64 => { - array_equals!(array::Int64Array, when_value, base_value, eq) - } - DataType::Float32 => { - array_equals!(array::Float32Array, when_value, base_value, eq) - } - DataType::Float64 => { - array_equals!(array::Float64Array, when_value, base_value, eq) - } - DataType::Utf8 => { - array_equals!(array::StringArray, when_value, base_value, eq_utf8) - } - other => Err(DataFusionError::Execution(format!( - "CASE does not support '{:?}'", - other - ))), - } -} - impl CaseExpr { /// This function evaluates the form of CASE that matches an expression to fixed values. /// @@ -317,7 +126,7 @@ impl CaseExpr { let mut current_value: Option = if let Some(e) = &self.else_expr { Some(e.evaluate(batch)?.into_array(batch.num_rows())) } else { - Some(new_null_array(&return_type, batch.num_rows())) + Some(new_null_array(return_type, batch.num_rows()).into()) }; // walk backwards through the when/then expressions @@ -331,14 +140,20 @@ impl CaseExpr { let then_value = then_value.into_array(batch.num_rows()); // build boolean array representing which rows match the "when" value - let when_match = array_equals(&base_type, when_value, base_value.clone())?; - - current_value = Some(if_then_else( - &when_match, - then_value, - current_value.unwrap(), - &return_type, - )?); + let when_match = comparison::compare( + when_value.as_ref(), + base_value.as_ref(), + comparison::Operator::Eq, + )?; + + current_value = Some( + if_then_else::if_then_else( + &when_match, + then_value.as_ref(), + current_value.unwrap().as_ref(), + )? + .into(), + ); } Ok(ColumnarValue::Array(current_value.unwrap())) @@ -358,7 +173,7 @@ impl CaseExpr { let mut current_value: Option = if let Some(e) = &self.else_expr { Some(e.evaluate(batch)?.into_array(batch.num_rows())) } else { - Some(new_null_array(&return_type, batch.num_rows())) + Some(new_null_array(return_type, batch.num_rows()).into()) }; // walk backwards through the when/then expressions @@ -376,12 +191,14 @@ impl CaseExpr { let then_value = self.when_then_expr[i].1.evaluate(batch)?; let then_value = then_value.into_array(batch.num_rows()); - current_value = Some(if_then_else( - &when_value, - then_value, - current_value.unwrap(), - &return_type, - )?); + current_value = Some( + if_then_else::if_then_else( + &when_value, + then_value.as_ref(), + current_value.unwrap().as_ref(), + )? 
+ .into(), + ); } Ok(ColumnarValue::Array(current_value.unwrap())) @@ -445,8 +262,8 @@ mod tests { physical_plan::expressions::{binary, col, lit}, scalar::ScalarValue, }; - use arrow::array::StringArray; - use arrow::datatypes::*; + use arrow2::array::Utf8Array; + use arrow2::datatypes::*; #[test] fn case_with_expr() -> Result<()> { @@ -574,7 +391,7 @@ mod tests { fn case_test_batch() -> Result { let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]); - let a = StringArray::from(vec![Some("foo"), Some("baz"), None, Some("bar")]); + let a = Utf8Array::::from(vec![Some("foo"), Some("baz"), None, Some("bar")]); let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?; Ok(batch) } diff --git a/datafusion/src/physical_plan/expressions/cast.rs b/datafusion/src/physical_plan/expressions/cast.rs index ba395f54d917c..451b676414f3e 100644 --- a/datafusion/src/physical_plan/expressions/cast.rs +++ b/datafusion/src/physical_plan/expressions/cast.rs @@ -23,15 +23,9 @@ use super::ColumnarValue; use crate::error::{DataFusionError, Result}; use crate::physical_plan::PhysicalExpr; use crate::scalar::ScalarValue; -use arrow::compute; -use arrow::compute::kernels; -use arrow::compute::CastOptions; -use arrow::datatypes::{DataType, Schema}; -use arrow::record_batch::RecordBatch; -use compute::can_cast_types; - -/// provide Datafusion default cast options -pub const DEFAULT_DATAFUSION_CAST_OPTIONS: CastOptions = CastOptions { safe: false }; +use arrow2::compute::cast; +use arrow2::datatypes::{DataType, Schema}; +use arrow2::record_batch::RecordBatch; /// CAST expression casts an expression to a specific data type and returns a runtime error on invalid cast #[derive(Debug)] @@ -40,22 +34,12 @@ pub struct CastExpr { expr: Arc, /// The data type to cast to cast_type: DataType, - /// Cast options - cast_options: CastOptions, } impl CastExpr { /// Create a new CastExpr - pub fn new( - expr: Arc, - cast_type: DataType, - cast_options: CastOptions, - ) -> Self { - Self { - expr, - cast_type, - cast_options, - } + pub fn new(expr: Arc, cast_type: DataType) -> Self { + Self { expr, cast_type } } /// The expression to cast @@ -92,20 +76,13 @@ impl PhysicalExpr for CastExpr { fn evaluate(&self, batch: &RecordBatch) -> Result { let value = self.expr.evaluate(batch)?; match value { - ColumnarValue::Array(array) => { - Ok(ColumnarValue::Array(kernels::cast::cast_with_options( - &array, - &self.cast_type, - &self.cast_options, - )?)) - } + ColumnarValue::Array(array) => Ok(ColumnarValue::Array( + cast::cast(array.as_ref(), &self.cast_type)?.into(), + )), ColumnarValue::Scalar(scalar) => { let scalar_array = scalar.to_array(); - let cast_array = kernels::cast::cast_with_options( - &scalar_array, - &self.cast_type, - &self.cast_options, - )?; + let cast_array = + cast::cast(scalar_array.as_ref(), &self.cast_type)?.into(); let cast_scalar = ScalarValue::try_from_array(&cast_array, 0)?; Ok(ColumnarValue::Scalar(cast_scalar)) } @@ -121,13 +98,12 @@ pub fn cast_with_options( expr: Arc, input_schema: &Schema, cast_type: DataType, - cast_options: CastOptions, ) -> Result> { let expr_type = expr.data_type(input_schema)?; if expr_type == cast_type { Ok(expr.clone()) - } else if can_cast_types(&expr_type, &cast_type) { - Ok(Arc::new(CastExpr::new(expr, cast_type, cast_options))) + } else if cast::can_cast_types(&expr_type, &cast_type) { + Ok(Arc::new(CastExpr::new(expr, cast_type))) } else { Err(DataFusionError::Internal(format!( "Unsupported CAST from {:?} to {:?}", @@ -145,12 +121,7 @@ pub fn cast( 
input_schema: &Schema, cast_type: DataType, ) -> Result> { - cast_with_options( - expr, - input_schema, - cast_type, - DEFAULT_DATAFUSION_CAST_OPTIONS, - ) + cast_with_options(expr, input_schema, cast_type) } #[cfg(test)] @@ -158,11 +129,9 @@ mod tests { use super::*; use crate::error::Result; use crate::physical_plan::expressions::col; - use arrow::array::{StringArray, Time64NanosecondArray}; - use arrow::{ - array::{Array, Int32Array, Int64Array, TimestampNanosecondArray, UInt32Array}, - datatypes::*, - }; + use arrow2::{array::*, datatypes::*}; + + type StringArray = Utf8Array; // runs an end-to-end test of physical type cast // 1. construct a record batch with a column "a" of type A @@ -171,14 +140,14 @@ mod tests { // 4. verify that the resulting expression is of type B // 5. verify that the resulting values are downcastable and correct macro_rules! generic_test_cast { - ($A_ARRAY:ident, $A_TYPE:expr, $A_VEC:expr, $TYPEARRAY:ident, $TYPE:expr, $VEC:expr, $CAST_OPTIONS:expr) => {{ + ($A_ARRAY:ident, $A_TYPE:expr, $A_VEC:expr, $TYPEARRAY:ident, $TYPE:expr, $VEC:expr) => {{ let schema = Schema::new(vec![Field::new("a", $A_TYPE, false)]); - let a = $A_ARRAY::from($A_VEC); + let a = $A_ARRAY::from_slice(&$A_VEC); let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?; // verify that we can construct the expression - let expression = cast_with_options(col("a"), &schema, $TYPE, $CAST_OPTIONS)?; + let expression = cast_with_options(col("a"), &schema, $TYPE)?; // verify that its display is correct assert_eq!(format!("CAST(a AS {:?})", $TYPE), format!("{}", expression)); @@ -225,8 +194,7 @@ mod tests { Some(3_u32), Some(4_u32), Some(5_u32) - ], - DEFAULT_DATAFUSION_CAST_OPTIONS + ] ); Ok(()) } @@ -239,8 +207,7 @@ mod tests { vec![1, 2, 3, 4, 5], StringArray, DataType::Utf8, - vec![Some("1"), Some("2"), Some("3"), Some("4"), Some("5")], - DEFAULT_DATAFUSION_CAST_OPTIONS + vec![Some("1"), Some("2"), Some("3"), Some("4"), Some("5")] ); Ok(()) } @@ -249,18 +216,14 @@ mod tests { #[test] fn test_cast_i64_t64() -> Result<()> { let original = vec![1, 2, 3, 4, 5]; - let expected: Vec> = original - .iter() - .map(|i| Some(Time64NanosecondArray::from(vec![*i]).value(0))) - .collect(); + let expected: Vec> = original.iter().map(|i| Some(*i)).collect(); generic_test_cast!( Int64Array, DataType::Int64, original.clone(), - TimestampNanosecondArray, + Int64Array, DataType::Timestamp(TimeUnit::Nanosecond, None), - expected, - DEFAULT_DATAFUSION_CAST_OPTIONS + expected ); Ok(()) } @@ -278,21 +241,16 @@ mod tests { fn invalid_cast_with_options_error() -> Result<()> { // Ensure a useful error happens at plan time if invalid casts are used let schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]); - let a = StringArray::from(vec!["9.1"]); + let a = StringArray::from_slice(&["9.1"]); let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?; - let expression = cast_with_options( - col("a"), - &schema, - DataType::Int32, - DEFAULT_DATAFUSION_CAST_OPTIONS, - )?; + let expression = cast_with_options(col("a"), &schema, DataType::Int32)?; let result = expression.evaluate(&batch); match result { Ok(_) => panic!("expected error"), Err(e) => { assert!(e.to_string().contains( - "Cast error: Cannot cast string '9.1' to value of arrow::datatypes::types::Int32Type type" + "Cast error: Cannot cast string '9.1' to value of arrow2::datatypes::types::Int32Type type" )) } } diff --git a/datafusion/src/physical_plan/expressions/coercion.rs 
b/datafusion/src/physical_plan/expressions/coercion.rs index e9949f5199e88..73470d5428492 100644 --- a/datafusion/src/physical_plan/expressions/coercion.rs +++ b/datafusion/src/physical_plan/expressions/coercion.rs @@ -17,7 +17,7 @@ //! Coercion rules used to coerce types to match existing expressions' implementations -use arrow::datatypes::DataType; +use arrow2::datatypes::DataType; /// Determine if a DataType is signed numeric or not pub fn is_signed_numeric(dt: &DataType) -> bool { @@ -79,7 +79,7 @@ pub fn dictionary_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { - use arrow::datatypes::DataType::*; + use arrow2::datatypes::DataType::*; match (lhs_type, rhs_type) { (Utf8, Utf8) => Some(Utf8), (LargeUtf8, Utf8) => Some(LargeUtf8), @@ -92,7 +92,7 @@ pub fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { - use arrow::datatypes::DataType::*; + use arrow2::datatypes::DataType::*; match (lhs_type, rhs_type) { (Utf8, Date32) => Some(Date32), (Date32, Utf8) => Some(Date32), @@ -106,7 +106,7 @@ pub fn temporal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option Option { - use arrow::datatypes::DataType::*; + use arrow2::datatypes::DataType::*; // error on any non-numeric type if !is_numeric(lhs_type) || !is_numeric(rhs_type) { diff --git a/datafusion/src/physical_plan/expressions/column.rs b/datafusion/src/physical_plan/expressions/column.rs index 7e0304e51fe73..de64f8fed98a9 100644 --- a/datafusion/src/physical_plan/expressions/column.rs +++ b/datafusion/src/physical_plan/expressions/column.rs @@ -19,7 +19,7 @@ use std::sync::Arc; -use arrow::{ +use arrow2::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; diff --git a/datafusion/src/physical_plan/expressions/count.rs b/datafusion/src/physical_plan/expressions/count.rs index 4a3fbe4fa7d3d..4390fb52d2798 100644 --- a/datafusion/src/physical_plan/expressions/count.rs +++ b/datafusion/src/physical_plan/expressions/count.rs @@ -20,15 +20,13 @@ use std::any::Any; use std::sync::Arc; +use super::ArrayRef; use crate::error::Result; use crate::physical_plan::{Accumulator, AggregateExpr, PhysicalExpr}; use crate::scalar::ScalarValue; -use arrow::compute; -use arrow::datatypes::DataType; -use arrow::{ - array::{ArrayRef, UInt64Array}, - datatypes::Field, -}; +use arrow2::compute; +use arrow2::datatypes::DataType; +use arrow2::{array::UInt64Array, datatypes::Field}; use super::format_state_name; @@ -104,7 +102,7 @@ impl CountAccumulator { impl Accumulator for CountAccumulator { fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { let array = &values[0]; - self.count += (array.len() - array.data().null_count()) as u64; + self.count += (array.len() - array.null_count()) as u64; Ok(()) } @@ -128,7 +126,7 @@ impl Accumulator for CountAccumulator { fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { let counts = states[0].as_any().downcast_ref::().unwrap(); - let delta = &compute::sum(counts); + let delta = &compute::aggregate::sum(counts); if let Some(d) = delta { self.count += *d; } @@ -150,12 +148,12 @@ mod tests { use crate::physical_plan::expressions::col; use crate::physical_plan::expressions::tests::aggregate; use crate::{error::Result, generic_test_op}; - use arrow::record_batch::RecordBatch; - use arrow::{array::*, datatypes::*}; + use arrow2::record_batch::RecordBatch; + use arrow2::{array::*, datatypes::*}; #[test] fn count_elements() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])); + let a: ArrayRef = 
Arc::new(Int32Array::from_slice(&[1, 2, 3, 4, 5])); generic_test_op!( a, DataType::Int32, @@ -200,8 +198,7 @@ mod tests { #[test] fn count_empty() -> Result<()> { - let a: Vec = vec![]; - let a: ArrayRef = Arc::new(BooleanArray::from(a)); + let a: ArrayRef = Arc::new(BooleanArray::new_empty()); generic_test_op!( a, DataType::Boolean, @@ -213,8 +210,9 @@ mod tests { #[test] fn count_utf8() -> Result<()> { - let a: ArrayRef = - Arc::new(StringArray::from(vec!["a", "bb", "ccc", "dddd", "ad"])); + let a: ArrayRef = Arc::new(Utf8Array::::from_slice(&[ + "a", "bb", "ccc", "dddd", "ad", + ])); generic_test_op!( a, DataType::Utf8, @@ -226,8 +224,9 @@ mod tests { #[test] fn count_large_utf8() -> Result<()> { - let a: ArrayRef = - Arc::new(LargeStringArray::from(vec!["a", "bb", "ccc", "dddd", "ad"])); + let a: ArrayRef = Arc::new(Utf8Array::::from_slice(&[ + "a", "bb", "ccc", "dddd", "ad", + ])); generic_test_op!( a, DataType::LargeUtf8, diff --git a/datafusion/src/physical_plan/expressions/in_list.rs b/datafusion/src/physical_plan/expressions/in_list.rs index 41f111006ea2a..cab06344793e9 100644 --- a/datafusion/src/physical_plan/expressions/in_list.rs +++ b/datafusion/src/physical_plan/expressions/in_list.rs @@ -20,17 +20,15 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::GenericStringArray; -use arrow::array::{ - ArrayRef, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, - Int64Array, Int8Array, StringOffsetSizeTrait, UInt16Array, UInt32Array, UInt64Array, - UInt8Array, -}; -use arrow::{ +use arrow2::array::Utf8Array; +use arrow2::array::*; +use arrow2::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; +use super::ArrayRef; + use crate::error::Result; use crate::physical_plan::{ColumnarValue, PhysicalExpr}; use crate::scalar::ScalarValue; @@ -130,16 +128,13 @@ impl InListExpr { /// Compare for specific utf8 types #[allow(clippy::unnecessary_wraps)] - fn compare_utf8( + fn compare_utf8( &self, array: ArrayRef, list_values: Vec, negated: bool, ) -> Result { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); + let array = array.as_any().downcast_ref::>().unwrap(); let mut contains_null = false; let values = list_values @@ -288,7 +283,9 @@ pub fn in_list( #[cfg(test)] mod tests { - use arrow::{array::StringArray, datatypes::Field}; + use arrow2::{array::Utf8Array, datatypes::Field}; + + type StringArray = Utf8Array; use super::*; use crate::error::Result; diff --git a/datafusion/src/physical_plan/expressions/is_not_null.rs b/datafusion/src/physical_plan/expressions/is_not_null.rs index 7ac2110b50221..16694b384accf 100644 --- a/datafusion/src/physical_plan/expressions/is_not_null.rs +++ b/datafusion/src/physical_plan/expressions/is_not_null.rs @@ -19,8 +19,8 @@ use std::{any::Any, sync::Arc}; -use arrow::compute; -use arrow::{ +use arrow2::compute; +use arrow2::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; @@ -71,7 +71,7 @@ impl PhysicalExpr for IsNotNullExpr { let arg = self.arg.evaluate(batch)?; match arg { ColumnarValue::Array(array) => Ok(ColumnarValue::Array(Arc::new( - compute::is_not_null(array.as_ref())?, + compute::boolean::is_not_null(array.as_ref()), ))), ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( ScalarValue::Boolean(Some(!scalar.is_null())), @@ -89,13 +89,15 @@ pub fn is_not_null(arg: Arc) -> Result> mod tests { use super::*; use crate::physical_plan::expressions::col; - use arrow::{ - array::{BooleanArray, StringArray}, + use arrow2::{ + array::{BooleanArray, Utf8Array}, datatypes::*, 
record_batch::RecordBatch, }; use std::sync::Arc; + type StringArray = Utf8Array; + #[test] fn is_not_null_op() -> Result<()> { let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]); @@ -110,7 +112,7 @@ mod tests { .downcast_ref::() .expect("failed to downcast to BooleanArray"); - let expected = &BooleanArray::from(vec![true, false]); + let expected = &BooleanArray::from_slice(&[true, false]); assert_eq!(expected, result); diff --git a/datafusion/src/physical_plan/expressions/is_null.rs b/datafusion/src/physical_plan/expressions/is_null.rs index dfa53f3f7d264..2fb4acbbd3171 100644 --- a/datafusion/src/physical_plan/expressions/is_null.rs +++ b/datafusion/src/physical_plan/expressions/is_null.rs @@ -19,8 +19,8 @@ use std::{any::Any, sync::Arc}; -use arrow::compute; -use arrow::{ +use arrow2::compute; +use arrow2::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; @@ -71,7 +71,7 @@ impl PhysicalExpr for IsNullExpr { let arg = self.arg.evaluate(batch)?; match arg { ColumnarValue::Array(array) => Ok(ColumnarValue::Array(Arc::new( - compute::is_null(array.as_ref())?, + compute::boolean::is_null(array.as_ref()), ))), ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( ScalarValue::Boolean(Some(scalar.is_null())), @@ -89,13 +89,15 @@ pub fn is_null(arg: Arc) -> Result> { mod tests { use super::*; use crate::physical_plan::expressions::col; - use arrow::{ - array::{BooleanArray, StringArray}, + use arrow2::{ + array::{BooleanArray, Utf8Array}, datatypes::*, record_batch::RecordBatch, }; use std::sync::Arc; + type StringArray = Utf8Array; + #[test] fn is_null_op() -> Result<()> { let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]); @@ -110,7 +112,7 @@ mod tests { .downcast_ref::() .expect("failed to downcast to BooleanArray"); - let expected = &BooleanArray::from(vec![false, true]); + let expected = &BooleanArray::from_slice(&[false, true]); assert_eq!(expected, result); diff --git a/datafusion/src/physical_plan/expressions/literal.rs b/datafusion/src/physical_plan/expressions/literal.rs index 3110d39c87e0b..0bf71d63d89b6 100644 --- a/datafusion/src/physical_plan/expressions/literal.rs +++ b/datafusion/src/physical_plan/expressions/literal.rs @@ -20,7 +20,7 @@ use std::any::Any; use std::sync::Arc; -use arrow::{ +use arrow2::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; @@ -80,8 +80,8 @@ pub fn lit(value: ScalarValue) -> Arc { mod tests { use super::*; use crate::error::Result; - use arrow::array::Int32Array; - use arrow::datatypes::*; + use arrow2::array::*; + use arrow2::datatypes::*; #[test] fn literal_i32() -> Result<()> { diff --git a/datafusion/src/physical_plan/expressions/min_max.rs b/datafusion/src/physical_plan/expressions/min_max.rs index ea917d30d940d..e1e299b2e7b56 100644 --- a/datafusion/src/physical_plan/expressions/min_max.rs +++ b/datafusion/src/physical_plan/expressions/min_max.rs @@ -21,20 +21,17 @@ use std::any::Any; use std::convert::TryFrom; use std::sync::Arc; +use arrow2::array::*; +use arrow2::compute::aggregate::*; +use arrow2::datatypes::*; + use crate::error::{DataFusionError, Result}; use crate::physical_plan::{Accumulator, AggregateExpr, PhysicalExpr}; use crate::scalar::ScalarValue; -use arrow::compute; -use arrow::datatypes::{DataType, TimeUnit}; -use arrow::{ - array::{ - ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, - Int8Array, LargeStringArray, StringArray, TimestampMicrosecondArray, - TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, - 
UInt16Array, UInt32Array, UInt64Array, UInt8Array, - }, - datatypes::Field, -}; + +type StringArray = Utf8Array; +type LargeStringArray = Utf8Array; +type ArrayRef = Arc; use super::format_state_name; @@ -48,7 +45,7 @@ pub struct Max { } impl Max { - /// Create a new MAX aggregate function + /// Cre§ate a new MAX aggregate function pub fn new(expr: Arc, name: String, data_type: DataType) -> Self { Self { name, @@ -98,7 +95,7 @@ impl AggregateExpr for Max { macro_rules! typed_min_max_batch_string { ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{ let array = $VALUES.as_any().downcast_ref::<$ARRAYTYPE>().unwrap(); - let value = compute::$OP(array); + let value = $OP(array); let value = value.and_then(|e| Some(e.to_string())); ScalarValue::$SCALAR(value) }}; @@ -108,7 +105,7 @@ macro_rules! typed_min_max_batch_string { macro_rules! typed_min_max_batch { ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{ let array = $VALUES.as_any().downcast_ref::<$ARRAYTYPE>().unwrap(); - let value = compute::$OP(array); + let value = $OP(array); ScalarValue::$SCALAR(value) }}; } @@ -119,13 +116,9 @@ macro_rules! min_max_batch { ($VALUES:expr, $OP:ident) => {{ match $VALUES.data_type() { // all types that have a natural order - DataType::Float64 => { - typed_min_max_batch!($VALUES, Float64Array, Float64, $OP) + DataType::Int64 | DataType::Timestamp(TimeUnit::Second, _) => { + typed_min_max_batch!($VALUES, Int64Array, Int64, $OP) } - DataType::Float32 => { - typed_min_max_batch!($VALUES, Float32Array, Float32, $OP) - } - DataType::Int64 => typed_min_max_batch!($VALUES, Int64Array, Int64, $OP), DataType::Int32 => typed_min_max_batch!($VALUES, Int32Array, Int32, $OP), DataType::Int16 => typed_min_max_batch!($VALUES, Int16Array, Int16, $OP), DataType::Int8 => typed_min_max_batch!($VALUES, Int8Array, Int8, $OP), @@ -134,26 +127,17 @@ macro_rules! 
min_max_batch { DataType::UInt16 => typed_min_max_batch!($VALUES, UInt16Array, UInt16, $OP), DataType::UInt8 => typed_min_max_batch!($VALUES, UInt8Array, UInt8, $OP), DataType::Timestamp(TimeUnit::Second, _) => { - typed_min_max_batch!($VALUES, TimestampSecondArray, TimestampSecond, $OP) + typed_min_max_batch!($VALUES, Int64Array, TimestampSecond, $OP) + } + DataType::Timestamp(TimeUnit::Millisecond, _) => { + typed_min_max_batch!($VALUES, Int64Array, TimestampMillisecond, $OP) + } + DataType::Timestamp(TimeUnit::Microsecond, _) => { + typed_min_max_batch!($VALUES, Int64Array, TimestampMicrosecond, $OP) + } + DataType::Timestamp(TimeUnit::Nanosecond, _) => { + typed_min_max_batch!($VALUES, Int64Array, TimestampNanosecond, $OP) } - DataType::Timestamp(TimeUnit::Millisecond, _) => typed_min_max_batch!( - $VALUES, - TimestampMillisecondArray, - TimestampMillisecond, - $OP - ), - DataType::Timestamp(TimeUnit::Microsecond, _) => typed_min_max_batch!( - $VALUES, - TimestampMicrosecondArray, - TimestampMicrosecond, - $OP - ), - DataType::Timestamp(TimeUnit::Nanosecond, _) => typed_min_max_batch!( - $VALUES, - TimestampNanosecondArray, - TimestampNanosecond, - $OP - ), other => { // This should have been handled before return Err(DataFusionError::Internal(format!( @@ -174,7 +158,13 @@ fn min_batch(values: &ArrayRef) -> Result { DataType::LargeUtf8 => { typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, min_string) } - _ => min_max_batch!(values, min), + DataType::Float64 => { + typed_min_max_batch!(values, Float64Array, Float64, min_primitive) + } + DataType::Float32 => { + typed_min_max_batch!(values, Float32Array, Float32, min_primitive) + } + _ => min_max_batch!(values, min_primitive), }) } @@ -187,7 +177,13 @@ fn max_batch(values: &ArrayRef) -> Result { DataType::LargeUtf8 => { typed_min_max_batch_string!(values, LargeStringArray, LargeUtf8, max_string) } - _ => min_max_batch!(values, max), + DataType::Float64 => { + typed_min_max_batch!(values, Float64Array, Float64, max_primitive) + } + DataType::Float32 => { + typed_min_max_batch!(values, Float32Array, Float32, max_primitive) + } + _ => min_max_batch!(values, max_primitive), }) } @@ -448,12 +444,11 @@ mod tests { use crate::physical_plan::expressions::col; use crate::physical_plan::expressions::tests::aggregate; use crate::{error::Result, generic_test_op}; - use arrow::datatypes::*; - use arrow::record_batch::RecordBatch; + use arrow2::record_batch::RecordBatch; #[test] fn max_i32() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])); + let a: ArrayRef = Arc::new(Int32Array::from_slice(&[1, 2, 3, 4, 5])); generic_test_op!( a, DataType::Int32, @@ -465,7 +460,7 @@ mod tests { #[test] fn min_i32() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])); + let a: ArrayRef = Arc::new(Int32Array::from_slice(&[1, 2, 3, 4, 5])); generic_test_op!( a, DataType::Int32, @@ -477,7 +472,7 @@ mod tests { #[test] fn max_utf8() -> Result<()> { - let a: ArrayRef = Arc::new(StringArray::from(vec!["d", "a", "c", "b"])); + let a: ArrayRef = Arc::new(StringArray::from_slice(&["d", "a", "c", "b"])); generic_test_op!( a, DataType::Utf8, @@ -489,7 +484,7 @@ mod tests { #[test] fn max_large_utf8() -> Result<()> { - let a: ArrayRef = Arc::new(LargeStringArray::from(vec!["d", "a", "c", "b"])); + let a: ArrayRef = Arc::new(LargeStringArray::from_slice(&["d", "a", "c", "b"])); generic_test_op!( a, DataType::LargeUtf8, @@ -501,7 +496,7 @@ mod tests { #[test] fn min_utf8() -> Result<()> { - let a: 
ArrayRef = Arc::new(StringArray::from(vec!["d", "a", "c", "b"])); + let a: ArrayRef = Arc::new(StringArray::from_slice(&["d", "a", "c", "b"])); generic_test_op!( a, DataType::Utf8, @@ -513,7 +508,7 @@ mod tests { #[test] fn min_large_utf8() -> Result<()> { - let a: ArrayRef = Arc::new(LargeStringArray::from(vec!["d", "a", "c", "b"])); + let a: ArrayRef = Arc::new(LargeStringArray::from_slice(&["d", "a", "c", "b"])); generic_test_op!( a, DataType::LargeUtf8, @@ -525,7 +520,7 @@ mod tests { #[test] fn max_i32_with_nulls() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![ + let a: ArrayRef = Arc::new(Int32Array::from(&[ Some(1), None, Some(3), @@ -543,7 +538,7 @@ mod tests { #[test] fn min_i32_with_nulls() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![ + let a: ArrayRef = Arc::new(Int32Array::from(&[ Some(1), None, Some(3), @@ -561,7 +556,7 @@ mod tests { #[test] fn max_i32_all_nulls() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![None, None])); + let a: ArrayRef = Arc::new(Int32Array::from(&[None, None])); generic_test_op!( a, DataType::Int32, @@ -573,7 +568,7 @@ mod tests { #[test] fn min_i32_all_nulls() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![None, None])); + let a: ArrayRef = Arc::new(Int32Array::from(&[None, None])); generic_test_op!( a, DataType::Int32, @@ -585,8 +580,9 @@ mod tests { #[test] fn max_u32() -> Result<()> { - let a: ArrayRef = - Arc::new(UInt32Array::from(vec![1_u32, 2_u32, 3_u32, 4_u32, 5_u32])); + let a: ArrayRef = Arc::new(UInt32Array::from_slice(&[ + 1_u32, 2_u32, 3_u32, 4_u32, 5_u32, + ])); generic_test_op!( a, DataType::UInt32, @@ -598,8 +594,9 @@ mod tests { #[test] fn min_u32() -> Result<()> { - let a: ArrayRef = - Arc::new(UInt32Array::from(vec![1_u32, 2_u32, 3_u32, 4_u32, 5_u32])); + let a: ArrayRef = Arc::new(UInt32Array::from_slice(&[ + 1_u32, 2_u32, 3_u32, 4_u32, 5_u32, + ])); generic_test_op!( a, DataType::UInt32, @@ -611,8 +608,9 @@ mod tests { #[test] fn max_f32() -> Result<()> { - let a: ArrayRef = - Arc::new(Float32Array::from(vec![1_f32, 2_f32, 3_f32, 4_f32, 5_f32])); + let a: ArrayRef = Arc::new(Float32Array::from_slice(&[ + 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, + ])); generic_test_op!( a, DataType::Float32, @@ -624,8 +622,9 @@ mod tests { #[test] fn min_f32() -> Result<()> { - let a: ArrayRef = - Arc::new(Float32Array::from(vec![1_f32, 2_f32, 3_f32, 4_f32, 5_f32])); + let a: ArrayRef = Arc::new(Float32Array::from_slice(&[ + 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, + ])); generic_test_op!( a, DataType::Float32, @@ -637,8 +636,9 @@ mod tests { #[test] fn max_f64() -> Result<()> { - let a: ArrayRef = - Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64, 4_f64, 5_f64])); + let a: ArrayRef = Arc::new(Float64Array::from_slice(&[ + 1_f64, 2_f64, 3_f64, 4_f64, 5_f64, + ])); generic_test_op!( a, DataType::Float64, @@ -650,8 +650,9 @@ mod tests { #[test] fn min_f64() -> Result<()> { - let a: ArrayRef = - Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64, 4_f64, 5_f64])); + let a: ArrayRef = Arc::new(Float64Array::from_slice(&[ + 1_f64, 2_f64, 3_f64, 4_f64, 5_f64, + ])); generic_test_op!( a, DataType::Float64, diff --git a/datafusion/src/physical_plan/expressions/mod.rs b/datafusion/src/physical_plan/expressions/mod.rs index 4d57c39bb31cc..665cf248514b0 100644 --- a/datafusion/src/physical_plan/expressions/mod.rs +++ b/datafusion/src/physical_plan/expressions/mod.rs @@ -22,8 +22,18 @@ use std::sync::Arc; use super::ColumnarValue; use crate::error::{DataFusionError, Result}; use 
crate::physical_plan::PhysicalExpr; -use arrow::compute::kernels::sort::{SortColumn, SortOptions}; -use arrow::record_batch::RecordBatch; +use arrow2::array::Array; +use arrow2::compute::sort::SortOptions; +use arrow2::record_batch::RecordBatch; + +type ArrayRef = Arc; + +/// One column to be used in lexicographical sort +#[derive(Clone, Debug)] +pub struct SortColumn { + pub values: ArrayRef, + pub options: Option, +} mod average; #[macro_use] diff --git a/datafusion/src/physical_plan/expressions/negative.rs b/datafusion/src/physical_plan/expressions/negative.rs index 65010c6acd1ec..d17acf899b09d 100644 --- a/datafusion/src/physical_plan/expressions/negative.rs +++ b/datafusion/src/physical_plan/expressions/negative.rs @@ -20,14 +20,15 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::ArrayRef; -use arrow::compute::kernels::arithmetic::negate; -use arrow::{ - array::{Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array}, +use arrow2::{ + array::*, + compute::arithmetics::negate, datatypes::{DataType, Schema}, record_batch::RecordBatch, }; +type ArrayRef = Arc; + use crate::error::{DataFusionError, Result}; use crate::physical_plan::{ColumnarValue, PhysicalExpr}; @@ -36,12 +37,12 @@ use super::coercion; /// Invoke a compute kernel on array(s) macro_rules! compute_op { // invoke unary operator - ($OPERAND:expr, $OP:ident, $DT:ident) => {{ + ($OPERAND:expr, $DT:ident) => {{ let operand = $OPERAND .as_any() .downcast_ref::<$DT>() .expect("compute_op failed to downcast array"); - Ok(Arc::new($OP(&operand)?)) + Ok(Arc::new(negate(operand))) }}; } @@ -89,12 +90,12 @@ impl PhysicalExpr for NegativeExpr { match arg { ColumnarValue::Array(array) => { let result: Result = match array.data_type() { - DataType::Int8 => compute_op!(array, negate, Int8Array), - DataType::Int16 => compute_op!(array, negate, Int16Array), - DataType::Int32 => compute_op!(array, negate, Int32Array), - DataType::Int64 => compute_op!(array, negate, Int64Array), - DataType::Float32 => compute_op!(array, negate, Float32Array), - DataType::Float64 => compute_op!(array, negate, Float64Array), + DataType::Int8 => compute_op!(array, Int8Array), + DataType::Int16 => compute_op!(array, Int16Array), + DataType::Int32 => compute_op!(array, Int32Array), + DataType::Int64 => compute_op!(array, Int64Array), + DataType::Float32 => compute_op!(array, Float32Array), + DataType::Float64 => compute_op!(array, Float64Array), _ => Err(DataFusionError::Internal(format!( "(- '{:?}') can't be evaluated because the expression's type is {:?}, not signed numeric", self, diff --git a/datafusion/src/physical_plan/expressions/not.rs b/datafusion/src/physical_plan/expressions/not.rs index 23a1a46651dee..ac467a8cc344d 100644 --- a/datafusion/src/physical_plan/expressions/not.rs +++ b/datafusion/src/physical_plan/expressions/not.rs @@ -25,9 +25,9 @@ use super::ColumnarValue; use crate::error::{DataFusionError, Result}; use crate::physical_plan::PhysicalExpr; use crate::scalar::ScalarValue; -use arrow::array::BooleanArray; -use arrow::datatypes::{DataType, Schema}; -use arrow::record_batch::RecordBatch; +use arrow2::array::BooleanArray; +use arrow2::datatypes::{DataType, Schema}; +use arrow2::record_batch::RecordBatch; /// Not expression #[derive(Debug)] @@ -82,7 +82,7 @@ impl PhysicalExpr for NotExpr { ) })?; Ok(ColumnarValue::Array(Arc::new( - arrow::compute::kernels::boolean::not(array)?, + arrow2::compute::boolean::not(array), ))) } ColumnarValue::Scalar(scalar) => { @@ -121,7 +121,7 @@ mod tests { use super::*; use 
crate::error::Result; use crate::physical_plan::expressions::col; - use arrow::datatypes::*; + use arrow2::datatypes::*; #[test] fn neg_op() -> Result<()> { diff --git a/datafusion/src/physical_plan/expressions/nullif.rs b/datafusion/src/physical_plan/expressions/nullif.rs index 7cc58ed2318f4..b632cc45c67f4 100644 --- a/datafusion/src/physical_plan/expressions/nullif.rs +++ b/datafusion/src/physical_plan/expressions/nullif.rs @@ -15,57 +15,10 @@ // specific language governing permissions and limitations // under the License. -use std::sync::Arc; - use super::ColumnarValue; use crate::error::{DataFusionError, Result}; -use crate::scalar::ScalarValue; -use arrow::array::Array; -use arrow::array::{ - ArrayRef, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array, - Int16Array, Int32Array, Int64Array, Int8Array, StringArray, TimestampNanosecondArray, - UInt16Array, UInt32Array, UInt64Array, UInt8Array, -}; -use arrow::compute::kernels::boolean::nullif; -use arrow::compute::kernels::comparison::{eq, eq_scalar, eq_utf8, eq_utf8_scalar}; -use arrow::datatypes::{DataType, TimeUnit}; - -/// Invoke a compute kernel on a primitive array and a Boolean Array -macro_rules! compute_bool_array_op { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ - let ll = $LEFT - .as_any() - .downcast_ref::<$DT>() - .expect("compute_op failed to downcast array"); - let rr = $RIGHT - .as_any() - .downcast_ref::() - .expect("compute_op failed to downcast array"); - Ok(Arc::new($OP(&ll, &rr)?) as ArrayRef) - }}; -} - -/// Binary op between primitive and boolean arrays -macro_rules! primitive_bool_array_op { - ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ - match $LEFT.data_type() { - DataType::Int8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int8Array), - DataType::Int16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int16Array), - DataType::Int32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int32Array), - DataType::Int64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Int64Array), - DataType::UInt8 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt8Array), - DataType::UInt16 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt16Array), - DataType::UInt32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt32Array), - DataType::UInt64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, UInt64Array), - DataType::Float32 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Float32Array), - DataType::Float64 => compute_bool_array_op!($LEFT, $RIGHT, $OP, Float64Array), - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {:?} for NULLIF/primitive/boolean operator", - other - ))), - } - }}; -} +use arrow2::compute::nullif; +use arrow2::datatypes::DataType; /// Implements NULLIF(expr1, expr2) /// Args: 0 - left expr is any array @@ -83,20 +36,14 @@ pub fn nullif_func(args: &[ColumnarValue]) -> Result { match (lhs, rhs) { (ColumnarValue::Array(lhs), ColumnarValue::Scalar(rhs)) => { - let cond_array = binary_array_op_scalar!(lhs, rhs.clone(), eq).unwrap()?; - - let array = primitive_bool_array_op!(lhs, *cond_array, nullif)?; - - Ok(ColumnarValue::Array(array)) - } - (ColumnarValue::Array(lhs), ColumnarValue::Array(rhs)) => { - // Get args0 == args1 evaluated and produce a boolean array - let cond_array = binary_array_op!(lhs, rhs, eq)?; - - // Now, invoke nullif on the result - let array = primitive_bool_array_op!(lhs, *cond_array, nullif)?; - Ok(ColumnarValue::Array(array)) + Ok(ColumnarValue::Array( + nullif::nullif(lhs.as_ref(), rhs.to_array_of_size(lhs.len()).as_ref())? 
+ .into(), + )) } + (ColumnarValue::Array(lhs), ColumnarValue::Array(rhs)) => Ok( + ColumnarValue::Array(nullif::nullif(lhs.as_ref(), rhs.as_ref())?.into()), + ), _ => Err(DataFusionError::NotImplemented( "nullif does not support a literal as first argument".to_string(), )), @@ -122,8 +69,11 @@ pub static SUPPORTED_NULLIF_TYPES: &[DataType] = &[ #[cfg(test)] mod tests { + use std::sync::Arc; + use super::*; - use crate::error::Result; + use crate::{error::Result, scalar::ScalarValue}; + use arrow2::array::Int32Array; #[test] fn nullif_int32() -> Result<()> { @@ -145,7 +95,7 @@ mod tests { let result = nullif_func(&[a, lit_array])?; let result = result.into_array(0); - let expected = Arc::new(Int32Array::from(vec![ + let expected = Int32Array::from(vec![ Some(1), None, None, @@ -155,15 +105,15 @@ mod tests { None, Some(4), Some(5), - ])) as ArrayRef; - assert_eq!(expected.as_ref(), result.as_ref()); + ]); + assert_eq!(expected, result.as_ref()); Ok(()) } #[test] // Ensure that arrays with no nulls can also invoke NULLIF() correctly fn nullif_int32_nonulls() -> Result<()> { - let a = Int32Array::from(vec![1, 3, 10, 7, 8, 1, 2, 4, 5]); + let a = Int32Array::from_slice(&[1, 3, 10, 7, 8, 1, 2, 4, 5]); let a = ColumnarValue::Array(Arc::new(a)); let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(1i32))); @@ -171,7 +121,7 @@ mod tests { let result = nullif_func(&[a, lit_array])?; let result = result.into_array(0); - let expected = Arc::new(Int32Array::from(vec![ + let expected = Int32Array::from(vec![ None, Some(3), Some(10), @@ -181,8 +131,8 @@ mod tests { Some(2), Some(4), Some(5), - ])) as ArrayRef; - assert_eq!(expected.as_ref(), result.as_ref()); + ]); + assert_eq!(expected, result.as_ref()); Ok(()) } } diff --git a/datafusion/src/physical_plan/expressions/sum.rs b/datafusion/src/physical_plan/expressions/sum.rs index 7bbbf99fa6598..f7e611de439c4 100644 --- a/datafusion/src/physical_plan/expressions/sum.rs +++ b/datafusion/src/physical_plan/expressions/sum.rs @@ -24,15 +24,11 @@ use std::sync::Arc; use crate::error::{DataFusionError, Result}; use crate::physical_plan::{Accumulator, AggregateExpr, PhysicalExpr}; use crate::scalar::ScalarValue; -use arrow::compute; -use arrow::datatypes::DataType; -use arrow::{ - array::{ - ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, - Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array, - }, - datatypes::Field, -}; +use arrow2::compute; +use arrow2::datatypes::DataType; +use arrow2::{array::*, datatypes::Field}; + +type ArrayRef = Arc; use super::format_state_name; @@ -128,7 +124,7 @@ impl SumAccumulator { macro_rules! 
typed_sum_delta_batch { ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident) => {{ let array = $VALUES.as_any().downcast_ref::<$ARRAYTYPE>().unwrap(); - let delta = compute::sum(array); + let delta = compute::aggregate::sum(array); ScalarValue::$SCALAR(delta) }}; } @@ -276,12 +272,12 @@ mod tests { use super::*; use crate::physical_plan::expressions::col; use crate::{error::Result, generic_test_op}; - use arrow::datatypes::*; - use arrow::record_batch::RecordBatch; + use arrow2::datatypes::*; + use arrow2::record_batch::RecordBatch; #[test] fn sum_i32() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])); + let a: ArrayRef = Arc::new(Int32Array::from_slice(&[1, 2, 3, 4, 5])); generic_test_op!( a, DataType::Int32, @@ -293,7 +289,7 @@ mod tests { #[test] fn sum_i32_with_nulls() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![ + let a: ArrayRef = Arc::new(Int32Array::from(&[ Some(1), None, Some(3), @@ -323,8 +319,9 @@ mod tests { #[test] fn sum_u32() -> Result<()> { - let a: ArrayRef = - Arc::new(UInt32Array::from(vec![1_u32, 2_u32, 3_u32, 4_u32, 5_u32])); + let a: ArrayRef = Arc::new(UInt32Array::from_slice(&[ + 1_u32, 2_u32, 3_u32, 4_u32, 5_u32, + ])); generic_test_op!( a, DataType::UInt32, @@ -336,8 +333,9 @@ mod tests { #[test] fn sum_f32() -> Result<()> { - let a: ArrayRef = - Arc::new(Float32Array::from(vec![1_f32, 2_f32, 3_f32, 4_f32, 5_f32])); + let a: ArrayRef = Arc::new(Float32Array::from_slice(&[ + 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, + ])); generic_test_op!( a, DataType::Float32, @@ -349,8 +347,9 @@ mod tests { #[test] fn sum_f64() -> Result<()> { - let a: ArrayRef = - Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64, 4_f64, 5_f64])); + let a: ArrayRef = Arc::new(Float64Array::from_slice(&[ + 1_f64, 2_f64, 3_f64, 4_f64, 5_f64, + ])); generic_test_op!( a, DataType::Float64, diff --git a/datafusion/src/physical_plan/expressions/try_cast.rs b/datafusion/src/physical_plan/expressions/try_cast.rs index 5e402fdea28ad..60aadfdec7dbe 100644 --- a/datafusion/src/physical_plan/expressions/try_cast.rs +++ b/datafusion/src/physical_plan/expressions/try_cast.rs @@ -23,11 +23,10 @@ use super::ColumnarValue; use crate::error::{DataFusionError, Result}; use crate::physical_plan::PhysicalExpr; use crate::scalar::ScalarValue; -use arrow::compute; -use arrow::compute::kernels; -use arrow::datatypes::{DataType, Schema}; -use arrow::record_batch::RecordBatch; -use compute::can_cast_types; +use arrow2::compute; +use arrow2::datatypes::{DataType, Schema}; +use arrow2::record_batch::RecordBatch; +use compute::cast; /// TRY_CAST expression casts an expression to a specific data type and retuns NULL on invalid cast #[derive(Debug)] @@ -78,13 +77,13 @@ impl PhysicalExpr for TryCastExpr { fn evaluate(&self, batch: &RecordBatch) -> Result { let value = self.expr.evaluate(batch)?; match value { - ColumnarValue::Array(array) => Ok(ColumnarValue::Array(kernels::cast::cast( - &array, - &self.cast_type, - )?)), + ColumnarValue::Array(array) => Ok(ColumnarValue::Array( + cast::cast(array.as_ref(), &self.cast_type)?.into(), + )), ColumnarValue::Scalar(scalar) => { let scalar_array = scalar.to_array(); - let cast_array = kernels::cast::cast(&scalar_array, &self.cast_type)?; + let cast_array = + cast::cast(scalar_array.as_ref(), &self.cast_type)?.into(); let cast_scalar = ScalarValue::try_from_array(&cast_array, 0)?; Ok(ColumnarValue::Scalar(cast_scalar)) } @@ -104,7 +103,7 @@ pub fn try_cast( let expr_type = expr.data_type(input_schema)?; if expr_type == cast_type { 
Ok(expr.clone()) - } else if can_cast_types(&expr_type, &cast_type) { + } else if cast::can_cast_types(&expr_type, &cast_type) { Ok(Arc::new(TryCastExpr::new(expr, cast_type))) } else { Err(DataFusionError::Internal(format!( @@ -119,11 +118,9 @@ mod tests { use super::*; use crate::error::Result; use crate::physical_plan::expressions::col; - use arrow::array::{StringArray, Time64NanosecondArray}; - use arrow::{ - array::{Array, Int32Array, Int64Array, TimestampNanosecondArray, UInt32Array}, - datatypes::*, - }; + use arrow2::{array::*, datatypes::*}; + + type StringArray = Utf8Array; // runs an end-to-end test of physical type cast // 1. construct a record batch with a column "a" of type A @@ -134,7 +131,7 @@ mod tests { macro_rules! generic_test_cast { ($A_ARRAY:ident, $A_TYPE:expr, $A_VEC:expr, $TYPEARRAY:ident, $TYPE:expr, $VEC:expr) => {{ let schema = Schema::new(vec![Field::new("a", $A_TYPE, false)]); - let a = $A_ARRAY::from($A_VEC); + let a = $A_ARRAY::from_slice(&$A_VEC); let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![Arc::new(a)])?; @@ -221,15 +218,12 @@ mod tests { #[test] fn test_cast_i64_t64() -> Result<()> { let original = vec![1, 2, 3, 4, 5]; - let expected: Vec> = original - .iter() - .map(|i| Some(Time64NanosecondArray::from(vec![*i]).value(0))) - .collect(); + let expected: Vec> = original.iter().map(|i| Some(*i)).collect(); generic_test_cast!( Int64Array, DataType::Int64, original.clone(), - TimestampNanosecondArray, + Int64Array, DataType::Timestamp(TimeUnit::Nanosecond, None), expected ); diff --git a/datafusion/src/physical_plan/filter.rs b/datafusion/src/physical_plan/filter.rs index bc2b17aa4f47d..1bdd9a90042ae 100644 --- a/datafusion/src/physical_plan/filter.rs +++ b/datafusion/src/physical_plan/filter.rs @@ -28,11 +28,14 @@ use crate::error::{DataFusionError, Result}; use crate::physical_plan::{ DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr, }; -use arrow::array::BooleanArray; -use arrow::compute::filter_record_batch; -use arrow::datatypes::{DataType, SchemaRef}; -use arrow::error::Result as ArrowResult; -use arrow::record_batch::RecordBatch; + +use arrow2::array::BooleanArray; +use arrow2::compute::filter::filter_record_batch; +use arrow2::datatypes::{DataType, Schema}; +use arrow2::error::Result as ArrowResult; +use arrow2::record_batch::RecordBatch; + +type SchemaRef = Arc; use async_trait::async_trait; diff --git a/datafusion/src/physical_plan/functions.rs b/datafusion/src/physical_plan/functions.rs index 367e594f6e977..f757361fbf95f 100644 --- a/datafusion/src/physical_plan/functions.rs +++ b/datafusion/src/physical_plan/functions.rs @@ -43,17 +43,19 @@ use crate::{ error::{DataFusionError, Result}, scalar::ScalarValue, }; -use arrow::{ - array::{ArrayRef, NullArray}, - compute::kernels::length::{bit_length, length}, +use arrow2::{ + array::{Array, NullArray}, + compute::length::length, datatypes::TimeUnit, - datatypes::{DataType, Field, Int32Type, Int64Type, Schema}, + datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; use fmt::{Debug, Formatter}; use std::convert::From; use std::{any::Any, fmt, str::FromStr, sync::Arc}; +type ArrayRef = Arc; + /// A function's signature, which defines the function's supported argument types. 
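/// For example, `Signature::Variadic(vec![DataType::Utf8])` (used below for `concat`) accepts any
/// number of `Utf8` arguments, while `Signature::VariadicEqual` (used below for `array`) accepts any
/// number of arguments as long as they all share a single common type.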
#[derive(Debug, Clone, PartialEq)] pub enum Signature { @@ -557,7 +559,7 @@ pub fn create_physical_expr( ))), }, BuiltinScalarFunction::BitLength => |args| match &args[0] { - ColumnarValue::Array(v) => Ok(ColumnarValue::Array(bit_length(v.as_ref())?)), + ColumnarValue::Array(v) => todo!(), ColumnarValue::Scalar(v) => match v { ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar(ScalarValue::Int32( v.as_ref().map(|x| (x.len() * 8) as i32), @@ -584,7 +586,7 @@ pub fn create_physical_expr( DataType::Utf8 => { let func = invoke_if_unicode_expressions_feature_flag!( character_length, - Int32Type, + i32, "character_length" ); make_scalar_function(func)(args) @@ -592,7 +594,7 @@ pub fn create_physical_expr( DataType::LargeUtf8 => { let func = invoke_if_unicode_expressions_feature_flag!( character_length, - Int64Type, + i64, "character_length" ); make_scalar_function(func)(args) @@ -685,7 +687,9 @@ pub fn create_physical_expr( } BuiltinScalarFunction::NullIf => nullif_func, BuiltinScalarFunction::OctetLength => |args| match &args[0] { - ColumnarValue::Array(v) => Ok(ColumnarValue::Array(length(v.as_ref())?)), + ColumnarValue::Array(v) => { + Ok(ColumnarValue::Array(length(v.as_ref())?.into())) + } ColumnarValue::Scalar(v) => match v { ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar(ScalarValue::Int32( v.as_ref().map(|x| x.len() as i32), @@ -860,15 +864,13 @@ pub fn create_physical_expr( }, BuiltinScalarFunction::Strpos => |args| match args[0].data_type() { DataType::Utf8 => { - let func = invoke_if_unicode_expressions_feature_flag!( - strpos, Int32Type, "strpos" - ); + let func = + invoke_if_unicode_expressions_feature_flag!(strpos, i32, "strpos"); make_scalar_function(func)(args) } DataType::LargeUtf8 => { - let func = invoke_if_unicode_expressions_feature_flag!( - strpos, Int64Type, "strpos" - ); + let func = + invoke_if_unicode_expressions_feature_flag!(strpos, i64, "strpos"); make_scalar_function(func)(args) } other => Err(DataFusionError::Internal(format!( @@ -894,10 +896,10 @@ pub fn create_physical_expr( }, BuiltinScalarFunction::ToHex => |args| match args[0].data_type() { DataType::Int32 => { - make_scalar_function(string_expressions::to_hex::)(args) + make_scalar_function(string_expressions::to_hex::)(args) } DataType::Int64 => { - make_scalar_function(string_expressions::to_hex::)(args) + make_scalar_function(string_expressions::to_hex::)(args) } other => Err(DataFusionError::Internal(format!( "Unsupported data type {:?} for function to_hex", @@ -963,9 +965,7 @@ fn signature(fun: &BuiltinScalarFunction) -> Signature { // for now, the list is small, as we do not have many built-in functions. 
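// The Signature returned here also drives argument coercion: the planner compares the actual
// argument types against it and inserts implicit casts before the function is invoked.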
match fun { - BuiltinScalarFunction::Array => { - Signature::Variadic(array_expressions::SUPPORTED_ARRAY_TYPES.to_vec()) - } + BuiltinScalarFunction::Array => Signature::VariadicEqual, BuiltinScalarFunction::Concat | BuiltinScalarFunction::ConcatWithSeparator => { Signature::Variadic(vec![DataType::Utf8]) } @@ -1188,7 +1188,7 @@ type NullColumnarValue = ColumnarValue; impl From<&RecordBatch> for NullColumnarValue { fn from(batch: &RecordBatch) -> Self { let num_rows = batch.num_rows(); - ColumnarValue::Array(Arc::new(NullArray::new(num_rows))) + ColumnarValue::Array(Arc::new(NullArray::from_data(num_rows))) } } @@ -1272,14 +1272,9 @@ mod tests { physical_plan::expressions::{col, lit}, scalar::ScalarValue, }; - use arrow::{ - array::{ - Array, ArrayRef, BinaryArray, BooleanArray, FixedSizeListArray, Float64Array, - Int32Array, ListArray, StringArray, UInt32Array, UInt64Array, - }, - datatypes::Field, - record_batch::RecordBatch, - }; + use arrow2::{array::*, datatypes::Field, record_batch::RecordBatch}; + + type StringArray = Utf8Array; /// $FUNC function to test /// $ARGS arguments (vec) to pass to function @@ -1295,7 +1290,7 @@ mod tests { // any type works here: we evaluate against a literal of `value` let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); - let columns: Vec = vec![Arc::new(Int32Array::from(vec![1]))]; + let columns: Vec = vec![Arc::new(Int32Array::from_slice(&[1]))]; let expr = create_physical_expr(&BuiltinScalarFunction::$FUNC, $ARGS, &schema, &ctx_state)?; @@ -2766,6 +2761,7 @@ mod tests { Utf8, StringArray ); + type B = BinaryArray; #[cfg(feature = "crypto_expressions")] test_function!( SHA224, @@ -2777,7 +2773,7 @@ mod tests { ])), &[u8], Binary, - BinaryArray + B ); #[cfg(feature = "crypto_expressions")] test_function!( @@ -2790,7 +2786,7 @@ mod tests { ])), &[u8], Binary, - BinaryArray + B ); #[cfg(feature = "crypto_expressions")] test_function!( @@ -2799,7 +2795,7 @@ mod tests { Ok(None), &[u8], Binary, - BinaryArray + B ); #[cfg(not(feature = "crypto_expressions"))] test_function!( @@ -2810,7 +2806,7 @@ mod tests { )), &[u8], Binary, - BinaryArray + B ); #[cfg(feature = "crypto_expressions")] test_function!( @@ -2823,7 +2819,7 @@ mod tests { ])), &[u8], Binary, - BinaryArray + B ); #[cfg(feature = "crypto_expressions")] test_function!( @@ -2836,7 +2832,7 @@ mod tests { ])), &[u8], Binary, - BinaryArray + B ); #[cfg(feature = "crypto_expressions")] test_function!( @@ -2845,7 +2841,7 @@ mod tests { Ok(None), &[u8], Binary, - BinaryArray + B ); #[cfg(not(feature = "crypto_expressions"))] test_function!( @@ -2856,7 +2852,7 @@ mod tests { )), &[u8], Binary, - BinaryArray + B ); #[cfg(feature = "crypto_expressions")] test_function!( @@ -2871,7 +2867,7 @@ mod tests { ])), &[u8], Binary, - BinaryArray + B ); #[cfg(feature = "crypto_expressions")] test_function!( @@ -2886,7 +2882,7 @@ mod tests { ])), &[u8], Binary, - BinaryArray + B ); #[cfg(feature = "crypto_expressions")] test_function!( @@ -2895,7 +2891,7 @@ mod tests { Ok(None), &[u8], Binary, - BinaryArray + B ); #[cfg(not(feature = "crypto_expressions"))] test_function!( @@ -2906,7 +2902,7 @@ mod tests { )), &[u8], Binary, - BinaryArray + B ); #[cfg(feature = "crypto_expressions")] test_function!( @@ -2922,7 +2918,7 @@ mod tests { ])), &[u8], Binary, - BinaryArray + B ); #[cfg(feature = "crypto_expressions")] test_function!( @@ -2938,7 +2934,7 @@ mod tests { ])), &[u8], Binary, - BinaryArray + B ); #[cfg(feature = "crypto_expressions")] test_function!( @@ -2947,7 +2943,7 @@ mod tests { Ok(None), 
&[u8], Binary, - BinaryArray + B ); #[cfg(not(feature = "crypto_expressions"))] test_function!( @@ -3469,7 +3465,7 @@ mod tests { &ctx_state, )?; - let columns: Vec = vec![Arc::new(Int32Array::from(vec![1]))]; + let columns: Vec = vec![Arc::new(Int32Array::from_slice(&[1]))]; let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?; let result = expr.evaluate(&batch); @@ -3529,24 +3525,24 @@ mod tests { #[test] fn test_array() -> Result<()> { generic_test_array( - Arc::new(StringArray::from(vec!["aa"])), - Arc::new(StringArray::from(vec!["bb"])), + Arc::new(StringArray::from_slice(&["aa"])), + Arc::new(StringArray::from_slice(&["bb"])), DataType::Utf8, "StringArray\n[\n \"aa\",\n \"bb\",\n]", )?; // different types, to validate that casting happens generic_test_array( - Arc::new(UInt32Array::from(vec![1u32])), - Arc::new(UInt64Array::from(vec![1u64])), + Arc::new(UInt32Array::from_slice(&[1u32])), + Arc::new(UInt64Array::from_slice(&[1u64])), DataType::UInt64, "PrimitiveArray\n[\n 1,\n 1,\n]", )?; // different types (another order), to validate that casting happens generic_test_array( - Arc::new(UInt64Array::from(vec![1u64])), - Arc::new(UInt32Array::from(vec![1u32])), + Arc::new(UInt64Array::from_slice(&[1u64])), + Arc::new(UInt32Array::from_slice(&[1u32])), DataType::UInt64, "PrimitiveArray\n[\n 1,\n 1,\n]", ) @@ -3559,7 +3555,7 @@ mod tests { let ctx_state = ExecutionContextState::new(); // concat(value, value) - let col_value: ArrayRef = Arc::new(StringArray::from(vec!["aaa-555"])); + let col_value: ArrayRef = Arc::new(StringArray::from_slice(&["aaa-555"])); let pattern = lit(ScalarValue::Utf8(Some(r".*-(\d*)".to_string()))); let columns: Vec = vec![col_value]; let expr = create_physical_expr( @@ -3580,7 +3576,7 @@ mod tests { let result = expr.evaluate(&batch)?.into_array(batch.num_rows()); // downcast works - let result = result.as_any().downcast_ref::().unwrap(); + let result = result.as_any().downcast_ref::>().unwrap(); let first_row = result.value(0); let first_row = first_row.as_any().downcast_ref::().unwrap(); @@ -3600,7 +3596,7 @@ mod tests { // concat(value, value) let col_value = lit(ScalarValue::Utf8(Some("aaa-555".to_string()))); let pattern = lit(ScalarValue::Utf8(Some(r".*-(\d*)".to_string()))); - let columns: Vec = vec![Arc::new(Int32Array::from(vec![1]))]; + let columns: Vec = vec![Arc::new(Int32Array::from_slice(&[1]))]; let expr = create_physical_expr( &BuiltinScalarFunction::RegexpMatch, &[col_value, pattern], @@ -3619,7 +3615,7 @@ mod tests { let result = expr.evaluate(&batch)?.into_array(batch.num_rows()); // downcast works - let result = result.as_any().downcast_ref::().unwrap(); + let result = result.as_any().downcast_ref::>().unwrap(); let first_row = result.value(0); let first_row = first_row.as_any().downcast_ref::().unwrap(); diff --git a/datafusion/src/physical_plan/group_scalar.rs b/datafusion/src/physical_plan/group_scalar.rs index 943386d215c4f..7c81be072e6e4 100644 --- a/datafusion/src/physical_plan/group_scalar.rs +++ b/datafusion/src/physical_plan/group_scalar.rs @@ -39,9 +39,9 @@ pub(crate) enum GroupByScalar { Utf8(Box), LargeUtf8(Box), Boolean(bool), - TimeMillisecond(i64), - TimeMicrosecond(i64), - TimeNanosecond(i64), + TimestampMillisecond(i64), + TimestampMicrosecond(i64), + TimestampNanosecond(i64), Date32(i32), } @@ -66,13 +66,13 @@ impl TryFrom<&ScalarValue> for GroupByScalar { ScalarValue::UInt32(Some(v)) => GroupByScalar::UInt32(*v), ScalarValue::UInt64(Some(v)) => GroupByScalar::UInt64(*v), 
ScalarValue::TimestampMillisecond(Some(v)) => { - GroupByScalar::TimeMillisecond(*v) + GroupByScalar::TimestampMillisecond(*v) } ScalarValue::TimestampMicrosecond(Some(v)) => { - GroupByScalar::TimeMicrosecond(*v) + GroupByScalar::TimestampMicrosecond(*v) } ScalarValue::TimestampNanosecond(Some(v)) => { - GroupByScalar::TimeNanosecond(*v) + GroupByScalar::TimestampNanosecond(*v) } ScalarValue::Utf8(Some(v)) => GroupByScalar::Utf8(Box::new(v.clone())), ScalarValue::LargeUtf8(Some(v)) => { @@ -121,13 +121,13 @@ impl From<&GroupByScalar> for ScalarValue { GroupByScalar::UInt64(v) => ScalarValue::UInt64(Some(*v)), GroupByScalar::Utf8(v) => ScalarValue::Utf8(Some(v.to_string())), GroupByScalar::LargeUtf8(v) => ScalarValue::LargeUtf8(Some(v.to_string())), - GroupByScalar::TimeMillisecond(v) => { + GroupByScalar::TimestampMillisecond(v) => { ScalarValue::TimestampMillisecond(Some(*v)) } - GroupByScalar::TimeMicrosecond(v) => { + GroupByScalar::TimestampMicrosecond(v) => { ScalarValue::TimestampMicrosecond(Some(*v)) } - GroupByScalar::TimeNanosecond(v) => { + GroupByScalar::TimestampNanosecond(v) => { ScalarValue::TimestampNanosecond(Some(*v)) } GroupByScalar::Date32(v) => ScalarValue::Date32(Some(*v)), diff --git a/datafusion/src/physical_plan/hash_aggregate.rs b/datafusion/src/physical_plan/hash_aggregate.rs index 0a822dc898afb..f74cccbb1a5cd 100644 --- a/datafusion/src/physical_plan/hash_aggregate.rs +++ b/datafusion/src/physical_plan/hash_aggregate.rs @@ -27,43 +27,29 @@ use futures::{ Future, }; -use crate::error::{DataFusionError, Result}; use crate::physical_plan::{ Accumulator, AggregateExpr, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, PhysicalExpr, SQLMetric, }; - -use arrow::{ - array::{Array, UInt32Builder}, - error::{ArrowError, Result as ArrowResult}, -}; -use arrow::{ - array::{ - ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, - Int8Array, StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, - }, - compute, -}; -use arrow::{ - array::{BooleanArray, Date32Array, DictionaryArray}, - compute::cast, - datatypes::{ - ArrowDictionaryKeyType, ArrowNativeType, Int16Type, Int32Type, Int64Type, - Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, - }, +use crate::{ + error::{DataFusionError, Result}, + scalar::ScalarValue, }; -use arrow::{ - datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}, + +use arrow2::error::{ArrowError, Result as ArrowResult}; +use arrow2::{array::*, compute}; +use arrow2::{buffer::MutableBuffer, datatypes::*}; +use arrow2::{ + datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; use hashbrown::HashMap; use ordered_float::OrderedFloat; use pin_project_lite::pin_project; -use arrow::array::{ - LargeStringArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, -}; +type SchemaRef = Arc; +type ArrayRef = Arc; + use async_trait::async_trait; use super::{ @@ -390,7 +376,7 @@ fn group_aggregate_batch( if v.is_empty() { batch_keys.push(key.clone()) }; - v.push(row as u32) + v.push(row as i32) }) // 1.2 .or_insert_with(|| { @@ -400,22 +386,23 @@ fn group_aggregate_batch( let _ = create_group_by_values(&group_values, row, &mut group_by_values); ( key.clone(), - (group_by_values.clone(), accumulator_set, vec![row as u32]), + (group_by_values.clone(), accumulator_set, vec![row as i32]), ) }); } // Collect all indices + offsets based on keys in this vec - let mut batch_indices: UInt32Builder = UInt32Builder::new(0); + let mut batch_indices = 
MutableBuffer::::with_capacity(0); let mut offsets = vec![0]; let mut offset_so_far = 0; for key in batch_keys.iter() { let (_, _, indices) = accumulators.get_mut(key).unwrap(); - batch_indices.append_slice(&indices)?; + batch_indices.extend_from_slice(&indices); offset_so_far += indices.len(); offsets.push(offset_so_far); } - let batch_indices = batch_indices.finish(); + let batch_indices = + Int32Array::from_data(DataType::Int32, batch_indices.into(), None); // `Take` all values based on indices into Arrays let values: Vec>> = aggr_input_values @@ -424,12 +411,9 @@ fn group_aggregate_batch( array .iter() .map(|array| { - compute::take( - array.as_ref(), - &batch_indices, - None, // None: no index check - ) - .unwrap() + compute::take::take(array.as_ref(), &batch_indices) + .unwrap() + .into() }) .collect() // 2.3 @@ -457,7 +441,7 @@ fn group_aggregate_batch( .iter() .map(|array| { // 2.3 - array.slice(offsets[0], offsets[1] - offsets[0]) + array.slice(offsets[0], offsets[1] - offsets[0]).into() }) .collect::>(), ) @@ -490,7 +474,7 @@ fn group_aggregate_batch( /// but it also has to to handle the case where the dictionary itself /// is not the same across all record batches (and thus indexes in one /// record batch may not correspond to the same index in another) -fn dictionary_create_key_for_col( +fn dictionary_create_key_for_col( col: &ArrayRef, row: usize, vec: &mut Vec, @@ -498,7 +482,7 @@ fn dictionary_create_key_for_col( let dict_col = col.as_any().downcast_ref::>().unwrap(); // look up the index in the values dictionary - let keys_col = dict_col.keys_array(); + let keys_col = dict_col.keys(); let values_index = keys_col.value(row).to_usize().ok_or_else(|| { DataFusionError::Internal(format!( "Can not convert index to usize in dictionary of type creating group by value {:?}", @@ -557,29 +541,15 @@ fn create_key_for_col(col: &ArrayRef, row: usize, vec: &mut Vec) -> Result<( let array = col.as_any().downcast_ref::().unwrap(); vec.extend_from_slice(&array.value(row).to_le_bytes()); } - DataType::Timestamp(TimeUnit::Millisecond, None) => { - let array = col - .as_any() - .downcast_ref::() - .unwrap(); - vec.extend_from_slice(&array.value(row).to_le_bytes()); - } - DataType::Timestamp(TimeUnit::Microsecond, None) => { + DataType::Timestamp(_, None) => { let array = col .as_any() - .downcast_ref::() - .unwrap(); - vec.extend_from_slice(&array.value(row).to_le_bytes()); - } - DataType::Timestamp(TimeUnit::Nanosecond, None) => { - let array = col - .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); vec.extend_from_slice(&array.value(row).to_le_bytes()); } DataType::Utf8 => { - let array = col.as_any().downcast_ref::().unwrap(); + let array = col.as_any().downcast_ref::>().unwrap(); let value = array.value(row); // store the size vec.extend_from_slice(&value.len().to_le_bytes()); @@ -587,7 +557,7 @@ fn create_key_for_col(col: &ArrayRef, row: usize, vec: &mut Vec) -> Result<( vec.extend_from_slice(value.as_bytes()); } DataType::LargeUtf8 => { - let array = col.as_any().downcast_ref::().unwrap(); + let array = col.as_any().downcast_ref::>().unwrap(); let value = array.value(row); // store the size vec.extend_from_slice(&value.len().to_le_bytes()); @@ -595,33 +565,33 @@ fn create_key_for_col(col: &ArrayRef, row: usize, vec: &mut Vec) -> Result<( vec.extend_from_slice(value.as_bytes()); } DataType::Date32 => { - let array = col.as_any().downcast_ref::().unwrap(); + let array = col.as_any().downcast_ref::().unwrap(); vec.extend_from_slice(&array.value(row).to_le_bytes()); } 
DataType::Dictionary(index_type, _) => match **index_type { DataType::Int8 => { - dictionary_create_key_for_col::(col, row, vec)?; + dictionary_create_key_for_col::(col, row, vec)?; } DataType::Int16 => { - dictionary_create_key_for_col::(col, row, vec)?; + dictionary_create_key_for_col::(col, row, vec)?; } DataType::Int32 => { - dictionary_create_key_for_col::(col, row, vec)?; + dictionary_create_key_for_col::(col, row, vec)?; } DataType::Int64 => { - dictionary_create_key_for_col::(col, row, vec)?; + dictionary_create_key_for_col::(col, row, vec)?; } DataType::UInt8 => { - dictionary_create_key_for_col::(col, row, vec)?; + dictionary_create_key_for_col::(col, row, vec)?; } DataType::UInt16 => { - dictionary_create_key_for_col::(col, row, vec)?; + dictionary_create_key_for_col::(col, row, vec)?; } DataType::UInt32 => { - dictionary_create_key_for_col::(col, row, vec)?; + dictionary_create_key_for_col::(col, row, vec)?; } DataType::UInt64 => { - dictionary_create_key_for_col::(col, row, vec)?; + dictionary_create_key_for_col::(col, row, vec)?; } _ => return Err(DataFusionError::Internal(format!( "Unsupported GROUP BY type (dictionary index type not supported creating key) {}", @@ -723,7 +693,7 @@ impl GroupedHashAggregateStream { type AccumulatorItem = Box; type Accumulators = - HashMap, (Box<[GroupByScalar]>, Vec, Vec), RandomState>; + HashMap, (Box<[GroupByScalar]>, Vec, Vec), RandomState>; impl Stream for GroupedHashAggregateStream { type Item = ArrowResult; @@ -748,7 +718,7 @@ impl Stream for GroupedHashAggregateStream { // check for error in receiving channel and unwrap actual result let result = match result { - Err(e) => Err(ArrowError::ExternalError(Box::new(e))), // error receiving + Err(e) => Err(ArrowError::External("".to_string(), Box::new(e))), // error receiving Ok(result) => result, }; @@ -939,7 +909,7 @@ impl Stream for HashAggregateStream { // check for error in receiving channel and unwrap actual result let result = match result { - Err(e) => Err(ArrowError::ExternalError(Box::new(e))), // error receiving + Err(e) => Err(ArrowError::External("".to_string(), Box::new(e))), // error receiving Ok(result) => result, }; @@ -965,7 +935,7 @@ fn concatenate(arrays: Vec>) -> ArrowResult> { .iter() .map(|a| a[column].as_ref()) .collect::>(); - compute::concat(&array_list) + Ok(compute::concat::concatenate(&array_list)?.into()) }) .collect::>>() } @@ -987,42 +957,9 @@ fn create_batch_from_map( .map(|(_, (group_by_values, accumulator_set, _))| { // 2. 
let mut groups = (0..num_group_expr) - .map(|i| match &group_by_values[i] { - GroupByScalar::Float32(n) => { - Arc::new(Float32Array::from(vec![(*n).into()] as Vec)) - as ArrayRef - } - GroupByScalar::Float64(n) => { - Arc::new(Float64Array::from(vec![(*n).into()] as Vec)) - as ArrayRef - } - GroupByScalar::Int8(n) => { - Arc::new(Int8Array::from(vec![*n])) as ArrayRef - } - GroupByScalar::Int16(n) => Arc::new(Int16Array::from(vec![*n])), - GroupByScalar::Int32(n) => Arc::new(Int32Array::from(vec![*n])), - GroupByScalar::Int64(n) => Arc::new(Int64Array::from(vec![*n])), - GroupByScalar::UInt8(n) => Arc::new(UInt8Array::from(vec![*n])), - GroupByScalar::UInt16(n) => Arc::new(UInt16Array::from(vec![*n])), - GroupByScalar::UInt32(n) => Arc::new(UInt32Array::from(vec![*n])), - GroupByScalar::UInt64(n) => Arc::new(UInt64Array::from(vec![*n])), - GroupByScalar::Utf8(str) => { - Arc::new(StringArray::from(vec![&***str])) - } - GroupByScalar::LargeUtf8(str) => { - Arc::new(LargeStringArray::from(vec![&***str])) - } - GroupByScalar::Boolean(b) => Arc::new(BooleanArray::from(vec![*b])), - GroupByScalar::TimeMillisecond(n) => { - Arc::new(TimestampMillisecondArray::from(vec![*n])) - } - GroupByScalar::TimeMicrosecond(n) => { - Arc::new(TimestampMicrosecondArray::from(vec![*n])) - } - GroupByScalar::TimeNanosecond(n) => { - Arc::new(TimestampNanosecondArray::from_vec(vec![*n], None)) - } - GroupByScalar::Date32(n) => Arc::new(Date32Array::from(vec![*n])), + .map(|i| { + let scalar: ScalarValue = (&group_by_values[i]).into(); + scalar.to_array() }) .collect::>(); @@ -1047,7 +984,10 @@ fn create_batch_from_map( let columns = columns .iter() .zip(output_schema.fields().iter()) - .map(|(col, desired_field)| cast(col, desired_field.data_type())) + .map(|(col, desired_field)| { + compute::cast::cast(col.as_ref(), desired_field.data_type()) + .map(|x| x.into()) + }) .collect::>>()?; RecordBatch::try_new(Arc::new(output_schema.to_owned()), columns)? 
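
Note on the hunk above: the long per-variant match in `create_batch_from_map` is collapsed by routing every group key through `ScalarValue` — the `From<&GroupByScalar> for ScalarValue` impl added in the group_scalar.rs hunk converts the key, and `ScalarValue::to_array` materialises a single-row array of the matching type. A minimal sketch of that pattern, assuming only those two conversions and the `ArrayRef` alias used in this file (`key_to_column` itself is a hypothetical helper, not part of this patch):

```rust
// Sketch only: turn one group key into a one-row column.
fn key_to_column(key: &GroupByScalar) -> ArrayRef {
    // Reuse the From<&GroupByScalar> for ScalarValue impl from group_scalar.rs.
    let scalar: ScalarValue = key.into();
    // ScalarValue::to_array builds a length-1 array of the corresponding
    // DataType, so no per-type match is needed at the call site any more.
    scalar.to_array()
}
```

The resulting key columns are then cast to the output schema with `compute::cast::cast`, as the end of the hunk shows.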
@@ -1097,14 +1037,14 @@ fn finalize_aggregation( } /// Extract the value in `col[row]` from a dictionary a GroupByScalar -fn dictionary_create_group_by_value( +fn dictionary_create_group_by_value( col: &ArrayRef, row: usize, ) -> Result { let dict_col = col.as_any().downcast_ref::>().unwrap(); // look up the index in the values dictionary - let keys_col = dict_col.keys_array(); + let keys_col = dict_col.keys(); let values_index = keys_col.value(row).to_usize().ok_or_else(|| { DataFusionError::Internal(format!( "Can not convert index to usize in dictionary of type creating group by value {:?}", @@ -1159,11 +1099,11 @@ fn create_group_by_value(col: &ArrayRef, row: usize) -> Result { Ok(GroupByScalar::Int64(array.value(row))) } DataType::Utf8 => { - let array = col.as_any().downcast_ref::().unwrap(); + let array = col.as_any().downcast_ref::>().unwrap(); Ok(GroupByScalar::Utf8(Box::new(array.value(row).into()))) } DataType::LargeUtf8 => { - let array = col.as_any().downcast_ref::().unwrap(); + let array = col.as_any().downcast_ref::>().unwrap(); Ok(GroupByScalar::Utf8(Box::new(array.value(row).into()))) } DataType::Boolean => { @@ -1171,39 +1111,30 @@ fn create_group_by_value(col: &ArrayRef, row: usize) -> Result { Ok(GroupByScalar::Boolean(array.value(row))) } DataType::Timestamp(TimeUnit::Millisecond, None) => { - let array = col - .as_any() - .downcast_ref::() - .unwrap(); - Ok(GroupByScalar::TimeMillisecond(array.value(row))) + let array = col.as_any().downcast_ref::().unwrap(); + Ok(GroupByScalar::TimestampMillisecond(array.value(row))) } DataType::Timestamp(TimeUnit::Microsecond, None) => { - let array = col - .as_any() - .downcast_ref::() - .unwrap(); - Ok(GroupByScalar::TimeMicrosecond(array.value(row))) + let array = col.as_any().downcast_ref::().unwrap(); + Ok(GroupByScalar::TimestampMicrosecond(array.value(row))) } DataType::Timestamp(TimeUnit::Nanosecond, None) => { - let array = col - .as_any() - .downcast_ref::() - .unwrap(); - Ok(GroupByScalar::TimeNanosecond(array.value(row))) + let array = col.as_any().downcast_ref::().unwrap(); + Ok(GroupByScalar::TimestampNanosecond(array.value(row))) } DataType::Date32 => { - let array = col.as_any().downcast_ref::().unwrap(); + let array = col.as_any().downcast_ref::().unwrap(); Ok(GroupByScalar::Date32(array.value(row))) } DataType::Dictionary(index_type, _) => match **index_type { - DataType::Int8 => dictionary_create_group_by_value::(col, row), - DataType::Int16 => dictionary_create_group_by_value::(col, row), - DataType::Int32 => dictionary_create_group_by_value::(col, row), - DataType::Int64 => dictionary_create_group_by_value::(col, row), - DataType::UInt8 => dictionary_create_group_by_value::(col, row), - DataType::UInt16 => dictionary_create_group_by_value::(col, row), - DataType::UInt32 => dictionary_create_group_by_value::(col, row), - DataType::UInt64 => dictionary_create_group_by_value::(col, row), + DataType::Int8 => dictionary_create_group_by_value::(col, row), + DataType::Int16 => dictionary_create_group_by_value::(col, row), + DataType::Int32 => dictionary_create_group_by_value::(col, row), + DataType::Int64 => dictionary_create_group_by_value::(col, row), + DataType::UInt8 => dictionary_create_group_by_value::(col, row), + DataType::UInt16 => dictionary_create_group_by_value::(col, row), + DataType::UInt32 => dictionary_create_group_by_value::(col, row), + DataType::UInt64 => dictionary_create_group_by_value::(col, row), _ => Err(DataFusionError::NotImplemented(format!( "Unsupported GROUP BY type (dictionary index type 
not supported) {}", col.data_type(), @@ -1232,7 +1163,7 @@ pub(crate) fn create_group_by_values( #[cfg(test)] mod tests { - use arrow::array::Float64Array; + use arrow2::array::Float64Array; use super::*; use crate::physical_plan::expressions::{col, Avg}; @@ -1255,16 +1186,16 @@ mod tests { RecordBatch::try_new( schema.clone(), vec![ - Arc::new(UInt32Array::from(vec![2, 3, 4, 4])), - Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0])), + Arc::new(UInt32Array::from_slice(&[2, 3, 4, 4])), + Arc::new(Float64Array::from_slice(&[1.0, 2.0, 3.0, 4.0])), ], ) .unwrap(), RecordBatch::try_new( schema, vec![ - Arc::new(UInt32Array::from(vec![2, 3, 3, 4])), - Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0])), + Arc::new(UInt32Array::from_slice(&[2, 3, 3, 4])), + Arc::new(Float64Array::from_slice(&[1.0, 2.0, 3.0, 4.0])), ], ) .unwrap(), diff --git a/datafusion/src/physical_plan/hash_join.rs b/datafusion/src/physical_plan/hash_join.rs index 01551cd4daf4c..6bff236dd9b34 100644 --- a/datafusion/src/physical_plan/hash_join.rs +++ b/datafusion/src/physical_plan/hash_join.rs @@ -21,16 +21,6 @@ use ahash::CallHasher; use ahash::RandomState; -use arrow::{ - array::{ - ArrayData, ArrayRef, BooleanArray, Date32Array, Date64Array, Float32Array, - Float64Array, LargeStringArray, PrimitiveArray, TimestampMicrosecondArray, - TimestampMillisecondArray, TimestampNanosecondArray, UInt32BufferBuilder, - UInt32Builder, UInt64BufferBuilder, UInt64Builder, - }, - compute, - datatypes::{TimeUnit, UInt32Type, UInt64Type}, -}; use smallvec::{smallvec, SmallVec}; use std::{any::Any, usize}; use std::{hash::Hasher, sync::Arc}; @@ -41,18 +31,14 @@ use futures::{Stream, StreamExt, TryStreamExt}; use hashbrown::HashMap; use tokio::sync::Mutex; -use arrow::array::Array; -use arrow::datatypes::DataType; -use arrow::datatypes::{Schema, SchemaRef}; -use arrow::error::Result as ArrowResult; -use arrow::record_batch::RecordBatch; +use arrow2::datatypes::*; +use arrow2::error::Result as ArrowResult; +use arrow2::record_batch::RecordBatch; +use arrow2::{array::*, buffer::MutableBuffer}; -use arrow::array::{ - Int16Array, Int32Array, Int64Array, Int8Array, StringArray, UInt16Array, UInt32Array, - UInt64Array, UInt8Array, -}; +use arrow2::compute::take; -use super::expressions::col; +use super::{expressions::col, ArrayRef}; use super::{ hash_utils::{build_join_schema, check_join_is_valid, JoinOn, JoinType}, merge::MergeExec, @@ -66,6 +52,10 @@ use super::{ use crate::physical_plan::coalesce_batches::concat_batches; use log::debug; +type SchemaRef = Arc; +type StringArray = Utf8Array; +type LargeStringArray = Utf8Array; + // Maps a `u64` hash value based on the left ["on" values] to a list of indices with this key's value. // // Note that the `u64` keys are not stored in the hashmap (hence the `()` as key), but are only used @@ -501,10 +491,10 @@ fn build_batch_from_indices( schema: &Schema, left: &RecordBatch, right: &RecordBatch, - left_indices: UInt64Array, - right_indices: UInt32Array, + left_indices: Int64Array, + right_indices: Int32Array, column_indices: &[ColumnIndex], -) -> ArrowResult<(RecordBatch, UInt64Array)> { +) -> ArrowResult<(RecordBatch, Int64Array)> { // build the columns of the new [RecordBatch]: // 1. pick whether the column is from the left or right // 2. 
based on the pick, `take` items from the different RecordBatches @@ -513,10 +503,10 @@ fn build_batch_from_indices( for column_index in column_indices { let array = if column_index.is_left { let array = left.column(column_index.index); - compute::take(array.as_ref(), &left_indices, None)? + take::take(array.as_ref(), &left_indices)?.into() } else { let array = right.column(column_index.index); - compute::take(array.as_ref(), &right_indices, None)? + take::take(array.as_ref(), &right_indices)?.into() }; columns.push(array); } @@ -533,7 +523,7 @@ fn build_batch( schema: &Schema, column_indices: &[ColumnIndex], random_state: &RandomState, -) -> ArrowResult<(RecordBatch, UInt64Array)> { +) -> ArrowResult<(RecordBatch, Int64Array)> { let (left_indices, right_indices) = build_join_indexes( &left_data, &batch, @@ -588,7 +578,7 @@ fn build_join_indexes( left_on: &[String], right_on: &[String], random_state: &RandomState, -) -> Result<(UInt64Array, UInt32Array)> { +) -> Result<(Int64Array, Int32Array)> { let keys_values = right_on .iter() .map(|name| Ok(col(name).evaluate(right)?.into_array(right.num_rows()))) @@ -608,8 +598,8 @@ fn build_join_indexes( match join_type { JoinType::Inner => { // Using a buffer builder to avoid slower normal builder - let mut left_indices = UInt64BufferBuilder::new(0); - let mut right_indices = UInt32BufferBuilder::new(0); + let mut left_indices = MutableBuffer::::new(); + let mut right_indices = MutableBuffer::::new(); // Visit all of the right rows for (row, hash_value) in hash_values.iter().enumerate() { @@ -624,29 +614,29 @@ fn build_join_indexes( for &i in indices { // Check hash collisions if equal_rows(i as usize, row, &left_join_values, &keys_values)? { - left_indices.append(i); - right_indices.append(row as u32); + left_indices.push(i as i64); + right_indices.push(row as i32); } } } } - let left = ArrayData::builder(DataType::UInt64) - .len(left_indices.len()) - .add_buffer(left_indices.finish()) - .build(); - let right = ArrayData::builder(DataType::UInt32) - .len(right_indices.len()) - .add_buffer(right_indices.finish()) - .build(); Ok(( - PrimitiveArray::::from(left), - PrimitiveArray::::from(right), + PrimitiveArray::::from_data( + DataType::Int64, + left_indices.into(), + None, + ), + PrimitiveArray::::from_data( + DataType::Int32, + right_indices.into(), + None, + ), )) } JoinType::Left => { - let mut left_indices = UInt64Builder::new(0); - let mut right_indices = UInt32Builder::new(0); + let mut left_indices = MutableBuffer::::new(); + let mut right_indices = MutableBuffer::::new(); // First visit all of the rows for (row, hash_value) in hash_values.iter().enumerate() { @@ -656,17 +646,28 @@ fn build_join_indexes( for &i in indices { // Collision check if equal_rows(i as usize, row, &left_join_values, &keys_values)? 
{ - left_indices.append_value(i)?; - right_indices.append_value(row as u32)?; + left_indices.push(i as i64); + right_indices.push(row as i32); } } }; } - Ok((left_indices.finish(), right_indices.finish())) + Ok(( + PrimitiveArray::::from_data( + DataType::Int64, + left_indices.into(), + None, + ), + PrimitiveArray::::from_data( + DataType::Int32, + right_indices.into(), + None, + ), + )) } JoinType::Right | JoinType::Full => { - let mut left_indices = UInt64Builder::new(0); - let mut right_indices = UInt32Builder::new(0); + let mut left_indices = Primitive::::new(); + let mut right_indices = Primitive::::new(); for (row, hash_value) in hash_values.iter().enumerate() { match left.raw_entry().from_hash(*hash_value, |_| true) { @@ -678,22 +679,25 @@ fn build_join_indexes( &left_join_values, &keys_values, )? { - left_indices.append_value(i)?; - right_indices.append_value(row as u32)?; + left_indices.push(Some(i as i64).as_ref()); + right_indices.push(Some(row as i32).as_ref()); } else { - left_indices.append_null()?; - right_indices.append_value(row as u32)?; + left_indices.push(None); + right_indices.push(Some(row as i32).as_ref()); } } } None => { // when no match, add the row with None for the left side - left_indices.append_null()?; - right_indices.append_value(row as u32)?; + left_indices.push(None); + right_indices.push(Some(row as i32).as_ref()); } } } - Ok((left_indices.finish(), right_indices.finish())) + Ok(( + left_indices.to(DataType::Int64), + right_indices.to(DataType::Int32), + )) } } } @@ -977,7 +981,7 @@ pub fn create_hashes<'a>( multi_col ); } - DataType::Int32 => { + DataType::Int32 | DataType::Date32 => { hash_array_primitive!( Int32Array, col, @@ -987,7 +991,7 @@ pub fn create_hashes<'a>( multi_col ); } - DataType::Int64 => { + DataType::Int64 | DataType::Timestamp(_, None) | DataType::Date64 => { hash_array_primitive!( Int64Array, col, @@ -997,79 +1001,29 @@ pub fn create_hashes<'a>( multi_col ); } - DataType::Float32 => { - hash_array_float!( - Float32Array, + DataType::Boolean => { + hash_array!( + BooleanArray, col, - u32, + u8, hashes_buffer, random_state, multi_col ); } - DataType::Float64 => { + DataType::Float32 => { hash_array_float!( - Float64Array, - col, - u64, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Timestamp(TimeUnit::Millisecond, None) => { - hash_array_primitive!( - TimestampMillisecondArray, - col, - i64, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Timestamp(TimeUnit::Microsecond, None) => { - hash_array_primitive!( - TimestampMicrosecondArray, - col, - i64, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Timestamp(TimeUnit::Nanosecond, None) => { - hash_array_primitive!( - TimestampNanosecondArray, - col, - i64, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Date32 => { - hash_array_primitive!( - Date32Array, - col, - i32, - hashes_buffer, - random_state, - multi_col - ); - } - DataType::Date64 => { - hash_array_primitive!( - Date64Array, + Float32Array, col, - i64, + u8, hashes_buffer, random_state, multi_col ); } - DataType::Boolean => { - hash_array!( - BooleanArray, + DataType::Float64 => { + hash_array_float!( + Float64Array, col, u8, hashes_buffer, @@ -1116,24 +1070,24 @@ fn produce_unmatched( left_data: &JoinLeftData, ) -> ArrowResult { // Find indices which didn't match any right row (are false) - let unmatched_indices: Vec = visited_left_side + let unmatched_indices: MutableBuffer = visited_left_side .iter() .enumerate() .filter(|&(_, &value)| !value) - 
.map(|(index, _)| index as u64) + .map(|(index, _)| index as i64) .collect(); // generate batches by taking values from the left side and generating columns filled with null on the right side - let indices = UInt64Array::from_iter_values(unmatched_indices); + let indices = Int64Array::from_data(DataType::Int64, unmatched_indices.into(), None); let num_rows = indices.len(); let mut columns: Vec> = Vec::with_capacity(schema.fields().len()); for (idx, column_index) in column_indices.iter().enumerate() { let array = if column_index.is_left { let array = left_data.1.column(column_index.index); - compute::take(array.as_ref(), &indices, None).unwrap() + take::take(array.as_ref(), &indices)?.into() } else { - let datatype = schema.field(idx).data_type(); - arrow::array::new_null_array(datatype, num_rows) + let datatype = schema.field(idx).data_type().clone(); + new_null_array(datatype, num_rows).into() }; columns.push(array); @@ -1173,7 +1127,7 @@ impl Stream for HashJoinStream { match self.join_type { JoinType::Left | JoinType::Full => { left_side.iter().flatten().for_each(|x| { - self.visited_left_side[x as usize] = true; + self.visited_left_side[*x as usize] = true; }); } JoinType::Inner | JoinType::Right => {} @@ -1243,7 +1197,7 @@ mod tests { c: (&str, &Vec), ) -> Arc { let batch = build_table_i32(a, b, c); - let schema = batch.schema(); + let schema = batch.schema().clone(); Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) } @@ -1381,7 +1335,7 @@ mod tests { ); let batch2 = build_table_i32(("a1", &vec![2]), ("b2", &vec![2]), ("c1", &vec![9])); - let schema = batch1.schema(); + let schema = batch1.schema().clone(); let left = Arc::new( MemoryExec::try_new(&[vec![batch1], vec![batch2]], schema, None).unwrap(), ); @@ -1433,7 +1387,7 @@ mod tests { ); let batch2 = build_table_i32(("a2", &vec![30]), ("b1", &vec![5]), ("c2", &vec![90])); - let schema = batch1.schema(); + let schema = batch1.schema().clone(); let right = Arc::new( MemoryExec::try_new(&[vec![batch1], vec![batch2]], schema, None).unwrap(), ); @@ -1483,7 +1437,7 @@ mod tests { c: (&str, &Vec), ) -> Arc { let batch = build_table_i32(a, b, c); - let schema = batch.schema(); + let schema = batch.schema().clone(); Arc::new( MemoryExec::try_new(&[vec![batch.clone(), batch]], schema, None).unwrap(), ) @@ -1575,7 +1529,7 @@ mod tests { ); let right = build_table_i32(("a2", &vec![]), ("b1", &vec![]), ("c2", &vec![])); let on = &[("b1", "b1")]; - let schema = right.schema(); + let schema = right.schema().clone(); let right = Arc::new(MemoryExec::try_new(&[vec![right]], schema, None).unwrap()); let join = join(left, right, on, &JoinType::Left).unwrap(); @@ -1607,7 +1561,7 @@ mod tests { ); let right = build_table_i32(("a2", &vec![]), ("b2", &vec![]), ("c2", &vec![])); let on = &[("b1", "b2")]; - let schema = right.schema(); + let schema = right.schema().clone(); let right = Arc::new(MemoryExec::try_new(&[vec![right]], schema, None).unwrap()); let join = join(left, right, on, &JoinType::Full).unwrap(); @@ -1784,18 +1738,11 @@ mod tests { &random_state, )?; - let mut left_ids = UInt64Builder::new(0); - left_ids.append_value(0)?; - left_ids.append_value(1)?; - - let mut right_ids = UInt32Builder::new(0); - - right_ids.append_value(0)?; - right_ids.append_value(1)?; - - assert_eq!(left_ids.finish(), l); + let left_ids = Int64Array::from_slice(&[0, 1]); + let right_ids = Int32Array::from_slice(&[0, 1]); - assert_eq!(right_ids.finish(), r); + assert_eq!(left_ids, l); + assert_eq!(right_ids, r); Ok(()) } diff --git 
a/datafusion/src/physical_plan/hash_utils.rs b/datafusion/src/physical_plan/hash_utils.rs index 7e030af3a124c..5bcf74547d444 100644 --- a/datafusion/src/physical_plan/hash_utils.rs +++ b/datafusion/src/physical_plan/hash_utils.rs @@ -18,7 +18,7 @@ //! Functionality used both on logical and physical plans use crate::error::{DataFusionError, Result}; -use arrow::datatypes::{Field, Schema}; +use arrow2::datatypes::{Field, Schema}; use std::collections::HashSet; /// All valid types of joins. diff --git a/datafusion/src/physical_plan/limit.rs b/datafusion/src/physical_plan/limit.rs index c56dbe141b2d1..33ba2a28f4783 100644 --- a/datafusion/src/physical_plan/limit.rs +++ b/datafusion/src/physical_plan/limit.rs @@ -29,11 +29,15 @@ use crate::error::{DataFusionError, Result}; use crate::physical_plan::{ DisplayFormatType, Distribution, ExecutionPlan, Partitioning, }; -use arrow::array::ArrayRef; -use arrow::compute::limit; -use arrow::datatypes::SchemaRef; -use arrow::error::Result as ArrowResult; -use arrow::record_batch::RecordBatch; + +use arrow2::array::Array; +use arrow2::compute::limit::limit; +use arrow2::datatypes::Schema; +use arrow2::error::Result as ArrowResult; +use arrow2::record_batch::RecordBatch; + +type SchemaRef = Arc; +type ArrayRef = Arc; use super::{RecordBatchStream, SendableRecordBatchStream}; @@ -218,10 +222,10 @@ impl ExecutionPlan for LocalLimitExec { /// Truncate a RecordBatch to maximum of n rows pub fn truncate_batch(batch: &RecordBatch, n: usize) -> RecordBatch { let limited_columns: Vec = (0..batch.num_columns()) - .map(|i| limit(batch.column(i), n)) + .map(|i| limit(batch.column(i).as_ref(), n).into()) .collect(); - RecordBatch::try_new(batch.schema(), limited_columns).unwrap() + RecordBatch::try_new(batch.schema().clone(), limited_columns).unwrap() } /// A Limit stream limits the stream to up to `limit` rows. diff --git a/datafusion/src/physical_plan/math_expressions.rs b/datafusion/src/physical_plan/math_expressions.rs index cfc239cde6613..42d666bcb621a 100644 --- a/datafusion/src/physical_plan/math_expressions.rs +++ b/datafusion/src/physical_plan/math_expressions.rs @@ -16,42 +16,35 @@ // under the License. //! Math expressions -use super::{ColumnarValue, ScalarValue}; -use crate::error::{DataFusionError, Result}; -use arrow::array::{Float32Array, Float64Array}; -use arrow::datatypes::DataType; use rand::{thread_rng, Rng}; use std::iter; use std::sync::Arc; -macro_rules! downcast_compute_op { - ($ARRAY:expr, $NAME:expr, $FUNC:ident, $TYPE:ident) => {{ - let n = $ARRAY.as_any().downcast_ref::<$TYPE>(); - match n { - Some(array) => { - let res: $TYPE = - arrow::compute::kernels::arity::unary(array, |x| x.$FUNC()); - Ok(Arc::new(res)) - } - _ => Err(DataFusionError::Internal(format!( - "Invalid data type for {}", - $NAME - ))), - } - }}; -} +use arrow2::array::Float64Array; +use arrow2::compute::arity::unary; +use arrow2::datatypes::DataType; + +use super::{ColumnarValue, ScalarValue}; +use crate::error::{DataFusionError, Result}; macro_rules! 
unary_primitive_array_op { ($VALUE:expr, $NAME:expr, $FUNC:ident) => {{ match ($VALUE) { ColumnarValue::Array(array) => match array.data_type() { DataType::Float32 => { - let result = downcast_compute_op!(array, $NAME, $FUNC, Float32Array); - Ok(ColumnarValue::Array(result?)) + let array = array.as_any().downcast_ref().unwrap(); + let array = unary::( + array, + |x| x.$FUNC() as f64, + DataType::Float32, + ); + Ok(ColumnarValue::Array(Arc::new(array))) } DataType::Float64 => { - let result = downcast_compute_op!(array, $NAME, $FUNC, Float64Array); - Ok(ColumnarValue::Array(result?)) + let array = array.as_any().downcast_ref().unwrap(); + let array = + unary::(array, |x| x.$FUNC(), DataType::Float64); + Ok(ColumnarValue::Array(Arc::new(array))) } other => Err(DataFusionError::Internal(format!( "Unsupported data type {:?} for function {}", @@ -114,7 +107,7 @@ pub fn random(args: &[ColumnarValue]) -> Result { }; let mut rng = thread_rng(); let values = iter::repeat_with(|| rng.gen_range(0.0..1.0)).take(len); - let array = Float64Array::from_iter_values(values); + let array = Float64Array::from_trusted_len_values_iter(values); Ok(ColumnarValue::Array(Arc::new(array))) } @@ -122,7 +115,7 @@ pub fn random(args: &[ColumnarValue]) -> Result { mod tests { use super::*; - use arrow::array::{Float64Array, NullArray}; + use arrow2::array::{Float64Array, NullArray}; #[test] fn test_random_expression() { diff --git a/datafusion/src/physical_plan/memory.rs b/datafusion/src/physical_plan/memory.rs index 85d8aeef073c1..e29d3d227fbb2 100644 --- a/datafusion/src/physical_plan/memory.rs +++ b/datafusion/src/physical_plan/memory.rs @@ -27,9 +27,10 @@ use super::{ SendableRecordBatchStream, }; use crate::error::{DataFusionError, Result}; -use arrow::datatypes::SchemaRef; -use arrow::error::Result as ArrowResult; -use arrow::record_batch::RecordBatch; +use arrow2::datatypes::Schema; +type SchemaRef = Arc; +use arrow2::error::Result as ArrowResult; +use arrow2::record_batch::RecordBatch; use async_trait::async_trait; use futures::Stream; diff --git a/datafusion/src/physical_plan/merge.rs b/datafusion/src/physical_plan/merge.rs index c65227c161148..6cc4a8dcd7637 100644 --- a/datafusion/src/physical_plan/merge.rs +++ b/datafusion/src/physical_plan/merge.rs @@ -28,9 +28,9 @@ use futures::Stream; use async_trait::async_trait; -use arrow::record_batch::RecordBatch; -use arrow::{ - datatypes::SchemaRef, +use arrow2::record_batch::RecordBatch; +use arrow2::{ + datatypes::Schema, error::{ArrowError, Result as ArrowResult}, }; @@ -41,6 +41,8 @@ use crate::physical_plan::{DisplayFormatType, ExecutionPlan, Partitioning}; use super::SendableRecordBatchStream; use pin_project_lite::pin_project; +type SchemaRef = Arc; + /// Merge execution plan executes partitions in parallel and combines them into a single /// partition. No guarantees are made about the order of the resulting partition. 
#[derive(Debug)] @@ -128,7 +130,8 @@ impl ExecutionPlan for MergeExec { Err(e) => { // If send fails, plan being torn // down, no place to send the error - let arrow_error = ArrowError::ExternalError(Box::new(e)); + let arrow_error = + ArrowError::External("".to_string(), Box::new(e)); sender.send(Err(arrow_error)).await.ok(); return; } diff --git a/datafusion/src/physical_plan/mod.rs b/datafusion/src/physical_plan/mod.rs index e915b2c257ddc..436f611bc4e13 100644 --- a/datafusion/src/physical_plan/mod.rs +++ b/datafusion/src/physical_plan/mod.rs @@ -25,10 +25,13 @@ use std::{any::Any, pin::Pin}; use crate::execution::context::ExecutionContextState; use crate::logical_plan::LogicalPlan; use crate::{error::Result, scalar::ScalarValue}; -use arrow::datatypes::{DataType, Schema, SchemaRef}; -use arrow::error::Result as ArrowResult; -use arrow::record_batch::RecordBatch; -use arrow::{array::ArrayRef, datatypes::Field}; +use arrow2::array::Array; +use arrow2::datatypes::{DataType, Field, Schema}; +use arrow2::error::Result as ArrowResult; +use arrow2::record_batch::RecordBatch; + +type ArrayRef = Arc; +type SchemaRef = Arc; use async_trait::async_trait; pub use display::DisplayFormatType; @@ -37,7 +40,7 @@ use futures::stream::Stream; use self::{display::DisplayableExecutionPlan, merge::MergeExec}; use hashbrown::HashMap; -/// Trait for types that stream [arrow::record_batch::RecordBatch] +/// Trait for types that stream [arrow2::record_batch::RecordBatch] pub trait RecordBatchStream: Stream> { /// Returns the schema of this `RecordBatchStream`. /// @@ -527,6 +530,7 @@ pub mod string_expressions; pub mod type_coercion; pub mod udaf; pub mod udf; + #[cfg(feature = "unicode_expressions")] pub mod unicode_expressions; pub mod union; diff --git a/datafusion/src/physical_plan/parquet.rs b/datafusion/src/physical_plan/parquet.rs index dd5e77bc21eb9..051d800dea470 100644 --- a/datafusion/src/physical_plan/parquet.rs +++ b/datafusion/src/physical_plan/parquet.rs @@ -23,35 +23,23 @@ use std::sync::Arc; use std::task::{Context, Poll}; use std::{any::Any, collections::HashSet}; -use super::{ - planner::DefaultPhysicalPlanner, ColumnarValue, PhysicalExpr, RecordBatchStream, - SendableRecordBatchStream, -}; +use super::{RecordBatchStream, SendableRecordBatchStream}; use crate::physical_plan::{common, DisplayFormatType, ExecutionPlan, Partitioning}; use crate::{ error::{DataFusionError, Result}, - execution::context::ExecutionContextState, logical_plan::{Expr, Operator}, optimizer::utils, }; -use arrow::record_batch::RecordBatch; -use arrow::{ - array::new_null_array, + +use arrow2::{ + array::*, + datatypes::*, error::{ArrowError, Result as ArrowResult}, -}; -use arrow::{ - array::{make_array, ArrayData, ArrayRef, BooleanArray, BooleanBufferBuilder}, - buffer::MutableBuffer, - datatypes::{DataType, Field, Schema, SchemaRef}, -}; -use parquet::file::{ - metadata::RowGroupMetaData, - reader::{FileReader, SerializedFileReader}, - statistics::Statistics as ParquetStatistics, + io::parquet::read::{self, CompressedPage, FileMetaData}, + record_batch::RecordBatch, }; use fmt::Debug; -use parquet::arrow::{ArrowReader, ParquetFileArrowReader}; use tokio::{ sync::mpsc::{channel, Receiver, Sender}, task, @@ -62,23 +50,22 @@ use crate::datasource::datasource::{ColumnStatistics, Statistics}; use async_trait::async_trait; use futures::stream::{Stream, StreamExt}; +type SchemaRef = Arc; +type ArrayRef = Arc; + /// Execution plan for scanning one or more Parquet partitions #[derive(Debug, Clone)] pub struct ParquetExec { 
/// Parquet partitions to read partitions: Vec, /// Schema after projection is applied - schema: SchemaRef, + schema: Arc, /// Projection for which columns to load projection: Vec, - /// Batch size - batch_size: usize, /// Statistics for the data set (sum of statistics for all partitions) statistics: Statistics, - /// Optional predicate builder - predicate_builder: Option, /// Optional limit of the number of rows - limit: Option, + limit: usize, } /// Represents one partition of a Parquet data set and this currently means one Parquet file. @@ -93,7 +80,7 @@ pub struct ParquetExec { #[derive(Debug, Clone)] pub struct ParquetPartition { /// The Parquet filename for this partition - pub filenames: Vec, + pub filename: String, /// Statistics for this partition pub statistics: Statistics, } @@ -105,7 +92,6 @@ impl ParquetExec { path: &str, projection: Option>, predicate: Option, - batch_size: usize, max_concurrency: usize, limit: Option, ) -> Result { @@ -126,7 +112,6 @@ impl ParquetExec { &filenames, projection, predicate, - batch_size, max_concurrency, limit, ) @@ -139,80 +124,45 @@ impl ParquetExec { filenames: &[&str], projection: Option>, predicate: Option, - batch_size: usize, max_concurrency: usize, limit: Option, ) -> Result { + let limit = limit.unwrap_or(usize::MAX); // build a list of Parquet partitions with statistics and gather all unique schemas // used in this data set - let mut schemas: Vec = vec![]; + let mut schemas: Vec> = vec![]; let mut partitions = Vec::with_capacity(max_concurrency); - let filenames: Vec = filenames.iter().map(|s| s.to_string()).collect(); - let chunks = split_files(&filenames, max_concurrency); let mut num_rows = 0; let mut total_byte_size = 0; - let mut null_counts = Vec::new(); let mut limit_exhausted = false; - for chunk in chunks { - let mut filenames: Vec = - chunk.iter().map(|x| x.to_string()).collect(); - let mut total_files = 0; - for filename in &filenames { - total_files += 1; - let file = File::open(filename)?; - let file_reader = Arc::new(SerializedFileReader::new(file)?); - let mut arrow_reader = ParquetFileArrowReader::new(file_reader); - let meta_data = arrow_reader.get_metadata(); - // collect all the unique schemas in this data set - let schema = arrow_reader.get_schema()?; - let num_fields = schema.fields().len(); - if schemas.is_empty() || schema != schemas[0] { - schemas.push(schema); - null_counts = vec![0; num_fields] - } - for row_group_meta in meta_data.row_groups() { - num_rows += row_group_meta.num_rows(); - total_byte_size += row_group_meta.total_byte_size(); - - // Currently assumes every Parquet file has same schema - // https://issues.apache.org/jira/browse/ARROW-11017 - let columns_null_counts = row_group_meta - .columns() - .iter() - .flat_map(|c| c.statistics().map(|stats| stats.null_count())); - - for (i, cnt) in columns_null_counts.enumerate() { - null_counts[i] += cnt - } - if limit.map(|x| num_rows >= x as i64).unwrap_or(false) { - limit_exhausted = true; - break; - } - } - } + for filename in filenames { + let mut file = File::open(filename)?; + let file_metadata = read::read_metadata(&mut file)?; + let schema = read::get_schema(&file_metadata)?; + let schema = Arc::new(schema); - let column_stats = null_counts + let row_count: i64 = (&file_metadata.row_groups) .iter() - .map(|null_count| ColumnStatistics { - null_count: Some(*null_count as usize), - max_value: None, - min_value: None, - distinct_count: None, - }) - .collect(); + .map(|group| group.num_rows()) + .sum(); + let row_count = row_count as usize; + 
num_rows += row_count; + + if schemas.is_empty() || schema != schemas[0] { + schemas.push(schema); + } let statistics = Statistics { - num_rows: Some(num_rows as usize), - total_byte_size: Some(total_byte_size as usize), - column_statistics: Some(column_stats), + num_rows: Some(row_count), + total_byte_size: None, + column_statistics: None, }; // remove files that are not needed in case of limit - filenames.truncate(total_files); partitions.push(ParquetPartition { - filenames, + filename: filename.to_string(), statistics, }); - if limit_exhausted { + if num_rows > limit { break; } } @@ -228,28 +178,16 @@ impl ParquetExec { ))); } let schema = schemas[0].clone(); - let predicate_builder = predicate.and_then(|predicate_expr| { - RowGroupPredicateBuilder::try_new(&predicate_expr, schema.clone()).ok() - }); - Ok(Self::new( - partitions, - schema, - projection, - predicate_builder, - batch_size, - limit, - )) + Ok(Self::new(partitions, schema, projection, limit)) } /// Create a new Parquet reader execution plan with provided partitions and schema pub fn new( partitions: Vec, - schema: Schema, + schema: Arc, projection: Option>, - predicate_builder: Option, - batch_size: usize, - limit: Option, + limit: usize, ) -> Self { let projection = match projection { Some(p) => p, @@ -310,8 +248,6 @@ impl ParquetExec { partitions, schema: Arc::new(projected_schema), projection, - predicate_builder, - batch_size, statistics, limit, } @@ -327,11 +263,6 @@ impl ParquetExec { &self.projection } - /// Batch size - pub fn batch_size(&self) -> usize { - self.batch_size - } - /// Statistics for the data set (sum of statistics for all partitions) pub fn statistics(&self) -> &Statistics { &self.statistics @@ -340,16 +271,16 @@ impl ParquetExec { impl ParquetPartition { /// Create a new parquet partition - pub fn new(filenames: Vec, statistics: Statistics) -> Self { + pub fn new(filename: String, statistics: Statistics) -> Self { Self { - filenames, + filename, statistics, } } /// The Parquet filename for this partition - pub fn filenames(&self) -> &[String] { - &self.filenames + pub fn filename(&self) -> &String { + &self.filename } /// Statistics for this partition @@ -358,20 +289,22 @@ impl ParquetPartition { } } +/* #[derive(Debug, Clone)] /// Predicate builder used for generating of predicate functions, used to filter row group metadata pub struct RowGroupPredicateBuilder { - parquet_schema: Schema, + parquet_schema: Arc, predicate_expr: Arc, stat_column_req: Vec<(String, StatisticsType, Field)>, } + impl RowGroupPredicateBuilder { /// Try to create a new instance of PredicateExpressionBuilder. /// This will translate the filter expression into a statistics predicate expression /// (for example (column / 2) = 4 becomes (column_min / 2) <= 4 && 4 <= (column_max / 2)), /// then convert it to a DataFusion PhysicalExpression and cache it for later use by build_row_group_predicate. - pub fn try_new(expr: &Expr, parquet_schema: Schema) -> Result { + pub fn try_new(expr: &Expr, parquet_schema: Arc) -> Result { // build predicate expression once let mut stat_column_req = Vec::<(String, StatisticsType, Field)>::new(); let logical_predicate_expr = @@ -454,36 +387,7 @@ impl RowGroupPredicateBuilder { } } } - -/// Build a RecordBatch from a list of RowGroupMetadata structs, -/// creating arrays, one for each statistics column, -/// as requested in the stat_column_req parameter. 
-fn build_statistics_record_batch( - row_groups: &[RowGroupMetaData], - parquet_schema: &Schema, - stat_column_req: &[(String, StatisticsType, Field)], -) -> Result { - let mut fields = Vec::::new(); - let mut arrays = Vec::::new(); - for (column_name, statistics_type, stat_field) in stat_column_req { - if let Some((column_index, _)) = parquet_schema.column_with_name(column_name) { - let statistics = row_groups - .iter() - .map(|g| g.column(column_index).statistics()) - .collect::>(); - let array = build_statistics_array( - &statistics, - *statistics_type, - stat_field.data_type(), - ); - fields.push(stat_field.clone()); - arrays.push(array); - } - } - let schema = Arc::new(Schema::new(fields)); - RecordBatch::try_new(schema, arrays) - .map_err(|err| DataFusionError::Plan(err.to_string())) -} +*/ struct StatisticsExpressionBuilder<'a> { column_name: String, @@ -712,89 +616,25 @@ enum StatisticsType { Max, } -fn build_statistics_array( - statistics: &[Option<&ParquetStatistics>], - statistics_type: StatisticsType, - data_type: &DataType, -) -> ArrayRef { - let statistics_count = statistics.len(); - let first_group_stats = statistics.iter().find(|s| s.is_some()); - let first_group_stats = if let Some(Some(statistics)) = first_group_stats { - // found first row group with statistics defined - statistics - } else { - // no row group has statistics defined - return new_null_array(data_type, statistics_count); - }; - - let (data_size, arrow_type) = match first_group_stats { - ParquetStatistics::Int32(_) => (std::mem::size_of::(), DataType::Int32), - ParquetStatistics::Int64(_) => (std::mem::size_of::(), DataType::Int64), - ParquetStatistics::Float(_) => (std::mem::size_of::(), DataType::Float32), - ParquetStatistics::Double(_) => (std::mem::size_of::(), DataType::Float64), - ParquetStatistics::ByteArray(_) if data_type == &DataType::Utf8 => { - (0, DataType::Utf8) - } - _ => { - // type of statistics not supported - return new_null_array(data_type, statistics_count); - } - }; - - let statistics = statistics.iter().map(|s| { - s.filter(|s| s.has_min_max_set()) - .map(|s| match statistics_type { - StatisticsType::Min => s.min_bytes(), - StatisticsType::Max => s.max_bytes(), +type Payload = Vec>; + +// Task of the producer of compressed pages. This performs minimal CPU work +fn producer_task(path: &str, response_tx: Sender) -> Result<()> { + let mut file = File::open(path)?; + let metadata = read::read_metadata(&mut file)?; + for row_group in 0..metadata.row_groups.len() { + let columns = (0..metadata.schema().num_columns()) + .map(|column| { + Ok( + read::get_page_iterator(&metadata, row_group, column, &mut file)? 
+ .map(|x| x.map_err(|x| ArrowError::from_external_error(x))) + .collect::>>()?, + ) }) - }); - - if arrow_type == DataType::Utf8 { - let data_size = statistics - .clone() - .map(|x| x.map(|b| b.len()).unwrap_or(0)) - .sum(); - let mut builder = - arrow::array::StringBuilder::with_capacity(statistics_count, data_size); - let string_statistics = - statistics.map(|x| x.and_then(|bytes| std::str::from_utf8(bytes).ok())); - for maybe_string in string_statistics { - match maybe_string { - Some(string_value) => builder.append_value(string_value).unwrap(), - None => builder.append_null().unwrap(), - }; - } - return Arc::new(builder.finish()); - } - - let mut data_buffer = MutableBuffer::new(statistics_count * data_size); - let mut bitmap_builder = BooleanBufferBuilder::new(statistics_count); - let mut null_count = 0; - for s in statistics { - if let Some(stat_data) = s { - bitmap_builder.append(true); - data_buffer.extend_from_slice(stat_data); - } else { - bitmap_builder.append(false); - data_buffer.resize(data_buffer.len() + data_size, 0); - null_count += 1; - } + .collect::>>()?; + response_tx.blocking_send(columns); } - - let mut builder = ArrayData::builder(arrow_type) - .len(statistics_count) - .add_buffer(data_buffer.into()); - if null_count > 0 { - builder = builder.null_bit_buffer(bitmap_builder.finish()); - } - let array_data = builder.build(); - let statistics_array = make_array(array_data); - if statistics_array.data_type() == data_type { - return statistics_array; - } - // cast statistics array to required data type - arrow::compute::cast(&statistics_array, data_type) - .unwrap_or_else(|_| new_null_array(data_type, statistics_count)) + Ok(()) } #[async_trait] @@ -835,32 +675,20 @@ impl ExecutionPlan for ParquetExec { async fn execute(&self, partition: usize) -> Result { // because the parquet implementation is not thread-safe, it is necessary to execute // on a thread and communicate with channels - let (response_tx, response_rx): ( - Sender>, - Receiver>, - ) = channel(2); + let (response_tx, response_rx): (Sender, Receiver) = channel(2); - let filenames = self.partitions[partition].filenames.clone(); + let path = self.partitions[partition].filename.clone(); let projection = self.projection.clone(); - let predicate_builder = self.predicate_builder.clone(); - let batch_size = self.batch_size; let limit = self.limit; - task::spawn_blocking(move || { - if let Err(e) = read_files( - &filenames, - &projection, - &predicate_builder, - batch_size, - response_tx, - limit, - ) { - println!("Parquet reader thread terminated due to error: {:?}", e); - } - }); + let mut file = File::open(path.clone())?; + let metadata = read::read_metadata(&mut file)?; + + task::spawn_blocking(move || producer_task(&path, response_tx).unwrap()); Ok(Box::pin(ParquetStream { schema: self.schema.clone(), + metadata, inner: ReceiverStream::new(response_rx), })) } @@ -875,15 +703,12 @@ impl ExecutionPlan for ParquetExec { let files: Vec<_> = self .partitions .iter() - .map(|pp| pp.filenames.iter()) - .flatten() - .map(|s| s.as_str()) + .map(|pp| pp.filename.as_str()) .collect(); write!( f, - "ParquetExec: batch_size={}, limit={:?}, partitions=[{}]", - self.batch_size, + "ParquetExec: limit={:?}, partitions=[{}]", self.limit, files.join(", ") ) @@ -892,84 +717,33 @@ impl ExecutionPlan for ParquetExec { } } -fn send_result( - response_tx: &Sender>, - result: ArrowResult, -) -> Result<()> { - // Note this function is running on its own blockng tokio thread so blocking here is ok. 
- response_tx - .blocking_send(result) - .map_err(|e| DataFusionError::Execution(e.to_string()))?; - Ok(()) -} - -fn read_files( - filenames: &[String], - projection: &[usize], - predicate_builder: &Option, - batch_size: usize, - response_tx: Sender>, - limit: Option, -) -> Result<()> { - let mut total_rows = 0; - 'outer: for filename in filenames { - let file = File::open(&filename)?; - let mut file_reader = SerializedFileReader::new(file)?; - if let Some(predicate_builder) = predicate_builder { - let row_group_predicate = predicate_builder - .build_row_group_predicate(file_reader.metadata().row_groups()); - file_reader.filter_row_groups(&row_group_predicate); - } - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(file_reader)); - let mut batch_reader = arrow_reader - .get_record_reader_by_columns(projection.to_owned(), batch_size)?; - loop { - match batch_reader.next() { - Some(Ok(batch)) => { - //println!("ParquetExec got new batch from {}", filename); - total_rows += batch.num_rows(); - send_result(&response_tx, Ok(batch))?; - if limit.map(|l| total_rows >= l).unwrap_or(false) { - break 'outer; - } - } - None => { - break; - } - Some(Err(e)) => { - let err_msg = format!( - "Error reading batch from {}: {}", - filename, - e.to_string() - ); - // send error to operator - send_result( - &response_tx, - Err(ArrowError::ParquetError(err_msg.clone())), - )?; - // terminate thread with error - return Err(DataFusionError::Execution(err_msg)); - } - } - } - } - - // finished reading files (dropping response_tx will close - // channel) - Ok(()) -} - -fn split_files(filenames: &[String], n: usize) -> Vec<&[String]> { - let mut chunk_size = filenames.len() / n; - if filenames.len() % n > 0 { - chunk_size += 1; - } - filenames.chunks(chunk_size).collect() +struct ParquetStream { + schema: SchemaRef, + metadata: FileMetaData, + inner: ReceiverStream, } -struct ParquetStream { +fn deserialize( + columns: Vec>, + metadata: &FileMetaData, schema: SchemaRef, - inner: ReceiverStream>, +) -> ArrowResult { + let data_types = schema.fields().iter().map(|field| field.data_type()); + let descriptors = metadata.row_groups[0] + .columns() + .iter() + .map(|x| x.descriptor()); + let columns = columns + .into_iter() + .zip(descriptors) + .zip(data_types) + .map(|((pages, descriptor), type_)| { + let array = + read::page_iter_to_array(pages.into_iter().map(|x| Ok(x)), descriptor)?; + arrow2::compute::cast::cast(array.as_ref(), type_).map(|x| x.into()) + }) + .collect::>>()?; + RecordBatch::try_new(schema, columns) } impl Stream for ParquetStream { @@ -979,7 +753,9 @@ impl Stream for ParquetStream { mut self: std::pin::Pin<&mut Self>, cx: &mut Context<'_>, ) -> Poll> { - self.inner.poll_next_unpin(cx) + self.inner + .poll_next_unpin(cx) + .map(|x| x.map(|x| deserialize(x, &self.metadata, self.schema.clone()))) } } @@ -989,10 +765,11 @@ impl RecordBatchStream for ParquetStream { } } +/* #[cfg(test)] mod tests { use super::*; - use arrow::array::{Int32Array, StringArray}; + use arrow2::array::{Int32Array, StringArray}; use futures::StreamExt; use parquet::basic::Type as PhysicalType; use parquet::schema::types::SchemaDescPtr; @@ -1035,7 +812,7 @@ mod tests { #[tokio::test] async fn test() -> Result<()> { - let testdata = arrow::util::test_util::parquet_test_data(); + let testdata = crate::test::parquet_test_data(); let filename = format!("{}/alltypes_plain.parquet", testdata); let parquet_exec = ParquetExec::try_from_path( &filename, @@ -1545,3 +1322,4 @@ mod tests { 
Arc::new(SchemaDescriptor::new(Arc::new(schema))) } } +*/ diff --git a/datafusion/src/physical_plan/planner.rs b/datafusion/src/physical_plan/planner.rs index 9e7dc7172b820..f0440da12f494 100644 --- a/datafusion/src/physical_plan/planner.rs +++ b/datafusion/src/physical_plan/planner.rs @@ -48,13 +48,15 @@ use crate::{ error::{DataFusionError, Result}, physical_plan::displayable, }; -use arrow::{compute::can_cast_types, datatypes::DataType}; -use arrow::compute::SortOptions; -use arrow::datatypes::{Schema, SchemaRef}; +use arrow2::compute::cast::can_cast_types; +use arrow2::compute::sort::SortOptions; +use arrow2::datatypes::*; use expressions::col; use log::debug; +type SchemaRef = Arc; + /// This trait exposes the ability to plan an [`ExecutionPlan`] out of a [`LogicalPlan`]. pub trait ExtensionPlanner { /// Create a physical plan for a [`UserDefinedLogicalNode`]. @@ -786,7 +788,7 @@ mod tests { logical_plan::{col, lit, sum, LogicalPlanBuilder}, physical_plan::SendableRecordBatchStream, }; - use arrow::datatypes::{DataType, Field, SchemaRef}; + use arrow2::datatypes::{DataType, Field}; use async_trait::async_trait; use fmt::Debug; use std::{any::Any, fmt}; @@ -804,7 +806,7 @@ mod tests { #[test] fn test_all_operators() -> Result<()> { - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = crate::test::arrow_test_data(); let path = format!("{}/csv/aggregate_test_100.csv", testdata); let options = CsvReadOptions::new().schema_infer_max_records(100); @@ -844,7 +846,7 @@ mod tests { #[test] fn test_with_csv_plan() -> Result<()> { - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = crate::test::arrow_test_data(); let path = format!("{}/csv/aggregate_test_100.csv", testdata); let options = CsvReadOptions::new().schema_infer_max_records(100); @@ -863,7 +865,7 @@ mod tests { #[test] fn errors() -> Result<()> { - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = crate::test::arrow_test_data(); let path = format!("{}/csv/aggregate_test_100.csv", testdata); let options = CsvReadOptions::new().schema_infer_max_records(100); @@ -965,7 +967,7 @@ mod tests { #[test] fn in_list_types() -> Result<()> { - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = crate::test::arrow_test_data(); let path = format!("{}/csv/aggregate_test_100.csv", testdata); let options = CsvReadOptions::new().schema_infer_max_records(100); @@ -1013,7 +1015,7 @@ mod tests { #[test] fn hash_agg_input_schema() -> Result<()> { - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = crate::test::arrow_test_data(); let path = format!("{}/csv/aggregate_test_100.csv", testdata); let options = CsvReadOptions::new().schema_infer_max_records(100); @@ -1036,7 +1038,7 @@ mod tests { #[test] fn hash_agg_group_by_partitioned() -> Result<()> { - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = crate::test::arrow_test_data(); let path = format!("{}/csv/aggregate_test_100.csv", testdata); let options = CsvReadOptions::new().schema_infer_max_records(100); diff --git a/datafusion/src/physical_plan/projection.rs b/datafusion/src/physical_plan/projection.rs index c0d78ff7168bf..e97bb7ca0e419 100644 --- a/datafusion/src/physical_plan/projection.rs +++ b/datafusion/src/physical_plan/projection.rs @@ -29,9 +29,12 @@ use crate::error::{DataFusionError, Result}; use crate::physical_plan::{ DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr, }; -use arrow::datatypes::{Field, Schema, SchemaRef}; -use 
arrow::error::Result as ArrowResult; -use arrow::record_batch::RecordBatch; + +use arrow2::datatypes::{Field, Schema}; +use arrow2::error::Result as ArrowResult; +use arrow2::record_batch::RecordBatch; + +type SchemaRef = Arc; use super::{RecordBatchStream, SendableRecordBatchStream}; use async_trait::async_trait; diff --git a/datafusion/src/physical_plan/regex_expressions.rs b/datafusion/src/physical_plan/regex_expressions.rs index b526e7259ef61..2bd9f7e15dd3e 100644 --- a/datafusion/src/physical_plan/regex_expressions.rs +++ b/datafusion/src/physical_plan/regex_expressions.rs @@ -25,32 +25,34 @@ use std::any::type_name; use std::sync::Arc; use crate::error::{DataFusionError, Result}; -use arrow::array::{ArrayRef, GenericStringArray, StringOffsetSizeTrait}; -use arrow::compute; +use arrow2::array::{Array, Offset, Utf8Array}; +use arrow2::compute; use hashbrown::HashMap; use regex::Regex; +type ArrayRef = Arc; + macro_rules! downcast_string_arg { ($ARG:expr, $NAME:expr, $T:ident) => {{ $ARG.as_any() - .downcast_ref::>() + .downcast_ref::>() .ok_or_else(|| { DataFusionError::Internal(format!( "could not cast {} to {}", $NAME, - type_name::>() + type_name::>() )) })? }}; } /// extract a specific group from a string column, using a regular expression -pub fn regexp_match(args: &[ArrayRef]) -> Result { +pub fn regexp_match(args: &[ArrayRef]) -> Result { match args.len() { - 2 => compute::regexp_match(downcast_string_arg!(args[0], "string", T), downcast_string_arg!(args[1], "pattern", T), None) - .map_err(DataFusionError::ArrowError), - 3 => compute::regexp_match(downcast_string_arg!(args[0], "string", T), downcast_string_arg!(args[1], "pattern", T), Some(downcast_string_arg!(args[1], "flags", T))) - .map_err(DataFusionError::ArrowError), + 2 => compute::regex_match::regex_match(downcast_string_arg!(args[0], "string", T), downcast_string_arg!(args[1], "pattern", T)) + .map_err(DataFusionError::ArrowError).map(|x| Arc::new(x) as Arc), + 3 => compute::regex_match::regex_match(downcast_string_arg!(args[0], "string", T), downcast_string_arg!(args[1], "pattern", T)) + .map_err(DataFusionError::ArrowError).map(|x| Arc::new(x) as Arc), other => Err(DataFusionError::Internal(format!( "regexp_match was called with {} arguments. It requires at least 2 and at most 3.", other @@ -72,7 +74,7 @@ fn regex_replace_posix_groups(replacement: &str) -> String { /// Replaces substring(s) matching a POSIX regular expression. 
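regexp_replace compiles each distinct pattern only once, memoizing the `Regex` in a `HashMap`, and rewrites PostgreSQL-style `\N` group references into the `regex` crate's `${N}` syntax before substituting (that is the job of `regex_replace_posix_groups`). A minimal per-row sketch of both ideas; the conversion rule shown here is a simplification and `posix_groups_to_dollar` is an illustrative name, not the helper above.

```rust
use regex::Regex;
use std::collections::HashMap;

/// Rewrite PostgreSQL-style backreferences (`\1`) into `regex`-crate syntax (`${1}`).
/// This simple rule is illustrative; the real helper may treat escapes differently.
fn posix_groups_to_dollar(replacement: &str) -> String {
    Regex::new(r"\\(\d+)")
        .unwrap()
        .replace_all(replacement, "$${$1}")
        .into_owned()
}

fn main() {
    // Compiling a Regex is expensive, so cache each distinct pattern once per call.
    let mut compiled: HashMap<String, Regex> = HashMap::new();
    let rows = [("Thomas", r".[mN]a.", "M"), ("abcdef", r"(ab)c", r"\1X")];
    for (value, pattern, replacement) in rows {
        let re = compiled
            .entry(pattern.to_string())
            .or_insert_with(|| Regex::new(pattern).unwrap());
        let replacement = posix_groups_to_dollar(replacement);
        // Prints "ThM" and "abXdef".
        println!("{}", re.replace_all(value, replacement.as_str()));
    }
}
```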
/// /// example: `regexp_replace('Thomas', '.[mN]a.', 'M') = 'ThM'` -pub fn regexp_replace(args: &[ArrayRef]) -> Result { +pub fn regexp_replace(args: &[ArrayRef]) -> Result { // creating Regex is expensive so create hashmap for memoization let mut patterns: HashMap = HashMap::new(); @@ -108,7 +110,7 @@ pub fn regexp_replace(args: &[ArrayRef]) -> Result Ok(None) }) - .collect::>>()?; + .collect::>>()?; Ok(Arc::new(result) as ArrayRef) } @@ -160,7 +162,7 @@ pub fn regexp_replace(args: &[ArrayRef]) -> Result Ok(None) }) - .collect::>>()?; + .collect::>>()?; Ok(Arc::new(result) as ArrayRef) } diff --git a/datafusion/src/physical_plan/repartition.rs b/datafusion/src/physical_plan/repartition.rs index 2599690bfc003..cf81807c2e6ac 100644 --- a/datafusion/src/physical_plan/repartition.rs +++ b/datafusion/src/physical_plan/repartition.rs @@ -25,9 +25,12 @@ use std::{any::Any, collections::HashMap, vec}; use crate::error::{DataFusionError, Result}; use crate::physical_plan::{DisplayFormatType, ExecutionPlan, Partitioning}; -use arrow::record_batch::RecordBatch; -use arrow::{array::Array, error::Result as ArrowResult}; -use arrow::{compute::take, datatypes::SchemaRef}; + +use arrow2::{ + array::*, buffer::MutableBuffer, compute::take, datatypes::*, + error::Result as ArrowResult, record_batch::RecordBatch, +}; + use tokio_stream::wrappers::UnboundedReceiverStream; use super::{hash_join::create_hashes, RecordBatchStream, SendableRecordBatchStream}; @@ -41,6 +44,7 @@ use tokio::sync::{ }; use tokio::task::JoinHandle; +type SchemaRef = Arc; type MaybeBatch = Option>; /// The repartition operator maps N input partitions to M output partitions based on a @@ -170,32 +174,38 @@ impl ExecutionPlan for RepartitionExec { // Hash arrays and compute buckets based on number of partitions let hashes = create_hashes(&arrays, &random_state, hashes_buf)?; - let mut indices = vec![vec![]; num_output_partitions]; + let mut indices = (0..num_output_partitions) + .map(|_| MutableBuffer::::new()) + .collect::>(); for (index, hash) in hashes.iter().enumerate() { - indices - [(*hash % num_output_partitions as u64) as usize] - .push(index as u64) + let i = + (*hash % num_output_partitions as u64) as usize; + indices[i].push(index as i64) } for (num_output_partition, partition_indices) in indices.into_iter().enumerate() { - let indices = partition_indices.into(); + let indices = Int64Array::from_data( + DataType::Int64, + partition_indices.into(), + None, + ); // Produce batches based on indices let columns = input_batch .columns() .iter() .map(|c| { - take(c.as_ref(), &indices, None).map_err( - |e| { + take::take(c.as_ref(), &indices) + .map_err(|e| { DataFusionError::Execution( e.to_string(), ) - }, - ) + }) + .map(|x| x.into()) }) .collect::>>>()?; let output_batch = RecordBatch::try_new( - input_batch.schema(), + input_batch.schema().clone(), columns, ); let tx = txs.get_mut(&num_output_partition).unwrap(); @@ -310,9 +320,9 @@ impl RecordBatchStream for RepartitionStream { mod tests { use super::*; use crate::physical_plan::memory::MemoryExec; - use arrow::array::UInt32Array; - use arrow::datatypes::{DataType, Field, Schema}; - use arrow::record_batch::RecordBatch; + use arrow2::array::UInt32Array; + use arrow2::datatypes::{DataType, Field, Schema}; + use arrow2::record_batch::RecordBatch; #[tokio::test] async fn one_to_many_round_robin() -> Result<()> { @@ -415,7 +425,7 @@ mod tests { fn create_batch(schema: &Arc) -> RecordBatch { RecordBatch::try_new( schema.clone(), - vec![Arc::new(UInt32Array::from(vec![1, 2, 3, 4, 
5, 6, 7, 8]))], + vec![Arc::new(UInt32Array::from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]))], ) .unwrap() } diff --git a/datafusion/src/physical_plan/sort.rs b/datafusion/src/physical_plan/sort.rs index 8229060190215..602d09db0a79f 100644 --- a/datafusion/src/physical_plan/sort.rs +++ b/datafusion/src/physical_plan/sort.rs @@ -30,12 +30,12 @@ use hashbrown::HashMap; use pin_project_lite::pin_project; -pub use arrow::compute::SortOptions; -use arrow::compute::{concat, lexsort_to_indices, take, SortColumn, TakeOptions}; -use arrow::datatypes::SchemaRef; -use arrow::error::Result as ArrowResult; -use arrow::record_batch::RecordBatch; -use arrow::{array::ArrayRef, error::ArrowError}; +pub use arrow2::compute::sort::SortOptions; +use arrow2::compute::{concat, sort::lexsort_to_indices, sort::SortColumn, take}; +use arrow2::datatypes::Schema; +use arrow2::error::Result as ArrowResult; +use arrow2::record_batch::RecordBatch; +use arrow2::{array::Array, error::ArrowError}; use super::{RecordBatchStream, SendableRecordBatchStream}; use crate::error::{DataFusionError, Result}; @@ -44,6 +44,9 @@ use crate::physical_plan::{ common, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, SQLMetric, }; +type SchemaRef = Arc; +type ArrayRef = Arc; + /// Sort execution plan #[derive(Debug)] pub struct SortExec { @@ -182,26 +185,33 @@ fn sort_batches( .iter() .enumerate() .map(|(i, _)| { - concat( + concat::concatenate( &batches .iter() .map(|batch| batch.column(i).as_ref()) .collect::>(), ) + .map(|x| x.into()) }) .collect::>>()?, )?; + let columns = expr + .iter() + .map(|e| e.evaluate_to_sort_column(&combined_batch)) + .collect::>>() + .map_err(DataFusionError::into_arrow_external_error)?; + let columns = columns + .iter() + .map(|x| SortColumn { + values: x.values.as_ref(), + options: x.options, + }) + .collect::>(); + // sort combined record batch // TODO: pushup the limit expression to sort - let indices = lexsort_to_indices( - &expr - .iter() - .map(|e| e.evaluate_to_sort_column(&combined_batch)) - .collect::>>() - .map_err(DataFusionError::into_arrow_external_error)?, - None, - )?; + let indices = lexsort_to_indices(&columns)?; // reorder all rows based on sorted indices let sorted_batch = RecordBatch::try_new( @@ -209,17 +219,7 @@ fn sort_batches( combined_batch .columns() .iter() - .map(|column| { - take( - column.as_ref(), - &indices, - // disable bound check overhead since indices are already generated from - // the same record batch - Some(TakeOptions { - check_bounds: false, - }), - ) - }) + .map(|column| take::take(column.as_ref(), &indices).map(|x| x.into())) .collect::>>()?, ); sorted_batch.map(Some) @@ -289,7 +289,9 @@ impl Stream for SortStream { // check for error in receiving channel and unwrap actual result let result = match result { - Err(e) => Some(Err(ArrowError::ExternalError(Box::new(e)))), // error receiving + Err(e) => { + Some(Err(ArrowError::External("".to_string(), Box::new(e)))) + } // error receiving Ok(result) => result.transpose(), }; @@ -321,8 +323,8 @@ mod tests { csv::{CsvExec, CsvReadOptions}, }; use crate::test; - use arrow::array::*; - use arrow::datatypes::*; + use arrow2::array::*; + use arrow2::datatypes::*; #[tokio::test] async fn test_sort() -> Result<()> { @@ -363,15 +365,18 @@ mod tests { let columns = result[0].columns(); - let c1 = as_string_array(&columns[0]); + let c1 = columns[0] + .as_any() + .downcast_ref::>() + .unwrap(); assert_eq!(c1.value(0), "a"); assert_eq!(c1.value(c1.len() - 1), "e"); - let c2 = as_primitive_array::(&columns[1]); + let c2 = 
columns[1].as_any().downcast_ref::().unwrap(); assert_eq!(c2.value(0), 1); assert_eq!(c2.value(c2.len() - 1), 5,); - let c7 = as_primitive_array::(&columns[6]); + let c7 = columns[6].as_any().downcast_ref::().unwrap(); assert_eq!(c7.value(0), 15); assert_eq!(c7.value(c7.len() - 1), 254,); @@ -445,8 +450,8 @@ mod tests { assert_eq!(DataType::Float32, *columns[0].data_type()); assert_eq!(DataType::Float64, *columns[1].data_type()); - let a = as_primitive_array::(&columns[0]); - let b = as_primitive_array::(&columns[1]); + let a = columns[0].as_any().downcast_ref::().unwrap(); + let b = columns[1].as_any().downcast_ref::().unwrap(); // convert result to strings to allow comparing to expected result containing NaN let result: Vec<(Option, Option)> = (0..result[0].num_rows()) diff --git a/datafusion/src/physical_plan/string_expressions.rs b/datafusion/src/physical_plan/string_expressions.rs index 882fe30502fdf..7c82b8d269266 100644 --- a/datafusion/src/physical_plan/string_expressions.rs +++ b/datafusion/src/physical_plan/string_expressions.rs @@ -28,25 +28,27 @@ use crate::{ error::{DataFusionError, Result}, scalar::ScalarValue, }; -use arrow::{ +use arrow2::{ array::{ - Array, ArrayRef, BooleanArray, GenericStringArray, Int32Array, Int64Array, - PrimitiveArray, StringArray, StringOffsetSizeTrait, + Array, BooleanArray, Int32Array, Int64Array, Offset, PrimitiveArray, Utf8Array, }, - datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType}, + datatypes::DataType, }; use super::ColumnarValue; +type StringArray = Utf8Array; +type ArrayRef = Arc; + macro_rules! downcast_string_arg { ($ARG:expr, $NAME:expr, $T:ident) => {{ $ARG.as_any() - .downcast_ref::>() + .downcast_ref::>() .ok_or_else(|| { DataFusionError::Internal(format!( "could not cast {} to {}", $NAME, - type_name::>() + type_name::>() )) })? }}; @@ -90,20 +92,20 @@ macro_rules! downcast_vec { } /// applies a unary expression to `args[0]` that is expected to be downcastable to -/// a `GenericStringArray` and returns a `GenericStringArray` (which may have a different offset) +/// a `Utf8Array` and returns a `Utf8Array` (which may have a different offset) /// # Errors /// This function errors when: /// * the number of arguments is not 1 -/// * the first argument is not castable to a `GenericStringArray` +/// * the first argument is not castable to a `Utf8Array` pub(crate) fn unary_string_function<'a, T, O, F, R>( args: &[&'a dyn Array], op: F, name: &str, -) -> Result> +) -> Result> where R: AsRef, - O: StringOffsetSizeTrait, - T: StringOffsetSizeTrait, + O: Offset, + T: Offset, F: Fn(&'a str) -> R, { if args.len() != 1 { @@ -174,7 +176,7 @@ where /// Returns the numeric code of the first character of the argument. /// ascii('x') = 120 -pub fn ascii(args: &[ArrayRef]) -> Result { +pub fn ascii(args: &[ArrayRef]) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let result = string_array @@ -192,7 +194,7 @@ pub fn ascii(args: &[ArrayRef]) -> Result { /// Removes the longest string containing only characters in characters (a space by default) from the start and end of string. 
/// btrim('xyxtrimyyx', 'xyz') = 'trim' -pub fn btrim(args: &[ArrayRef]) -> Result { +pub fn btrim(args: &[ArrayRef]) -> Result { match args.len() { 1 => { let string_array = downcast_string_arg!(args[0], "string", T); @@ -204,7 +206,7 @@ pub fn btrim(args: &[ArrayRef]) -> Result { string.trim_start_matches(' ').trim_end_matches(' ') }) }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } @@ -227,7 +229,7 @@ pub fn btrim(args: &[ArrayRef]) -> Result { ) } }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } @@ -246,15 +248,15 @@ pub fn chr(args: &[ArrayRef]) -> Result { // first map is the iterator, second is for the `Option<_>` let result = integer_array .iter() - .map(|integer: Option| { + .map(|integer| { integer .map(|integer| { - if integer == 0 { + if *integer == 0 { Err(DataFusionError::Execution( "null character not permitted.".to_string(), )) } else { - match core::char::from_u32(integer as u32) { + match core::char::from_u32(*integer as u32) { Some(integer) => Ok(integer.to_string()), None => Err(DataFusionError::Execution( "requested character too large for encoding.".to_string(), @@ -307,7 +309,7 @@ pub fn concat(args: &[ColumnarValue]) -> Result { } Some(owned_string) }) - .collect::(); + .collect::>(); Ok(ColumnarValue::Array(Arc::new(result))) } else { @@ -370,7 +372,7 @@ pub fn concat_ws(args: &[ArrayRef]) -> Result { /// Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. /// initcap('hi THOMAS') = 'Hi Thomas' -pub fn initcap(args: &[ArrayRef]) -> Result { +pub fn initcap(args: &[ArrayRef]) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); // first map is the iterator, second is for the `Option<_>` @@ -393,7 +395,7 @@ pub fn initcap(args: &[ArrayRef]) -> Result char_vector.iter().collect::() }) }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } @@ -406,7 +408,7 @@ pub fn lower(args: &[ColumnarValue]) -> Result { /// Removes the longest string containing only characters in characters (a space by default) from the start of string. /// ltrim('zzzytest', 'xyz') = 'test' -pub fn ltrim(args: &[ArrayRef]) -> Result { +pub fn ltrim(args: &[ArrayRef]) -> Result { match args.len() { 1 => { let string_array = downcast_string_arg!(args[0], "string", T); @@ -414,7 +416,7 @@ pub fn ltrim(args: &[ArrayRef]) -> Result { let result = string_array .iter() .map(|string| string.map(|string: &str| string.trim_start_matches(' '))) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } @@ -432,7 +434,7 @@ pub fn ltrim(args: &[ArrayRef]) -> Result { } _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } @@ -445,7 +447,7 @@ pub fn ltrim(args: &[ArrayRef]) -> Result { /// Repeats string the specified number of times. 
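The initcap kernel above walks the characters of each value and uppercases the first alphanumeric character of every word while lowercasing the rest. A plain-Rust sketch of that per-string logic (ASCII case mapping only, for brevity; the kernel operates on whole arrays and uses Unicode case conversion):

```rust
/// Uppercase the first alphanumeric character of every word, lowercase the rest.
fn initcap(s: &str) -> String {
    let mut previous_is_alphanumeric = false;
    s.chars()
        .map(|c| {
            let out = if previous_is_alphanumeric {
                c.to_ascii_lowercase()
            } else {
                c.to_ascii_uppercase()
            };
            // A word boundary is any non-alphanumeric character.
            previous_is_alphanumeric = c.is_alphanumeric();
            out
        })
        .collect()
}

fn main() {
    assert_eq!(initcap("hi THOMAS"), "Hi Thomas");
    println!("{}", initcap("hi THOMAS"));
}
```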
/// repeat('Pg', 4) = 'PgPgPgPg' -pub fn repeat(args: &[ArrayRef]) -> Result { +pub fn repeat(args: &[ArrayRef]) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let number_array = downcast_arg!(args[1], "number", Int64Array); @@ -453,17 +455,17 @@ pub fn repeat(args: &[ArrayRef]) -> Result { .iter() .zip(number_array.iter()) .map(|(string, number)| match (string, number) { - (Some(string), Some(number)) => Some(string.repeat(number as usize)), + (Some(string), Some(number)) => Some(string.repeat(*number as usize)), _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } /// Replaces all occurrences in string of substring from with substring to. /// replace('abcdefabcdef', 'cd', 'XX') = 'abXXefabXXef' -pub fn replace(args: &[ArrayRef]) -> Result { +pub fn replace(args: &[ArrayRef]) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let from_array = downcast_string_arg!(args[1], "from", T); let to_array = downcast_string_arg!(args[2], "to", T); @@ -476,14 +478,14 @@ pub fn replace(args: &[ArrayRef]) -> Result (Some(string), Some(from), Some(to)) => Some(string.replace(from, to)), _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } /// Removes the longest string containing only characters in characters (a space by default) from the end of string. /// rtrim('testxxzx', 'xyz') = 'test' -pub fn rtrim(args: &[ArrayRef]) -> Result { +pub fn rtrim(args: &[ArrayRef]) -> Result { match args.len() { 1 => { let string_array = downcast_string_arg!(args[0], "string", T); @@ -491,7 +493,7 @@ pub fn rtrim(args: &[ArrayRef]) -> Result { let result = string_array .iter() .map(|string| string.map(|string: &str| string.trim_end_matches(' '))) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } @@ -509,7 +511,7 @@ pub fn rtrim(args: &[ArrayRef]) -> Result { } _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } @@ -522,7 +524,7 @@ pub fn rtrim(args: &[ArrayRef]) -> Result { /// Splits string at occurrences of delimiter and returns the n'th field (counting from one). /// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def' -pub fn split_part(args: &[ArrayRef]) -> Result { +pub fn split_part(args: &[ArrayRef]) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let delimiter_array = downcast_string_arg!(args[1], "delimiter", T); let n_array = downcast_arg!(args[2], "n", Int64Array); @@ -533,13 +535,13 @@ pub fn split_part(args: &[ArrayRef]) -> Result { - if n <= 0 { + if *n <= 0 { Err(DataFusionError::Execution( "field position must be greater than zero".to_string(), )) } else { let split_string: Vec<&str> = string.split(delimiter).collect(); - match split_string.get(n as usize - 1) { + match split_string.get(*n as usize - 1) { Some(s) => Ok(Some(*s)), None => Ok(Some("")), } @@ -547,14 +549,14 @@ pub fn split_part(args: &[ArrayRef]) -> Result Ok(None), }) - .collect::>>()?; + .collect::>>()?; Ok(Arc::new(result) as ArrayRef) } /// Returns true if string starts with prefix. 
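split_part treats the field number as 1-based, returns an empty string when the requested field is past the end, and rejects non-positive positions with an execution error. A standalone sketch of that per-value behaviour, with a plain `String` error standing in for the array-level error plumbing:

```rust
/// Return the n'th field (1-based) of `string` split on `delimiter`.
/// Past-the-end fields yield an empty string; non-positive n is an error.
fn split_part(string: &str, delimiter: &str, n: i64) -> Result<String, String> {
    if n <= 0 {
        return Err("field position must be greater than zero".to_string());
    }
    let fields: Vec<&str> = string.split(delimiter).collect();
    Ok(fields.get(n as usize - 1).copied().unwrap_or("").to_string())
}

fn main() {
    assert_eq!(split_part("abc~@~def~@~ghi", "~@~", 2).unwrap(), "def");
    assert_eq!(split_part("abc~@~def~@~ghi", "~@~", 9).unwrap(), "");
    assert!(split_part("abc", "~@~", 0).is_err());
}
```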
/// starts_with('alphabet', 'alph') = 't' -pub fn starts_with(args: &[ArrayRef]) -> Result { +pub fn starts_with(args: &[ArrayRef]) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let prefix_array = downcast_string_arg!(args[1], "prefix", T); @@ -572,10 +574,7 @@ pub fn starts_with(args: &[ArrayRef]) -> Result(args: &[ArrayRef]) -> Result -where - T::Native: StringOffsetSizeTrait, -{ +pub fn to_hex(args: &[ArrayRef]) -> Result { let integer_array = downcast_primitive_array_arg!(args[0], "integer", T); let result = integer_array @@ -583,7 +582,7 @@ where .map(|integer| { integer.map(|integer| format!("{:x}", integer.to_usize().unwrap())) }) - .collect::>(); + .collect::(); Ok(Arc::new(result) as ArrayRef) } diff --git a/datafusion/src/physical_plan/type_coercion.rs b/datafusion/src/physical_plan/type_coercion.rs index 06d3739b53b27..6b64a75e8207f 100644 --- a/datafusion/src/physical_plan/type_coercion.rs +++ b/datafusion/src/physical_plan/type_coercion.rs @@ -31,7 +31,7 @@ use std::{sync::Arc, vec}; -use arrow::datatypes::{DataType, Schema, TimeUnit}; +use arrow2::datatypes::{DataType, Schema, TimeUnit}; use super::{functions::Signature, PhysicalExpr}; use crate::error::{DataFusionError, Result}; @@ -212,7 +212,7 @@ pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool { mod tests { use super::*; use crate::physical_plan::expressions::col; - use arrow::datatypes::{DataType, Field, Schema}; + use arrow2::datatypes::{DataType, Field, Schema}; #[test] fn test_maybe_data_types() { diff --git a/datafusion/src/physical_plan/udaf.rs b/datafusion/src/physical_plan/udaf.rs index f7515d326d0a5..c50e1991c8153 100644 --- a/datafusion/src/physical_plan/udaf.rs +++ b/datafusion/src/physical_plan/udaf.rs @@ -21,7 +21,7 @@ use fmt::{Debug, Formatter}; use std::any::Any; use std::fmt; -use arrow::{ +use arrow2::{ datatypes::Field, datatypes::{DataType, Schema}, }; diff --git a/datafusion/src/physical_plan/udf.rs b/datafusion/src/physical_plan/udf.rs index a79c0a8a36059..78f9f018cb9ca 100644 --- a/datafusion/src/physical_plan/udf.rs +++ b/datafusion/src/physical_plan/udf.rs @@ -20,7 +20,7 @@ use fmt::{Debug, Formatter}; use std::fmt; -use arrow::datatypes::Schema; +use arrow2::datatypes::Schema; use crate::error::Result; use crate::{logical_plan::Expr, physical_plan::PhysicalExpr}; diff --git a/datafusion/src/physical_plan/unicode_expressions.rs b/datafusion/src/physical_plan/unicode_expressions.rs index 3852fd7c931fa..00ac6ed93abb2 100644 --- a/datafusion/src/physical_plan/unicode_expressions.rs +++ b/datafusion/src/physical_plan/unicode_expressions.rs @@ -25,25 +25,23 @@ use std::any::type_name; use std::cmp::Ordering; use std::sync::Arc; -use crate::error::{DataFusionError, Result}; -use arrow::{ - array::{ - ArrayRef, GenericStringArray, Int64Array, PrimitiveArray, StringOffsetSizeTrait, - }, - datatypes::{ArrowNativeType, ArrowPrimitiveType}, -}; +use arrow2::array::*; use hashbrown::HashMap; use unicode_segmentation::UnicodeSegmentation; +use crate::error::{DataFusionError, Result}; + +type ArrayRef = Arc; + macro_rules! downcast_string_arg { ($ARG:expr, $NAME:expr, $T:ident) => {{ $ARG.as_any() - .downcast_ref::>() + .downcast_ref::>() .ok_or_else(|| { DataFusionError::Internal(format!( "could not cast {} to {}", $NAME, - type_name::>() + type_name::>() )) })? }}; @@ -63,41 +61,38 @@ macro_rules! downcast_arg { /// Returns number of characters in the string. 
/// character_length('josé') = 4 -pub fn character_length(args: &[ArrayRef]) -> Result -where - T::Native: StringOffsetSizeTrait, -{ - let string_array: &GenericStringArray = args[0] - .as_any() - .downcast_ref::>() - .ok_or_else(|| { - DataFusionError::Internal("could not cast string to StringArray".to_string()) - })?; - - let result = string_array - .iter() - .map(|string| { - string.map(|string: &str| { - T::Native::from_usize(string.graphemes(true).count()).expect( - "should not fail as graphemes.count will always return integer", +pub fn character_length(args: &[ArrayRef]) -> Result { + let string_array = + args[0] + .as_any() + .downcast_ref::>() + .ok_or_else(|| { + DataFusionError::Internal( + "could not cast string to StringArray".to_string(), ) - }) + })?; + + let iter = string_array.iter().map(|string| { + string.map(|string: &str| { + O::from_usize(string.graphemes(true).count()) + .expect("should not fail as graphemes.count will always return integer") }) - .collect::>(); + }); + let result = Primitive::::from_trusted_len_iter(iter).to(O::DATA_TYPE); Ok(Arc::new(result) as ArrayRef) } /// Returns first n characters in the string, or when n is negative, returns all but last |n| characters. /// left('abcde', 2) = 'ab' -pub fn left(args: &[ArrayRef]) -> Result { +pub fn left(args: &[ArrayRef]) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let n_array = downcast_arg!(args[1], "n", Int64Array); let result = string_array .iter() .zip(n_array.iter()) .map(|(string, n)| match (string, n) { - (Some(string), Some(n)) => match n.cmp(&0) { + (Some(string), Some(&n)) => match n.cmp(&0) { Ordering::Less => { let graphemes = string.graphemes(true); let len = graphemes.clone().count() as i64; @@ -116,14 +111,14 @@ pub fn left(args: &[ArrayRef]) -> Result { }, _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } /// Extends the string to length 'length' by prepending the characters fill (a space by default). If the string is already longer than length then it is truncated (on the right). /// lpad('hi', 5, 'xy') = 'xyxhi' -pub fn lpad(args: &[ArrayRef]) -> Result { +pub fn lpad(args: &[ArrayRef]) -> Result { match args.len() { 2 => { let string_array = downcast_string_arg!(args[0], "string", T); @@ -134,7 +129,7 @@ pub fn lpad(args: &[ArrayRef]) -> Result { .zip(length_array.iter()) .map(|(string, length)| match (string, length) { (Some(string), Some(length)) => { - let length = length as usize; + let length = *length as usize; if length == 0 { Some("".to_string()) } else { @@ -153,7 +148,7 @@ pub fn lpad(args: &[ArrayRef]) -> Result { } _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } @@ -167,7 +162,7 @@ pub fn lpad(args: &[ArrayRef]) -> Result { .zip(length_array.iter()) .zip(fill_array.iter()) .map(|((string, length), fill)| match (string, length, fill) { - (Some(string), Some(length), Some(fill)) => { + (Some(string), Some(&length), Some(fill)) => { let length = length as usize; if length == 0 { @@ -199,7 +194,7 @@ pub fn lpad(args: &[ArrayRef]) -> Result { } _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } @@ -212,7 +207,7 @@ pub fn lpad(args: &[ArrayRef]) -> Result { /// Reverses the order of the characters in the string. 
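character_length, left and lpad all count grapheme clusters rather than bytes or code points, which is why they go through `unicode_segmentation`. The sketch below shows the same counting and slicing rules applied to single strings; it needs only the `unicode-segmentation` crate these kernels already depend on.

```rust
use unicode_segmentation::UnicodeSegmentation;

/// Count user-visible characters (grapheme clusters), not bytes.
fn character_length(s: &str) -> usize {
    s.graphemes(true).count()
}

/// First n graphemes; a negative n means "all but the last |n|".
fn left(s: &str, n: i64) -> String {
    if n >= 0 {
        s.graphemes(true).take(n as usize).collect()
    } else {
        let len = character_length(s) as i64;
        let keep = len.saturating_add(n).max(0) as usize;
        s.graphemes(true).take(keep).collect()
    }
}

fn main() {
    assert_eq!(character_length("josé"), 4);
    assert_eq!(left("abcde", 2), "ab");
    assert_eq!(left("abcde", -2), "abc");
}
```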
/// reverse('abcde') = 'edcba' -pub fn reverse(args: &[ArrayRef]) -> Result { +pub fn reverse(args: &[ArrayRef]) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let result = string_array @@ -220,14 +215,14 @@ pub fn reverse(args: &[ArrayRef]) -> Result .map(|string| { string.map(|string: &str| string.graphemes(true).rev().collect::()) }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } /// Returns last n characters in the string, or when n is negative, returns all but first |n| characters. /// right('abcde', 2) = 'de' -pub fn right(args: &[ArrayRef]) -> Result { +pub fn right(args: &[ArrayRef]) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let n_array = downcast_arg!(args[1], "n", Int64Array); @@ -258,7 +253,7 @@ pub fn right(args: &[ArrayRef]) -> Result { string .graphemes(true) .rev() - .take(n as usize) + .take(*n as usize) .collect::>() .iter() .rev() @@ -268,14 +263,14 @@ pub fn right(args: &[ArrayRef]) -> Result { }, _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } /// Extends the string to length 'length' by appending the characters fill (a space by default). If the string is already longer than length then it is truncated. /// rpad('hi', 5, 'xy') = 'hixyx' -pub fn rpad(args: &[ArrayRef]) -> Result { +pub fn rpad(args: &[ArrayRef]) -> Result { match args.len() { 2 => { let string_array = downcast_string_arg!(args[0], "string", T); @@ -285,7 +280,7 @@ pub fn rpad(args: &[ArrayRef]) -> Result { .iter() .zip(length_array.iter()) .map(|(string, length)| match (string, length) { - (Some(string), Some(length)) => { + (Some(string), Some(&length)) => { let length = length as usize; if length == 0 { Some("".to_string()) @@ -302,7 +297,7 @@ pub fn rpad(args: &[ArrayRef]) -> Result { } _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } @@ -316,7 +311,7 @@ pub fn rpad(args: &[ArrayRef]) -> Result { .zip(length_array.iter()) .zip(fill_array.iter()) .map(|((string, length), fill)| match (string, length, fill) { - (Some(string), Some(length), Some(fill)) => { + (Some(string), Some(&length), Some(fill)) => { let length = length as usize; let graphemes = string.graphemes(true).collect::>(); let fill_chars = fill.chars().collect::>(); @@ -339,7 +334,7 @@ pub fn rpad(args: &[ArrayRef]) -> Result { } _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } @@ -352,20 +347,17 @@ pub fn rpad(args: &[ArrayRef]) -> Result { /// Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.) 
/// strpos('high', 'ig') = 2 -pub fn strpos(args: &[ArrayRef]) -> Result -where - T::Native: StringOffsetSizeTrait, -{ - let string_array: &GenericStringArray = args[0] +pub fn strpos(args: &[ArrayRef]) -> Result { + let string_array: &Utf8Array = args[0] .as_any() - .downcast_ref::>() + .downcast_ref::>() .ok_or_else(|| { DataFusionError::Internal("could not cast string to StringArray".to_string()) })?; - let substring_array: &GenericStringArray = args[1] + let substring_array: &Utf8Array = args[1] .as_any() - .downcast_ref::>() + .downcast_ref::>() .ok_or_else(|| { DataFusionError::Internal( "could not cast substring to StringArray".to_string(), @@ -381,7 +373,7 @@ where // this method first finds the matching byte using rfind // then maps that to the character index by matching on the grapheme_index of the byte_index Some( - T::Native::from_usize(string.to_string().rfind(substring).map_or( + T::from_usize(string.to_string().rfind(substring).map_or( 0, |byte_offset| { string @@ -411,7 +403,7 @@ where /// Extracts the substring of string starting at the start'th character, and extending for count characters if that is specified. (Same as substring(string from start for count).) /// substr('alphabet', 3) = 'phabet' /// substr('alphabet', 3, 2) = 'ph' -pub fn substr(args: &[ArrayRef]) -> Result { +pub fn substr(args: &[ArrayRef]) -> Result { match args.len() { 2 => { let string_array = downcast_string_arg!(args[0], "string", T); @@ -421,7 +413,7 @@ pub fn substr(args: &[ArrayRef]) -> Result { .iter() .zip(start_array.iter()) .map(|(string, start)| match (string, start) { - (Some(string), Some(start)) => { + (Some(string), Some(&start)) => { if start <= 0 { Some(string.to_string()) } else { @@ -436,7 +428,7 @@ pub fn substr(args: &[ArrayRef]) -> Result { } _ => None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } @@ -450,7 +442,7 @@ pub fn substr(args: &[ArrayRef]) -> Result { .zip(start_array.iter()) .zip(count_array.iter()) .map(|((string, start), count)| match (string, start, count) { - (Some(string), Some(start), Some(count)) => { + (Some(string), Some(&start), Some(&count)) => { if count < 0 { Err(DataFusionError::Execution( "negative substring length not allowed".to_string(), @@ -475,7 +467,7 @@ pub fn substr(args: &[ArrayRef]) -> Result { } _ => Ok(None), }) - .collect::>>()?; + .collect::>>()?; Ok(Arc::new(result) as ArrayRef) } @@ -488,7 +480,7 @@ pub fn substr(args: &[ArrayRef]) -> Result { /// Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted. 
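The two-argument substr above interprets the start position as 1-based and counted in graphemes, and treats a non-positive start as "from the beginning". A per-string sketch of that rule, using the same `unicode-segmentation` call as the kernel:

```rust
use unicode_segmentation::UnicodeSegmentation;

/// 1-based, grapheme-counted start position; non-positive start returns the whole string.
fn substr(s: &str, start: i64) -> String {
    if start <= 0 {
        s.to_string()
    } else {
        s.graphemes(true).skip(start as usize - 1).collect()
    }
}

fn main() {
    assert_eq!(substr("alphabet", 3), "phabet");
    assert_eq!(substr("alphabet", 0), "alphabet");
}
```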
/// translate('12345', '143', 'ax') = 'a2x5' -pub fn translate(args: &[ArrayRef]) -> Result { +pub fn translate(args: &[ArrayRef]) -> Result { let string_array = downcast_string_arg!(args[0], "string", T); let from_array = downcast_string_arg!(args[1], "from", T); let to_array = downcast_string_arg!(args[2], "to", T); @@ -525,7 +517,7 @@ pub fn translate(args: &[ArrayRef]) -> Result None, }) - .collect::>(); + .collect::>(); Ok(Arc::new(result) as ArrayRef) } diff --git a/datafusion/src/physical_plan/union.rs b/datafusion/src/physical_plan/union.rs index cbab728a8428b..836045354821e 100644 --- a/datafusion/src/physical_plan/union.rs +++ b/datafusion/src/physical_plan/union.rs @@ -23,7 +23,8 @@ use std::{any::Any, sync::Arc}; -use arrow::datatypes::SchemaRef; +use arrow2::datatypes::Schema; +type SchemaRef = Arc; use super::{ExecutionPlan, Partitioning, SendableRecordBatchStream}; use crate::error::Result; @@ -104,7 +105,7 @@ mod tests { csv::{CsvExec, CsvReadOptions}, }; use crate::test; - use arrow::record_batch::RecordBatch; + use arrow2::record_batch::RecordBatch; #[tokio::test] async fn test_union_partitions() -> Result<()> { diff --git a/datafusion/src/scalar.rs b/datafusion/src/scalar.rs index e59d21e7fcef0..92621ee0b60c0 100644 --- a/datafusion/src/scalar.rs +++ b/datafusion/src/scalar.rs @@ -19,24 +19,12 @@ use std::{convert::TryFrom, fmt, iter::repeat, sync::Arc}; -use arrow::datatypes::{ArrowDictionaryKeyType, DataType, Field, IntervalUnit, TimeUnit}; -use arrow::{ - array::*, - datatypes::{ - ArrowNativeType, Float32Type, Int16Type, Int32Type, Int64Type, Int8Type, - TimestampNanosecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, - }, -}; -use arrow::{ - array::{ - ArrayRef, Int16Builder, Int32Builder, Int64Builder, Int8Builder, ListBuilder, - TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, - UInt16Builder, UInt32Builder, UInt64Builder, UInt8Builder, - }, - datatypes::{ - TimestampMicrosecondType, TimestampMillisecondType, TimestampSecondType, - }, -}; +use arrow2::datatypes::{DataType, IntervalUnit, TimeUnit}; +use arrow2::{array::*, buffer::MutableBuffer, types::days_ms}; + +type ArrayRef = Arc; +type StringArray = Utf8Array; +type LargeStringArray = Utf8Array; use crate::error::{DataFusionError, Result}; @@ -75,7 +63,9 @@ pub enum ScalarValue { /// large binary LargeBinary(Option>), /// list of nested ScalarValue - List(Option>, DataType), + // 1st argument are the inner values + // 2st argument is datatype (i.e. it includes `Field`) + List(Option>, DataType), /// Date stored as a signed 32bit int Date32(Option), /// Date stored as a signed 64bit int @@ -91,7 +81,7 @@ pub enum ScalarValue { /// Interval with YearMonth unit IntervalYearMonth(Option), /// Interval with DayTime unit - IntervalDayTime(Option), + IntervalDayTime(Option), } macro_rules! typed_cast { @@ -104,91 +94,14 @@ macro_rules! typed_cast { }}; } -macro_rules! build_list { - ($VALUE_BUILDER_TY:ident, $SCALAR_TY:ident, $VALUES:expr, $SIZE:expr) => {{ - match $VALUES { - // the return on the macro is necessary, to short-circuit and return ArrayRef - None => { - return new_null_array( - &DataType::List(Box::new(Field::new( - "item", - DataType::$SCALAR_TY, - true, - ))), - $SIZE, - ) - } - Some(values) => { - build_values_list!($VALUE_BUILDER_TY, $SCALAR_TY, values, $SIZE) - } - } - }}; -} - -macro_rules! 
build_timestamp_list { - ($TIME_UNIT:expr, $TIME_ZONE:expr, $VALUES:expr, $SIZE:expr) => {{ - match $VALUES { - // the return on the macro is necessary, to short-circuit and return ArrayRef - None => { - return new_null_array( - &DataType::List(Box::new(Field::new( - "item", - DataType::Timestamp($TIME_UNIT, $TIME_ZONE), - true, - ))), - $SIZE, - ) - } - Some(values) => match $TIME_UNIT { - TimeUnit::Second => build_values_list!( - TimestampSecondBuilder, - TimestampSecond, - values, - $SIZE - ), - TimeUnit::Microsecond => build_values_list!( - TimestampMillisecondBuilder, - TimestampMillisecond, - values, - $SIZE - ), - TimeUnit::Millisecond => build_values_list!( - TimestampMicrosecondBuilder, - TimestampMicrosecond, - values, - $SIZE - ), - TimeUnit::Nanosecond => build_values_list!( - TimestampNanosecondBuilder, - TimestampNanosecond, - values, - $SIZE - ), - }, - } - }}; -} - -macro_rules! build_values_list { - ($VALUE_BUILDER_TY:ident, $SCALAR_TY:ident, $VALUES:expr, $SIZE:expr) => {{ - let mut builder = ListBuilder::new($VALUE_BUILDER_TY::new($VALUES.len())); - - for _ in 0..$SIZE { - for scalar_value in $VALUES { - match scalar_value { - ScalarValue::$SCALAR_TY(Some(v)) => { - builder.values().append_value(v.clone()).unwrap() - } - ScalarValue::$SCALAR_TY(None) => { - builder.values().append_null().unwrap(); - } - _ => panic!("Incompatible ScalarValue for list"), - }; - } - builder.append(true).unwrap(); - } - - builder.finish() +macro_rules! dyn_to_array { + ($self:expr, $value:expr, $size:expr, $ty:ty) => {{ + Arc::new(PrimitiveArray::<$ty>::from_data( + $self.get_datatype(), + MutableBuffer::<$ty>::from_trusted_len_iter(repeat(*$value).take($size)) + .into(), + None, + )) }}; } @@ -223,9 +136,7 @@ impl ScalarValue { ScalarValue::LargeUtf8(_) => DataType::LargeUtf8, ScalarValue::Binary(_) => DataType::Binary, ScalarValue::LargeBinary(_) => DataType::LargeBinary, - ScalarValue::List(_, data_type) => { - DataType::List(Box::new(Field::new("item", data_type.clone(), true))) - } + ScalarValue::List(_, data_type) => data_type.clone(), ScalarValue::Date32(_) => DataType::Date32, ScalarValue::Date64(_) => DataType::Date64, ScalarValue::IntervalYearMonth(_) => { @@ -290,151 +201,107 @@ impl ScalarValue { Arc::new(BooleanArray::from(vec![*e; size])) as ArrayRef } ScalarValue::Float64(e) => match e { - Some(value) => Arc::new(Float64Array::from_value(*value, size)), - None => new_null_array(&DataType::Float64, size), + Some(value) => dyn_to_array!(self, value, size, f64), + None => new_null_array(self.get_datatype(), size).into(), }, ScalarValue::Float32(e) => match e { - Some(value) => Arc::new(Float32Array::from_value(*value, size)), - None => new_null_array(&DataType::Float32, size), + Some(value) => dyn_to_array!(self, value, size, f32), + None => new_null_array(self.get_datatype(), size).into(), }, ScalarValue::Int8(e) => match e { - Some(value) => Arc::new(Int8Array::from_value(*value, size)), - None => new_null_array(&DataType::Int8, size), + Some(value) => dyn_to_array!(self, value, size, i8), + None => new_null_array(self.get_datatype(), size).into(), }, ScalarValue::Int16(e) => match e { - Some(value) => Arc::new(Int16Array::from_value(*value, size)), - None => new_null_array(&DataType::Int16, size), + Some(value) => dyn_to_array!(self, value, size, i16), + None => new_null_array(self.get_datatype(), size).into(), }, - ScalarValue::Int32(e) => match e { - Some(value) => Arc::new(Int32Array::from_value(*value, size)), - None => new_null_array(&DataType::Int32, size), + 
ScalarValue::Int32(e) + | ScalarValue::Date32(e) + | ScalarValue::IntervalYearMonth(e) => match e { + Some(value) => dyn_to_array!(self, value, size, i32), + None => new_null_array(self.get_datatype(), size).into(), }, - ScalarValue::Int64(e) => match e { - Some(value) => Arc::new(Int64Array::from_value(*value, size)), - None => new_null_array(&DataType::Int64, size), + ScalarValue::Int64(e) + | ScalarValue::Date64(e) + | ScalarValue::TimestampSecond(e) + | ScalarValue::TimestampMillisecond(e) + | ScalarValue::TimestampMicrosecond(e) + | ScalarValue::TimestampNanosecond(e) => match e { + Some(value) => dyn_to_array!(self, value, size, i64), + None => new_null_array(self.get_datatype(), size).into(), }, ScalarValue::UInt8(e) => match e { - Some(value) => Arc::new(UInt8Array::from_value(*value, size)), - None => new_null_array(&DataType::UInt8, size), + Some(value) => dyn_to_array!(self, value, size, u8), + None => new_null_array(self.get_datatype(), size).into(), }, ScalarValue::UInt16(e) => match e { - Some(value) => Arc::new(UInt16Array::from_value(*value, size)), - None => new_null_array(&DataType::UInt16, size), + Some(value) => dyn_to_array!(self, value, size, u16), + None => new_null_array(self.get_datatype(), size).into(), }, ScalarValue::UInt32(e) => match e { - Some(value) => Arc::new(UInt32Array::from_value(*value, size)), - None => new_null_array(&DataType::UInt32, size), + Some(value) => dyn_to_array!(self, value, size, u32), + None => new_null_array(self.get_datatype(), size).into(), }, ScalarValue::UInt64(e) => match e { - Some(value) => Arc::new(UInt64Array::from_value(*value, size)), - None => new_null_array(&DataType::UInt64, size), - }, - ScalarValue::TimestampSecond(e) => match e { - Some(value) => Arc::new(TimestampSecondArray::from_iter_values( - repeat(*value).take(size), - )), - None => { - new_null_array(&DataType::Timestamp(TimeUnit::Second, None), size) - } - }, - ScalarValue::TimestampMillisecond(e) => match e { - Some(value) => Arc::new(TimestampMillisecondArray::from_iter_values( - repeat(*value).take(size), - )), - None => new_null_array( - &DataType::Timestamp(TimeUnit::Millisecond, None), - size, - ), - }, - ScalarValue::TimestampMicrosecond(e) => match e { - Some(value) => { - Arc::new(TimestampMicrosecondArray::from_value(*value, size)) - } - None => new_null_array( - &DataType::Timestamp(TimeUnit::Microsecond, None), - size, - ), - }, - ScalarValue::TimestampNanosecond(e) => match e { - Some(value) => { - Arc::new(TimestampNanosecondArray::from_value(*value, size)) - } - None => { - new_null_array(&DataType::Timestamp(TimeUnit::Nanosecond, None), size) - } + Some(value) => dyn_to_array!(self, value, size, u64), + None => new_null_array(self.get_datatype(), size).into(), }, ScalarValue::Utf8(e) => match e { Some(value) => { - Arc::new(StringArray::from_iter_values(repeat(value).take(size))) + Arc::new(repeat(Some(&value)).take(size).collect::>()) } - None => new_null_array(&DataType::Utf8, size), + None => new_null_array(self.get_datatype(), size).into(), }, ScalarValue::LargeUtf8(e) => match e { Some(value) => { - Arc::new(LargeStringArray::from_iter_values(repeat(value).take(size))) + Arc::new(repeat(Some(&value)).take(size).collect::>()) } - None => new_null_array(&DataType::LargeUtf8, size), + None => new_null_array(self.get_datatype(), size).into(), }, ScalarValue::Binary(e) => match e { Some(value) => Arc::new( repeat(Some(value.as_slice())) .take(size) - .collect::(), + .collect::>(), ), - None => { - 
Arc::new(repeat(None::<&str>).take(size).collect::()) - } + None => new_null_array(self.get_datatype(), size).into(), }, ScalarValue::LargeBinary(e) => match e { Some(value) => Arc::new( repeat(Some(value.as_slice())) .take(size) - .collect::(), - ), - None => Arc::new( - repeat(None::<&str>) - .take(size) - .collect::(), + .collect::>(), ), + None => new_null_array(self.get_datatype(), size).into(), }, - ScalarValue::List(values, data_type) => Arc::new(match data_type { - DataType::Boolean => build_list!(BooleanBuilder, Boolean, values, size), - DataType::Int8 => build_list!(Int8Builder, Int8, values, size), - DataType::Int16 => build_list!(Int16Builder, Int16, values, size), - DataType::Int32 => build_list!(Int32Builder, Int32, values, size), - DataType::Int64 => build_list!(Int64Builder, Int64, values, size), - DataType::UInt8 => build_list!(UInt8Builder, UInt8, values, size), - DataType::UInt16 => build_list!(UInt16Builder, UInt16, values, size), - DataType::UInt32 => build_list!(UInt32Builder, UInt32, values, size), - DataType::UInt64 => build_list!(UInt64Builder, UInt64, values, size), - DataType::Utf8 => build_list!(StringBuilder, Utf8, values, size), - DataType::Float32 => build_list!(Float32Builder, Float32, values, size), - DataType::Float64 => build_list!(Float64Builder, Float64, values, size), - DataType::Timestamp(unit, tz) => { - build_timestamp_list!(unit.clone(), tz.clone(), values, size) - } - DataType::LargeUtf8 => { - build_list!(LargeStringBuilder, LargeUtf8, values, size) + ScalarValue::List(values, data_type) => { + if let Some(values) = values { + let length = values.len(); + let refs = std::iter::repeat(values.as_ref()) + .take(size) + .collect::>(); + let values = + arrow2::compute::concat::concatenate(&refs).unwrap().into(); + let offsets: arrow2::buffer::Buffer = + (0..=size).map(|i| (i * length) as i32).collect(); + Arc::new(ListArray::::from_data( + data_type.clone(), + offsets, + values, + None, + )) + } else { + new_null_array(self.get_datatype(), size).into() } - dt => panic!("Unexpected DataType for list {:?}", dt), - }), - ScalarValue::Date32(e) => match e { - Some(value) => Arc::new(Date32Array::from_value(*value, size)), - None => new_null_array(&DataType::Date32, size), - }, - ScalarValue::Date64(e) => match e { - Some(value) => Arc::new(Date64Array::from_value(*value, size)), - None => new_null_array(&DataType::Date64, size), - }, + } ScalarValue::IntervalDayTime(e) => match e { - Some(value) => Arc::new(IntervalDayTimeArray::from_value(*value, size)), - None => new_null_array(&DataType::Interval(IntervalUnit::DayTime), size), - }, - ScalarValue::IntervalYearMonth(e) => match e { - Some(value) => Arc::new(IntervalYearMonthArray::from_value(*value, size)), - None => { - new_null_array(&DataType::Interval(IntervalUnit::YearMonth), size) + Some(value) => { + Arc::new(PrimitiveArray::::from_trusted_len_values_iter( + std::iter::repeat(*value).take(size), + )) } + None => new_null_array(self.get_datatype(), size).into(), }, } } @@ -456,67 +323,45 @@ impl ScalarValue { DataType::Utf8 => typed_cast!(array, index, StringArray, Utf8), DataType::LargeUtf8 => typed_cast!(array, index, LargeStringArray, LargeUtf8), DataType::List(nested_type) => { - let list_array = - array.as_any().downcast_ref::().ok_or_else(|| { + let list_array = array + .as_any() + .downcast_ref::>() + .ok_or_else(|| { DataFusionError::Internal( "Failed to downcast ListArray".to_string(), ) })?; - let value = match list_array.is_null(index) { - true => None, - false => { - let nested_array = 
list_array.value(index); - let scalar_vec = (0..nested_array.len()) - .map(|i| ScalarValue::try_from_array(&nested_array, i)) - .collect::>>()?; - Some(scalar_vec) - } - }; - ScalarValue::List(value, nested_type.data_type().clone()) + let is_valid = list_array.is_valid(index); + let value = list_array.value(index).into(); + ScalarValue::List(Some(value), nested_type.data_type().clone()) } DataType::Date32 => { - typed_cast!(array, index, Date32Array, Date32) + typed_cast!(array, index, Int32Array, Date32) } DataType::Date64 => { - typed_cast!(array, index, Date64Array, Date64) + typed_cast!(array, index, Int64Array, Date64) } DataType::Timestamp(TimeUnit::Second, _) => { - typed_cast!(array, index, TimestampSecondArray, TimestampSecond) + typed_cast!(array, index, Int64Array, TimestampSecond) } DataType::Timestamp(TimeUnit::Millisecond, _) => { - typed_cast!( - array, - index, - TimestampMillisecondArray, - TimestampMillisecond - ) + typed_cast!(array, index, Int64Array, TimestampMillisecond) } DataType::Timestamp(TimeUnit::Microsecond, _) => { - typed_cast!( - array, - index, - TimestampMicrosecondArray, - TimestampMicrosecond - ) + typed_cast!(array, index, Int64Array, TimestampMicrosecond) } DataType::Timestamp(TimeUnit::Nanosecond, _) => { - typed_cast!(array, index, TimestampNanosecondArray, TimestampNanosecond) + typed_cast!(array, index, Int64Array, TimestampNanosecond) } DataType::Dictionary(index_type, _) => match **index_type { - DataType::Int8 => Self::try_from_dict_array::(array, index)?, - DataType::Int16 => Self::try_from_dict_array::(array, index)?, - DataType::Int32 => Self::try_from_dict_array::(array, index)?, - DataType::Int64 => Self::try_from_dict_array::(array, index)?, - DataType::UInt8 => Self::try_from_dict_array::(array, index)?, - DataType::UInt16 => { - Self::try_from_dict_array::(array, index)? - } - DataType::UInt32 => { - Self::try_from_dict_array::(array, index)? - } - DataType::UInt64 => { - Self::try_from_dict_array::(array, index)? 
- } + DataType::Int8 => Self::try_from_dict_array::(array, index)?, + DataType::Int16 => Self::try_from_dict_array::(array, index)?, + DataType::Int32 => Self::try_from_dict_array::(array, index)?, + DataType::Int64 => Self::try_from_dict_array::(array, index)?, + DataType::UInt8 => Self::try_from_dict_array::(array, index)?, + DataType::UInt16 => Self::try_from_dict_array::(array, index)?, + DataType::UInt32 => Self::try_from_dict_array::(array, index)?, + DataType::UInt64 => Self::try_from_dict_array::(array, index)?, _ => { return Err(DataFusionError::Internal(format!( "Index type not supported while creating scalar from dictionary: {}", @@ -533,14 +378,14 @@ impl ScalarValue { }) } - fn try_from_dict_array( + fn try_from_dict_array( array: &ArrayRef, index: usize, ) -> Result { let dict_array = array.as_any().downcast_ref::>().unwrap(); // look up the index in the values dictionary - let keys_col = dict_array.keys_array(); + let keys_col = dict_array.keys(); let values_index = keys_col.value(index).to_usize().ok_or_else(|| { DataFusionError::Internal(format!( "Can not convert index to usize in dictionary of type creating group by value {:?}", @@ -551,71 +396,33 @@ impl ScalarValue { } } -impl From for ScalarValue { - fn from(value: f64) -> Self { - ScalarValue::Float64(Some(value)) - } -} - -impl From for ScalarValue { - fn from(value: f32) -> Self { - ScalarValue::Float32(Some(value)) - } -} - -impl From for ScalarValue { - fn from(value: i8) -> Self { - ScalarValue::Int8(Some(value)) - } -} - -impl From for ScalarValue { - fn from(value: i16) -> Self { - ScalarValue::Int16(Some(value)) - } -} - -impl From for ScalarValue { - fn from(value: i32) -> Self { - ScalarValue::Int32(Some(value)) - } -} - -impl From for ScalarValue { - fn from(value: i64) -> Self { - ScalarValue::Int64(Some(value)) - } -} - -impl From for ScalarValue { - fn from(value: bool) -> Self { - ScalarValue::Boolean(Some(value)) - } -} - -impl From for ScalarValue { - fn from(value: u8) -> Self { - ScalarValue::UInt8(Some(value)) - } -} - -impl From for ScalarValue { - fn from(value: u16) -> Self { - ScalarValue::UInt16(Some(value)) - } -} +macro_rules! impl_scalar { + ($ty:ty, $scalar:tt) => { + impl From<$ty> for ScalarValue { + fn from(value: $ty) -> Self { + ScalarValue::$scalar(Some(value)) + } + } -impl From for ScalarValue { - fn from(value: u32) -> Self { - ScalarValue::UInt32(Some(value)) - } + impl From> for ScalarValue { + fn from(value: Option<$ty>) -> Self { + ScalarValue::$scalar(value) + } + } + }; } -impl From for ScalarValue { - fn from(value: u64) -> Self { - ScalarValue::UInt64(Some(value)) - } -} +impl_scalar!(f64, Float64); +impl_scalar!(f32, Float32); +impl_scalar!(i8, Int8); +impl_scalar!(i16, Int16); +impl_scalar!(i32, Int32); +impl_scalar!(i64, Int64); +impl_scalar!(bool, Boolean); +impl_scalar!(u8, UInt8); +impl_scalar!(u16, UInt16); +impl_scalar!(u32, UInt32); +impl_scalar!(u64, UInt64); macro_rules! 
impl_try_from { ($SCALAR:ident, $NATIVE:ident) => { @@ -711,9 +518,7 @@ impl TryFrom<&DataType> for ScalarValue { DataType::Timestamp(TimeUnit::Nanosecond, _) => { ScalarValue::TimestampNanosecond(None) } - DataType::List(ref nested_type) => { - ScalarValue::List(None, nested_type.data_type().clone()) - } + DataType::List(ref nested_type) => ScalarValue::List(None, datatype.clone()), _ => { return Err(DataFusionError::NotImplemented(format!( "Can't create a scalar of type \"{:?}\"", @@ -775,17 +580,13 @@ impl fmt::Display for ScalarValue { )?, None => write!(f, "NULL")?, }, - ScalarValue::List(e, _) => match e { - Some(l) => write!( - f, - "{}", - l.iter() - .map(|v| format!("{}", v)) - .collect::>() - .join(",") - )?, - None => write!(f, "NULL")?, - }, + ScalarValue::List(e, _) => { + if let Some(e) = e { + write!(f, "{}", e)? + } else { + write!(f, "NULL")? + } + } ScalarValue::Date32(e) => format_option!(f, e)?, ScalarValue::Date64(e) => format_option!(f, e)?, ScalarValue::IntervalDayTime(e) => format_option!(f, e)?, @@ -840,42 +641,6 @@ impl fmt::Debug for ScalarValue { } } -/// Trait used to map a NativeTime to a ScalarType. -pub trait ScalarType { - /// returns a scalar from an optional T - fn scalar(r: Option) -> ScalarValue; -} - -impl ScalarType for Float32Type { - fn scalar(r: Option) -> ScalarValue { - ScalarValue::Float32(r) - } -} - -impl ScalarType for TimestampSecondType { - fn scalar(r: Option) -> ScalarValue { - ScalarValue::TimestampSecond(r) - } -} - -impl ScalarType for TimestampMillisecondType { - fn scalar(r: Option) -> ScalarValue { - ScalarValue::TimestampMillisecond(r) - } -} - -impl ScalarType for TimestampMicrosecondType { - fn scalar(r: Option) -> ScalarValue { - ScalarValue::TimestampMicrosecond(r) - } -} - -impl ScalarType for TimestampNanosecondType { - fn scalar(r: Option) -> ScalarValue { - ScalarValue::TimestampNanosecond(r) - } -} - #[cfg(test)] mod tests { use super::*; @@ -883,37 +648,13 @@ mod tests { #[test] fn scalar_list_null_to_array() { let list_array_ref = ScalarValue::List(None, DataType::UInt64).to_array(); - let list_array = list_array_ref.as_any().downcast_ref::().unwrap(); + let list_array = list_array_ref + .as_any() + .downcast_ref::>() + .unwrap(); assert!(list_array.is_null(0)); assert_eq!(list_array.len(), 1); assert_eq!(list_array.values().len(), 0); } - - #[test] - fn scalar_list_to_array() { - let list_array_ref = ScalarValue::List( - Some(vec![ - ScalarValue::UInt64(Some(100)), - ScalarValue::UInt64(None), - ScalarValue::UInt64(Some(101)), - ]), - DataType::UInt64, - ) - .to_array(); - - let list_array = list_array_ref.as_any().downcast_ref::().unwrap(); - assert_eq!(list_array.len(), 1); - assert_eq!(list_array.values().len(), 3); - - let prim_array_ref = list_array.value(0); - let prim_array = prim_array_ref - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!(prim_array.len(), 3); - assert_eq!(prim_array.value(0), 100); - assert!(prim_array.is_null(1)); - assert_eq!(prim_array.value(2), 101); - } } diff --git a/datafusion/src/sql/planner.rs b/datafusion/src/sql/planner.rs index 34c5901b450a2..7a440aae77931 100644 --- a/datafusion/src/sql/planner.rs +++ b/datafusion/src/sql/planner.rs @@ -39,7 +39,9 @@ use crate::{ sql::parser::{CreateExternalTable, FileType, Statement as DFStatement}, }; -use arrow::datatypes::*; +use arrow2::datatypes::*; +use arrow2::types::days_ms; + use hashbrown::HashMap; use crate::prelude::JoinType; @@ -1305,7 +1307,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { )))); } - let result: i64 = 
(result_days << 32) | result_millis; + let result = days_ms::new(result_days as i32, result_millis as i32); Ok(Expr::Literal(ScalarValue::IntervalDayTime(Some(result)))) } diff --git a/datafusion/src/test/exec.rs b/datafusion/src/test/exec.rs index 04cd29530c016..3ec49548ea8c4 100644 --- a/datafusion/src/test/exec.rs +++ b/datafusion/src/test/exec.rs @@ -17,13 +17,16 @@ //! Simple iterator over batches for use in testing +use std::sync::Arc; use std::task::{Context, Poll}; -use arrow::{ - datatypes::SchemaRef, error::Result as ArrowResult, record_batch::RecordBatch, +use arrow2::{ + datatypes::Schema, error::Result as ArrowResult, record_batch::RecordBatch, }; use futures::Stream; +type SchemaRef = Arc; + use crate::physical_plan::RecordBatchStream; /// Index into the data that has been returned so far @@ -97,6 +100,6 @@ impl Stream for TestStream { impl RecordBatchStream for TestStream { /// Get the schema fn schema(&self) -> SchemaRef { - self.data[0].schema() + self.data[0].schema().clone() } } diff --git a/datafusion/src/test/mod.rs b/datafusion/src/test/mod.rs index 926a692261691..4a2b90d26f5ea 100644 --- a/datafusion/src/test/mod.rs +++ b/datafusion/src/test/mod.rs @@ -17,22 +17,22 @@ //! Common unit test utility methods -use crate::datasource::{MemTable, TableProvider}; -use crate::error::Result; -use crate::logical_plan::{LogicalPlan, LogicalPlanBuilder}; -use array::{ - Array, ArrayRef, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, -}; -use arrow::array::{self, Int32Array}; -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use arrow::record_batch::RecordBatch; use std::fs::File; use std::io::prelude::*; use std::io::{BufReader, BufWriter}; use std::sync::Arc; +use std::{env, error::Error, path::PathBuf}; + use tempfile::TempDir; +use arrow2::array::*; +use arrow2::datatypes::*; +use arrow2::record_batch::RecordBatch; + +use crate::datasource::{MemTable, TableProvider}; +use crate::error::Result; +use crate::logical_plan::{LogicalPlan, LogicalPlanBuilder}; + pub fn create_table_dual() -> Arc { let dual_schema = Arc::new(Schema::new(vec![ Field::new("id", DataType::Int32, false), @@ -41,8 +41,8 @@ pub fn create_table_dual() -> Arc { let batch = RecordBatch::try_new( dual_schema.clone(), vec![ - Arc::new(array::Int32Array::from(vec![1])), - Arc::new(array::StringArray::from(vec!["a"])), + Arc::new(Int32Array::from_slice(&[1])), + Arc::new(Utf8Array::::from_slice(&["a"])), ], ) .unwrap(); @@ -52,7 +52,7 @@ pub fn create_table_dual() -> Arc { /// Generated partitioned copy of a CSV file pub fn create_partitioned_csv(filename: &str, partitions: usize) -> Result { - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = arrow_test_data(); let path = format!("{}/csv/{}", testdata, filename); let tmp_dir = TempDir::new()?; @@ -92,7 +92,7 @@ pub fn create_partitioned_csv(filename: &str, partitions: usize) -> Result SchemaRef { +pub fn aggr_test_schema() -> Arc { Arc::new(Schema::new(vec![ Field::new("c1", DataType::Utf8, false), Field::new("c2", DataType::UInt32, false), @@ -145,9 +145,9 @@ pub fn build_table_i32( RecordBatch::try_new( Arc::new(schema), vec![ - Arc::new(Int32Array::from(a.1.clone())), - Arc::new(Int32Array::from(b.1.clone())), - Arc::new(Int32Array::from(c.1.clone())), + Arc::new(Int32Array::from_slice(a.1)), + Arc::new(Int32Array::from_slice(b.1)), + Arc::new(Int32Array::from_slice(c.1)), ], ) .unwrap() @@ -165,11 +165,10 @@ pub fn table_with_sequence( seq_end: i32, ) -> 
Result<Arc<dyn TableProvider>> { let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)])); - let arr = Arc::new(Int32Array::from((seq_start..=seq_end).collect::<Vec<i32>>())); - let partitions = vec![vec![RecordBatch::try_new( - schema.clone(), - vec![arr as ArrayRef], - )?]]; + let arr = Arc::new(Int32Array::from_slice( + &(seq_start..=seq_end).collect::<Vec<i32>>(), + )); + let partitions = vec![vec![RecordBatch::try_new(schema.clone(), vec![arr])?]]; Ok(Arc::new(MemTable::try_new(schema, partitions)?)) } @@ -179,8 +178,7 @@ pub fn make_partition(sz: i32) -> RecordBatch { let seq_end = sz; let values = (seq_start..seq_end).collect::<Vec<i32>>(); let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)])); - let arr = Arc::new(Int32Array::from(values)); - let arr = arr as ArrayRef; + let arr = Arc::new(Int32Array::from_slice(&values)); RecordBatch::try_new(schema, vec![arr]).unwrap() } @@ -188,7 +186,7 @@ pub fn make_partition(sz: i32) -> RecordBatch { /// Return a new table provider containing all of the supported timestamp types pub fn table_with_timestamps() -> Arc<dyn TableProvider> { let batch = make_timestamps(); - let schema = batch.schema(); + let schema = batch.schema().clone(); let partitions = vec![vec![batch]]; Arc::new(MemTable::try_new(schema, partitions).unwrap()) } @@ -242,13 +240,17 @@ pub fn make_timestamps() -> RecordBatch { .map(|(i, _)| format!("Row {}", i)) .collect::<Vec<_>>(); - let arr_nanos = TimestampNanosecondArray::from_opt_vec(ts_nanos, None); - let arr_micros = TimestampMicrosecondArray::from_opt_vec(ts_micros, None); - let arr_millis = TimestampMillisecondArray::from_opt_vec(ts_millis, None); - let arr_secs = TimestampSecondArray::from_opt_vec(ts_secs, None); + let arr_nanos = Primitive::<i64>::from(ts_nanos) + .to(DataType::Timestamp(TimeUnit::Nanosecond, None)); + let arr_micros = Primitive::<i64>::from(ts_micros) + .to(DataType::Timestamp(TimeUnit::Microsecond, None)); + let arr_millis = Primitive::<i64>::from(ts_millis) + .to(DataType::Timestamp(TimeUnit::Millisecond, None)); + let arr_secs = + Primitive::<i64>::from(ts_secs).to(DataType::Timestamp(TimeUnit::Second, None)); let names = names.iter().map(|s| s.as_str()).collect::<Vec<&str>>(); - let arr_names = StringArray::from(names); + let arr_names = Utf8Array::<i32>::from(&names); let schema = Schema::new(vec![ Field::new("nanos", arr_nanos.data_type().clone(), false), @@ -292,7 +294,7 @@ macro_rules! assert_batches_eq { let expected_lines: Vec<String> = $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); - let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap(); + let formatted = arrow2::util::pretty::pretty_format_batches($CHUNKS).unwrap(); let actual_lines: Vec<&str> = formatted.trim().lines().collect(); @@ -326,7 +328,7 @@ macro_rules! assert_batches_sorted_eq { expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() } - let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS).unwrap(); + let formatted = arrow2::util::pretty::pretty_format_batches($CHUNKS).unwrap(); // fix for windows: \r\n --> let mut actual_lines: Vec<&str> = formatted.trim().lines().collect(); @@ -344,3 +346,75 @@ macro_rules! assert_batches_sorted_eq { ); }; } + +/// Returns the arrow test data directory, which is by default stored +/// in a git submodule rooted at `arrow/testing/data`. +/// +/// The default can be overridden by the optional environment +/// variable `ARROW_TEST_DATA` +/// +/// panics when the directory can not be found. 
+/// +/// Example: +/// ``` +/// let testdata = crate::test::arrow_test_data(); +/// let csvdata = format!("{}/csv/aggregate_test_100.csv", testdata); +/// assert!(std::path::PathBuf::from(csvdata).exists()); +/// ``` +pub fn arrow_test_data() -> String { + match get_data_dir("ARROW_TEST_DATA", "testing/arrow-testing/data") { + Ok(pb) => pb.display().to_string(), + Err(err) => panic!("failed to get arrow data dir: {}", err), + } +} + +/// Returns a directory path for finding test data. +/// +/// udf_env: name of an environment variable +/// +/// submodule_dir: fallback path (relative to CARGO_MANIFEST_DIR) +/// +/// Returns either: +/// The path referred to in `udf_env` if that variable is set and refers to a directory +/// The submodule_data directory relative to CARGO_MANIFEST_PATH +fn get_data_dir( + udf_env: &str, + submodule_data: &str, +) -> std::result::Result<PathBuf, Box<dyn Error>> { + // Try user defined env. + if let Ok(dir) = env::var(udf_env) { + let trimmed = dir.trim().to_string(); + if !trimmed.is_empty() { + let pb = PathBuf::from(trimmed); + if pb.is_dir() { + return Ok(pb); + } else { + return Err(format!( + "the data dir `{}` defined by env {} not found", + pb.display().to_string(), + udf_env + ) + .into()); + } + } + } + + // The env is undefined or its value is trimmed to empty, let's try default dir. + + // env "CARGO_MANIFEST_DIR" is "the directory containing the manifest of your package", + // set by `cargo run` or `cargo test`, see: + // https://doc.rust-lang.org/cargo/reference/environment-variables.html + let dir = env!("CARGO_MANIFEST_DIR"); + + let pb = PathBuf::from(dir).join(submodule_data); + if pb.is_dir() { + Ok(pb) + } else { + Err(format!( + "env `{}` is undefined or has empty value, and the pre-defined data dir `{}` not found\n\ + HINT: try running `git submodule update --init`", + udf_env, + pb.display().to_string(), + ).into()) + } +} diff --git a/datafusion/tests/custom_sources.rs b/datafusion/tests/custom_sources.rs index b39f47bba07b1..851640b2b2997 100644 --- a/datafusion/tests/custom_sources.rs +++ b/datafusion/tests/custom_sources.rs @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::Int32Array; -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use arrow::error::Result as ArrowResult; -use arrow::record_batch::RecordBatch; +use arrow2::array::Int32Array; +use arrow2::datatypes::{DataType, Field, Schema, SchemaRef}; +use arrow2::error::Result as ArrowResult; +use arrow2::record_batch::RecordBatch; use datafusion::{ datasource::{datasource::Statistics, TableProvider}, diff --git a/datafusion/tests/dataframe.rs b/datafusion/tests/dataframe.rs index b93e21f4ababb..b6465bcb41c09 100644 --- a/datafusion/tests/dataframe.rs +++ b/datafusion/tests/dataframe.rs @@ -17,8 +17,8 @@ use std::sync::Arc; -use arrow::datatypes::{DataType, Field, Schema}; -use arrow::{ +use arrow2::datatypes::{DataType, Field, Schema}; +use arrow2::{ array::{Int32Array, StringArray}, record_batch::RecordBatch, }; diff --git a/datafusion/tests/provider_filter_pushdown.rs b/datafusion/tests/provider_filter_pushdown.rs index 0bf67bea8b9d4..1696dcb15dc7a 100644 --- a/datafusion/tests/provider_filter_pushdown.rs +++ b/datafusion/tests/provider_filter_pushdown.rs @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. 
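// A minimal usage sketch of the `arrow_test_data` / `get_data_dir` helpers added in
// datafusion/src/test/mod.rs above, assuming the code lives in that module and the
// `testing/arrow-testing/data` submodule is checked out (or `ARROW_TEST_DATA` points
// at a copy of the arrow testing data). The test name below is illustrative only;
// the csv file is the one the existing tests already read.
#[test]
fn arrow_test_data_locates_aggregate_csv() {
    // Resolves ARROW_TEST_DATA first, then falls back to the submodule path,
    // panicking with a `git submodule update --init` hint otherwise.
    let testdata = arrow_test_data();
    let csv = format!("{}/csv/aggregate_test_100.csv", testdata);
    assert!(std::path::PathBuf::from(csv).exists());
}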
-use arrow::array::{as_primitive_array, Int32Builder, UInt64Array}; -use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use arrow::record_batch::RecordBatch; +use arrow2::array::{as_primitive_array, Int32Builder, UInt64Array}; +use arrow2::datatypes::{DataType, Field, Schema, SchemaRef}; +use arrow2::record_batch::RecordBatch; use async_trait::async_trait; use datafusion::datasource::datasource::{ Statistics, TableProvider, TableProviderFilterPushDown, diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index 17e0f13609a38..aafa5e667afd3 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -21,15 +21,7 @@ use std::sync::Arc; use chrono::prelude::*; use chrono::Duration; -extern crate arrow; -extern crate datafusion; - -use arrow::{array::*, datatypes::TimeUnit}; -use arrow::{datatypes::Int32Type, datatypes::Int64Type, record_batch::RecordBatch}; -use arrow::{ - datatypes::{DataType, Field, Schema, SchemaRef}, - util::display::array_value_to_string, -}; +use arrow2::{array::*, datatypes::*, record_batch::RecordBatch}; use datafusion::logical_plan::LogicalPlan; use datafusion::prelude::*; @@ -126,7 +118,7 @@ async fn parquet_query() { #[tokio::test] async fn parquet_single_nan_schema() { let mut ctx = ExecutionContext::new(); - let testdata = arrow::util::test_util::parquet_test_data(); + let testdata = crate::test::parquet_test_data(); ctx.register_parquet("single_nan", &format!("{}/single_nan.parquet", testdata)) .unwrap(); let sql = "SELECT mycol FROM single_nan"; @@ -144,7 +136,7 @@ async fn parquet_single_nan_schema() { #[ignore = "Test ignored, will be enabled as part of the nested Parquet reader"] async fn parquet_list_columns() { let mut ctx = ExecutionContext::new(); - let testdata = arrow::util::test_util::parquet_test_data(); + let testdata = crate::test::parquet_test_data(); ctx.register_parquet( "list_columns", &format!("{}/list_columns.parquet", testdata), @@ -1610,7 +1602,7 @@ fn aggr_test_schema() -> SchemaRef { } async fn register_aggregate_csv_by_sql(ctx: &mut ExecutionContext) { - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = crate::test::arrow_test_data(); // TODO: The following c9 should be migrated to UInt32 and c10 should be UInt64 once // unsigned is supported. 
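// A hedged sketch of the array-construction pattern this patch applies throughout
// the test helpers: arrow's `Int32Array::from(vec![..])` / `StringArray::from(..)`
// become arrow2 `from_slice` constructors, with an explicit offset type parameter
// for string arrays. Only calls already used elsewhere in this patch appear here;
// the function and variable names are illustrative only.
use arrow2::array::{Int32Array, Utf8Array};

fn example_test_columns() -> (Int32Array, Utf8Array<i32>) {
    // Build fixed-size test columns directly from slices.
    let ints = Int32Array::from_slice(&[1, 2, 3]);
    let names = Utf8Array::<i32>::from_slice(&["a", "b", "c"]);
    (ints, names)
}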
@@ -1650,7 +1642,7 @@ async fn register_aggregate_csv_by_sql(ctx: &mut ExecutionContext) { } fn register_aggregate_csv(ctx: &mut ExecutionContext) -> Result<()> { - let testdata = arrow::util::test_util::arrow_test_data(); + let testdata = crate::test::arrow_test_data(); let schema = aggr_test_schema(); ctx.register_csv( "aggregate_test_100", @@ -1677,7 +1669,7 @@ fn register_aggregate_simple_csv(ctx: &mut ExecutionContext) -> Result<()> { } fn register_alltypes_parquet(ctx: &mut ExecutionContext) { - let testdata = arrow::util::test_util::parquet_test_data(); + let testdata = crate::test::parquet_test_data(); ctx.register_parquet( "alltypes_plain", &format!("{}/alltypes_plain.parquet", testdata), @@ -2936,7 +2928,7 @@ async fn test_cast_expressions_error() -> Result<()> { Ok(_) => panic!("expected error"), Err(e) => { assert!(e.to_string().contains( - "Cast error: Cannot cast string 'c' to value of arrow::datatypes::types::Int32Type type" + "Cast error: Cannot cast string 'c' to value of arrow2::datatypes::types::Int32Type type" )) } } @@ -2975,7 +2967,7 @@ async fn test_physical_plan_display_indent() { " CsvExec: source=Path(ARROW_TEST_DATA/csv/aggregate_test_100.csv: [ARROW_TEST_DATA/csv/aggregate_test_100.csv]), has_header=true", ]; - let data_path = arrow::util::test_util::arrow_test_data(); + let data_path = crate::test::arrow_test_data(); let actual = format!("{}", displayable(physical_plan.as_ref()).indent()) .trim() .lines() diff --git a/datafusion/tests/user_defined_plan.rs b/datafusion/tests/user_defined_plan.rs index 8914c05e8f88f..9ea7248c786f3 100644 --- a/datafusion/tests/user_defined_plan.rs +++ b/datafusion/tests/user_defined_plan.rs @@ -60,7 +60,7 @@ use futures::{Stream, StreamExt}; -use arrow::{ +use arrow2::{ array::{Int64Array, StringArray}, datatypes::SchemaRef, error::ArrowError,