From e05ccfcd6783b6e524f9635160767eafd77ca95f Mon Sep 17 00:00:00 2001 From: Guillaume Balaine Date: Thu, 2 Sep 2021 18:37:42 +0200 Subject: [PATCH] refactor: rename infer_avro_schema_from_reader to read_avro_schema_from_reader --- datafusion/src/avro_to_arrow/arrow_array_reader.rs | 2 +- datafusion/src/avro_to_arrow/mod.rs | 10 +++++----- datafusion/src/avro_to_arrow/reader.rs | 11 +++++------ datafusion/src/datasource/avro.rs | 10 +++++----- datafusion/src/physical_plan/avro.rs | 10 +++++----- 5 files changed, 21 insertions(+), 22 deletions(-) diff --git a/datafusion/src/avro_to_arrow/arrow_array_reader.rs b/datafusion/src/avro_to_arrow/arrow_array_reader.rs index 359d443d55126..cc8ed8e669426 100644 --- a/datafusion/src/avro_to_arrow/arrow_array_reader.rs +++ b/datafusion/src/avro_to_arrow/arrow_array_reader.rs @@ -979,7 +979,7 @@ mod test { let testdata = crate::test_util::arrow_test_data(); let filename = format!("{}/avro/{}", testdata, name); let builder = ReaderBuilder::new() - .infer_schema() + .read_schema() .with_batch_size(batch_size); builder.build(File::open(filename).unwrap()).unwrap() } diff --git a/datafusion/src/avro_to_arrow/mod.rs b/datafusion/src/avro_to_arrow/mod.rs index e0c05b2774653..531b1092e1d62 100644 --- a/datafusion/src/avro_to_arrow/mod.rs +++ b/datafusion/src/avro_to_arrow/mod.rs @@ -31,17 +31,17 @@ pub use reader::{Reader, ReaderBuilder}; use std::io::{Read, Seek}; #[cfg(feature = "avro")] -/// Infer Avro schema given a reader -pub fn infer_avro_schema_from_reader(reader: &mut R) -> Result { +/// Read Avro schema given a reader +pub fn read_avro_schema_from_reader(reader: &mut R) -> Result { let avro_reader = avro_rs::Reader::new(reader)?; let schema = avro_reader.writer_schema(); schema::to_arrow_schema(schema) } #[cfg(not(feature = "avro"))] -/// Infer Avro schema given a reader (requires the avro feature) -pub fn infer_avro_schema_from_reader(_: &mut R) -> Result { +/// Read Avro schema given a reader (requires the avro feature) 
+pub fn read_avro_schema_from_reader(_: &mut R) -> Result { Err(crate::error::DataFusionError::NotImplemented( - "cannot infer avro schema without the 'avro' feature enabled".to_string(), + "cannot read avro schema without the 'avro' feature enabled".to_string(), )) } diff --git a/datafusion/src/avro_to_arrow/reader.rs b/datafusion/src/avro_to_arrow/reader.rs index c316122b8e219..8baad14746d37 100644 --- a/datafusion/src/avro_to_arrow/reader.rs +++ b/datafusion/src/avro_to_arrow/reader.rs @@ -28,8 +28,7 @@ use std::sync::Arc; pub struct ReaderBuilder { /// Optional schema for the Avro file /// - /// If the schema is not supplied, the reader will try to infer the schema - /// based on the Avro structure. + /// If the schema is not supplied, the reader will try to read the schema. schema: Option, /// Batch size (number of records to load each time) /// @@ -65,7 +64,7 @@ impl ReaderBuilder { /// let file = File::open("test/data/basic.avro").unwrap(); /// /// // create a builder, inferring the schema with the first 100 records - /// let builder = crate::datafusion::avro_to_arrow::ReaderBuilder::new().infer_schema().with_batch_size(100); + /// let builder = crate::datafusion::avro_to_arrow::ReaderBuilder::new().read_schema().with_batch_size(100); /// /// let reader = builder.build::(file).unwrap(); /// @@ -83,7 +82,7 @@ impl ReaderBuilder { } /// Set the Avro reader to infer the schema of the file - pub fn infer_schema(mut self) -> Self { + pub fn read_schema(mut self) -> Self { // remove any schema that is set self.schema = None; self @@ -111,7 +110,7 @@ impl ReaderBuilder { // check if schema should be inferred let schema = match self.schema { Some(schema) => schema, - None => Arc::new(super::infer_avro_schema_from_reader(&mut source)?), + None => Arc::new(super::read_avro_schema_from_reader(&mut source)?), }; source.seek(SeekFrom::Start(0))?; Reader::try_new(source, schema, self.batch_size, self.projection) @@ -180,7 +179,7 @@ mod tests { fn build_reader(name: &str) 
-> Reader { let testdata = crate::test_util::arrow_test_data(); let filename = format!("{}/avro/{}", testdata, name); - let builder = ReaderBuilder::new().infer_schema().with_batch_size(64); + let builder = ReaderBuilder::new().read_schema().with_batch_size(64); builder.build(File::open(filename).unwrap()).unwrap() } diff --git a/datafusion/src/datasource/avro.rs b/datafusion/src/datasource/avro.rs index b094edb7caedc..7b39bedae478c 100644 --- a/datafusion/src/datasource/avro.rs +++ b/datafusion/src/datasource/avro.rs @@ -50,14 +50,14 @@ pub struct AvroFile { } impl AvroFile { - /// Attempt to initialize a `AvroFile` from a path. The schema can be inferred automatically. + /// Attempt to initialize an `AvroFile` from a path. The schema can be read automatically. pub fn try_new(path: &str, options: AvroReadOptions) -> Result { let schema = if let Some(schema) = options.schema { schema } else { let filenames = common::build_checked_file_list(path, options.file_extension)?; - Arc::new(AvroExec::try_infer_schema(&filenames)?) + Arc::new(AvroExec::try_read_schema(&filenames)?) }; Ok(Self { @@ -89,8 +89,8 @@ impl AvroFile { }) } - /// Attempt to initialize an AvroFile from a reader impls Seek. The schema can be inferred automatically. - pub fn try_new_from_reader_infer_schema( + /// Attempt to initialize an AvroFile from a reader that implements Seek. The schema can be read automatically. + pub fn try_new_from_reader_schema( mut reader: R, options: AvroReadOptions, ) -> Result { @@ -98,7 +98,7 @@ impl AvroFile { if let Some(schema) = options.schema { schema } else { - Arc::new(crate::avro_to_arrow::infer_avro_schema_from_reader( + Arc::new(crate::avro_to_arrow::read_avro_schema_from_reader( &mut reader, )?) 
} diff --git a/datafusion/src/physical_plan/avro.rs b/datafusion/src/physical_plan/avro.rs index c7536b0dd6c66..54e47d1c60303 100644 --- a/datafusion/src/physical_plan/avro.rs +++ b/datafusion/src/physical_plan/avro.rs @@ -19,7 +19,7 @@ #[cfg(feature = "avro")] use super::RecordBatchStream; use super::{common, source::Source, ExecutionPlan, Partitioning}; -use crate::avro_to_arrow::infer_avro_schema_from_reader; +use crate::avro_to_arrow::read_avro_schema_from_reader; use crate::error::{DataFusionError, Result}; use crate::physical_plan::DisplayFormatType; use arrow::datatypes::{Schema, SchemaRef}; @@ -98,7 +98,7 @@ impl AvroExec { let schema = match options.schema { Some(s) => s, - None => Arc::new(AvroExec::try_infer_schema(filenames.as_slice())?), + None => Arc::new(AvroExec::try_read_schema(filenames.as_slice())?), }; let projected_schema = match &projection { @@ -192,12 +192,12 @@ impl AvroExec { self.limit } - /// Infer schema for given Avro dataset - pub fn try_infer_schema(filenames: &[String]) -> Result { + /// Read schema for given Avro dataset + pub fn try_read_schema(filenames: &[String]) -> Result { let mut schemas = Vec::new(); for filename in filenames { let mut file = File::open(filename)?; - let schema = infer_avro_schema_from_reader(&mut file)?; + let schema = read_avro_schema_from_reader(&mut file)?; schemas.push(schema); }