Skip to content

Commit

Permalink
Move reader_parser to arrow-cast (#3022) (#3043)
Browse files Browse the repository at this point in the history
* Move reader_parser to arrow-cast (#3022)

* Format
  • Loading branch information
tustvold authored Nov 7, 2022
1 parent 951caed commit b7bc79b
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 145 deletions.
122 changes: 122 additions & 0 deletions arrow-cast/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

use arrow_array::types::*;
use arrow_array::ArrowPrimitiveType;
use arrow_schema::ArrowError;
use chrono::prelude::*;

Expand Down Expand Up @@ -130,6 +132,126 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result<i64, ArrowError> {
)))
}

/// Specialized parsing implementations
/// used by csv and json reader
///
/// Implementors turn a string into the native value of an Arrow primitive
/// type, reporting failure as `None` rather than as an error.
pub trait Parser: ArrowPrimitiveType {
/// Parses `string` into `Self::Native`, returning `None` when the input
/// cannot be parsed.
fn parse(string: &str) -> Option<Self::Native>;

/// Parses `string` with a caller-supplied format string.
///
/// The default implementation ignores `_format` and simply delegates to
/// `Self::parse`; implementors may override it to honour the format.
fn parse_formatted(string: &str, _format: &str) -> Option<Self::Native> {
Self::parse(string)
}
}

impl Parser for Float32Type {
    /// Parses the UTF-8 bytes of `string` as an `f32` using `lexical_core`.
    ///
    /// Returns `None` when `lexical_core` reports an error.
    fn parse(string: &str) -> Option<f32> {
        let bytes = string.as_bytes();
        lexical_core::parse::<f32>(bytes).ok()
    }
}

impl Parser for Float64Type {
    /// Parses the UTF-8 bytes of `string` as an `f64` using `lexical_core`.
    ///
    /// Returns `None` when `lexical_core` reports an error.
    fn parse(string: &str) -> Option<f64> {
        let bytes = string.as_bytes();
        lexical_core::parse::<f64>(bytes).ok()
    }
}

/// Implements [`Parser`] for one or more Arrow primitive types by forwarding
/// to the `FromStr` implementation of each type's `Native` value.
///
/// Accepts a comma-separated list of types (a single type is also fine); any
/// parse error is mapped to `None`.
macro_rules! parser_primitive {
    ($($arrow_ty:ty),+ $(,)?) => {
        $(
            impl Parser for $arrow_ty {
                fn parse(string: &str) -> Option<Self::Native> {
                    <Self::Native as ::std::str::FromStr>::from_str(string).ok()
                }
            }
        )+
    };
}
parser_primitive!(
    UInt64Type,
    UInt32Type,
    UInt16Type,
    UInt8Type,
    Int64Type,
    Int32Type,
    Int16Type,
    Int8Type,
);

impl Parser for TimestampNanosecondType {
    /// Delegates to `string_to_timestamp_nanos`, mapping any error to `None`.
    fn parse(string: &str) -> Option<i64> {
        let parsed = string_to_timestamp_nanos(string);
        parsed.ok()
    }
}

impl Parser for TimestampMicrosecondType {
    /// Parses a timestamp string and returns it at microsecond resolution.
    ///
    /// The string is parsed by `string_to_timestamp_nanos`; `None` is returned
    /// when that fails.
    fn parse(string: &str) -> Option<i64> {
        const NANOS_PER_MICROSECOND: i64 = 1_000;
        // Use floored division instead of `/`: plain division truncates toward
        // zero, which would move pre-epoch (negative) instants with a
        // sub-microsecond component *forward* in time.
        string_to_timestamp_nanos(string)
            .ok()
            .map(|nanos| nanos.div_euclid(NANOS_PER_MICROSECOND))
    }
}

impl Parser for TimestampMillisecondType {
    /// Parses a timestamp string and returns it at millisecond resolution.
    ///
    /// The string is parsed by `string_to_timestamp_nanos`; `None` is returned
    /// when that fails.
    fn parse(string: &str) -> Option<i64> {
        const NANOS_PER_MILLISECOND: i64 = 1_000_000;
        // Use floored division instead of `/`: plain division truncates toward
        // zero, which would move pre-epoch (negative) instants with a
        // sub-millisecond component *forward* in time.
        string_to_timestamp_nanos(string)
            .ok()
            .map(|nanos| nanos.div_euclid(NANOS_PER_MILLISECOND))
    }
}

impl Parser for TimestampSecondType {
    /// Parses a timestamp string and returns it at second resolution.
    ///
    /// The string is parsed by `string_to_timestamp_nanos`; `None` is returned
    /// when that fails.
    fn parse(string: &str) -> Option<i64> {
        const NANOS_PER_SECOND: i64 = 1_000_000_000;
        // Use floored division instead of `/`: plain division truncates toward
        // zero, so an instant half a second before the epoch would become
        // second 0 (the epoch itself) instead of second -1.
        string_to_timestamp_nanos(string)
            .ok()
            .map(|nanos| nanos.div_euclid(NANOS_PER_SECOND))
    }
}

// The Time32/Time64 types get the generic `parser_primitive!` implementation,
// i.e. the input is handed to `str::parse` for the type's `Native` value
// (presumably a raw integer count in the type's unit, not a formatted
// time-of-day string — confirm against the type definitions).
parser_primitive!(Time64NanosecondType);
parser_primitive!(Time64MicrosecondType);
parser_primitive!(Time32MillisecondType);
parser_primitive!(Time32SecondType);

/// Day number of 1970-01-01 in the proleptic Gregorian calendar as reported
/// by chrono's `num_days_from_ce()` (which counts 0001-01-01 as day 1).
///
/// Subtracting this from a date's `num_days_from_ce()` yields the number of
/// days relative to the Unix epoch.
const EPOCH_DAYS_FROM_CE: i32 = 719_163;

impl Parser for Date32Type {
    /// Parses `string` with `NaiveDate`'s `FromStr` implementation and returns
    /// the date as days relative to `EPOCH_DAYS_FROM_CE`, or `None` when the
    /// string is not a valid date.
    fn parse(string: &str) -> Option<i32> {
        string
            .parse::<chrono::NaiveDate>()
            .ok()
            .map(|day| day.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
    }

    /// Same as [`Parser::parse`], but the date is read according to the
    /// chrono format string `format`.
    fn parse_formatted(string: &str, format: &str) -> Option<i32> {
        chrono::NaiveDate::parse_from_str(string, format)
            .ok()
            .map(|day| day.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
    }
}

impl Parser for Date64Type {
    /// Parses `string` with `NaiveDateTime`'s `FromStr` implementation and
    /// returns its `timestamp_millis()`, or `None` when parsing fails.
    fn parse(string: &str) -> Option<i64> {
        string
            .parse::<NaiveDateTime>()
            .ok()
            .map(|parsed| parsed.timestamp_millis())
    }

    /// Parses `string` according to the chrono format string `format` and
    /// returns its `timestamp_millis()`.
    ///
    /// When the format contains any time-zone related item the input is
    /// parsed as a zone-aware `DateTime`; otherwise it is parsed as a
    /// `NaiveDateTime`. Returns `None` when parsing fails.
    fn parse_formatted(string: &str, format: &str) -> Option<i64> {
        use chrono::format::{Fixed, Item, StrftimeItems};

        // Scan the format specification for any item that carries zone or
        // offset information.
        let mentions_zone = StrftimeItems::new(format).any(|item| {
            matches!(
                item,
                Item::Fixed(
                    Fixed::RFC2822
                        | Fixed::RFC3339
                        | Fixed::TimezoneName
                        | Fixed::TimezoneOffsetColon
                        | Fixed::TimezoneOffsetColonZ
                        | Fixed::TimezoneOffset
                        | Fixed::TimezoneOffsetZ
                )
            )
        });

        let millis = if mentions_zone {
            chrono::DateTime::parse_from_str(string, format)
                .ok()?
                .timestamp_millis()
        } else {
            NaiveDateTime::parse_from_str(string, format)
                .ok()?
                .timestamp_millis()
        };
        Some(millis)
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ use crate::array::{
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::record_batch::{RecordBatch, RecordBatchOptions};
use crate::util::reader_parser::Parser;
use arrow_cast::parse::Parser;

use crate::csv::map_csv_error;
use csv_crate::{ByteRecord, StringRecord};
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/json/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::record_batch::{RecordBatch, RecordBatchOptions};
use crate::util::bit_util;
use crate::util::reader_parser::Parser;
use crate::{array::*, buffer::Buffer};
use arrow_cast::parse::Parser;

#[derive(Debug, Clone)]
enum InferredType {
Expand Down
1 change: 0 additions & 1 deletion arrow/src/util/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,3 @@ pub mod string_writer;
pub mod test_util;

pub use arrow_cast::display;
pub(crate) mod reader_parser;
142 changes: 0 additions & 142 deletions arrow/src/util/reader_parser.rs

This file was deleted.

0 comments on commit b7bc79b

Please sign in to comment.