Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added support to read decimal from csv. (#602)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Nov 14, 2021
1 parent 73ee16d commit 42c6fea
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 0 deletions.
47 changes: 47 additions & 0 deletions src/io/csv/read_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,50 @@ where
Arc::new(PrimitiveArray::<T>::from_trusted_len_iter(iter).to(datatype))
}

/// Counts the bytes in `bytes` that are not the ASCII digit `0`.
#[inline]
fn significant_bytes(bytes: &[u8]) -> usize {
    bytes.iter().filter(|byte| **byte != b'0').count()
}

/// Deserializes the textual decimal in `bytes` into the unscaled `i128` of a decimal with
/// the given `precision` and `scale`, i.e. the written number multiplied by `10^scale`.
///
/// Accepted input is an optional `+`/`-` sign, one or more ASCII digits and, optionally,
/// a `.` followed by one or more ASCII digits. Trailing zeros of the fraction are ignored.
///
/// Returns `None` when:
/// * the input is malformed (empty, missing integer or fractional digits, non-digit
///   bytes, more than one `.`);
/// * the fraction needs more than `scale` digits;
/// * the number of non-zero digits exceeds `precision`;
/// * the unscaled value does not fit in an `i128`.
#[inline]
fn deserialize_decimal(bytes: &[u8], precision: usize, scale: usize) -> Option<i128> {
    // Peel off an optional sign so that it is neither counted as a digit nor lost when
    // the integer part is zero (e.g. `-0.5`).
    let (negative, unsigned) = match bytes.split_first() {
        Some((&b'-', rest)) => (true, rest),
        Some((&b'+', rest)) => (false, rest),
        _ => (false, bytes),
    };

    // Split at the first `.` only: a second `.` stays in the fraction and is rejected
    // below as a non-digit instead of being silently dropped.
    let mut parts = unsigned.splitn(2, |byte| *byte == b'.');
    let integer = parts.next()?;
    let fraction = parts.next();
    if integer.is_empty() || fraction.map_or(false, |digits| digits.is_empty()) {
        return None;
    }

    // Trailing zeros carry no information (`1.10` == `1.1`) and must not count against
    // the scale.
    let fraction = fraction.unwrap_or_default();
    let fraction_len = fraction
        .iter()
        .rposition(|byte| *byte != b'0')
        .map_or(0, |last| last + 1);
    let fraction = &fraction[..fraction_len];

    if fraction.len() > scale {
        return None;
    }
    // NOTE(review): this counts non-zero digits only, so zeros in the integer part do not
    // count against the precision (e.g. `10.0` is accepted for `Decimal(1, 0)`, which the
    // existing CSV read tests expect). Tightening it is a behavior change that has to be
    // made together with those tests.
    if significant_bytes(integer) + significant_bytes(fraction) > precision {
        return None;
    }

    // Accumulate integer digits, fraction digits and the zeros needed to pad the fraction
    // up to `scale` digits; this yields `integer * 10^scale + fraction * 10^(scale - len)`
    // with every step checked for overflow.
    let padding = scale - fraction.len();
    let magnitude = integer
        .iter()
        .chain(fraction)
        .copied()
        .chain(std::iter::repeat(b'0').take(padding))
        .try_fold(0i128, |acc, byte| {
            if byte.is_ascii_digit() {
                acc.checked_mul(10)?.checked_add(i128::from(byte - b'0'))
            } else {
                None
            }
        })?;

    Some(if negative { -magnitude } else { magnitude })
}

fn deserialize_boolean<B, F>(rows: &[B], column: usize, op: F) -> Arc<dyn Array>
where
B: ByteRecordGeneric,
Expand Down Expand Up @@ -193,6 +237,9 @@ pub(crate) fn deserialize_column<B: ByteRecordGeneric>(
})
})
}
Decimal(precision, scale) => deserialize_primitive(rows, column, datatype, |x| {
deserialize_decimal(x, precision, scale)
}),
Utf8 => deserialize_utf8::<i32, _>(rows, column),
LargeUtf8 => deserialize_utf8::<i64, _>(rows, column),
Binary => deserialize_binary::<i32, _>(rows, column),
Expand Down
27 changes: 27 additions & 0 deletions tests/it/io/csv/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,33 @@ fn date64() -> Result<()> {
Ok(())
}

/// Reads a single CSV column as `Decimal(2, 1)`: the entries with one fractional digit
/// are expected to deserialize to their unscaled value, while `1.22` is expected to come
/// back as a null slot.
#[test]
fn decimal() -> Result<()> {
    let expected =
        Int128Array::from(&[Some(11), Some(12), None, Some(13)]).to(DataType::Decimal(2, 1));

    let csv = "1.1,\n1.2,\n1.22,\n1.3,\n";
    let array = test_deserialize(csv, DataType::Decimal(2, 1))?;

    assert_eq!(expected, array.as_ref());
    Ok(())
}

/// Reads a single CSV column as `Decimal(2, 2)`: values below one with at most two
/// fractional digits are expected to deserialize, while `0.222` is expected to come back
/// as a null slot.
#[test]
fn decimal_only_scale() -> Result<()> {
    let expected =
        Int128Array::from(&[Some(1), Some(12), None, Some(13)]).to(DataType::Decimal(2, 2));

    let csv = "0.01,\n0.12,\n0.222,\n0.13,\n";
    let array = test_deserialize(csv, DataType::Decimal(2, 2))?;

    assert_eq!(expected, array.as_ref());
    Ok(())
}

/// Reads a single CSV column as `Decimal(1, 0)`: `1`, `1.0` and `10.0` are expected to
/// deserialize to 1, 1 and 10, while `1.1` is expected to come back as a null slot.
#[test]
fn decimal_only_integer() -> Result<()> {
    let expected =
        Int128Array::from(&[Some(1), Some(1), None, Some(10)]).to(DataType::Decimal(1, 0));

    let csv = "1,\n1.0,\n1.1,\n10.0,\n";
    let array = test_deserialize(csv, DataType::Decimal(1, 0))?;

    assert_eq!(expected, array.as_ref());
    Ok(())
}

#[test]
fn boolean() -> Result<()> {
let input = vec!["true", "True", "False", "F", "t"];
Expand Down

0 comments on commit 42c6fea

Please sign in to comment.