Skip to content

Commit

Permalink
Merge remote-tracking branch 'my-fork/master' into merged_region
Browse files Browse the repository at this point in the history
  • Loading branch information
gongyan committed Feb 27, 2024
2 parents d816039 + 10303e4 commit 5238efc
Show file tree
Hide file tree
Showing 5 changed files with 328 additions and 26 deletions.
2 changes: 2 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

## Unreleased

- feat: added `is_error` and `get_error` methods to the `DataType` trait

## 0.24.0

- refactor (breaking): rename `DataType` enum to `Data` and `DataTypeRef` to `DataRef`
Expand Down
39 changes: 17 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,50 +45,45 @@ fn example() -> Result<(), Error> {
}
```

Note if you want to deserialize a column that may have invalid types (i.e. a float where some values may be strings), you can use Serde's `deserialize_with` field attribute:
Calamine provides helper functions to deal with invalid type values. For instance, if you
want to deserialize a column which should contain floats but may also contain invalid values
(e.g. strings), you can use the [`deserialize_as_f64_or_none`] helper function with Serde's
[`deserialize_with`](https://serde.rs/field-attrs.html) field attribute:

```rust
use serde::{DataType, Deserialize};
use calamine::{RangeDeserializerBuilder, Reader, Xlsx};
use calamine::{deserialize_as_f64_or_none, open_workbook, RangeDeserializerBuilder, Reader, Xlsx};
use serde::Deserialize;

#[derive(Deserialize)]
struct Record {
metric: String,
#[serde(deserialize_with = "de_opt_f64")]
#[serde(deserialize_with = "deserialize_as_f64_or_none")]
value: Option<f64>,
}

// Convert value cell to Some(f64) if float or int, else None
fn de_opt_f64<'de, D>(deserializer: D) -> Result<Option<f64>, D::Error>
where
D: serde::Deserializer<'de>,
{
let data = calamine::Data::deserialize(deserializer)?;
if let Some(float) = data.as_f64() {
Ok(Some(float))
} else {
Ok(None)
}
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
fn main() -> Result<(), Box<dyn std::error::Error>> {
let path = format!("{}/tests/excel.xlsx", env!("CARGO_MANIFEST_DIR"));
let mut excel: Xlsx<_> = open_workbook(path)?;

let range = excel
.worksheet_range("Sheet1")
.ok_or(calamine::Error::Msg("Cannot find Sheet1"))??;
.worksheet_range("Sheet1")
.map_err(|_| calamine::Error::Msg("Cannot find Sheet1"))?;

let iter_result =
let iter_records =
RangeDeserializerBuilder::with_headers(&["metric", "value"]).from_range(&range)?;

for result in iter_results {
for result in iter_records {
let record: Record = result?;
println!("metric={:?}, value={:?}", record.metric, record.value);
}

Ok(())
}
```

The [`deserialize_as_f64_or_none`] function discards all invalid values; if you want to
return them as `String`, you can use the [`deserialize_as_f64_or_string`] function instead.

### Reader: Simple

```rust
Expand Down
28 changes: 28 additions & 0 deletions src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ impl DataType for Data {
matches!(*self, Data::DateTimeIso(_))
}

fn is_error(&self) -> bool {
matches!(*self, Data::Error(_))
}

fn get_int(&self) -> Option<i64> {
if let Data::Int(v) = self {
Some(*v)
Expand Down Expand Up @@ -129,6 +133,13 @@ impl DataType for Data {
}
}

fn get_error(&self) -> Option<&CellErrorType> {
match self {
Data::Error(e) => Some(e),
_ => None,
}
}

fn as_string(&self) -> Option<String> {
match self {
Data::Float(v) => Some(v.to_string()),
Expand Down Expand Up @@ -382,6 +393,10 @@ impl DataType for DataRef<'_> {
matches!(*self, DataRef::DateTimeIso(_))
}

fn is_error(&self) -> bool {
matches!(*self, DataRef::Error(_))
}

fn get_int(&self) -> Option<i64> {
if let DataRef::Int(v) = self {
Some(*v)
Expand Down Expand Up @@ -438,6 +453,13 @@ impl DataType for DataRef<'_> {
}
}

fn get_error(&self) -> Option<&CellErrorType> {
match self {
DataRef::Error(e) => Some(e),
_ => None,
}
}

fn as_string(&self) -> Option<String> {
match self {
DataRef::Float(v) => Some(v.to_string()),
Expand Down Expand Up @@ -489,6 +511,9 @@ pub trait DataType {
/// Assess if datatype is a string
fn is_string(&self) -> bool;

/// Assess if datatype is an error (i.e. it holds a `CellErrorType`)
fn is_error(&self) -> bool;

/// Assess if datatype is an ISO8601 duration
#[cfg(feature = "dates")]
fn is_duration_iso(&self) -> bool;
Expand Down Expand Up @@ -525,6 +550,9 @@ pub trait DataType {
#[cfg(feature = "dates")]
fn get_duration_iso(&self) -> Option<&str>;

/// Try getting the error value as a borrowed `CellErrorType`
/// (`None` when the datatype is not an error)
fn get_error(&self) -> Option<&CellErrorType>;

/// Try converting data type into a string
fn as_string(&self) -> Option<String>;

Expand Down
87 changes: 84 additions & 3 deletions src/de.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use serde::de::value::BorrowedStrDeserializer;
use serde::de::{self, DeserializeOwned, DeserializeSeed, SeqAccess, Visitor};
use serde::{self, forward_to_deserialize_any, Deserialize};
use serde::{self, forward_to_deserialize_any, Deserialize, Deserializer};
use std::marker::PhantomData;
use std::{fmt, slice, str};

Expand Down Expand Up @@ -163,7 +163,7 @@ impl<'h, H: AsRef<str> + Clone + 'h> RangeDeserializerBuilder<'h, H> {
///
/// Ok(())
/// } else {
/// return Err(From::from("expected at least one record but got none"));
/// Err(From::from("expected at least one record but got none"))
/// }
/// }
/// ```
Expand Down Expand Up @@ -192,7 +192,7 @@ impl<'h, H: AsRef<str> + Clone + 'h> RangeDeserializerBuilder<'h, H> {
///
/// Ok(())
/// } else {
/// return Err(From::from("expected at least one record but got none"));
/// Err(From::from("expected at least one record but got none"))
/// }
/// }
/// ```
Expand All @@ -208,6 +208,87 @@ impl<'h, H: AsRef<str> + Clone + 'h> RangeDeserializerBuilder<'h, H> {
}
}

impl<'h> RangeDeserializerBuilder<'h, &str> {
/// Build a `RangeDeserializer` from this configuration and keep only selected headers
/// from the specified deserialization struct.
///
/// # Example
///
/// ```
/// # use calamine::{open_workbook, Error, RangeDeserializerBuilder, Reader, Xlsx};
/// # use serde_derive::Deserialize;
/// #[derive(Deserialize)]
/// struct Record {
/// label: String,
/// value: f64,
/// }
///
/// fn main() -> Result<(), Error> {
/// let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR"));
/// let mut workbook: Xlsx<_> = open_workbook(path)?;
/// let range = workbook.worksheet_range("Sheet1")?;
/// let mut iter =
/// RangeDeserializerBuilder::with_deserialize_headers::<Record>().from_range(&range)?;
///
/// if let Some(result) = iter.next() {
/// let record: Record = result?;
/// assert_eq!(record.label, "celsius");
/// assert_eq!(record.value, 22.2222);
///
/// Ok(())
/// } else {
/// Err(From::from("expected at least one record but got none"))
/// }
/// }
/// ```
pub fn with_deserialize_headers<'de, T>() -> Self
where
T: Deserialize<'de>,
{
struct StructFieldsDeserializer<'h> {
fields: &'h mut Option<&'static [&'static str]>,
}

impl<'de, 'h> Deserializer<'de> for StructFieldsDeserializer<'h> {
type Error = de::value::Error;

fn deserialize_any<V>(self, _visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
Err(de::Error::custom("I'm just here for the fields"))
}

fn deserialize_struct<V>(
self,
_name: &'static str,
fields: &'static [&'static str],
_visitor: V,
) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
*self.fields = Some(fields); // get the names of the deserialized fields
Err(de::Error::custom("I'm just here for the fields"))
}

serde::forward_to_deserialize_any! {
bool i8 i16 i32 i64 u8 u16 u32 u64 f32 f64 char str string bytes
byte_buf option unit unit_struct newtype_struct seq tuple
tuple_struct map enum identifier ignored_any
}
}

let mut serialized_names = None;
let _ = T::deserialize(StructFieldsDeserializer {
fields: &mut serialized_names,
});
let headers = serialized_names.unwrap_or_default();

Self::with_headers(headers)
}
}

/// A configured `Range` deserializer.
///
/// # Example
Expand Down
Loading

0 comments on commit 5238efc

Please sign in to comment.