Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shared formula #425

Merged
merged 3 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 101 additions & 3 deletions src/xlsx/cells_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@ use quick_xml::{
events::{attributes::Attribute, BytesStart, Event},
name::QName,
};
use std::{borrow::Borrow, collections::HashMap};

use super::{
get_attribute, get_dimension, get_row, get_row_column, read_string, Dimensions, XlReader,
get_attribute, get_dimension, get_row, get_row_column, read_string, replace_cell_names,
Dimensions, XlReader,
};
use crate::{
datatype::DataRef,
Expand All @@ -23,6 +25,7 @@ pub struct XlsxCellReader<'a> {
col_index: u32,
buf: Vec<u8>,
cell_buf: Vec<u8>,
formulas: Vec<Option<(String, HashMap<(u32, u32), (i64, i64)>)>>,
}

impl<'a> XlsxCellReader<'a> {
Expand Down Expand Up @@ -68,6 +71,7 @@ impl<'a> XlsxCellReader<'a> {
col_index: 0,
buf: Vec::with_capacity(1024),
cell_buf: Vec::with_capacity(1024),
formulas: Vec::with_capacity(1024),
})
}

Expand Down Expand Up @@ -165,9 +169,103 @@ impl<'a> XlsxCellReader<'a> {
self.cell_buf.clear();
match self.xml.read_event_into(&mut self.cell_buf) {
Ok(Event::Start(ref e)) => {
if let Some(f) = read_formula(&mut self.xml, e)? {
value = Some(f);
let formula = read_formula(&mut self.xml, e)?;
if let Some(f) = formula.borrow() {
value = Some(f.clone());
}
match get_attribute(e.attributes(), QName(b"t")) {
Ok(Some(b"shared")) => {
// shared formula
let mut offset_map: HashMap<(u32, u32), (i64, i64)> =
HashMap::new();
// shared index
let shared_index =
match get_attribute(e.attributes(), QName(b"si"))? {
Some(res) => match std::str::from_utf8(res) {
Ok(res) => match usize::from_str_radix(res, 10)
{
Ok(res) => res,
Err(e) => {
return Err(XlsxError::ParseInt(e));
}
},
Err(_) => {
return Err(XlsxError::Unexpected(
"si attribute must be a number",
));
}
},
None => {
return Err(XlsxError::Unexpected(
"si attribute is mandatory if it is shared",
));
}
};
// shared reference
match get_attribute(e.attributes(), QName(b"ref"))? {
Some(res) => {
// original reference formula
let reference = get_dimension(res)?;
if reference.start.0 != reference.end.0 {
for i in
0..=(reference.end.0 - reference.start.0)
{
offset_map.insert(
(
reference.start.0 + i,
reference.start.1,
),
(
(reference.start.0 as i64
- pos.0 as i64
+ i as i64),
0,
),
);
}
} else if reference.start.1 != reference.end.1 {
for i in
0..=(reference.end.1 - reference.start.1)
{
offset_map.insert(
(
reference.start.0,
reference.start.1 + i,
),
(
0,
(reference.start.1 as i64
- pos.1 as i64
+ i as i64),
),
);
}
}

if let Some(f) = formula.borrow() {
while self.formulas.len() < shared_index {
self.formulas.push(None);
}
self.formulas
.push(Some((f.clone(), offset_map)));
}
value = formula;
}
None => {
// calculated formula
if let Some(Some((f, offset_map))) =
self.formulas.get(shared_index)
{
if let Some(offset) = offset_map.get(&*&pos) {
value =
Some(replace_cell_names(f, *offset)?);
}
}
}
};
}
_ => {}
};
}
Ok(Event::End(ref e)) if e.local_name().as_ref() == b"c" => break,
Ok(Event::Eof) => return Err(XlsxError::XmlEof("c")),
Expand Down
159 changes: 159 additions & 0 deletions src/xlsx/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1117,6 +1117,130 @@ fn check_for_password_protected<RS: Read + Seek>(reader: &mut RS) -> Result<(),
Ok(())
}

/// Check whether a char slice is a valid A1-style cell name.
///
/// A valid name is one to three ASCII letters (a column between `A` and
/// `XFD`, compared case-insensitively) immediately followed by one or more
/// ASCII digits, with nothing else before, between or after them.
///
/// Tokens made only of digits (numeric literals such as `10`), only of
/// letters, or mixing letters after digits (`A1B2`) are rejected.
fn valid_cell_name(name: &[char]) -> bool {
    // Split the name into its leading letters (column) and the rest (row).
    let letters = name
        .iter()
        .take_while(|c| c.is_ascii_alphabetic())
        .count();
    let (column, row) = name.split_at(letters);

    // The column needs 1..=3 letters; a numeric literal has none.
    if column.is_empty() || column.len() > 3 {
        return false;
    }
    // The row must be non-empty and made of digits only.
    if row.is_empty() || !row.iter().all(|c| c.is_ascii_digit()) {
        return false;
    }
    // Every one- or two-letter column is in range. For three letters the
    // upper-cased name must not sort after "XFD"; for equal-length upper-case
    // names, lexicographic order is the same as column order.
    column.len() < 3
        || column
            .iter()
            .map(|c| c.to_ascii_uppercase())
            .le("XFD".chars())
}

/// advance the cell name by the offset
fn replace_cell(name: &[char], offset: (i64, i64)) -> Result<Vec<u8>, XlsxError> {
let cell = get_row_column(
name.into_iter()
.map(|c| *c as u8)
.collect::<Vec<_>>()
.as_slice(),
)?;
coordinate_to_name((
(cell.0 as i64 + offset.0) as u32,
(cell.1 as i64 + offset.1) as u32,
))
}

/// Advance all valid cell names in the string by the offset.
///
/// The formula text `s` is scanned for word tokens (runs of ASCII letters,
/// digits, `_` and `.`). A token is shifted by `offset` (`(row, column)`)
/// only when all of the following hold:
///
/// * it is shaped like a reference: letters followed by digits, nothing else
///   (so numeric literals such as `10`, `1.5` or `1E5` and names such as
///   `tax_q1` are left alone);
/// * `valid_cell_name` accepts it;
/// * it is not directly followed by `(` (a function call such as `LOG10(`)
///   or `!` (a sheet-name prefix such as `Tab1!`).
///
/// Text inside double quotes (string literals) or single quotes (quoted
/// sheet names) is copied verbatim, as is every other character, including
/// non-ASCII ones.
///
/// # Errors
///
/// Propagates any error from `replace_cell`, and returns
/// `XlsxError::Unexpected` if a shifted cell name is not valid UTF-8.
fn replace_cell_names(s: &str, offset: (i64, i64)) -> Result<String, XlsxError> {
    // True when `token` is one or more ASCII letters followed by one or more
    // ASCII digits and nothing else.
    fn is_reference_shaped(token: &[char]) -> bool {
        let letters = token
            .iter()
            .take_while(|c| c.is_ascii_alphabetic())
            .count();
        let digits = &token[letters..];
        letters > 0 && !digits.is_empty() && digits.iter().all(|c| c.is_ascii_digit())
    }

    // Append the pending token to `out` (shifted when it is a cell
    // reference, verbatim otherwise) and reset it. `next` is the character
    // that terminated the token, if any.
    fn flush(
        out: &mut String,
        token: &mut Vec<char>,
        next: Option<char>,
        offset: (i64, i64),
    ) -> Result<(), XlsxError> {
        let is_reference = !matches!(next, Some('(') | Some('!'))
            && is_reference_shaped(token.as_slice())
            && valid_cell_name(token.as_slice());
        if is_reference {
            let shifted = replace_cell(token.as_slice(), offset)?;
            let shifted = std::str::from_utf8(&shifted)
                .map_err(|_| XlsxError::Unexpected("fail to convert cell name"))?;
            out.push_str(shifted);
        } else {
            out.extend(token.iter());
        }
        token.clear();
        Ok(())
    }

    // Build a `String` directly so that non-ASCII characters are preserved
    // instead of being truncated to a single byte.
    let mut res = String::with_capacity(s.len());
    let mut token: Vec<char> = Vec::new();
    // The quote character we are currently inside of, if any.
    let mut quote: Option<char> = None;

    for c in s.chars() {
        if let Some(q) = quote {
            // Quoted text is never rewritten. A doubled quote (the escape
            // form) simply closes and immediately reopens the quoted run.
            res.push(c);
            if c == q {
                quote = None;
            }
            continue;
        }
        if c.is_ascii_alphanumeric() || c == '_' || c == '.' {
            token.push(c);
            continue;
        }
        // Any other character ends the current token. Flush it before
        // emitting `c` so the output keeps the input order.
        flush(&mut res, &mut token, Some(c), offset)?;
        if c == '"' || c == '\'' {
            quote = Some(c);
        }
        res.push(c);
    }
    flush(&mut res, &mut token, None, offset)?;
    Ok(res)
}

/// Convert a zero-based column index to its Excel column letters
/// (`0` gives `A`, `25` gives `Z`, `26` gives `AA`).
///
/// Returns an `XlsxError::Unexpected` error when `num` is not below
/// `MAX_COLUMNS`.
pub(crate) fn column_number_to_name(num: u32) -> Result<Vec<u8>, XlsxError> {
    if num >= MAX_COLUMNS {
        return Err(XlsxError::Unexpected("column number overflow"));
    }
    // Work on the one-based column number: it is a bijective base-26 value
    // whose digits are the letters A..=Z, produced least significant first.
    let mut remaining = num + 1;
    let mut letters: Vec<u8> = Vec::new();
    loop {
        remaining -= 1;
        letters.push((remaining % 26 + 65) as u8);
        remaining /= 26;
        if remaining == 0 {
            break;
        }
    }
    letters.reverse();
    Ok(letters)
}

/// Convert a zero-based `(row, column)` coordinate to an Excel cell name,
/// e.g. `(0, 0)` gives `A1`.
///
/// Fails with the error of `column_number_to_name` when the column is out
/// of range.
pub(crate) fn coordinate_to_name(cell: (u32, u32)) -> Result<Vec<u8>, XlsxError> {
    let (row, column) = cell;
    // Column letters first, then the one-based row number in decimal.
    let mut name = column_number_to_name(column)?;
    name.extend_from_slice((row + 1).to_string().as_bytes());
    Ok(name)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -1178,4 +1302,39 @@ mod tests {
CellErrorType::Value
);
}

#[test]
fn test_column_number_to_name() {
    // (zero-based column index, expected letters)
    let cases = [
        (0, &b"A"[..]),
        (25, &b"Z"[..]),
        (26, &b"AA"[..]),
        (27, &b"AB"[..]),
        (MAX_COLUMNS - 1, &b"XFD"[..]),
    ];
    for (index, expected) in cases.iter() {
        assert_eq!(column_number_to_name(*index).unwrap(), *expected);
    }
}

#[test]
fn test_coordinate_to_name() {
    // First cell of the sheet.
    let first = coordinate_to_name((0, 0)).unwrap();
    assert_eq!(first, b"A1");

    // Last addressable cell of the sheet.
    let last = coordinate_to_name((MAX_ROWS - 1, MAX_COLUMNS - 1)).unwrap();
    assert_eq!(last, b"XFD1048576");
}

#[test]
fn test_replace_cell_names() {
    // (input formula, expected result after moving one row down)
    let cases = [
        ("A1", "A2"),
        ("CONCATENATE(A1, \"a\")", "CONCATENATE(A2, \"a\")"),
        (
            "A1 is a cell, B1 is another, also C107, but XFE123 is not and \"A3\" in quote wont change.",
            "A2 is a cell, B2 is another, also C108, but XFE123 is not and \"A3\" in quote wont change.",
        ),
    ];
    for (formula, expected) in cases.iter() {
        assert_eq!(replace_cell_names(formula, (1, 0)).unwrap(), *expected);
    }
}
}
Binary file added tests/issue_391.xlsx
Binary file not shown.
23 changes: 21 additions & 2 deletions tests/test.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use calamine::Data::{Bool, DateTime, DateTimeIso, DurationIso, Empty, Error, Float, String};
use calamine::{
open_workbook, open_workbook_auto, DataType, ExcelDateTime, ExcelDateTimeType, Ods, Reader,
Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx,
open_workbook, open_workbook_auto, DataType, ExcelDateTime, ExcelDateTimeType, Ods, Range,
Reader, Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx,
};
use calamine::{CellErrorType::*, Data};
use std::collections::BTreeSet;
Expand Down Expand Up @@ -1878,3 +1878,22 @@ fn issue_401_empty_tables() {
let tables = excel.table_names();
assert!(tables.is_empty());
}

#[test]
fn issue_391_shared_formula() {
    setup();

    let path = format!("{}/tests/issue_391.xlsx", env!("CARGO_MANIFEST_DIR"));
    let mut excel: Xlsx<_> = open_workbook(&path).unwrap();

    // Formulas expected in column A, rows 1..=6 (zero-based): the shared
    // formula expanded for every cell it covers.
    let formulas = ["A1+1", "A2+1", "A3+1", "A4+1", "A5+1", "A6+1"];
    let mut expect = Range::<std::string::String>::new((1, 0), (6, 0));
    for (formula, row) in formulas.iter().zip(1u32..) {
        expect.set_value((row, 0), formula.to_string());
    }

    let res = excel.worksheet_formula("Sheet1").unwrap();
    assert_eq!(expect.start(), res.start());
    assert_eq!(expect.end(), res.end());
    assert!(expect.cells().eq(res.cells()));
}