From 32d5b33f336ae754db5ff13f85d22deab8e0a72e Mon Sep 17 00:00:00 2001 From: gongyan Date: Mon, 6 Dec 2021 04:01:28 +0800 Subject: [PATCH 1/8] support to get merged region --- .gitignore | 1 + Cargo.toml | 3 +- src/lib.rs | 4 +-- src/xls.rs | 30 +++++++++++++++++- src/xlsx.rs | 68 ++++++++++++++++++++++++++++++++++++++-- tests/merged_range.xls | Bin 0 -> 27648 bytes tests/merged_range.xlsx | Bin 0 -> 10741 bytes tests/test.rs | 55 ++++++++++++++++++++++++++++++++ 8 files changed, 155 insertions(+), 6 deletions(-) create mode 100644 tests/merged_range.xls create mode 100644 tests/merged_range.xlsx diff --git a/.gitignore b/.gitignore index bcc01c7d..87b3e1ad 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ target Cargo.lock *.bk .vim +/.idea/ diff --git a/Cargo.toml b/Cargo.toml index 452cb02b..2ca673bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "calamine" -version = "0.18.0" +version = "0.18.0-mikorab" authors = ["Johann Tuffe "] repository = "https://github.com/tafia/calamine" documentation = "https://docs.rs/calamine" @@ -11,6 +11,7 @@ keywords = ["excel", "vba", "office", "ods", "serde"] categories = ["encoding", "parsing", "text-processing"] exclude = ["tests/**/*"] edition = "2018" +publish = ["mikorab"] [badges] travis-ci = { repository = "tafia/calamine" } diff --git a/src/lib.rs b/src/lib.rs index 6e6a18d6..dfc43320 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -87,9 +87,9 @@ pub use crate::datatype::DataType; pub use crate::de::{DeError, RangeDeserializer, RangeDeserializerBuilder, ToCellDeserializer}; pub use crate::errors::Error; pub use crate::ods::{Ods, OdsError}; -pub use crate::xls::{Xls, XlsError, XlsOptions}; +pub use crate::xls::{Xls, XlsError, XlsOptions, Dimensions as XlsDimensions}; pub use crate::xlsb::{Xlsb, XlsbError}; -pub use crate::xlsx::{Xlsx, XlsxError}; +pub use crate::xlsx::{Xlsx, XlsxError, Dimensions as XlsxDimensions}; use crate::vba::VbaProject; diff --git a/src/xls.rs b/src/xls.rs index 
331e6755..ba4c5b0f 100644 --- a/src/xls.rs +++ b/src/xls.rs @@ -130,6 +130,8 @@ pub struct Xls { metadata: Metadata, marker: PhantomData, options: XlsOptions, + /// Merged Regions: Name, Sheet, Merged Dimensions + merged_regions: Option>, } impl Xls { @@ -175,6 +177,7 @@ impl Xls { marker: PhantomData, metadata: Metadata::default(), options, + merged_regions: None }; xls.parse_workbook(reader, cfb)?; @@ -183,6 +186,19 @@ impl Xls { Ok(xls) } + + /// Get the merged regions of all the sheets + pub fn merged_regions(&self) -> &Vec<(String, String, Dimensions)> { + self.merged_regions.as_ref().expect("Merged Regions must be loaded before the are referenced") + } + + /// Get the merged regions by sheet name + pub fn merged_regions_by_sheet(&self, name: &str) -> Vec<(&String, &String, &Dimensions)> { + self.merged_regions().iter() + .filter(|s| (**s).0 == name) + .map(|(name, sheet, region)| (name, sheet, region)) + .collect() + } } impl Reader for Xls { @@ -529,11 +545,23 @@ fn parse_label_sst(r: &[u8], strings: &[String]) -> Result, XlsEr )) } -struct Dimensions { +/// Dimensions info +#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Copy, Clone)] +pub struct Dimensions { start: (u32, u32), end: (u32, u32), } +impl Dimensions { + /// create dimensions info with start position and end position + pub fn new(start: (u32, u32), end: (u32, u32)) -> Self { + Self { + start, + end + } + } +} + fn parse_dimensions(r: &[u8]) -> Result { let (rf, rl, cf, cl) = match r.len() { 10 => ( diff --git a/src/xlsx.rs b/src/xlsx.rs index 903bd77d..864ad313 100644 --- a/src/xlsx.rs +++ b/src/xlsx.rs @@ -155,6 +155,8 @@ where tables: Option, Dimensions)>>, /// Cell (number) formats formats: Vec, + /// Merged Regions: Name, Sheet, Merged Dimensions + merged_regions: Option>, /// Metadata metadata: Metadata, } @@ -511,6 +513,59 @@ impl Xlsx { Ok(()) } + // sheets must be added before this is called!! 
+ fn read_merged_regions(&mut self) -> Result<(), XlsxError> { + let mut regions = Vec::new(); + for (sheet_name, sheet_path) in &self.sheets { + // we need another mutable borrow of self.zip later so we enclose this borrow within braces + { + let mut xml = match xml_reader(&mut self.zip, &sheet_path) { + None => continue, + Some(x) => x?, + }; + let mut buf = Vec::new(); + loop { + buf.clear(); + match xml.read_event(&mut buf) { + Ok(Event::Start(ref e)) if e.local_name() == b"mergeCell" => { + if let Some(attr) = get_attribute(e.attributes(), b"ref")? { + let dismension = get_dimension(attr)?; + regions.push((sheet_name.to_string(), sheet_path.to_string(), dismension)); + } + } + Ok(Event::Eof) => break, + Err(e) => return Err(XlsxError::Xml(e)), + _ => (), + } + } + } + } + self.merged_regions = Some(regions); + Ok(()) + } + + /// Load the merged regions + pub fn load_merged_regions(&mut self) -> Result<(), XlsxError> { + if self.merged_regions.is_none() { + self.read_merged_regions() + } else { + Ok(()) + } + } + + /// Get the merged regions of all the sheets + pub fn merged_regions(&self) -> &Vec<(String, String, Dimensions)> { + self.merged_regions.as_ref().expect("Merged Regions must be loaded before the are referenced") + } + + /// Get the merged regions by sheet name + pub fn merged_regions_by_sheet(&self, name: &str) -> Vec<(&String, &String, &Dimensions)> { + self.merged_regions().iter() + .filter(|s| (**s).0 == name) + .map(|(name, sheet, region)| (name, sheet, region)) + .collect() + } + /// Load the tables from pub fn load_tables(&mut self) -> Result<(), XlsxError> { if self.tables.is_none() { @@ -663,6 +718,7 @@ impl Reader for Xlsx { sheets: Vec::new(), tables: None, metadata: Metadata::default(), + merged_regions: None }; xlsx.read_shared_strings()?; xlsx.read_styles()?; @@ -983,13 +1039,21 @@ fn is_builtin_date_format_id(id: &[u8]) -> bool { } } -#[derive(Debug, PartialEq)] -struct Dimensions { +/// Dimensions info +#[derive(Debug, PartialEq, Eq, 
Hash, Ord, PartialOrd, Copy, Clone)] +pub struct Dimensions { start: (u32, u32), end: (u32, u32), } impl Dimensions { + /// create dimensions info with start position and end position + pub fn new(start: (u32, u32), end: (u32, u32)) -> Self { + Self { + start, + end + } + } fn len(&self) -> u64 { (self.end.0 - self.start.0 + 1) as u64 * (self.end.1 - self.start.1 + 1) as u64 } diff --git a/tests/merged_range.xls b/tests/merged_range.xls new file mode 100644 index 0000000000000000000000000000000000000000..043cee0e7f6465bb3b31b557d8a377f20cb43337 GIT binary patch literal 27648 zcmeG_30M@z(!I+9i->|CatJJ^C?NNth>IYepn`bCARa8Dq8PlR2=PV~5m7vdH{K%R z6%}udLDvZ45zmN<#wZ>{Lp19CRWp0c?#>c;@BjYq``_E%uV<#KtE;N3tE-Rcf%2;+ z4>#?%sv;_(GtnVmBnCu}0q4N8pDb-mAYUTEr0nhs&j8Yf|3?}y;6p>|>5|x>EBa>$ zm+0{cse*ek4^xCZg7`I=h|hxm%MeD%FoJ_JoCd=S;qV9tUbj4@ zETyg?kb~jfK#_M4^~52AxuF!^QtIkJU7Nv`Lyk~o?obeKVn*&k2|md|VNFCa-ENq}4)J|&NUM3OONG)W?p3CiZq;Q&r4Dn&_Vo(A|ms5cuv z*4;$TOS$f58sMEYz&o?TIYh`JC(7f;3P+E_rYjCH(172B6`xC5vBEjf*6MmgJs7Cq z+wv@kE-@v37D(C=A41{Puju{iZaW!6EF+4 z6DXAs5_i&3nHdP7ATKSd1Ooy>p@L}uaRVDwFqPrT`6>V^C9R;%T7vs=qAS@p_-j|` z^Ve-{d3K<0D`qaGEF}~HXxv(kf_xY>Aci9<~xE}aWN}j){)`AP&j`}h= z-zmdk>OtdQC;bS?0C!5`8z{jEu%qEk6mS7Ns>4<2H{GI>T2 zvR^Kb9ymp|dg*~@W`m<=px}8XCi#Z>hJ;7!8D}XpJTdW<-6=c5J3`50CWEWU4`UCF zuL3ub!LO6y@CZ(k){h=|UN(93U=Uz~8_DR%^+UINa~T}ur{$#OZ^nSr_0nWln(X8d zRbO@cq4l5+KcE5sqz1Um4yAf4FflPVGzb0D{T;-Ic?YzD%ymosOk?1)_-14l8m>Ha zA=7$i=cVyINSYEH5xB$%#t|KOLZ3wqEHD^1gLws4y#S&#ssK*^8dU%VX;cATV+-gu zwg9d-8zqu{V+&xTG>QYdpGFlxztpG#=ztnk06kfw3Sif6Q~~VUjVgdnqEQ7pH@1MV zu?0dKTL7ljjjIFhcr>cgZjCMA-q->TjV<7)Q~+H9(=Tz-_+3;SnG>Ms$Q_w#0WM$a z;m}vWvIG_$0)aXa=Y|lG2ZpqjD=2CGsrB{9Gxb*X^Ojs%!60|pGTsL+H3mc~~V${%oHp7y2Oo>FS zR*RC75;db?!eq5T7|^DI%xIV}^;w)J;3{f?cA%u%H2wgU{@4x*@Z|1Qtrn^iBRNQZ_HftLn= zrrAz#ZQB5y)K3If-2Y5O99cgR*ivN>fdsow>L&twp#Mw+y-$5Q!S#CsbW%SN*hBtj zBI3yUiP$L-fdsow>L(K15F+?4r9PdQDG|}=PwFQEJ01Cl10vz0V~w;Ae~C_INWCHg#3;{6wO5?y>o^MGl9Gwzol9}{*yJ}i73 
zSoz?BPs&GEl=$h%O?Eyl*!lRf@Nr?~gX=^oA3afGO-&6uA5(Tdek^?2vGSo81eD(N zMTwGHF*_eKc0T?reA=_}!RfiQK73K)?W$UKJ}ue#1hDY&VdaBs6Dc19QR1W5ci8!u zBOgB&D^j;NJK)2HDd)ozN1nL#ik(j@c0O$O=E%y2X>a1lzb@}(=VQUnht1yFu<~Kr zn>h0B-Mj33EZO<6*_#_HAEv#DBPCDQu=BBE=fh@io~(SB_9l)z_~SEnKGy7f*zC=h zl@HV2#F0NfJjl+cHS!5)ki7|j4;!Y!-h6!5oqbC40c*#jBXa*{!_J4z-kezZFzwC9 z_g(Qkc0RW3eAw*Gm6Z?E-h6z^%F5XJ*s=3rvp08EK1_S_@s-?8WalGb=fh@iUaWkW z_U7Z8y;ItMRNKd%oe!J6`LXh0+MAE>&hl)w`tXQDhOQ)+-x}N|NFWUyU0sO|nLSl6 z=K44Ut>79EM_6fc^pjSv&gHu)BkMGma0%w%Vw4(7VX4DY_^q zqHU^DY$2ye+oJ}Gbcf?oGPY4rL@V@EskNz`B3hoh=3o}0Ph4T5!g&>!%SZddrl13d zV=dv3S8=kQu8TVuu$_le;WO^}Nqah|ErUP{t27#iK%)f>JD(|MtvmtJhxd5EI&FwL zYb#)#HiXFLDEfG`7?9* zDar-7=0Fy6a0!OfA5<{z!^vul`(jj$QEJS@jTl;LMT)PKXP}f?!EH|3gAzEWld4A# z=pP>4DJhpfMMa8Mz)z9Slm&OrC=EhYZ=!%w+d^DHsqV32bfMOn0gX@Ii|85`Jo+gb3FRw@&lC%aaw~NVGgtB;P$!9jtXFp zUEXv2k3EA zl=w?v2VYhX+~!x}-}wGuL;}%v6id5cWnJ=Yps8fJGJPsW2w2WR^U3=H4j4fSdN^RH z(zBP-V@_)bsY&yu>ZL}ngMuCo7pnA}<@A`-7eZM3D$3VUK@SHDReEjZ^qA8WsN-qE zE?=;M9u6I<^gQJBn9~s$wo}iu)9a+5hf@$$dfsw+%xMQ96Zdv!qh~-u(7t|4zO+1- zKMd$mj)l5#xJ|&mFbB6rW&RfT)nzUX_rw+EOs8b#Os8b#OsBH>gGn}nLk{pUfnO{P zRU=6<5t06I)O7~+6a}dSIQ}|?jEAj*e(*Gvq(H6$gLybMfDES&_|Av!j4po4;>t^esbu^xN2V*440(wNO&Q%ZWpr1`c>7e!qlZd{ zoJTiJYVWR+5e5B%YV#f{8K3@^z`-m&iG!DjczVMbUb$U|dkx$Rz;s6O%^}L{gF4Ix z#)Sf`;>R{!#mLzWM$)gh4$lk>8Nv4C`Ge-CVCW>R;XRot+=qszOqeX1FgZSU#zfJi zxc1Y>C+}H)xz9N-Dykf9YtO_wBd3Wp27MezE$O&7kNe`0P$ zTJ*JZ7S2;W3v$}!3?)JT?*W@^V-qzQ3J43z{Wqqj1dnPbnyG!rf zqj2Hnq^hYQr_$$?&ELA+nLl*;#06Vs-aQmw+OJ|urz_spg*^^?t!&YJ(e1vaF-2hs z6=lZPdPUsz8|jq3efc->x9nE0w|p27S^3^P=2qbFqGdbEBnQrUiP!f%lfKWrI)XPL zFfVewa*LB6P>StTXeeT+ZmY#){0_4Sg389&-i6Ate6RmpMpYrowK9{uV>$7xP+ zCsT!6Z`x;@p544H?qqC z;g*|>?>(?sR%Sdjz4X)jD@U%*PJd_l>Gs{M4#&(~7EgE3Tj2L9%d$Apb^n&RC817# zoZr5G%A%yo1<$*^Ov-pY>F#FNs)?J%&G2mYx~R%#`@pw0MxI^TJ{WrLRx$|b&{S-SY?sv^JRHfB+=$B+D)_V(7T zWqX9_AD`A#dRFcn^fq&G`sZ`o_oaWR*>LaN;`~k1-{1J-mgo9Y-~Rbw-RlijrAI~{ z+;>*r>3rrC8@nM#2J;R5-dz0q?QM5Gj>B5rCZ~^_nUb$>_2y&iXw#yl?~Na1b`eFM 
z-hE)E)vejaSA)F8vpo3z4{b*syfShWDat&W$8VlkalX@xPW`_*V%*X1O3juYy(dl! z(4FDzl0Q4ce{Epf#X)Ny@+bBFdX&C}^}uQQ#vVm86D$tQTY7X#{^0Rlm)Kk8r0vaF z)h*a*Ua2^WXMf|Xv@H|y2vL>}@Q7U7#6w&F6U$xf>`ap!eR z%f587-SxwSv-Xvyk!x<)R5^LyzPD(^iC)+CeRpv`|>{JUR%HLTk<^TVU+Z+Dgp9*n%Y_SKE*pF{5b z@Qs~mufJ#Yc{{g__xv)#ziD%hXEmXP`7tpa0l7&~D*qqxG9l z+^My6pXPbH&yxeSb25rU3=Kp+-*>$cy7{n$S(n_tPmTR_433rK%DCu`?Ucz-ks{sjR=~5r|0wzBPE=jp;$9h4 zWs}DnbGq=itz(T_WRJ?qEgq9q{cOy}<=2hN_I}OjGJSNJf9Svm{;4^WL&~Oi`Z2s{ zTb!lI@;-gLRQ2EMHEiY6XZnV{1LjVyn_IE`P~XvSJ$TMfZ(ef9aLhgZVneFc6_fAk z)*Q|8c#%@*Eh^c4vR|HmUbbULqkcD|W4-%U#ax=yvDSUrcd4uPT5@;Ia~e1AZ_|q5 zH#UyF@AulO?ZTyTbqS6p9U}blL;Ck`S>b=B-L@;mNjh%(JH@XUG}t4o>Dp~m|B5{} z$oa_r@?$I0!X7va9?tmGG_KCfsLQ6q1(91^EcRV4E`OOE``RW;cYSD$UsPChT1wSa z=Ls{GdG*ns`|S5m!;gG*$jfioOE=Nh>bmCd@AB@R(tlL(-I3G2mrU36`m5I1@fYLw z)$5&B8vPLQ>W`NxX|vw3xMrtwo!Dvud2ce%f)eD8Igc~AU}0)$S{FKdj7 z-1+SnjySny%<=s1%S=c4Jn&B1WySCKeC0Q5&y4@!Wp-$cd7sSN!3o8~=VYe@*0dbG zytiwVi&Knid+*h$&D!)@QBb}(_CrQd(3WKbpEzvU*5#+;${g)!XCC(@y33^7^L1S}|0sUwxNcV@@6ym?$Ikx>6{F`Z9IgkP z9nv(V1vExrnc(*8txQ05j;AQ?1^Q&n_*4t3&Y2ViEnzarm^;z)D2Dn~DXC6i6qIhV zQKF_Zsx(xMfI$OZx-+Gz$0Yjo*2U8;uvFJ2aZvjTJfKe2!Gd-jV_g9mWC_5QFkpy> z$n;mj#!6oa8*bq5P^aJa?!H!__aJ z^pey2Oj#l!p#Ud@PH`ZGW$6?bQi#ssqPgopiiX2BB&9Vvr33kF$500OqZC}Z6tFD-Shz(=m(hh3HUXW| zgA^X3p;P*h!d{0dtX*enl}e|O7w#X>DddHpb|5buIWHZG7q$_Ng}m_d3_69pu(jwE z@-l(cE!Ye@3svq1TWnSklSyZ83v!rh=upT{fFHJg7I=s(CEMvpY=Am!P@!7T=-{Wb z2Cf^MRg+#&EffqDfw??NZ5+xKVOoz?6(|N(L$x(^hwdb$hGp@mLtX=}E~$fqO{4j( zAOeCkKoekZ(CON6ZzT>RWF+8gz14rNw-VCk@gL!>PEsBLy;Uf570v#M_BsTxuz#a( z5;n*;38@Zvk{Y+sL(+U`zapcCmLjCQ4@!d_6R~upo`eT^=q`{3o)5r0lodV81j@4v z1JBZjbfrBD)qO(&A3Y0pes#~nr9BHhNu&L?hd4&3$v7#tvxDwsg)(w`ypfIBv4EJPex zheO0MYC1$5o#sKrF*XMxa@ql{W(#*5;W4N_jj*q?E+Tf{5v(M9CB{B8+h_ z-B8X6h5U>=!Cy81{T7Ed003x%Rs&iMXf>eKfK~%q4QMr>)qqw5S`BD5pw)m@16mDe z)Ifdff9{PdHwxNY8?VTR^}qXvB3%EAA>vLo?ho{Xi2KTeA>!xj2@vr-RWd|e!%v2Y z>;72~@dOX<1K@tZ5{S4zunQvFegM6uH>PL-ipIMwFv0S;;X@zkR6P2SrqW#8T|kVZ 
zTW{6vuYsK+x)9v7L8}3+2DBQ`YCx+2tp>Cj&}u-d0j&nK8qjJ$tAYO(4d6N+*U-2u z#`p1fjtx(*;ffjG>El`-&!XYF9@p%+e#cWDxbDX_KAs)JcmBA}$CGpT*(0v?aX-KU zBL2BD{PSG+=cw=#B>W3?xFWZMD1c}W(E%d<4K6@k8SA z@S!o*y(efPk&+1^li}my6o6hFe(rTqT~k6&W~ literal 0 HcmV?d00001 diff --git a/tests/merged_range.xlsx b/tests/merged_range.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..24e467a5e7962d843eef248475bccc2db91e15ab GIT binary patch literal 10741 zcmeHtWmH^Swsqld1p$Ht4K9TQ4^D7*0>Rzgg1fsr!Gl|Hf&}-3KyW8OaQCms?e5&m z&HF~bAMaQ98Kdgd8C7f6-utY%);w!%IY}sJEC4J39smH407$}LaoIxv0O2qI044w) z^0kn)m4l&`gSL{ZjiJ33gNvmFaV|6@RW<+;{QLhN|G_KJt<-PT2|{mG-R2W+Qwi}A zQdWjW)rzDCo5- zPiJ#V46EpOzaj^c8`hO%1Zb8*e6q3en#wADsna=$Z7+{;X-r*Jlr5jkiI%7;frBS< z^VDeKy>> zRPIT{wbWA~YiY4jgp9vKQAIofyl~v`)9nGllClK~K3hYUTQ$skt@>2;m9E;;C^U;( zc%f#yxykyyxF|AQlzuUoD>)SRr1u)0&7dqU_E8x>O+4vvD)aa%?s>|KRipUB$=0gq zuiDUUa-#cL4(N-H(TA=cSQs1Owt0>yE<3AH*_+9gV~HJ z6!jidoUSu_o#6)S%MW`t?Dlq z3HTc+Fcp!&T-CNSw6JGnczFLWNB@I~`KPIu#!Aa}fsh0D#cqQ7&L`)i(FG-)1;m<3 zlsvt~=g{gR-jUnHoPM)QRO1M%vwMNmrZz#%<>|Z2%_4!b^3VoD86%PS3pCAzXWr~kR zkF@%N-bFdYq@dz%SzzS{_U!Gr(KN5A#DXmpzFo3vmiOW#8Xx@ z;WVw(%e3Pnb=5Yo_;@Uw){c4mmR=^kPl1#bc)>g>+WS2F)Jvn5^>8r5wVNHG>wVur zzi${((E>Pa{hcHd#Ra-o;1v;t0s!y;@DMH*jDPUN+1k!r&)VAjA*KD3GZ5gs21fbM zK3Zaj&ALIz{`&!Ke&emSv3GIMS`ALg8 z6m(L${Nj~f6R;ZcW=kgZ^a|MH>C^g^qcw@BZ(L@Cx4+0vH4V*>M|a-9>-I$MzqGbo zWYvm@YV*V&JA8`NAA%H@9Z6s9YkKnrTOAU{H8V#uZn>)$N?n-OmOz!fP_hwP@rGqc zkNJanXY?e-FjB7)FIPUxYsTmHH8jXk9@|${w5-E?XLc(0+ep9j9B!%gtW&Tzbb~zy z4eSzN&-t^@l*UO%&4ADY_Y5nAPYQD*19xic3`J=U?LYl3?EjTLgKR-0o+P z4TK65*A;qhEZIdr*nkq2fpI(B|1=#7IXD^H>J=iRuNx|0O;_PLfI(L`);w%G*{ zsS@$gz1Ek7)0cq<%&a#YwT*O+RfW=SQxt6A$w=rwIjyYg9h*%2+^L-I{#*zsVp>!* z=0a?6_7Yh!(f2SyH)3T54Sn$WX>{9r;oVoMdix#JX0paMEHV=93G_SPq}muyv{7b6Uq0zv zj=eo6*$X&S)n*cKMncz2CYomeZnBT26`!;QPeUV77Uh$JH5)E>J+KdrjgErz0o|M=n`)ZQeZ(U9`TBC37 z_X$3-8m_8tSCHh*DTv=&+I9GK`{DloJ+^E?AS7Nt=wPsR zM)|`->>XS!4DBCc%AE4B)fW(Y8`&Xh;=ZY;Q_YX^SfzO@GG!W|LRj}?8b`CrEhgte 
zEd5GP0OfLb>q_V6>?7Rp;b8!{8wV)z}^vQQEi3JYLw!M!j2?feL;AK^3M2T*VQs4+CsU(cAn9N}Uop zwZyOmZo4Y^1>_Y!NIK>}9#fPPORZq*GF0LQMT=9>dt2x@fXYns}?wRCa+SrSxrrgOy+paZ6 zCFo?=)~S5sih!z!YWW`-IMK#3NY(2bD^^TB*N2j+HD%tfCU@OFmB*VB>$MPNecM@; zEk)Bvt3h-_x*1og!ENVOZ%L+?o%opI7FNviYgg8sI^7O+clt`0hk6kN2Qh}q;`MM&ZL+)w2&F@}~H zcep|>)dg{WK%@;ndU|^oP;_f!m7Ja4-=^Xb5U4^0l=8y#pH=Og=bP{UJxegV=`cIN z><|RUQnEi-@(@SuO$-ek?EjIBe_k;ED;4`kb(nR5(1rH{Zv3X*$+?5%LZ53(kW5kB z0z}@-=AKEIe0Y9&{e9YBFtSU30i@-nj&m9opihcaX2nkuz$%W7rWoO$0phKv{dm`iGh^ise*or)YN;4^|`7=k4_dfMaso2q|fB*AX9nM^_mwn2+gZ_E~JECXF&Q}Bo z;X6a~_RipB@%KQ%ZOJUz@$W1F)1Q|hv=f9Sat?JTWalKSfzxxsCLBwPAu6^WT1;ja{b$D~dIQv9)kG#Pv;Cz;8?tuzjU^z5+e1`D5 z6 z%)m-`-9}=@0)vETPa)4XQ>@>dNd=-Yn);$j(J)kFoDrgtCCXzQS#Wo@d(m6%ffP~= zNW_JEUR`u!dE$EAGV>rdDl+NgK?oJR;`qvirl~rfoG;8tYWy@^Sg=62OT|hnLGo3n zAhq;d$zpkPv&Z&G6(1I${nVu|P|ckh!oOY|hS+`lW3I$z%ehO)wi zqR2MzKFPz)XLXq|5{#9Gv~9n5DM-7g_c>RVGeIy!jGKQmF|uC2&3Ru-2MU zN+OCwgg!U92|TT|P!BT7s;Qpt%jqw6WZfhDvLE+-+)FBr=E#(eq0*|Mr&V~TRqhd^ zhVgD3&4U=H5{|=i??!v#q63Mw(D*Vj!OfJTo^FoB0Jqg!L^vzQHV(X5sgGH+1mBF?;Ra3qZ#>a3I z=4TSN!Zv8q(VR>lX3&-(-cC~=^o;S~Wn%28DgcVH0NF3@okIPdQHmN8%hnHF4$HFf!3A1P7_A#2}Uoz0a6bHQ;e`+uVU@U~!$i{n`p zQ%E8G`AoL9Ndqbl)ibgIy*WFa-2$L!Wn#oWM(yE-m^@Oy_-e}(%oK_>Q-iDg7q@b!sxMl87&d^>X@_*tgSKXg#hoeIltTiTZ5rj%6D6}gD+#gLT$_|j3Ad!{T zufa%NXvD0o9>@zC<*s`ot7xGdo%F}ObB~JJyM60$e>dJrR8yZo{cZsUOZL=_vF&=L zqGF%R>+WPY3{Ja1nyA;F@cu`TY}W13?BI}4l)Wm??e=(0Z&W7l&CTUG8r|1f1ibA7 zh!Bu7!?MsOO<1-c_MJ;|%bN&+;9z;E0G(OACQ3ml7gL`il&OvaqzcMxX11lvOKHEI zH_1aVRkd%K!jYF4^lQ6-@1Yn5>RHjyHV(fmxG+!Is$y_e))8h;!`oQ%Lb*O?ixUV& zNmZo^g-Kn(MH2PYzs7y-=In>wRncq^4;rE9Y!paGB~?j6?i&-!Yngxn3TR@p`J$V3 zu{Td;puVer<9z57;;-vd6mSDj?26bN*UauSvhyY_;eHDjGWN>OIwssfGYTpA1WR$_ zq$k<_b;cmVi7L{Sh#ao_wDT4zb4&>FHCq~YFcaZ7!)t-y+>`{C5Fx^AjKv@C^d@B{6j(G}AkO8xb z_Iu;!@15i4%f}0|x174}g@H55*i zl#%iBz1_5VFqYQ)P4~(K*Shx^C9OxX-lf-ECC z%&scOVs%1&ZB@42h4Br3SYr?R?Dhb6nAo{N^MI5qfTcD@Yy#q*Vt`q);XCQG0#w!9 zxDo+jXc+>D+(g>u_5oNtS`_)YfhXwx&WW0s2J2d6a02Xp;gpy>K)2~S;MUg36D;0I 
z#2r=T+u@FCiZr2y^UlK@mb~LN8egpA9mB1UOv^SmiZZR76gHWSz-pfRNNK5*AKP%D zG(i(~!p0;6Mp#-yiVeW=CxoZF1Ofzpb`_aD4oS_aCILRfM4%%t!^v{+b#AhiI@7ZCdd%;cnqBvv4 zm;YGU(r;14-hUF)n(J7obyb~~GI+g7;dxur{n$Rm^Z`Y%D2|NJrjpGQi3V3Obh@hv z`!yvto!~j(H$05YbXP%*z1`TwGv81mED4l$)q|2Z#bXDvlY+D7@7FW6863G2xItCH zAe!r~c}XJF-0!5#KJx44A3)YXzq7Z$5mcV)fG~94mu4wQ9$|h%ma3+##T=HAL4J5~=#$RPoyck?NE*ux{Yn@>#zyRf$ zg*$5)`|Qv(=_ANT_NgJV#6%;EgW_MS8pg(ewBx(2dXKNH1YJUr0{PnlgoH5)Ef~I& z=oT}Gk%xuA-cLpvml#NXfBhXudLHy6Fq9lB6eX&WcQX(7IjhTD0a!>PZ+f1&`zOv_ zW(#E5&;$TuoV-Wq2`n>RzoTmDq=pNkQi7_%LXxTN{6R=kzSGQFOwaf-$&XS7pGldgA$yv_{Ze$hQ9u+o= znUXN{U8F^ip3Z9&;u|k}xMj>1d)o8zc1cwn*V-&{M0H`*s?3lJG_+IHvt7>H5qbtn z&*0CYhr2ReLm!ziPqJ&pXvoAYvCuk?)2R`2U>P$a54}Ti>;1IjZQ`oYsgIEFZ}+s& z-YI%;=Z{jjdG5JYG4p*)H(`D%ZYvn=$e+WkXE%*TJioY$<)Ux0hcb%m%miutM9eMS zh3p)jYaB^$M%ZB|SRHeD3gZ~4n+KZZWleI}mc*vdb9U^jhG8xhQlYYdWlAV#KM=>$@uAh$8@)AGhbtVl~4b{H# z7>D8b5)WKkU7r*jK0i5|pim*LD7OociE1?ysu2evXRsr{dH94x=xKkl<}uFn+u)dJ z0xrF~4Ig%&XboVwZu^7iWTKnVIu%U2`7J-IdSbpl6x$oX-D9jjM5n!puAQNQqJy2O zmGRHISl*Xm8E{Ji?tmM(kA68aVFgbTZ?s+FX|oDIL$CXWSL`UUOL}$&9YOiAclwIj zYfxh7&^_}@k~Xz49r2=JHqV(YybJZxYOAXYtjG@=8%1I5kCT`LySEu6dI@w25M;+k zxi3!NWfAVKtS~u_*k%wGuDXramM);Mx{V&^ovWYa8RMWU5wWj9qH-3YbZu_Df32J4 z_X4X__w~lBniqq~dz)kJW!nTX+ClNySlUTgi26?st>MSMe%E&WoFFzu>H*w3B2Kv` zpLn>K4|brP$Pi14%8dwdcs@}l#!fmP>Od*U;FeP+=6wH!>$c?#?so#6CtyjC4_p$F z2Nw`Az>Nt5YkfI8Ya4q;eQP_zhcW=Thxgxp1lWn*#%fD;fv`R;!n6zZyQdX6>Ic?C zCA{QBPp4`NSSzPxGZo97Z)@GknnVqZI?s$-e0d&g;Suk60M(-sF@O=GhvmHzQKXde z?!!EVX3j^h0_gYnFq~XIB0DQ*6sxI6Js-nNS#ec;P}F`rn2_KHpoMpqpO6>C674>NYVp{@95{HErn6C%_uan$#k7APkX`$)#dGE zKR1tBt$n&?7#oXTUGfS+KoDmJ;`mc+g}cBaH!|`G9R9YcQB2-UB?ey9Iq-|}4~E)+ zsp(*7r)cQl@W4^VN7u{@dZt!-I@!peYEaEQ1m1E>R!m5YPn<6H2Lw?>C^9HAeP~R~ z=tvzs*w{#93alBw(hQ6Nh;b?YAq0aPI7sJF9V~P3KmFzz^?yGYrvJOa4At#uW`Oxf z3yy5)f99jEjm>}Q_;3UNIMQP~%;p~YUJvcTm$du!{*`@*hM!}Fgm9eVnrWh@a;wSn zLcR^xoJSeQS+07Z79lGhNkYynoIgc3D2<~@Lt-Pc&e#)^cx#|Sd zyx_^jsjn`$E@9Z}=SxUjWq)}Vp(aX1oa+-G6Cerm9-}MvIM3WfDK+MYSn*p6ECoF0 
zIn{)5o8?nWDAY^RQCZF(7@7#{;M8bTxE1 z=~qqmm@}{Rl@$?iz#scCz9g{WE2~&FKF}E&ZkTTY*`bZJ0#stJtz%{qA7814iQqNr zpBtaXrn@cyI&i?4E3*!e=sH>oP4u#nhaYV%Z&vi~kMixoPU|h=7{YYrT_5v_c_?FC+5rsqX#1j|#u(yVG5qce!Jbc`E zwJm?A@)QSg*>J2^q^eliKh}wbL60t2GS#*=qrXJO3T|C;bWzY5o4spy@N$dJE9r9& z<%3`B#9y?lJkN;lsHU%#%OuIC*UN z*S`Co4adP_{hzz=k8vJ%VSgbZfgb`M^<*C#KW@=!^Lxb*nfdHo#5&lx>Nd8{0Nq3}NYiSjoc`557^dg_-w0ANN00Q^l| zJvM)wi+%wtk^CIMqqO+jH2Lp?T9W=dpg+^yW6-}Qs$b&-0ItBG|C;DO(w3Yg447{Z RI~dRaKxhD9`1!-t{{wcyB = open_workbook(&path).unwrap(); + excel.load_merged_regions().unwrap(); + assert_eq!( + excel.merged_regions().iter().map(|(o1, o2, o3)|(o1.to_string(), o2.to_string(), o3.clone())).collect::>(), + vec![ + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 0), (1, 0))), // A1:A2 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 1), (1, 1))), // B1:B2 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 2), (1, 3))), // C1:D2 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((2, 2), (2, 3))), // C3:D3 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((3, 2), (3, 3))), // C4:D4 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 4), (1, 4))), // E1:E2 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 5), (1, 5))), // F1:F2 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 6), (1, 6))), // G1:G2 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 7), (1, 7))), // H1:H2 + ("Sheet2".to_string(), "xl/worksheets/sheet2.xml".to_string(), XlsxDimensions::new((0, 0), (3, 0))), // A1:A4 + ("Sheet2".to_string(), "xl/worksheets/sheet2.xml".to_string(), XlsxDimensions::new((0, 1), (1, 1))), // B1:B2 + ("Sheet2".to_string(), "xl/worksheets/sheet2.xml".to_string(), 
XlsxDimensions::new((0, 2), (1, 3))), // C1:D2 + ("Sheet2".to_string(), "xl/worksheets/sheet2.xml".to_string(), XlsxDimensions::new((2, 2), (3, 3))), // C3:D4 + ("Sheet2".to_string(), "xl/worksheets/sheet2.xml".to_string(), XlsxDimensions::new((0, 4), (1, 4))), // E1:E2 + ("Sheet2".to_string(), "xl/worksheets/sheet2.xml".to_string(), XlsxDimensions::new((0, 5), (3, 7))), // F1:H4 + ].into_iter().collect::>(), + ); + assert_eq!( + excel.merged_regions_by_sheet("Sheet1").iter().map(|&(o1, o2, o3)|(o1.to_string(), o2.to_string(), o3.clone())).collect::>(), + vec![ + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 0), (1, 0))), // A1:A2 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 1), (1, 1))), // B1:B2 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 2), (1, 3))), // C1:D2 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((2, 2), (2, 3))), // C3:D3 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((3, 2), (3, 3))), // C4:D4 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 4), (1, 4))), // E1:E2 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 5), (1, 5))), // F1:F2 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 6), (1, 6))), // G1:G2 + ("Sheet1".to_string(), "xl/worksheets/sheet1.xml".to_string(), XlsxDimensions::new((0, 7), (1, 7))), // H1:H2 + ].into_iter().collect::>(), + ); + assert_eq!( + excel.merged_regions_by_sheet("Sheet2").iter().map(|&(o1, o2, o3)|(o1.to_string(), o2.to_string(), o3.clone())).collect::>(), + vec![ + ("Sheet2".to_string(), "xl/worksheets/sheet2.xml".to_string(), XlsxDimensions::new((0, 0), (3, 0))), // A1:A4 + ("Sheet2".to_string(), "xl/worksheets/sheet2.xml".to_string(), XlsxDimensions::new((0, 1), (1, 1))), // B1:B2 + 
("Sheet2".to_string(), "xl/worksheets/sheet2.xml".to_string(), XlsxDimensions::new((0, 2), (1, 3))), // C1:D2 + ("Sheet2".to_string(), "xl/worksheets/sheet2.xml".to_string(), XlsxDimensions::new((2, 2), (3, 3))), // C3:D4 + ("Sheet2".to_string(), "xl/worksheets/sheet2.xml".to_string(), XlsxDimensions::new((0, 4), (1, 4))), // E1:E2 + ("Sheet2".to_string(), "xl/worksheets/sheet2.xml".to_string(), XlsxDimensions::new((0, 5), (3, 7))), // F1:H4 + ].into_iter().collect::>(), + ); +} \ No newline at end of file From 712f2eda826cb09331006c00dbd36114f8809b3d Mon Sep 17 00:00:00 2001 From: gongyan Date: Mon, 6 Dec 2021 12:29:48 +0800 Subject: [PATCH 2/8] support to get merged region --- Cargo.toml | 2 +- src/xls.rs | 6 ++++-- src/xlsx.rs | 6 ++++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2ca673bc..7fbf0f14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "calamine" -version = "0.18.0-mikorab" +version = "0.18.0-mikorab.1" authors = ["Johann Tuffe "] repository = "https://github.com/tafia/calamine" documentation = "https://docs.rs/calamine" diff --git a/src/xls.rs b/src/xls.rs index ba4c5b0f..59a8f7c1 100644 --- a/src/xls.rs +++ b/src/xls.rs @@ -548,8 +548,10 @@ fn parse_label_sst(r: &[u8], strings: &[String]) -> Result, XlsEr /// Dimensions info #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Copy, Clone)] pub struct Dimensions { - start: (u32, u32), - end: (u32, u32), + /// start: (row, col) + pub start: (u32, u32), + /// end: (row, col) + pub end: (u32, u32), } impl Dimensions { diff --git a/src/xlsx.rs b/src/xlsx.rs index 864ad313..925af389 100644 --- a/src/xlsx.rs +++ b/src/xlsx.rs @@ -1042,8 +1042,10 @@ fn is_builtin_date_format_id(id: &[u8]) -> bool { /// Dimensions info #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Copy, Clone)] pub struct Dimensions { - start: (u32, u32), - end: (u32, u32), + /// start: (row, col) + pub start: (u32, u32), + /// end: (row, col) + pub end: 
(u32, u32), } impl Dimensions { From 3fca66fa1b6baba61eb8ae0c541323a46eb66673 Mon Sep 17 00:00:00 2001 From: gongyan Date: Mon, 6 Dec 2021 12:34:29 +0800 Subject: [PATCH 3/8] support to get merged region --- src/xls.rs | 4 ++++ src/xlsx.rs | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/xls.rs b/src/xls.rs index 59a8f7c1..2fb4a4be 100644 --- a/src/xls.rs +++ b/src/xls.rs @@ -562,6 +562,10 @@ impl Dimensions { end } } + /// check if a position is in it + pub fn contains(&self, row: u32, col: u32) -> bool { + row >= self.start.0 && row <= self.end.0 && col >= self.start.1 && col <= self.end.1 + } } fn parse_dimensions(r: &[u8]) -> Result { diff --git a/src/xlsx.rs b/src/xlsx.rs index 925af389..9318913a 100644 --- a/src/xlsx.rs +++ b/src/xlsx.rs @@ -1056,6 +1056,10 @@ impl Dimensions { end } } + /// check if a position is in it + pub fn contains(&self, row: u32, col: u32) -> bool { + row >= self.start.0 && row <= self.end.0 && col >= self.start.1 && col <= self.end.1 + } fn len(&self) -> u64 { (self.end.0 - self.start.0 + 1) as u64 * (self.end.1 - self.start.1 + 1) as u64 } From c7a14d68728f276cb0f1113f69d61857407f0774 Mon Sep 17 00:00:00 2001 From: gongyan Date: Mon, 6 Dec 2021 12:34:57 +0800 Subject: [PATCH 4/8] support to get merged region --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 7fbf0f14..74c724bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "calamine" -version = "0.18.0-mikorab.1" +version = "0.18.0-mikorab.2" authors = ["Johann Tuffe "] repository = "https://github.com/tafia/calamine" documentation = "https://docs.rs/calamine" From 362fc26ab7716f2584bf929511cd02ffa1adbfb0 Mon Sep 17 00:00:00 2001 From: gongyan Date: Sat, 10 Feb 2024 14:04:31 +0800 Subject: [PATCH 5/8] support to get merged region --- src/lib.rs | 20 ++++- src/xls.rs | 36 +++------ src/xlsx/mod.rs | 64 ++++++++++++++++ tests/test.rs | 193 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 
files changed, 282 insertions(+), 31 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 85faf8d0..fe719126 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -87,9 +87,9 @@ pub use crate::datatype::{Data, DataRef, DataType, ExcelDateTime, ExcelDateTimeT pub use crate::de::{DeError, RangeDeserializer, RangeDeserializerBuilder, ToCellDeserializer}; pub use crate::errors::Error; pub use crate::ods::{Ods, OdsError}; -pub use crate::xls::{Xls, XlsError, XlsOptions, Dimensions as XlsDimensions}; +pub use crate::xls::{Xls, XlsError, XlsOptions}; pub use crate::xlsb::{Xlsb, XlsbError}; -pub use crate::xlsx::{Xlsx, XlsxError, Dimensions as XlsxDimensions}; +pub use crate::xlsx::{Xlsx, XlsxError}; use crate::vba::VbaProject; @@ -131,13 +131,25 @@ impl fmt::Display for CellErrorType { } } -#[derive(Debug, PartialEq, Default, Clone, Copy)] -pub(crate) struct Dimensions { +/// Dimensions info +#[derive(Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd, Copy, Clone)] +pub struct Dimensions { + /// start: (row, col) pub start: (u32, u32), + /// end: (row, col) pub end: (u32, u32), } impl Dimensions { + /// create dimensions info with start position and end position + pub fn new(start: (u32, u32), end: (u32, u32)) -> Self { + Self { start, end } + } + /// check if a position is in it + pub fn contains(&self, row: u32, col: u32) -> bool { + row >= self.start.0 && row <= self.end.0 && col >= self.start.1 && col <= self.end.1 + } + /// len pub fn len(&self) -> u64 { (self.end.0 - self.start.0 + 1) as u64 * (self.end.1 - self.start.1 + 1) as u64 } diff --git a/src/xls.rs b/src/xls.rs index 205be920..cbe7de09 100644 --- a/src/xls.rs +++ b/src/xls.rs @@ -17,7 +17,9 @@ use crate::formats::{ use crate::utils::read_usize; use crate::utils::{push_column, read_f64, read_i16, read_i32, read_u16, read_u32}; use crate::vba::VbaProject; -use crate::{Cell, CellErrorType, Data, Metadata, Range, Reader, Sheet, SheetType, SheetVisible}; +use crate::{ + Cell, CellErrorType, Data, Dimensions, 
Metadata, Range, Reader, Sheet, SheetType, SheetVisible, +}; #[derive(Debug)] /// An enum to handle Xls specific errors @@ -196,7 +198,7 @@ impl Xls { formats: Vec::new(), #[cfg(feature = "picture")] pictures: None, - merged_regions: None + merged_regions: None, }; xls.parse_workbook(reader, cfb)?; @@ -208,12 +210,15 @@ impl Xls { /// Get the merged regions of all the sheets pub fn merged_regions(&self) -> &Vec<(String, String, Dimensions)> { - self.merged_regions.as_ref().expect("Merged Regions must be loaded before the are referenced") + self.merged_regions + .as_ref() + .expect("Merged Regions must be loaded before the are referenced") } /// Get the merged regions by sheet name pub fn merged_regions_by_sheet(&self, name: &str) -> Vec<(&String, &String, &Dimensions)> { - self.merged_regions().iter() + self.merged_regions() + .iter() .filter(|s| (**s).0 == name) .map(|(name, sheet, region)| (name, sheet, region)) .collect() @@ -791,29 +796,6 @@ fn parse_label_sst(r: &[u8], strings: &[String]) -> Result>, X Ok(None) } -/// Dimensions info -#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Copy, Clone)] -pub struct Dimensions { - /// start: (row, col) - pub start: (u32, u32), - /// end: (row, col) - pub end: (u32, u32), -} - -impl Dimensions { - /// create dimensions info with start position and end position - pub fn new(start: (u32, u32), end: (u32, u32)) -> Self { - Self { - start, - end - } - } - /// check if a position is in it - pub fn contains(&self, row: u32, col: u32) -> bool { - row >= self.start.0 && row <= self.end.0 && col >= self.start.1 && col <= self.end.1 - } -} - fn parse_dimensions(r: &[u8]) -> Result { let (rf, rl, cf, cl) = match r.len() { 10 => ( diff --git a/src/xlsx/mod.rs b/src/xlsx/mod.rs index b7465221..c476b9d8 100644 --- a/src/xlsx/mod.rs +++ b/src/xlsx/mod.rs @@ -192,6 +192,8 @@ pub struct Xlsx { /// Pictures #[cfg(feature = "picture")] pictures: Option)>>, + /// Merged Regions: Name, Sheet, Merged Dimensions + merged_regions: 
Option>, } impl Xlsx { @@ -640,6 +642,67 @@ impl Xlsx { Ok(()) } + // sheets must be added before this is called!! + fn read_merged_regions(&mut self) -> Result<(), XlsxError> { + let mut regions = Vec::new(); + for (sheet_name, sheet_path) in &self.sheets { + // we need another mutable borrow of self.zip later so we enclose this borrow within braces + { + let mut xml = match xml_reader(&mut self.zip, &sheet_path) { + None => continue, + Some(x) => x?, + }; + let mut buf = Vec::new(); + loop { + buf.clear(); + match xml.read_event_into(&mut buf) { + Ok(Event::Start(ref e)) if e.local_name() == QName(b"mergeCell").into() => { + if let Some(attr) = get_attribute(e.attributes(), QName(b"ref").into())? + { + let dismension = get_dimension(attr)?; + regions.push(( + sheet_name.to_string(), + sheet_path.to_string(), + dismension, + )); + } + } + Ok(Event::Eof) => break, + Err(e) => return Err(XlsxError::Xml(e)), + _ => (), + } + } + } + } + self.merged_regions = Some(regions); + Ok(()) + } + + /// Load the merged regions + pub fn load_merged_regions(&mut self) -> Result<(), XlsxError> { + if self.merged_regions.is_none() { + self.read_merged_regions() + } else { + Ok(()) + } + } + + /// Get the merged regions of all the sheets + pub fn merged_regions(&self) -> &Vec<(String, String, Dimensions)> { + self.merged_regions + .as_ref() + .expect("Merged Regions must be loaded before the are referenced") + } + + /// Get the merged regions by sheet name + pub fn merged_regions_by_sheet(&self, name: &str) -> Vec<(&String, &String, &Dimensions)> { + self.merged_regions() + .iter() + .filter(|s| (**s).0 == name) + .map(|(name, sheet, region)| (name, sheet, region)) + .collect() + } + /// Load the tables from pub fn load_tables(&mut self) -> Result<(), XlsxError> { if self.tables.is_none() { @@ -777,6 +840,7 @@ impl Reader for Xlsx { metadata: Metadata::default(), #[cfg(feature = "picture")] pictures: None, + merged_regions: None, }; xlsx.read_shared_strings()?; xlsx.read_styles()?; 
diff --git a/tests/test.rs b/tests/test.rs
index 5252def7..4e09ff45 100644
--- a/tests/test.rs
+++ b/tests/test.rs
@@ -4,6 +4,7 @@ use calamine::{
     Sheet, SheetType, SheetVisible, Xls, Xlsb, Xlsx,
 };
 use calamine::{CellErrorType::*, Data};
+use std::collections::BTreeSet;
 use std::io::Cursor;
 use std::sync::Once;
 
@@ -1088,6 +1089,66 @@ fn issue_221() {
     );
 }
 
+#[test]
+fn merged_regions_xlsx() {
+    use calamine::Dimensions;
+
+    // Expected `(sheet name, sheet path, dimensions)` entries of a single sheet.
+    let region_set = |sheet: &str, sheet_path: &str, dimensions: Vec<Dimensions>| {
+        dimensions
+            .into_iter()
+            .map(|d| (sheet.to_string(), sheet_path.to_string(), d))
+            .collect::<BTreeSet<_>>()
+    };
+    let sheet1 = region_set(
+        "Sheet1",
+        "xl/worksheets/sheet1.xml",
+        vec![
+            Dimensions::new((0, 0), (1, 0)), // A1:A2
+            Dimensions::new((0, 1), (1, 1)), // B1:B2
+            Dimensions::new((0, 2), (1, 3)), // C1:D2
+            Dimensions::new((2, 2), (2, 3)), // C3:D3
+            Dimensions::new((3, 2), (3, 3)), // C4:D4
+            Dimensions::new((0, 4), (1, 4)), // E1:E2
+            Dimensions::new((0, 5), (1, 5)), // F1:F2
+            Dimensions::new((0, 6), (1, 6)), // G1:G2
+            Dimensions::new((0, 7), (1, 7)), // H1:H2
+        ],
+    );
+    let sheet2 = region_set(
+        "Sheet2",
+        "xl/worksheets/sheet2.xml",
+        vec![
+            Dimensions::new((0, 0), (3, 0)), // A1:A4
+            Dimensions::new((0, 1), (1, 1)), // B1:B2
+            Dimensions::new((0, 2), (1, 3)), // C1:D2
+            Dimensions::new((2, 2), (3, 3)), // C3:D4
+            Dimensions::new((0, 4), (1, 4)), // E1:E2
+            Dimensions::new((0, 5), (3, 7)), // F1:H4
+        ],
+    );
+
+    let path = format!("{}/tests/merged_range.xlsx", env!("CARGO_MANIFEST_DIR"));
+    let mut excel: Xlsx<_> = open_workbook(&path).unwrap();
+    excel.load_merged_regions().unwrap();
+
+    // The workbook-wide list must be exactly the union of the per-sheet regions.
+    assert_eq!(
+        excel.merged_regions().iter().cloned().collect::<BTreeSet<_>>(),
+        sheet1.union(&sheet2).cloned().collect::<BTreeSet<_>>(),
+    );
+
+    let by_sheet = |name: &str| {
+        excel
+            .merged_regions_by_sheet(name)
+            .into_iter()
+            .map(|(sheet, sheet_path, d)| (sheet.clone(), sheet_path.clone(), d.clone()))
+            .collect::<BTreeSet<_>>()
+    };
+    assert_eq!(by_sheet("Sheet1"), sheet1);
+    assert_eq!(by_sheet("Sheet2"), sheet2);
+}
+
 #[test]
 fn issue_252() {
     setup();
From 40ac0bdbac33d86af7df9b9e4a53125381d0f36a Mon Sep 17 00:00:00 2001
From: gongyan
Date: Sat, 10 Feb 2024 14:45:41 +0800
Subject: [PATCH 6/8] support to get merged region

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 1643204e..30a796b6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "calamine"
-version = "0.24.0-mikorab"
+version = "0.25.0"
 authors = ["Johann Tuffe "]
 repository = "https://github.com/tafia/calamine"
 documentation = "https://docs.rs/calamine"
From 443ab3e51e63949a615374cf555d73c05f0f82d5 Mon Sep 17 00:00:00 2001
From: gongyan
Date: Sat, 10 Feb 2024 15:19:48 +0800
Subject: [PATCH 7/8] support to get merged region

---
 src/xls.rs | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/src/xls.rs b/src/xls.rs
index cbe7de09..a3b3693c 100644
--- a/src/xls.rs
+++ b/src/xls.rs
@@ -17,9 +17,7 @@ use crate::formats::{
 use crate::utils::read_usize;
 use crate::utils::{push_column, read_f64, read_i16, read_i32, read_u16, read_u32};
 use crate::vba::VbaProject;
-use crate::{
-    Cell, CellErrorType, Data, Dimensions, Metadata,
Range, Reader, Sheet, SheetType, SheetVisible, -}; +use crate::{Cell, CellErrorType, Data, Metadata, Range, Reader, Sheet, SheetType, SheetVisible}; #[derive(Debug)] /// An enum to handle Xls specific errors @@ -150,8 +148,6 @@ pub struct Xls { is_1904: bool, #[cfg(feature = "picture")] pictures: Option)>>, - /// Merged Regions: Name, Sheet, Merged Dimensions - merged_regions: Option>, } impl Xls { @@ -198,7 +194,6 @@ impl Xls { formats: Vec::new(), #[cfg(feature = "picture")] pictures: None, - merged_regions: None, }; xls.parse_workbook(reader, cfb)?; @@ -207,22 +202,6 @@ impl Xls { Ok(xls) } - - /// Get the merged regions of all the sheets - pub fn merged_regions(&self) -> &Vec<(String, String, Dimensions)> { - self.merged_regions - .as_ref() - .expect("Merged Regions must be loaded before the are referenced") - } - - /// Get the merged regions by sheet name - pub fn merged_regions_by_sheet(&self, name: &str) -> Vec<(&String, &String, &Dimensions)> { - self.merged_regions() - .iter() - .filter(|s| (**s).0 == name) - .map(|(name, sheet, region)| (name, sheet, region)) - .collect() - } } impl Reader for Xls { From d816039799d5416f7f9ce0abfd0038193cc65af6 Mon Sep 17 00:00:00 2001 From: gongyan Date: Tue, 27 Feb 2024 18:02:38 +0800 Subject: [PATCH 8/8] support to get merged region --- src/xls.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/xls.rs b/src/xls.rs index a3b3693c..1cf31094 100644 --- a/src/xls.rs +++ b/src/xls.rs @@ -17,7 +17,9 @@ use crate::formats::{ use crate::utils::read_usize; use crate::utils::{push_column, read_f64, read_i16, read_i32, read_u16, read_u32}; use crate::vba::VbaProject; -use crate::{Cell, CellErrorType, Data, Metadata, Range, Reader, Sheet, SheetType, SheetVisible}; +use crate::{ + Cell, CellErrorType, Data, Dimensions, Metadata, Range, Reader, Sheet, SheetType, SheetVisible, +}; #[derive(Debug)] /// An enum to handle Xls specific errors