From 22d990e195f9bfda0311c29dfccf9f1fa5cecae9 Mon Sep 17 00:00:00 2001 From: Guillaume Balaine Date: Wed, 18 Aug 2021 00:20:11 +0200 Subject: [PATCH 1/4] parquet files transformed to avro --- data/avro/alltypes_dictionary.avro | Bin 0 -> 765 bytes data/avro/alltypes_dictionary.parquet.avro | Bin 0 -> 765 bytes data/avro/alltypes_plain.avro | Bin 0 -> 868 bytes data/avro/alltypes_plain.parquet.avro | Bin 0 -> 868 bytes data/avro/alltypes_plain.snappy.avro | Bin 0 -> 766 bytes data/avro/alltypes_plain.snappy.parquet.avro | Bin 0 -> 766 bytes data/avro/binary.avro | Bin 0 -> 236 bytes data/avro/binary.parquet.avro | Bin 0 -> 236 bytes data/avro/datapage_v2.snappy.avro | Bin 0 -> 456 bytes data/avro/dict-page-offset-zero.avro | Bin 0 -> 213 bytes data/avro/fixed_length_decimal.avro | Bin 0 -> 436 bytes data/avro/fixed_length_decimal_legacy.avro | Bin 0 -> 433 bytes data/avro/int32_decimal.avro | Bin 0 -> 392 bytes data/avro/int64_decimal.avro | Bin 0 -> 431 bytes data/avro/list_columns.avro | Bin 0 -> 373 bytes data/avro/nested_lists.snappy.avro | Bin 0 -> 407 bytes data/avro/nonnullable.impala.avro | Bin 0 -> 1570 bytes data/avro/nullable.impala.avro | Bin 0 -> 1812 bytes data/avro/nulls.snappy.avro | Bin 0 -> 330 bytes data/avro/repeated_no_annotation.avro | Bin 0 -> 627 bytes data/avro/single_nan.avro | Bin 0 -> 204 bytes 21 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 data/avro/alltypes_dictionary.avro create mode 100644 data/avro/alltypes_dictionary.parquet.avro create mode 100644 data/avro/alltypes_plain.avro create mode 100644 data/avro/alltypes_plain.parquet.avro create mode 100644 data/avro/alltypes_plain.snappy.avro create mode 100644 data/avro/alltypes_plain.snappy.parquet.avro create mode 100644 data/avro/binary.avro create mode 100644 data/avro/binary.parquet.avro create mode 100644 data/avro/datapage_v2.snappy.avro create mode 100644 data/avro/dict-page-offset-zero.avro create mode 100644 data/avro/fixed_length_decimal.avro create mode 100644 data/avro/fixed_length_decimal_legacy.avro create mode 100644 data/avro/int32_decimal.avro create mode 100644 data/avro/int64_decimal.avro create mode 100644 data/avro/list_columns.avro create mode 100644 data/avro/nested_lists.snappy.avro create mode 100644 data/avro/nonnullable.impala.avro create mode 100644 data/avro/nullable.impala.avro create mode 100644 data/avro/nulls.snappy.avro create mode 100644 data/avro/repeated_no_annotation.avro create mode 100644 data/avro/single_nan.avro diff --git a/data/avro/alltypes_dictionary.avro b/data/avro/alltypes_dictionary.avro new file mode 100644 index 0000000000000000000000000000000000000000..1fdd79e8a915c0f6d11e6ea1eb21faba6ffeaf16 GIT binary patch literal 765 zcmb7?u}cFn7{zn76_gGgij#|>V~tlkSr8o^1VO~5l$yIs&(vIUBx&KOg>EiV+#KxY z<{~)hj{X^rPGYaMhn^?YF)zRO_&&b3V>ON01>BYbDf^Ikcre$2I`9dsKuK&N z9aw}Md*NFPf19)k+nwkQN|>WyrP`SwG{ndDDx_S8<`!eH)-6ViB?PP6f_)=HiC~^Y zYs!ORP|Wa0wDK@xQ`1v}7IlBKOmH`8;06;oKJG|Mf*sMa7>O-6787x-NnL44xf}CX zfhH=+@;_Qrk0^~j|7UzuiLa%ZaH);i-nb``Y1yN;6bg2utJk`wkS@YL9+nF!AIl@u zCQ?zs=gX*sR`a9k*}@@qR&nh60eEk{T`Zyg;jtaj;9RlY=0LxRhEi$u(ZR+=aVqA(TQl7ZEp? zZvFtl$=@Iq90g}be}guy4H_fV@!oshm+#|y53IJfu#CEb8`3_*9vUw8pbC5pD^OsY z3kMbD=_(pheu@Eal8i8n{I{ijO_ODrqO*DFrWNX<>0!&$9VQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@qqLCPW8qm?rANRPs{|T^rU8f1#kq+&IYg@_T47RVI;Ps3{JeB*4o=I-PegYvSPYxul>E}9oK!T= zK?JcXPb^7|FD@y{%u7eplT=xfT8vE#X{t*yb5n~;5_1bsjfW=*sHb!C(=(G3b3!2L z1g=RpH#4~?zgVdj9_z8S2KhzldWi*zz{I6jT##6ltyh*>RGgWg$7QT%sAnVsPI}4t zDXGak#d(PZ1(gi<#4|ViPK@ufae9_CQ}DnlfjdkM%mNG?42%q6j0_n92FCgZhWZAU z3`_LR!obDAz`(@HBqPFb_DqjGgE1>3i;9Du0fUGkL&Kasw^$7rc(M{0 z7)l2$ literal 0 HcmV?d00001 diff --git a/data/avro/alltypes_plain.parquet.avro b/data/avro/alltypes_plain.parquet.avro new file mode 100644 index 0000000000000000000000000000000000000000..cb7b2c26a1f65cd76938abcc555f01dc4435b161 GIT binary patch literal 868 zcmeZI%3@>_ODrqO*DFrWNX<>0!&$9VQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@qqLCPW8qm?rANRPs{|T^rU8f1#kq+&IYg@_T47RVI;Ps3{JeB*4o=I-PegYvSPYxul>E}9oK!T= zK?JcXPb^7|FD@y{%u7eplT=xfT8vE#X{t*yb5n~;5_1bsjfW=*sHb!C(=(G3b3!2L z1g=RpH#4~?zgVdj9_z8S2KhzldWi*zz{I6jT##6ltyh*>RGgWg$7QT%sAnVsPI}4t zDXGak#d(PZ1(gg-g_pWby#Kc+bmH0-GEoUn1nw|3FbgnnFfcNNF*0Nb7#Qmt80s5X zGB6o1G&C$&_@-eK3j-Ge0|OH)lZ*(%*)u)%492XCEGiCq1`HyG3=MPk++sCg;K@p0 zU@*~3V30Zb>HeI}EDU@=0Z{|S1dfNRA21p)uz=)^K=Nx|G=JH^!oUU=5Co|>v9JZG qf&nC8C}^OlpwK>j!j1JT3``7Z3XGx#Dj-W1UjPZ!th^J5?g0R4_#L1C literal 0 HcmV?d00001 diff --git a/data/avro/alltypes_plain.snappy.avro b/data/avro/alltypes_plain.snappy.avro new file mode 100644 index 0000000000000000000000000000000000000000..d818ab554c139a5e13fceb6ff5a2968028d708b2 GIT binary patch literal 766 zcmb7?F-yZh7=|yU71Synij&jPv8HM3VjUD*1wq86)Oty->FMPz++7Qy6e7VI8gC=o)vXnC?Zs7*yD7x-gi-T?6GAWK6HxXlTq;Op-L6{Tq zn7+a?&hkH65f@8^Tmf#Y0!>`x61{aQwJTxkoE^k z7K~!SSOWMI{JxL(c;A+}ugxr=mf*Uyn%G7C*)~+ZhhYT@ zY;j@3JY>iXX2rct+`@EwqBjVpwuF^xdxDSvAKR;tFcp{&DTTF8K4eVJY2D)V8yQF# zu{c^0<`07+hCiaEizuC%o*FcX^P8odIdKa&C`ZwCM_L?g^Fx#3$Z=ya7Do!#r4ocW z5s&FBEaNQyqZM(nRLJ#y#)p;oI+}8aScvY7dlH(KU1A9?VJG~0tz&TE=*UCEa_Q1T z;y`a_ODrqO*DFrWNX<>$##pUXQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@sg^7Db}A^M}0k}6A5i$VHJb8?hoYh!B-@{7{-5(^T6Ht7`? zBo<}sm8BLHXXfW|8S5G98Hs@1k({5Bn#@z2msn6x$?#qv$M`G1_Ze-+kbNRkcYc!y xQR!h|VqjomWME=qU}9!qVqsunWnf}sU}9%r;$UFnWMJZAVB+p~`yqs`7XW-CNJanv literal 0 HcmV?d00001 diff --git a/data/avro/binary.parquet.avro b/data/avro/binary.parquet.avro new file mode 100644 index 0000000000000000000000000000000000000000..262ecbc1317a8c84ad1900f8f669381d80af179a GIT binary patch literal 236 zcmeZI%3@>_ODrqO*DFrWNX<>$##pUXQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@sg^7Db}A^M}0k}6A5i$VHJb8?hoYh!B-@{7{-5(^T6Ht7`? zBo<}sm8BLHXXfW|8S5G98Hs@1k({5Bn#@z2msn6x$?)lOhOdXdpwh`j)$>ed)Fw%U xsPr%}F)%POGB7bQFflVQu`n>PGBB|*FtIZ*aWF7(GB9y5Fmd<0{SZRe3jl+ONDcr1 literal 0 HcmV?d00001 diff --git a/data/avro/datapage_v2.snappy.avro b/data/avro/datapage_v2.snappy.avro new file mode 100644 index 0000000000000000000000000000000000000000..ccd590076c4cbf33520426db0aabd898442d852c GIT binary patch literal 456 zcmeZI%3@>_ODrqO*DFrWNX<>0$5O3SQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@qKfvO?8qm_zFiZb)kLE1}ma+G3gb>OO!kW^*nm0(knjHDzb zzceW)6`LxM<6zb$<>%+5Cgx#N2GR>v2KQWIQBh(gP&~6FH5cRsB~*vN935M0kYALp zmspSp3?04Vg2bY1y|UDz;>`R!E@M4IJtGlt*d^zuq$cwe=Oq>tR5EyY&0VlX<_ODrqO*DFrWNX<>$$5^dYQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@rV;tLXsO0rWcfw~~3L@Q_ODrqO*DFrWNX<=L#$2sbQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@sY5_3vZfjS^20L7t3rDay6BAJBDEiOn*#xz|IW>Rrx6;RgD zP)8{zKRq)!F((A#xRli7%-qBrpj`z(cV-r6=I1F{8JX%R6$8}(br{tG-CdfKqZC^k zTWgSCl&+Tuv?wE04`^Rewq99k5l9P{v7VuxkqFrT$@xGtd5ZHA3koV37U->r`QaM- z@S0epOz8HLBT)@P@Aj!y(BFW9da6*!Yg;7VAhhYK`e*tosB7lrNKnAk{kkJCf zFMu2tA0T4`kijYfWK;n09UzCz0mxVaWUzAp896|F0m$Jn0WxOD@)-PKfCJ8iJ_ODrqO*DFrWNX<=L!d$IXQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@sY5_3vZfjS^20L7t3rDay6BAJBDEiOn*#xz|IW>Rrx6;Rep zM=2*iJu^8mCj{cQl+@(R+{7H9RRut2W)^4W=P6kk8tW((1JwXE7}WxuU7C}l6k8ix zYmi@*u9pb3CnHr4XkAgZURi1pNDG&-o}r$R2-y3{`9L#yit`c+3Mv`=g^ny;_cEAcxHX$XEhouyX(zIY4{?$l);IWVe{X00JC;z!YbKwW-f$ Hbe8}CT}^*L literal 0 HcmV?d00001 diff --git a/data/avro/int32_decimal.avro b/data/avro/int32_decimal.avro new file mode 100644 index 0000000000000000000000000000000000000000..0623a288a3fd160979f0ce07037ed6b8bf0c1328 GIT binary patch literal 392 zcmeZI%3@>_ODrqO*DFrWNX<=L#9XaZQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@sY5_3vZfjS^20L7t3rDay6BAJBDEiOn*#xz|IW>Rrx6;Rel zM=2*iJu^8mCj{cQl+@(R+{7H9RRut2W)^4W=P6m4=qMEfm4MXO0$p92lcN+{8(V9T zUzDzw2(%_6RS#%eQMO)LY7s~Wm$9Cqo{W;hO^UIU z*=tjjE^9E4(c^_ODrqO*DFrWNX<=L!d$IXQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@sY5_3vZfjS^20L7t3rDay6BAJBDEiOn*#xz|IW>Rrx6;Rex zM=2*iJu^8mCj{cQl+@(R+{7H9RRut2W)^4W=P6kk8t5n$1JwXE7}WxuU7C}l6k8ix zYmi@*u9pb3CnHr4XkAgZURi1pNDG&-o}r$R2-y3{`9L#yit`c+3Mv`ah0WT1g1KsI z?TQC6a;Ki&H+aQ3fsuiMk&z*Vk(Gtv1S1;@qYf(@!vrAy0^~4702zCL3}yu&qXmdx z068o^K*k0jgH;5`r~u+SKn|M&kg){FVCMiba)9^(ki%iZ%4RTw0SGw$fJn}S6K5Z0 Hp}PYB`qY9% literal 0 HcmV?d00001 diff --git a/data/avro/list_columns.avro b/data/avro/list_columns.avro new file mode 100644 index 0000000000000000000000000000000000000000..0d2dd2354016f5dc1a636e5fcb3fdb9a0ec44e01 GIT binary patch literal 373 zcmeZI%3@>_ODrqO*DFrWNX<=L#$2sbQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@qq^GeK2;&U>KOMu!S#sDRu<|P&tB~}8(GfPr)LAsQ3^7GO` z#+T;gD8<$y7;xiCOVTWeHm$g%C=;uBv9$*IMd^Bp1&Kf(>J=9x7G>*|r4|)u=I3!4 z>lx}9iGcl|oS%}K%u}3~SWr;OaCEoG)^ibCo{JkzO%>fyJDIJ_s)B)yfr)_yNCFXC vVp1{#TWVT516xLB76Std69Xp`4+jGS2dktG6GV`$qOyu%b?vpc=!O9R>-%{Y literal 0 HcmV?d00001 diff --git a/data/avro/nested_lists.snappy.avro b/data/avro/nested_lists.snappy.avro new file mode 100644 index 0000000000000000000000000000000000000000..6cbff89610a7fce5f817edd668a06f5b5ac76a5b GIT binary patch literal 407 zcmeZI%3@>_ODrqO*DFrWNX<<=#$2sbQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@qKfvO?8fnrc&5{rrwD}myfC8@a(#iU9o6_*rc=B0yNQks*a z6kCgr0e4Fh!YxXfc_j$lv9$*IMd^Bp1&Kf(>lGIy7G>*|r4|)u=I3!4>lx}9iGaf+ zIX@*enWs1}v7n%mA>d!>+<&~!>)XFJ@3vpKVl~?g#(WkA7FH%3rbGs&BnAd12Bu^N z1_l-;MlOyN1_nkUBSj#OQIS=OQ-vW_P=$ewQJT|LRE33!fmJ~VNCIIRPy+*#>x<@m GbmIXq^L?5C literal 0 HcmV?d00001 diff --git a/data/avro/nonnullable.impala.avro b/data/avro/nonnullable.impala.avro new file mode 100644 index 0000000000000000000000000000000000000000..7ff8f3b7ae18a6847a2ad1d1513dbd842000b621 GIT binary patch literal 1570 zcmb7E%Sr<=6isRoEnNuKen56Y4Zc>zjUq^)#iGk9CC@*oNX`z(*}J5})C^cD&^>*(x37^3S*B>^$nJ{E$E60G4Q zfAK)lzOIY{CQ~)sVrsuVP5OOjD7iG3OIlN;4HC^E>rbOY5)No)`6_35@fnF+?Tp5f z`t3_82A*#1-)Why*r#se^)SQ{@iI;XNhqwdH1lwhK#Yo|=fIm&#I~X_i{)e6sXN}O z>0YSx{|#G~u9{UA)*f78G;zPE%eN^=EJcvMo9cF&L2(x8O><^vNi}2G8c7qk{p#GT zeihVp+nqyhlW0s<608-uG{GDiXD)p{xCwRpXP>#*BGtCk(%@3D>&Y$`9Zb}v>Qb!k zmgwOj6>-2>;Q;U9{nGUN>~WuZ#h8&O8iD$A;r;FL>#6zj(EK>O`pn(K#u3;6IRF93 xs~-d_0K#G}-`j(wwKV|Niy#Nf5CXWqlUK{lQwYk8Gp(B_3qU>R+pp5D`~y7N|BCs(p0;LrZ+d=J>PfE`ObZ0KF7ig>xI4%S=+qB zzSnm3cqiccvL15F4_o@Y?y-)1kNw~Q?{W8We}Bv2ZY$E4*LV96N5YSfuj{Vw+46MP zb@h$C`2om_H(406odGuC?C(1<@5soy_=7kehMO*@1$o_OlM-0 zp+)**(la^OS?6RdnG_g3Q2`LgAAO_xiu zlWVTONR=(MOmL~ajlBgwv<)VPYMUFv1!3Fh@i20Hue4+=8jb4F{cQOyZfTLnf?x*@ z%a@;hXns9>-@fo?F1Y@*@{1mvy9ERa;zz|(Jg73ERMu1wYua7}LfZ}ahyVm9CdDg& zqAA5Sfdi;QfiVU!4F%IQ@!}-R0I~3jR@m}=R8=^5he&_k!6y_nygf$+kqDrITCNB- zoWhr4tGGKUm>LA@@_&gaZ*dk)!M86Wl$2um6;{iyaRO*@AFtL$fD*MAmA&GdI<5%W zb^|{xP ojUcK}A^5vap#&O0?ThkRtw7;Y+!U;`JmRD%0r|H20<*RJ3%PC=b^rhX literal 0 HcmV?d00001 diff --git a/data/avro/nulls.snappy.avro b/data/avro/nulls.snappy.avro new file mode 100644 index 0000000000000000000000000000000000000000..8be5bec851706b8166bdf5c3bee911bfb3a0d72a GIT binary patch literal 330 zcmeZI%3@>_ODrqO*DFrWNX<>0#ayjaQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@r#;)_d)N|Q@~njnS%rEr*rq!eU+aY14-(Uv5~XXYWm>rs&Pde*T2Yj(SC(2-oSC1;WvpkYXCwmld2)VAYBEo8 pUSdH(CBp%~F6Mn)f0j(`mV6DkbS(>#o>4gji4ZzTW# literal 0 HcmV?d00001 diff --git a/data/avro/repeated_no_annotation.avro b/data/avro/repeated_no_annotation.avro new file mode 100644 index 0000000000000000000000000000000000000000..44edb8e104a3b7b941c66315978bdf3145554e3d GIT binary patch literal 627 zcmeZI%3@>_ODrqO*DFrWNXij}OQt6@qqLCPW8qm?rAN{2RJ+pGXS@}dT1`h43#{vXA!PY%E`}5 z2XZljCOb0^IRJ}GiZYQE0z(n52gX1Wi>)=tFG|--1cpvVsvae8{FKyWp5na3f`Up0vHLF{PWkk9Rqftfp57f+=Xe${hGj4?F)^?(F|aWJ z0S5yE69dDs%`-M>0% literal 0 HcmV?d00001 diff --git a/data/avro/single_nan.avro b/data/avro/single_nan.avro new file mode 100644 index 0000000000000000000000000000000000000000..ccf93e54d93cde00cfb71f60a5158d3b9b53027b GIT binary patch literal 204 zcmeZI%3@>_ODrqO*DFrWNX<>$!&t3UQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@rVE0gncfI1*1L@TA_mnP+;f{ZB5$x(`}jjc7vFG|--EJy@e zrdM2$Sd^_-mReMtnV-jHtY@faBm#Cya(+r`GEZ?{VnIPAgY&J>2lxBA_I?&JxHHZB RbR!cVBLkBSPv1gx2>|LeN+AFM literal 0 HcmV?d00001 From a8f7be380531758eb7962542a5eb020d8795aa20 Mon Sep 17 00:00:00 2001 From: Guillaume Balaine Date: Fri, 20 Aug 2021 06:35:26 +0200 Subject: [PATCH 2/4] delete duplicate files --- data/avro/alltypes_dictionary.parquet.avro | Bin 765 -> 0 bytes data/avro/alltypes_plain.parquet.avro | Bin 868 -> 0 bytes data/avro/alltypes_plain.snappy.parquet.avro | Bin 766 -> 0 bytes data/avro/binary.parquet.avro | Bin 236 -> 0 bytes 4 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 data/avro/alltypes_dictionary.parquet.avro delete mode 100644 data/avro/alltypes_plain.parquet.avro delete mode 100644 data/avro/alltypes_plain.snappy.parquet.avro delete mode 100644 data/avro/binary.parquet.avro diff --git a/data/avro/alltypes_dictionary.parquet.avro b/data/avro/alltypes_dictionary.parquet.avro deleted file mode 100644 index 103db00868c8c67153584180f9a98db325e1a8ea..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 765 zcmb7?F-yZh7=|xtD<~a06hUx0x>;jtaj;9RlY=0LxRhEi$u(ZR+=aVqA(TQl7ZEp? zZvFtl$=@Iq90g}be}guy4H_fV@!oshm+#|y53IJfu#CEb8`3_*9vUw8pbC5pD^OsY z3kMbD=_(pheu@Eal8i8n{I{ijO_ODrqO*DFrWNX<>0!&$9VQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@qqLCPW8qm?rANRPs{|T^rU8f1#kq+&IYg@_T47RVI;Ps3{JeB*4o=I-PegYvSPYxul>E}9oK!T= zK?JcXPb^7|FD@y{%u7eplT=xfT8vE#X{t*yb5n~;5_1bsjfW=*sHb!C(=(G3b3!2L z1g=RpH#4~?zgVdj9_z8S2KhzldWi*zz{I6jT##6ltyh*>RGgWg$7QT%sAnVsPI}4t zDXGak#d(PZ1(gg-g_pWby#Kc+bmH0-GEoUn1nw|3FbgnnFfcNNF*0Nb7#Qmt80s5X zGB6o1G&C$&_@-eK3j-Ge0|OH)lZ*(%*)u)%492XCEGiCq1`HyG3=MPk++sCg;K@p0 zU@*~3V30Zb>HeI}EDU@=0Z{|S1dfNRA21p)uz=)^K=Nx|G=JH^!oUU=5Co|>v9JZG qf&nC8C}^OlpwK>j!j1JT3``7Z3XGx#Dj-W1UjPZ!th^J5?g0R4_#L1C diff --git a/data/avro/alltypes_plain.snappy.parquet.avro b/data/avro/alltypes_plain.snappy.parquet.avro deleted file mode 100644 index ac35ae5f6766c3fd136af2468c4dbadbf6c2ce6a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 766 zcmb7?u}i~16vi*56_g?!ij&jPMW|_NUD`##RS-m6N~xFRnx0MI{JxL(c;A+}ugxr=mf*Uyn%G7C*)~+ZhhYT@ zY;j@3JY>iXX2rct+`@EwqBjVpwuF^xdxDSvAKR;tFcp{&DTTF8K4eVJY2D)V8yQF# zu{c^0<`07+hCiaEizuC%o*FcX^P8odIdKa&C`ZwCM_L?g^Fx#3$Z=ya7Do!#r4ocW z5s&FBEaNQyqZM(nRLJ#y#)p;oI+}8aScvY7dlH(KU1A9?VJG~0tz&TE=*UCEa_Q1T z;y`a_ODrqO*DFrWNX<>$##pUXQdy9yWTjM;nw(#hqNJmgmzWFUm*f}tq?V=T z1i{49GE;L>ij}OQt6@sg^7Db}A^M}0k}6A5i$VHJb8?hoYh!B-@{7{-5(^T6Ht7`? zBo<}sm8BLHXXfW|8S5G98Hs@1k({5Bn#@z2msn6x$?)lOhOdXdpwh`j)$>ed)Fw%U xsPr%}F)%POGB7bQFflVQu`n>PGBB|*FtIZ*aWF7(GB9y5Fmd<0{SZRe3jl+ONDcr1 From 8d306efa213b859645c68f60ee1f6db0b1997b43 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 30 Aug 2021 07:22:15 -0400 Subject: [PATCH 3/4] Add initial README --- data/avro/README.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 data/avro/README.md diff --git a/data/avro/README.md b/data/avro/README.md new file mode 100644 index 0000000..6b7682f --- /dev/null +++ b/data/avro/README.md @@ -0,0 +1,7 @@ +This directory contains AVRO files corresponding to the parquet testing files at https://github.com/apache/parquet-testing/blob/master/data/ + +These files were created by using spark with the following commands: + +``` +TODO +``` From a1504992e4db796c502828e4f57664f6a4cb8ebc Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 9 Sep 2021 14:40:18 -0400 Subject: [PATCH 4/4] Update data/avro/README.md --- data/avro/README.md | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/data/avro/README.md b/data/avro/README.md index 6b7682f..2707e12 100644 --- a/data/avro/README.md +++ b/data/avro/README.md @@ -1,7 +1,37 @@ This directory contains AVRO files corresponding to the parquet testing files at https://github.com/apache/parquet-testing/blob/master/data/ -These files were created by using spark with the following commands: +These files were created by using spark using the commands from https://gist.github.com/Igosuki/324b011f40185269d3fc552350d21744 -``` -TODO +Roughly: +```scala +import com.github.mrpowers.spark.daria.sql.DariaWriters +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path +import org.apache.hadoop.conf.Configuration +import org.apache.commons.io.FilenameUtils + +val fileGlobs = sc.getConf.get("spark.driver.globs") +val dest = sc.getConf.get("spark.driver.out") + +val fs = FileSystem.get(new Configuration(true)); +val status = fs.globStatus(new Path(fileGlobs)) +for (fileStatus <- status) { + val path = fileStatus.getPath().toString() + try { + val dfin = spark.read.format("parquet").load(path) + val fileName = fileStatus.getPath().getName(); + val fileNameWithOutExt = FilenameUtils.removeExtension(fileName); + val destination = s"${dest}/${fileNameWithOutExt}.avro" + println(s"Converting $path to avro at $destination") + DariaWriters.writeSingleFile( + df = dfin, + format = "avro", + sc = spark.sparkContext, + tmpFolder = s"/tmp/dw/${fileName}", + filename = destination + ) + } catch { + case e: Throwable => println(s"failed to convert $path : ${e.getMessage}") + } +} ```