diff --git a/q/unzip/unzip.q b/q/unzip/unzip.q index fac80f8..afde3ed 100644 --- a/q/unzip/unzip.q +++ b/q/unzip/unzip.q @@ -437,12 +437,20 @@ .finos.unzip.priv.parseNum cmp, {"v"$24 60 60 sv 1 1 2*2 sv'0 5 11 cut reverse .finos.unzip.priv.parseBits x}mtm, {.finos.util.ymd . 1980 0 0+2 sv'0 7 11 cut reverse .finos.unzip.priv.parseBits x}mdt, - .finos.unzip.priv.parseNum csz, - .finos.unzip.priv.parseNum usz, .finos.unzip.priv.parseNum nln, .finos.unzip.priv.parseNum xln from z; + if[r[`flg]`data_descriptor; + / data descriptor + r,:`crc`csz`usz!4 cut -12#x; + ]; + + r:update + .finos.unzip.priv.parseNum csz, + .finos.unzip.priv.parseNum usz + from r; + r:update fnm:`$"c"$x y+til nln from r; r:update xfd:x y+nln+til xln from r; @@ -495,7 +503,7 @@ // @return long .finos.unzip.priv.ofcds:{ c:hcount x; - r:{(not 0x504b0506~y 0)&x>y 1}[c]{(read1(x;y-z 1;4);1+z 1)}[x;c]/(0x00000000;0); + r:{(not 0x504b0506~y 0)&x>=y 1}[c]{(read1(x;y-z 1;4);1+z 1)}[x;c]/(0x00000000;0); $[0x504b0506~r 0;1+c-r 1;0N]} // Find offset of zip64 end of central directory locator signature in a zip vector. @@ -598,6 +606,31 @@ ecd64:.finos.unzip.priv.pecd64 .finos.unzip.priv.bytes[y;ecl64`cof;12+.finos.unzip.priv.parseNum .finos.unzip.priv.bytes[y;4+ecl64`cof;8]]]; ecd]; + / check for empty zip + if[not count cd; + :$[ + `list=x; + ([name:0#`]size:0#0Ni;timestamp:0#0Np); + `unzip=x; + $[ + -11h=type z; + [ + .finos.log.error(string z),": file not found in archive"; + 'z; + ]; + 11h=type z; + [ + {.finos.log.error(string x),": file not found in archive"}each z; + 'first z; + ]; + z~(::); + ((0#`)!())]; + '`domain]; + ]; + + / start of central directory + scd:$[-1=ecd`cof;ecd64;ecd]`cof; + / parse central directory .finos.log.debug"parsing central directory"; cd:.finos.unzip.priv.parse[(.finos.unzip.priv.pcd;.finos.unzip.priv.wcd);cd;count cd]; @@ -612,9 +645,16 @@ 1!select name:fnm,size:usz,timestamp:mdt+mtm from cd]; `unzip=x; [ + / calculate next offsets + cd:update nof:scd^next lof from cd; + / apply file filter, if any if[not z~(::); cd:select from cd where fnm in z; + if[count e:exec(raze z)except fnm from cd; + {.finos.log.error(string x),": file not found in archive"}each e; + 'first e; + ]; ]; / parse file data @@ -630,15 +670,16 @@ y:(exec min lof from cd)_y; / extract all files - .finos.unzip.priv.parse[(.finos.unzip.priv.pfd;.finos.unzip.priv.wfd;z);y;($[-1=ecd`cof;ecd64;ecd]`cof)-exec min lof from cd]]; + .finos.unzip.priv.parse[(.finos.unzip.priv.pfd;.finos.unzip.priv.wfd;z);y;scd-exec min lof from cd]]; [ / extract each file mentioned in the central directory - / TODO probably over-reads (at least) the last file f:{[w;x;y;z] h:.finos.unzip.priv.split[w;0].finos.unzip.priv.bytes[x;y`lof;sum w]; first .finos.unzip.priv.pfd[(.finos.unzip.priv.bytes[x;y`lof;z-y`lof];::);sum w;h]}; - cd f[.finos.unzip.priv.wfd;y]'c^exec next lof from cd]]; + / assume the end of the last file is the beginning of the central directory + / might be wrong if archive decryption header and/or archive extra data record are present? + cd f[.finos.unzip.priv.wfd;y]'exec nof from cd]]; r:exec fnm!fdu from fd; @@ -676,6 +717,7 @@ // Set to true to extract files via file scan, rather than by using the // central directory. +// N.B. currently, will likely fail for data-descriptor-based archives .finos.unzip.filescan:0b // List files in an archive. diff --git a/tests/unzip/empty.zip b/tests/unzip/empty.zip new file mode 100644 index 0000000..15cb0ec Binary files /dev/null and b/tests/unzip/empty.zip differ