From 15791fb7b41b61b49bbf654741c8b8cf72cb3a5d Mon Sep 17 00:00:00 2001 From: Aaron Davies Date: Thu, 15 Jun 2023 14:26:08 -0400 Subject: [PATCH 1/5] support data-descriptor-based archives --- q/unzip/unzip.q | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/q/unzip/unzip.q b/q/unzip/unzip.q index fac80f8..edeaa62 100644 --- a/q/unzip/unzip.q +++ b/q/unzip/unzip.q @@ -437,12 +437,20 @@ .finos.unzip.priv.parseNum cmp, {"v"$24 60 60 sv 1 1 2*2 sv'0 5 11 cut reverse .finos.unzip.priv.parseBits x}mtm, {.finos.util.ymd . 1980 0 0+2 sv'0 7 11 cut reverse .finos.unzip.priv.parseBits x}mdt, - .finos.unzip.priv.parseNum csz, - .finos.unzip.priv.parseNum usz, .finos.unzip.priv.parseNum nln, .finos.unzip.priv.parseNum xln from z; + if[r[`flg]`data_descriptor; + / data descriptor + r,:`crc`csz`usz!4 cut -12#x; + ]; + + r:update + .finos.unzip.priv.parseNum csz, + .finos.unzip.priv.parseNum usz + from r; + r:update fnm:`$"c"$x y+til nln from r; r:update xfd:x y+nln+til xln from r; @@ -598,6 +606,9 @@ ecd64:.finos.unzip.priv.pecd64 .finos.unzip.priv.bytes[y;ecl64`cof;12+.finos.unzip.priv.parseNum .finos.unzip.priv.bytes[y;4+ecl64`cof;8]]]; ecd]; + / start of central directory + scd:$[-1=ecd`cof;ecd64;ecd]`cof; + / parse central directory .finos.log.debug"parsing central directory"; cd:.finos.unzip.priv.parse[(.finos.unzip.priv.pcd;.finos.unzip.priv.wcd);cd;count cd]; @@ -612,6 +623,9 @@ 1!select name:fnm,size:usz,timestamp:mdt+mtm from cd]; `unzip=x; [ + / calculate next offsets + cd:update nof:scd^next lof from cd; + / apply file filter, if any if[not z~(::); cd:select from cd where fnm in z; @@ -630,15 +644,16 @@ y:(exec min lof from cd)_y; / extract all files - .finos.unzip.priv.parse[(.finos.unzip.priv.pfd;.finos.unzip.priv.wfd;z);y;($[-1=ecd`cof;ecd64;ecd]`cof)-exec min lof from cd]]; + .finos.unzip.priv.parse[(.finos.unzip.priv.pfd;.finos.unzip.priv.wfd;z);y;scd-exec min lof from cd]]; [ / extract each file mentioned in the central directory - / TODO probably over-reads (at least) the last file f:{[w;x;y;z] h:.finos.unzip.priv.split[w;0].finos.unzip.priv.bytes[x;y`lof;sum w]; first .finos.unzip.priv.pfd[(.finos.unzip.priv.bytes[x;y`lof;z-y`lof];::);sum w;h]}; - cd f[.finos.unzip.priv.wfd;y]'c^exec next lof from cd]]; + / assume the end of the last file is the beginning of the central directory + / might be wrong if archive decryption header and/or archive extra data record are present? + cd f[.finos.unzip.priv.wfd;y]'exec nof from cd]]; r:exec fnm!fdu from fd; @@ -676,6 +691,7 @@ // Set to true to extract files via file scan, rather than by using the // central directory. +// N.B. currently, will likely fail for data-descriptor-based archives .finos.unzip.filescan:0b // List files in an archive. From 30f9cb3653e5f04a34fc85603e3fd816bc3c2668 Mon Sep 17 00:00:00 2001 From: Aaron Davies Date: Wed, 25 Oct 2023 18:06:25 -0400 Subject: [PATCH 2/5] throw error if requested file not found --- q/unzip/unzip.q | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/q/unzip/unzip.q b/q/unzip/unzip.q index edeaa62..30c56f3 100644 --- a/q/unzip/unzip.q +++ b/q/unzip/unzip.q @@ -629,6 +629,10 @@ / apply file filter, if any if[not z~(::); cd:select from cd where fnm in z; + if[count e:exec(raze z)except fnm from cd; + {.finos.log.error(string x),": file not found in archive"}each e; + 'first e; + ]; ]; / parse file data From 25a40fa0dcc37bc45cc59b3aa5c6d1d71384f20e Mon Sep 17 00:00:00 2001 From: Aaron Davies Date: Thu, 26 Oct 2023 14:42:17 -0400 Subject: [PATCH 3/5] add empty zip for testing --- tests/unzip/empty.zip | Bin 0 -> 22 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/unzip/empty.zip diff --git a/tests/unzip/empty.zip b/tests/unzip/empty.zip new file mode 100644 index 0000000000000000000000000000000000000000..15cb0ecb3e219d1701294bfdf0fe3f5cb5d208e7 GIT binary patch literal 22 NcmWIWW@Tf*000g10H*)| literal 0 HcmV?d00001 From 3714b3b0c8ee3ce7ab4e8adb9bf4bf780de3e629 Mon Sep 17 00:00:00 2001 From: Aaron Davies Date: Thu, 26 Oct 2023 14:40:24 -0400 Subject: [PATCH 4/5] fix off-by-one error --- q/unzip/unzip.q | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q/unzip/unzip.q b/q/unzip/unzip.q index 30c56f3..dd86778 100644 --- a/q/unzip/unzip.q +++ b/q/unzip/unzip.q @@ -503,7 +503,7 @@ // @return long .finos.unzip.priv.ofcds:{ c:hcount x; - r:{(not 0x504b0506~y 0)&x>y 1}[c]{(read1(x;y-z 1;4);1+z 1)}[x;c]/(0x00000000;0); + r:{(not 0x504b0506~y 0)&x>=y 1}[c]{(read1(x;y-z 1;4);1+z 1)}[x;c]/(0x00000000;0); $[0x504b0506~r 0;1+c-r 1;0N]} // Find offset of zip64 end of central directory locator signature in a zip vector. From 04faa55dd1d86d7f44b0400ad479c6055506402f Mon Sep 17 00:00:00 2001 From: Aaron Davies Date: Wed, 25 Oct 2023 18:06:37 -0400 Subject: [PATCH 5/5] handle empty zip file --- q/unzip/unzip.q | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/q/unzip/unzip.q b/q/unzip/unzip.q index dd86778..afde3ed 100644 --- a/q/unzip/unzip.q +++ b/q/unzip/unzip.q @@ -606,6 +606,28 @@ ecd64:.finos.unzip.priv.pecd64 .finos.unzip.priv.bytes[y;ecl64`cof;12+.finos.unzip.priv.parseNum .finos.unzip.priv.bytes[y;4+ecl64`cof;8]]]; ecd]; + / check for empty zip + if[not count cd; + :$[ + `list=x; + ([name:0#`]size:0#0Ni;timestamp:0#0Np); + `unzip=x; + $[ + -11h=type z; + [ + .finos.log.error(string z),": file not found in archive"; + 'z; + ]; + 11h=type z; + [ + {.finos.log.error(string x),": file not found in archive"}each z; + 'first z; + ]; + z~(::); + ((0#`)!())]; + '`domain]; + ]; + / start of central directory scd:$[-1=ecd`cof;ecd64;ecd]`cof;