Skip to content

Commit

Permalink
Merge pull request #98 from adavies42/feature/unzip
Browse files Browse the repository at this point in the history
Feature/unzip
  • Loading branch information
adavies42 authored Dec 11, 2023
2 parents dfcdf74 + 04faa55 commit 366c519
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 6 deletions.
54 changes: 48 additions & 6 deletions q/unzip/unzip.q
Original file line number Diff line number Diff line change
Expand Up @@ -437,12 +437,20 @@
.finos.unzip.priv.parseNum cmp,
{"v"$24 60 60 sv 1 1 2*2 sv'0 5 11 cut reverse .finos.unzip.priv.parseBits x}mtm,
{.finos.util.ymd . 1980 0 0+2 sv'0 7 11 cut reverse .finos.unzip.priv.parseBits x}mdt,
.finos.unzip.priv.parseNum csz,
.finos.unzip.priv.parseNum usz,
.finos.unzip.priv.parseNum nln,
.finos.unzip.priv.parseNum xln
from z;

if[r[`flg]`data_descriptor;
/ data descriptor
r,:`crc`csz`usz!4 cut -12#x;
];

r:update
.finos.unzip.priv.parseNum csz,
.finos.unzip.priv.parseNum usz
from r;

r:update fnm:`$"c"$x y+til nln from r;

r:update xfd:x y+nln+til xln from r;
Expand Down Expand Up @@ -495,7 +503,7 @@
// @return long
.finos.unzip.priv.ofcds:{
c:hcount x;
r:{(not 0x504b0506~y 0)&x>y 1}[c]{(read1(x;y-z 1;4);1+z 1)}[x;c]/(0x00000000;0);
r:{(not 0x504b0506~y 0)&x>=y 1}[c]{(read1(x;y-z 1;4);1+z 1)}[x;c]/(0x00000000;0);
$[0x504b0506~r 0;1+c-r 1;0N]}

// Find offset of zip64 end of central directory locator signature in a zip vector.
Expand Down Expand Up @@ -598,6 +606,31 @@
ecd64:.finos.unzip.priv.pecd64 .finos.unzip.priv.bytes[y;ecl64`cof;12+.finos.unzip.priv.parseNum .finos.unzip.priv.bytes[y;4+ecl64`cof;8]]];
ecd];

/ check for empty zip
if[not count cd;
:$[
`list=x;
([name:0#`]size:0#0Ni;timestamp:0#0Np);
`unzip=x;
$[
-11h=type z;
[
.finos.log.error(string z),": file not found in archive";
'z;
];
11h=type z;
[
{.finos.log.error(string x),": file not found in archive"}each z;
'first z;
];
z~(::);
((0#`)!())];
'`domain];
];

/ start of central directory
scd:$[-1=ecd`cof;ecd64;ecd]`cof;

/ parse central directory
.finos.log.debug"parsing central directory";
cd:.finos.unzip.priv.parse[(.finos.unzip.priv.pcd;.finos.unzip.priv.wcd);cd;count cd];
Expand All @@ -612,9 +645,16 @@
1!select name:fnm,size:usz,timestamp:mdt+mtm from cd];
`unzip=x;
[
/ calculate next offsets
cd:update nof:scd^next lof from cd;

/ apply file filter, if any
if[not z~(::);
cd:select from cd where fnm in z;
if[count e:exec(raze z)except fnm from cd;
{.finos.log.error(string x),": file not found in archive"}each e;
'first e;
];
];

/ parse file data
Expand All @@ -630,15 +670,16 @@
y:(exec min lof from cd)_y;

/ extract all files
.finos.unzip.priv.parse[(.finos.unzip.priv.pfd;.finos.unzip.priv.wfd;z);y;($[-1=ecd`cof;ecd64;ecd]`cof)-exec min lof from cd]];
.finos.unzip.priv.parse[(.finos.unzip.priv.pfd;.finos.unzip.priv.wfd;z);y;scd-exec min lof from cd]];
[
/ extract each file mentioned in the central directory
/ TODO probably over-reads (at least) the last file
f:{[w;x;y;z]
h:.finos.unzip.priv.split[w;0].finos.unzip.priv.bytes[x;y`lof;sum w];
first .finos.unzip.priv.pfd[(.finos.unzip.priv.bytes[x;y`lof;z-y`lof];::);sum w;h]};

cd f[.finos.unzip.priv.wfd;y]'c^exec next lof from cd]];
/ assume the end of the last file is the beginning of the central directory
/ might be wrong if archive decryption header and/or archive extra data record are present?
cd f[.finos.unzip.priv.wfd;y]'exec nof from cd]];

r:exec fnm!fdu from fd;

Expand Down Expand Up @@ -676,6 +717,7 @@

// Set to true to extract files via file scan, rather than by using the
// central directory.
// N.B. currently, will likely fail for data-descriptor-based archives
.finos.unzip.filescan:0b

// List files in an archive.
Expand Down
Binary file added tests/unzip/empty.zip
Binary file not shown.

0 comments on commit 366c519

Please sign in to comment.