From de2802b1ff2e91a9c52c6d43039b3bd96f63d713 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 20 Feb 2023 11:27:01 -0500 Subject: [PATCH 1/4] Refactor zipfile._strip_extra to use higher level abstractions for extras instead of a heavy-state loop. --- Lib/zipfile/__init__.py | 59 ++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 6e6211de6b1684..f342b1b4e9cf3b 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -188,28 +188,43 @@ class LargeZipFile(Exception): _DD_SIGNATURE = 0x08074b50 -_EXTRA_FIELD_STRUCT = struct.Struct(' Date: Mon, 20 Feb 2023 12:00:16 -0500 Subject: [PATCH 2/4] Add blurb --- .../next/Library/2023-02-20-12-00-11.gh-issue-88233.o5Zb0t.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-02-20-12-00-11.gh-issue-88233.o5Zb0t.rst diff --git a/Misc/NEWS.d/next/Library/2023-02-20-12-00-11.gh-issue-88233.o5Zb0t.rst b/Misc/NEWS.d/next/Library/2023-02-20-12-00-11.gh-issue-88233.o5Zb0t.rst new file mode 100644 index 00000000000000..945f92d3dfa93b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-02-20-12-00-11.gh-issue-88233.o5Zb0t.rst @@ -0,0 +1,2 @@ +Refactored ``zipfile._strip_extra`` to use higher level abstractions for +extras instead of a heavy-state loop. From e6abc5543b9b35740a37ea3148cc900342aa429b Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 20 Feb 2023 12:02:37 -0500 Subject: [PATCH 3/4] Remove _strip_extra and use _Extra.strip directly. 
--- Lib/test/test_zipfile/test_core.py | 46 +++++++++++++++--------------- Lib/zipfile/__init__.py | 5 +--- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index e23f5c2a8556f2..fb95b453614aba 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -3023,14 +3023,14 @@ def test_no_data(self): b = s.pack(2, 0) c = s.pack(3, 0) - self.assertEqual(b'', zipfile._strip_extra(a, (self.ZIP64_EXTRA,))) - self.assertEqual(b, zipfile._strip_extra(b, (self.ZIP64_EXTRA,))) + self.assertEqual(b'', zipfile._Extra.strip(a, (self.ZIP64_EXTRA,))) + self.assertEqual(b, zipfile._Extra.strip(b, (self.ZIP64_EXTRA,))) self.assertEqual( - b+b"z", zipfile._strip_extra(b+b"z", (self.ZIP64_EXTRA,))) + b+b"z", zipfile._Extra.strip(b+b"z", (self.ZIP64_EXTRA,))) - self.assertEqual(b+c, zipfile._strip_extra(a+b+c, (self.ZIP64_EXTRA,))) - self.assertEqual(b+c, zipfile._strip_extra(b+a+c, (self.ZIP64_EXTRA,))) - self.assertEqual(b+c, zipfile._strip_extra(b+c+a, (self.ZIP64_EXTRA,))) + self.assertEqual(b+c, zipfile._Extra.strip(a+b+c, (self.ZIP64_EXTRA,))) + self.assertEqual(b+c, zipfile._Extra.strip(b+a+c, (self.ZIP64_EXTRA,))) + self.assertEqual(b+c, zipfile._Extra.strip(b+c+a, (self.ZIP64_EXTRA,))) def test_with_data(self): s = struct.Struct(" Date: Fri, 14 Jul 2023 12:52:25 -0400 Subject: [PATCH 4/4] Use memoryview to avoid unnecessary copies while splitting Extras. 
--- Lib/zipfile/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index cd1d7a0669867e..2c963de18e4f95 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -209,8 +209,10 @@ def read_one(cls, raw): @classmethod def split(cls, data): - while data: - extra, data = _Extra.read_one(data) + # use memoryview for zero-copy slices + rest = memoryview(data) + while rest: + extra, rest = _Extra.read_one(rest) yield extra @classmethod