Skip to content

Commit

Permalink
[3.11] pythongh-101144: Allow open and read_text encoding to be posit…
Browse files Browse the repository at this point in the history
…ional. (pythonGH-101145)

The zipfile.Path open() and read_text() encoding parameter can once again be supplied as a positional argument without causing a TypeError. 3.10.0b1 included a regression that made it keyword-only.

Documentation update included, as users writing code to be compatible with a wide range of versions will need to consider this for some time.
(cherry picked from commit 5927013)

Co-authored-by: Gregory P. Smith <[email protected]>
  • Loading branch information
gpshead committed Jan 20, 2023
1 parent 1998ea6 commit 797119d
Show file tree
Hide file tree
Showing 6 changed files with 924 additions and 6 deletions.
12 changes: 12 additions & 0 deletions Doc/library/zipfile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
Added support for text and binary modes for open. Default
mode is now text.

.. versionchanged:: 3.11.2
The ``encoding`` parameter can again be supplied as a positional argument
without causing a :exc:`TypeError`, as it could in 3.9. Code needing to
be compatible with unpatched 3.10 and 3.11 versions must pass all
:class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.

.. method:: Path.iterdir()

Enumerate the children of the current directory.
Expand Down Expand Up @@ -596,6 +602,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
:class:`io.TextIOWrapper` (except ``buffer``, which is
implied by the context).

.. versionchanged:: 3.11.2
The ``encoding`` parameter can again be supplied as a positional argument
without causing a :exc:`TypeError`, as it could in 3.9. Code needing to
be compatible with unpatched 3.10 and 3.11 versions must pass all
:class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.

.. method:: Path.read_bytes()

Read the current file as bytes.
Expand Down
66 changes: 65 additions & 1 deletion Lib/test/test_zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import struct
import subprocess
import sys
from test.support.script_helper import assert_python_ok
import time
import unittest
import unittest.mock as mock
Expand Down Expand Up @@ -3005,7 +3006,69 @@ def test_open(self, alpharep):
a, b, g = root.iterdir()
with a.open(encoding="utf-8") as strm:
data = strm.read()
assert data == "content of a"
self.assertEqual(data, "content of a")
with a.open('r', "utf-8") as strm: # not a kw, no gh-101144 TypeError
data = strm.read()
self.assertEqual(data, "content of a")

def test_open_encoding_utf16(self):
in_memory_file = io.BytesIO()
zf = zipfile.ZipFile(in_memory_file, "w")
zf.writestr("path/16.txt", "This was utf-16".encode("utf-16"))
zf.filename = "test_open_utf16.zip"
root = zipfile.Path(zf)
(path,) = root.iterdir()
u16 = path.joinpath("16.txt")
with u16.open('r', "utf-16") as strm:
data = strm.read()
self.assertEqual(data, "This was utf-16")
with u16.open(encoding="utf-16") as strm:
data = strm.read()
self.assertEqual(data, "This was utf-16")

def test_open_encoding_errors(self):
in_memory_file = io.BytesIO()
zf = zipfile.ZipFile(in_memory_file, "w")
zf.writestr("path/bad-utf8.bin", b"invalid utf-8: \xff\xff.")
zf.filename = "test_read_text_encoding_errors.zip"
root = zipfile.Path(zf)
(path,) = root.iterdir()
u16 = path.joinpath("bad-utf8.bin")

# encoding= as a positional argument for gh-101144.
data = u16.read_text("utf-8", errors="ignore")
self.assertEqual(data, "invalid utf-8: .")
with u16.open("r", "utf-8", errors="surrogateescape") as f:
self.assertEqual(f.read(), "invalid utf-8: \udcff\udcff.")

# encoding= both positional and keyword is an error; gh-101144.
with self.assertRaisesRegex(TypeError, "encoding"):
data = u16.read_text("utf-8", encoding="utf-8")

# both keyword arguments work.
with u16.open("r", encoding="utf-8", errors="strict") as f:
# error during decoding with wrong codec.
with self.assertRaises(UnicodeDecodeError):
f.read()

def test_encoding_warnings(self):
    """EncodingWarning must blame the read_text and open calls."""
    # The script runs in a child interpreter via ``-c``, so warnings are
    # attributed to "<string>:<lineno>".  The line numbers asserted below
    # (8 and 9) are the positions of the read_text() and open() calls in
    # this literal; keep them in sync if the script is edited.
    # Everything after the ``with`` line must stay inside its body, because
    # the archive has to remain open while zipfile.Path reads from it.
    code = '''\
import io, zipfile
with zipfile.ZipFile(io.BytesIO(), "w") as zf:
    zf.filename = '<test_encoding_warnings in memory zip file>'
    zf.writestr("path/file.txt", b"Spanish Inquisition")
    root = zipfile.Path(zf)
    (path,) = root.iterdir()
    file_path = path.joinpath("file.txt")
    unused = file_path.read_text()  # should warn
    file_path.open("r").close()  # should warn
'''
    # -X warn_default_encoding makes the child emit the EncodingWarning
    # messages that this test inspects on stderr.
    proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code)
    warnings = proc.err.splitlines()
    # Exactly one warning per call; show the raw stderr on mismatch.
    self.assertEqual(len(warnings), 2, proc.err)
    self.assertRegex(warnings[0], rb"^<string>:8: EncodingWarning:")
    self.assertRegex(warnings[1], rb"^<string>:9: EncodingWarning:")

def test_open_write(self):
"""
Expand Down Expand Up @@ -3047,6 +3110,7 @@ def test_read(self, alpharep):
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
assert a.read_text(encoding="utf-8") == "content of a"
a.read_text("utf-8") # No positional arg TypeError per gh-101144.
assert a.read_bytes() == b"content of a"

@pass_alpharep
Expand Down
Loading

0 comments on commit 797119d

Please sign in to comment.