Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-101144: Allow open and read_text encoding to be positional. #101145

Merged
Merged
12 changes: 12 additions & 0 deletions Doc/library/zipfile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
Added support for text and binary modes for open. Default
mode is now text.

.. versionchanged:: 3.11.2
The ``encoding`` parameter can be supplied as a positional argument
without causing a :exc:`TypeError`. As it could in 3.9 and earlier. Code
needing to be compatible with unpatched 3.10 and 3.11 versions should
always pass ``encoding=`` as a keyword argument.
gpshead marked this conversation as resolved.
Show resolved Hide resolved

.. method:: Path.iterdir()

Enumerate the children of the current directory.
Expand Down Expand Up @@ -596,6 +602,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
:class:`io.TextIOWrapper` (except ``buffer``, which is
implied by the context).

.. versionchanged:: 3.11.2
The ``encoding`` parameter can be supplied as a positional argument
without causing a :exc:`TypeError`. As it could in 3.9 and earlier. Code
needing to be compatible with unpatched 3.10 and 3.11 versions should
always pass ``encoding=`` as a keyword argument.

.. method:: Path.read_bytes()

Read the current file as bytes.
Expand Down
74 changes: 69 additions & 5 deletions Lib/test/test_zipfile/test_path.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import io
import zipfile
import itertools
import contextlib
import pathlib
import unittest
import string
import pickle
import itertools
import string
from test.support.script_helper import assert_python_ok
import unittest
import zipfile

from ._test_params import parameterize, Invoked
from ._functools import compose
Expand Down Expand Up @@ -145,7 +146,69 @@ def test_open(self, alpharep):
a, b, g = root.iterdir()
with a.open(encoding="utf-8") as strm:
data = strm.read()
assert data == "content of a"
self.assertEqual(data, "content of a")
with a.open('r', "utf-8") as strm: # not a kw, no gh-101144 TypeError
data = strm.read()
self.assertEqual(data, "content of a")

def test_open_encoding_utf16(self):
in_memory_file = io.BytesIO()
zf = zipfile.ZipFile(in_memory_file, "w")
zf.writestr("path/16.txt", "This was utf-16".encode("utf-16"))
zf.filename = "test_open_utf16.zip"
root = zipfile.Path(zf)
path, = root.iterdir()
u16 = path.joinpath("16.txt")
with u16.open('r', "utf-16") as strm:
data = strm.read()
self.assertEqual(data, "This was utf-16")
with u16.open(encoding="utf-16") as strm:
data = strm.read()
self.assertEqual(data, "This was utf-16")

def test_open_encoding_errors(self):
in_memory_file = io.BytesIO()
zf = zipfile.ZipFile(in_memory_file, "w")
zf.writestr("path/bad-utf8.bin", b"invalid utf-8: \xff\xff.")
zf.filename = "test_read_text_encoding_errors.zip"
root = zipfile.Path(zf)
path, = root.iterdir()
u16 = path.joinpath("bad-utf8.bin")

# encoding= as a positional argument for gh-101144.
data = u16.read_text("utf-8", errors="ignore")
self.assertEqual(data, "invalid utf-8: .")
with u16.open("r", "utf-8", errors="surrogateescape") as f:
self.assertEqual(f.read(), "invalid utf-8: \udcff\udcff.")

# encoding= both positional and keyword is an error; gh-101144.
with self.assertRaisesRegex(TypeError, "encoding"):
data = u16.read_text("utf-8", encoding="utf-8")

# both keyword arguments work.
with u16.open("r", encoding="utf-8", errors="strict") as f:
# error during decoding with wrong codec.
with self.assertRaises(UnicodeDecodeError):
f.read()

def test_encoding_warnings(self):
"""EncodingWarning must blame the read_text and open calls."""
code = '''\
import io, zipfile
with zipfile.ZipFile(io.BytesIO(), "w") as zf:
zf.filename = '<test_encoding_warnings in memory zip file>'
zf.writestr("path/file.txt", b"Spanish Inquisition")
root = zipfile.Path(zf)
path, = root.iterdir()
file_path = path.joinpath("file.txt")
unused = file_path.read_text() # should warn
file_path.open("r").close() # should warn
'''
proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code)
warnings = proc.err.splitlines()
self.assertEqual(len(warnings), 2, proc.err)
self.assertRegex(warnings[0], rb"^<string>:8: EncodingWarning:")
self.assertRegex(warnings[1], rb"^<string>:9: EncodingWarning:")

def test_open_write(self):
"""
Expand Down Expand Up @@ -187,6 +250,7 @@ def test_read(self, alpharep):
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
assert a.read_text(encoding="utf-8") == "content of a"
a.read_text("utf-8") # No positional arg TypeError per gh-101144.
assert a.read_bytes() == b"content of a"

@pass_alpharep
Expand Down
15 changes: 10 additions & 5 deletions Lib/zipfile/_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@ def _name_set(self):
return self.__lookup


def _extract_text_encoding(encoding=None, *args, **kwargs):
# stacklevel=3 so that the caller of the caller see any warning.
return io.text_encoding(encoding, 3), args, kwargs
gpshead marked this conversation as resolved.
Show resolved Hide resolved


class Path:
"""
A pathlib-compatible interface for zip files.
Expand Down Expand Up @@ -257,9 +262,9 @@ def open(self, mode='r', *args, pwd=None, **kwargs):
if args or kwargs:
raise ValueError("encoding args invalid for binary operation")
return stream
else:
kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
return io.TextIOWrapper(stream, *args, **kwargs)
# Text mode:
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
return io.TextIOWrapper(stream, encoding, *args, **kwargs)

@property
def name(self):
Expand All @@ -282,8 +287,8 @@ def filename(self):
return pathlib.Path(self.root.filename).joinpath(self.at)

def read_text(self, *args, **kwargs):
kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
gpshead marked this conversation as resolved.
Show resolved Hide resolved
with self.open('r', *args, **kwargs) as strm:
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
with self.open('r', encoding, *args, **kwargs) as strm:
return strm.read()

def read_bytes(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Make :func:`zipfile.Path.open` and :func:`zipfile.Path.read_text` also accept
``encoding`` as a positional argument. This was the behavior in Python 3.9 and
earlier. 3.10 introduced a regression where supplying it as a positional
argument would lead to a :exc:`TypeError`.