From 6ce5cb046c7d4dd3e4c2a3a1d812c546b540b47f Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 24 Sep 2024 14:12:37 -0700 Subject: [PATCH 1/7] Working fix --- Doc/library/itertools.rst | 4 ---- Lib/test/test_itertools.py | 5 +++-- Modules/itertoolsmodule.c | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 508c20f4df6f5e..151ad4d2c20dac 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -707,10 +707,6 @@ loops that truncate the stream. except StopIteration: return - Once a :func:`tee` has been created, the original *iterable* should not be - used anywhere else; otherwise, the *iterable* could get advanced without - the tee objects being informed. - When the input *iterable* is already a tee iterator object, all members of the return tuple are constructed as if they had been produced by the upstream :func:`tee` call. This "flattening step" diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index 6820dce3f12620..ef162de921cf0f 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -1249,10 +1249,11 @@ def test_tee(self): self.assertEqual(len(result), n) self.assertEqual([list(x) for x in result], [list('abc')]*n) - # tee pass-through to copyable iterator + # tee objects are independent (see bug gh-123884) a, b = tee('abc') c, d = tee(a) - self.assertTrue(a is c) + e, f = tee(c) + self.assertTrue(len({a, b, c, d, e, f}) == 6) # test tee_new t1, t2 = tee('abc') diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index e740ec4d7625c3..6f2e5fa5f8ed6b 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -1058,7 +1058,7 @@ itertools_tee_impl(PyObject *module, PyObject *iterable, Py_ssize_t n) Py_DECREF(result); return NULL; } - if (copyfunc != NULL) { + if (copyfunc != NULL && 0) { copyable = it; } else { From 2a7a4d32ef4a93e9a7021b81600e25c5c5c33246 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 24 Sep 2024 14:14:56 -0700 Subject: [PATCH 2/7] Clean-up unused path --- Modules/itertoolsmodule.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 6f2e5fa5f8ed6b..116d8989e8dbee 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -1053,28 +1053,18 @@ itertools_tee_impl(PyObject *module, PyObject *iterable, Py_ssize_t n) return NULL; } - if (PyObject_GetOptionalAttr(it, &_Py_ID(__copy__), ©func) < 0) { - Py_DECREF(it); + itertools_state *state = get_module_state(module); + copyable = tee_fromiterable(state, it); + Py_DECREF(it); + if (copyable == NULL) { Py_DECREF(result); return NULL; } - if (copyfunc != NULL && 0) { - copyable = it; - } - else { - itertools_state *state = get_module_state(module); - copyable = tee_fromiterable(state, it); - Py_DECREF(it); - if (copyable == NULL) { - Py_DECREF(result); - return NULL; - } - copyfunc = PyObject_GetAttr(copyable, &_Py_ID(__copy__)); - if (copyfunc == NULL) { - Py_DECREF(copyable); - Py_DECREF(result); - return NULL; - } + copyfunc = PyObject_GetAttr(copyable, &_Py_ID(__copy__)); + if (copyfunc == NULL) { + Py_DECREF(copyable); + Py_DECREF(result); + return NULL; } PyTuple_SET_ITEM(result, 0, copyable); From e397bfd5318614715c52173a60022db232589e9e Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 24 Sep 2024 22:02:29 -0700 Subject: [PATCH 3/7] Update tee() rough equivalent recipe and related tests --- Doc/library/itertools.rst | 45 ++++++++++++++------- Lib/test/test_itertools.py | 81 ++++++++++++++++++++++---------------- 2 files changed, 76 insertions(+), 50 deletions(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 151ad4d2c20dac..9c1aacbcbd6047 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -689,23 +689,38 @@ loops that truncate the stream. Roughly equivalent to:: + class _tee: + + def __init__(self, iterable): + it = iter(iterable) + if isinstance(it, _tee): + self.iterator = it.iterator + self.link = it.link + else: + self.iterator = it + self.link = [None, None] + + def __iter__(self): + return self + + def __next__(self): + link = self.link + if link[1] is None: + link[0] = next(self.iterator) + link[1] = [None, None] + value, self.link = link + return value + def tee(iterable, n=2): if n < 0: - raise ValueError('n must be >= 0') - iterator = iter(iterable) - shared_link = [None, None] - return tuple(_tee(iterator, shared_link) for _ in range(n)) - - def _tee(iterator, link): - try: - while True: - if link[1] is None: - link[0] = next(iterator) - link[1] = [None, None] - value, link = link - yield value - except StopIteration: - return + raise ValueError + if n == 0: + return () + first = _tee(iterable) + result = [first] + for _ in range(n - 1): + result.append(_tee(first)) + return tuple(result) When the input *iterable* is already a tee iterator object, all members of the return tuple are constructed as if they had been diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index ef162de921cf0f..68401845cafd52 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -1758,23 +1758,38 @@ def test_tee_recipe(self): # Begin tee() recipe ########################################### + class _tee: + + def __init__(self, iterable): + it = iter(iterable) + if isinstance(it, _tee): + self.iterator = it.iterator + self.link = it.link + else: + self.iterator = it + self.link = [None, None] + + def __iter__(self): + return self + + def __next__(self): + link = self.link + if link[1] is None: + link[0] = next(self.iterator) + link[1] = [None, None] + value, self.link = link + return value + def tee(iterable, n=2): if n < 0: - raise ValueError('n must be >= 0') - iterator = iter(iterable) - shared_link = [None, None] - return tuple(_tee(iterator, shared_link) for _ in range(n)) - - def _tee(iterator, link): - try: - while True: - if link[1] is None: - link[0] = next(iterator) - link[1] = [None, None] - value, link = link - yield value - except StopIteration: - return + raise ValueError + if n == 0: + return () + first = _tee(iterable) + result = [first] + for _ in range(n - 1): + result.append(_tee(first)) + return tuple(result) # End tee() recipe ############################################# @@ -1820,12 +1835,10 @@ def _tee(iterator, link): self.assertRaises(TypeError, tee, [1,2], 'x') self.assertRaises(TypeError, tee, [1,2], 3, 'x') - # Tests not applicable to the tee() recipe - if False: - # tee object should be instantiable - a, b = tee('abc') - c = type(a)('def') - self.assertEqual(list(c), list('def')) + # tee object should be instantiable + a, b = tee('abc') + c = type(a)('def') + self.assertEqual(list(c), list('def')) # test long-lagged and multi-way split a, b, c = tee(range(2000), 3) @@ -1846,21 +1859,19 @@ def _tee(iterator, link): self.assertEqual(len(result), n) self.assertEqual([list(x) for x in result], [list('abc')]*n) + # tee objects are independent (see bug gh-123884) + a, b = tee('abc') + c, d = tee(a) + e, f = tee(c) + self.assertTrue(len({a, b, c, d, e, f}) == 6) - # Tests not applicable to the tee() recipe - if False: - # tee pass-through to copyable iterator - a, b = tee('abc') - c, d = tee(a) - self.assertTrue(a is c) - - # test tee_new - t1, t2 = tee('abc') - tnew = type(t1) - self.assertRaises(TypeError, tnew) - self.assertRaises(TypeError, tnew, 10) - t3 = tnew(t1) - self.assertTrue(list(t1) == list(t2) == list(t3) == list('abc')) + # test tee_new + t1, t2 = tee('abc') + tnew = type(t1) + self.assertRaises(TypeError, tnew) + self.assertRaises(TypeError, tnew, 10) + t3 = tnew(t1) + self.assertTrue(list(t1) == list(t2) == list(t3) == list('abc')) # test that tee objects are weak referencable a, b = tee(range(10)) From 8e2315c6dfff152cef6af38857bc98540a1541c6 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 24 Sep 2024 22:40:56 -0700 Subject: [PATCH 4/7] Add blurb --- .../Library/2024-09-24-22-38-51.gh-issue-123884.iEPTK4.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-09-24-22-38-51.gh-issue-123884.iEPTK4.rst diff --git a/Misc/NEWS.d/next/Library/2024-09-24-22-38-51.gh-issue-123884.iEPTK4.rst b/Misc/NEWS.d/next/Library/2024-09-24-22-38-51.gh-issue-123884.iEPTK4.rst new file mode 100644 index 00000000000000..55f1d4b41125c3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-24-22-38-51.gh-issue-123884.iEPTK4.rst @@ -0,0 +1,4 @@ +Fixed bug in itertools.tee() handling of other tee inputs (a tee in a tee). +The output now has the promised *n* independent new iterators. Formerly, +the first iterator was identical (not independent) to the input iterator. +This would sometimes give surprising results. From 4e4fad41367c2a76c88c449fe322ff4aec409e93 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 25 Sep 2024 08:35:06 -0700 Subject: [PATCH 5/7] Call tee_copy() directly --- Modules/itertoolsmodule.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 116d8989e8dbee..1201fa094902d7 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -1036,7 +1036,7 @@ itertools_tee_impl(PyObject *module, PyObject *iterable, Py_ssize_t n) /*[clinic end generated code: output=1c64519cd859c2f0 input=c99a1472c425d66d]*/ { Py_ssize_t i; - PyObject *it, *copyable, *copyfunc, *result; + PyObject *it, *to, *result; if (n < 0) { PyErr_SetString(PyExc_ValueError, "n must be >= 0"); @@ -1054,30 +1054,22 @@ itertools_tee_impl(PyObject *module, PyObject *iterable, Py_ssize_t n) } itertools_state *state = get_module_state(module); - copyable = tee_fromiterable(state, it); + to = tee_fromiterable(state, it); Py_DECREF(it); - if (copyable == NULL) { - Py_DECREF(result); - return NULL; - } - copyfunc = PyObject_GetAttr(copyable, &_Py_ID(__copy__)); - if (copyfunc == NULL) { - Py_DECREF(copyable); + if (to == NULL) { Py_DECREF(result); return NULL; } - PyTuple_SET_ITEM(result, 0, copyable); + PyTuple_SET_ITEM(result, 0, to); for (i = 1; i < n; i++) { - copyable = _PyObject_CallNoArgs(copyfunc); - if (copyable == NULL) { - Py_DECREF(copyfunc); + to = tee_copy((teeobject *)to, NULL); + if (to == NULL) { Py_DECREF(result); return NULL; } - PyTuple_SET_ITEM(result, i, copyable); + PyTuple_SET_ITEM(result, i, to); } - Py_DECREF(copyfunc); return result; } From ee064e72d883cf555b494e2bd37e68c3d9ab8c80 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 25 Sep 2024 09:51:47 -0700 Subject: [PATCH 6/7] Beautify the pure python version --- Doc/library/itertools.rst | 22 +++++++++++----------- Lib/test/test_itertools.py | 22 +++++++++++----------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 9c1aacbcbd6047..c1299ebfe8d27a 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -689,6 +689,17 @@ loops that truncate the stream. Roughly equivalent to:: + def tee(iterable, n=2): + if n < 0: + raise ValueError + if n == 0: + return () + iterator = _tee(iterable) + result = [iterator] + for _ in range(n - 1): + result.append(_tee(iterator)) + return tuple(result) + class _tee: def __init__(self, iterable): @@ -711,17 +722,6 @@ loops that truncate the stream. value, self.link = link return value - def tee(iterable, n=2): - if n < 0: - raise ValueError - if n == 0: - return () - first = _tee(iterable) - result = [first] - for _ in range(n - 1): - result.append(_tee(first)) - return tuple(result) - When the input *iterable* is already a tee iterator object, all members of the return tuple are constructed as if they had been produced by the upstream :func:`tee` call. This "flattening step" diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index 68401845cafd52..8469de998ba014 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -1758,6 +1758,17 @@ def test_tee_recipe(self): # Begin tee() recipe ########################################### + def tee(iterable, n=2): + if n < 0: + raise ValueError + if n == 0: + return () + iterator = _tee(iterable) + result = [iterator] + for _ in range(n - 1): + result.append(_tee(iterator)) + return tuple(result) + class _tee: def __init__(self, iterable): @@ -1780,17 +1791,6 @@ def __next__(self): value, self.link = link return value - def tee(iterable, n=2): - if n < 0: - raise ValueError - if n == 0: - return () - first = _tee(iterable) - result = [first] - for _ in range(n - 1): - result.append(_tee(first)) - return tuple(result) - # End tee() recipe ############################################# n = 200 From 6153e70d53127fd04b8f29a8b7fd39e659857dfd Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 25 Sep 2024 11:32:27 -0700 Subject: [PATCH 7/7] Remove unused __copy__ constant --- Include/internal/pycore_global_objects_fini_generated.h | 1 - Include/internal/pycore_global_strings.h | 1 - Include/internal/pycore_runtime_init_generated.h | 1 - Include/internal/pycore_unicodeobject_generated.h | 4 ---- 4 files changed, 7 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 6e948e16b7dbe8..a5f13692c627e4 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -604,7 +604,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__classdictcell__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__complex__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__contains__)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__copy__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__ctypes_from_outparam__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__del__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__delattr__)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 5c63a6e519b93d..dd958dcd9fe3ac 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -93,7 +93,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(__classdictcell__) STRUCT_FOR_ID(__complex__) STRUCT_FOR_ID(__contains__) - STRUCT_FOR_ID(__copy__) STRUCT_FOR_ID(__ctypes_from_outparam__) STRUCT_FOR_ID(__del__) STRUCT_FOR_ID(__delattr__) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index bac6b5b8fcfd9d..8d7da8befc94c1 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -602,7 +602,6 @@ extern "C" { INIT_ID(__classdictcell__), \ INIT_ID(__complex__), \ INIT_ID(__contains__), \ - INIT_ID(__copy__), \ INIT_ID(__ctypes_from_outparam__), \ INIT_ID(__del__), \ INIT_ID(__delattr__), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index efdbde4c8ea3c6..d5ad61d4c62e22 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -172,10 +172,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(__copy__); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(__ctypes_from_outparam__); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1));