diff --git a/.github/workflows/generate-coverage.yaml b/.github/workflows/generate-coverage.yaml index 5975837d55..3de1427654 100644 --- a/.github/workflows/generate-coverage.yaml +++ b/.github/workflows/generate-coverage.yaml @@ -79,7 +79,7 @@ jobs: - name: Install dpctl dependencies shell: bash -l {0} run: | - pip install numpy cython setuptools pytest pytest-cov scikit-build cmake coverage[toml] + pip install numpy cython"<3" setuptools pytest pytest-cov scikit-build cmake coverage[toml] - name: Build dpctl with coverage shell: bash -l {0} diff --git a/.github/workflows/generate-docs.yml b/.github/workflows/generate-docs.yml index 768d958e02..a72741c67f 100644 --- a/.github/workflows/generate-docs.yml +++ b/.github/workflows/generate-docs.yml @@ -49,7 +49,7 @@ jobs: if: ${{ !github.event.pull_request || github.event.action != 'closed' }} shell: bash -l {0} run: | - pip install numpy cython setuptools scikit-build cmake sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput sphinxcontrib-googleanalytics + pip install numpy cython"<3" setuptools scikit-build cmake sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput sphinxcontrib-googleanalytics - name: Checkout repo uses: actions/checkout@v3 with: diff --git a/.github/workflows/os-llvm-sycl-build.yml b/.github/workflows/os-llvm-sycl-build.yml index 1aae32d4d9..e1a390aad8 100644 --- a/.github/workflows/os-llvm-sycl-build.yml +++ b/.github/workflows/os-llvm-sycl-build.yml @@ -108,7 +108,7 @@ jobs: - name: Install dpctl dependencies shell: bash -l {0} run: | - pip install numpy cython setuptools pytest scikit-build cmake + pip install numpy cython"<3" setuptools pytest scikit-build cmake - name: Checkout repo uses: actions/checkout@v3 diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 56958d3355..aad850b060 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -20,7 +20,7 @@ requirements: - cmake >=3.21 - ninja - git - - cython + - cython <3 - python - 
scikit-build - numpy diff --git a/dpctl/tensor/_elementwise_common.py b/dpctl/tensor/_elementwise_common.py index 9c61f5e97c..55c95f5360 100644 --- a/dpctl/tensor/_elementwise_common.py +++ b/dpctl/tensor/_elementwise_common.py @@ -52,6 +52,15 @@ def __call__(self, x, out=None, order="K"): if not isinstance(x, dpt.usm_ndarray): raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}") + if order not in ["C", "F", "K", "A"]: + order = "K" + buf_dt, res_dt = _find_buf_dtype( + x.dtype, self.result_type_resolver_fn_, x.sycl_device + ) + if res_dt is None: + raise RuntimeError + + orig_out = out if out is not None: if not isinstance(out, dpt.usm_ndarray): raise TypeError( @@ -64,8 +73,21 @@ def __call__(self, x, out=None, order="K"): f"Expected output shape is {x.shape}, got {out.shape}" ) - if ti._array_overlap(x, out): - raise TypeError("Input and output arrays have memory overlap") + if res_dt != out.dtype: + raise TypeError( + f"Output array of type {res_dt} is needed," + f" got {out.dtype}" + ) + + if ( + buf_dt is None + and ti._array_overlap(x, out) + and not ti._same_logical_tensors(x, out) + ): + # Allocate a temporary buffer to avoid memory overlapping. + # Note if `buf_dt` is not None, a temporary copy of `x` will be + # created, so the array overlap check isn't needed. 
+ out = dpt.empty_like(out) if ( dpctl.utils.get_execution_queue((x.sycl_queue, out.sycl_queue)) @@ -75,13 +97,6 @@ def __call__(self, x, out=None, order="K"): "Input and output allocation queues are not compatible" ) - if order not in ["C", "F", "K", "A"]: - order = "K" - buf_dt, res_dt = _find_buf_dtype( - x.dtype, self.result_type_resolver_fn_, x.sycl_device - ) - if res_dt is None: - raise RuntimeError exec_q = x.sycl_queue if buf_dt is None: if out is None: @@ -91,17 +106,20 @@ def __call__(self, x, out=None, order="K"): if order == "A": order = "F" if x.flags.f_contiguous else "C" out = dpt.empty_like(x, dtype=res_dt, order=order) - else: - if res_dt != out.dtype: - raise TypeError( - f"Output array of type {res_dt} is needed," - f" got {out.dtype}" - ) - ht, _ = self.unary_fn_(x, out, sycl_queue=exec_q) - ht.wait() + ht_unary_ev, unary_ev = self.unary_fn_(x, out, sycl_queue=exec_q) + + if not (orig_out is None or orig_out is out): + # Copy the out data from temporary buffer to original memory + ht_copy_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=out, dst=orig_out, sycl_queue=exec_q, depends=[unary_ev] + ) + ht_copy_ev.wait() + out = orig_out + ht_unary_ev.wait() return out + if order == "K": buf = _empty_like_orderK(x, buf_dt) else: @@ -117,11 +135,6 @@ def __call__(self, x, out=None, order="K"): out = _empty_like_orderK(buf, res_dt) else: out = dpt.empty_like(buf, dtype=res_dt, order=order) - else: - if buf_dt != out.dtype: - raise TypeError( - f"Output array of type {buf_dt} is needed, got {out.dtype}" - ) ht, _ = self.unary_fn_(buf, out, sycl_queue=exec_q, depends=[copy_ev]) ht_copy_ev.wait() diff --git a/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp b/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp index e4be509a22..331ef6c5eb 100644 --- a/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp +++ b/dpctl/tensor/libtensor/include/utils/memory_overlap.hpp @@ -100,6 +100,53 @@ struct MemoryOverlap } }; +struct SameLogicalTensors 
+{ + bool operator()(dpctl::tensor::usm_ndarray ar1, + dpctl::tensor::usm_ndarray ar2) const + { + // Same ndim + int nd1 = ar1.get_ndim(); + if (nd1 != ar2.get_ndim()) + return false; + + // Same dtype + int tn1 = ar1.get_typenum(); + if (tn1 != ar2.get_typenum()) + return false; + + // Same pointer + const char *ar1_data = ar1.get_data(); + const char *ar2_data = ar2.get_data(); + + if (ar1_data != ar2_data) + return false; + + // Same shape + const py::ssize_t *ar1_shape = ar1.get_shape_raw(); + const py::ssize_t *ar2_shape = ar2.get_shape_raw(); + + if (!std::equal(ar1_shape, ar1_shape + nd1, ar2_shape)) + return false; + + // Same strides + auto const &ar1_strides = ar1.get_strides_vector(); + auto const &ar2_strides = ar2.get_strides_vector(); + + auto ar1_beg_it = std::begin(ar1_strides); + auto ar1_end_it = std::end(ar1_strides); + + auto ar2_beg_it = std::begin(ar2_strides); + + if (!std::equal(ar1_beg_it, ar1_end_it, ar2_beg_it)) + return false; + + // all checks passed: arrays are logical views + // into the same memory + return true; + } +}; + } // namespace overlap } // namespace tensor } // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.hpp b/dpctl/tensor/libtensor/source/elementwise_functions.hpp index 27ee9c9fcb..453992220a 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.hpp @@ -128,7 +128,9 @@ py_unary_ufunc(dpctl::tensor::usm_ndarray src, // check memory overlap auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); - if (overlap(src, dst)) { + auto const &same_logical_tensors = + dpctl::tensor::overlap::SameLogicalTensors(); + if (overlap(src, dst) && !same_logical_tensors(src, dst)) { throw py::value_error("Arrays index overlapping segments of memory"); } diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index 4b36dea534..1833c2d770 100644 --- 
a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -60,6 +60,7 @@ using dpctl::tensor::c_contiguous_strides; using dpctl::tensor::f_contiguous_strides; using dpctl::tensor::overlap::MemoryOverlap; +using dpctl::tensor::overlap::SameLogicalTensors; using dpctl::tensor::py_internal::copy_usm_ndarray_into_usm_ndarray; @@ -338,6 +339,15 @@ PYBIND11_MODULE(_tensor_impl, m) "Determines if the memory regions indexed by each array overlap", py::arg("array1"), py::arg("array2")); + auto same_logical_tensors = [](dpctl::tensor::usm_ndarray x1, + dpctl::tensor::usm_ndarray x2) -> bool { + auto const &same_logical_tensors = SameLogicalTensors(); + return same_logical_tensors(x1, x2); + }; + m.def("_same_logical_tensors", same_logical_tensors, + "Determines if the memory regions indexed by each array are the same", + py::arg("array1"), py::arg("array2")); + m.def("_place", &py_place, "", py::arg("dst"), py::arg("cumsum"), py::arg("axis_start"), py::arg("axis_end"), py::arg("rhs"), py::arg("sycl_queue"), py::arg("depends") = py::list()); diff --git a/dpctl/tests/_numpy_warnings.py b/dpctl/tests/_numpy_warnings.py new file mode 100644 index 0000000000..1e723c3001 --- /dev/null +++ b/dpctl/tests/_numpy_warnings.py @@ -0,0 +1,28 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy +import pytest + + +@pytest.fixture +def suppress_invalid_numpy_warnings(): + # invalid: treatment for invalid floating-point operation + # (result is not an expressible number, typically indicates + # that a NaN was produced) + old_settings = numpy.seterr(invalid="ignore") + yield + numpy.seterr(**old_settings) # restore previous settings diff --git a/dpctl/tests/conftest.py b/dpctl/tests/conftest.py index 7fc63a5a24..600953bcf7 100644 --- a/dpctl/tests/conftest.py +++ b/dpctl/tests/conftest.py @@ -26,8 +26,15 @@ invalid_filter, valid_filter, ) +from _numpy_warnings import suppress_invalid_numpy_warnings sys.path.append(os.path.join(os.path.dirname(__file__), "helper")) # common fixtures -__all__ = ["check", "device_selector", "invalid_filter", "valid_filter"] +__all__ = [ + "check", + "device_selector", + "invalid_filter", + "suppress_invalid_numpy_warnings", + "valid_filter", +] diff --git a/dpctl/tests/elementwise/test_abs.py b/dpctl/tests/elementwise/test_abs.py index ee7fa0cb6c..ab0d34d54d 100644 --- a/dpctl/tests/elementwise/test_abs.py +++ b/dpctl/tests/elementwise/test_abs.py @@ -22,7 +22,7 @@ import dpctl.tensor as dpt from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported -from .utils import _all_dtypes, _usm_types +from .utils import _all_dtypes, _no_complex_dtypes, _usm_types @pytest.mark.parametrize("dtype", _all_dtypes) @@ -113,3 +113,25 @@ def test_abs_complex(dtype): np.testing.assert_allclose( dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol ) + + +@pytest.mark.parametrize("dtype", _no_complex_dtypes) +def test_abs_out_overlap(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.linspace(0, 35, 60, dtype=dtype, sycl_queue=q) + X = dpt.reshape(X, (3, 5, 4)) + + Xnp = dpt.asnumpy(X) + Ynp = np.abs(Xnp, out=Xnp) + + Y = dpt.abs(X, out=X) + assert Y is X + assert np.allclose(dpt.asnumpy(X), Xnp) + + Ynp = np.abs(Xnp, out=Xnp[::-1]) + Y = dpt.abs(X, out=X[::-1]) + assert Y is not X + assert 
np.allclose(dpt.asnumpy(X), Xnp) + assert np.allclose(dpt.asnumpy(Y), Ynp) diff --git a/dpctl/tests/elementwise/test_exp.py b/dpctl/tests/elementwise/test_exp.py index 5ea8ded018..85f21694c5 100644 --- a/dpctl/tests/elementwise/test_exp.py +++ b/dpctl/tests/elementwise/test_exp.py @@ -145,3 +145,26 @@ def test_exp_strided(dtype): atol=tol, rtol=tol, ) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_exp_out_overlap(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.linspace(0, 1, 15, dtype=dtype, sycl_queue=q) + X = dpt.reshape(X, (3, 5)) + + Xnp = dpt.asnumpy(X) + Ynp = np.exp(Xnp, out=Xnp) + + Y = dpt.exp(X, out=X) + tol = 8 * dpt.finfo(Y.dtype).resolution + assert Y is X + assert_allclose(dpt.asnumpy(X), Xnp, atol=tol, rtol=tol) + + Ynp = np.exp(Xnp, out=Xnp[::-1]) + Y = dpt.exp(X, out=X[::-1]) + assert Y is not X + assert_allclose(dpt.asnumpy(X), Xnp, atol=tol, rtol=tol) + assert_allclose(dpt.asnumpy(Y), Ynp, atol=tol, rtol=tol) diff --git a/dpctl/tests/elementwise/test_log.py b/dpctl/tests/elementwise/test_log.py index ed56fb6468..b0cc337826 100644 --- a/dpctl/tests/elementwise/test_log.py +++ b/dpctl/tests/elementwise/test_log.py @@ -18,7 +18,7 @@ import numpy as np import pytest -from numpy.testing import assert_equal +from numpy.testing import assert_allclose, assert_equal import dpctl.tensor as dpt from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported @@ -50,7 +50,7 @@ def test_log_output_contig(dtype): Y = dpt.log(X) tol = 8 * dpt.finfo(Y.dtype).resolution - np.testing.assert_allclose(dpt.asnumpy(Y), np.log(Xnp), atol=tol, rtol=tol) + assert_allclose(dpt.asnumpy(Y), np.log(Xnp), atol=tol, rtol=tol) @pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) @@ -66,7 +66,7 @@ def test_log_output_strided(dtype): Y = dpt.log(X) tol = 8 * dpt.finfo(Y.dtype).resolution - np.testing.assert_allclose(dpt.asnumpy(Y), np.log(Xnp), atol=tol, rtol=tol) + 
assert_allclose(dpt.asnumpy(Y), np.log(Xnp), atol=tol, rtol=tol) @pytest.mark.parametrize("usm_type", _usm_types) @@ -89,7 +89,7 @@ def test_log_usm_type(usm_type): expected_Y[..., 1::2] = np.log(np.float32(10 * dpt.e)) tol = 8 * dpt.finfo(Y.dtype).resolution - np.testing.assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) @pytest.mark.parametrize("dtype", _all_dtypes) @@ -112,9 +112,7 @@ def test_log_order(dtype): dpt.finfo(Y.dtype).resolution, np.finfo(expected_Y.dtype).resolution, ) - np.testing.assert_allclose( - dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol - ) + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) def test_log_special_cases(): @@ -126,3 +124,27 @@ def test_log_special_cases(): Xnp = dpt.asnumpy(X) assert_equal(dpt.asnumpy(dpt.log(X)), np.log(Xnp)) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_log_out_overlap(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.linspace(5, 35, 60, dtype=dtype, sycl_queue=q) + X = dpt.reshape(X, (3, 5, 4)) + + Xnp = dpt.asnumpy(X) + Ynp = np.log(Xnp, out=Xnp) + + Y = dpt.log(X, out=X) + assert Y is X + + tol = 8 * dpt.finfo(Y.dtype).resolution + assert_allclose(dpt.asnumpy(X), Xnp, atol=tol, rtol=tol) + + Ynp = np.log(Xnp, out=Xnp[::-1]) + Y = dpt.log(X, out=X[::-1]) + assert Y is not X + assert_allclose(dpt.asnumpy(X), Xnp, atol=tol, rtol=tol) + assert_allclose(dpt.asnumpy(Y), Ynp, atol=tol, rtol=tol) diff --git a/dpctl/tests/elementwise/test_sincos.py b/dpctl/tests/elementwise/test_sincos.py index d027ef026a..d4ca463394 100644 --- a/dpctl/tests/elementwise/test_sincos.py +++ b/dpctl/tests/elementwise/test_sincos.py @@ -161,12 +161,6 @@ def test_sincos_errors(callable): y, ) - x = dpt.zeros(2) - y = x - assert_raises_regex( - TypeError, "Input and output arrays have memory overlap", callable, x, y - ) - x = dpt.zeros(2, dtype="float32") y = 
np.empty_like(x) assert_raises_regex( @@ -230,3 +224,28 @@ def test_sincos_strided(dtype): atol=tol, rtol=tol, ) + + +@pytest.mark.parametrize( + "np_call, dpt_call", [(np.sin, dpt.sin), (np.cos, dpt.cos)] +) +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_sincos_out_overlap(np_call, dpt_call, dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.linspace(-np.pi / 2, np.pi / 2, 60, dtype=dtype, sycl_queue=q) + X = dpt.reshape(X, (3, 5, 4)) + + Xnp = dpt.asnumpy(X) + Ynp = np_call(Xnp, out=Xnp) + + Y = dpt_call(X, out=X) + assert Y is X + assert np.allclose(dpt.asnumpy(X), Xnp) + + Ynp = np_call(Xnp, out=Xnp[::-1]) + Y = dpt_call(X, out=X[::-1]) + assert Y is not X + assert np.allclose(dpt.asnumpy(X), Xnp) + assert np.allclose(dpt.asnumpy(Y), Ynp) diff --git a/dpctl/tests/elementwise/test_sqrt.py b/dpctl/tests/elementwise/test_sqrt.py index ce168a5ccb..a15f5262a7 100644 --- a/dpctl/tests/elementwise/test_sqrt.py +++ b/dpctl/tests/elementwise/test_sqrt.py @@ -18,7 +18,7 @@ import numpy as np import pytest -from numpy.testing import assert_equal +from numpy.testing import assert_allclose, assert_equal import dpctl.tensor as dpt from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported @@ -50,7 +50,7 @@ def test_sqrt_output_contig(dtype): Y = dpt.sqrt(X) tol = 8 * dpt.finfo(Y.dtype).resolution - np.testing.assert_allclose(dpt.asnumpy(Y), np.sqrt(Xnp), atol=tol, rtol=tol) + assert_allclose(dpt.asnumpy(Y), np.sqrt(Xnp), atol=tol, rtol=tol) @pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) @@ -66,7 +66,7 @@ def test_sqrt_output_strided(dtype): Y = dpt.sqrt(X) tol = 8 * dpt.finfo(Y.dtype).resolution - np.testing.assert_allclose(dpt.asnumpy(Y), np.sqrt(Xnp), atol=tol, rtol=tol) + assert_allclose(dpt.asnumpy(Y), np.sqrt(Xnp), atol=tol, rtol=tol) @pytest.mark.parametrize("usm_type", _usm_types) @@ -89,7 +89,7 @@ def test_sqrt_usm_type(usm_type): expected_Y[..., 1::2] = 
np.sqrt(np.float32(23.0)) tol = 8 * dpt.finfo(Y.dtype).resolution - np.testing.assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) @pytest.mark.parametrize("dtype", _all_dtypes) @@ -112,11 +112,10 @@ def test_sqrt_order(dtype): dpt.finfo(Y.dtype).resolution, np.finfo(expected_Y.dtype).resolution, ) - np.testing.assert_allclose( - dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol - ) + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) +@pytest.mark.usefixtures("suppress_invalid_numpy_warnings") def test_sqrt_special_cases(): q = get_queue_or_skip() @@ -126,3 +125,27 @@ def test_sqrt_special_cases(): Xnp = dpt.asnumpy(X) assert_equal(dpt.asnumpy(dpt.sqrt(X)), np.sqrt(Xnp)) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_sqrt_out_overlap(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.linspace(0, 35, 60, dtype=dtype, sycl_queue=q) + X = dpt.reshape(X, (3, 5, 4)) + + Xnp = dpt.asnumpy(X) + Ynp = np.sqrt(Xnp, out=Xnp) + + Y = dpt.sqrt(X, out=X) + assert Y is X + + tol = 8 * dpt.finfo(Y.dtype).resolution + assert_allclose(dpt.asnumpy(X), Xnp, atol=tol, rtol=tol) + + Ynp = np.sqrt(Xnp, out=Xnp[::-1]) + Y = dpt.sqrt(X, out=X[::-1]) + assert Y is not X + assert_allclose(dpt.asnumpy(X), Xnp, atol=tol, rtol=tol) + assert_allclose(dpt.asnumpy(Y), Ynp, atol=tol, rtol=tol) diff --git a/dpctl/tests/elementwise/test_square.py b/dpctl/tests/elementwise/test_square.py index 95ec163e2f..3af0528944 100644 --- a/dpctl/tests/elementwise/test_square.py +++ b/dpctl/tests/elementwise/test_square.py @@ -97,3 +97,29 @@ def test_square_special_cases(dtype): rtol=tol, equal_nan=True, ) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_square_out_overlap(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.linspace(0, 35, 60, dtype=dtype, sycl_queue=q) + X = dpt.reshape(X, 
(3, 5, 4)) + + Xnp = dpt.asnumpy(X) + Ynp = np.square(Xnp, out=Xnp) + + Y = dpt.square(X, out=X) + assert Y is X + assert np.allclose(dpt.asnumpy(X), Xnp) + + X = dpt.linspace(0, 35, 60, dtype=dtype, sycl_queue=q) + X = dpt.reshape(X, (3, 5, 4)) + Xnp = dpt.asnumpy(X) + + Ynp = np.square(Xnp, out=Xnp[::-1]) + Y = dpt.square(X, out=X[::-1]) + assert Y is not X + assert np.allclose(dpt.asnumpy(X), Xnp) + assert np.allclose(dpt.asnumpy(Y), Ynp) diff --git a/setup.py b/setup.py index 6eda8f29f0..2ec9dbbde9 100644 --- a/setup.py +++ b/setup.py @@ -149,20 +149,20 @@ def _get_cmdclass(): package_data={"dpctl": ["tests/*.*", "tests/helper/*.py"]}, include_package_data=True, zip_safe=False, - setup_requires=["Cython"], + setup_requires=["Cython<3"], install_requires=[ "numpy", ], extras_require={ "docs": [ - "Cython", + "Cython<3", "sphinx", "sphinx_rtd_theme", "pydot", "graphviz", "sphinxcontrib-programoutput", ], - "coverage": ["Cython", "pytest", "pytest-cov", "coverage", "tomli"], + "coverage": ["Cython<3", "pytest", "pytest-cov", "coverage", "tomli"], }, keywords="dpctl", classifiers=[