Created a temporary copy in case of overlap for unary function #1281

Merged 6 commits on Jul 17, 2023
2 changes: 1 addition & 1 deletion .github/workflows/generate-coverage.yaml
@@ -79,7 +79,7 @@ jobs:
- name: Install dpctl dependencies
shell: bash -l {0}
run: |
- pip install numpy cython setuptools pytest pytest-cov scikit-build cmake coverage[toml]
+ pip install numpy cython"<3" setuptools pytest pytest-cov scikit-build cmake coverage[toml]

- name: Build dpctl with coverage
shell: bash -l {0}
2 changes: 1 addition & 1 deletion .github/workflows/generate-docs.yml
@@ -49,7 +49,7 @@ jobs:
if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
shell: bash -l {0}
run: |
- pip install numpy cython setuptools scikit-build cmake sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput sphinxcontrib-googleanalytics
+ pip install numpy cython"<3" setuptools scikit-build cmake sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput sphinxcontrib-googleanalytics
- name: Checkout repo
uses: actions/checkout@v3
with:
2 changes: 1 addition & 1 deletion .github/workflows/os-llvm-sycl-build.yml
@@ -108,7 +108,7 @@ jobs:
- name: Install dpctl dependencies
shell: bash -l {0}
run: |
- pip install numpy cython setuptools pytest scikit-build cmake
+ pip install numpy cython"<3" setuptools pytest scikit-build cmake

- name: Checkout repo
uses: actions/checkout@v3
2 changes: 1 addition & 1 deletion conda-recipe/meta.yaml
@@ -20,7 +20,7 @@ requirements:
- cmake >=3.21
- ninja
- git
-    - cython
+    - cython <3
- python
- scikit-build
- numpy
57 changes: 35 additions & 22 deletions dpctl/tensor/_elementwise_common.py
@@ -52,6 +52,15 @@ def __call__(self, x, out=None, order="K"):
        if not isinstance(x, dpt.usm_ndarray):
            raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}")

+        if order not in ["C", "F", "K", "A"]:
+            order = "K"
+        buf_dt, res_dt = _find_buf_dtype(
+            x.dtype, self.result_type_resolver_fn_, x.sycl_device
+        )
+        if res_dt is None:
+            raise RuntimeError
+
+        orig_out = out
        if out is not None:
            if not isinstance(out, dpt.usm_ndarray):
                raise TypeError(
@@ -64,8 +73,21 @@ def __call__(self, x, out=None, order="K"):
                    f"Expected output shape is {x.shape}, got {out.shape}"
                )

-            if ti._array_overlap(x, out):
-                raise TypeError("Input and output arrays have memory overlap")
+            if res_dt != out.dtype:
+                raise TypeError(
+                    f"Output array of type {res_dt} is needed,"
+                    f" got {out.dtype}"
+                )
+
+            if (
+                buf_dt is None
+                and ti._array_overlap(x, out)
+                and not ti._same_logical_tensors(x, out)
+            ):
+                # Allocate a temporary buffer to avoid memory overlapping.
+                # Note if `buf_dt` is not None, a temporary copy of `x` will be
+                # created, so the array overlap check isn't needed.
+                out = dpt.empty_like(out)

            if (
                dpctl.utils.get_execution_queue((x.sycl_queue, out.sycl_queue))
@@ -75,13 +97,6 @@ def __call__(self, x, out=None, order="K"):
                    "Input and output allocation queues are not compatible"
                )

-        if order not in ["C", "F", "K", "A"]:
-            order = "K"
-        buf_dt, res_dt = _find_buf_dtype(
-            x.dtype, self.result_type_resolver_fn_, x.sycl_device
-        )
-        if res_dt is None:
-            raise RuntimeError
        exec_q = x.sycl_queue
        if buf_dt is None:
            if out is None:
@@ -91,17 +106,20 @@ def __call__(self, x, out=None, order="K"):
                    if order == "A":
                        order = "F" if x.flags.f_contiguous else "C"
                    out = dpt.empty_like(x, dtype=res_dt, order=order)
-            else:
-                if res_dt != out.dtype:
-                    raise TypeError(
-                        f"Output array of type {res_dt} is needed,"
-                        f" got {out.dtype}"
-                    )

-            ht, _ = self.unary_fn_(x, out, sycl_queue=exec_q)
-            ht.wait()
+            ht_unary_ev, unary_ev = self.unary_fn_(x, out, sycl_queue=exec_q)
+
+            if not (orig_out is None or orig_out is out):
+                # Copy the out data from temporary buffer to original memory
+                ht_copy_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray(
+                    src=out, dst=orig_out, sycl_queue=exec_q, depends=[unary_ev]
+                )
+                ht_copy_ev.wait()
+                out = orig_out
+
+            ht_unary_ev.wait()
            return out

        if order == "K":
            buf = _empty_like_orderK(x, buf_dt)
        else:
@@ -117,11 +135,6 @@ def __call__(self, x, out=None, order="K"):
                out = _empty_like_orderK(buf, res_dt)
            else:
                out = dpt.empty_like(buf, dtype=res_dt, order=order)
-        else:
-            if buf_dt != out.dtype:
-                raise TypeError(
-                    f"Output array of type {buf_dt} is needed, got {out.dtype}"
-                )

        ht, _ = self.unary_fn_(buf, out, sycl_queue=exec_q, depends=[copy_ev])
        ht_copy_ev.wait()
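A minimal usage sketch (not part of the diff) of the behavior this change enables, using only calls that appear in the tests further down: when out overlaps x but is not the same logical view, the result goes into a temporary array and is then copied back into the caller's out.

    import dpctl.tensor as dpt

    x = dpt.linspace(0, 1, 10, dtype="f4")

    # out is x itself: same logical tensor, computed in place, no temporary
    y = dpt.exp(x, out=x)
    assert y is x

    # out is a reversed view of x: the buffers overlap but the views differ,
    # so a temporary holds the result before it is copied into x[::-1]
    y = dpt.exp(x, out=x[::-1])
    assert y is not x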
47 changes: 47 additions & 0 deletions dpctl/tensor/libtensor/include/utils/memory_overlap.hpp
@@ -100,6 +100,53 @@ struct MemoryOverlap
    }
};

struct SameLogicalTensors
{
    bool operator()(dpctl::tensor::usm_ndarray ar1,
                    dpctl::tensor::usm_ndarray ar2) const
    {
        // Same ndim
        int nd1 = ar1.get_ndim();
        if (nd1 != ar2.get_ndim())
            return false;

        // Same dtype
        int tn1 = ar1.get_typenum();
        if (tn1 != ar2.get_typenum())
            return false;

        // Same pointer
        const char *ar1_data = ar1.get_data();
        const char *ar2_data = ar2.get_data();

        if (ar1_data != ar2_data)
            return false;

        // Same shape
        const py::ssize_t *ar1_shape = ar1.get_shape_raw();
        const py::ssize_t *ar2_shape = ar2.get_shape_raw();

        if (!std::equal(ar1_shape, ar1_shape + nd1, ar2_shape))
            return false;

        // Same strides
        auto const &ar1_strides = ar1.get_strides_vector();
        auto const &ar2_strides = ar2.get_strides_vector();

        auto ar1_beg_it = std::begin(ar1_strides);
        auto ar1_end_it = std::end(ar1_strides);

        auto ar2_beg_it = std::begin(ar2_strides);

        if (!std::equal(ar1_beg_it, ar1_end_it, ar2_beg_it))
            return false;

        // all checks passed: arrays are logical views
        // into the same memory
        return true;
    }
};

} // namespace overlap
} // namespace tensor
} // namespace dpctl
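A rough Python analogue (not part of the diff) of the SameLogicalTensors predicate above. The __sycl_usm_array_interface__ access is an assumption used here only to mirror the raw-pointer comparison in the C++ code, and the strides comparison is simplified: the C++ version normalizes absent strides to the contiguous default.

    def same_logical_tensors(ar1, ar2) -> bool:
        # Same ndim and dtype
        if ar1.ndim != ar2.ndim or ar1.dtype != ar2.dtype:
            return False
        # Same base pointer (the C++ code compares get_data() pointers)
        ptr1 = ar1.__sycl_usm_array_interface__["data"][0]
        ptr2 = ar2.__sycl_usm_array_interface__["data"][0]
        if ptr1 != ptr2:
            return False
        # Same shape and strides: identical logical view of the same memory
        return ar1.shape == ar2.shape and ar1.strides == ar2.strides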
4 changes: 3 additions & 1 deletion dpctl/tensor/libtensor/source/elementwise_functions.hpp
@@ -128,7 +128,9 @@ py_unary_ufunc(dpctl::tensor::usm_ndarray src,

    // check memory overlap
    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
-    if (overlap(src, dst)) {
+    auto const &same_logical_tensors =
+        dpctl::tensor::overlap::SameLogicalTensors();
+    if (overlap(src, dst) && !same_logical_tensors(src, dst)) {
        throw py::value_error("Arrays index overlapping segments of memory");
    }

10 changes: 10 additions & 0 deletions dpctl/tensor/libtensor/source/tensor_py.cpp
@@ -60,6 +60,7 @@ using dpctl::tensor::c_contiguous_strides;
using dpctl::tensor::f_contiguous_strides;

using dpctl::tensor::overlap::MemoryOverlap;
+using dpctl::tensor::overlap::SameLogicalTensors;

using dpctl::tensor::py_internal::copy_usm_ndarray_into_usm_ndarray;

@@ -338,6 +339,15 @@ PYBIND11_MODULE(_tensor_impl, m)
          "Determines if the memory regions indexed by each array overlap",
          py::arg("array1"), py::arg("array2"));

+    auto same_logical_tensors = [](dpctl::tensor::usm_ndarray x1,
+                                   dpctl::tensor::usm_ndarray x2) -> bool {
+        auto const &same_logical_tensors = SameLogicalTensors();
+        return same_logical_tensors(x1, x2);
+    };
+    m.def("_same_logical_tensors", same_logical_tensors,
+          "Determines if the memory regions indexed by each array are the same",
+          py::arg("array1"), py::arg("array2"));
+
    m.def("_place", &py_place, "", py::arg("dst"), py::arg("cumsum"),
          py::arg("axis_start"), py::arg("axis_end"), py::arg("rhs"),
          py::arg("sycl_queue"), py::arg("depends") = py::list());
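A hypothetical interactive check (not part of the diff) of the new _tensor_impl binding next to the existing _array_overlap one:

    import dpctl.tensor as dpt
    import dpctl.tensor._tensor_impl as ti

    x = dpt.arange(10, dtype="i4")
    v = x[::-1]                        # reversed view of the same memory

    ti._array_overlap(x, v)            # True: memory regions overlap
    ti._same_logical_tensors(x, v)     # False: different start pointer and strides
    ti._same_logical_tensors(x, x)     # True: identical logical view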
28 changes: 28 additions & 0 deletions dpctl/tests/_numpy_warnings.py
@@ -0,0 +1,28 @@
# Data Parallel Control (dpctl)
#
# Copyright 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy
import pytest


@pytest.fixture
def suppress_invalid_numpy_warnings():
    # invalid: treatment for invalid floating-point operation
    # (result is not an expressible number, typically indicates
    # that a NaN was produced)
    old_settings = numpy.seterr(invalid="ignore")
    yield
    numpy.seterr(**old_settings)  # reset to default
9 changes: 8 additions & 1 deletion dpctl/tests/conftest.py
@@ -26,8 +26,15 @@
    invalid_filter,
    valid_filter,
)
+from _numpy_warnings import suppress_invalid_numpy_warnings

sys.path.append(os.path.join(os.path.dirname(__file__), "helper"))

# common fixtures
-__all__ = ["check", "device_selector", "invalid_filter", "valid_filter"]
+__all__ = [
+    "check",
+    "device_selector",
+    "invalid_filter",
+    "suppress_invalid_numpy_warnings",
+    "valid_filter",
+]
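A hypothetical test sketch (not in this PR) showing how the imported fixture is requested by name once conftest.py re-exports it:

    import numpy as np


    def test_something_with_nans(suppress_invalid_numpy_warnings):
        # numpy.seterr(invalid="ignore") is active for the test body,
        # so this produces NaN without a RuntimeWarning being emitted
        assert np.isnan(np.sqrt(-1.0))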
24 changes: 23 additions & 1 deletion dpctl/tests/elementwise/test_abs.py
@@ -22,7 +22,7 @@
import dpctl.tensor as dpt
from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported

-from .utils import _all_dtypes, _usm_types
+from .utils import _all_dtypes, _no_complex_dtypes, _usm_types


@pytest.mark.parametrize("dtype", _all_dtypes)
@@ -113,3 +113,25 @@ def test_abs_complex(dtype):
    np.testing.assert_allclose(
        dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol
    )


@pytest.mark.parametrize("dtype", _no_complex_dtypes)
def test_abs_out_overlap(dtype):
    q = get_queue_or_skip()
    skip_if_dtype_not_supported(dtype, q)

    X = dpt.linspace(0, 35, 60, dtype=dtype, sycl_queue=q)
    X = dpt.reshape(X, (3, 5, 4))

    Xnp = dpt.asnumpy(X)
    Ynp = np.abs(Xnp, out=Xnp)

    Y = dpt.abs(X, out=X)
    assert Y is X
    assert np.allclose(dpt.asnumpy(X), Xnp)

    Ynp = np.abs(Xnp, out=Xnp[::-1])
    Y = dpt.abs(X, out=X[::-1])
    assert Y is not X
    assert np.allclose(dpt.asnumpy(X), Xnp)
    assert np.allclose(dpt.asnumpy(Y), Ynp)
23 changes: 23 additions & 0 deletions dpctl/tests/elementwise/test_exp.py
@@ -145,3 +145,26 @@ def test_exp_strided(dtype):
        atol=tol,
        rtol=tol,
    )


@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"])
def test_exp_out_overlap(dtype):
    q = get_queue_or_skip()
    skip_if_dtype_not_supported(dtype, q)

    X = dpt.linspace(0, 1, 15, dtype=dtype, sycl_queue=q)
    X = dpt.reshape(X, (3, 5))

    Xnp = dpt.asnumpy(X)
    Ynp = np.exp(Xnp, out=Xnp)

    Y = dpt.exp(X, out=X)
    tol = 8 * dpt.finfo(Y.dtype).resolution
    assert Y is X
    assert_allclose(dpt.asnumpy(X), Xnp, atol=tol, rtol=tol)

    Ynp = np.exp(Xnp, out=Xnp[::-1])
    Y = dpt.exp(X, out=X[::-1])
    assert Y is not X
    assert_allclose(dpt.asnumpy(X), Xnp, atol=tol, rtol=tol)
    assert_allclose(dpt.asnumpy(Y), Ynp, atol=tol, rtol=tol)