Skip to content

Commit

Permalink
Merge pull request #1281 from IntelPython/unary_out_overlap
Browse files Browse the repository at this point in the history
Created a temporary copy in case of overlap for unary function
  • Loading branch information
oleksandr-pavlyk authored Jul 17, 2023
2 parents a6d16f2 + 03a46e1 commit d3ce80e
Show file tree
Hide file tree
Showing 17 changed files with 294 additions and 52 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/generate-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ jobs:
- name: Install dpctl dependencies
shell: bash -l {0}
run: |
pip install numpy cython setuptools pytest pytest-cov scikit-build cmake coverage[toml]
pip install numpy cython"<3" setuptools pytest pytest-cov scikit-build cmake coverage[toml]
- name: Build dpctl with coverage
shell: bash -l {0}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/generate-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
if: ${{ !github.event.pull_request || github.event.action != 'closed' }}
shell: bash -l {0}
run: |
pip install numpy cython setuptools scikit-build cmake sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput sphinxcontrib-googleanalytics
pip install numpy cython"<3" setuptools scikit-build cmake sphinx sphinx_rtd_theme pydot graphviz sphinxcontrib-programoutput sphinxcontrib-googleanalytics
- name: Checkout repo
uses: actions/checkout@v3
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/os-llvm-sycl-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ jobs:
- name: Install dpctl dependencies
shell: bash -l {0}
run: |
pip install numpy cython setuptools pytest scikit-build cmake
pip install numpy cython"<3" setuptools pytest scikit-build cmake
- name: Checkout repo
uses: actions/checkout@v3
Expand Down
2 changes: 1 addition & 1 deletion conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ requirements:
- cmake >=3.21
- ninja
- git
- cython
- cython <3
- python
- scikit-build
- numpy
Expand Down
57 changes: 35 additions & 22 deletions dpctl/tensor/_elementwise_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ def __call__(self, x, out=None, order="K"):
if not isinstance(x, dpt.usm_ndarray):
raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}")

if order not in ["C", "F", "K", "A"]:
order = "K"
buf_dt, res_dt = _find_buf_dtype(
x.dtype, self.result_type_resolver_fn_, x.sycl_device
)
if res_dt is None:
raise RuntimeError

orig_out = out
if out is not None:
if not isinstance(out, dpt.usm_ndarray):
raise TypeError(
Expand All @@ -64,8 +73,21 @@ def __call__(self, x, out=None, order="K"):
f"Expected output shape is {x.shape}, got {out.shape}"
)

if ti._array_overlap(x, out):
raise TypeError("Input and output arrays have memory overlap")
if res_dt != out.dtype:
raise TypeError(
f"Output array of type {res_dt} is needed,"
f" got {out.dtype}"
)

if (
buf_dt is None
and ti._array_overlap(x, out)
and not ti._same_logical_tensors(x, out)
):
# Allocate a temporary buffer to avoid memory overlapping.
# Note if `buf_dt` is not None, a temporary copy of `x` will be
# created, so the array overlap check isn't needed.
out = dpt.empty_like(out)

if (
dpctl.utils.get_execution_queue((x.sycl_queue, out.sycl_queue))
Expand All @@ -75,13 +97,6 @@ def __call__(self, x, out=None, order="K"):
"Input and output allocation queues are not compatible"
)

if order not in ["C", "F", "K", "A"]:
order = "K"
buf_dt, res_dt = _find_buf_dtype(
x.dtype, self.result_type_resolver_fn_, x.sycl_device
)
if res_dt is None:
raise RuntimeError
exec_q = x.sycl_queue
if buf_dt is None:
if out is None:
Expand All @@ -91,17 +106,20 @@ def __call__(self, x, out=None, order="K"):
if order == "A":
order = "F" if x.flags.f_contiguous else "C"
out = dpt.empty_like(x, dtype=res_dt, order=order)
else:
if res_dt != out.dtype:
raise TypeError(
f"Output array of type {res_dt} is needed,"
f" got {out.dtype}"
)

ht, _ = self.unary_fn_(x, out, sycl_queue=exec_q)
ht.wait()
ht_unary_ev, unary_ev = self.unary_fn_(x, out, sycl_queue=exec_q)

if not (orig_out is None or orig_out is out):
# Copy the out data from temporary buffer to original memory
ht_copy_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray(
src=out, dst=orig_out, sycl_queue=exec_q, depends=[unary_ev]
)
ht_copy_ev.wait()
out = orig_out

ht_unary_ev.wait()
return out

if order == "K":
buf = _empty_like_orderK(x, buf_dt)
else:
Expand All @@ -117,11 +135,6 @@ def __call__(self, x, out=None, order="K"):
out = _empty_like_orderK(buf, res_dt)
else:
out = dpt.empty_like(buf, dtype=res_dt, order=order)
else:
if buf_dt != out.dtype:
raise TypeError(
f"Output array of type {buf_dt} is needed, got {out.dtype}"
)

ht, _ = self.unary_fn_(buf, out, sycl_queue=exec_q, depends=[copy_ev])
ht_copy_ev.wait()
Expand Down
47 changes: 47 additions & 0 deletions dpctl/tensor/libtensor/include/utils/memory_overlap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,53 @@ struct MemoryOverlap
}
};

struct SameLogicalTensors
{
bool operator()(dpctl::tensor::usm_ndarray ar1,
dpctl::tensor::usm_ndarray ar2) const
{
// Same ndim
int nd1 = ar1.get_ndim();
if (nd1 != ar2.get_ndim())
return false;

// Same dtype
int tn1 = ar1.get_typenum();
if (tn1 != ar2.get_typenum())
return false;

// Same pointer
const char *ar1_data = ar1.get_data();
const char *ar2_data = ar2.get_data();

if (ar1_data != ar2_data)
return false;

// Same shape and strides
const py::ssize_t *ar1_shape = ar1.get_shape_raw();
const py::ssize_t *ar2_shape = ar2.get_shape_raw();

if (!std::equal(ar1_shape, ar1_shape + nd1, ar2_shape))
return false;

// Same shape and strides
auto const &ar1_strides = ar1.get_strides_vector();
auto const &ar2_strides = ar2.get_strides_vector();

auto ar1_beg_it = std::begin(ar1_strides);
auto ar1_end_it = std::end(ar1_strides);

auto ar2_beg_it = std::begin(ar2_strides);

if (!std::equal(ar1_beg_it, ar1_end_it, ar2_beg_it))
return false;

// all checks passed: arrays are logical views
// into the same memory
return true;
}
};

} // namespace overlap
} // namespace tensor
} // namespace dpctl
4 changes: 3 additions & 1 deletion dpctl/tensor/libtensor/source/elementwise_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,9 @@ py_unary_ufunc(dpctl::tensor::usm_ndarray src,

// check memory overlap
auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
if (overlap(src, dst)) {
auto const &same_logical_tensors =
dpctl::tensor::overlap::SameLogicalTensors();
if (overlap(src, dst) && !same_logical_tensors(src, dst)) {
throw py::value_error("Arrays index overlapping segments of memory");
}

Expand Down
10 changes: 10 additions & 0 deletions dpctl/tensor/libtensor/source/tensor_py.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ using dpctl::tensor::c_contiguous_strides;
using dpctl::tensor::f_contiguous_strides;

using dpctl::tensor::overlap::MemoryOverlap;
using dpctl::tensor::overlap::SameLogicalTensors;

using dpctl::tensor::py_internal::copy_usm_ndarray_into_usm_ndarray;

Expand Down Expand Up @@ -338,6 +339,15 @@ PYBIND11_MODULE(_tensor_impl, m)
"Determines if the memory regions indexed by each array overlap",
py::arg("array1"), py::arg("array2"));

auto same_logical_tensors = [](dpctl::tensor::usm_ndarray x1,
dpctl::tensor::usm_ndarray x2) -> bool {
auto const &same_logical_tensors = SameLogicalTensors();
return same_logical_tensors(x1, x2);
};
m.def("_same_logical_tensors", same_logical_tensors,
"Determines if the memory regions indexed by each array are the same",
py::arg("array1"), py::arg("array2"));

m.def("_place", &py_place, "", py::arg("dst"), py::arg("cumsum"),
py::arg("axis_start"), py::arg("axis_end"), py::arg("rhs"),
py::arg("sycl_queue"), py::arg("depends") = py::list());
Expand Down
28 changes: 28 additions & 0 deletions dpctl/tests/_numpy_warnings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Data Parallel Control (dpctl)
#
# Copyright 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy
import pytest


@pytest.fixture
def suppress_invalid_numpy_warnings():
    """Ignore NumPy "invalid" floating-point errors while a test runs.

    "invalid" covers invalid floating-point operations, i.e. results that
    are not an expressible number (typically a NaN was produced). The
    error-handling settings that were active before the fixture started
    are put back once the test using the fixture has finished.
    """
    saved_err_settings = numpy.seterr(invalid="ignore")
    yield
    # restore the settings captured above rather than NumPy's defaults
    numpy.seterr(**saved_err_settings)
9 changes: 8 additions & 1 deletion dpctl/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,15 @@
invalid_filter,
valid_filter,
)
from _numpy_warnings import suppress_invalid_numpy_warnings

sys.path.append(os.path.join(os.path.dirname(__file__), "helper"))

# common fixtures
__all__ = ["check", "device_selector", "invalid_filter", "valid_filter"]
__all__ = [
"check",
"device_selector",
"invalid_filter",
"suppress_invalid_numpy_warnings",
"valid_filter",
]
24 changes: 23 additions & 1 deletion dpctl/tests/elementwise/test_abs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import dpctl.tensor as dpt
from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported

from .utils import _all_dtypes, _usm_types
from .utils import _all_dtypes, _no_complex_dtypes, _usm_types


@pytest.mark.parametrize("dtype", _all_dtypes)
Expand Down Expand Up @@ -113,3 +113,25 @@ def test_abs_complex(dtype):
np.testing.assert_allclose(
dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol
)


@pytest.mark.parametrize("dtype", _no_complex_dtypes)
def test_abs_out_overlap(dtype):
    """Compare dpt.abs with np.abs when `out` shares memory with the input.

    Two cases are exercised: `out` is the input array itself, and `out` is
    a view of the input reversed along its first axis.
    """
    q = get_queue_or_skip()
    skip_if_dtype_not_supported(dtype, q)

    flat = dpt.linspace(0, 35, 60, dtype=dtype, sycl_queue=q)
    x = dpt.reshape(flat, (3, 5, 4))
    x_np = dpt.asnumpy(x)

    # Case 1: output is the very same array as the input
    np.abs(x_np, out=x_np)
    res = dpt.abs(x, out=x)
    assert res is x
    assert np.allclose(dpt.asnumpy(x), x_np)

    # Case 2: output is a reversed view over the input's memory
    expected = np.abs(x_np, out=x_np[::-1])
    res = dpt.abs(x, out=x[::-1])
    assert res is not x
    assert np.allclose(dpt.asnumpy(x), x_np)
    assert np.allclose(dpt.asnumpy(res), expected)
23 changes: 23 additions & 0 deletions dpctl/tests/elementwise/test_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,26 @@ def test_exp_strided(dtype):
atol=tol,
rtol=tol,
)


@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"])
def test_exp_out_overlap(dtype):
    """Compare dpt.exp with np.exp when `out` shares memory with the input.

    Two cases are exercised: `out` is the input array itself, and `out` is
    a view of the input reversed along its first axis. Results are compared
    within a tolerance of 8 times the resolution of the result data type.
    """
    q = get_queue_or_skip()
    skip_if_dtype_not_supported(dtype, q)

    flat = dpt.linspace(0, 1, 15, dtype=dtype, sycl_queue=q)
    x = dpt.reshape(flat, (3, 5))
    x_np = dpt.asnumpy(x)

    # Case 1: output is the very same array as the input
    np.exp(x_np, out=x_np)
    res = dpt.exp(x, out=x)
    tol = 8 * dpt.finfo(res.dtype).resolution
    assert res is x
    assert_allclose(dpt.asnumpy(x), x_np, atol=tol, rtol=tol)

    # Case 2: output is a reversed view over the input's memory
    expected = np.exp(x_np, out=x_np[::-1])
    res = dpt.exp(x, out=x[::-1])
    assert res is not x
    assert_allclose(dpt.asnumpy(x), x_np, atol=tol, rtol=tol)
    assert_allclose(dpt.asnumpy(res), expected, atol=tol, rtol=tol)
Loading

0 comments on commit d3ce80e

Please sign in to comment.