From c9cc505ff201f32ed68be45dc1596f5eb75cf2dc Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 17 Oct 2023 01:32:44 -0500 Subject: [PATCH 01/19] Tweaked test_intel_device_info --- dpctl/tests/test_utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dpctl/tests/test_utils.py b/dpctl/tests/test_utils.py index 1aab7fd7e7..05b2dc7890 100644 --- a/dpctl/tests/test_utils.py +++ b/dpctl/tests/test_utils.py @@ -21,7 +21,6 @@ import dpctl import dpctl.utils -from dpctl.enum_types import backend_type def test_get_execution_queue_input_validation(): @@ -132,9 +131,7 @@ def test_intel_device_info(): pytest.skip("Default device could not be created") descr = dpctl.utils.intel_device_info(d) assert isinstance(descr, dict) - assert ("device_id" in descr) or ( - not d.has_aspect_cpu and not d.backend == backend_type.level_zero - ) + assert ("device_id" in descr) or not descr allowed_names = [ "device_id", "gpu_slices", From 2d2f235bd49aff9ff186811996a794ba52cdbd41 Mon Sep 17 00:00:00 2001 From: ndgrigorian <46709016+ndgrigorian@users.noreply.github.com> Date: Tue, 17 Oct 2023 08:48:13 -0700 Subject: [PATCH 02/19] Elementwise functions cbrt, exp2, copysign, and rsqrt (#1443) * Implements dpctl.tensor.cbrt * Implements copysign and exp2 elementwise funcs * Adds tests for cbrt, copysign, exp2 * Implements rsqrt and tests for rsqrt * Modified tests for cbrt, copysign, and rsqrt Now test more type combinations/output types --- dpctl/tensor/__init__.py | 8 + dpctl/tensor/_elementwise_funcs.py | 113 ++++++++ .../kernels/elementwise_functions/cbrt.hpp | 172 ++++++++++++ .../elementwise_functions/copysign.hpp | 215 +++++++++++++++ .../kernels/elementwise_functions/exp2.hpp | 229 +++++++++++++++ .../kernels/elementwise_functions/rsqrt.hpp | 179 ++++++++++++ .../source/elementwise_functions.cpp | 261 +++++++++++++++++- dpctl/tests/elementwise/test_cbrt.py | 79 ++++++ dpctl/tests/elementwise/test_copysign.py | 111 ++++++++ 
dpctl/tests/elementwise/test_exp2.py | 168 +++++++++++ dpctl/tests/elementwise/test_rsqrt.py | 74 +++++ 11 files changed, 1608 insertions(+), 1 deletion(-) create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp create mode 100644 dpctl/tests/elementwise/test_cbrt.py create mode 100644 dpctl/tests/elementwise/test_copysign.py create mode 100644 dpctl/tests/elementwise/test_exp2.py create mode 100644 dpctl/tests/elementwise/test_rsqrt.py diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 3473d5cde5..bab31379b7 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -110,13 +110,16 @@ bitwise_or, bitwise_right_shift, bitwise_xor, + cbrt, ceil, conj, + copysign, cos, cosh, divide, equal, exp, + exp2, expm1, floor, floor_divide, @@ -149,6 +152,7 @@ real, remainder, round, + rsqrt, sign, signbit, sin, @@ -314,4 +318,8 @@ "argmax", "argmin", "prod", + "cbrt", + "exp2", + "copysign", + "rsqrt", ] diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index 259443f8e3..24ae7fa8cf 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -1761,3 +1761,116 @@ hypot = BinaryElementwiseFunc( "hypot", ti._hypot_result_type, ti._hypot, _hypot_docstring_ ) + + +# U37: ==== CBRT (x) +_cbrt_docstring_ = """ +cbrt(x, out=None, order='K') + +Computes positive cube-root for each element `x_i` for input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have a real floating-point data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. 
+ order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_narray: + An array containing the element-wise positive cube-root. + The data type of the returned array is determined by + the Type Promotion Rules. +""" + +cbrt = UnaryElementwiseFunc( + "cbrt", ti._cbrt_result_type, ti._cbrt, _cbrt_docstring_ +) + + +# U38: ==== EXP2 (x) +_exp2_docstring_ = """ +exp2(x, out=None, order='K') + +Computes the base-2 exponential for each element `x_i` for input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have a floating-point data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_narray: + An array containing the element-wise base-2 exponentials. + The data type of the returned array is determined by + the Type Promotion Rules. +""" + +exp2 = UnaryElementwiseFunc( + "exp2", ti._exp2_result_type, ti._exp2, _exp2_docstring_ +) + + +# B25: ==== COPYSIGN (x1, x2) +_copysign_docstring_ = """ +copysign(x1, x2, out=None, order='K') + +Composes a floating-point value with the magnitude of `x1_i` and the sign of +`x2_i` for each element of input arrays `x1` and `x2`. + +Args: + x1 (usm_ndarray): + First input array, expected to have a real floating-point data type. + x2 (usm_ndarray): + Second input array, also expected to have a real floating-point data + type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_narray: + An array containing the element-wise results. The data type + of the returned array is determined by the Type Promotion Rules. 
+""" +copysign = BinaryElementwiseFunc( + "copysign", + ti._copysign_result_type, + ti._copysign, + _copysign_docstring_, +) + + +# U39: ==== RSQRT (x) +_rsqrt_docstring_ = """ +rsqrt(x, out=None, order='K') + +Computes the reciprocal square-root for each element `x_i` for input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have a real floating-point data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_narray: + An array containing the element-wise reciprocal square-root. + The data type of the returned array is determined by + the Type Promotion Rules. +""" + +rsqrt = UnaryElementwiseFunc( + "rsqrt", ti._rsqrt_result_type, ti._rsqrt, _rsqrt_docstring_ +) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp new file mode 100644 index 0000000000..1d4aa65002 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp @@ -0,0 +1,172 @@ +//=== cbrt.hpp - Unary function CBRT ------ *-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of CBRT(x) +/// function that computes the cube root. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace cbrt +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +template struct CbrtFunctor +{ + + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support subgroup store/load operation + using supports_sg_loadstore = typename std::true_type; + + resT operator()(const argT &in) const + { + return sycl::cbrt(in); + } +}; + +template +using CbrtContigFunctor = elementwise_common:: + UnaryContigFunctor, vec_sz, n_vecs>; + +template +using CbrtStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct CbrtOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class cbrt_contig_kernel; + +template +sycl::event cbrt_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, CbrtOutputType, CbrtContigFunctor, cbrt_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + 
+template struct CbrtContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = cbrt_contig_impl; + return fn; + } + } +}; + +template struct CbrtTypeMapFactory +{ + /*! @brief get typeid for output type of std::cbrt(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename CbrtOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class cbrt_strided_kernel; + +template +sycl::event +cbrt_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, CbrtOutputType, CbrtStridedFunctor, cbrt_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct CbrtStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = cbrt_strided_impl; + return fn; + } + } +}; + +} // namespace cbrt +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp new file mode 100644 index 0000000000..b1997d06b4 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp @@ -0,0 +1,215 @@ +//=== copysign.hpp - Binary function COPYSIGN ------ *-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of COPYSIGN(x1, x2) +/// function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace copysign +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template struct CopysignFunctor +{ + + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + return sycl::copysign(in1, in2); + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + auto tmp = sycl::copysign(in1, in2); + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using CopysignContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs>; + +template +using CopysignStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + CopysignFunctor>; + +template struct CopysignOutputType +{ + using value_type = typename std::disjunction< // 
disjunction is C++17 + // feature, supported by DPC++ + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class copysign_contig_kernel; + +template +sycl::event copysign_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_contig_impl< + argTy1, argTy2, CopysignOutputType, CopysignContigFunctor, + copysign_contig_kernel>(exec_q, nelems, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends); +} + +template struct CopysignContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename CopysignOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = copysign_contig_impl; + return fn; + } + } +}; + +template struct CopysignTypeMapFactory +{ + /*! 
@brief get typeid for output type of copysign(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename CopysignOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class copysign_strided_kernel; + +template +sycl::event +copysign_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, CopysignOutputType, CopysignStridedFunctor, + copysign_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct CopysignStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename CopysignOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = copysign_strided_impl; + return fn; + } + } +}; + +} // namespace copysign +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp new file mode 100644 index 0000000000..67ee23df48 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp @@ -0,0 +1,229 @@ +//=== exp2.hpp - Unary function EXP2 ------ +//*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of EXP2(x) function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace exp2 +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpctl::tensor::type_utils::is_complex; + +template struct Exp2Functor +{ + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &in) const + { + if constexpr (is_complex::value) { + using realT = typename argT::value_type; + + const argT tmp = in * std::log(realT(2)); + + constexpr realT q_nan = std::numeric_limits::quiet_NaN(); + + const realT x = std::real(tmp); + const realT y = std::imag(tmp); + if (std::isfinite(x)) { + if (std::isfinite(y)) { + return std::exp(tmp); + } + else { + return resT{q_nan, q_nan}; 
+ } + } + else if (std::isnan(x)) { + /* x is nan */ + if (y == realT(0)) { + return resT{in}; + } + else { + return resT{x, q_nan}; + } + } + else { + if (!std::signbit(x)) { /* x is +inf */ + if (y == realT(0)) { + return resT{x, y}; + } + else if (std::isfinite(y)) { + return resT{x * std::cos(y), x * std::sin(y)}; + } + else { + /* x = +inf, y = +-inf || nan */ + return resT{x, q_nan}; + } + } + else { /* x is -inf */ + if (std::isfinite(y)) { + realT exp_x = std::exp(x); + return resT{exp_x * std::cos(y), exp_x * std::sin(y)}; + } + else { + /* x = -inf, y = +-inf || nan */ + return resT{0, 0}; + } + } + } + } + else { + return sycl::exp2(in); + } + } +}; + +template +using Exp2ContigFunctor = elementwise_common:: + UnaryContigFunctor, vec_sz, n_vecs>; + +template +using Exp2StridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct Exp2OutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class exp2_contig_kernel; + +template +sycl::event exp2_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, Exp2OutputType, Exp2ContigFunctor, exp2_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct Exp2ContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = exp2_contig_impl; + return fn; + } + } +}; + +template struct Exp2TypeMapFactory +{ + /*! 
@brief get typeid for output type of std::exp2(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename Exp2OutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class exp2_strided_kernel; + +template +sycl::event +exp2_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, Exp2OutputType, Exp2StridedFunctor, exp2_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct Exp2StridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = exp2_strided_impl; + return fn; + } + } +}; + +} // namespace exp2 +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp new file mode 100644 index 0000000000..de51b31c30 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp @@ -0,0 +1,179 @@ +//=== rsqrt.hpp - Unary function RSQRT ------ +//*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of RSQRT(x) +/// function that computes the reciprocal square root. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace rsqrt +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +template struct RsqrtFunctor +{ + + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::true_type; + + resT operator()(const argT &in) const + { + return sycl::rsqrt(in); + } +}; + +template +using RsqrtContigFunctor = + elementwise_common::UnaryContigFunctor, + vec_sz, + n_vecs>; + +template +using RsqrtStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct RsqrtOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class rsqrt_contig_kernel; + +template +sycl::event rsqrt_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return 
elementwise_common::unary_contig_impl< + argTy, RsqrtOutputType, RsqrtContigFunctor, rsqrt_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct RsqrtContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = rsqrt_contig_impl; + return fn; + } + } +}; + +template struct RsqrtTypeMapFactory +{ + /*! @brief get typeid for output type of sycl::rsqrt(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename RsqrtOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class rsqrt_strided_kernel; + +template +sycl::event +rsqrt_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, RsqrtOutputType, RsqrtStridedFunctor, rsqrt_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct RsqrtStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = rsqrt_strided_impl; + return fn; + } + } +}; + +} // namespace rsqrt +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index 3cca479a3f..043cac0cd2 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -48,12 +48,15 @@ #include "kernels/elementwise_functions/bitwise_or.hpp" #include "kernels/elementwise_functions/bitwise_right_shift.hpp" #include "kernels/elementwise_functions/bitwise_xor.hpp" +#include 
"kernels/elementwise_functions/cbrt.hpp" #include "kernels/elementwise_functions/ceil.hpp" #include "kernels/elementwise_functions/conj.hpp" +#include "kernels/elementwise_functions/copysign.hpp" #include "kernels/elementwise_functions/cos.hpp" #include "kernels/elementwise_functions/cosh.hpp" #include "kernels/elementwise_functions/equal.hpp" #include "kernels/elementwise_functions/exp.hpp" +#include "kernels/elementwise_functions/exp2.hpp" #include "kernels/elementwise_functions/expm1.hpp" #include "kernels/elementwise_functions/floor.hpp" #include "kernels/elementwise_functions/floor_divide.hpp" @@ -86,6 +89,7 @@ #include "kernels/elementwise_functions/real.hpp" #include "kernels/elementwise_functions/remainder.hpp" #include "kernels/elementwise_functions/round.hpp" +#include "kernels/elementwise_functions/rsqrt.hpp" #include "kernels/elementwise_functions/sign.hpp" #include "kernels/elementwise_functions/signbit.hpp" #include "kernels/elementwise_functions/sin.hpp" @@ -2749,7 +2753,6 @@ void populate_trunc_dispatch_vectors(void) } // namespace impl // B24: ==== HYPOT (x1, x2) - namespace impl { namespace hypot_fn_ns = dpctl::tensor::kernels::hypot; @@ -2788,6 +2791,151 @@ void populate_hypot_dispatch_tables(void) } // namespace impl +// U37: ==== CBRT (x) +namespace impl +{ + +namespace cbrt_fn_ns = dpctl::tensor::kernels::cbrt; + +static unary_contig_impl_fn_ptr_t cbrt_contig_dispatch_vector[td_ns::num_types]; +static int cbrt_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + cbrt_strided_dispatch_vector[td_ns::num_types]; + +void populate_cbrt_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = cbrt_fn_ns; + + using fn_ns::CbrtContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(cbrt_contig_dispatch_vector); + + using fn_ns::CbrtStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(cbrt_strided_dispatch_vector); + + using fn_ns::CbrtTypeMapFactory; + 
DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(cbrt_output_typeid_vector); +} + +} // namespace impl + +// B25: ==== COPYSIGN (x1, x2) +namespace impl +{ +namespace copysign_fn_ns = dpctl::tensor::kernels::copysign; + +static binary_contig_impl_fn_ptr_t + copysign_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int copysign_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + copysign_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_copysign_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = copysign_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::CopysignTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(copysign_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::CopysignStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(copysign_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::CopysignContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(copysign_contig_dispatch_table); +}; + +} // namespace impl + +// U38: ==== EXP2 (x) +namespace impl +{ + +namespace exp2_fn_ns = dpctl::tensor::kernels::exp2; + +static unary_contig_impl_fn_ptr_t exp2_contig_dispatch_vector[td_ns::num_types]; +static int exp2_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + exp2_strided_dispatch_vector[td_ns::num_types]; + +void populate_exp2_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = exp2_fn_ns; + + using fn_ns::Exp2ContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(exp2_contig_dispatch_vector); + + using fn_ns::Exp2StridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(exp2_strided_dispatch_vector); + + using fn_ns::Exp2TypeMapFactory; + 
DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(exp2_output_typeid_vector); +} + +} // namespace impl + +// U39: ==== RSQRT (x) +namespace impl +{ + +namespace rsqrt_fn_ns = dpctl::tensor::kernels::rsqrt; + +static unary_contig_impl_fn_ptr_t + rsqrt_contig_dispatch_vector[td_ns::num_types]; +static int rsqrt_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + rsqrt_strided_dispatch_vector[td_ns::num_types]; + +void populate_rsqrt_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = rsqrt_fn_ns; + + using fn_ns::RsqrtContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(rsqrt_contig_dispatch_vector); + + using fn_ns::RsqrtStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(rsqrt_strided_dispatch_vector); + + using fn_ns::RsqrtTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(rsqrt_output_typeid_vector); +} + +} // namespace impl + // ========================================================================================== // // @@ -4889,6 +5037,117 @@ void init_elementwise_functions(py::module_ m) py::arg("depends") = py::list()); m.def("_hypot_result_type", hypot_result_type_pyapi, ""); } + + // U37: ==== CBRT (x) + { + impl::populate_cbrt_dispatch_vectors(); + using impl::cbrt_contig_dispatch_vector; + using impl::cbrt_output_typeid_vector; + using impl::cbrt_strided_dispatch_vector; + + auto cbrt_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, cbrt_output_typeid_vector, + cbrt_contig_dispatch_vector, cbrt_strided_dispatch_vector); + }; + m.def("_cbrt", cbrt_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto cbrt_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, cbrt_output_typeid_vector); + }; + 
m.def("_cbrt_result_type", cbrt_result_type_pyapi); + } + + // B25: ==== COPYSIGN (x1, x2) + { + impl::populate_copysign_dispatch_tables(); + using impl::copysign_contig_dispatch_table; + using impl::copysign_output_id_table; + using impl::copysign_strided_dispatch_table; + + auto copysign_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, copysign_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + copysign_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + copysign_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig row and + // c-contig matrix with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto copysign_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + copysign_output_id_table); + }; + m.def("_copysign", copysign_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_copysign_result_type", copysign_result_type_pyapi, ""); + } + + // U38: ==== EXP2 (x) + { + impl::populate_exp2_dispatch_vectors(); + using impl::exp2_contig_dispatch_vector; + using impl::exp2_output_typeid_vector; + using impl::exp2_strided_dispatch_vector; + + auto exp2_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, 
dst, exec_q, depends, exp2_output_typeid_vector, + exp2_contig_dispatch_vector, exp2_strided_dispatch_vector); + }; + m.def("_exp2", exp2_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto exp2_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, exp2_output_typeid_vector); + }; + m.def("_exp2_result_type", exp2_result_type_pyapi); + } + + // U39: ==== RSQRT (x) + { + impl::populate_rsqrt_dispatch_vectors(); + using impl::rsqrt_contig_dispatch_vector; + using impl::rsqrt_output_typeid_vector; + using impl::rsqrt_strided_dispatch_vector; + + auto rsqrt_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, rsqrt_output_typeid_vector, + rsqrt_contig_dispatch_vector, rsqrt_strided_dispatch_vector); + }; + m.def("_rsqrt", rsqrt_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto rsqrt_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + rsqrt_output_typeid_vector); + }; + m.def("_rsqrt_result_type", rsqrt_result_type_pyapi); + } } } // namespace py_internal diff --git a/dpctl/tests/elementwise/test_cbrt.py b/dpctl/tests/elementwise/test_cbrt.py new file mode 100644 index 0000000000..b06a8d19cf --- /dev/null +++ b/dpctl/tests/elementwise/test_cbrt.py @@ -0,0 +1,79 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest +from numpy.testing import assert_allclose + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _map_to_device_dtype, _no_complex_dtypes, _real_fp_dtypes + + +@pytest.mark.parametrize("dtype", _no_complex_dtypes) +def test_cbrt_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.asarray(0, dtype=dtype, sycl_queue=q) + expected_dtype = np.cbrt(np.array(0, dtype=dtype)).dtype + expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) + assert dpt.cbrt(X).dtype == expected_dtype + + +@pytest.mark.parametrize("dtype", _real_fp_dtypes) +def test_cbrt_output_contig(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 1027 + + X = dpt.linspace(0, 13, num=n_seq, dtype=dtype, sycl_queue=q) + Xnp = dpt.asnumpy(X) + + Y = dpt.cbrt(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.cbrt(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", _real_fp_dtypes) +def test_cbrt_output_strided(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 2054 + + X = dpt.linspace(0, 13, num=n_seq, dtype=dtype, sycl_queue=q)[::-2] + Xnp = dpt.asnumpy(X) + + Y = dpt.cbrt(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.cbrt(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.usefixtures("suppress_invalid_numpy_warnings") +def test_cbrt_special_cases(): + get_queue_or_skip() + + X = 
dpt.asarray([dpt.nan, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4") + res = dpt.cbrt(X) + expected = dpt.asarray([dpt.nan, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4") + tol = dpt.finfo(dpt.float32).resolution + + assert dpt.allclose(res, expected, atol=tol, rtol=tol, equal_nan=True) diff --git a/dpctl/tests/elementwise/test_copysign.py b/dpctl/tests/elementwise/test_copysign.py new file mode 100644 index 0000000000..26a285343c --- /dev/null +++ b/dpctl/tests/elementwise/test_copysign.py @@ -0,0 +1,111 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import ctypes + +import numpy as np +import pytest + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _compare_dtypes, _no_complex_dtypes, _real_fp_dtypes + + +@pytest.mark.parametrize("op1_dtype", _no_complex_dtypes) +@pytest.mark.parametrize("op2_dtype", _no_complex_dtypes) +def test_copysign_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1 = dpt.ones(sz, dtype=op1_dtype) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype) + + r = dpt.copysign(ar1, ar2) + assert isinstance(r, dpt.usm_ndarray) + expected = np.copysign( + np.ones(1, dtype=op1_dtype), np.ones(1, dtype=op2_dtype) + ) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar1.shape + assert (dpt.asnumpy(r) == expected.astype(r.dtype)).all() + assert r.sycl_queue == ar1.sycl_queue + + ar3 = dpt.ones(sz, dtype=op1_dtype) + ar4 = dpt.ones(2 * sz, dtype=op2_dtype) + + r = dpt.copysign(ar3[::-1], ar4[::2]) + assert isinstance(r, dpt.usm_ndarray) + expected = np.copysign( + np.ones(1, dtype=op1_dtype), np.ones(1, dtype=op2_dtype) + ) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar3.shape + assert (dpt.asnumpy(r) == expected.astype(r.dtype)).all() + + +@pytest.mark.parametrize("arr_dt", _real_fp_dtypes) +def test_copysign_python_scalar(arr_dt): + q = get_queue_or_skip() + skip_if_dtype_not_supported(arr_dt, q) + + X = dpt.ones((10, 10), dtype=arr_dt, sycl_queue=q) + py_ones = ( + bool(1), + int(1), + float(1), + np.float32(1), + ctypes.c_int(1), + ) + for sc in py_ones: + R = dpt.copysign(X, sc) + assert isinstance(R, dpt.usm_ndarray) + R = dpt.copysign(sc, X) + assert isinstance(R, dpt.usm_ndarray) + + +@pytest.mark.parametrize("dt", _real_fp_dtypes) +def test_copysign(dt): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dt, q) + + x = 
dpt.arange(100, dtype=dt, sycl_queue=q) + x[1::2] *= -1 + y = dpt.ones(100, dtype=dt, sycl_queue=q) + y[::2] *= -1 + res = dpt.copysign(x, y) + expected = dpt.negative(x) + tol = dpt.finfo(dt).resolution + assert dpt.allclose(res, expected, atol=tol, rtol=tol) + + +def test_copysign_special_values(): + get_queue_or_skip() + + x1 = dpt.asarray([1.0, 0.0, dpt.nan, dpt.nan], dtype="f4") + y1 = dpt.asarray([-1.0, -0.0, -dpt.nan, -1], dtype="f4") + res = dpt.copysign(x1, y1) + assert dpt.all(dpt.signbit(res)) + x2 = dpt.asarray([-1.0, -0.0, -dpt.nan, -dpt.nan], dtype="f4") + res = dpt.copysign(x2, y1) + assert dpt.all(dpt.signbit(res)) + y2 = dpt.asarray([0.0, 1.0, dpt.nan, 1.0], dtype="f4") + res = dpt.copysign(x2, y2) + assert not dpt.any(dpt.signbit(res)) + res = dpt.copysign(x1, y2) + assert not dpt.any(dpt.signbit(res)) diff --git a/dpctl/tests/elementwise/test_exp2.py b/dpctl/tests/elementwise/test_exp2.py new file mode 100644 index 0000000000..d4bef1efab --- /dev/null +++ b/dpctl/tests/elementwise/test_exp2.py @@ -0,0 +1,168 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import itertools + +import numpy as np +import pytest +from numpy.testing import assert_allclose + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _map_to_device_dtype, _usm_types + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_exp2_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.asarray(0, dtype=dtype, sycl_queue=q) + expected_dtype = np.exp2(np.array(0, dtype=dtype)).dtype + expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) + assert dpt.exp2(X).dtype == expected_dtype + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_exp2_output_contig(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 1027 + + X = dpt.linspace(1, 5, num=n_seq, dtype=dtype, sycl_queue=q) + Xnp = dpt.asnumpy(X) + + Y = dpt.exp2(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.exp2(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_exp2_output_strided(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 2 * 1027 + + X = dpt.linspace(1, 5, num=n_seq, dtype=dtype, sycl_queue=q)[::-2] + Xnp = dpt.asnumpy(X) + + Y = dpt.exp2(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.exp2(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("usm_type", _usm_types) +def test_exp2_usm_type(usm_type): + q = get_queue_or_skip() + + arg_dt = np.dtype("f4") + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q) + X[..., 0::2] = 1 / 4 + X[..., 1::2] = 1 / 2 + + Y = dpt.exp2(X) + assert Y.usm_type == X.usm_type + assert Y.sycl_queue == X.sycl_queue + assert Y.flags.c_contiguous + + expected_Y = np.empty(input_shape, dtype=arg_dt) + expected_Y[..., 0::2] = 
np.exp2(np.float32(1 / 4)) + expected_Y[..., 1::2] = np.exp2(np.float32(1 / 2)) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_exp2_order(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + arg_dt = np.dtype(dtype) + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q) + X[..., 0::2] = 1 / 4 + X[..., 1::2] = 1 / 2 + + for ord in ["C", "F", "A", "K"]: + for perms in itertools.permutations(range(4)): + U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms) + Y = dpt.exp2(U, order=ord) + expected_Y = np.exp2(dpt.asnumpy(U)) + tol = 8 * max( + dpt.finfo(Y.dtype).resolution, + np.finfo(expected_Y.dtype).resolution, + ) + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + + +def test_exp2_special_cases(): + get_queue_or_skip() + + X = dpt.asarray([dpt.nan, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4") + res = np.asarray([np.nan, 1.0, 1.0, np.inf, 0.0], dtype="f4") + + tol = dpt.finfo(X.dtype).resolution + assert_allclose(dpt.asnumpy(dpt.exp2(X)), res, atol=tol, rtol=tol) + + # special cases for complex variant + num_finite = 1.0 + vals = [ + complex(0.0, 0.0), + complex(num_finite, dpt.inf), + complex(num_finite, dpt.nan), + complex(dpt.inf, 0.0), + complex(-dpt.inf, num_finite), + complex(dpt.inf, num_finite), + complex(-dpt.inf, dpt.inf), + complex(dpt.inf, dpt.inf), + complex(-dpt.inf, dpt.nan), + complex(dpt.inf, dpt.nan), + complex(dpt.nan, 0.0), + complex(dpt.nan, num_finite), + complex(dpt.nan, dpt.nan), + ] + X = dpt.asarray(vals, dtype=dpt.complex64) + cis_1 = complex(np.cos(num_finite), np.sin(num_finite)) + c_nan = complex(np.nan, np.nan) + res = np.asarray( + [ + complex(1.0, 0.0), + c_nan, + c_nan, + complex(np.inf, 0.0), + 0.0, + np.inf * cis_1, + complex(0.0, 0.0), + complex(np.inf, np.nan), + complex(0.0, 0.0), + complex(np.inf, np.nan), + complex(np.nan, 0.0), + 
c_nan, + c_nan, + ], + dtype=np.complex64, + ) + + tol = dpt.finfo(X.dtype).resolution + with np.errstate(invalid="ignore"): + assert_allclose(dpt.asnumpy(dpt.exp2(X)), res, atol=tol, rtol=tol) diff --git a/dpctl/tests/elementwise/test_rsqrt.py b/dpctl/tests/elementwise/test_rsqrt.py new file mode 100644 index 0000000000..ef9378ade2 --- /dev/null +++ b/dpctl/tests/elementwise/test_rsqrt.py @@ -0,0 +1,74 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import pytest +from numpy.testing import assert_allclose + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _map_to_device_dtype, _no_complex_dtypes, _real_fp_dtypes + + +@pytest.mark.parametrize("dtype", _no_complex_dtypes) +def test_rsqrt_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + x = dpt.asarray(1, dtype=dtype, sycl_queue=q) + expected_dtype = np.reciprocal(np.sqrt(np.array(1, dtype=dtype))).dtype + expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) + assert dpt.rsqrt(x).dtype == expected_dtype + + +@pytest.mark.parametrize("dtype", _real_fp_dtypes) +def test_rsqrt_output_contig(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 1027 + + x = dpt.linspace(1, 13, num=n_seq, dtype=dtype, sycl_queue=q) + res = dpt.rsqrt(x) + expected = np.reciprocal(np.sqrt(dpt.asnumpy(x), dtype=dtype)) + tol = 8 * dpt.finfo(res.dtype).resolution + assert_allclose(dpt.asnumpy(res), expected, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", _real_fp_dtypes) +def test_rsqrt_output_strided(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 2054 + + x = dpt.linspace(1, 13, num=n_seq, dtype=dtype, sycl_queue=q)[::-2] + res = dpt.rsqrt(x) + expected = np.reciprocal(np.sqrt(dpt.asnumpy(x), dtype=dtype)) + tol = 8 * dpt.finfo(res.dtype).resolution + assert_allclose(dpt.asnumpy(res), expected, atol=tol, rtol=tol) + + +def test_rsqrt_special_cases(): + get_queue_or_skip() + + x = dpt.asarray([dpt.nan, -1.0, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4") + res = dpt.rsqrt(x) + expected = dpt.asarray( + [dpt.nan, dpt.nan, dpt.inf, -dpt.inf, 0.0, dpt.nan], dtype="f4" + ) + assert dpt.allclose(res, expected, equal_nan=True) From 4f5ee5e5463e058576daf5e6316d5c7074368783 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 20 Sep 2023 08:31:15 -0500 
Subject: [PATCH 03/19] Use sycl::ext::oneapi::experimental for complex trig/trigh and inverses Use sycl_complex extension to implement complex-valued trigonometric, hyperbolic functions and their inverses. This works around use of double precision functions/literals in implementations of these functions in MSVC headers, causing failures to offload on Iris Xe for single precision input citing lack of fp64 support by the hardware. --- dpctl/tensor/CMakeLists.txt | 2 +- .../include/kernels/elementwise_functions/acos.hpp | 5 ++++- .../include/kernels/elementwise_functions/acosh.hpp | 5 ++++- .../include/kernels/elementwise_functions/asin.hpp | 5 ++++- .../include/kernels/elementwise_functions/asinh.hpp | 5 ++++- .../include/kernels/elementwise_functions/atan.hpp | 5 ++++- .../include/kernels/elementwise_functions/atanh.hpp | 5 ++++- .../include/kernels/elementwise_functions/cos.hpp | 5 ++++- .../include/kernels/elementwise_functions/cosh.hpp | 5 ++++- .../include/kernels/elementwise_functions/sin.hpp | 7 +++++-- .../include/kernels/elementwise_functions/sinh.hpp | 2 ++ .../include/kernels/elementwise_functions/tan.hpp | 4 +++- .../include/kernels/elementwise_functions/tanh.hpp | 5 ++++- 13 files changed, 47 insertions(+), 13 deletions(-) diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index aba009411d..1aa1206b51 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -77,7 +77,7 @@ endforeach() if (UNIX) set_source_files_properties( ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp - PROPERTIES COMPILE_DEFINITIONS "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES") + PROPERTIES COMPILE_DEFINITIONS "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES;SYCL_EXT_ONEAPI_COMPLEX") endif() target_compile_options(${python_module_name} PRIVATE -fno-sycl-id-queries-fit-in-int) target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel) diff --git 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp index ac1d597c93..e8d7d9eb17 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -47,6 +48,7 @@ namespace acos namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace cmplx_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -114,7 +116,8 @@ template struct AcosFunctor } /* ordinary cases */ - return std::acos(in); + return cmplx_ns::acos( + cmplx_ns::complex(in)); // std::acos(in); } else { static_assert(std::is_floating_point_v || diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp index 484b0da8a6..4288e6259f 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -47,6 +48,7 @@ namespace acosh namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace cmplx_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -118,7 +120,8 @@ template struct AcoshFunctor } else { /* ordinary cases */ - acos_in = std::acos(in); + acos_in = cmplx_ns::acos( + cmplx_ns::complex(in)); // std::acos(in); } /* Now we calculate acosh(z) */ diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp index 8b960dd30d..ed9418d69f 100644 --- 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -47,6 +48,7 @@ namespace asin namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace cmplx_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -134,7 +136,8 @@ template struct AsinFunctor return resT{asinh_im, asinh_re}; } /* ordinary cases */ - return std::asin(in); + return cmplx_ns::asin( + cmplx_ns::complex(in)); // std::asin(in); } else { static_assert(std::is_floating_point_v || diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp index 271a861cfe..7712a902e9 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -47,6 +48,7 @@ namespace asinh namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace cmplx_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -115,7 +117,8 @@ template struct AsinhFunctor } /* ordinary cases */ - return std::asinh(in); + return cmplx_ns::asinh( + cmplx_ns::complex(in)); // std::asinh(in); } else { static_assert(std::is_floating_point_v || diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp index f1dcce2831..8a7b67a5ac 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp @@ -28,6 +28,7 @@ #include #include #include 
+#include #include #include "kernels/elementwise_functions/common.hpp" @@ -48,6 +49,7 @@ namespace atan namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace cmplx_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -126,7 +128,8 @@ template struct AtanFunctor return resT{atanh_im, atanh_re}; } /* ordinary cases */ - return std::atan(in); + return cmplx_ns::atan( + cmplx_ns::complex(in)); // std::atan(in); } else { static_assert(std::is_floating_point_v || diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp index 56432d7808..15b09ee952 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -48,6 +49,7 @@ namespace atanh namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace cmplx_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -119,7 +121,8 @@ template struct AtanhFunctor return resT{res_re, res_im}; } /* ordinary cases */ - return std::atanh(in); + return cmplx_ns::atanh( + cmplx_ns::complex(in)); // std::atanh(in); } else { static_assert(std::is_floating_point_v || diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp index f7c66d5f68..f78c67a4cf 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -47,6 +48,7 @@ namespace cos namespace py = pybind11; namespace td_ns = 
dpctl::tensor::type_dispatch; +namespace cmplx_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -81,7 +83,8 @@ template struct CosFunctor * real and imaginary parts of input are finite. */ if (in_re_finite && in_im_finite) { - return std::cos(in); + return cmplx_ns::cos( + cmplx_ns::complex(in)); // std::cos(in); } /* diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp index fbcc7e40f9..e1af41c331 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -47,6 +48,7 @@ namespace cosh namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace cmplx_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -81,7 +83,8 @@ template struct CoshFunctor * real and imaginary parts of input are finite. 
*/ if (xfinite && yfinite) { - return std::cosh(in); + return cmplx_ns::cosh( + cmplx_ns::complex(in)); // std::cosh(in); } /* diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp index b9f03e6234..0f35aa306e 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp @@ -23,10 +23,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -47,6 +48,7 @@ namespace sin namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace cmplx_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -79,7 +81,8 @@ template struct SinFunctor * real and imaginary parts of input are finite. */ if (in_re_finite && in_im_finite) { - return std::sin(in); + return cmplx_ns::sin( + cmplx_ns::complex(in)); // std::sin(in); } /* diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp index 3a8d05d774..55fdc44531 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -47,6 +48,7 @@ namespace sinh namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace cmplx_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp index 45f931b7f4..d01a3e906a 100644 --- 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -48,6 +49,7 @@ namespace tan namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace cmplx_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -118,7 +120,7 @@ template struct TanFunctor return resT{q_nan, q_nan}; } /* ordinary cases */ - return std::tan(in); + return cmplx_ns::tan(cmplx_ns::complex(in)); // std::tan(in); } else { static_assert(std::is_floating_point_v || diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp index ef943319b2..237d8d2618 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -49,6 +50,7 @@ namespace tanh namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace cmplx_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -112,7 +114,8 @@ template struct TanhFunctor return resT{q_nan, q_nan}; } /* ordinary cases */ - return std::tanh(in); + return cmplx_ns::tanh( + cmplx_ns::complex(in)); // std::tanh(in); } else { static_assert(std::is_floating_point_v || From 1d5fdcecc927e0e0d68acec53e87f71c2c0a1eb0 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 20 Sep 2023 09:33:45 -0500 Subject: [PATCH 04/19] Set SYCL_EXT_ONEAPI_COMPLEX on Windows as well --- dpctl/tensor/CMakeLists.txt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index 
1aa1206b51..15bc58f57b 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -75,10 +75,15 @@ foreach(_src_fn ${_no_fast_math_sources}) ) endforeach() if (UNIX) - set_source_files_properties( - ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp - PROPERTIES COMPILE_DEFINITIONS "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES;SYCL_EXT_ONEAPI_COMPLEX") + set(_compiler_definitions "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES;SYCL_EXT_ONEAPI_COMPLEX") +else() + set(_compiler_definitions "SYCL_EXT_ONEAPI_COMPLEX") endif() +set_source_files_properties( + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp + PROPERTIES COMPILE_DEFINITIONS "${_compiler_definitions}" +) + target_compile_options(${python_module_name} PRIVATE -fno-sycl-id-queries-fit-in-int) target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel) if(UNIX) From 8df47452020eafabe007860f4768b3512d4ee023 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 22 Sep 2023 13:25:26 -0500 Subject: [PATCH 05/19] Use sycl_complex in add, conj --- .../kernels/elementwise_functions/add.hpp | 28 ++++++++++++++++++- .../kernels/elementwise_functions/conj.hpp | 6 +++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp index df6797845f..060665504e 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "utils/offset_utils.hpp" @@ -49,6 +50,7 @@ namespace add namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; +namespace exprm_ns = sycl::ext::oneapi::experimental; template struct AddFunctor { @@ -60,7 +62,31 @@ 
template struct AddFunctor resT operator()(const argT1 &in1, const argT2 &in2) const { - return in1 + in2; + if constexpr (tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + using rT1 = typename argT1::value_type; + using rT2 = typename argT2::value_type; + + return exprm_ns::complex(in1) + exprm_ns::complex(in2); + } + else if constexpr (tu_ns::is_complex::value && + !tu_ns::is_complex::value) + { + using rT1 = typename argT1::value_type; + + return exprm_ns::complex(in1) + in2; + } + else if constexpr (!tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + using rT2 = typename argT2::value_type; + + return in1 + exprm_ns::complex(in2); + } + else { + return in1 + in2; + } } template diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp index 3b0a1584de..0b9f0c0460 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -49,6 +50,7 @@ namespace conj namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -68,7 +70,9 @@ template struct ConjFunctor resT operator()(const argT &in) const { if constexpr (is_complex::value) { - return std::conj(in); + using rT = typename argT::value_type; + + return exprm_ns::conj(exprm_ns::complex(in)); // std::conj(in); } else { if constexpr (!std::is_same_v) From ef2563d155ae1e081e14e30f9c0144aab8878798 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 25 Sep 2023 14:16:17 -0500 Subject: [PATCH 06/19] More transitions to experimental complex --- .../kernels/elementwise_functions/abs.hpp | 6 ++++-- .../kernels/elementwise_functions/acos.hpp | 8 ++++---- 
.../kernels/elementwise_functions/acosh.hpp | 2 +- .../kernels/elementwise_functions/add.hpp | 2 +- .../kernels/elementwise_functions/asin.hpp | 8 ++++---- .../kernels/elementwise_functions/asinh.hpp | 8 ++++---- .../kernels/elementwise_functions/atan.hpp | 8 ++++---- .../kernels/elementwise_functions/atan2.hpp | 2 +- .../kernels/elementwise_functions/atanh.hpp | 8 ++++---- .../elementwise_functions/bitwise_and.hpp | 2 +- .../elementwise_functions/bitwise_invert.hpp | 2 +- .../bitwise_left_shift.hpp | 2 +- .../elementwise_functions/bitwise_or.hpp | 2 +- .../bitwise_right_shift.hpp | 2 +- .../elementwise_functions/bitwise_xor.hpp | 2 +- .../kernels/elementwise_functions/ceil.hpp | 2 +- .../kernels/elementwise_functions/common.hpp | 2 +- .../elementwise_functions/common_inplace.hpp | 2 +- .../kernels/elementwise_functions/conj.hpp | 2 +- .../kernels/elementwise_functions/cos.hpp | 8 ++++---- .../kernels/elementwise_functions/cosh.hpp | 8 ++++---- .../kernels/elementwise_functions/equal.hpp | 17 +++++++++++++++-- .../kernels/elementwise_functions/exp.hpp | 7 +++++-- .../kernels/elementwise_functions/expm1.hpp | 2 +- .../kernels/elementwise_functions/floor.hpp | 2 +- .../elementwise_functions/floor_divide.hpp | 2 +- .../kernels/elementwise_functions/greater.hpp | 2 +- .../elementwise_functions/greater_equal.hpp | 2 +- .../kernels/elementwise_functions/hypot.hpp | 2 +- .../kernels/elementwise_functions/imag.hpp | 2 +- .../kernels/elementwise_functions/isfinite.hpp | 3 ++- .../kernels/elementwise_functions/isinf.hpp | 2 +- .../kernels/elementwise_functions/isnan.hpp | 2 +- .../elementwise_functions/less_equal.hpp | 2 +- 34 files changed, 77 insertions(+), 58 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp index bcf6a28040..ab321ad356 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp +++ 
b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp @@ -23,12 +23,13 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include #include +#include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -49,6 +50,7 @@ namespace abs namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -120,7 +122,7 @@ template struct AbsFunctor } else { #ifdef USE_STD_ABS_FOR_COMPLEX_TYPES - return std::abs(z); + return exprm_ns::abs(exprm_ns::complex(z)); #else return std::hypot(std::real(z), std::imag(z)); #endif diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp index e8d7d9eb17..28968de761 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp @@ -23,11 +23,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -48,7 +48,7 @@ namespace acos namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace cmplx_ns = sycl::ext::oneapi::experimental; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -116,8 +116,8 @@ template struct AcosFunctor } /* ordinary cases */ - return cmplx_ns::acos( - cmplx_ns::complex(in)); // std::acos(in); + return exprm_ns::acos( + exprm_ns::complex(in)); // std::acos(in); } else { static_assert(std::is_floating_point_v || diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp 
index 4288e6259f..7323579df5 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp @@ -23,11 +23,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp index 060665504e..0ed1710833 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/add.hpp @@ -24,10 +24,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp index ed9418d69f..595d75eeca 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp @@ -23,11 +23,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -48,7 +48,7 @@ namespace asin namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace cmplx_ns = sycl::ext::oneapi::experimental; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -136,8 +136,8 @@ template struct AsinFunctor return resT{asinh_im, asinh_re}; } /* ordinary cases */ - return cmplx_ns::asin( - cmplx_ns::complex(in)); // std::asin(in); + return exprm_ns::asin( + 
exprm_ns::complex(in)); // std::asin(in); } else { static_assert(std::is_floating_point_v || diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp index 7712a902e9..d126afba87 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp @@ -23,11 +23,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -48,7 +48,7 @@ namespace asinh namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace cmplx_ns = sycl::ext::oneapi::experimental; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -117,8 +117,8 @@ template struct AsinhFunctor } /* ordinary cases */ - return cmplx_ns::asinh( - cmplx_ns::complex(in)); // std::asinh(in); + return exprm_ns::asinh( + exprm_ns::complex(in)); // std::asinh(in); } else { static_assert(std::is_floating_point_v || diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp index 8a7b67a5ac..df8bba538b 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp @@ -23,12 +23,12 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -49,7 +49,7 @@ namespace atan namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace cmplx_ns = sycl::ext::oneapi::experimental; +namespace exprm_ns = sycl::ext::oneapi::experimental; 
using dpctl::tensor::type_utils::is_complex; @@ -128,8 +128,8 @@ template struct AtanFunctor return resT{atanh_im, atanh_re}; } /* ordinary cases */ - return cmplx_ns::atan( - cmplx_ns::complex(in)); // std::atan(in); + return exprm_ns::atan( + exprm_ns::complex(in)); // std::atan(in); } else { static_assert(std::is_floating_point_v || diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp index 765c0fe0c3..8df1667312 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp @@ -24,9 +24,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp index 15b09ee952..d6a4b06ac3 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp @@ -23,12 +23,12 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -49,7 +49,7 @@ namespace atanh namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace cmplx_ns = sycl::ext::oneapi::experimental; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -121,8 +121,8 @@ template struct AtanhFunctor return resT{res_re, res_im}; } /* ordinary cases */ - return cmplx_ns::atanh( - cmplx_ns::complex(in)); // std::atanh(in); + return exprm_ns::atanh( + exprm_ns::complex(in)); // std::atanh(in); } else { 
static_assert(std::is_floating_point_v || diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp index 016b3a05d3..85bb603bd3 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp @@ -23,9 +23,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp index 9ce56be966..8337396427 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp @@ -25,9 +25,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp index 4ae04f97de..79b767e678 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp @@ -24,9 +24,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp index 65f25dd296..18173f02b1 100644 --- 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp @@ -23,9 +23,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp index 9442d4f6b7..354b2a5cd9 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp @@ -24,9 +24,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp index 2b0ab09dca..c9cdc77701 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp @@ -23,9 +23,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/ceil.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/ceil.hpp index 76fa80c287..0059064ec1 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/ceil.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/ceil.hpp @@ -23,10 +23,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include 
#include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp index c0a94be341..5dc4728a65 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp @@ -23,10 +23,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common_inplace.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common_inplace.hpp index 614c7f4092..c4f893a532 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common_inplace.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/common_inplace.hpp @@ -24,10 +24,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include namespace dpctl { diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp index 0b9f0c0460..6977e3a747 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/conj.hpp @@ -24,12 +24,12 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp index f78c67a4cf..bdc1acc1fe 100644 --- 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp @@ -23,11 +23,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -48,7 +48,7 @@ namespace cos namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace cmplx_ns = sycl::ext::oneapi::experimental; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -83,8 +83,8 @@ template struct CosFunctor * real and imaginary parts of input are finite. */ if (in_re_finite && in_im_finite) { - return cmplx_ns::cos( - cmplx_ns::complex(in)); // std::cos(in); + return exprm_ns::cos( + exprm_ns::complex(in)); // std::cos(in); } /* diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp index e1af41c331..7093d2a2a3 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp @@ -23,11 +23,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -48,7 +48,7 @@ namespace cosh namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace cmplx_ns = sycl::ext::oneapi::experimental; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -83,8 +83,8 @@ template struct CoshFunctor * real and imaginary parts of input are finite. 
*/ if (xfinite && yfinite) { - return cmplx_ns::cosh( - cmplx_ns::complex(in)); // std::cosh(in); + return exprm_ns::cosh( + exprm_ns::complex(in)); // std::cosh(in); } /* diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/equal.hpp index cd726f72ea..6d68861396 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/equal.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/equal.hpp @@ -24,9 +24,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include +#include #include #include "utils/offset_utils.hpp" @@ -48,6 +49,7 @@ namespace equal namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; +namespace exprm_ns = sycl::ext::oneapi::experimental; template struct EqualFunctor { @@ -62,7 +64,18 @@ template struct EqualFunctor resT operator()(const argT1 &in1, const argT2 &in2) const { - return (in1 == in2); + if constexpr (tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + using realT1 = typename argT1::value_type; + using realT2 = typename argT2::value_type; + + return exprm_ns::complex(in1) == + exprm_ns::complex(in2); + } + else { + return (in1 == in2); + } } template diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp.hpp index 003de44c27..453eb05c52 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp.hpp @@ -23,10 +23,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -47,6 +48,7 @@ namespace exp namespace py = pybind11; 
namespace td_ns = dpctl::tensor::type_dispatch; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -73,7 +75,8 @@ template struct ExpFunctor const realT y = std::imag(in); if (std::isfinite(x)) { if (std::isfinite(y)) { - return std::exp(in); + return exprm_ns::exp( + exprm_ns::complex(in)); // std::exp(in); } else { return resT{q_nan, q_nan}; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp index 3f6a73b6d3..f5204e87b3 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp @@ -24,11 +24,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor.hpp index e675407d0b..88a20dafe0 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor.hpp @@ -23,10 +23,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp index 025d7e8bc4..b8d528bfc1 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp @@ -24,9 +24,9 @@ 
//===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp index 2a151ce737..e01360efa7 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp @@ -25,9 +25,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/math_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp index 5704336990..f017b7f150 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp @@ -25,9 +25,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/math_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp index a369c54f24..fd19d29c0b 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp @@ -24,9 +24,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/imag.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/imag.hpp index 
64da603037..bb1ff2ebcb 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/imag.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/imag.hpp @@ -24,11 +24,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isfinite.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isfinite.hpp index 1d8f177e40..1554f905b7 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isfinite.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isfinite.hpp @@ -24,10 +24,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "utils/offset_utils.hpp" @@ -46,6 +46,7 @@ namespace isfinite namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; using dpctl::tensor::type_utils::vec_cast; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isinf.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isinf.hpp index d9afdb9317..2720385614 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isinf.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isinf.hpp @@ -24,10 +24,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isnan.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isnan.hpp index b5051ab833..15551e295a 100644 --- 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isnan.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/isnan.hpp @@ -24,9 +24,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp index 47e2301fe7..f9f6729968 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp @@ -25,9 +25,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/math_utils.hpp" From 0717bbe5274805f152668e85e9c57543c473f8ea Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 27 Sep 2023 13:10:47 -0500 Subject: [PATCH 07/19] More files change to use sycl_complex --- .../include/kernels/elementwise_functions/less.hpp | 2 +- .../include/kernels/elementwise_functions/log.hpp | 12 ++++++++++-- .../include/kernels/elementwise_functions/log10.hpp | 8 ++++++-- .../include/kernels/elementwise_functions/log1p.hpp | 2 +- .../include/kernels/elementwise_functions/log2.hpp | 8 ++++++-- .../kernels/elementwise_functions/logaddexp.hpp | 2 +- .../kernels/elementwise_functions/logical_and.hpp | 2 +- .../kernels/elementwise_functions/logical_not.hpp | 2 +- .../kernels/elementwise_functions/logical_or.hpp | 2 +- .../kernels/elementwise_functions/logical_xor.hpp | 2 +- 10 files changed, 29 insertions(+), 13 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp index c33d6d7c10..02c7a0d95a 100644 --- 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less.hpp @@ -24,9 +24,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/math_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp index abcc899fc0..ff37d87157 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log.hpp @@ -23,10 +23,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -47,6 +48,7 @@ namespace log namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -65,7 +67,13 @@ template struct LogFunctor resT operator()(const argT &in) const { - return std::log(in); + if constexpr (is_complex::value) { + using realT = typename argT::value_type; + return exprm_ns::log(exprm_ns::complex(in)); // std::log(in); + } + else { + return std::log(in); + } } }; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log10.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log10.hpp index afcf8aa085..88dabcaabe 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log10.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log10.hpp @@ -24,10 +24,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -48,6 
+49,7 @@ namespace log10 namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; using dpctl::tensor::type_utils::vec_cast; @@ -70,7 +72,9 @@ template struct Log10Functor { if constexpr (is_complex::value) { using realT = typename argT::value_type; - return (std::log(in) / std::log(realT{10})); + // return (std::log(in) / std::log(realT{10})); + return exprm_ns::log(exprm_ns::complex(in)) / + std::log(realT{10}); } else { return std::log10(in); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp index 6d7a56ccf5..11e3fb3f9f 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp @@ -24,10 +24,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log2.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log2.hpp index 533d0120df..57d7dcaf31 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log2.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log2.hpp @@ -24,10 +24,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -48,6 +49,7 @@ namespace log2 namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; using dpctl::tensor::type_utils::vec_cast; @@ -70,7 +72,9 @@ template struct 
Log2Functor { if constexpr (is_complex::value) { using realT = typename argT::value_type; - return std::log(in) / std::log(realT{2}); + // std::log(in) / std::log(realT{2}); + return exprm_ns::log(exprm_ns::complex(in)) / + std::log(realT{2}); } else { return std::log2(in); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index 90b7997a37..bc0f26cdee 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -25,10 +25,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_and.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_and.hpp index 10e4e0cbff..988d1ed380 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_and.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_and.hpp @@ -25,9 +25,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_not.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_not.hpp index 78bacbe686..826af2ee37 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_not.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_not.hpp @@ -25,9 +25,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_or.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_or.hpp index bfb1288870..333951e6b5 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_or.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_or.hpp @@ -25,9 +25,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_xor.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_xor.hpp index 44d361cfc1..ce4bde9e6b 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_xor.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logical_xor.hpp @@ -25,9 +25,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" From c5f26eb5781d8c8cf6d5049af4edfbae100055e5 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 29 Sep 2023 09:43:28 -0500 Subject: [PATCH 08/19] Use oneapi extension for complexes for remaining elementwise functions Used functions from sycl::ext::oneapi::experimental context to implement evaluation on data of complex type. 
--- .../kernels/elementwise_functions/maximum.hpp | 2 +- .../kernels/elementwise_functions/minimum.hpp | 2 +- .../elementwise_functions/multiply.hpp | 17 ++++++++-- .../elementwise_functions/negative.hpp | 2 +- .../elementwise_functions/not_equal.hpp | 2 +- .../elementwise_functions/positive.hpp | 2 +- .../kernels/elementwise_functions/pow.hpp | 13 +++++++- .../kernels/elementwise_functions/proj.hpp | 2 +- .../kernels/elementwise_functions/real.hpp | 2 +- .../elementwise_functions/remainder.hpp | 2 +- .../kernels/elementwise_functions/round.hpp | 2 +- .../kernels/elementwise_functions/sign.hpp | 23 ++++++++------ .../kernels/elementwise_functions/signbit.hpp | 2 +- .../kernels/elementwise_functions/sinh.hpp | 2 +- .../kernels/elementwise_functions/sqrt.hpp | 9 ++++-- .../kernels/elementwise_functions/square.hpp | 15 +++++++-- .../elementwise_functions/subtract.hpp | 2 +- .../kernels/elementwise_functions/tan.hpp | 2 +- .../kernels/elementwise_functions/tanh.hpp | 2 +- .../elementwise_functions/true_divide.hpp | 31 +++++++++++++++++-- .../kernels/elementwise_functions/trunc.hpp | 2 +- 21 files changed, 105 insertions(+), 33 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp index 324f3f5ad2..8a1990ba7d 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp @@ -24,9 +24,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/math_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp index 9a7ec72e56..fb3490ee19 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp +++ 
b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp @@ -24,9 +24,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/math_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/multiply.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/multiply.hpp index c316279a76..612ad78360 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/multiply.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/multiply.hpp @@ -24,9 +24,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include +#include #include #include "utils/offset_utils.hpp" @@ -49,6 +50,7 @@ namespace multiply namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; +namespace exprm_ns = sycl::ext::oneapi::experimental; template struct MultiplyFunctor { @@ -60,7 +62,18 @@ template struct MultiplyFunctor resT operator()(const argT1 &in1, const argT2 &in2) const { - return in1 * in2; + if constexpr (tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + using realT1 = typename argT1::value_type; + using realT2 = typename argT2::value_type; + + return exprm_ns::complex(in1) * + exprm_ns::complex(in2); + } + else { + return in1 * in2; + } } template diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp index cbeeb60b7c..bc28aafad7 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/negative.hpp @@ -24,10 +24,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include 
#include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/not_equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/not_equal.hpp index 88e077b402..faeab82580 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/not_equal.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/not_equal.hpp @@ -24,9 +24,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp index cbeba2e91d..b3e109c76c 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/positive.hpp @@ -24,10 +24,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp index ba9241b8db..b02a8d4126 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp @@ -24,10 +24,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include +#include #include #include "utils/offset_utils.hpp" @@ -49,6 +50,7 @@ namespace pow namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; +namespace exprm_ns = sycl::ext::oneapi::experimental; template struct PowFunctor { @@ -83,6 +85,15 
@@ template struct PowFunctor } return res; } + else if constexpr (tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + using realT1 = typename argT1::value_type; + using realT2 = typename argT2::value_type; + + return exprm_ns::pow(exprm_ns::complex(in1), + exprm_ns::complex(in2)); + } else { return std::pow(in1, in2); } diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp index dcaa4b0f5f..92f5ffa729 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp @@ -24,12 +24,12 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/real.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/real.hpp index 294b796e96..6a7580d548 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/real.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/real.hpp @@ -24,11 +24,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp index 6cd306a900..fcf2775ef3 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp @@ -25,9 +25,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include 
#include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/round.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/round.hpp index 84d8fb7252..547d31b392 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/round.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/round.hpp @@ -23,10 +23,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp index fc3d44dcfa..162db394de 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp @@ -23,11 +23,12 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -48,6 +49,7 @@ namespace sign namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; using dpctl::tensor::type_utils::vec_cast; @@ -61,38 +63,41 @@ template struct SignFunctor std::disjunction, is_complex>>; using supports_sg_loadstore = std::false_type; - resT operator()(const argT &x) const + resT operator()(const argT &in) const { if constexpr (std::is_integral_v) { if constexpr (std::is_unsigned_v) { - return resT(0 < x); + return resT(0 < in); } else { - return sign(x); + return sign_impl(in); } } else { if constexpr (is_complex::value) { - if (x == argT(0)) { + using realT = typename argT::value_type; + + if (in == argT(0)) { return resT(0); } else { - return (x / 
std::abs(x)); + auto z = exprm_ns::complex(in); + return (z / exprm_ns::abs(z)); } } else { - if (std::isnan(x)) { + if (std::isnan(in)) { return std::numeric_limits::quiet_NaN(); } else { - return sign(x); + return sign_impl(in); } } } } private: - template T sign(const T &v) const + template T sign_impl(const T &v) const { return (T(0) < v) - (v < T(0)); } diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/signbit.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/signbit.hpp index 0f509f7950..3e961c466d 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/signbit.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/signbit.hpp @@ -24,10 +24,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp index 55fdc44531..af887431d7 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp @@ -23,11 +23,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp index efa580d70e..b638e4a55f 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp @@ -24,12 +24,13 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include 
#include +#include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -50,6 +51,7 @@ namespace sqrt namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -74,7 +76,10 @@ template struct SqrtFunctor // #else // return std::sqrt(in); // #endif - return csqrt(in); + using realT = typename argT::value_type; + + // return csqrt(in); + return exprm_ns::sqrt(exprm_ns::complex(in)); } else { return std::sqrt(in); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/square.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/square.hpp index 6b5f372c3d..2c37ce87d9 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/square.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/square.hpp @@ -24,10 +24,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -48,6 +49,7 @@ namespace square namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; using dpctl::tensor::type_utils::vec_cast; @@ -68,7 +70,16 @@ template struct SquareFunctor resT operator()(const argT &in) const { - return in * in; + if constexpr (is_complex::value) { + using realT = typename argT::value_type; + + auto z = exprm_ns::complex(in); + + return z * z; + } + else { + return in * in; + } } template diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp index 3eb8420933..332f52930d 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp +++ 
b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp @@ -24,9 +24,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp index d01a3e906a..1f97b59054 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp @@ -23,12 +23,12 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp index 237d8d2618..453ce17b54 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp @@ -24,12 +24,12 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp index 138f7a3f91..fcbb6adc08 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp @@ -24,9 +24,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include +#include #include #include 
"utils/offset_utils.hpp" @@ -48,6 +49,7 @@ namespace true_divide namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; +namespace exprm_ns = sycl::ext::oneapi::experimental; template struct TrueDivideFunctor @@ -60,7 +62,32 @@ struct TrueDivideFunctor resT operator()(const argT1 &in1, const argT2 &in2) const { - return in1 / in2; + if constexpr (tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + using realT1 = typename argT1::value_type; + using realT2 = typename argT2::value_type; + + return exprm_ns::complex(in1) / + exprm_ns::complex(in2); + } + else if constexpr (tu_ns::is_complex::value && + !tu_ns::is_complex::value) + { + using realT1 = typename argT1::value_type; + + return exprm_ns::complex(in1) / in2; + } + else if constexpr (!tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + using realT2 = typename argT2::value_type; + + return in1 / exprm_ns::complex(in2); + } + else { + return in1 / in2; + } } template diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/trunc.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/trunc.hpp index 33e942dd6a..0e08d966e9 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/trunc.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/trunc.hpp @@ -23,10 +23,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" From f6c3e56a714d0915abecfe8b74f2a15fc7e6e88a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 29 Sep 2023 09:53:24 -0500 Subject: [PATCH 09/19] Changes include CL/sycl.hpp to sycl/sycl.hpp per SYCL-2020 spec --- dpctl/tensor/libtensor/include/kernels/accumulators.hpp | 2 +- .../libtensor/include/kernels/boolean_advanced_indexing.hpp | 2 +- dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp 
| 2 +- dpctl/tensor/libtensor/include/kernels/constructors.hpp | 2 +- dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp | 2 +- .../libtensor/include/kernels/integer_advanced_indexing.hpp | 2 +- dpctl/tensor/libtensor/include/kernels/reductions.hpp | 3 +-- dpctl/tensor/libtensor/include/kernels/repeat.hpp | 2 +- dpctl/tensor/libtensor/include/kernels/where.hpp | 2 +- dpctl/tensor/libtensor/include/utils/offset_utils.hpp | 2 +- dpctl/tensor/libtensor/include/utils/sycl_utils.hpp | 2 +- dpctl/tensor/libtensor/include/utils/type_dispatch.hpp | 2 +- dpctl/tensor/libtensor/include/utils/type_utils.hpp | 2 +- 13 files changed, 13 insertions(+), 14 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/accumulators.hpp b/dpctl/tensor/libtensor/include/kernels/accumulators.hpp index 110010706c..40ddc49e77 100644 --- a/dpctl/tensor/libtensor/include/kernels/accumulators.hpp +++ b/dpctl/tensor/libtensor/include/kernels/accumulators.hpp @@ -23,11 +23,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include #include #include diff --git a/dpctl/tensor/libtensor/include/kernels/boolean_advanced_indexing.hpp b/dpctl/tensor/libtensor/include/kernels/boolean_advanced_indexing.hpp index 968459fb68..522baadc6d 100644 --- a/dpctl/tensor/libtensor/include/kernels/boolean_advanced_indexing.hpp +++ b/dpctl/tensor/libtensor/include/kernels/boolean_advanced_indexing.hpp @@ -23,10 +23,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include diff --git a/dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp b/dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp index 9736b2c2a3..61fb0f6ba0 100644 --- a/dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp +++ b/dpctl/tensor/libtensor/include/kernels/boolean_reductions.hpp @@ -24,7 +24,7 @@ 
//===----------------------------------------------------------------------===// #pragma once -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/include/kernels/constructors.hpp b/dpctl/tensor/libtensor/include/kernels/constructors.hpp index 8870e26ac2..c28033d23d 100644 --- a/dpctl/tensor/libtensor/include/kernels/constructors.hpp +++ b/dpctl/tensor/libtensor/include/kernels/constructors.hpp @@ -27,9 +27,9 @@ #include "utils/offset_utils.hpp" #include "utils/strided_iters.hpp" #include "utils/type_utils.hpp" -#include #include #include +#include namespace dpctl { diff --git a/dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp b/dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp index 0db1f071a1..9d1c788626 100644 --- a/dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp +++ b/dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp @@ -23,10 +23,10 @@ //===----------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp b/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp index 6acf0a9f50..769774f4dd 100644 --- a/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp +++ b/dpctl/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp @@ -23,11 +23,11 @@ //===----------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/reductions.hpp b/dpctl/tensor/libtensor/include/kernels/reductions.hpp index 7cb97cd4f9..f9b3c5bcd0 100644 --- a/dpctl/tensor/libtensor/include/kernels/reductions.hpp +++ b/dpctl/tensor/libtensor/include/kernels/reductions.hpp @@ -23,11 +23,10 @@ 
//===----------------------------------------------------------------------===// #pragma once -#include -#include #include #include #include +#include #include #include #include diff --git a/dpctl/tensor/libtensor/include/kernels/repeat.hpp b/dpctl/tensor/libtensor/include/kernels/repeat.hpp index 1f2335fc6c..05b57a8cda 100644 --- a/dpctl/tensor/libtensor/include/kernels/repeat.hpp +++ b/dpctl/tensor/libtensor/include/kernels/repeat.hpp @@ -23,11 +23,11 @@ //===----------------------------------------------------------------------===// #pragma once -#include #include #include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/where.hpp b/dpctl/tensor/libtensor/include/kernels/where.hpp index fc9546a9a8..9558603d5e 100644 --- a/dpctl/tensor/libtensor/include/kernels/where.hpp +++ b/dpctl/tensor/libtensor/include/kernels/where.hpp @@ -27,11 +27,11 @@ #include "pybind11/stl.h" #include "utils/offset_utils.hpp" #include "utils/type_utils.hpp" -#include #include #include #include #include +#include #include namespace dpctl diff --git a/dpctl/tensor/libtensor/include/utils/offset_utils.hpp b/dpctl/tensor/libtensor/include/utils/offset_utils.hpp index 29517ce2c5..523620737b 100644 --- a/dpctl/tensor/libtensor/include/utils/offset_utils.hpp +++ b/dpctl/tensor/libtensor/include/utils/offset_utils.hpp @@ -26,9 +26,9 @@ #pragma once -#include #include #include +#include #include #include diff --git a/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp b/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp index 0d4240c516..31d3d874a0 100644 --- a/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp +++ b/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp @@ -23,10 +23,10 @@ //===----------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "math_utils.hpp" diff --git 
a/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp b/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp index afc458169e..af031a963b 100644 --- a/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp +++ b/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp @@ -25,8 +25,8 @@ #pragma once #include "dpctl4pybind11.hpp" -#include #include +#include namespace dpctl { diff --git a/dpctl/tensor/libtensor/include/utils/type_utils.hpp b/dpctl/tensor/libtensor/include/utils/type_utils.hpp index 4ea17ac730..a50e5159e4 100644 --- a/dpctl/tensor/libtensor/include/utils/type_utils.hpp +++ b/dpctl/tensor/libtensor/include/utils/type_utils.hpp @@ -23,9 +23,9 @@ //===----------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include namespace dpctl From 3b9d81d3b350d7cf625d57e7938869b7b9e5b2f9 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 29 Sep 2023 09:54:05 -0500 Subject: [PATCH 10/19] Change include CL/sycl.hpp to sycl/sycl.hpp per SYCL-2020 spec --- dpctl/tensor/libtensor/source/accumulators.cpp | 2 +- dpctl/tensor/libtensor/source/accumulators.hpp | 2 +- dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp | 2 +- dpctl/tensor/libtensor/source/boolean_advanced_indexing.hpp | 2 +- dpctl/tensor/libtensor/source/boolean_reductions.cpp | 2 +- dpctl/tensor/libtensor/source/boolean_reductions.hpp | 2 +- dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp | 2 +- dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.hpp | 2 +- dpctl/tensor/libtensor/source/copy_for_reshape.cpp | 2 +- dpctl/tensor/libtensor/source/copy_for_reshape.hpp | 2 +- dpctl/tensor/libtensor/source/copy_for_roll.cpp | 2 +- dpctl/tensor/libtensor/source/copy_for_roll.hpp | 2 +- .../libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp | 2 +- .../libtensor/source/copy_numpy_ndarray_into_usm_ndarray.hpp | 2 +- dpctl/tensor/libtensor/source/device_support_queries.cpp | 2 +- 
dpctl/tensor/libtensor/source/device_support_queries.hpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions.hpp | 2 +- dpctl/tensor/libtensor/source/eye_ctor.cpp | 2 +- dpctl/tensor/libtensor/source/eye_ctor.hpp | 2 +- dpctl/tensor/libtensor/source/full_ctor.cpp | 2 +- dpctl/tensor/libtensor/source/full_ctor.hpp | 2 +- dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp | 2 +- dpctl/tensor/libtensor/source/integer_advanced_indexing.hpp | 2 +- dpctl/tensor/libtensor/source/linear_sequences.cpp | 2 +- dpctl/tensor/libtensor/source/linear_sequences.hpp | 2 +- dpctl/tensor/libtensor/source/reduction_over_axis.hpp | 4 +++- dpctl/tensor/libtensor/source/repeat.cpp | 2 +- dpctl/tensor/libtensor/source/repeat.hpp | 2 +- dpctl/tensor/libtensor/source/tensor_py.cpp | 2 +- dpctl/tensor/libtensor/source/triul_ctor.cpp | 2 +- dpctl/tensor/libtensor/source/triul_ctor.hpp | 2 +- dpctl/tensor/libtensor/source/where.cpp | 2 +- dpctl/tensor/libtensor/source/where.hpp | 2 +- 34 files changed, 36 insertions(+), 34 deletions(-) diff --git a/dpctl/tensor/libtensor/source/accumulators.cpp b/dpctl/tensor/libtensor/source/accumulators.cpp index 40f4424ef9..0a2ce69f69 100644 --- a/dpctl/tensor/libtensor/source/accumulators.cpp +++ b/dpctl/tensor/libtensor/source/accumulators.cpp @@ -23,11 +23,11 @@ //===----------------------------------------------------------------------===// #include "dpctl4pybind11.hpp" -#include #include #include #include #include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/accumulators.hpp b/dpctl/tensor/libtensor/source/accumulators.hpp index 4979eab54f..ba40c38e1d 100644 --- a/dpctl/tensor/libtensor/source/accumulators.hpp +++ b/dpctl/tensor/libtensor/source/accumulators.hpp @@ -23,7 +23,7 @@ //===--------------------------------------------------------------------===// #pragma once -#include +#include #include #include diff --git 
a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp index ff7b32d0f7..903e1b5536 100644 --- a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp +++ b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp @@ -24,11 +24,11 @@ //===----------------------------------------------------------------------===// #include "dpctl4pybind11.hpp" -#include #include #include #include #include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.hpp b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.hpp index 26f1c6a646..8347d9f687 100644 --- a/dpctl/tensor/libtensor/source/boolean_advanced_indexing.hpp +++ b/dpctl/tensor/libtensor/source/boolean_advanced_indexing.hpp @@ -24,7 +24,7 @@ //===----------------------------------------------------------------------===// #pragma once -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/boolean_reductions.cpp b/dpctl/tensor/libtensor/source/boolean_reductions.cpp index 5f3c1f5e51..32deab6da9 100644 --- a/dpctl/tensor/libtensor/source/boolean_reductions.cpp +++ b/dpctl/tensor/libtensor/source/boolean_reductions.cpp @@ -24,8 +24,8 @@ /// dpctl.tensor.all and dpctl.tensor.any //===----------------------------------------------------------------------===// -#include #include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/boolean_reductions.hpp b/dpctl/tensor/libtensor/source/boolean_reductions.hpp index 5a0d5d381a..4d59463f8b 100644 --- a/dpctl/tensor/libtensor/source/boolean_reductions.hpp +++ b/dpctl/tensor/libtensor/source/boolean_reductions.hpp @@ -25,11 +25,11 @@ #pragma once #include "dpctl4pybind11.hpp" -#include #include #include #include #include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp index 290ab88fe8..51ddd81312 100644 --- 
a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp +++ b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp @@ -22,7 +22,6 @@ /// This file defines functions of dpctl.tensor._tensor_impl extensions //===----------------------------------------------------------------------===// -#include #include #include #include @@ -30,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.hpp b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.hpp index c2161f1ba6..c8196b416a 100644 --- a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.hpp +++ b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.hpp @@ -23,7 +23,7 @@ //===----------------------------------------------------------------------===// #pragma once -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/copy_for_reshape.cpp b/dpctl/tensor/libtensor/source/copy_for_reshape.cpp index c9ab58528a..235878b820 100644 --- a/dpctl/tensor/libtensor/source/copy_for_reshape.cpp +++ b/dpctl/tensor/libtensor/source/copy_for_reshape.cpp @@ -22,7 +22,7 @@ /// This file defines functions of dpctl.tensor._tensor_impl extensions //===----------------------------------------------------------------------===// -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/copy_for_reshape.hpp b/dpctl/tensor/libtensor/source/copy_for_reshape.hpp index 2f25a68480..cd4ca68ff0 100644 --- a/dpctl/tensor/libtensor/source/copy_for_reshape.hpp +++ b/dpctl/tensor/libtensor/source/copy_for_reshape.hpp @@ -23,7 +23,7 @@ //===----------------------------------------------------------------------===// #pragma once -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/copy_for_roll.cpp b/dpctl/tensor/libtensor/source/copy_for_roll.cpp index cc319e6e08..ab36f543af 100644 --- a/dpctl/tensor/libtensor/source/copy_for_roll.cpp +++ 
b/dpctl/tensor/libtensor/source/copy_for_roll.cpp @@ -22,7 +22,7 @@ /// This file defines functions of dpctl.tensor._tensor_impl extensions //===----------------------------------------------------------------------===// -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/copy_for_roll.hpp b/dpctl/tensor/libtensor/source/copy_for_roll.hpp index 38e84b9c6a..357d821eff 100644 --- a/dpctl/tensor/libtensor/source/copy_for_roll.hpp +++ b/dpctl/tensor/libtensor/source/copy_for_roll.hpp @@ -23,7 +23,7 @@ //===----------------------------------------------------------------------===// #pragma once -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp index bb367a42b9..f644522c18 100644 --- a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp +++ b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp @@ -22,8 +22,8 @@ /// This file defines functions of dpctl.tensor._tensor_impl extensions //===----------------------------------------------------------------------===// -#include #include +#include #include #include "dpctl4pybind11.hpp" diff --git a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.hpp b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.hpp index 3f1833ec99..247a5d7314 100644 --- a/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.hpp +++ b/dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.hpp @@ -23,7 +23,7 @@ //===----------------------------------------------------------------------===// #pragma once -#include +#include #include #include "dpctl4pybind11.hpp" diff --git a/dpctl/tensor/libtensor/source/device_support_queries.cpp b/dpctl/tensor/libtensor/source/device_support_queries.cpp index 9f793cb00a..cb0dbc02a5 100644 --- a/dpctl/tensor/libtensor/source/device_support_queries.cpp +++ 
b/dpctl/tensor/libtensor/source/device_support_queries.cpp @@ -25,9 +25,9 @@ #include #include "dpctl4pybind11.hpp" -#include #include #include +#include namespace dpctl { diff --git a/dpctl/tensor/libtensor/source/device_support_queries.hpp b/dpctl/tensor/libtensor/source/device_support_queries.hpp index 3367f8bfc2..efffd4ac93 100644 --- a/dpctl/tensor/libtensor/source/device_support_queries.hpp +++ b/dpctl/tensor/libtensor/source/device_support_queries.hpp @@ -26,9 +26,9 @@ #include #include "dpctl4pybind11.hpp" -#include #include #include +#include namespace dpctl { diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index 043cac0cd2..9ab7c0807c 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -24,10 +24,10 @@ //===----------------------------------------------------------------------===// #include "dpctl4pybind11.hpp" -#include #include #include #include +#include #include #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.hpp b/dpctl/tensor/libtensor/source/elementwise_functions.hpp index 523e4259c3..666e34773c 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.hpp @@ -26,10 +26,10 @@ #pragma once #include "dpctl4pybind11.hpp" -#include #include #include #include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/eye_ctor.cpp b/dpctl/tensor/libtensor/source/eye_ctor.cpp index 5d7657d047..c768a5e395 100644 --- a/dpctl/tensor/libtensor/source/eye_ctor.cpp +++ b/dpctl/tensor/libtensor/source/eye_ctor.cpp @@ -22,7 +22,7 @@ /// This file defines functions of dpctl.tensor._tensor_impl extensions //===--------------------------------------------------------------------===// -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/eye_ctor.hpp 
b/dpctl/tensor/libtensor/source/eye_ctor.hpp index 4307e0f3b2..58249f08d7 100644 --- a/dpctl/tensor/libtensor/source/eye_ctor.hpp +++ b/dpctl/tensor/libtensor/source/eye_ctor.hpp @@ -23,7 +23,7 @@ //===--------------------------------------------------------------------===// #pragma once -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/full_ctor.cpp b/dpctl/tensor/libtensor/source/full_ctor.cpp index 085bdcaf2a..c8004bfae8 100644 --- a/dpctl/tensor/libtensor/source/full_ctor.cpp +++ b/dpctl/tensor/libtensor/source/full_ctor.cpp @@ -23,10 +23,10 @@ //===--------------------------------------------------------------------===// #include "dpctl4pybind11.hpp" -#include #include #include #include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/full_ctor.hpp b/dpctl/tensor/libtensor/source/full_ctor.hpp index 3894babf1f..66456f9a7f 100644 --- a/dpctl/tensor/libtensor/source/full_ctor.hpp +++ b/dpctl/tensor/libtensor/source/full_ctor.hpp @@ -23,7 +23,7 @@ //===--------------------------------------------------------------------===// #pragma once -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp index a17a229fc1..0fd3d2615d 100644 --- a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp +++ b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp @@ -23,13 +23,13 @@ /// dpctl.tensor.put //===----------------------------------------------------------------------===// -#include #include #include #include #include #include #include +#include #include #include "dpctl4pybind11.hpp" diff --git a/dpctl/tensor/libtensor/source/integer_advanced_indexing.hpp b/dpctl/tensor/libtensor/source/integer_advanced_indexing.hpp index f845f7d23b..011fe670a9 100644 --- a/dpctl/tensor/libtensor/source/integer_advanced_indexing.hpp +++ b/dpctl/tensor/libtensor/source/integer_advanced_indexing.hpp @@ -24,7 
+24,7 @@ //===----------------------------------------------------------------------===// #pragma once -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/linear_sequences.cpp b/dpctl/tensor/libtensor/source/linear_sequences.cpp index 34db93de12..72d292df5f 100644 --- a/dpctl/tensor/libtensor/source/linear_sequences.cpp +++ b/dpctl/tensor/libtensor/source/linear_sequences.cpp @@ -23,10 +23,10 @@ //===--------------------------------------------------------------------===// #include "dpctl4pybind11.hpp" -#include #include #include #include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/linear_sequences.hpp b/dpctl/tensor/libtensor/source/linear_sequences.hpp index 61e613b45f..fd13677680 100644 --- a/dpctl/tensor/libtensor/source/linear_sequences.hpp +++ b/dpctl/tensor/libtensor/source/linear_sequences.hpp @@ -23,7 +23,7 @@ //===--------------------------------------------------------------------===// #pragma once -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/reduction_over_axis.hpp b/dpctl/tensor/libtensor/source/reduction_over_axis.hpp index 1a9cb6f5e7..46ef02f064 100644 --- a/dpctl/tensor/libtensor/source/reduction_over_axis.hpp +++ b/dpctl/tensor/libtensor/source/reduction_over_axis.hpp @@ -25,10 +25,12 @@ #pragma once -#include #include +#include #include #include +#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/repeat.cpp b/dpctl/tensor/libtensor/source/repeat.cpp index f3a20cbbaa..fe11684ab9 100644 --- a/dpctl/tensor/libtensor/source/repeat.cpp +++ b/dpctl/tensor/libtensor/source/repeat.cpp @@ -23,11 +23,11 @@ //===--------------------------------------------------------------------===// #include "dpctl4pybind11.hpp" -#include #include #include #include #include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/repeat.hpp b/dpctl/tensor/libtensor/source/repeat.hpp index 65ace36516..2d37aa33e9 100644 --- 
a/dpctl/tensor/libtensor/source/repeat.hpp +++ b/dpctl/tensor/libtensor/source/repeat.hpp @@ -23,7 +23,7 @@ //===--------------------------------------------------------------------===// #pragma once -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index 0e8b4236b6..8bab9c8bb8 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -22,12 +22,12 @@ /// This file defines functions of dpctl.tensor._tensor_impl extensions //===----------------------------------------------------------------------===// -#include #include #include #include #include #include +#include #include #include #include diff --git a/dpctl/tensor/libtensor/source/triul_ctor.cpp b/dpctl/tensor/libtensor/source/triul_ctor.cpp index 40dd5cf48a..03fcd2994c 100644 --- a/dpctl/tensor/libtensor/source/triul_ctor.cpp +++ b/dpctl/tensor/libtensor/source/triul_ctor.cpp @@ -22,7 +22,7 @@ /// This file defines functions of dpctl.tensor._tensor_impl extensions //===--------------------------------------------------------------------===// -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/triul_ctor.hpp b/dpctl/tensor/libtensor/source/triul_ctor.hpp index 9e7053c638..de80d20407 100644 --- a/dpctl/tensor/libtensor/source/triul_ctor.hpp +++ b/dpctl/tensor/libtensor/source/triul_ctor.hpp @@ -23,7 +23,7 @@ //===--------------------------------------------------------------------===// #pragma once -#include +#include #include #include diff --git a/dpctl/tensor/libtensor/source/where.cpp b/dpctl/tensor/libtensor/source/where.cpp index ed782bda34..e533fd2ee1 100644 --- a/dpctl/tensor/libtensor/source/where.cpp +++ b/dpctl/tensor/libtensor/source/where.cpp @@ -24,12 +24,12 @@ //===----------------------------------------------------------------------===// #include "dpctl4pybind11.hpp" -#include #include #include #include #include #include 
+#include #include #include "kernels/where.hpp" diff --git a/dpctl/tensor/libtensor/source/where.hpp b/dpctl/tensor/libtensor/source/where.hpp index 6fe6527080..2ca3b39e02 100644 --- a/dpctl/tensor/libtensor/source/where.hpp +++ b/dpctl/tensor/libtensor/source/where.hpp @@ -24,7 +24,7 @@ //===----------------------------------------------------------------------===// #pragma once -#include +#include #include #include From 44abcb4b8d1146c52bbc87e017d9a487dded88cf Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 11 Oct 2023 02:07:19 -0500 Subject: [PATCH 11/19] Use experimental::complex for in-place division --- .../elementwise_functions/true_divide.hpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp index fcbb6adc08..742a9c1a80 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp @@ -407,7 +407,24 @@ template struct TrueDivideInplaceFunctor void operator()(resT &res, const argT &in) { - res /= in; + if constexpr (tu_ns::is_complex::value) { + using res_rT = typename resT::value_type; + if constexpr (tu_ns::is_complex::value) { + using arg_rT = typename argT::value_type; + + auto res1 = exprm_ns::complex(res); + res1 /= exprm_ns::complex(in); + res = res1; + } + else { + auto res1 = exprm_ns::complex(res); + res1 /= in; + res = res1; + } + } + else { + res /= in; + } } template From 23aeec6833fec091931b4dbe6f9362676d3f2f37 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 14 Oct 2023 13:47:57 -0500 Subject: [PATCH 12/19] include "CL/sycl.hpp" -> include "sycl/sycl.hpp" --- dpctl/_host_task_util.hpp | 2 +- dpctl/apis/include/dpctl4pybind11.hpp | 2 +- dpctl/sycl.pxd | 2 +- dpctl/tensor/libtensor/source/reduction_over_axis.cpp | 2 +- 4 files 
changed, 4 insertions(+), 4 deletions(-) diff --git a/dpctl/_host_task_util.hpp b/dpctl/_host_task_util.hpp index 8db17594fd..308b85a249 100644 --- a/dpctl/_host_task_util.hpp +++ b/dpctl/_host_task_util.hpp @@ -31,7 +31,7 @@ #include "Python.h" #include "syclinterface/dpctl_data_types.h" -#include +#include int async_dec_ref(DPCTLSyclQueueRef QRef, PyObject **obj_array, diff --git a/dpctl/apis/include/dpctl4pybind11.hpp b/dpctl/apis/include/dpctl4pybind11.hpp index f68826af48..10ee4602c3 100644 --- a/dpctl/apis/include/dpctl4pybind11.hpp +++ b/dpctl/apis/include/dpctl4pybind11.hpp @@ -26,10 +26,10 @@ #pragma once #include "dpctl_capi.h" -#include #include #include #include +#include #include #include diff --git a/dpctl/sycl.pxd b/dpctl/sycl.pxd index 918f476298..0318868ef8 100644 --- a/dpctl/sycl.pxd +++ b/dpctl/sycl.pxd @@ -20,7 +20,7 @@ from . cimport _backend as dpctl_backend -cdef extern from "CL/sycl.hpp" namespace "sycl": +cdef extern from "sycl/sycl.hpp" namespace "sycl": cdef cppclass queue "sycl::queue": pass diff --git a/dpctl/tensor/libtensor/source/reduction_over_axis.cpp b/dpctl/tensor/libtensor/source/reduction_over_axis.cpp index c67fcd5ba3..00e4a0a076 100644 --- a/dpctl/tensor/libtensor/source/reduction_over_axis.cpp +++ b/dpctl/tensor/libtensor/source/reduction_over_axis.cpp @@ -22,8 +22,8 @@ /// This file defines functions of dpctl.tensor._tensor_impl extensions //===--------------------------------------------------------------------===// -#include #include +#include #include #include From 66ba04e17edcd02b0bd131e096076785a5bf7648 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 14 Oct 2023 15:23:49 -0500 Subject: [PATCH 13/19] include CL/sycl.hpp -> include sycl/sycl.hpp --- .../helper/include/dpctl_error_handlers.h | 2 +- .../helper/include/dpctl_utils_helper.h | 2 +- .../include/dpctl_device_selection.hpp | 2 +- .../include/dpctl_sycl_type_casters.hpp | 2 +- libsyclinterface/source/dpctl_device_selection.cpp | 2 +- 
.../source/dpctl_sycl_context_interface.cpp | 2 +- .../source/dpctl_sycl_device_interface.cpp | 2 +- .../source/dpctl_sycl_device_manager.cpp | 2 +- .../dpctl_sycl_device_selector_interface.cpp | 2 +- .../source/dpctl_sycl_event_interface.cpp | 2 +- .../source/dpctl_sycl_kernel_bundle_interface.cpp | 14 +++----------- .../source/dpctl_sycl_kernel_interface.cpp | 2 +- .../source/dpctl_sycl_platform_interface.cpp | 2 +- .../source/dpctl_sycl_platform_manager.cpp | 2 +- .../source/dpctl_sycl_queue_interface.cpp | 2 +- .../source/dpctl_sycl_queue_manager.cpp | 2 +- .../source/dpctl_sycl_usm_interface.cpp | 2 +- libsyclinterface/tests/test_helper.cpp | 2 +- .../tests/test_sycl_context_interface.cpp | 2 +- .../tests/test_sycl_device_aspects.cpp | 2 +- .../tests/test_sycl_device_interface.cpp | 2 +- .../tests/test_sycl_device_invalid_filters.cpp | 2 +- .../tests/test_sycl_device_selector_interface.cpp | 2 +- .../tests/test_sycl_device_subdevices.cpp | 2 +- .../tests/test_sycl_event_interface.cpp | 2 +- .../tests/test_sycl_kernel_bundle_interface.cpp | 2 +- .../tests/test_sycl_kernel_interface.cpp | 2 +- .../tests/test_sycl_platform_interface.cpp | 2 +- .../tests/test_sycl_platform_invalid_filters.cpp | 2 +- .../tests/test_sycl_queue_interface.cpp | 2 +- libsyclinterface/tests/test_sycl_queue_manager.cpp | 2 +- libsyclinterface/tests/test_sycl_queue_submit.cpp | 2 +- libsyclinterface/tests/test_sycl_usm_interface.cpp | 2 +- 33 files changed, 35 insertions(+), 43 deletions(-) diff --git a/libsyclinterface/helper/include/dpctl_error_handlers.h b/libsyclinterface/helper/include/dpctl_error_handlers.h index 2947dd1d5e..5c7c12f1a4 100644 --- a/libsyclinterface/helper/include/dpctl_error_handlers.h +++ b/libsyclinterface/helper/include/dpctl_error_handlers.h @@ -27,7 +27,7 @@ #include "Support/DllExport.h" #include "dpctl_error_handler_type.h" -#include +#include /*! * @brief Functor class used by DPCTL to handle SYCL asynchronous errors. 
diff --git a/libsyclinterface/helper/include/dpctl_utils_helper.h b/libsyclinterface/helper/include/dpctl_utils_helper.h index 9ed29514df..f7f484a32a 100644 --- a/libsyclinterface/helper/include/dpctl_utils_helper.h +++ b/libsyclinterface/helper/include/dpctl_utils_helper.h @@ -26,7 +26,7 @@ #include "Support/DllExport.h" #include "dpctl_sycl_enum_types.h" -#include +#include /*! * @brief Converts a sycl::info::device_type input value to a string. diff --git a/libsyclinterface/include/dpctl_device_selection.hpp b/libsyclinterface/include/dpctl_device_selection.hpp index 9da0072ab1..605078586c 100644 --- a/libsyclinterface/include/dpctl_device_selection.hpp +++ b/libsyclinterface/include/dpctl_device_selection.hpp @@ -28,7 +28,7 @@ #pragma once #include "Support/DllExport.h" -#include +#include namespace dpctl { diff --git a/libsyclinterface/include/dpctl_sycl_type_casters.hpp b/libsyclinterface/include/dpctl_sycl_type_casters.hpp index 470165afdd..107fc43ff4 100644 --- a/libsyclinterface/include/dpctl_sycl_type_casters.hpp +++ b/libsyclinterface/include/dpctl_sycl_type_casters.hpp @@ -30,7 +30,7 @@ #include "dpctl_device_selection.hpp" #include "dpctl_sycl_types.h" -#include +#include #include namespace dpctl::syclinterface diff --git a/libsyclinterface/source/dpctl_device_selection.cpp b/libsyclinterface/source/dpctl_device_selection.cpp index 7203bc3b1a..299ca5be41 100644 --- a/libsyclinterface/source/dpctl_device_selection.cpp +++ b/libsyclinterface/source/dpctl_device_selection.cpp @@ -27,7 +27,7 @@ #include "dpctl_device_selection.hpp" #include "Config/dpctl_config.h" -#include +#include namespace { diff --git a/libsyclinterface/source/dpctl_sycl_context_interface.cpp b/libsyclinterface/source/dpctl_sycl_context_interface.cpp index a19286a779..ab9923652c 100644 --- a/libsyclinterface/source/dpctl_sycl_context_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_context_interface.cpp @@ -28,7 +28,7 @@ #include "Config/dpctl_config.h" #include 
"dpctl_error_handlers.h" #include "dpctl_sycl_type_casters.hpp" -#include +#include #include #include diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index b5a97013c2..e30a690cfb 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -32,9 +32,9 @@ #include "dpctl_sycl_device_manager.h" #include "dpctl_sycl_type_casters.hpp" #include "dpctl_utils_helper.h" -#include /* SYCL headers */ #include #include +#include /* SYCL headers */ #include #include diff --git a/libsyclinterface/source/dpctl_sycl_device_manager.cpp b/libsyclinterface/source/dpctl_sycl_device_manager.cpp index 0eb71df412..f36f5db21e 100644 --- a/libsyclinterface/source/dpctl_sycl_device_manager.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_manager.cpp @@ -29,10 +29,10 @@ #include "dpctl_sycl_enum_types.h" #include "dpctl_sycl_type_casters.hpp" #include "dpctl_utils_helper.h" -#include /* SYCL headers */ #include /* Config */ #include #include +#include /* SYCL headers */ #include #include #include diff --git a/libsyclinterface/source/dpctl_sycl_device_selector_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_selector_interface.cpp index 9753c32613..834e9a57a2 100644 --- a/libsyclinterface/source/dpctl_sycl_device_selector_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_selector_interface.cpp @@ -28,7 +28,7 @@ #include "dpctl_device_selection.hpp" #include "dpctl_error_handlers.h" #include "dpctl_sycl_type_casters.hpp" -#include /* SYCL headers */ +#include /* SYCL headers */ using namespace sycl; diff --git a/libsyclinterface/source/dpctl_sycl_event_interface.cpp b/libsyclinterface/source/dpctl_sycl_event_interface.cpp index 3f872f4493..7a109faca9 100644 --- a/libsyclinterface/source/dpctl_sycl_event_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_event_interface.cpp @@ -29,7 +29,7 @@ #include 
"dpctl_error_handlers.h" #include "dpctl_sycl_type_casters.hpp" #include "dpctl_utils_helper.h" -#include /* SYCL headers */ +#include /* SYCL headers */ #include using namespace sycl; diff --git a/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp b/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp index 201c8172e3..d32f278c07 100644 --- a/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp @@ -30,14 +30,10 @@ #include "dpctl_dynamic_lib_helper.h" #include "dpctl_error_handlers.h" #include "dpctl_sycl_type_casters.hpp" -#include /* OpenCL headers */ -#include /* Sycl headers */ -#if __has_include() -#include -#else -#include -#endif +#include /* OpenCL headers */ #include +#include +#include /* Sycl headers */ #include #ifdef DPCTL_ENABLE_L0_PROGRAM_CREATION @@ -45,11 +41,7 @@ // not reorder the includes. // clang-format off #include "ze_api.h" /* Level Zero headers */ -#if __has_include() #include -#else -#include -#endif // clang-format on #endif diff --git a/libsyclinterface/source/dpctl_sycl_kernel_interface.cpp b/libsyclinterface/source/dpctl_sycl_kernel_interface.cpp index 8a5af3f179..abd7f9a443 100644 --- a/libsyclinterface/source/dpctl_sycl_kernel_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_kernel_interface.cpp @@ -29,8 +29,8 @@ #include "dpctl_error_handlers.h" #include "dpctl_string_utils.hpp" #include "dpctl_sycl_type_casters.hpp" -#include /* Sycl headers */ #include +#include /* Sycl headers */ using namespace sycl; diff --git a/libsyclinterface/source/dpctl_sycl_platform_interface.cpp b/libsyclinterface/source/dpctl_sycl_platform_interface.cpp index fb0fbd6bd2..409b600355 100644 --- a/libsyclinterface/source/dpctl_sycl_platform_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_platform_interface.cpp @@ -31,11 +31,11 @@ #include "dpctl_string_utils.hpp" #include "dpctl_sycl_type_casters.hpp" #include "dpctl_utils_helper.h" -#include 
#include #include #include #include +#include #include #include diff --git a/libsyclinterface/source/dpctl_sycl_platform_manager.cpp b/libsyclinterface/source/dpctl_sycl_platform_manager.cpp index 6717b48c6f..f01f7a76a5 100644 --- a/libsyclinterface/source/dpctl_sycl_platform_manager.cpp +++ b/libsyclinterface/source/dpctl_sycl_platform_manager.cpp @@ -31,11 +31,11 @@ #include "dpctl_sycl_platform_interface.h" #include "dpctl_sycl_type_casters.hpp" #include "dpctl_utils_helper.h" -#include #include #include #include #include +#include using namespace sycl; diff --git a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp index 4903b888ff..3eec8934a8 100644 --- a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp @@ -31,9 +31,9 @@ #include "dpctl_sycl_device_interface.h" #include "dpctl_sycl_device_manager.h" #include "dpctl_sycl_type_casters.hpp" -#include /* SYCL headers */ #include #include +#include /* SYCL headers */ #include using namespace sycl; diff --git a/libsyclinterface/source/dpctl_sycl_queue_manager.cpp b/libsyclinterface/source/dpctl_sycl_queue_manager.cpp index 54e97c0efa..651689e105 100644 --- a/libsyclinterface/source/dpctl_sycl_queue_manager.cpp +++ b/libsyclinterface/source/dpctl_sycl_queue_manager.cpp @@ -28,7 +28,7 @@ #include "dpctl_error_handlers.h" #include "dpctl_sycl_device_manager.h" #include "dpctl_sycl_type_casters.hpp" -#include /* SYCL headers */ +#include /* SYCL headers */ #include using namespace sycl; diff --git a/libsyclinterface/source/dpctl_sycl_usm_interface.cpp b/libsyclinterface/source/dpctl_sycl_usm_interface.cpp index 2ebae9801e..b993ee32a8 100644 --- a/libsyclinterface/source/dpctl_sycl_usm_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_usm_interface.cpp @@ -29,7 +29,7 @@ #include "dpctl_error_handlers.h" #include "dpctl_sycl_device_interface.h" #include "dpctl_sycl_type_casters.hpp" 
-#include /* SYCL headers */ +#include /* SYCL headers */ #include using namespace sycl; diff --git a/libsyclinterface/tests/test_helper.cpp b/libsyclinterface/tests/test_helper.cpp index ea529cbf24..467274849d 100644 --- a/libsyclinterface/tests/test_helper.cpp +++ b/libsyclinterface/tests/test_helper.cpp @@ -26,9 +26,9 @@ #include "Config/dpctl_config.h" #include "dpctl_utils_helper.h" -#include #include #include +#include struct TestHelperFns : public ::testing::Test { diff --git a/libsyclinterface/tests/test_sycl_context_interface.cpp b/libsyclinterface/tests/test_sycl_context_interface.cpp index 36b2ff6e97..75fbbe7cb2 100644 --- a/libsyclinterface/tests/test_sycl_context_interface.cpp +++ b/libsyclinterface/tests/test_sycl_context_interface.cpp @@ -29,8 +29,8 @@ #include "dpctl_sycl_device_interface.h" #include "dpctl_sycl_device_selector_interface.h" #include "dpctl_sycl_types.h" -#include #include +#include #include using namespace sycl; diff --git a/libsyclinterface/tests/test_sycl_device_aspects.cpp b/libsyclinterface/tests/test_sycl_device_aspects.cpp index 9019d7f718..e2e42db74d 100644 --- a/libsyclinterface/tests/test_sycl_device_aspects.cpp +++ b/libsyclinterface/tests/test_sycl_device_aspects.cpp @@ -30,8 +30,8 @@ #include "dpctl_sycl_enum_types.h" #include "dpctl_sycl_type_casters.hpp" #include "dpctl_utils_helper.h" -#include #include +#include #include namespace diff --git a/libsyclinterface/tests/test_sycl_device_interface.cpp b/libsyclinterface/tests/test_sycl_device_interface.cpp index dd20c738df..a0544482ff 100644 --- a/libsyclinterface/tests/test_sycl_device_interface.cpp +++ b/libsyclinterface/tests/test_sycl_device_interface.cpp @@ -29,8 +29,8 @@ #include "dpctl_sycl_platform_interface.h" #include "dpctl_utils.h" #include "dpctl_utils_helper.h" -#include #include +#include using namespace sycl; diff --git a/libsyclinterface/tests/test_sycl_device_invalid_filters.cpp b/libsyclinterface/tests/test_sycl_device_invalid_filters.cpp index 
c6a722c87a..50cfc6ba67 100644 --- a/libsyclinterface/tests/test_sycl_device_invalid_filters.cpp +++ b/libsyclinterface/tests/test_sycl_device_invalid_filters.cpp @@ -25,8 +25,8 @@ #include "dpctl_sycl_device_interface.h" #include "dpctl_sycl_device_selector_interface.h" -#include #include +#include using namespace sycl; diff --git a/libsyclinterface/tests/test_sycl_device_selector_interface.cpp b/libsyclinterface/tests/test_sycl_device_selector_interface.cpp index eff7e4ee41..8e5df58769 100644 --- a/libsyclinterface/tests/test_sycl_device_selector_interface.cpp +++ b/libsyclinterface/tests/test_sycl_device_selector_interface.cpp @@ -28,8 +28,8 @@ #include "dpctl_sycl_device_manager.h" #include "dpctl_sycl_device_selector_interface.h" #include "dpctl_sycl_type_casters.hpp" -#include #include +#include using namespace sycl; diff --git a/libsyclinterface/tests/test_sycl_device_subdevices.cpp b/libsyclinterface/tests/test_sycl_device_subdevices.cpp index cb73359059..f2039c6dcf 100644 --- a/libsyclinterface/tests/test_sycl_device_subdevices.cpp +++ b/libsyclinterface/tests/test_sycl_device_subdevices.cpp @@ -32,8 +32,8 @@ #include "dpctl_sycl_type_casters.hpp" #include "dpctl_utils.h" #include "dpctl_utils_helper.h" -#include #include +#include using namespace sycl; using namespace dpctl::syclinterface; diff --git a/libsyclinterface/tests/test_sycl_event_interface.cpp b/libsyclinterface/tests/test_sycl_event_interface.cpp index 0cc11af731..615755ebc3 100644 --- a/libsyclinterface/tests/test_sycl_event_interface.cpp +++ b/libsyclinterface/tests/test_sycl_event_interface.cpp @@ -27,8 +27,8 @@ #include "Config/dpctl_config.h" #include "dpctl_sycl_event_interface.h" #include "dpctl_sycl_types.h" -#include #include +#include #include using namespace sycl; diff --git a/libsyclinterface/tests/test_sycl_kernel_bundle_interface.cpp b/libsyclinterface/tests/test_sycl_kernel_bundle_interface.cpp index 6383b730a0..c450d6722d 100644 --- 
a/libsyclinterface/tests/test_sycl_kernel_bundle_interface.cpp +++ b/libsyclinterface/tests/test_sycl_kernel_bundle_interface.cpp @@ -34,11 +34,11 @@ #include "dpctl_sycl_kernel_interface.h" #include "dpctl_sycl_queue_interface.h" #include "dpctl_sycl_queue_manager.h" -#include #include #include #include #include +#include using namespace sycl; diff --git a/libsyclinterface/tests/test_sycl_kernel_interface.cpp b/libsyclinterface/tests/test_sycl_kernel_interface.cpp index 97fba96bc3..d7e7cb4087 100644 --- a/libsyclinterface/tests/test_sycl_kernel_interface.cpp +++ b/libsyclinterface/tests/test_sycl_kernel_interface.cpp @@ -33,9 +33,9 @@ #include "dpctl_sycl_queue_interface.h" #include "dpctl_sycl_queue_manager.h" #include "dpctl_utils.h" -#include #include #include +#include using namespace sycl; diff --git a/libsyclinterface/tests/test_sycl_platform_interface.cpp b/libsyclinterface/tests/test_sycl_platform_interface.cpp index f04cead0e1..3164aef7ec 100644 --- a/libsyclinterface/tests/test_sycl_platform_interface.cpp +++ b/libsyclinterface/tests/test_sycl_platform_interface.cpp @@ -29,8 +29,8 @@ #include "dpctl_sycl_platform_interface.h" #include "dpctl_sycl_platform_manager.h" #include "dpctl_utils.h" -#include #include +#include #include using namespace sycl; diff --git a/libsyclinterface/tests/test_sycl_platform_invalid_filters.cpp b/libsyclinterface/tests/test_sycl_platform_invalid_filters.cpp index 5499f88430..41f0cc4a53 100644 --- a/libsyclinterface/tests/test_sycl_platform_invalid_filters.cpp +++ b/libsyclinterface/tests/test_sycl_platform_invalid_filters.cpp @@ -26,8 +26,8 @@ #include "dpctl_sycl_device_selector_interface.h" #include "dpctl_sycl_platform_interface.h" -#include #include +#include using namespace sycl; diff --git a/libsyclinterface/tests/test_sycl_queue_interface.cpp b/libsyclinterface/tests/test_sycl_queue_interface.cpp index 8d23929d39..db170d8d26 100644 --- a/libsyclinterface/tests/test_sycl_queue_interface.cpp +++ 
b/libsyclinterface/tests/test_sycl_queue_interface.cpp @@ -34,8 +34,8 @@ #include "dpctl_sycl_queue_manager.h" #include "dpctl_sycl_type_casters.hpp" #include "dpctl_sycl_usm_interface.h" -#include #include +#include using namespace sycl; using namespace dpctl::syclinterface; diff --git a/libsyclinterface/tests/test_sycl_queue_manager.cpp b/libsyclinterface/tests/test_sycl_queue_manager.cpp index 4f9e84ea20..0fc640f4ab 100644 --- a/libsyclinterface/tests/test_sycl_queue_manager.cpp +++ b/libsyclinterface/tests/test_sycl_queue_manager.cpp @@ -30,8 +30,8 @@ #include "dpctl_sycl_queue_interface.h" #include "dpctl_sycl_queue_manager.h" #include "dpctl_sycl_type_casters.hpp" -#include #include +#include #include using namespace std; diff --git a/libsyclinterface/tests/test_sycl_queue_submit.cpp b/libsyclinterface/tests/test_sycl_queue_submit.cpp index 680314b719..3ef37978d3 100644 --- a/libsyclinterface/tests/test_sycl_queue_submit.cpp +++ b/libsyclinterface/tests/test_sycl_queue_submit.cpp @@ -32,10 +32,10 @@ #include "dpctl_sycl_queue_interface.h" #include "dpctl_sycl_type_casters.hpp" #include "dpctl_sycl_usm_interface.h" -#include #include #include #include +#include namespace { diff --git a/libsyclinterface/tests/test_sycl_usm_interface.cpp b/libsyclinterface/tests/test_sycl_usm_interface.cpp index a6dbb2290a..99f8e52051 100644 --- a/libsyclinterface/tests/test_sycl_usm_interface.cpp +++ b/libsyclinterface/tests/test_sycl_usm_interface.cpp @@ -32,9 +32,9 @@ #include "dpctl_sycl_queue_manager.h" #include "dpctl_sycl_type_casters.hpp" #include "dpctl_sycl_usm_interface.h" -#include #include #include +#include using namespace sycl; From fd9df2a98e4a6d8b14b56e5dad390a93d9973f26 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 18 Oct 2023 12:07:02 -0700 Subject: [PATCH 14/19] Add target_compile_options setting sycl-targets for targets needing SYCL For every CMake target, where add_sycl_to_target is used, we also run target_compile_options( ${target_name} 
PRIVATE -fsycl-targets=spir64-unknown-unknown,nvptx64-nvidia-cuda ) --- dpctl/CMakeLists.txt | 8 +++++++- dpctl/tensor/CMakeLists.txt | 7 +++++++ dpctl/utils/CMakeLists.txt | 6 ++++++ libsyclinterface/CMakeLists.txt | 6 ++++++ libsyclinterface/tests/CMakeLists.txt | 6 ++++++ 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/dpctl/CMakeLists.txt b/dpctl/CMakeLists.txt index cb872ff45f..604506e2bc 100644 --- a/dpctl/CMakeLists.txt +++ b/dpctl/CMakeLists.txt @@ -143,7 +143,13 @@ function(build_dpctl_ext _trgt _src _dest) add_custom_target(${_cythonize_trgt} DEPENDS ${_src}) Python_add_library(${_trgt} MODULE WITH_SOABI ${_generated_src}) if (BUILD_DPCTL_EXT_SYCL) - add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src}) + add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src}) + # make fat binary + target_compile_options( + ${_trgt} + PRIVATE + -fsycl-targets=spir64-unknown-unknown,nvptx64-nvidia-cuda + ) endif() target_include_directories(${_trgt} PRIVATE ${NumPy_INCLUDE_DIR} ${DPCTL_INCLUDE_DIR}) add_dependencies(${_trgt} _build_time_create_dpctl_include_copy ${_cythonize_trgt}) diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index 15bc58f57b..3eabfc433a 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -55,6 +55,13 @@ set(_tensor_impl_sources set(python_module_name _tensor_impl) pybind11_add_module(${python_module_name} MODULE ${_tensor_impl_sources}) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_tensor_impl_sources}) +# make fat binary +target_compile_options( + ${python_module_name} + PRIVATE + -fsycl-targets=spir64-unknown-unknown,nvptx64-nvidia-cuda +) + set(_clang_prefix "") if (WIN32) set(_clang_prefix "/clang:") diff --git a/dpctl/utils/CMakeLists.txt b/dpctl/utils/CMakeLists.txt index aadc1c0fe0..8b9225de69 100644 --- a/dpctl/utils/CMakeLists.txt +++ b/dpctl/utils/CMakeLists.txt @@ -21,6 +21,12 @@ pybind11_add_module(${python_module_name} MODULE ${_module_src} )
add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) +# make fat binary +target_compile_options( + ${python_module_name} + PRIVATE + -fsycl-targets=spir64-unknown-unknown,nvptx64-nvidia-cuda +) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../include diff --git a/libsyclinterface/CMakeLists.txt b/libsyclinterface/CMakeLists.txt index 64ec3271b1..04dc14153d 100644 --- a/libsyclinterface/CMakeLists.txt +++ b/libsyclinterface/CMakeLists.txt @@ -205,6 +205,12 @@ add_library(DPCTLSyclInterface ${helper_sources} ) add_sycl_to_target(TARGET DPCTLSyclInterface SOURCES ${sources} ${helper_sources}) +# make fat binary +target_compile_options( + DPCTLSyclInterface + PRIVATE + -fsycl-targets=spir64-unknown-unknown,nvptx64-nvidia-cuda +) if(DPCTL_GENERATE_COVERAGE) target_link_options(DPCTLSyclInterface diff --git a/libsyclinterface/tests/CMakeLists.txt b/libsyclinterface/tests/CMakeLists.txt index 472e1787fa..17dff208e5 100644 --- a/libsyclinterface/tests/CMakeLists.txt +++ b/libsyclinterface/tests/CMakeLists.txt @@ -52,6 +52,12 @@ add_sycl_to_target( ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_usm_interface.cpp ) +# make fat binary +target_compile_options( + dpctl_c_api_tests + PRIVATE + -fsycl-targets=spir64-unknown-unknown,nvptx64-nvidia-cuda +) if(DPCTL_GENERATE_COVERAGE) target_include_directories(dpctl_c_api_tests From 9561b6e178e4c08636a16376056fb492e6954d4c Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 18 Oct 2023 13:17:07 -0700 Subject: [PATCH 15/19] Fix compilation error --- dpctl/CMakeLists.txt | 7 ++++++- dpctl/tensor/CMakeLists.txt | 7 ++++++- dpctl/utils/CMakeLists.txt | 7 ++++++- libsyclinterface/CMakeLists.txt | 7 ++++++- libsyclinterface/tests/CMakeLists.txt | 7 ++++++- 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/dpctl/CMakeLists.txt b/dpctl/CMakeLists.txt index 604506e2bc..3f81c66972 100644 --- a/dpctl/CMakeLists.txt 
+++ b/dpctl/CMakeLists.txt @@ -148,8 +148,13 @@ function(build_dpctl_ext _trgt _src _dest) target_compile_options( ${_trgt} PRIVATE - -fsycl-targets=spir64-unknown-unknown,nvptx64-nvidia-cuda + -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown ) + target_link_options( + ${_trgt} + PRIVATE + -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown + ) endif() target_include_directories(${_trgt} PRIVATE ${NumPy_INCLUDE_DIR} ${DPCTL_INCLUDE_DIR}) add_dependencies(${_trgt} _build_time_create_dpctl_include_copy ${_cythonize_trgt}) diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index 3eabfc433a..ed82f58437 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -59,7 +59,12 @@ add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_tensor_impl_sources}) target_compile_options( ${python_module_name} PRIVATE - -fsycl-targets=spir64-unknown-unknown,nvptx64-nvidia-cuda + -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown +) +target_link_options( + ${python_module_name} + PRIVATE + -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown ) set(_clang_prefix "") diff --git a/dpctl/utils/CMakeLists.txt b/dpctl/utils/CMakeLists.txt index 8b9225de69..194946e4da 100644 --- a/dpctl/utils/CMakeLists.txt +++ b/dpctl/utils/CMakeLists.txt @@ -25,7 +25,12 @@ add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) target_compile_options( ${python_module_name} PRIVATE - -fsycl-targets=spir64-unknown-unknown,nvptx64-nvidia-cuda + -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown +) +target_link_options( + ${python_module_name} + PRIVATE + -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown ) target_include_directories(${python_module_name} PRIVATE diff --git a/libsyclinterface/CMakeLists.txt b/libsyclinterface/CMakeLists.txt index 04dc14153d..916421d455 100644 --- a/libsyclinterface/CMakeLists.txt +++ b/libsyclinterface/CMakeLists.txt @@ -209,7 +209,12 @@ add_sycl_to_target(TARGET 
DPCTLSyclInterface SOURCES ${sources} ${helper_sources target_compile_options( DPCTLSyclInterface PRIVATE - -fsycl-targets=spir64-unknown-unknown,nvptx64-nvidia-cuda + -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown +) +target_link_options( + DPCTLSyclInterface + PRIVATE + -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown ) if(DPCTL_GENERATE_COVERAGE) diff --git a/libsyclinterface/tests/CMakeLists.txt b/libsyclinterface/tests/CMakeLists.txt index 17dff208e5..e83db384dd 100644 --- a/libsyclinterface/tests/CMakeLists.txt +++ b/libsyclinterface/tests/CMakeLists.txt @@ -56,7 +56,12 @@ add_sycl_to_target( target_compile_options( dpctl_c_api_tests PRIVATE - -fsycl-targets=spir64-unknown-unknown,nvptx64-nvidia-cuda + -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown +) +target_link_options( + dpctl_c_api_tests + PRIVATE + -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown ) if(DPCTL_GENERATE_COVERAGE) From c101748c3e25b229bc57df107473a67908f5a1bf Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 25 Oct 2023 08:03:16 -0700 Subject: [PATCH 16/19] Replace overlooked std::log, std::sinh, std::exp for complex types Replaced them with uses of sycl::ext::oneapi::experimental namespace functions instead. 
--- .../kernels/elementwise_functions/acos.hpp | 7 ++++--- .../kernels/elementwise_functions/acosh.hpp | 12 +++++++----- .../kernels/elementwise_functions/asin.hpp | 15 ++++++++------- .../kernels/elementwise_functions/asinh.hpp | 9 ++++++--- .../kernels/elementwise_functions/exp2.hpp | 6 ++++-- .../include/kernels/elementwise_functions/sin.hpp | 6 +++--- .../kernels/elementwise_functions/sinh.hpp | 4 ++-- 7 files changed, 34 insertions(+), 25 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp index 28968de761..b64296ec1a 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp @@ -105,10 +105,11 @@ template struct AcosFunctor constexpr realT r_eps = realT(1) / std::numeric_limits::epsilon(); if (std::abs(x) > r_eps || std::abs(y) > r_eps) { - argT log_in = std::log(in); + using sycl_complexT = exprm_ns::complex; + sycl_complexT log_in = exprm_ns::log(exprm_ns::complex(in)); - const realT wx = std::real(log_in); - const realT wy = std::imag(log_in); + const realT wx = log_in.real(); + const realT wy = log_in.imag(); const realT rx = std::abs(wy); realT ry = wx + std::log(realT(2)); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp index 7323579df5..56730a411c 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp @@ -48,7 +48,7 @@ namespace acosh namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace cmplx_ns = sycl::ext::oneapi::experimental; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -112,16 +112,18 @@ template struct AcoshFunctor * For large x or y 
including acos(+-Inf + I*+-Inf) */ if (std::abs(x) > r_eps || std::abs(y) > r_eps) { - const realT wx = std::real(std::log(in)); - const realT wy = std::imag(std::log(in)); + using sycl_complexT = typename exprm_ns::complex; + const sycl_complexT log_in = exprm_ns::log(sycl_complexT(in)); + const realT wx = log_in.real(); + const realT wy = log_in.imag(); const realT rx = std::abs(wy); realT ry = wx + std::log(realT(2)); acos_in = resT{rx, (std::signbit(y)) ? ry : -ry}; } else { /* ordinary cases */ - acos_in = cmplx_ns::acos( - cmplx_ns::complex(in)); // std::acos(in); + acos_in = exprm_ns::acos( + exprm_ns::complex(in)); // std::acos(in); } /* Now we calculate acosh(z) */ diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp index 595d75eeca..5f83c95323 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp @@ -119,17 +119,18 @@ template struct AsinFunctor constexpr realT r_eps = realT(1) / std::numeric_limits::epsilon(); if (std::abs(x) > r_eps || std::abs(y) > r_eps) { - const resT z = {x, y}; + using sycl_complexT = exprm_ns::complex; + const sycl_complexT z {x, y}; realT wx, wy; if (!std::signbit(x)) { - auto log_z = std::log(z); - wx = std::real(log_z) + std::log(realT(2)); - wy = std::imag(log_z); + auto log_z = exprm_ns::log(z); + wx = log_z.real() + std::log(realT(2)); + wy = log_z.imag(); } else { - auto log_mz = std::log(-z); - wx = std::real(log_mz) + std::log(realT(2)); - wy = std::imag(log_mz); + auto log_mz = exprm_ns::log(-z); + wx = log_mz.real() + std::log(realT(2)); + wy = log_mz.imag(); } const realT asinh_re = std::copysign(wx, x); const realT asinh_im = std::copysign(wy, y); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp index 
d126afba87..50ba0d8e86 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp @@ -108,9 +108,12 @@ template struct AsinhFunctor realT(1) / std::numeric_limits::epsilon(); if (std::abs(x) > r_eps || std::abs(y) > r_eps) { - resT log_in = (std::signbit(x)) ? std::log(-in) : std::log(in); - realT wx = std::real(log_in) + std::log(realT(2)); - realT wy = std::imag(log_in); + using sycl_complexT = exprm_ns::complex; + sycl_complexT log_in = (std::signbit(x)) ? + exprm_ns::log(sycl_complexT(-in)) : + exprm_ns::log(sycl_complexT(in)); + realT wx = log_in.real() + std::log(realT(2)); + realT wy = log_in.imag(); const realT res_re = std::copysign(wx, x); const realT res_im = std::copysign(wy, y); return resT{res_re, res_im}; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp index 67ee23df48..b6b2f32e83 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp @@ -24,10 +24,11 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include +#include #include #include "kernels/elementwise_functions/common.hpp" @@ -48,6 +49,7 @@ namespace exp2 namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -76,7 +78,7 @@ template struct Exp2Functor const realT y = std::imag(tmp); if (std::isfinite(x)) { if (std::isfinite(y)) { - return std::exp(tmp); + return exprm_ns::exp(exprm_ns::complex(tmp)); } else { return resT{q_nan, q_nan}; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp 
index 0f35aa306e..e1e9e79c57 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp @@ -48,7 +48,7 @@ namespace sin namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace cmplx_ns = sycl::ext::oneapi::experimental; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -81,8 +81,8 @@ template struct SinFunctor * real and imaginary parts of input are finite. */ if (in_re_finite && in_im_finite) { - return cmplx_ns::sin( - cmplx_ns::complex(in)); // std::sin(in); + return exprm_ns::sin( + exprm_ns::complex(in)); // std::sin(in); } /* diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp index af887431d7..37ac3734e3 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp @@ -48,7 +48,7 @@ namespace sinh namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace cmplx_ns = sycl::ext::oneapi::experimental; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -81,7 +81,7 @@ template struct SinhFunctor * real and imaginary parts of input are finite. */ if (xfinite && yfinite) { - return std::sinh(in); + return exprm_ns::sinh(exprm_ns::complex(in)); } /* * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN. 
From 0827f3d1509c8c1ddf7fc4146ec829bf7a1f456a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 25 Oct 2023 08:04:42 -0700 Subject: [PATCH 17/19] Replaced include CL/sycl.hpp with include sycl/sycl.hpp --- .../libtensor/include/kernels/elementwise_functions/cbrt.hpp | 2 +- .../include/kernels/elementwise_functions/copysign.hpp | 2 +- .../libtensor/include/kernels/elementwise_functions/rsqrt.hpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp index 1d4aa65002..92584f0dfe 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp @@ -24,10 +24,10 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp index b1997d06b4..43e06cb281 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp @@ -24,9 +24,9 @@ //===---------------------------------------------------------------------===// #pragma once -#include #include #include +#include #include #include "utils/offset_utils.hpp" diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp index de51b31c30..d9e0c33081 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp @@ -25,12 +25,12 @@ 
//===---------------------------------------------------------------------===// #pragma once -#include #include #include #include #include #include +#include #include #include "kernels/elementwise_functions/common.hpp" From 5eefdd10cb649103e042dfa3405d4c741a12b349 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 25 Oct 2023 08:05:21 -0700 Subject: [PATCH 18/19] Add DPCTL_TARGET_CUDA Boolean cmake option Also DPCTL_SYCL_TARGETS parameter can be used to specify targets to build for. DPCTL_TARGET_CUDA could be set via cmake option, or via environment variable, e.g. ``` $ DPCTL_TARGET_CUDA=1 python scripts/build_locally.py --verbose ``` --- CMakeLists.txt | 21 +++++++++++++++++++++ dpctl/CMakeLists.txt | 24 +++++++++++++----------- dpctl/tensor/CMakeLists.txt | 24 +++++++++++++----------- dpctl/utils/CMakeLists.txt | 24 +++++++++++++----------- libsyclinterface/CMakeLists.txt | 22 ++++++++++++---------- libsyclinterface/tests/CMakeLists.txt | 2 ++ 6 files changed, 74 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eb53db12ec..adfb4fbddd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,9 +17,30 @@ option(DPCTL_GENERATE_COVERAGE "Build dpctl with coverage instrumentation" OFF ) +option(DPCTL_TARGET_CUDA + "Build DPCTL to target CUDA devices" + OFF +) find_package(IntelSYCL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/cmake NO_DEFAULT_PATH) +set(_dpctl_sycl_targets) +if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x") + if(DPCTL_TARGET_CUDA) + set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown") + else() + if(DEFINED ENV{DPCTL_TARGET_CUDA}) + set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown") + endif() + endif() +else() + set(_dpctl_sycl_targets ${DPCTL_SYCL_TARGETS}) +endif() + +if(_dpctl_sycl_targets) + message(STATUS "Compiling for -fsycl-targets=${_dpctl_sycl_targets}") +endif() + add_subdirectory(libsyclinterface) file(GLOB _dpctl_capi_headers dpctl/apis/include/*.h*) diff --git 
a/dpctl/CMakeLists.txt b/dpctl/CMakeLists.txt index 3f81c66972..616f270ad3 100644 --- a/dpctl/CMakeLists.txt +++ b/dpctl/CMakeLists.txt @@ -144,17 +144,19 @@ function(build_dpctl_ext _trgt _src _dest) Python_add_library(${_trgt} MODULE WITH_SOABI ${_generated_src}) if (BUILD_DPCTL_EXT_SYCL) add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src}) - # make fat binary - target_compile_options( - ${_trgt} - PRIVATE - -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown - ) - target_link_options( - ${_trgt} - PRIVATE - -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown - ) + if(_dpctl_sycl_targets) + # make fat binary + target_compile_options( + ${_trgt} + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) + target_link_options( + ${_trgt} + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) + endif() endif() target_include_directories(${_trgt} PRIVATE ${NumPy_INCLUDE_DIR} ${DPCTL_INCLUDE_DIR}) add_dependencies(${_trgt} _build_time_create_dpctl_include_copy ${_cythonize_trgt}) diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index ed82f58437..0f63e4bdeb 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -55,17 +55,19 @@ set(_tensor_impl_sources set(python_module_name _tensor_impl) pybind11_add_module(${python_module_name} MODULE ${_tensor_impl_sources}) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_tensor_impl_sources}) -# make fat binary -target_compile_options( - ${python_module_name} - PRIVATE - -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown -) -target_link_options( - ${python_module_name} - PRIVATE - -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown -) +if(_dpctl_sycl_targets) + # make fat binary + target_compile_options( + ${python_module_name} + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) + target_link_options( + ${python_module_name} + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) +endif() set(_clang_prefix "") if (WIN32) diff --git 
a/dpctl/utils/CMakeLists.txt b/dpctl/utils/CMakeLists.txt index 194946e4da..e7d3951e5b 100644 --- a/dpctl/utils/CMakeLists.txt +++ b/dpctl/utils/CMakeLists.txt @@ -21,17 +21,19 @@ pybind11_add_module(${python_module_name} MODULE ${_module_src} ) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) -# make fat binary -target_compile_options( - ${python_module_name} - PRIVATE - -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown -) -target_link_options( - ${python_module_name} - PRIVATE - -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown -) +if(_dpctl_sycl_targets) + # make fat binary + target_compile_options( + ${python_module_name} + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) + target_link_options( + ${python_module_name} + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) +endif() target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../include diff --git a/libsyclinterface/CMakeLists.txt b/libsyclinterface/CMakeLists.txt index 916421d455..e84959c1c3 100644 --- a/libsyclinterface/CMakeLists.txt +++ b/libsyclinterface/CMakeLists.txt @@ -206,16 +206,18 @@ add_library(DPCTLSyclInterface ) add_sycl_to_target(TARGET DPCTLSyclInterface SOURCES ${sources} ${helper_sources}) # make fat binary -target_compile_options( - DPCTLSyclInterface - PRIVATE - -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown -) -target_link_options( - DPCTLSyclInterface - PRIVATE - -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown -) +if(_dpctl_sycl_targets) + target_compile_options( + DPCTLSyclInterface + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) + target_link_options( + DPCTLSyclInterface + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) +endif() if(DPCTL_GENERATE_COVERAGE) target_link_options(DPCTLSyclInterface diff --git a/libsyclinterface/tests/CMakeLists.txt b/libsyclinterface/tests/CMakeLists.txt index e83db384dd..5a672e312f 100644 --- a/libsyclinterface/tests/CMakeLists.txt +++ 
b/libsyclinterface/tests/CMakeLists.txt @@ -52,6 +52,7 @@ add_sycl_to_target( ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_usm_interface.cpp ) +if (_dpctl_sycl_targets) # make fat binary target_compile_options( dpctl_c_api_tests @@ -63,6 +64,7 @@ target_link_options( PRIVATE -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown ) +endif() if(DPCTL_GENERATE_COVERAGE) target_include_directories(dpctl_c_api_tests From 986dc6f88b88db98e4fc53ba0741018bc04bd0c8 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 25 Oct 2023 09:07:45 -0700 Subject: [PATCH 19/19] clang-format fixes --- .../include/kernels/elementwise_functions/acos.hpp | 3 ++- .../include/kernels/elementwise_functions/asin.hpp | 2 +- .../include/kernels/elementwise_functions/asinh.hpp | 6 +++--- .../include/kernels/elementwise_functions/sinh.hpp | 2 +- dpctl/tensor/libtensor/include/utils/sycl_utils.hpp | 2 +- dpctl/tensor/libtensor/source/reduction_over_axis.hpp | 2 +- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp index b64296ec1a..23a87b9d44 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp @@ -106,7 +106,8 @@ template struct AcosFunctor realT(1) / std::numeric_limits::epsilon(); if (std::abs(x) > r_eps || std::abs(y) > r_eps) { using sycl_complexT = exprm_ns::complex; - sycl_complexT log_in = exprm_ns::log(exprm_ns::complex(in)); + sycl_complexT log_in = + exprm_ns::log(exprm_ns::complex(in)); const realT wx = log_in.real(); const realT wy = log_in.imag(); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp index 5f83c95323..035480c437 100644 --- 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp @@ -120,7 +120,7 @@ template struct AsinFunctor realT(1) / std::numeric_limits::epsilon(); if (std::abs(x) > r_eps || std::abs(y) > r_eps) { using sycl_complexT = exprm_ns::complex; - const sycl_complexT z {x, y}; + const sycl_complexT z{x, y}; realT wx, wy; if (!std::signbit(x)) { auto log_z = exprm_ns::log(z); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp index 50ba0d8e86..523ca4f01f 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp @@ -109,9 +109,9 @@ template struct AsinhFunctor if (std::abs(x) > r_eps || std::abs(y) > r_eps) { using sycl_complexT = exprm_ns::complex; - sycl_complexT log_in = (std::signbit(x)) ? - exprm_ns::log(sycl_complexT(-in)) : - exprm_ns::log(sycl_complexT(in)); + sycl_complexT log_in = (std::signbit(x)) + ? exprm_ns::log(sycl_complexT(-in)) + : exprm_ns::log(sycl_complexT(in)); realT wx = log_in.real() + std::log(realT(2)); realT wy = log_in.imag(); const realT res_re = std::copysign(wx, x); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp index 37ac3734e3..b11c7402d0 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp @@ -81,7 +81,7 @@ template struct SinhFunctor * real and imaginary parts of input are finite. */ if (xfinite && yfinite) { - return exprm_ns::sinh(exprm_ns::complex(in)); + return exprm_ns::sinh(exprm_ns::complex(in)); } /* * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN. 
diff --git a/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp b/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp index 31d3d874a0..2f5e74172f 100644 --- a/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp +++ b/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp @@ -25,8 +25,8 @@ #pragma once #include #include -#include #include +#include #include #include "math_utils.hpp" diff --git a/dpctl/tensor/libtensor/source/reduction_over_axis.hpp b/dpctl/tensor/libtensor/source/reduction_over_axis.hpp index 46ef02f064..e9ccd1d52a 100644 --- a/dpctl/tensor/libtensor/source/reduction_over_axis.hpp +++ b/dpctl/tensor/libtensor/source/reduction_over_axis.hpp @@ -28,9 +28,9 @@ #include #include #include -#include #include #include +#include #include #include