diff --git a/CMakeLists.txt b/CMakeLists.txt index eb53db12ec..adfb4fbddd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,9 +17,30 @@ option(DPCTL_GENERATE_COVERAGE "Build dpctl with coverage instrumentation" OFF ) +option(DPCTL_TARGET_CUDA + "Build DPCTL to target CUDA devices" + OFF +) find_package(IntelSYCL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/cmake NO_DEFAULT_PATH) +set(_dpctl_sycl_targets) +if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x") + if(DPCTL_TARGET_CUDA) + set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown") + else() + if(DEFINED ENV{DPCTL_TARGET_CUDA}) + set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown") + endif() + endif() +else() + set(_dpctl_sycl_targets ${DPCTL_SYCL_TARGETS}) +endif() + +if(_dpctl_sycl_targets) + message(STATUS "Compiling for -fsycl-targets=${_dpctl_sycl_targets}") +endif() + add_subdirectory(libsyclinterface) file(GLOB _dpctl_capi_headers dpctl/apis/include/*.h*) diff --git a/dpctl/CMakeLists.txt b/dpctl/CMakeLists.txt index cb872ff45f..616f270ad3 100644 --- a/dpctl/CMakeLists.txt +++ b/dpctl/CMakeLists.txt @@ -143,7 +143,20 @@ function(build_dpctl_ext _trgt _src _dest) add_custom_target(${_cythonize_trgt} DEPENDS ${_src}) Python_add_library(${_trgt} MODULE WITH_SOABI ${_generated_src}) if (BUILD_DPCTL_EXT_SYCL) - add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src}) + add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src}) + if(_dpctl_sycl_targets) + # make fat binary + target_compile_options( + ${_trgt} + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) + target_link_options( + ${_trgt} + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) + endif() endif() target_include_directories(${_trgt} PRIVATE ${NumPy_INCLUDE_DIR} ${DPCTL_INCLUDE_DIR}) add_dependencies(${_trgt} _build_time_create_dpctl_include_copy ${_cythonize_trgt}) diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index 15bc58f57b..0f63e4bdeb 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -55,6 +55,20 @@ set(_tensor_impl_sources set(python_module_name _tensor_impl) pybind11_add_module(${python_module_name} MODULE ${_tensor_impl_sources}) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_tensor_impl_sources}) +if(_dpctl_sycl_targets) + # make fat binary + target_compile_options( + ${python_module_name} + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) + target_link_options( + ${python_module_name} + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) +endif() + set(_clang_prefix "") if (WIN32) set(_clang_prefix "/clang:") diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 3473d5cde5..bab31379b7 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -110,13 +110,16 @@ bitwise_or, bitwise_right_shift, bitwise_xor, + cbrt, ceil, conj, + copysign, cos, cosh, divide, equal, exp, + exp2, expm1, floor, floor_divide, @@ -149,6 +152,7 @@ real, remainder, round, + rsqrt, sign, signbit, sin, @@ -314,4 +318,8 @@ "argmax", "argmin", "prod", + "cbrt", + "exp2", + "copysign", + "rsqrt", ] diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index 259443f8e3..24ae7fa8cf 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -1761,3 +1761,116 @@ hypot = BinaryElementwiseFunc( "hypot", ti._hypot_result_type, ti._hypot, _hypot_docstring_ ) + + +# U37: ==== CBRT (x) +_cbrt_docstring_ = """ +cbrt(x, out=None, order='K') + +Computes positive cube-root for each element `x_i` for input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have a real floating-point data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_narray: + An array containing the element-wise positive cube-root. + The data type of the returned array is determined by + the Type Promotion Rules. +""" + +cbrt = UnaryElementwiseFunc( + "cbrt", ti._cbrt_result_type, ti._cbrt, _cbrt_docstring_ +) + + +# U38: ==== EXP2 (x) +_exp2_docstring_ = """ +exp2(x, out=None, order='K') + +Computes the base-2 exponential for each element `x_i` for input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have a floating-point data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_narray: + An array containing the element-wise base-2 exponentials. + The data type of the returned array is determined by + the Type Promotion Rules. +""" + +exp2 = UnaryElementwiseFunc( + "exp2", ti._exp2_result_type, ti._exp2, _exp2_docstring_ +) + + +# B25: ==== COPYSIGN (x1, x2) +_copysign_docstring_ = """ +copysign(x1, x2, out=None, order='K') + +Composes a floating-point value with the magnitude of `x1_i` and the sign of +`x2_i` for each element of input arrays `x1` and `x2`. + +Args: + x1 (usm_ndarray): + First input array, expected to have a real floating-point data type. + x2 (usm_ndarray): + Second input array, also expected to have a real floating-point data + type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_narray: + An array containing the element-wise results. The data type + of the returned array is determined by the Type Promotion Rules. +""" +copysign = BinaryElementwiseFunc( + "copysign", + ti._copysign_result_type, + ti._copysign, + _copysign_docstring_, +) + + +# U39: ==== RSQRT (x) +_rsqrt_docstring_ = """ +rsqrt(x, out=None, order='K') + +Computes the reciprocal square-root for each element `x_i` for input array `x`. + +Args: + x (usm_ndarray): + Input array, expected to have a real floating-point data type. + out ({None, usm_ndarray}, optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the newly output array, if parameter `out` is `None`. + Default: "K". +Returns: + usm_narray: + An array containing the element-wise reciprocal square-root. + The data type of the returned array is determined by + the Type Promotion Rules. +""" + +rsqrt = UnaryElementwiseFunc( + "rsqrt", ti._rsqrt_result_type, ti._rsqrt, _rsqrt_docstring_ +) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp index 28968de761..23a87b9d44 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp @@ -105,10 +105,12 @@ template struct AcosFunctor constexpr realT r_eps = realT(1) / std::numeric_limits::epsilon(); if (std::abs(x) > r_eps || std::abs(y) > r_eps) { - argT log_in = std::log(in); + using sycl_complexT = exprm_ns::complex; + sycl_complexT log_in = + exprm_ns::log(exprm_ns::complex(in)); - const realT wx = std::real(log_in); - const realT wy = std::imag(log_in); + const realT wx = log_in.real(); + const realT wy = log_in.imag(); const realT rx = std::abs(wy); realT ry = wx + std::log(realT(2)); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp index 7323579df5..56730a411c 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp @@ -48,7 +48,7 @@ namespace acosh namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace cmplx_ns = sycl::ext::oneapi::experimental; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -112,16 +112,18 @@ template struct AcoshFunctor * For large x or y including acos(+-Inf + I*+-Inf) */ if (std::abs(x) > r_eps || std::abs(y) > r_eps) { - const realT wx = std::real(std::log(in)); - const realT wy = std::imag(std::log(in)); + using sycl_complexT = typename exprm_ns::complex; + const sycl_complexT log_in = exprm_ns::log(sycl_complexT(in)); + const realT wx = log_in.real(); + const realT wy = log_in.imag(); const realT rx = std::abs(wy); realT ry = wx + std::log(realT(2)); acos_in = resT{rx, (std::signbit(y)) ? ry : -ry}; } else { /* ordinary cases */ - acos_in = cmplx_ns::acos( - cmplx_ns::complex(in)); // std::acos(in); + acos_in = exprm_ns::acos( + exprm_ns::complex(in)); // std::acos(in); } /* Now we calculate acosh(z) */ diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp index 595d75eeca..035480c437 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp @@ -119,17 +119,18 @@ template struct AsinFunctor constexpr realT r_eps = realT(1) / std::numeric_limits::epsilon(); if (std::abs(x) > r_eps || std::abs(y) > r_eps) { - const resT z = {x, y}; + using sycl_complexT = exprm_ns::complex; + const sycl_complexT z{x, y}; realT wx, wy; if (!std::signbit(x)) { - auto log_z = std::log(z); - wx = std::real(log_z) + std::log(realT(2)); - wy = std::imag(log_z); + auto log_z = exprm_ns::log(z); + wx = log_z.real() + std::log(realT(2)); + wy = log_z.imag(); } else { - auto log_mz = std::log(-z); - wx = std::real(log_mz) + std::log(realT(2)); - wy = std::imag(log_mz); + auto log_mz = exprm_ns::log(-z); + wx = log_mz.real() + std::log(realT(2)); + wy = log_mz.imag(); } const realT asinh_re = std::copysign(wx, x); const realT asinh_im = std::copysign(wy, y); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp index d126afba87..523ca4f01f 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp @@ -108,9 +108,12 @@ template struct AsinhFunctor realT(1) / std::numeric_limits::epsilon(); if (std::abs(x) > r_eps || std::abs(y) > r_eps) { - resT log_in = (std::signbit(x)) ? std::log(-in) : std::log(in); - realT wx = std::real(log_in) + std::log(realT(2)); - realT wy = std::imag(log_in); + using sycl_complexT = exprm_ns::complex; + sycl_complexT log_in = (std::signbit(x)) + ? exprm_ns::log(sycl_complexT(-in)) + : exprm_ns::log(sycl_complexT(in)); + realT wx = log_in.real() + std::log(realT(2)); + realT wy = log_in.imag(); const realT res_re = std::copysign(wx, x); const realT res_im = std::copysign(wy, y); return resT{res_re, res_im}; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp new file mode 100644 index 0000000000..92584f0dfe --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp @@ -0,0 +1,172 @@ +//=== cbrt.hpp - Unary function CBRT ------ *-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of CBRT(x) +/// function that compute a square root. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace cbrt +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +template struct CbrtFunctor +{ + + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::true_type; + + resT operator()(const argT &in) const + { + return sycl::cbrt(in); + } +}; + +template +using CbrtContigFunctor = elementwise_common:: + UnaryContigFunctor, vec_sz, n_vecs>; + +template +using CbrtStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct CbrtOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class cbrt_contig_kernel; + +template +sycl::event cbrt_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, CbrtOutputType, CbrtContigFunctor, cbrt_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct CbrtContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = cbrt_contig_impl; + return fn; + } + } +}; + +template struct CbrtTypeMapFactory +{ + /*! @brief get typeid for output type of std::cbrt(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename CbrtOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class cbrt_strided_kernel; + +template +sycl::event +cbrt_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, CbrtOutputType, CbrtStridedFunctor, cbrt_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct CbrtStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = cbrt_strided_impl; + return fn; + } + } +}; + +} // namespace cbrt +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp new file mode 100644 index 0000000000..43e06cb281 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp @@ -0,0 +1,215 @@ +//=== copysign.hpp - Binary function COPYSIGN ------ *-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of COPYSIGN(x1, x2) +/// function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace copysign +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template struct CopysignFunctor +{ + + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + return sycl::copysign(in1, in2); + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + auto tmp = sycl::copysign(in1, in2); + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using CopysignContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs>; + +template +using CopysignStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + CopysignFunctor>; + +template struct CopysignOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class copysign_contig_kernel; + +template +sycl::event copysign_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_contig_impl< + argTy1, argTy2, CopysignOutputType, CopysignContigFunctor, + copysign_contig_kernel>(exec_q, nelems, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends); +} + +template struct CopysignContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename CopysignOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = copysign_contig_impl; + return fn; + } + } +}; + +template struct CopysignTypeMapFactory +{ + /*! @brief get typeid for output type of divide(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename CopysignOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class copysign_strided_kernel; + +template +sycl::event +copysign_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg1_p, + py::ssize_t arg1_offset, + const char *arg2_p, + py::ssize_t arg2_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, CopysignOutputType, CopysignStridedFunctor, + copysign_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct CopysignStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename CopysignOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = copysign_strided_impl; + return fn; + } + } +}; + +} // namespace copysign +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp new file mode 100644 index 0000000000..b6b2f32e83 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp @@ -0,0 +1,231 @@ +//=== exp2.hpp - Unary function EXP2 ------ +//*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of EXP2(x) function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace exp2 +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace exprm_ns = sycl::ext::oneapi::experimental; + +using dpctl::tensor::type_utils::is_complex; + +template struct Exp2Functor +{ + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::negation< + std::disjunction, is_complex>>; + + resT operator()(const argT &in) const + { + if constexpr (is_complex::value) { + using realT = typename argT::value_type; + + const argT tmp = in * std::log(realT(2)); + + constexpr realT q_nan = std::numeric_limits::quiet_NaN(); + + const realT x = std::real(tmp); + const realT y = std::imag(tmp); + if (std::isfinite(x)) { + if (std::isfinite(y)) { + return exprm_ns::exp(exprm_ns::complex(tmp)); + } + else { + return resT{q_nan, q_nan}; + } + } + else if (std::isnan(x)) { + /* x is nan */ + if (y == realT(0)) { + return resT{in}; + } + else { + return resT{x, q_nan}; + } + } + else { + if (!std::signbit(x)) { /* x is +inf */ + if (y == realT(0)) { + return resT{x, y}; + } + else if (std::isfinite(y)) { + return resT{x * std::cos(y), x * std::sin(y)}; + } + else { + /* x = +inf, y = +-inf || nan */ + return resT{x, q_nan}; + } + } + else { /* x is -inf */ + if (std::isfinite(y)) { + realT exp_x = std::exp(x); + return resT{exp_x * std::cos(y), exp_x * std::sin(y)}; + } + else { + /* x = -inf, y = +-inf || nan */ + return resT{0, 0}; + } + } + } + } + else { + return sycl::exp2(in); + } + } +}; + +template +using Exp2ContigFunctor = elementwise_common:: + UnaryContigFunctor, vec_sz, n_vecs>; + +template +using Exp2StridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct Exp2OutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class exp2_contig_kernel; + +template +sycl::event exp2_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, Exp2OutputType, Exp2ContigFunctor, exp2_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct Exp2ContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = exp2_contig_impl; + return fn; + } + } +}; + +template struct Exp2TypeMapFactory +{ + /*! @brief get typeid for output type of std::exp2(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename Exp2OutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class exp2_strided_kernel; + +template +sycl::event +exp2_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, Exp2OutputType, Exp2StridedFunctor, exp2_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct Exp2StridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = exp2_strided_impl; + return fn; + } + } +}; + +} // namespace exp2 +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp new file mode 100644 index 0000000000..d9e0c33081 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp @@ -0,0 +1,179 @@ +//=== rsqrt.hpp - Unary function RSQRT ------ +//*-C++-*--/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of RSQRT(x) +/// function that computes the reciprocal square root. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" +#include + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace rsqrt +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +template struct RsqrtFunctor +{ + + // is function constant for given argT + using is_constant = typename std::false_type; + // constant value, if constant + // constexpr resT constant_value = resT{}; + // is function defined for sycl::vec + using supports_vec = typename std::false_type; + // do both argTy and resTy support sugroup store/load operation + using supports_sg_loadstore = typename std::true_type; + + resT operator()(const argT &in) const + { + return sycl::rsqrt(in); + } +}; + +template +using RsqrtContigFunctor = + elementwise_common::UnaryContigFunctor, + vec_sz, + n_vecs>; + +template +using RsqrtStridedFunctor = elementwise_common:: + UnaryStridedFunctor>; + +template struct RsqrtOutputType +{ + using value_type = typename std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; +}; + +template +class rsqrt_contig_kernel; + +template +sycl::event rsqrt_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + char *res_p, + const std::vector &depends = {}) +{ + return elementwise_common::unary_contig_impl< + argTy, RsqrtOutputType, RsqrtContigFunctor, rsqrt_contig_kernel>( + exec_q, nelems, arg_p, res_p, depends); +} + +template struct RsqrtContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = rsqrt_contig_impl; + return fn; + } + } +}; + +template struct RsqrtTypeMapFactory +{ + /*! @brief get typeid for output type of sycl::rsqrt(T x) */ + std::enable_if_t::value, int> get() + { + using rT = typename RsqrtOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template class rsqrt_strided_kernel; + +template +sycl::event +rsqrt_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::unary_strided_impl< + argTy, RsqrtOutputType, RsqrtStridedFunctor, rsqrt_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template struct RsqrtStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = rsqrt_strided_impl; + return fn; + } + } +}; + +} // namespace rsqrt +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp index 0f35aa306e..e1e9e79c57 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp @@ -48,7 +48,7 @@ namespace sin namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace cmplx_ns = sycl::ext::oneapi::experimental; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -81,8 +81,8 @@ template struct SinFunctor * real and imaginary parts of input are finite. */ if (in_re_finite && in_im_finite) { - return cmplx_ns::sin( - cmplx_ns::complex(in)); // std::sin(in); + return exprm_ns::sin( + exprm_ns::complex(in)); // std::sin(in); } /* diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp index af887431d7..b11c7402d0 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp @@ -48,7 +48,7 @@ namespace sinh namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace cmplx_ns = sycl::ext::oneapi::experimental; +namespace exprm_ns = sycl::ext::oneapi::experimental; using dpctl::tensor::type_utils::is_complex; @@ -81,7 +81,7 @@ template struct SinhFunctor * real and imaginary parts of input are finite. */ if (xfinite && yfinite) { - return std::sinh(in); + return exprm_ns::sinh(exprm_ns::complex(in)); } /* * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN. diff --git a/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp b/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp index 31d3d874a0..2f5e74172f 100644 --- a/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp +++ b/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp @@ -25,8 +25,8 @@ #pragma once #include #include -#include #include +#include #include #include "math_utils.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index de5e6b09d0..9ab7c0807c 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -48,12 +48,15 @@ #include "kernels/elementwise_functions/bitwise_or.hpp" #include "kernels/elementwise_functions/bitwise_right_shift.hpp" #include "kernels/elementwise_functions/bitwise_xor.hpp" +#include "kernels/elementwise_functions/cbrt.hpp" #include "kernels/elementwise_functions/ceil.hpp" #include "kernels/elementwise_functions/conj.hpp" +#include "kernels/elementwise_functions/copysign.hpp" #include "kernels/elementwise_functions/cos.hpp" #include "kernels/elementwise_functions/cosh.hpp" #include "kernels/elementwise_functions/equal.hpp" #include "kernels/elementwise_functions/exp.hpp" +#include "kernels/elementwise_functions/exp2.hpp" #include "kernels/elementwise_functions/expm1.hpp" #include "kernels/elementwise_functions/floor.hpp" #include "kernels/elementwise_functions/floor_divide.hpp" @@ -86,6 +89,7 @@ #include "kernels/elementwise_functions/real.hpp" #include "kernels/elementwise_functions/remainder.hpp" #include "kernels/elementwise_functions/round.hpp" +#include "kernels/elementwise_functions/rsqrt.hpp" #include "kernels/elementwise_functions/sign.hpp" #include "kernels/elementwise_functions/signbit.hpp" #include "kernels/elementwise_functions/sin.hpp" @@ -2749,7 +2753,6 @@ void populate_trunc_dispatch_vectors(void) } // namespace impl // B24: ==== HYPOT (x1, x2) - namespace impl { namespace hypot_fn_ns = dpctl::tensor::kernels::hypot; @@ -2788,6 +2791,151 @@ void populate_hypot_dispatch_tables(void) } // namespace impl +// U37: ==== CBRT (x) +namespace impl +{ + +namespace cbrt_fn_ns = dpctl::tensor::kernels::cbrt; + +static unary_contig_impl_fn_ptr_t cbrt_contig_dispatch_vector[td_ns::num_types]; +static int cbrt_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + cbrt_strided_dispatch_vector[td_ns::num_types]; + +void populate_cbrt_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = cbrt_fn_ns; + + using fn_ns::CbrtContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(cbrt_contig_dispatch_vector); + + using fn_ns::CbrtStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(cbrt_strided_dispatch_vector); + + using fn_ns::CbrtTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(cbrt_output_typeid_vector); +} + +} // namespace impl + +// B24: ==== COPYSIGN (x1, x2) +namespace impl +{ +namespace copysign_fn_ns = dpctl::tensor::kernels::copysign; + +static binary_contig_impl_fn_ptr_t + copysign_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int copysign_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + copysign_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_copysign_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = copysign_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::CopysignTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(copysign_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::CopysignStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(copysign_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::CopysignContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(copysign_contig_dispatch_table); +}; + +} // namespace impl + +// U38: ==== EXP2 (x) +namespace impl +{ + +namespace exp2_fn_ns = dpctl::tensor::kernels::exp2; + +static unary_contig_impl_fn_ptr_t exp2_contig_dispatch_vector[td_ns::num_types]; +static int exp2_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + exp2_strided_dispatch_vector[td_ns::num_types]; + +void populate_exp2_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = exp2_fn_ns; + + using fn_ns::Exp2ContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(exp2_contig_dispatch_vector); + + using fn_ns::Exp2StridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(exp2_strided_dispatch_vector); + + using fn_ns::Exp2TypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(exp2_output_typeid_vector); +} + +} // namespace impl + +// U39: ==== RSQRT (x) +namespace impl +{ + +namespace rsqrt_fn_ns = dpctl::tensor::kernels::rsqrt; + +static unary_contig_impl_fn_ptr_t + rsqrt_contig_dispatch_vector[td_ns::num_types]; +static int rsqrt_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + rsqrt_strided_dispatch_vector[td_ns::num_types]; + +void populate_rsqrt_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = rsqrt_fn_ns; + + using fn_ns::RsqrtContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(rsqrt_contig_dispatch_vector); + + using fn_ns::RsqrtStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(rsqrt_strided_dispatch_vector); + + using fn_ns::RsqrtTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(rsqrt_output_typeid_vector); +} + +} // namespace impl + // ========================================================================================== // // @@ -4889,6 +5037,117 @@ void init_elementwise_functions(py::module_ m) py::arg("depends") = py::list()); m.def("_hypot_result_type", hypot_result_type_pyapi, ""); } + + // U37: ==== CBRT (x) + { + impl::populate_cbrt_dispatch_vectors(); + using impl::cbrt_contig_dispatch_vector; + using impl::cbrt_output_typeid_vector; + using impl::cbrt_strided_dispatch_vector; + + auto cbrt_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, cbrt_output_typeid_vector, + cbrt_contig_dispatch_vector, cbrt_strided_dispatch_vector); + }; + m.def("_cbrt", cbrt_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto cbrt_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, cbrt_output_typeid_vector); + }; + m.def("_cbrt_result_type", cbrt_result_type_pyapi); + } + + // B25: ==== COPYSIGN (x1, x2) + { + impl::populate_copysign_dispatch_tables(); + using impl::copysign_contig_dispatch_table; + using impl::copysign_output_id_table; + using impl::copysign_strided_dispatch_table; + + auto copysign_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, copysign_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + copysign_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + copysign_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto copysign_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + copysign_output_id_table); + }; + m.def("_copysign", copysign_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_copysign_result_type", copysign_result_type_pyapi, ""); + } + + // U38: ==== EXP2 (x) + { + impl::populate_exp2_dispatch_vectors(); + using impl::exp2_contig_dispatch_vector; + using impl::exp2_output_typeid_vector; + using impl::exp2_strided_dispatch_vector; + + auto exp2_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, exp2_output_typeid_vector, + exp2_contig_dispatch_vector, exp2_strided_dispatch_vector); + }; + m.def("_exp2", exp2_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto exp2_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, exp2_output_typeid_vector); + }; + m.def("_exp2_result_type", exp2_result_type_pyapi); + } + + // U39: ==== RSQRT (x) + { + impl::populate_rsqrt_dispatch_vectors(); + using impl::rsqrt_contig_dispatch_vector; + using impl::rsqrt_output_typeid_vector; + using impl::rsqrt_strided_dispatch_vector; + + auto rsqrt_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, rsqrt_output_typeid_vector, + rsqrt_contig_dispatch_vector, rsqrt_strided_dispatch_vector); + }; + m.def("_rsqrt", rsqrt_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto rsqrt_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + rsqrt_output_typeid_vector); + }; + m.def("_rsqrt_result_type", rsqrt_result_type_pyapi); + } } } // namespace py_internal diff --git a/dpctl/tensor/libtensor/source/reduction_over_axis.hpp b/dpctl/tensor/libtensor/source/reduction_over_axis.hpp index 46ef02f064..e9ccd1d52a 100644 --- a/dpctl/tensor/libtensor/source/reduction_over_axis.hpp +++ b/dpctl/tensor/libtensor/source/reduction_over_axis.hpp @@ -28,9 +28,9 @@ #include #include #include -#include #include #include +#include #include #include diff --git a/dpctl/tests/elementwise/test_cbrt.py b/dpctl/tests/elementwise/test_cbrt.py new file mode 100644 index 0000000000..b06a8d19cf --- /dev/null +++ b/dpctl/tests/elementwise/test_cbrt.py @@ -0,0 +1,79 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest +from numpy.testing import assert_allclose + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _map_to_device_dtype, _no_complex_dtypes, _real_fp_dtypes + + +@pytest.mark.parametrize("dtype", _no_complex_dtypes) +def test_cbrt_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.asarray(0, dtype=dtype, sycl_queue=q) + expected_dtype = np.cbrt(np.array(0, dtype=dtype)).dtype + expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) + assert dpt.cbrt(X).dtype == expected_dtype + + +@pytest.mark.parametrize("dtype", _real_fp_dtypes) +def test_cbrt_output_contig(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 1027 + + X = dpt.linspace(0, 13, num=n_seq, dtype=dtype, sycl_queue=q) + Xnp = dpt.asnumpy(X) + + Y = dpt.cbrt(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.cbrt(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", _real_fp_dtypes) +def test_cbrt_output_strided(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 2054 + + X = dpt.linspace(0, 13, num=n_seq, dtype=dtype, sycl_queue=q)[::-2] + Xnp = dpt.asnumpy(X) + + Y = dpt.cbrt(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.cbrt(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.usefixtures("suppress_invalid_numpy_warnings") +def test_cbrt_special_cases(): + get_queue_or_skip() + + X = dpt.asarray([dpt.nan, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4") + res = dpt.cbrt(X) + expected = dpt.asarray([dpt.nan, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4") + tol = dpt.finfo(dpt.float32).resolution + + assert dpt.allclose(res, expected, atol=tol, rtol=tol, equal_nan=True) diff --git a/dpctl/tests/elementwise/test_copysign.py b/dpctl/tests/elementwise/test_copysign.py new file mode 100644 index 0000000000..26a285343c --- /dev/null +++ b/dpctl/tests/elementwise/test_copysign.py @@ -0,0 +1,111 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ctypes + +import numpy as np +import pytest + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _compare_dtypes, _no_complex_dtypes, _real_fp_dtypes + + +@pytest.mark.parametrize("op1_dtype", _no_complex_dtypes) +@pytest.mark.parametrize("op2_dtype", _no_complex_dtypes) +def test_copysign_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1 = dpt.ones(sz, dtype=op1_dtype) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype) + + r = dpt.copysign(ar1, ar2) + assert isinstance(r, dpt.usm_ndarray) + expected = np.copysign( + np.ones(1, dtype=op1_dtype), np.ones(1, dtype=op2_dtype) + ) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar1.shape + assert (dpt.asnumpy(r) == expected.astype(r.dtype)).all() + assert r.sycl_queue == ar1.sycl_queue + + ar3 = dpt.ones(sz, dtype=op1_dtype) + ar4 = dpt.ones(2 * sz, dtype=op2_dtype) + + r = dpt.copysign(ar3[::-1], ar4[::2]) + assert isinstance(r, dpt.usm_ndarray) + expected = np.copysign( + np.ones(1, dtype=op1_dtype), np.ones(1, dtype=op2_dtype) + ) + assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q) + assert r.shape == ar3.shape + assert (dpt.asnumpy(r) == expected.astype(r.dtype)).all() + + +@pytest.mark.parametrize("arr_dt", _real_fp_dtypes) +def test_copysign_python_scalar(arr_dt): + q = get_queue_or_skip() + skip_if_dtype_not_supported(arr_dt, q) + + X = dpt.ones((10, 10), dtype=arr_dt, sycl_queue=q) + py_ones = ( + bool(1), + int(1), + float(1), + np.float32(1), + ctypes.c_int(1), + ) + for sc in py_ones: + R = dpt.copysign(X, sc) + assert isinstance(R, dpt.usm_ndarray) + R = dpt.copysign(sc, X) + assert isinstance(R, dpt.usm_ndarray) + + +@pytest.mark.parametrize("dt", _real_fp_dtypes) +def test_copysign(dt): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dt, q) + + x = dpt.arange(100, dtype=dt, sycl_queue=q) + x[1::2] *= -1 + y = dpt.ones(100, dtype=dt, sycl_queue=q) + y[::2] *= -1 + res = dpt.copysign(x, y) + expected = dpt.negative(x) + tol = dpt.finfo(dt).resolution + assert dpt.allclose(res, expected, atol=tol, rtol=tol) + + +def test_copysign_special_values(): + get_queue_or_skip() + + x1 = dpt.asarray([1.0, 0.0, dpt.nan, dpt.nan], dtype="f4") + y1 = dpt.asarray([-1.0, -0.0, -dpt.nan, -1], dtype="f4") + res = dpt.copysign(x1, y1) + assert dpt.all(dpt.signbit(res)) + x2 = dpt.asarray([-1.0, -0.0, -dpt.nan, -dpt.nan], dtype="f4") + res = dpt.copysign(x2, y1) + assert dpt.all(dpt.signbit(res)) + y2 = dpt.asarray([0.0, 1.0, dpt.nan, 1.0], dtype="f4") + res = dpt.copysign(x2, y2) + assert not dpt.any(dpt.signbit(res)) + res = dpt.copysign(x1, y2) + assert not dpt.any(dpt.signbit(res)) diff --git a/dpctl/tests/elementwise/test_exp2.py b/dpctl/tests/elementwise/test_exp2.py new file mode 100644 index 0000000000..d4bef1efab --- /dev/null +++ b/dpctl/tests/elementwise/test_exp2.py @@ -0,0 +1,168 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools + +import numpy as np +import pytest +from numpy.testing import assert_allclose + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _all_dtypes, _map_to_device_dtype, _usm_types + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_exp2_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + X = dpt.asarray(0, dtype=dtype, sycl_queue=q) + expected_dtype = np.exp2(np.array(0, dtype=dtype)).dtype + expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) + assert dpt.exp2(X).dtype == expected_dtype + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_exp2_output_contig(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 1027 + + X = dpt.linspace(1, 5, num=n_seq, dtype=dtype, sycl_queue=q) + Xnp = dpt.asnumpy(X) + + Y = dpt.exp2(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.exp2(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", ["f2", "f4", "f8", "c8", "c16"]) +def test_exp2_output_strided(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 2 * 1027 + + X = dpt.linspace(1, 5, num=n_seq, dtype=dtype, sycl_queue=q)[::-2] + Xnp = dpt.asnumpy(X) + + Y = dpt.exp2(X) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), np.exp2(Xnp), atol=tol, rtol=tol) + + +@pytest.mark.parametrize("usm_type", _usm_types) +def test_exp2_usm_type(usm_type): + q = get_queue_or_skip() + + arg_dt = np.dtype("f4") + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, usm_type=usm_type, sycl_queue=q) + X[..., 0::2] = 1 / 4 + X[..., 1::2] = 1 / 2 + + Y = dpt.exp2(X) + assert Y.usm_type == X.usm_type + assert Y.sycl_queue == X.sycl_queue + assert Y.flags.c_contiguous + + expected_Y = np.empty(input_shape, dtype=arg_dt) + expected_Y[..., 0::2] = np.exp2(np.float32(1 / 4)) + expected_Y[..., 1::2] = np.exp2(np.float32(1 / 2)) + tol = 8 * dpt.finfo(Y.dtype).resolution + + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_exp2_order(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + arg_dt = np.dtype(dtype) + input_shape = (10, 10, 10, 10) + X = dpt.empty(input_shape, dtype=arg_dt, sycl_queue=q) + X[..., 0::2] = 1 / 4 + X[..., 1::2] = 1 / 2 + + for ord in ["C", "F", "A", "K"]: + for perms in itertools.permutations(range(4)): + U = dpt.permute_dims(X[:, ::-1, ::-1, :], perms) + Y = dpt.exp2(U, order=ord) + expected_Y = np.exp2(dpt.asnumpy(U)) + tol = 8 * max( + dpt.finfo(Y.dtype).resolution, + np.finfo(expected_Y.dtype).resolution, + ) + assert_allclose(dpt.asnumpy(Y), expected_Y, atol=tol, rtol=tol) + + +def test_exp2_special_cases(): + get_queue_or_skip() + + X = dpt.asarray([dpt.nan, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4") + res = np.asarray([np.nan, 1.0, 1.0, np.inf, 0.0], dtype="f4") + + tol = dpt.finfo(X.dtype).resolution + assert_allclose(dpt.asnumpy(dpt.exp2(X)), res, atol=tol, rtol=tol) + + # special cases for complex variant + num_finite = 1.0 + vals = [ + complex(0.0, 0.0), + complex(num_finite, dpt.inf), + complex(num_finite, dpt.nan), + complex(dpt.inf, 0.0), + complex(-dpt.inf, num_finite), + complex(dpt.inf, num_finite), + complex(-dpt.inf, dpt.inf), + complex(dpt.inf, dpt.inf), + complex(-dpt.inf, dpt.nan), + complex(dpt.inf, dpt.nan), + complex(dpt.nan, 0.0), + complex(dpt.nan, num_finite), + complex(dpt.nan, dpt.nan), + ] + X = dpt.asarray(vals, dtype=dpt.complex64) + cis_1 = complex(np.cos(num_finite), np.sin(num_finite)) + c_nan = complex(np.nan, np.nan) + res = np.asarray( + [ + complex(1.0, 0.0), + c_nan, + c_nan, + complex(np.inf, 0.0), + 0.0, + np.inf * cis_1, + complex(0.0, 0.0), + complex(np.inf, np.nan), + complex(0.0, 0.0), + complex(np.inf, np.nan), + complex(np.nan, 0.0), + c_nan, + c_nan, + ], + dtype=np.complex64, + ) + + tol = dpt.finfo(X.dtype).resolution + with np.errstate(invalid="ignore"): + assert_allclose(dpt.asnumpy(dpt.exp2(X)), res, atol=tol, rtol=tol) diff --git a/dpctl/tests/elementwise/test_rsqrt.py b/dpctl/tests/elementwise/test_rsqrt.py new file mode 100644 index 0000000000..ef9378ade2 --- /dev/null +++ b/dpctl/tests/elementwise/test_rsqrt.py @@ -0,0 +1,74 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest +from numpy.testing import assert_allclose + +import dpctl.tensor as dpt +from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported + +from .utils import _map_to_device_dtype, _no_complex_dtypes, _real_fp_dtypes + + +@pytest.mark.parametrize("dtype", _no_complex_dtypes) +def test_rsqrt_out_type(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + x = dpt.asarray(1, dtype=dtype, sycl_queue=q) + expected_dtype = np.reciprocal(np.sqrt(np.array(1, dtype=dtype))).dtype + expected_dtype = _map_to_device_dtype(expected_dtype, q.sycl_device) + assert dpt.rsqrt(x).dtype == expected_dtype + + +@pytest.mark.parametrize("dtype", _real_fp_dtypes) +def test_rsqrt_output_contig(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 1027 + + x = dpt.linspace(1, 13, num=n_seq, dtype=dtype, sycl_queue=q) + res = dpt.rsqrt(x) + expected = np.reciprocal(np.sqrt(dpt.asnumpy(x), dtype=dtype)) + tol = 8 * dpt.finfo(res.dtype).resolution + assert_allclose(dpt.asnumpy(res), expected, atol=tol, rtol=tol) + + +@pytest.mark.parametrize("dtype", _real_fp_dtypes) +def test_rsqrt_output_strided(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + n_seq = 2054 + + x = dpt.linspace(1, 13, num=n_seq, dtype=dtype, sycl_queue=q)[::-2] + res = dpt.rsqrt(x) + expected = np.reciprocal(np.sqrt(dpt.asnumpy(x), dtype=dtype)) + tol = 8 * dpt.finfo(res.dtype).resolution + assert_allclose(dpt.asnumpy(res), expected, atol=tol, rtol=tol) + + +def test_rsqrt_special_cases(): + get_queue_or_skip() + + x = dpt.asarray([dpt.nan, -1.0, 0.0, -0.0, dpt.inf, -dpt.inf], dtype="f4") + res = dpt.rsqrt(x) + expected = dpt.asarray( + [dpt.nan, dpt.nan, dpt.inf, -dpt.inf, 0.0, dpt.nan], dtype="f4" + ) + assert dpt.allclose(res, expected, equal_nan=True) diff --git a/dpctl/tests/test_utils.py b/dpctl/tests/test_utils.py index 1aab7fd7e7..05b2dc7890 100644 --- a/dpctl/tests/test_utils.py +++ b/dpctl/tests/test_utils.py @@ -21,7 +21,6 @@ import dpctl import dpctl.utils -from dpctl.enum_types import backend_type def test_get_execution_queue_input_validation(): @@ -132,9 +131,7 @@ def test_intel_device_info(): pytest.skip("Default device could not be created") descr = dpctl.utils.intel_device_info(d) assert isinstance(descr, dict) - assert ("device_id" in descr) or ( - not d.has_aspect_cpu and not d.backend == backend_type.level_zero - ) + assert ("device_id" in descr) or not descr allowed_names = [ "device_id", "gpu_slices", diff --git a/dpctl/utils/CMakeLists.txt b/dpctl/utils/CMakeLists.txt index aadc1c0fe0..e7d3951e5b 100644 --- a/dpctl/utils/CMakeLists.txt +++ b/dpctl/utils/CMakeLists.txt @@ -21,6 +21,19 @@ pybind11_add_module(${python_module_name} MODULE ${_module_src} ) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) +if(_dpctl_sycl_targets) + # make fat binary + target_compile_options( + ${python_module_name} + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) + target_link_options( + ${python_module_name} + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) +endif() target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../include diff --git a/libsyclinterface/CMakeLists.txt b/libsyclinterface/CMakeLists.txt index 64ec3271b1..e84959c1c3 100644 --- a/libsyclinterface/CMakeLists.txt +++ b/libsyclinterface/CMakeLists.txt @@ -205,6 +205,19 @@ add_library(DPCTLSyclInterface ${helper_sources} ) add_sycl_to_target(TARGET DPCTLSyclInterface SOURCES ${sources} ${helper_sources}) +# make fat binary +if(_dpctl_sycl_targets) + target_compile_options( + DPCTLSyclInterface + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) + target_link_options( + DPCTLSyclInterface + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) +endif() if(DPCTL_GENERATE_COVERAGE) target_link_options(DPCTLSyclInterface diff --git a/libsyclinterface/tests/CMakeLists.txt b/libsyclinterface/tests/CMakeLists.txt index 472e1787fa..5a672e312f 100644 --- a/libsyclinterface/tests/CMakeLists.txt +++ b/libsyclinterface/tests/CMakeLists.txt @@ -52,6 +52,19 @@ add_sycl_to_target( ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_usm_interface.cpp ) +if (_dpctl_sycl_targets) +# make fat binary +target_compile_options( + dpctl_c_api_tests + PRIVATE + -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown +) +target_link_options( + dpctl_c_api_tests + PRIVATE + -fsycl-targets=nvptx64-nvidia-cuda,spir64-unknown-unknown +) +endif() if(DPCTL_GENERATE_COVERAGE) target_include_directories(dpctl_c_api_tests