diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index 62346c2243d..353d1400320 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -114,16 +114,16 @@ MACRO_2ARG_3TYPES_OP(dpnp_add_c, std::complex)) MACRO_2ARG_3TYPES_OP(dpnp_arctan2_c, - sycl::atan2((double)input1_elem, (double)input2_elem), - nullptr, - std::false_type, + sycl::atan2(input1_elem, input2_elem), + sycl::atan2(x1, x2), + MACRO_UNPACK_TYPES(float, double), oneapi::mkl::vm::atan2, MACRO_UNPACK_TYPES(float, double)) MACRO_2ARG_3TYPES_OP(dpnp_copysign_c, - sycl::copysign((double)input1_elem, (double)input2_elem), - nullptr, - std::false_type, + sycl::copysign(input1_elem, input2_elem), + sycl::copysign(x1, x2), + MACRO_UNPACK_TYPES(float, double), oneapi::mkl::vm::copysign, MACRO_UNPACK_TYPES(float, double)) @@ -137,17 +137,18 @@ MACRO_2ARG_3TYPES_OP(dpnp_divide_c, std::complex, std::complex)) -MACRO_2ARG_3TYPES_OP(dpnp_fmod_c, - sycl::fmod((double)input1_elem, (double)input2_elem), - nullptr, - std::false_type, - oneapi::mkl::vm::fmod, - MACRO_UNPACK_TYPES(float, double)) +MACRO_2ARG_3TYPES_OP( + dpnp_fmod_c, + dispatch_fmod_op(input1_elem, input2_elem), + dispatch_fmod_op(x1, x2), + MACRO_UNPACK_TYPES(std::int32_t, std::int64_t, float, double), + oneapi::mkl::vm::fmod, + MACRO_UNPACK_TYPES(float, double)) MACRO_2ARG_3TYPES_OP(dpnp_hypot_c, - sycl::hypot((double)input1_elem, (double)input2_elem), - nullptr, - std::false_type, + sycl::hypot(input1_elem, input2_elem), + sycl::hypot(x1, x2), + MACRO_UNPACK_TYPES(float, double), oneapi::mkl::vm::hypot, MACRO_UNPACK_TYPES(float, double)) diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 404657965ff..1bd0d1922e0 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -819,6 +819,36 @@ constexpr T dispatch_sign_op(T elem) } } +template +constexpr auto dispatch_fmod_op(T elem1, T elem2) +{ + if constexpr (sycl::detail::is_integral::value) { + if constexpr (sycl::detail::is_vec::value) { + T rem; + using ElemT = typename T::element_type; +#pragma unroll + for (size_t i = 0; i < rem.size(); i++) { + if (elem2[i] == ElemT(0)) { + rem[i] = ElemT(0); + } + else { + rem[i] = elem1[i] % elem2[i]; + } + } + return rem; + } + else { + if (elem2 == T(0)) { + return T(0); + } + return elem1 % elem2; + } + } + else { + return sycl::fmod(elem1, elem2); + } +} + #define MACRO_1ARG_1TYPE_OP(__name__, __operation1__, __operation2__) \ template \ class __name__##_kernel; \ @@ -1644,12 +1674,104 @@ static void func_map_elemwise_2arg_3type_core(func_map_t &fmap) ...); } +template +static void func_map_elemwise_2arg_3type_short_core(func_map_t &fmap) +{ + ((fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][FT1][FTs] = + {get_floating_res_type(), + (void *)dpnp_arctan2_c_ext< + func_type_map_t::find_type()>, + func_type_map_t::find_type, + func_type_map_t::find_type>, + get_floating_res_type(), + (void *)dpnp_arctan2_c_ext< + func_type_map_t::find_type< + get_floating_res_type()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); + ((fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][FT1][FTs] = + {get_floating_res_type(), + (void *)dpnp_copysign_c_ext< + func_type_map_t::find_type()>, + func_type_map_t::find_type, + func_type_map_t::find_type>, + get_floating_res_type(), + (void *)dpnp_copysign_c_ext< + func_type_map_t::find_type< + get_floating_res_type()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); + ((fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][FT1][FTs] = + {get_floating_res_type(), + (void *) + dpnp_fmod_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>, + get_floating_res_type(), + (void *)dpnp_fmod_c_ext< + func_type_map_t::find_type()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); + ((fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][FT1][FTs] = + {get_floating_res_type(), + (void *)dpnp_hypot_c_ext< + func_type_map_t::find_type()>, + func_type_map_t::find_type, + func_type_map_t::find_type>, + get_floating_res_type(), + (void *)dpnp_hypot_c_ext< + func_type_map_t::find_type< + get_floating_res_type()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); + ((fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][FT1][FTs] = + {get_floating_res_type(), + (void *)dpnp_maximum_c_ext< + func_type_map_t::find_type()>, + func_type_map_t::find_type, + func_type_map_t::find_type>, + get_floating_res_type(), + (void *)dpnp_maximum_c_ext< + func_type_map_t::find_type()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); + ((fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][FT1][FTs] = + {get_floating_res_type(), + (void *)dpnp_minimum_c_ext< + func_type_map_t::find_type()>, + func_type_map_t::find_type, + func_type_map_t::find_type>, + get_floating_res_type(), + (void *)dpnp_minimum_c_ext< + func_type_map_t::find_type()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); +} + template static void func_map_elemwise_2arg_3type_helper(func_map_t &fmap) { ((func_map_elemwise_2arg_3type_core(fmap)), ...); } +template +static void func_map_elemwise_2arg_3type_short_helper(func_map_t &fmap) +{ + ((func_map_elemwise_2arg_3type_short_core(fmap)), ...); +} + static void func_map_init_elemwise_2arg_3type(func_map_t &fmap) { fmap[DPNPFuncName::DPNP_FN_ADD][eft_INT][eft_INT] = { @@ -1718,39 +1840,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_ARCTAN2][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_arctan2_c_default}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_INT][eft_LNG] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_INT][eft_FLT] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_LNG][eft_INT] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_LNG][eft_FLT] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_FLT][eft_INT] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_FLT][eft_LNG] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_arctan2_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN][eft_INT][eft_INT] = { eft_DBL, (void *)dpnp_copysign_c_default}; fmap[DPNPFuncName::DPNP_FN_COPYSIGN][eft_INT][eft_LNG] = { @@ -1784,39 +1873,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_COPYSIGN][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_copysign_c_default}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_INT][eft_LNG] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_INT][eft_FLT] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_LNG][eft_INT] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_LNG][eft_FLT] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_FLT][eft_INT] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_FLT][eft_LNG] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_COPYSIGN_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_copysign_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE][eft_INT][eft_INT] = { eft_DBL, (void *)dpnp_divide_c_default}; fmap[DPNPFuncName::DPNP_FN_DIVIDE][eft_INT][eft_LNG] = { @@ -1883,39 +1939,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_FMOD][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_fmod_c_default}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_FLT] = { - eft_DBL, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_INT] = { - eft_LNG, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_FLT] = { - eft_DBL, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_INT] = { - eft_DBL, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_LNG] = { - eft_DBL, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_fmod_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT][eft_INT][eft_INT] = { eft_DBL, (void *)dpnp_hypot_c_default}; fmap[DPNPFuncName::DPNP_FN_HYPOT][eft_INT][eft_LNG] = { @@ -1949,39 +1972,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_HYPOT][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_hypot_c_default}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_INT][eft_LNG] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_INT][eft_FLT] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_LNG][eft_INT] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_LNG][eft_FLT] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_FLT][eft_INT] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_FLT][eft_LNG] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_HYPOT_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_hypot_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM][eft_INT][eft_INT] = { eft_INT, (void *)dpnp_maximum_c_default}; fmap[DPNPFuncName::DPNP_FN_MAXIMUM][eft_INT][eft_LNG] = { @@ -2015,39 +2005,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_MAXIMUM][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_maximum_c_default}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_INT][eft_FLT] = { - eft_DBL, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_LNG][eft_INT] = { - eft_LNG, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_LNG][eft_FLT] = { - eft_DBL, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_FLT][eft_INT] = { - eft_DBL, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_FLT][eft_LNG] = { - eft_DBL, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MAXIMUM_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_maximum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM][eft_INT][eft_INT] = { eft_INT, (void *)dpnp_minimum_c_default}; fmap[DPNPFuncName::DPNP_FN_MINIMUM][eft_INT][eft_LNG] = { @@ -2081,39 +2038,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_MINIMUM][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_minimum_c_default}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_INT][eft_FLT] = { - eft_DBL, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_LNG][eft_INT] = { - eft_LNG, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_LNG][eft_FLT] = { - eft_DBL, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_FLT][eft_INT] = { - eft_DBL, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_FLT][eft_LNG] = { - eft_DBL, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MINIMUM_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_minimum_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_BLN][eft_BLN] = { eft_BLN, (void *)dpnp_multiply_c_default}; fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_BLN][eft_INT] = { @@ -2285,6 +2209,9 @@ static void func_map_init_elemwise_2arg_3type(func_map_t &fmap) func_map_elemwise_2arg_3type_helper(fmap); + func_map_elemwise_2arg_3type_short_helper(fmap); + return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp index 5957cb4a699..32be7b92983 100644 --- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp @@ -1141,6 +1141,31 @@ DPCTLSyclEventRef (*dpnp_trapz_ext_c)(DPCTLSyclQueueRef, const DPCTLEventVectorRef) = dpnp_trapz_c<_DataType_input1, _DataType_input2, _DataType_output>; +template +static void func_map_elemwise_2arg_3type_core(func_map_t &fmap) +{ + ((fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][FT1][FTs] = + {get_floating_res_type(), + (void *)dpnp_cross_ext_c< + func_type_map_t::find_type()>, + func_type_map_t::find_type, + func_type_map_t::find_type>, + get_floating_res_type(), + (void *)dpnp_cross_ext_c< + func_type_map_t::find_type()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); +} + +template +static void func_map_elemwise_2arg_3type_helper(func_map_t &fmap) +{ + ((func_map_elemwise_2arg_3type_core(fmap)), ...); +} + void func_map_init_mathematical(func_map_t &fmap) { fmap[DPNPFuncName::DPNP_FN_ABSOLUTE][eft_INT][eft_INT] = { @@ -1218,39 +1243,6 @@ void func_map_init_mathematical(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_CROSS][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_cross_default_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_INT][eft_FLT] = { - eft_DBL, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_LNG][eft_INT] = { - eft_LNG, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_LNG][eft_FLT] = { - eft_DBL, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_FLT][eft_INT] = { - eft_DBL, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_FLT][eft_LNG] = { - eft_DBL, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CROSS_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_cross_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CUMPROD][eft_INT][eft_INT] = { eft_LNG, (void *)dpnp_cumprod_default_c}; fmap[DPNPFuncName::DPNP_FN_CUMPROD][eft_LNG][eft_LNG] = { @@ -1342,39 +1334,6 @@ void func_map_init_mathematical(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_floor_divide_default_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_INT][eft_FLT] = { - eft_DBL, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_LNG][eft_INT] = { - eft_LNG, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_LNG][eft_FLT] = { - eft_DBL, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_FLT][eft_INT] = { - eft_DBL, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_FLT][eft_LNG] = { - eft_DBL, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FLOOR_DIVIDE_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_floor_divide_ext_c}; - fmap[DPNPFuncName::DPNP_FN_MODF][eft_INT][eft_INT] = { eft_DBL, (void *)dpnp_modf_default_c}; fmap[DPNPFuncName::DPNP_FN_MODF][eft_LNG][eft_LNG] = { @@ -1525,5 +1484,8 @@ void func_map_init_mathematical(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_TRAPZ_EXT][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_trapz_ext_c}; + func_map_elemwise_2arg_3type_helper( + fmap); + return; } diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp index 446083204fc..d485711f8f5 100644 --- a/dpnp/backend/src/dpnp_fptr.hpp +++ b/dpnp/backend/src/dpnp_fptr.hpp @@ -271,6 +271,45 @@ static constexpr DPNPFuncType get_default_floating_type() : DPNPFuncType::DPNP_FT_FLOAT; } +/** + * A template function that determines the resulting floating-point type + * based on the value of the template parameter has_fp64. + */ +template +static constexpr DPNPFuncType get_floating_res_type() +{ + constexpr auto widest_type = populate_func_types(); + constexpr auto shortes_type = (widest_type == FT1) ? FT2 : FT1; + + // Return integer result type if save_int is True + if constexpr (keep_int::value) { + if constexpr (widest_type == DPNPFuncType::DPNP_FT_INT || + widest_type == DPNPFuncType::DPNP_FT_LONG) + { + return widest_type; + } + } + + // Check for double + if constexpr (widest_type == DPNPFuncType::DPNP_FT_DOUBLE) { + return widest_type; + } + + // Check for float + else if constexpr (widest_type == DPNPFuncType::DPNP_FT_FLOAT) { + // Check if the shortest type is also float + if constexpr (shortes_type == DPNPFuncType::DPNP_FT_FLOAT) { + return widest_type; + } + } + + // Default case + return get_default_floating_type(); +} + /** * FPTR interface initialization functions */ diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 873a134a9c1..fed1928e076 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -13,6 +13,7 @@ from .helper import ( get_all_dtypes, get_float_complex_dtypes, + has_support_aspect64, is_cpu_device, is_win_platform, ) @@ -162,6 +163,16 @@ def test_divide(self, dtype, lhs, rhs): "dtype", get_all_dtypes(no_bool=True, no_complex=True) ) def test_fmod(self, dtype, lhs, rhs): + if dtype == None and rhs == 0.3 and not has_support_aspect64(): + """ + Due to accuracy reason NumPy behaves differently, when: + >>> numpy.fmod(numpy.array([3.9], dtype=numpy.float32), 0.3) + array([0.29999995], dtype=float32) + while numpy with float64 returns something around zero which is aligned with dpnp: + >>> numpy.fmod(numpy.array([3.9], dtype=numpy.float64), 0.3) + array([9.53674318e-08]) + """ + pytest.skip("missaligned between numpy results") self._test_mathematical("fmod", dtype, lhs, rhs) @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) @@ -197,6 +208,9 @@ def test_minimum(self, dtype, lhs, rhs): def test_multiply(self, dtype, lhs, rhs): self._test_mathematical("multiply", dtype, lhs, rhs) + @pytest.mark.skipif( + not has_support_aspect64(), reason="Aborted on Iris Xe: SAT-6039" + ) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize( "dtype", get_all_dtypes(no_bool=True, no_complex=True) @@ -382,7 +396,7 @@ def test_negative(data, dtype): result = dpnp.negative(dpnp_a) expected = numpy.negative(np_a) - assert_array_equal(result, expected) + assert_allclose(result, expected) @pytest.mark.parametrize("val_type", get_all_dtypes(no_none=True)) @@ -768,9 +782,9 @@ def test_inplace_strided_out(self, dtype): "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] ) def test_invalid_shape(self, shape): - dp_array1 = dpnp.arange(10, dtype=dpnp.float64) - dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64) - dp_out = dpnp.empty(shape, dtype=dpnp.float64) + dp_array1 = dpnp.arange(10) + dp_array2 = dpnp.arange(5, 15) + dp_out = dpnp.empty(shape) with pytest.raises(TypeError): dpnp.add(dp_array1, dp_array2, out=dp_out) @@ -858,9 +872,9 @@ def test_inplace_strided_out(self, dtype): "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] ) def test_invalid_shape(self, shape): - dp_array1 = dpnp.arange(10, dtype=dpnp.float64) - dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64) - dp_out = dpnp.empty(shape, dtype=dpnp.float64) + dp_array1 = dpnp.arange(10) + dp_array2 = dpnp.arange(5, 15) + dp_out = dpnp.empty(shape) with pytest.raises(TypeError): dpnp.multiply(dp_array1, dp_array2, out=dp_out)