Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use sycl ext oneapi experimental for complex #1411

Merged
merged 4 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,30 @@ option(DPCTL_GENERATE_COVERAGE
"Build dpctl with coverage instrumentation"
OFF
)
# Boolean cache option, OFF by default. When enabled and DPCTL_SYCL_TARGETS is
# not given, the target-selection logic below picks CUDA + SPIR-V SYCL targets.
option(DPCTL_TARGET_CUDA
"Build DPCTL to target CUDA devices"
OFF
)

# Locate the IntelSYCL package. The search is restricted to the cmake/ directory
# of the top-level source tree (NO_DEFAULT_PATH disables the default search
# locations), and configuration fails if the package is not found (REQUIRED).
find_package(IntelSYCL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/cmake NO_DEFAULT_PATH)

# Resolve the SYCL offload targets into _dpctl_sycl_targets:
#   1. A non-empty DPCTL_SYCL_TARGETS is used verbatim.
#   2. Otherwise CUDA + SPIR-V targets are selected when the DPCTL_TARGET_CUDA
#      option is ON, or when an environment variable named DPCTL_TARGET_CUDA
#      exists (only its presence is tested, not its value).
#   3. Otherwise the variable is left unset.
if(NOT "x${DPCTL_SYCL_TARGETS}" STREQUAL "x")
    set(_dpctl_sycl_targets ${DPCTL_SYCL_TARGETS})
elseif(DPCTL_TARGET_CUDA OR DEFINED ENV{DPCTL_TARGET_CUDA})
    set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
else()
    set(_dpctl_sycl_targets)
endif()

# Report the selected -fsycl-targets value at configure time; nothing is
# printed when no targets were selected.
if(_dpctl_sycl_targets)
message(STATUS "Compiling for -fsycl-targets=${_dpctl_sycl_targets}")
endif()

add_subdirectory(libsyclinterface)

file(GLOB _dpctl_capi_headers dpctl/apis/include/*.h*)
Expand Down
15 changes: 14 additions & 1 deletion dpctl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,20 @@ function(build_dpctl_ext _trgt _src _dest)
add_custom_target(${_cythonize_trgt} DEPENDS ${_src})
Python_add_library(${_trgt} MODULE WITH_SOABI ${_generated_src})
if (BUILD_DPCTL_EXT_SYCL)
add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src})
add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src})
if(_dpctl_sycl_targets)
# make fat binary
target_compile_options(
${_trgt}
PRIVATE
-fsycl-targets=${_dpctl_sycl_targets}
)
target_link_options(
${_trgt}
PRIVATE
-fsycl-targets=${_dpctl_sycl_targets}
)
endif()
endif()
target_include_directories(${_trgt} PRIVATE ${NumPy_INCLUDE_DIR} ${DPCTL_INCLUDE_DIR})
add_dependencies(${_trgt} _build_time_create_dpctl_include_copy ${_cythonize_trgt})
Expand Down
2 changes: 1 addition & 1 deletion dpctl/_host_task_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include "Python.h"
#include "syclinterface/dpctl_data_types.h"
#include "syclinterface/dpctl_sycl_type_casters.hpp"
#include <CL/sycl.hpp>
#include <sycl/sycl.hpp>

DPCTLSyclEventRef async_dec_ref(DPCTLSyclQueueRef QRef,
PyObject **obj_array,
Expand Down
2 changes: 1 addition & 1 deletion dpctl/apis/include/dpctl4pybind11.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
#pragma once

#include "dpctl_capi.h"
#include <CL/sycl.hpp>
#include <complex>
#include <memory>
#include <pybind11/pybind11.h>
#include <sycl/sycl.hpp>
#include <utility>
#include <vector>

Expand Down
2 changes: 1 addition & 1 deletion dpctl/sycl.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from . cimport _backend as dpctl_backend


cdef extern from "CL/sycl.hpp" namespace "sycl":
cdef extern from "sycl/sycl.hpp" namespace "sycl":
cdef cppclass queue "sycl::queue":
pass

Expand Down
28 changes: 23 additions & 5 deletions dpctl/tensor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,17 @@ foreach(_src_fn ${_no_fast_math_sources})
PROPERTIES COMPILE_OPTIONS "${_combined_options_prop}"
)
endforeach()
# Definition added to every element-wise source file. The kernels test it with
# #ifdef to choose the SYCL complex code path over the std:: one.
set(_compiler_definitions "USE_SYCL_FOR_COMPLEX_TYPES")

# Add the definition to each element-wise source while keeping any
# COMPILE_DEFINITIONS already set on that file.
foreach(_src_fn ${_elementwise_sources})
    get_source_file_property(_cmpl_options_defs ${_src_fn} COMPILE_DEFINITIONS)
    # get_source_file_property() yields "NOTFOUND" when the property was never
    # set; merging that value would pass a bogus NOTFOUND definition to the
    # compiler, so only merge when a real value exists.
    if(_cmpl_options_defs)
        set(_combined_options_defs ${_cmpl_options_defs} "${_compiler_definitions}")
    else()
        set(_combined_options_defs "${_compiler_definitions}")
    endif()
    set_source_files_properties(
        ${_src_fn}
        PROPERTIES COMPILE_DEFINITIONS "${_combined_options_defs}"
    )
endforeach()

set(_linker_options "LINKER:${DPCTL_LDFLAGS}")
foreach(python_module_name ${_py_trgts})
Expand All @@ -209,6 +214,19 @@ foreach(python_module_name ${_py_trgts})
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/
)
target_link_options(${python_module_name} PRIVATE ${_linker_options})
if(_dpctl_sycl_targets)
# make fat binary
target_compile_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpctl_sycl_targets}
)
target_link_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpctl_sycl_targets}
)
endif()
add_dependencies(${python_module_name} _dpctl4pybind11_deps)
install(TARGETS ${python_module_name} DESTINATION "dpctl/tensor")
endforeach()
2 changes: 1 addition & 1 deletion dpctl/tensor/libtensor/include/kernels/accumulators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@
//===---------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <array>
#include <cstdint>
#include <limits>
#include <pybind11/pybind11.h>
#include <sycl/sycl.hpp>
#include <utility>
#include <vector>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
//===---------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <cstdint>
#include <limits>
#include <pybind11/pybind11.h>
#include <sycl/sycl.hpp>
#include <utility>
#include <vector>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
//===----------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <sycl/sycl.hpp>

#include <complex>
#include <cstdint>
Expand Down
2 changes: 1 addition & 1 deletion dpctl/tensor/libtensor/include/kernels/constructors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
#include "utils/offset_utils.hpp"
#include "utils/strided_iters.hpp"
#include "utils/type_utils.hpp"
#include <CL/sycl.hpp>
#include <complex>
#include <pybind11/pybind11.h>
#include <sycl/sycl.hpp>

namespace dpctl
{
Expand Down
2 changes: 1 addition & 1 deletion dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
//===----------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <complex>
#include <cstdint>
#include <pybind11/pybind11.h>
#include <sycl/sycl.hpp>
#include <type_traits>

#include "utils/offset_utils.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,16 @@
//===---------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <cmath>
#include <complex>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <sycl/sycl.hpp>
#include <type_traits>

#include "kernels/elementwise_functions/common.hpp"
#include "sycl_complex.hpp"

#include "utils/offset_utils.hpp"
#include "utils/type_dispatch.hpp"
Expand Down Expand Up @@ -119,8 +120,8 @@ template <typename argT, typename resT> struct AbsFunctor
return q_nan;
}
else {
#ifdef USE_STD_ABS_FOR_COMPLEX_TYPES
return std::abs(z);
#ifdef USE_SYCL_FOR_COMPLEX_TYPES
return exprm_ns::abs(exprm_ns::complex<realT>(z));
#else
return std::hypot(std::real(z), std::imag(z));
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@
//===---------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <sycl/sycl.hpp>
#include <type_traits>

#include "kernels/elementwise_functions/common.hpp"
#include "sycl_complex.hpp"

#include "utils/offset_utils.hpp"
#include "utils/type_dispatch.hpp"
Expand Down Expand Up @@ -103,18 +104,35 @@ template <typename argT, typename resT> struct AcosFunctor
constexpr realT r_eps =
realT(1) / std::numeric_limits<realT>::epsilon();
if (std::abs(x) > r_eps || std::abs(y) > r_eps) {
argT log_in = std::log(in);
#ifdef USE_SYCL_FOR_COMPLEX_TYPES
using sycl_complexT = exprm_ns::complex<realT>;
sycl_complexT log_in =
exprm_ns::log(exprm_ns::complex<realT>(in));

const realT wx = log_in.real();
const realT wy = log_in.imag();
const realT rx = std::abs(wy);

realT ry = wx + std::log(realT(2));
return resT{rx, (std::signbit(y)) ? ry : -ry};
#else
resT log_in = std::log(in);
const realT wx = std::real(log_in);
const realT wy = std::imag(log_in);
const realT rx = std::abs(wy);

realT ry = wx + std::log(realT(2));
return resT{rx, (std::signbit(y)) ? ry : -ry};
#endif
}

/* ordinary cases */
#if USE_SYCL_FOR_COMPLEX_TYPES
return exprm_ns::acos(
exprm_ns::complex<realT>(in)); // std::acos(in);
#else
return std::acos(in);
#endif
}
else {
static_assert(std::is_floating_point_v<argT> ||
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@
//===---------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <sycl/sycl.hpp>
#include <type_traits>

#include "kernels/elementwise_functions/common.hpp"
#include "sycl_complex.hpp"

#include "utils/offset_utils.hpp"
#include "utils/type_dispatch.hpp"
Expand Down Expand Up @@ -110,15 +111,28 @@ template <typename argT, typename resT> struct AcoshFunctor
* For large x or y including acos(+-Inf + I*+-Inf)
*/
if (std::abs(x) > r_eps || std::abs(y) > r_eps) {
const realT wx = std::real(std::log(in));
const realT wy = std::imag(std::log(in));
#ifdef USE_SYCL_FOR_COMPLEX_TYPES
using sycl_complexT = typename exprm_ns::complex<realT>;
const sycl_complexT log_in = exprm_ns::log(sycl_complexT(in));
const realT wx = log_in.real();
const realT wy = log_in.imag();
#else
const resT log_in = std::log(in);
const realT wx = std::real(log_in);
const realT wy = std::imag(log_in);
#endif
const realT rx = std::abs(wy);
realT ry = wx + std::log(realT(2));
acos_in = resT{rx, (std::signbit(y)) ? ry : -ry};
}
else {
/* ordinary cases */
#if USE_SYCL_FOR_COMPLEX_TYPES
acos_in = exprm_ns::acos(
exprm_ns::complex<realT>(in)); // std::acos(in);
#else
acos_in = std::acos(in);
#endif
}

/* Now we calculate acosh(z) */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@
//===---------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <cstddef>
#include <cstdint>
#include <sycl/sycl.hpp>
#include <type_traits>

#include "sycl_complex.hpp"
#include "utils/offset_utils.hpp"
#include "utils/type_dispatch.hpp"
#include "utils/type_utils.hpp"
Expand Down Expand Up @@ -60,7 +61,43 @@ template <typename argT1, typename argT2, typename resT> struct AddFunctor

// Adds two scalar operands and returns the sum as resT.
//
// When USE_SYCL_FOR_COMPLEX_TYPES is defined, every operand whose type is a
// complex type (per tu_ns::is_complex) is first converted to
// exprm_ns::complex over its value_type, so the addition goes through the
// exprm_ns operator+ overloads. Real operands are passed through untouched.
// Without the macro, or when neither operand is complex, the built-in /
// std:: operator+ is used directly.
resT operator()(const argT1 &in1, const argT2 &in2) const
{
#ifdef USE_SYCL_FOR_COMPLEX_TYPES
    if constexpr (tu_ns::is_complex<argT1>::value &&
                  tu_ns::is_complex<argT2>::value)
    {
        using rT1 = typename argT1::value_type;
        using rT2 = typename argT2::value_type;

        return exprm_ns::complex<rT1>(in1) + exprm_ns::complex<rT2>(in2);
    }
    else if constexpr (tu_ns::is_complex<argT1>::value) {
        // complex + real (the both-complex case was handled above)
        using rT1 = typename argT1::value_type;

        return exprm_ns::complex<rT1>(in1) + in2;
    }
    else if constexpr (tu_ns::is_complex<argT2>::value) {
        // real + complex
        using rT2 = typename argT2::value_type;

        return in1 + exprm_ns::complex<rT2>(in2);
    }
    else {
        return in1 + in2;
    }
#else
    return in1 + in2;
#endif
}

template <int vec_sz>
Expand Down
Loading
Loading