Skip to content

Commit

Permalink
Use sycl::ext::oneapi::experimental for complex tyes
Browse files Browse the repository at this point in the history
This works around use of double precision functions/literals in implementations
of these functions in MSVC headers, causing failures to offload on Iris Xe for
single precision input citing lack of fp64 support by the hardware.

Changes include CL/sycl.hpp to sycl/sycl.hpp per SYCL-2020 spec

For every CMake target, where add_sycl_to_target is used, we also run
target_compile_options(
   ${target_name}
   PRIVATE
   -fysl-targets=spir64-unknown-unknown,nvptx64-nvidia-cuda
)

Add DPCTL_TARGET_CUDA Boolean cmake option

Also DPCTL_SYCL_TARGETS parameter can be used to specify targets
to build for.

DPCTL_TARGET_CUDA could be set via cmake option, or via environment
variable, e.g.

```
$ DPCTL_TARGET_CUDA=1 python scripts/build_locally.py --verbose
```

This calls `target_compile_options` to set sycl-targets for targets
needing SYCL
  • Loading branch information
oleksandr-pavlyk committed Nov 16, 2023
1 parent f772888 commit 6d3be5d
Show file tree
Hide file tree
Showing 160 changed files with 6,871 additions and 218 deletions.
21 changes: 21 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,30 @@ option(DPCTL_GENERATE_COVERAGE
"Build dpctl with coverage instrumentation"
OFF
)
option(DPCTL_TARGET_CUDA
"Build DPCTL to target CUDA devices"
OFF
)

find_package(IntelSYCL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/cmake NO_DEFAULT_PATH)

set(_dpctl_sycl_targets)
if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x")
if(DPCTL_TARGET_CUDA)
set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
else()
if(DEFINED ENV{DPCTL_TARGET_CUDA})
set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
endif()
endif()
else()
set(_dpctl_sycl_targets ${DPCTL_SYCL_TARGETS})
endif()

if(_dpctl_sycl_targets)
message(STATUS "Compiling for -fsycl-targets=${_dpctl_sycl_targets}")
endif()

add_subdirectory(libsyclinterface)

file(GLOB _dpctl_capi_headers dpctl/apis/include/*.h*)
Expand Down
15 changes: 14 additions & 1 deletion dpctl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,20 @@ function(build_dpctl_ext _trgt _src _dest)
add_custom_target(${_cythonize_trgt} DEPENDS ${_src})
Python_add_library(${_trgt} MODULE WITH_SOABI ${_generated_src})
if (BUILD_DPCTL_EXT_SYCL)
add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src})
add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src})
if(_dpctl_sycl_targets)
# make fat binary
target_compile_options(
${_trgt}
PRIVATE
-fsycl-targets=${_dpctl_sycl_targets}
)
target_link_options(
${_trgt}
PRIVATE
-fsycl-targets=${_dpctl_sycl_targets}
)
endif()
endif()
target_include_directories(${_trgt} PRIVATE ${NumPy_INCLUDE_DIR} ${DPCTL_INCLUDE_DIR})
add_dependencies(${_trgt} _build_time_create_dpctl_include_copy ${_cythonize_trgt})
Expand Down
2 changes: 1 addition & 1 deletion dpctl/_host_task_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include "Python.h"
#include "syclinterface/dpctl_data_types.h"
#include "syclinterface/dpctl_sycl_type_casters.hpp"
#include <CL/sycl.hpp>
#include <sycl/sycl.hpp>

DPCTLSyclEventRef async_dec_ref(DPCTLSyclQueueRef QRef,
PyObject **obj_array,
Expand Down
2 changes: 1 addition & 1 deletion dpctl/apis/include/dpctl4pybind11.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
#pragma once

#include "dpctl_capi.h"
#include <CL/sycl.hpp>
#include <complex>
#include <memory>
#include <pybind11/pybind11.h>
#include <sycl/sycl.hpp>
#include <utility>
#include <vector>

Expand Down
2 changes: 1 addition & 1 deletion dpctl/sycl.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from . cimport _backend as dpctl_backend


cdef extern from "CL/sycl.hpp" namespace "sycl":
cdef extern from "sycl/sycl.hpp" namespace "sycl":
cdef cppclass queue "sycl::queue":
pass

Expand Down
29 changes: 25 additions & 4 deletions dpctl/tensor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,20 @@ foreach(_src_fn ${_no_fast_math_sources})
)
endforeach()
if (UNIX)
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/abs.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sqrt.cpp
PROPERTIES COMPILE_DEFINITIONS "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES")
set(_compiler_definitions "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES;SYCL_EXT_ONEAPI_COMPLEX")
else()
set(_compiler_definitions "SYCL_EXT_ONEAPI_COMPLEX")
endif()

foreach(_src_fn ${_elementwise_sources})
get_source_file_property(_cmpl_options_defs ${_src_fn} COMPILE_DEFINITIONS)
set(_combined_options_defs ${_cmpl_options_defs} "${_compiler_definitions}")
set_source_files_properties(
${_src_fn}
PROPERTIES COMPILE_DEFINITIONS "${_combined_options_defs}"
)
endforeach()

set(_linker_options "LINKER:${DPCTL_LDFLAGS}")
foreach(python_module_name ${_py_trgts})
target_compile_options(${python_module_name} PRIVATE -fno-sycl-id-queries-fit-in-int)
Expand All @@ -209,6 +217,19 @@ foreach(python_module_name ${_py_trgts})
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/
)
target_link_options(${python_module_name} PRIVATE ${_linker_options})
if(_dpctl_sycl_targets)
# make fat binary
target_compile_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpctl_sycl_targets}
)
target_link_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpctl_sycl_targets}
)
endif()
add_dependencies(${python_module_name} _dpctl4pybind11_deps)
install(TARGETS ${python_module_name} DESTINATION "dpctl/tensor")
endforeach()
2 changes: 1 addition & 1 deletion dpctl/tensor/libtensor/include/kernels/accumulators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@
//===---------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <array>
#include <cstdint>
#include <limits>
#include <pybind11/pybind11.h>
#include <sycl/sycl.hpp>
#include <utility>
#include <vector>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
//===---------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <cstdint>
#include <limits>
#include <pybind11/pybind11.h>
#include <sycl/sycl.hpp>
#include <utility>
#include <vector>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
//===----------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <sycl/sycl.hpp>

#include <complex>
#include <cstdint>
Expand Down
2 changes: 1 addition & 1 deletion dpctl/tensor/libtensor/include/kernels/constructors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
#include "utils/offset_utils.hpp"
#include "utils/strided_iters.hpp"
#include "utils/type_utils.hpp"
#include <CL/sycl.hpp>
#include <complex>
#include <pybind11/pybind11.h>
#include <sycl/sycl.hpp>

namespace dpctl
{
Expand Down
2 changes: 1 addition & 1 deletion dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
//===----------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <complex>
#include <cstdint>
#include <pybind11/pybind11.h>
#include <sycl/sycl.hpp>
#include <type_traits>

#include "utils/offset_utils.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@
//===---------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <cmath>
#include <complex>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <sycl/ext/oneapi/experimental/sycl_complex.hpp>
#include <sycl/sycl.hpp>
#include <type_traits>

#include "kernels/elementwise_functions/common.hpp"
Expand All @@ -49,6 +50,7 @@ namespace abs

namespace py = pybind11;
namespace td_ns = dpctl::tensor::type_dispatch;
namespace exprm_ns = sycl::ext::oneapi::experimental;

using dpctl::tensor::type_utils::is_complex;

Expand Down Expand Up @@ -120,7 +122,7 @@ template <typename argT, typename resT> struct AbsFunctor
}
else {
#ifdef USE_STD_ABS_FOR_COMPLEX_TYPES
return std::abs(z);
return exprm_ns::abs(exprm_ns::complex<realT>(z));
#else
return std::hypot(std::real(z), std::imag(z));
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@
//===---------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <sycl/ext/oneapi/experimental/sycl_complex.hpp>
#include <sycl/sycl.hpp>
#include <type_traits>

#include "kernels/elementwise_functions/common.hpp"
Expand All @@ -47,6 +48,7 @@ namespace acos

namespace py = pybind11;
namespace td_ns = dpctl::tensor::type_dispatch;
namespace exprm_ns = sycl::ext::oneapi::experimental;

using dpctl::tensor::type_utils::is_complex;

Expand Down Expand Up @@ -103,18 +105,21 @@ template <typename argT, typename resT> struct AcosFunctor
constexpr realT r_eps =
realT(1) / std::numeric_limits<realT>::epsilon();
if (std::abs(x) > r_eps || std::abs(y) > r_eps) {
argT log_in = std::log(in);
using sycl_complexT = exprm_ns::complex<realT>;
sycl_complexT log_in =
exprm_ns::log(exprm_ns::complex<realT>(in));

const realT wx = std::real(log_in);
const realT wy = std::imag(log_in);
const realT wx = log_in.real();
const realT wy = log_in.imag();
const realT rx = std::abs(wy);

realT ry = wx + std::log(realT(2));
return resT{rx, (std::signbit(y)) ? ry : -ry};
}

/* ordinary cases */
return std::acos(in);
return exprm_ns::acos(
exprm_ns::complex<realT>(in)); // std::acos(in);
}
else {
static_assert(std::is_floating_point_v<argT> ||
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@
//===---------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <sycl/ext/oneapi/experimental/sycl_complex.hpp>
#include <sycl/sycl.hpp>
#include <type_traits>

#include "kernels/elementwise_functions/common.hpp"
Expand All @@ -47,6 +48,7 @@ namespace acosh

namespace py = pybind11;
namespace td_ns = dpctl::tensor::type_dispatch;
namespace exprm_ns = sycl::ext::oneapi::experimental;

using dpctl::tensor::type_utils::is_complex;

Expand Down Expand Up @@ -110,15 +112,18 @@ template <typename argT, typename resT> struct AcoshFunctor
* For large x or y including acos(+-Inf + I*+-Inf)
*/
if (std::abs(x) > r_eps || std::abs(y) > r_eps) {
const realT wx = std::real(std::log(in));
const realT wy = std::imag(std::log(in));
using sycl_complexT = typename exprm_ns::complex<realT>;
const sycl_complexT log_in = exprm_ns::log(sycl_complexT(in));
const realT wx = log_in.real();
const realT wy = log_in.imag();
const realT rx = std::abs(wy);
realT ry = wx + std::log(realT(2));
acos_in = resT{rx, (std::signbit(y)) ? ry : -ry};
}
else {
/* ordinary cases */
acos_in = std::acos(in);
acos_in = exprm_ns::acos(
exprm_ns::complex<realT>(in)); // std::acos(in);
}

/* Now we calculate acosh(z) */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@
//===---------------------------------------------------------------------===//

#pragma once
#include <CL/sycl.hpp>
#include <cstddef>
#include <cstdint>
#include <sycl/ext/oneapi/experimental/sycl_complex.hpp>
#include <sycl/sycl.hpp>
#include <type_traits>

#include "utils/offset_utils.hpp"
Expand All @@ -49,6 +50,7 @@ namespace add
namespace py = pybind11;
namespace td_ns = dpctl::tensor::type_dispatch;
namespace tu_ns = dpctl::tensor::type_utils;
namespace exprm_ns = sycl::ext::oneapi::experimental;

template <typename argT1, typename argT2, typename resT> struct AddFunctor
{
Expand All @@ -60,7 +62,31 @@ template <typename argT1, typename argT2, typename resT> struct AddFunctor

resT operator()(const argT1 &in1, const argT2 &in2) const
{
return in1 + in2;
if constexpr (tu_ns::is_complex<argT1>::value &&
tu_ns::is_complex<argT2>::value)
{
using rT1 = typename argT1::value_type;
using rT2 = typename argT2::value_type;

return exprm_ns::complex<rT1>(in1) + exprm_ns::complex<rT2>(in2);
}
else if constexpr (tu_ns::is_complex<argT1>::value &&
!tu_ns::is_complex<argT2>::value)
{
using rT1 = typename argT1::value_type;

return exprm_ns::complex<rT1>(in1) + in2;
}
else if constexpr (!tu_ns::is_complex<argT1>::value &&
tu_ns::is_complex<argT2>::value)
{
using rT2 = typename argT2::value_type;

return in1 + exprm_ns::complex<rT2>(in2);
}
else {
return in1 + in2;
}
}

template <int vec_sz>
Expand Down
Loading

0 comments on commit 6d3be5d

Please sign in to comment.