Skip to content

Commit

Permalink
WIP-01
Browse files Browse the repository at this point in the history
  • Loading branch information
chudur-budur committed Oct 5, 2023
1 parent 70f4ed9 commit 7e22ec5
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 57 deletions.
1 change: 1 addition & 0 deletions numba_dpex/core/runtime/_dpexrt_python.c
Original file line number Diff line number Diff line change
Expand Up @@ -1450,6 +1450,7 @@ static PyObject *build_c_helpers_dict(void)

MOD_INIT(_dpexrt_python)
{
printf("======================> MOD_INIT\n");
PyObject *m = NULL;
PyObject *dpnp_array_type = NULL;
PyObject *dpnp_array_mod = NULL;
Expand Down
2 changes: 1 addition & 1 deletion numba_dpex/core/runtime/kernels/sequences.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ static ndpx::runtime::kernel::tensor::sequence_step_ptr_t

// File-local array of affine_sequence_step_ptr_t whose extent is
// types::num_types (presumably one slot per supported element type -- confirm
// against init_sequence_dispatch_vectors).
// NOTE: this listing is a rendered diff without +/- markers. The first
// bracketed extent below is the removed line, which misspelled the namespace
// as `npdx`; the second is the line that replaces it, spelled `ndpx`.
static ndpx::runtime::kernel::tensor::affine_sequence_step_ptr_t
affine_sequence_step_dispatch_vector
[npdx::runtime::kernel::types::num_types];
[ndpx::runtime::kernel::types::num_types];

void init_sequence_dispatch_vectors(void)
{
Expand Down
112 changes: 56 additions & 56 deletions numba_dpex/core/runtime/kernels/sequences.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,57 +16,57 @@ namespace kernel
namespace tensor
{

// Forward declarations of the class templates that are passed as the kernel
// name argument of cgh.parallel_for<...> in this header; they are declared
// here and never defined in the visible code.
// NOTE: this listing is a rendered diff without +/- markers. The first two
// declarations are the removed lines, the last two are their replacements.
// Besides renaming Ty -> T, the replacement gives the affine kernel name a
// second parameter (wT), which matches the two-argument uses such as
// ndpx_affine_sequence_step_kernel<T, double> further down.
template <typename Ty> class ndpx_sequence_step_kernel;
template <typename Ty> class ndpx_affine_sequence_step_kernel;
template <typename T> class ndpx_sequence_step_kernel;
template <typename T, typename wT> class ndpx_affine_sequence_step_kernel;

// Kernel functor that fills an output buffer with an arithmetic sequence:
// the element at work-item index i receives start_v + i * step_v.
// NOTE: this listing is a rendered diff without +/- markers. Wherever a line
// spelled with `Ty` is followed by the same line spelled with `T`, the first
// is the removed version and the second is its replacement; the two are not
// meant to coexist.
template <typename Ty> class SequenceStepFunctor
template <typename T> class SequenceStepFunctor
{
private:
// p: typed view of the destination buffer; start_v: value written at index
// 0; step_v: increment between consecutive indices.
Ty *p = nullptr;
Ty start_v;
Ty step_v;
T *p = nullptr;
T start_v;
T step_v;

public:
// dst_p is received as char* and reinterpreted as a pointer to the element
// type; v0 and dv are stored as the start value and the step.
SequenceStepFunctor(char *dst_p, Ty v0, Ty dv)
: p(reinterpret_cast<Ty *>(dst_p)), start_v(v0), step_v(dv)
SequenceStepFunctor(char *dst_p, T v0, T dv)
: p(reinterpret_cast<T *>(dst_p)), start_v(v0), step_v(dv)
{
}

// Invoked once per work-item; writes exactly one element, p[i].
void operator()(sycl::id<1> wiid) const
{
// i is both the output slot and the multiplier applied to the step.
auto i = wiid.get(0);
// Complex element types are built component-wise from real() and imag()
// instead of using the type's own arithmetic operators.
if constexpr (ndpx::runtime::kernel::types::is_complex<Ty>::value) {
p[i] = Ty{start_v.real() + i * step_v.real(),
start_v.imag() + i * step_v.imag()};
if constexpr (ndpx::runtime::kernel::types::is_complex<T>::value) {
p[i] = T{start_v.real() + i * step_v.real(),
start_v.imag() + i * step_v.imag()};
}
else {
p[i] = start_v + i * step_v;
}
}
};

template <typename Ty, typename wTy> class AffineSequenceStepFunctor
template <typename T, typename wT> class AffineSequenceStepFunctor
{
private:
Ty *p = nullptr;
Ty start_v;
Ty end_v;
T *p = nullptr;
T start_v;
T end_v;
size_t n;

public:
AffineSequenceStepFunctor(char *dst_p, Ty v0, Ty v1, size_t den)
: p(reinterpret_cast<Ty *>(dst_p)), start_v(v0), end_v(v1),
AffineSequenceStepFunctor(char *dst_p, T v0, T v1, size_t den)
: p(reinterpret_cast<T *>(dst_p)), start_v(v0), end_v(v1),
n((den == 0) ? 1 : den)
{
}

void operator()(sycl::id<1> wiid) const
{
auto i = wiid.get(0);
wTy wc = wTy(i) / n;
wTy w = wTy(n - i) / n;
if constexpr (ndpx::runtime::kernel::types::is_complex<Ty>::value) {
using reT = typename Ty::value_type;
wT wc = wT(i) / n;
wT w = wT(n - i) / n;
if constexpr (ndpx::runtime::kernel::types::is_complex<T>::value) {
using reT = typename T::value_type;
auto _w = static_cast<reT>(w);
auto _wc = static_cast<reT>(wc);
auto re_comb = sycl::fma(start_v.real(), _w, reT(0));
Expand All @@ -77,68 +77,68 @@ template <typename Ty, typename wTy> class AffineSequenceStepFunctor
sycl::fma(start_v.imag(), _w,
reT(0)); // start_v.imag() * _w + end_v.imag() * _wc;
im_comb = sycl::fma(end_v.imag(), _wc, im_comb);
Ty affine_comb = Ty{re_comb, im_comb};
T affine_comb = T{re_comb, im_comb};
p[i] = affine_comb;
}
else if constexpr (std::is_floating_point<Ty>::value) {
Ty _w = static_cast<Ty>(w);
Ty _wc = static_cast<Ty>(wc);
else if constexpr (std::is_floating_point<T>::value) {
T _w = static_cast<T>(w);
T _wc = static_cast<T>(wc);
auto affine_comb =
sycl::fma(start_v, _w, Ty(0)); // start_v * w + end_v * wc;
sycl::fma(start_v, _w, T(0)); // start_v * w + end_v * wc;
affine_comb = sycl::fma(end_v, _wc, affine_comb);
p[i] = affine_comb;
}
else {
auto affine_comb = start_v * w + end_v * wc;
p[i] = ndpx::runtime::kernel::types::convert_impl<
Ty, decltype(affine_comb)>(affine_comb);
T, decltype(affine_comb)>(affine_comb);
}
}
};

// Submits a one-dimensional kernel that runs SequenceStepFunctor over nelems
// work-items and returns the event produced by the submission.
//
// exec_q      queue the command group is submitted to
// nelems      extent of the 1-D range passed to parallel_for
// start_v     start value forwarded to the functor
// step_v      step value forwarded to the functor
// array_data  destination buffer, handed to the functor as char*
// depends     events registered through cgh.depends_on before the kernel
//
// NOTE: this listing is a rendered diff without +/- markers. Wherever a line
// spelled with `Ty` is followed by the same line spelled with `T`, the first
// is the removed version and the second is its replacement.
// NOTE(review): exec_q is taken by value here, while
// affine_sequence_step_kernel and sequence_step take `sycl::queue &` --
// confirm whether the difference is intentional.
template <typename Ty>
template <typename T>
sycl::event sequence_step_kernel(sycl::queue exec_q,
size_t nelems,
Ty start_v,
Ty step_v,
T start_v,
T step_v,
char *array_data,
const std::vector<sycl::event> &depends)
{
// Called before anything is submitted; presumably rejects element types the
// queue's device cannot handle -- the helper is defined outside this view.
ndpx::runtime::kernel::types::validate_type_for_device<Ty>(exec_q);
ndpx::runtime::kernel::types::validate_type_for_device<T>(exec_q);
sycl::event seq_step_event = exec_q.submit([&](sycl::handler &cgh) {
cgh.depends_on(depends);
// The kernel name is the forward-declared ndpx_sequence_step_kernel
// template instantiated with the element type.
cgh.parallel_for<ndpx_sequence_step_kernel<Ty>>(
cgh.parallel_for<ndpx_sequence_step_kernel<T>>(
sycl::range<1>{nelems},
SequenceStepFunctor<Ty>(array_data, start_v, step_v));
SequenceStepFunctor<T>(array_data, start_v, step_v));
});

return seq_step_event;
}

template <typename Ty>
template <typename T>
sycl::event affine_sequence_step_kernel(sycl::queue &exec_q,
size_t nelems,
Ty start_v,
Ty end_v,
T start_v,
T end_v,
bool include_endpoint,
char *array_data,
const std::vector<sycl::event> &depends)
{
ndpx::runtime::kernel::types::validate_type_for_device<Ty>(exec_q);
ndpx::runtime::kernel::types::validate_type_for_device<T>(exec_q);
bool device_supports_doubles = exec_q.get_device().has(sycl::aspect::fp64);
sycl::event affine_seq_step_event = exec_q.submit([&](sycl::handler &cgh) {
cgh.depends_on(depends);
if (device_supports_doubles) {
cgh.parallel_for<ndpx_affine_sequence_step_kernel<Ty, double>>(
cgh.parallel_for<ndpx_affine_sequence_step_kernel<T, double>>(
sycl::range<1>{nelems},
AffineSequenceStepFunctor<Ty, double>(
AffineSequenceStepFunctor<T, double>(
array_data, start_v, end_v,
(include_endpoint) ? nelems - 1 : nelems));
}
else {
cgh.parallel_for<ndpx_affine_sequence_step_kernel<Ty, float>>(
cgh.parallel_for<ndpx_affine_sequence_step_kernel<T, float>>(
sycl::range<1>{nelems},
AffineSequenceStepFunctor<Ty, float>(
AffineSequenceStepFunctor<T, float>(
array_data, start_v, end_v,
(include_endpoint) ? nelems - 1 : nelems));
}
Expand All @@ -147,29 +147,29 @@ sycl::event affine_sequence_step_kernel(sycl::queue &exec_q,
return affine_seq_step_event;
}

// Type-erased front end for sequence_step_kernel: start and step arrive as
// void*, are reinterpreted as pointers to the element type, dereferenced, and
// forwarded together with the remaining arguments. Returns the event obtained
// from sequence_step_kernel.
//
// NOTE: this listing is a rendered diff without +/- markers. Wherever a line
// spelled with `Ty` is followed by the same line spelled with `T`, the first
// is the removed version and the second is its replacement.
template <typename Ty>
template <typename T>
sycl::event sequence_step(sycl::queue &exec_q,
size_t nelems,
void *start,
void *step,
char *array_data,
const std::vector<sycl::event> &depends)
{
Ty *start_v, *step_v;
T *start_v, *step_v;
// NOTE(review): reinterpret_cast does not throw, so the handler below is
// not reached by these two statements. Were it ever reached, start_v and
// step_v would still be uninitialized when dereferenced further down.
try {
start_v = reinterpret_cast<Ty *>(start);
step_v = reinterpret_cast<Ty *>(step);
start_v = reinterpret_cast<T *>(start);
step_v = reinterpret_cast<T *>(step);
} catch (const std::exception &e) {
std::cerr << e.what() << std::endl;
}

// NOTE(review): start and step are dereferenced without a null check; the
// caller must pass valid pointers to values of the element type.
auto sequence_step_event = sequence_step_kernel<Ty>(
auto sequence_step_event = sequence_step_kernel<T>(
exec_q, nelems, *start_v, *step_v, array_data, depends);

return sequence_step_event;
}

template <typename Ty>
template <typename T>
sycl::event affine_sequence_step(sycl::queue &exec_q,
size_t nelems,
void *start,
Expand All @@ -178,35 +178,35 @@ sycl::event affine_sequence_step(sycl::queue &exec_q,
char *array_data,
const std::vector<sycl::event> &depends)
{
Ty *start_v, *end_v;
T *start_v, *end_v;
try {
start_v = reinterpret_cast<Ty *>(start);
end_v = reinterpret_cast<Ty *>(end);
start_v = reinterpret_cast<T *>(start);
end_v = reinterpret_cast<T *>(end);
} catch (const std::exception &e) {
std::cerr << e.what() << std::endl;
}

auto affine_sequence_step_event =
affine_sequence_step_kernel<Ty>(exec_q, nelems, *start_v, *end_v,
include_endpoint, array_data, depends);
affine_sequence_step_kernel<T>(exec_q, nelems, *start_v, *end_v,
include_endpoint, array_data, depends);

return affine_sequence_step_event;
}

// Factory whose get() returns the sequence_step instantiation for one element
// type, converted to the function-pointer type fnT.
// NOTE: this listing is a rendered diff without +/- markers. Wherever a line
// spelled with `Ty` is followed by the same line spelled with `T`, the first
// is the removed version and the second is its replacement.
template <typename fnT, typename Ty> struct SequenceStepFactory
template <typename fnT, typename T> struct SequenceStepFactory
{
// Returns sequence_step for the element type as an fnT; the initialization
// only compiles when that instantiation is convertible to fnT.
fnT get()
{
fnT f = sequence_step<Ty>;
fnT f = sequence_step<T>;
return f;
}
};

// Factory whose get() returns the affine_sequence_step instantiation for one
// element type, converted to the function-pointer type fnT.
// NOTE: this listing is a rendered diff without +/- markers. Wherever a line
// spelled with `Ty` is followed by the same line spelled with `T`, the first
// is the removed version and the second is its replacement.
template <typename fnT, typename Ty> struct AffineSequenceStepFactory
template <typename fnT, typename T> struct AffineSequenceStepFactory
{
// Returns affine_sequence_step for the element type as an fnT; the
// initialization only compiles when that instantiation is convertible to
// fnT.
fnT get()
{
fnT f = affine_sequence_step<Ty>;
fnT f = affine_sequence_step<T>;
return f;
}
};
Expand Down

0 comments on commit 7e22ec5

Please sign in to comment.