Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KOKKOS_FUNCTION-annotated for_each and transform_reduce #708

Draft
wants to merge 32 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
a8a4cbc
init
blegouix Dec 15, 2024
2ce680e
wip
blegouix Dec 15, 2024
00469c3
wip
blegouix Dec 15, 2024
81d8c9b
wip
blegouix Dec 15, 2024
4b83f7c
wip
blegouix Dec 15, 2024
f6d53fc
wip
blegouix Dec 15, 2024
df3da08
fix
blegouix Dec 15, 2024
98866b4
Apply suggestions from code review
blegouix Dec 15, 2024
d1f17f6
Apply suggestions from code review
blegouix Dec 15, 2024
f85b51a
Apply suggestions from code review
blegouix Dec 15, 2024
63b15d8
Apply suggestions from code review
blegouix Dec 15, 2024
8edf5fd
wip
blegouix Dec 15, 2024
95ba907
Update for_each.hpp
blegouix Dec 15, 2024
ef388cd
Update transform_reduce.hpp
blegouix Dec 15, 2024
102f0a9
Update for_each.cpp
blegouix Dec 15, 2024
b8a31a5
Update transform_reduce.cpp
blegouix Dec 15, 2024
b1d7075
clang-format
blegouix Dec 15, 2024
fa63605
wip
blegouix Dec 16, 2024
be9d263
doc
blegouix Dec 16, 2024
bbc693f
doc
blegouix Dec 16, 2024
ce927d9
1D test with for_each
blegouix Dec 17, 2024
f73122f
nonsense fix
blegouix Dec 30, 2024
5a2b798
Apply suggestions from code review
blegouix Dec 30, 2024
615f922
clang-format
blegouix Dec 30, 2024
6a3625e
HIPCC_COMPATIBLE_MAYBE_UNUSED
blegouix Dec 30, 2024
382d5b1
doc
blegouix Dec 30, 2024
6c37f29
comment the issue with nvcc
blegouix Dec 30, 2024
73ee089
fix attempt for hipcc
blegouix Dec 30, 2024
484f996
remove static_cast<int>
blegouix Jan 2, 2025
40a1dba
Merge branch 'main' into annotated_for_each
blegouix Jan 9, 2025
9bf07fc
Merge branch 'main' into annotated_for_each
blegouix Jan 11, 2025
ff64e39
Merge branch 'main' into annotated_for_each
blegouix Jan 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions include/ddc/for_each.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,23 @@ void for_each_serial(
}
}

/** Serial, recursive traversal of an N-dimensional index box. Annotated with KOKKOS_FUNCTION.
 *
 * Each recursion level loops over one dimension and appends the current index to the
 * pack `is`. Once N indices have been gathered, `f` is called with a `RetType` built
 * from them. The first dimension is the outermost loop, the last one the innermost.
 * @param[in] begin per-dimension first index (inclusive)
 * @param[in] end per-dimension upper bound (exclusive)
 * @param[in] f functor invoked as `f(RetType(is...))`
 * @param[in] is indices already fixed by the enclosing recursion levels
 */
template <class RetType, class Element, std::size_t N, class Functor, class... Is>
KOKKOS_FUNCTION void annotated_for_each_serial(
        std::array<Element, N> const& begin,
        std::array<Element, N> const& end,
        Functor const& f,
        Is const&... is) noexcept
{
    // Number of dimensions already bound, i.e. the dimension handled at this level.
    constexpr std::size_t depth = sizeof...(Is);
    if constexpr (depth != N) {
        for (Element idx = begin[depth]; idx < end[depth]; ++idx) {
            annotated_for_each_serial<RetType>(begin, end, f, is..., idx);
        }
    } else {
        // Every dimension is bound: hand the assembled element to the functor.
        f(RetType(is...));
    }
}

} // namespace detail

/** iterates over a nD domain in serial
Expand All @@ -46,4 +63,21 @@ void for_each(Support const& domain, Functor&& f) noexcept
detail::for_each_serial(domain, size, std::forward<Functor>(f));
}

/** Serially visits every element of a nD discrete domain. Can be called from a device kernel.
 * @param[in] domain the domain over which to iterate
 * @param[in] f a functor taking an index (a `DiscreteElement<DDims...>`) as parameter
 */
template <class... DDims, class Functor>
KOKKOS_FUNCTION void annotated_for_each(
        DiscreteDomain<DDims...> const& domain,
        Functor&& f) noexcept
{
    using element_type = DiscreteElement<DDims...>;

    // Bounds of the traversal: first element and one-past-the-last in every dimension.
    element_type const first = domain.front();
    element_type const past_the_end = domain.front() + domain.extents();

    // The recursive helper works on plain per-dimension index arrays.
    std::array const lower = detail::array(first);
    std::array const upper = detail::array(past_the_end);
    detail::annotated_for_each_serial<element_type>(lower, upper, std::forward<Functor>(f));
}

} // namespace ddc
66 changes: 66 additions & 0 deletions include/ddc/transform_reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,50 @@ T transform_reduce_serial(
DDC_IF_NVCC_THEN_POP
}

/** Recursive serial reduction over a nD domain. Can be called from a device kernel.
 *
 * Each recursion level loops over one dimension of `domain` and appends the current
 * element to `dcoords`. Once one coordinate per dimension has been gathered, `transform`
 * is applied to the assembled element.
 * @param[in] domain the range over which to apply the algorithm
 * @param[in] neutral the neutral element of the reduction operation
 * @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the
 *            results of transform, the results of other reduce and neutral.
 * @param[in] transform a unary FunctionObject that will be applied to each element of the input
 *            range. The return type must be acceptable as input to reduce
 * @param[in] dcoords discrete elements from dimensions already in a loop
 * @return the value of `transform` at the leaf level, otherwise the reduction of the
 *         sub-results along the current dimension, starting from `neutral`
 */
template <
        class... DDims,
        class T,
        class BinaryReductionOp,
        class UnaryTransformOp,
        class... DCoords>
KOKKOS_FUNCTION T annotated_transform_reduce_serial(
        DiscreteDomain<DDims...> const& domain,
        [[maybe_unused]] T const neutral,
        BinaryReductionOp const& reduce,
        UnaryTransformOp const& transform,
        DCoords const&... dcoords) noexcept
{
    DDC_IF_NVCC_THEN_PUSH_AND_SUPPRESS(implicit_return_from_non_void_function)
    if constexpr (sizeof...(DCoords) != sizeof...(DDims)) {
        // Dimension handled at this level: the first one without a bound coordinate.
        using LoopDDim = type_seq_element_t<sizeof...(DCoords), detail::TypeSeq<DDims...>>;
        T accumulator = neutral;
        for (DiscreteElement<LoopDDim> const ielem : select<LoopDDim>(domain)) {
            accumulator = reduce(
                    accumulator,
                    annotated_transform_reduce_serial(
                            domain,
                            neutral,
                            reduce,
                            transform,
                            dcoords...,
                            ielem));
        }
        return accumulator;
    } else {
        // All coordinates are bound: evaluate the transform on the full element.
        return transform(DiscreteElement<DDims...>(dcoords...));
    }
    DDC_IF_NVCC_THEN_POP
}

} // namespace detail

/** A reduction over a nD domain in serial
Expand All @@ -71,4 +115,26 @@ T transform_reduce(
std::forward<UnaryTransformOp>(transform));
}

/** A reduction over a nD domain in serial. Can be called from a device kernel.
* Forwards all its arguments to detail::annotated_transform_reduce_serial.
* @param[in] domain the range over which to apply the algorithm
* @param[in] neutral the neutral element of the reduction operation
* @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the
* results of transform, the results of other reduce and neutral.
* @param[in] transform a unary FunctionObject that will be applied to each element of the input
* range. The return type must be acceptable as input to reduce
* @return the value computed by detail::annotated_transform_reduce_serial for these arguments
*/
template <class... DDims, class T, class BinaryReductionOp, class UnaryTransformOp>
KOKKOS_FUNCTION T annotated_transform_reduce(
DiscreteDomain<DDims...> const& domain,
T neutral,
BinaryReductionOp&& reduce,
UnaryTransformOp&& transform) noexcept
{
return detail::annotated_transform_reduce_serial(
domain,
neutral,
std::forward<BinaryReductionOp>(reduce),
std::forward<UnaryTransformOp>(transform));
}

} // namespace ddc
70 changes: 70 additions & 0 deletions tests/for_each.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

#include <gtest/gtest.h>

#include <Kokkos_StdAlgorithms.hpp>

namespace DDC_HIP_5_7_ANONYMOUS_NAMESPACE_WORKAROUND(FOR_EACH_CPP) {

using DElem0D = ddc::DiscreteElement<>;
Expand Down Expand Up @@ -81,3 +83,71 @@ TEST(ForEachSerialHost, TwoDimensions)
ddc::for_each(dom, [=](DElemXY const ixy) { view(ixy) += 1; });
EXPECT_EQ(std::count(storage.begin(), storage.end(), 1), dom.size());
}

// Writes 1 into every element of `view` by calling ddc::annotated_for_each from inside
// a ddc::parallel_for_each launched over a 0-dimensional domain on the default execution space.
void TestAnnotatedForEachSerialDevice1D(
        ddc::ChunkSpan<
                int,
                DDomX,
                Kokkos::layout_right,
                typename Kokkos::DefaultExecutionSpace::memory_space> view)
{
    auto const exec_space = Kokkos::DefaultExecutionSpace();
    ddc::parallel_for_each(
            exec_space,
            DDom0D(),
            KOKKOS_LAMBDA(DElem0D) {
                ddc::annotated_for_each(view.domain(), [=](DElemX const ix) { view(ix) = 1; });
            });
}

// Checks that annotated_for_each, called from a kernel, touches every element of a 1D domain:
// after the call the number of entries equal to 1 must match the domain size.
TEST(AnnotatedForEachSerialDevice, OneDimension)
{
    using ExecSpace = Kokkos::DefaultExecutionSpace;
    using DeviceSpan = ddc::
            ChunkSpan<int, DDomX, Kokkos::layout_right, typename ExecSpace::memory_space>;

    DDomX const dom(lbound_x, nelems_x);
    Kokkos::View<int*, Kokkos::LayoutRight, ExecSpace> const storage("", dom.size());
    DeviceSpan const view(storage.data(), dom);

    TestAnnotatedForEachSerialDevice1D(view);

    auto const nb_ones = Kokkos::Experimental::
            count(ExecSpace(),
                  Kokkos::Experimental::begin(storage),
                  Kokkos::Experimental::end(storage),
                  1);
    EXPECT_EQ(nb_ones, dom.size());
}

// Writes 1 into every element of the 2D `view` by calling ddc::annotated_for_each from inside
// a ddc::parallel_for_each launched over a 0-dimensional domain on the default execution space.
void TestAnnotatedForEachSerialDevice2D(
        ddc::ChunkSpan<
                int,
                DDomXY,
                Kokkos::layout_right,
                typename Kokkos::DefaultExecutionSpace::memory_space> view)
{
    auto const exec_space = Kokkos::DefaultExecutionSpace();
    ddc::parallel_for_each(
            exec_space,
            DDom0D(),
            KOKKOS_LAMBDA(DElem0D) {
                ddc::annotated_for_each(view.domain(), [=](DElemXY const ixy) {
                    view(ixy) = 1;
                });
            });
}

// Checks that annotated_for_each, called from a kernel, touches every element of a 2D domain:
// after the call the number of entries equal to 1 must match the domain size.
TEST(AnnotatedForEachSerialDevice, TwoDimensions)
{
    using ExecSpace = Kokkos::DefaultExecutionSpace;
    using DeviceSpan = ddc::
            ChunkSpan<int, DDomXY, Kokkos::layout_right, typename ExecSpace::memory_space>;

    DDomXY const dom(lbound_x_y, nelems_x_y);
    Kokkos::View<int*, Kokkos::LayoutRight, ExecSpace> const storage("", dom.size());
    DeviceSpan const view(storage.data(), dom);

    TestAnnotatedForEachSerialDevice2D(view);

    auto const nb_ones = Kokkos::Experimental::
            count(ExecSpace(),
                  Kokkos::Experimental::begin(storage),
                  Kokkos::Experimental::end(storage),
                  1);
    EXPECT_EQ(nb_ones, dom.size());
}
38 changes: 38 additions & 0 deletions tests/transform_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

#include <gtest/gtest.h>

#include <Kokkos_StdAlgorithms.hpp>

using DElem0D = ddc::DiscreteElement<>;
using DVect0D = ddc::DiscreteVector<>;
using DDom0D = ddc::DiscreteDomain<>;
Expand Down Expand Up @@ -74,3 +76,39 @@ TEST(TransformReduce, TwoDimensions)
ddc::transform_reduce(dom, 0, ddc::reducer::sum<int>(), chunk),
dom.size() * (dom.size() - 1) / 2);
}

// Runs ddc::annotated_transform_reduce (sum reducer, neutral 0) over the whole domain of `chunk`
// from inside a kernel, stores the result in a rank-0 view and returns it after copying to host.
int TestAnnotatedTransformReduce(
        ddc::ChunkSpan<
                int,
                DDomXY,
                Kokkos::layout_right,
                typename Kokkos::DefaultExecutionSpace::memory_space> chunk)
{
    Kokkos::View<int, Kokkos::LayoutRight, Kokkos::DefaultExecutionSpace> const device_sum("");
    auto const exec_space = Kokkos::DefaultExecutionSpace();
    ddc::parallel_for_each(
            exec_space,
            DDom0D(),
            KOKKOS_LAMBDA(DElem0D) {
                device_sum() = ddc::annotated_transform_reduce(
                        chunk.domain(),
                        0,
                        ddc::reducer::sum<int>(),
                        chunk);
            });
    // Bring the scalar result back to host memory before reading it.
    Kokkos::View<int, Kokkos::LayoutRight, Kokkos::DefaultHostExecutionSpace> const host_sum
            = Kokkos::create_mirror_view_and_copy(Kokkos::DefaultHostExecutionSpace(), device_sum);
    return host_sum();
}

// Fills a 2D chunk with ones and checks that the sum computed by annotated_transform_reduce
// from inside a kernel equals the number of elements of the domain.
TEST(AnnotatedTransformReduce, TwoDimensions)
{
    using ExecSpace = Kokkos::DefaultExecutionSpace;
    using DeviceSpan = ddc::
            ChunkSpan<int, DDomXY, Kokkos::layout_right, typename ExecSpace::memory_space>;

    DDomXY const dom(lbound_x_y, nelems_x_y);
    Kokkos::View<int*, Kokkos::LayoutRight, ExecSpace> const storage("", dom.size());
    Kokkos::Experimental::fill(ExecSpace(), storage, 1);
    DeviceSpan const chunk(storage.data(), dom);

    int const sum = TestAnnotatedTransformReduce(chunk);
    EXPECT_EQ(sum, dom.size());
}