Skip to content

Commit

Permalink
Need barrier after call to custom inclusive scan to avoid race condit…
Browse files Browse the repository at this point in the history
…ion (#1624)

added comments explaining why barriers are needed
  • Loading branch information
oleksandr-pavlyk authored Apr 1, 2024
1 parent 0d40493 commit b1016bf
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
3 changes: 3 additions & 0 deletions dpctl/tensor/libtensor/include/kernels/accumulators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,9 @@ inclusive_scan_base_step(sycl::queue &exec_q,
else {
wg_iscan_val = su_ns::custom_inclusive_scan_over_group(
it.get_group(), slm_iscan_tmp, local_iscan.back(), scan_op);
// ensure all finished reading from SLM, to avoid race condition
// with subsequent writes into SLM
it.barrier(sycl::access::fence_space::local_space);
}

slm_iscan_tmp[(lid + 1) % wg_size] = wg_iscan_val;
Expand Down
16 changes: 9 additions & 7 deletions dpctl/tensor/libtensor/include/utils/sycl_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#pragma once
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <sycl/sycl.hpp>
#include <type_traits>
#include <vector>
Expand Down Expand Up @@ -160,22 +161,23 @@ T custom_inclusive_scan_over_group(const GroupT &wg,
const T local_val,
const OpT &op)
{
auto local_id = wg.get_local_id(0);
auto wgs = wg.get_local_range(0);
const std::uint32_t local_id = wg.get_local_id(0);
const std::uint32_t wgs = wg.get_local_range(0);
local_mem_acc[local_id] = local_val;

sycl::group_barrier(wg, sycl::memory_scope::work_group);

if (wg.leader()) {
for (size_t i = 1; i < wgs; ++i) {
local_mem_acc[i] = op(local_mem_acc[i], local_mem_acc[i - 1]);
T scan_val = local_mem_acc[0];
for (std::uint32_t i = 1; i < wgs; ++i) {
scan_val = op(local_mem_acc[i], scan_val);
local_mem_acc[i] = scan_val;
}
}

T accumulated_local_val = local_mem_acc[local_id];
// ensure all work-items see the same SLM that leader updated
sycl::group_barrier(wg, sycl::memory_scope::work_group);

return accumulated_local_val;
return local_mem_acc[local_id];
}

// Reduction functors
Expand Down

0 comments on commit b1016bf

Please sign in to comment.