Skip to content

Commit

Permalink
Remove redundant synchronizations (#75)
Browse files Browse the repository at this point in the history
  • Loading branch information
tkf authored Jul 19, 2021
1 parent d5d90bb commit 63968d9
Showing 1 changed file with 3 additions and 5 deletions.
8 changes: 3 additions & 5 deletions src/kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,7 @@ function _transduce!(buf, rf::F, init, arrays...) where {F}
@assert blocks <= kernel_config.blocks

if Base.issingletontype(acctype)
- # TODO: do I need sync here?
- CUDA.@sync @cuda(
+ @cuda(
threads = threads,
blocks = blocks,
shmem = shmem,
Expand All @@ -165,8 +164,7 @@ function _transduce!(buf, rf::F, init, arrays...) where {F}
# )
# end

- # TODO: do I need sync here?
- CUDA.@sync @cuda(
+ @cuda(
threads = threads,
blocks = blocks,
shmem = shmem,
Expand Down Expand Up @@ -298,6 +296,6 @@ function complete_on_device(rf_dev::RF, acc::ACC) where {RF, ACC}
return resulttype.instance
end
buf = allocate_buffer(resulttype, 1)
- CUDA.@sync @cuda complete_kernel!(buf, rf_dev, acc)
+ @cuda complete_kernel!(buf, rf_dev, acc)
return @allowscalar buf[1]
end

0 comments on commit 63968d9

Please sign in to comment.