-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
implementation of barrier operations + test cases
- Loading branch information
1 parent
3bca91c
commit 8774cd4
Showing
5 changed files
with
224 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
132 changes: 132 additions & 0 deletions
132
numba_dpex/experimental/_kernel_dpcpp_spirv_overloads/_barriers_overloads.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
# SPDX-FileCopyrightText: 2023 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
""" | ||
Provides overloads for functions included in kernel_iface.barrier that | ||
generate dpcpp SPIR-V LLVM IR intrinsic function calls. | ||
""" | ||
from llvmlite import ir as llvmir | ||
from numba.core import cgutils, types | ||
from numba.extending import intrinsic, overload | ||
|
||
from numba_dpex.core import itanium_mangler as ext_itanium_mangler | ||
from numba_dpex.experimental.kernel_iface import ( | ||
group_barrier, | ||
sub_group_barrier, | ||
) | ||
from numba_dpex.experimental.kernel_iface.memory_enums import ( | ||
MemoryOrder, | ||
MemoryScope, | ||
) | ||
from numba_dpex.experimental.target import DPEX_KERNEL_EXP_TARGET_NAME | ||
|
||
from ._spv_atomic_inst_helper import get_memory_semantics_mask, get_scope | ||
|
||
|
||
def _get_memory_scope(fence_scope): | ||
if isinstance(fence_scope, types.Literal): | ||
return get_scope(fence_scope.literal_value) | ||
return get_scope(fence_scope.value) | ||
|
||
|
||
@intrinsic | ||
def _intrinsic_barrier( | ||
ty_context, # pylint: disable=unused-argument | ||
ty_exec_scope, # pylint: disable=unused-argument | ||
ty_mem_scope, # pylint: disable=unused-argument | ||
ty_spirv_mem_sem_mask, # pylint: disable=unused-argument | ||
): | ||
sig = types.void(types.uint32, types.uint32, types.uint32) | ||
|
||
def _intrinsic_barrier_codegen( | ||
context, builder, sig, args | ||
): # pylint: disable=unused-argument | ||
fn_name = "__spirv_ControlBarrier" | ||
mangled_fn_name = ext_itanium_mangler.mangle_ext( | ||
fn_name, [types.uint32, types.uint32, types.uint32] | ||
) | ||
|
||
spirv_fn_arg_types = [ | ||
llvmir.IntType(32), | ||
llvmir.IntType(32), | ||
llvmir.IntType(32), | ||
] | ||
|
||
fnty = llvmir.FunctionType(llvmir.VoidType(), spirv_fn_arg_types) | ||
|
||
exec_scope_arg = builder.trunc(args[0], llvmir.IntType(32)) | ||
mem_scope_arg = builder.trunc(args[1], llvmir.IntType(32)) | ||
spirv_memory_semantics_mask_arg = builder.trunc( | ||
args[2], llvmir.IntType(32) | ||
) | ||
|
||
fn_args = [ | ||
exec_scope_arg, | ||
mem_scope_arg, | ||
spirv_memory_semantics_mask_arg, | ||
] | ||
|
||
fn = cgutils.get_or_insert_function( | ||
builder.module, fnty, mangled_fn_name | ||
) | ||
|
||
fn.attributes.add("convergent") | ||
fn.attributes.add("nounwind") | ||
fn.calling_convention = "spir_func" | ||
|
||
callinst = builder.call(fn, fn_args) | ||
|
||
callinst.attributes.add("convergent") | ||
callinst.attributes.add("nounwind") | ||
|
||
return ( | ||
sig, | ||
_intrinsic_barrier_codegen, | ||
) | ||
|
||
|
||
@overload( | ||
group_barrier, | ||
prefer_literal=True, | ||
target=DPEX_KERNEL_EXP_TARGET_NAME, | ||
) | ||
def _ol_group_barrier(fence_scope=MemoryScope.WORK_GROUP): | ||
spirv_memory_semantics_mask = get_memory_semantics_mask( | ||
MemoryOrder.SEQ_CST.value | ||
) | ||
exec_scope = get_scope(MemoryScope.WORK_GROUP.value) | ||
mem_scope = _get_memory_scope(fence_scope) | ||
|
||
def _ol_group_barrier_impl( | ||
fence_scope=MemoryScope.WORK_GROUP, | ||
): # pylint: disable=unused-argument | ||
# pylint: disable=no-value-for-parameter | ||
return _intrinsic_barrier( | ||
exec_scope, mem_scope, spirv_memory_semantics_mask | ||
) | ||
|
||
return _ol_group_barrier_impl | ||
|
||
|
||
@overload( | ||
sub_group_barrier, | ||
prefer_literal=True, | ||
target=DPEX_KERNEL_EXP_TARGET_NAME, | ||
) | ||
def _ol_sub_group_barrier(fence_scope=MemoryScope.SUB_GROUP): | ||
spirv_memory_semantics_mask = get_memory_semantics_mask( | ||
MemoryOrder.SEQ_CST.value | ||
) | ||
exec_scope = get_scope(MemoryScope.SUB_GROUP.value) | ||
mem_scope = _get_memory_scope(fence_scope) | ||
|
||
def _ol_sub_group_barrier_impl( | ||
fence_scope=MemoryScope.SUB_GROUP, | ||
): # pylint: disable=unused-argument | ||
# pylint: disable=no-value-for-parameter | ||
return _intrinsic_barrier( | ||
exec_scope, mem_scope, spirv_memory_semantics_mask | ||
) | ||
|
||
return _ol_sub_group_barrier_impl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# SPDX-FileCopyrightText: 2023 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
"""Python functions that simulate SYCL's barrier primitives. | ||
""" | ||
|
||
from .memory_enums import MemoryScope | ||
|
||
|
||
def group_barrier(fence_scope=MemoryScope.WORK_GROUP): | ||
"""Performs a barrier operation across all work-items in a work group. | ||
The function is modeled after the ``sycl::group_barrier`` function. It | ||
synchronizes work within a group of work items. All the work-items | ||
of the group must execute the barrier construct before any work-item | ||
continues execution beyond the barrier. However, unlike | ||
``sycl::group_barrier`` the numba_dpex function implicitly synchronizes at | ||
the level of a work group and does not allow specifying the group as an | ||
argument. The :func:`sub_group_barrier` function should be used if | ||
synchronization has to be performed only across a sub-group. | ||
The ``group_barrier`` performs mem-fence operations ensuring that memory | ||
accesses issued before the barrier are not re-ordered with those issued | ||
after the barrier: all work-items in group g execute a release fence prior | ||
to synchronizing at the barrier, all work-items in group g execute an | ||
acquire fence afterwards, and there is an implicit synchronization of these | ||
fences as if provided by an explicit atomic operation on an atomic object. | ||
Args: | ||
fence_scope (optional): scope of any memory consistency | ||
operations that are performed by the barrier. | ||
""" | ||
|
||
# TODO: A pure Python simulation of a group_barrier will be added later. | ||
raise NotImplementedError | ||
|
||
|
||
def sub_group_barrier(fence_scope=MemoryScope.SUB_GROUP): | ||
"""Performs a barrier operation across all work-items in a sub-group. | ||
Modeled after ``sycl::group_barrier`` function when invoked on a | ||
sub-group. Refer :func:`group_barrier` for further details. | ||
Args: | ||
fence_scope (optional): scope of any memory consistency | ||
operations that are performed by the barrier. | ||
""" | ||
|
||
# TODO: A pure Python simulation of a sub_group_barrier will be added later. | ||
raise NotImplementedError |
28 changes: 28 additions & 0 deletions
28
numba_dpex/tests/experimental/kernel_iface/spv_overloads/test_barriers.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import dpnp | ||
|
||
import numba_dpex as dpex | ||
import numba_dpex.experimental as dpex_exp | ||
from numba_dpex.experimental.kernel_iface import group_barrier | ||
|
||
|
||
def test_group_barrier(): | ||
"""A test for group_barrier function.""" | ||
|
||
@dpex_exp.kernel | ||
def _kernel(a, N): | ||
i = dpex.get_global_id(0) | ||
|
||
a[i] += 1 | ||
group_barrier() | ||
|
||
if i == 0: | ||
for idx in range(1, N): | ||
a[0] += a[idx] | ||
|
||
N = 8196 | ||
a = dpnp.ones(N, dtype=dpnp.int32) | ||
b = dpnp.ones(N, dtype=dpnp.int32) | ||
|
||
dpex_exp.call_kernel(_kernel, dpex.Range(N), a, N) | ||
|
||
assert a[0] == N * 2 |