-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
implementation of barrier operation and test case
Co-authored-by: Yevhenii Havrylko <[email protected]>
- Loading branch information
1 parent
b0ea6aa
commit 8ca9551
Showing
5 changed files
with
203 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
137 changes: 137 additions & 0 deletions
137
numba_dpex/experimental/_kernel_dpcpp_spirv_overloads/_barriers_overloads.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
# SPDX-FileCopyrightText: 2023 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
""" | ||
Provides overloads for functions included in kernel_iface.barrier that | ||
generate dpcpp SPIR-V LLVM IR intrinsic function calls. | ||
""" | ||
from llvmlite import ir as llvmir | ||
from numba.core import cgutils, types | ||
from numba.extending import intrinsic, overload | ||
|
||
from numba_dpex.core import itanium_mangler as ext_itanium_mangler | ||
from numba_dpex.experimental.target import DPEX_KERNEL_EXP_TARGET_NAME | ||
from numba_dpex.kernel_api import group_barrier | ||
from numba_dpex.kernel_api.memory_enums import MemoryOrder, MemoryScope | ||
|
||
from ._spv_atomic_inst_helper import get_memory_semantics_mask, get_scope | ||
|
||
_SUPPORT_CONVERGENT = True | ||
|
||
try: | ||
llvmir.FunctionAttributes("convergent") | ||
except ValueError: | ||
_SUPPORT_CONVERGENT = False | ||
|
||
|
||
def _get_memory_scope(fence_scope): | ||
if isinstance(fence_scope, types.Literal): | ||
return get_scope(fence_scope.literal_value) | ||
return get_scope(fence_scope.value) | ||
|
||
|
||
@intrinsic | ||
def _intrinsic_barrier( | ||
ty_context, # pylint: disable=unused-argument | ||
ty_exec_scope, # pylint: disable=unused-argument | ||
ty_mem_scope, # pylint: disable=unused-argument | ||
ty_spirv_mem_sem_mask, # pylint: disable=unused-argument | ||
): | ||
# Signature of `__spirv_control_barrier` call that is | ||
# generated for group_barrier. It takes three arguments - | ||
# exec_scope, memory_scope and memory_semantics_mask. | ||
# All arguments have to be of type unsigned int32. | ||
sig = types.void(types.uint32, types.uint32, types.uint32) | ||
|
||
def _intrinsic_barrier_codegen( | ||
context, builder, sig, args # pylint: disable=unused-argument | ||
): | ||
exec_scope_arg = builder.trunc(args[0], llvmir.IntType(32)) | ||
mem_scope_arg = builder.trunc(args[1], llvmir.IntType(32)) | ||
spirv_memory_semantics_mask_arg = builder.trunc( | ||
args[2], llvmir.IntType(32) | ||
) | ||
|
||
fn_args = [ | ||
exec_scope_arg, | ||
mem_scope_arg, | ||
spirv_memory_semantics_mask_arg, | ||
] | ||
|
||
mangled_fn_name = ext_itanium_mangler.mangle_ext( | ||
"__spirv_ControlBarrier", [types.uint32, types.uint32, types.uint32] | ||
) | ||
|
||
spirv_fn_arg_types = [ | ||
llvmir.IntType(32), | ||
llvmir.IntType(32), | ||
llvmir.IntType(32), | ||
] | ||
|
||
fn = cgutils.get_or_insert_function( | ||
builder.module, | ||
llvmir.FunctionType(llvmir.VoidType(), spirv_fn_arg_types), | ||
mangled_fn_name, | ||
) | ||
|
||
if _SUPPORT_CONVERGENT: | ||
fn.attributes.add("convergent") | ||
fn.attributes.add("nounwind") | ||
fn.calling_convention = "spir_func" | ||
|
||
callinst = builder.call(fn, fn_args) | ||
|
||
if _SUPPORT_CONVERGENT: | ||
callinst.attributes.add("convergent") | ||
callinst.attributes.add("nounwind") | ||
|
||
return ( | ||
sig, | ||
_intrinsic_barrier_codegen, | ||
) | ||
|
||
|
||
@overload( | ||
group_barrier, | ||
prefer_literal=True, | ||
target=DPEX_KERNEL_EXP_TARGET_NAME, | ||
) | ||
def ol_group_barrier(fence_scope=MemoryScope.WORK_GROUP): | ||
"""SPIR-V overload for | ||
:meth:`numba_dpex.kernel_api.group_barrier`. | ||
Generates the same LLVM IR instruction as dpcpp for the | ||
`group_barrier` function. | ||
""" | ||
|
||
# Per SYCL spec, group_barrier must perform both control | ||
# barrier and memory fence operations. Hence, | ||
# group_barrier requires two scopes and memory | ||
# consistency specification as three arguments. | ||
# | ||
# mem_scope - scope of any memory consistency operations | ||
# that are performed by the barrier. By default, | ||
# mem_scope is set to `work_group`. | ||
# exec_scope - scope that determines the set of work-items | ||
# that synchronize at barrier. | ||
# Set to `work_group` for group_barrier always. | ||
# spirv_memory_semantics_mask - Based on sycl implementation, | ||
# Mask that is set to use sequential consistency | ||
# memory order semantics always. | ||
|
||
mem_scope = _get_memory_scope(fence_scope) | ||
exec_scope = get_scope(MemoryScope.WORK_GROUP.value) | ||
spirv_memory_semantics_mask = get_memory_semantics_mask( | ||
MemoryOrder.SEQ_CST.value | ||
) | ||
|
||
def _ol_group_barrier_impl( | ||
fence_scope=MemoryScope.WORK_GROUP, | ||
): # pylint: disable=unused-argument | ||
# pylint: disable=no-value-for-parameter | ||
return _intrinsic_barrier( | ||
exec_scope, mem_scope, spirv_memory_semantics_mask | ||
) | ||
|
||
return _ol_group_barrier_impl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# SPDX-FileCopyrightText: 2023 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
"""Python functions that simulate SYCL's barrier primitives. | ||
""" | ||
|
||
from .memory_enums import MemoryScope | ||
|
||
|
||
def group_barrier(fence_scope=MemoryScope.WORK_GROUP): | ||
"""Performs a barrier operation across all work-items in a work group. | ||
The function is modeled after the ``sycl::group_barrier`` function. It | ||
synchronizes work within a group of work items. All the work-items | ||
of the group must execute the barrier construct before any work-item | ||
continues execution beyond the barrier. However, unlike | ||
``sycl::group_barrier`` the numba_dpex function implicitly synchronizes at | ||
the level of a work group and does not allow specifying the group as an | ||
argument. The :func:`sub_group_barrier` function should be used if | ||
synchronization has to be performed only across a sub-group. | ||
The ``group_barrier`` performs mem-fence operations ensuring that memory | ||
accesses issued before the barrier are not re-ordered with those issued | ||
after the barrier: all work-items in group g execute a release fence prior | ||
to synchronizing at the barrier, all work-items in group g execute an | ||
acquire fence afterwards, and there is an implicit synchronization of these | ||
fences as if provided by an explicit atomic operation on an atomic object. | ||
Args: | ||
fence_scope (optional): scope of any memory consistency | ||
operations that are performed by the barrier. | ||
""" | ||
|
||
# TODO: A pure Python simulation of a group_barrier will be added later. | ||
raise NotImplementedError |
27 changes: 27 additions & 0 deletions
27
numba_dpex/tests/experimental/kernel_api_overloads/spv_overloads/test_barriers.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import dpnp | ||
|
||
import numba_dpex as dpex | ||
import numba_dpex.experimental as dpex_exp | ||
from numba_dpex.kernel_api import MemoryScope, NdItem, group_barrier | ||
|
||
|
||
def test_group_barrier(): | ||
"""A test for group_barrier function.""" | ||
|
||
@dpex_exp.kernel | ||
def _kernel(nd_item: NdItem, a): | ||
i = nd_item.get_global_id(0) | ||
|
||
a[i] += 1 | ||
group_barrier(MemoryScope.DEVICE) | ||
|
||
if i == 0: | ||
for idx in range(1, a.size): | ||
a[0] += a[idx] | ||
|
||
N = 16 | ||
a = dpnp.ones(N, dtype=dpnp.int32) | ||
|
||
dpex_exp.call_kernel(_kernel, dpex.NdRange((N,), (N,)), a) | ||
|
||
assert a[0] == N * 2 |