Skip to content

Commit

Permalink
implementation of barrier operation and test case
Browse files Browse the repository at this point in the history
Co-authored-by: Yevhenii Havrylko <[email protected]>
  • Loading branch information
adarshyoga and ZzEeKkAa committed Feb 2, 2024
1 parent b0ea6aa commit 8ca9551
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 0 deletions.
1 change: 1 addition & 0 deletions numba_dpex/experimental/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from ._kernel_dpcpp_spirv_overloads import (
_atomic_ref_overloads,
_barriers_overloads,
_index_space_id_overloads,
)
from .decorators import device_func, kernel
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# SPDX-FileCopyrightText: 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""
Provides overloads for functions included in kernel_iface.barrier that
generate dpcpp SPIR-V LLVM IR intrinsic function calls.
"""
from llvmlite import ir as llvmir
from numba.core import cgutils, types
from numba.extending import intrinsic, overload

from numba_dpex.core import itanium_mangler as ext_itanium_mangler
from numba_dpex.experimental.target import DPEX_KERNEL_EXP_TARGET_NAME
from numba_dpex.kernel_api import group_barrier
from numba_dpex.kernel_api.memory_enums import MemoryOrder, MemoryScope

from ._spv_atomic_inst_helper import get_memory_semantics_mask, get_scope

_SUPPORT_CONVERGENT = True

try:
llvmir.FunctionAttributes("convergent")
except ValueError:
_SUPPORT_CONVERGENT = False


def _get_memory_scope(fence_scope):
if isinstance(fence_scope, types.Literal):
return get_scope(fence_scope.literal_value)
return get_scope(fence_scope.value)


@intrinsic
def _intrinsic_barrier(
ty_context, # pylint: disable=unused-argument
ty_exec_scope, # pylint: disable=unused-argument
ty_mem_scope, # pylint: disable=unused-argument
ty_spirv_mem_sem_mask, # pylint: disable=unused-argument
):
# Signature of `__spirv_control_barrier` call that is
# generated for group_barrier. It takes three arguments -
# exec_scope, memory_scope and memory_semantics_mask.
# All arguments have to be of type unsigned int32.
sig = types.void(types.uint32, types.uint32, types.uint32)

def _intrinsic_barrier_codegen(
context, builder, sig, args # pylint: disable=unused-argument
):
exec_scope_arg = builder.trunc(args[0], llvmir.IntType(32))
mem_scope_arg = builder.trunc(args[1], llvmir.IntType(32))
spirv_memory_semantics_mask_arg = builder.trunc(
args[2], llvmir.IntType(32)
)

fn_args = [
exec_scope_arg,
mem_scope_arg,
spirv_memory_semantics_mask_arg,
]

mangled_fn_name = ext_itanium_mangler.mangle_ext(
"__spirv_ControlBarrier", [types.uint32, types.uint32, types.uint32]
)

spirv_fn_arg_types = [
llvmir.IntType(32),
llvmir.IntType(32),
llvmir.IntType(32),
]

fn = cgutils.get_or_insert_function(
builder.module,
llvmir.FunctionType(llvmir.VoidType(), spirv_fn_arg_types),
mangled_fn_name,
)

if _SUPPORT_CONVERGENT:
fn.attributes.add("convergent")
fn.attributes.add("nounwind")
fn.calling_convention = "spir_func"

callinst = builder.call(fn, fn_args)

if _SUPPORT_CONVERGENT:
callinst.attributes.add("convergent")
callinst.attributes.add("nounwind")

return (
sig,
_intrinsic_barrier_codegen,
)


@overload(
group_barrier,
prefer_literal=True,
target=DPEX_KERNEL_EXP_TARGET_NAME,
)
def ol_group_barrier(fence_scope=MemoryScope.WORK_GROUP):
"""SPIR-V overload for
:meth:`numba_dpex.kernel_api.group_barrier`.
Generates the same LLVM IR instruction as dpcpp for the
`group_barrier` function.
"""

# Per SYCL spec, group_barrier must perform both control
# barrier and memory fence operations. Hence,
# group_barrier requires two scopes and memory
# consistency specification as three arguments.
#
# mem_scope - scope of any memory consistency operations
# that are performed by the barrier. By default,
# mem_scope is set to `work_group`.
# exec_scope - scope that determines the set of work-items
# that synchronize at barrier.
# Set to `work_group` for group_barrier always.
# spirv_memory_semantics_mask - Based on sycl implementation,
# Mask that is set to use sequential consistency
# memory order semantics always.

mem_scope = _get_memory_scope(fence_scope)
exec_scope = get_scope(MemoryScope.WORK_GROUP.value)
spirv_memory_semantics_mask = get_memory_semantics_mask(
MemoryOrder.SEQ_CST.value
)

def _ol_group_barrier_impl(
fence_scope=MemoryScope.WORK_GROUP,
): # pylint: disable=unused-argument
# pylint: disable=no-value-for-parameter
return _intrinsic_barrier(
exec_scope, mem_scope, spirv_memory_semantics_mask
)

return _ol_group_barrier_impl
2 changes: 2 additions & 0 deletions numba_dpex/kernel_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"""

from .atomic_ref import AtomicRef
from .barrier import group_barrier
from .index_space_ids import Item, NdItem
from .memory_enums import AddressSpace, MemoryOrder, MemoryScope
from .ranges import NdRange, Range
Expand All @@ -23,4 +24,5 @@
"Range",
"NdItem",
"Item",
"group_barrier",
]
36 changes: 36 additions & 0 deletions numba_dpex/kernel_api/barrier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# SPDX-FileCopyrightText: 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""Python functions that simulate SYCL's barrier primitives.
"""

from .memory_enums import MemoryScope


def group_barrier(fence_scope=MemoryScope.WORK_GROUP):
"""Performs a barrier operation across all work-items in a work group.
The function is modeled after the ``sycl::group_barrier`` function. It
synchronizes work within a group of work items. All the work-items
of the group must execute the barrier construct before any work-item
continues execution beyond the barrier. However, unlike
``sycl::group_barrier`` the numba_dpex function implicitly synchronizes at
the level of a work group and does not allow specifying the group as an
argument. The :func:`sub_group_barrier` function should be used if
synchronization has to be performed only across a sub-group.
The ``group_barrier`` performs mem-fence operations ensuring that memory
accesses issued before the barrier are not re-ordered with those issued
after the barrier: all work-items in group g execute a release fence prior
to synchronizing at the barrier, all work-items in group g execute an
acquire fence afterwards, and there is an implicit synchronization of these
fences as if provided by an explicit atomic operation on an atomic object.
Args:
fence_scope (optional): scope of any memory consistency
operations that are performed by the barrier.
"""

# TODO: A pure Python simulation of a group_barrier will be added later.
raise NotImplementedError
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import dpnp

import numba_dpex as dpex
import numba_dpex.experimental as dpex_exp
from numba_dpex.kernel_api import MemoryScope, NdItem, group_barrier


def test_group_barrier():
"""A test for group_barrier function."""

@dpex_exp.kernel
def _kernel(nd_item: NdItem, a):
i = nd_item.get_global_id(0)

a[i] += 1
group_barrier(MemoryScope.DEVICE)

if i == 0:
for idx in range(1, a.size):
a[0] += a[idx]

N = 16
a = dpnp.ones(N, dtype=dpnp.int32)

dpex_exp.call_kernel(_kernel, dpex.NdRange((N,), (N,)), a)

assert a[0] == N * 2

0 comments on commit 8ca9551

Please sign in to comment.