Skip to content

Commit

Permalink
implementation of barrier operations + test cases
Browse files Browse the repository at this point in the history
  • Loading branch information
adarshyoga committed Jan 12, 2024
1 parent 315b6e7 commit d069324
Show file tree
Hide file tree
Showing 5 changed files with 224 additions and 2 deletions.
5 changes: 4 additions & 1 deletion numba_dpex/experimental/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@

from numba.core.imputils import Registry

from ._kernel_dpcpp_spirv_overloads import _atomic_ref_overloads
from ._kernel_dpcpp_spirv_overloads import (
_atomic_ref_overloads,
_barriers_overloads,
)
from .decorators import device_func, kernel
from .kernel_dispatcher import KernelDispatcher
from .launcher import call_kernel, call_kernel_async
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# SPDX-FileCopyrightText: 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""
Provides overloads for functions included in kernel_iface.barrier that
generate dpcpp SPIR-V LLVM IR intrinsic function calls.
"""
from llvmlite import ir as llvmir
from numba.core import cgutils, types
from numba.extending import intrinsic, overload

from numba_dpex.core import itanium_mangler as ext_itanium_mangler
from numba_dpex.experimental.kernel_iface import (
group_barrier,
sub_group_barrier,
)
from numba_dpex.experimental.kernel_iface.memory_enums import (
MemoryOrder,
MemoryScope,
)
from numba_dpex.experimental.target import DPEX_KERNEL_EXP_TARGET_NAME

from ._spv_atomic_inst_helper import get_memory_semantics_mask, get_scope


def _get_memory_scope(fence_scope):
if isinstance(fence_scope, types.Literal):
return get_scope(fence_scope.literal_value)
return get_scope(fence_scope.value)


@intrinsic
def _intrinsic_barrier(
ty_context, # pylint: disable=unused-argument
ty_exec_scope, # pylint: disable=unused-argument
ty_mem_scope, # pylint: disable=unused-argument
ty_spirv_mem_sem_mask, # pylint: disable=unused-argument
):
sig = types.void(types.uint32, types.uint32, types.uint32)

def _intrinsic_barrier_codegen(
context, builder, sig, args
): # pylint: disable=unused-argument
fn_name = "__spirv_ControlBarrier"
mangled_fn_name = ext_itanium_mangler.mangle_ext(
fn_name, [types.uint32, types.uint32, types.uint32]
)

spirv_fn_arg_types = [
llvmir.IntType(32),
llvmir.IntType(32),
llvmir.IntType(32),
]

fnty = llvmir.FunctionType(llvmir.VoidType(), spirv_fn_arg_types)

exec_scope_arg = builder.trunc(args[0], llvmir.IntType(32))
mem_scope_arg = builder.trunc(args[1], llvmir.IntType(32))
spirv_memory_semantics_mask_arg = builder.trunc(
args[2], llvmir.IntType(32)
)

fn_args = [
exec_scope_arg,
mem_scope_arg,
spirv_memory_semantics_mask_arg,
]

fn = cgutils.get_or_insert_function(
builder.module, fnty, mangled_fn_name
)

fn.attributes.add("convergent")
fn.attributes.add("nounwind")
fn.calling_convention = "spir_func"

callinst = builder.call(fn, fn_args)

callinst.attributes.add("convergent")
callinst.attributes.add("nounwind")

return (
sig,
_intrinsic_barrier_codegen,
)


@overload(
group_barrier,
prefer_literal=True,
target=DPEX_KERNEL_EXP_TARGET_NAME,
)
def _ol_group_barrier(fence_scope=MemoryScope.WORK_GROUP):
spirv_memory_semantics_mask = get_memory_semantics_mask(
MemoryOrder.SEQ_CST.value
)
exec_scope = get_scope(MemoryScope.WORK_GROUP.value)
mem_scope = _get_memory_scope(fence_scope)

def _ol_group_barrier_impl(
fence_scope=MemoryScope.WORK_GROUP,
): # pylint: disable=unused-argument
# pylint: disable=no-value-for-parameter
return _intrinsic_barrier(
exec_scope, mem_scope, spirv_memory_semantics_mask
)

return _ol_group_barrier_impl


@overload(
sub_group_barrier,
prefer_literal=True,
target=DPEX_KERNEL_EXP_TARGET_NAME,
)
def _ol_sub_group_barrier(fence_scope=MemoryScope.SUB_GROUP):
spirv_memory_semantics_mask = get_memory_semantics_mask(
MemoryOrder.SEQ_CST.value
)
exec_scope = get_scope(MemoryScope.SUB_GROUP.value)
mem_scope = _get_memory_scope(fence_scope)

def _ol_sub_group_barrier_impl(
fence_scope=MemoryScope.SUB_GROUP,
): # pylint: disable=unused-argument
# pylint: disable=no-value-for-parameter
return _intrinsic_barrier(
exec_scope, mem_scope, spirv_memory_semantics_mask
)

return _ol_sub_group_barrier_impl
10 changes: 9 additions & 1 deletion numba_dpex/experimental/kernel_iface/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@
"""

from .atomic_ref import AtomicRef
from .barrier import group_barrier, sub_group_barrier
from .memory_enums import AddressSpace, MemoryOrder, MemoryScope

__all__ = ["AddressSpace", "AtomicRef", "MemoryOrder", "MemoryScope"]
__all__ = [
"group_barrier",
"sub_group_barrier",
"AddressSpace",
"AtomicRef",
"MemoryOrder",
"MemoryScope",
]
51 changes: 51 additions & 0 deletions numba_dpex/experimental/kernel_iface/barrier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# SPDX-FileCopyrightText: 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""Python functions that simulate SYCL's barrier primitives.
"""

from .memory_enums import MemoryScope


def group_barrier(fence_scope=MemoryScope.WORK_GROUP):
"""Performs a barrier operation across all work-items in a work group.
The function is modeled after the ``sycl::group_barrier`` function. It
synchronizes work within a group of work items. All the work-items
of the group must execute the barrier construct before any work-item
continues execution beyond the barrier. However, unlike
``sycl::group_barrier`` the numba_dpex function implicitly synchronizes at
the level of a work group and does not allow specifying the group as an
argument. The :func:`sub_group_barrier` function should be used if
synchronization has to be performed only across a sub-group.
The ``group_barrier`` performs mem-fence operations ensuring that memory
accesses issued before the barrier are not re-ordered with those issued
after the barrier: all work-items in group g execute a release fence prior
to synchronizing at the barrier, all work-items in group g execute an
acquire fence afterwards, and there is an implicit synchronization of these
fences as if provided by an explicit atomic operation on an atomic object.
Args:
fence_scope (optional): scope of any memory consistency
operations that are performed by the barrier.
"""

# TODO: A pure Python simulation of a group_barrier will be added later.
raise NotImplementedError


def sub_group_barrier(fence_scope=MemoryScope.SUB_GROUP):
"""Performs a barrier operation across all work-items in a sub-group.
Modeled after ``sycl::group_barrier`` function when invoked on a
sub-group. Refer :func:`group_barrier` for further details.
Args:
fence_scope (optional): scope of any memory consistency
operations that are performed by the barrier.
"""

# TODO: A pure Python simulation of a sub_group_barrier will be added later.
raise NotImplementedError
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import dpnp

import numba_dpex as dpex
import numba_dpex.experimental as dpex_exp
from numba_dpex.experimental.kernel_iface import group_barrier


def test_group_barrier():
"""A test for group_barrier function."""

@dpex_exp.kernel
def _kernel(a, N):
i = dpex.get_global_id(0)

a[i] += 1
group_barrier()

if i == 0:
for idx in range(1, N):
a[0] += a[idx]

N = 8196
a = dpnp.ones(N, dtype=dpnp.int32)
b = dpnp.ones(N, dtype=dpnp.int32)

dpex_exp.call_kernel(_kernel, dpex.Range(N), a, N)

assert a[0] == N * 2

0 comments on commit d069324

Please sign in to comment.