Skip to content

Commit

Permalink
implementation of barrier operation and test case
Browse files Browse the repository at this point in the history
  • Loading branch information
adarshyoga committed Jan 30, 2024
1 parent 8fdfc1b commit 3b8a88c
Show file tree
Hide file tree
Showing 5 changed files with 207 additions and 2 deletions.
5 changes: 4 additions & 1 deletion numba_dpex/experimental/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@

from numba.core.imputils import Registry

from ._kernel_dpcpp_spirv_overloads import _atomic_ref_overloads
from ._kernel_dpcpp_spirv_overloads import (
_atomic_ref_overloads,
_barriers_overloads,
)
from .decorators import device_func, kernel
from .kernel_dispatcher import KernelDispatcher
from .launcher import call_kernel, call_kernel_async
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# SPDX-FileCopyrightText: 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""
Provides overloads for functions included in kernel_iface.barrier that
generate dpcpp SPIR-V LLVM IR intrinsic function calls.
"""
from llvmlite import ir as llvmir
from numba.core import cgutils, types
from numba.extending import intrinsic, overload

from numba_dpex.core import itanium_mangler as ext_itanium_mangler
from numba_dpex.experimental.kernel_iface import group_barrier
from numba_dpex.experimental.kernel_iface.memory_enums import (
MemoryOrder,
MemoryScope,
)
from numba_dpex.experimental.target import DPEX_KERNEL_EXP_TARGET_NAME

from ._spv_atomic_inst_helper import get_memory_semantics_mask, get_scope


def _get_memory_scope(fence_scope):
if isinstance(fence_scope, types.Literal):
return get_scope(fence_scope.literal_value)
return get_scope(fence_scope.value)


@intrinsic
def _intrinsic_barrier(
ty_context, # pylint: disable=unused-argument
ty_exec_scope, # pylint: disable=unused-argument
ty_mem_scope, # pylint: disable=unused-argument
ty_spirv_mem_sem_mask, # pylint: disable=unused-argument
):
# Signature of `__spirv_control_barrier` call that is
# generated for group_barrier. It takes three arguments -
# exec_scope, memory_scope and memory_semantics_mask.
# All arguments have to be of type unsigned int32.
sig = types.void(types.uint32, types.uint32, types.uint32)

def _intrinsic_barrier_codegen(
context, builder, sig, args # pylint: disable=unused-argument
):
exec_scope_arg = builder.trunc(args[0], llvmir.IntType(32))
mem_scope_arg = builder.trunc(args[1], llvmir.IntType(32))
spirv_memory_semantics_mask_arg = builder.trunc(
args[2], llvmir.IntType(32)
)

fn_args = [
exec_scope_arg,
mem_scope_arg,
spirv_memory_semantics_mask_arg,
]

mangled_fn_name = ext_itanium_mangler.mangle_ext(
"__spirv_ControlBarrier", [types.uint32, types.uint32, types.uint32]
)

spirv_fn_arg_types = [
llvmir.IntType(32),
llvmir.IntType(32),
llvmir.IntType(32),
]

fn = cgutils.get_or_insert_function(
builder.module,
llvmir.FunctionType(llvmir.VoidType(), spirv_fn_arg_types),
mangled_fn_name,
)

fn.attributes.add("convergent")
fn.attributes.add("nounwind")
fn.calling_convention = "spir_func"

callinst = builder.call(fn, fn_args)

callinst.attributes.add("convergent")
callinst.attributes.add("nounwind")

return (
sig,
_intrinsic_barrier_codegen,
)


@overload(
group_barrier,
prefer_literal=True,
target=DPEX_KERNEL_EXP_TARGET_NAME,
)
def ol_group_barrier(fence_scope=MemoryScope.WORK_GROUP):
"""SPIR-V overload for
:meth:`numba_dpex.experimental.kernel_iface.group_barrier`.
Generates the same LLVM IR instruction as dpcpp for the
`group_barrier` function.
"""

# Per SYCL spec, group_barrier must perform both control
# barrier and memory fence operations. Hence,
# group_barrier requires two scopes and memory
# consistency specification as three arguments.
#
# mem_scope - scope of any memory consistency operations
# that are performed by the barrier. By default,
# mem_scope is set to `work_group`.
# exec_scope - scope that determines the set of work-items
# that synchronize at barrier.
# Set to `work_group` for group_barrier always.
# spirv_memory_semantics_mask - Based on sycl implementation,
# Mask that is set to use sequential consistency
# memory order semantics always.

mem_scope = _get_memory_scope(fence_scope)
exec_scope = get_scope(MemoryScope.WORK_GROUP.value)
spirv_memory_semantics_mask = get_memory_semantics_mask(
MemoryOrder.SEQ_CST.value
)

def _ol_group_barrier_impl(
fence_scope=MemoryScope.WORK_GROUP,
): # pylint: disable=unused-argument
# pylint: disable=no-value-for-parameter
return _intrinsic_barrier(
exec_scope, mem_scope, spirv_memory_semantics_mask
)

return _ol_group_barrier_impl
9 changes: 8 additions & 1 deletion numba_dpex/experimental/kernel_iface/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@
"""

from .atomic_ref import AtomicRef
from .barrier import group_barrier
from .memory_enums import AddressSpace, MemoryOrder, MemoryScope

__all__ = ["AddressSpace", "AtomicRef", "MemoryOrder", "MemoryScope"]
__all__ = [
"group_barrier",
"AddressSpace",
"AtomicRef",
"MemoryOrder",
"MemoryScope",
]
36 changes: 36 additions & 0 deletions numba_dpex/experimental/kernel_iface/barrier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# SPDX-FileCopyrightText: 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""Python functions that simulate SYCL's barrier primitives.
"""

from .memory_enums import MemoryScope


def group_barrier(fence_scope=MemoryScope.WORK_GROUP):
"""Performs a barrier operation across all work-items in a work group.
The function is modeled after the ``sycl::group_barrier`` function. It
synchronizes work within a group of work items. All the work-items
of the group must execute the barrier construct before any work-item
continues execution beyond the barrier. However, unlike
``sycl::group_barrier`` the numba_dpex function implicitly synchronizes at
the level of a work group and does not allow specifying the group as an
argument. The :func:`sub_group_barrier` function should be used if
synchronization has to be performed only across a sub-group.
The ``group_barrier`` performs mem-fence operations ensuring that memory
accesses issued before the barrier are not re-ordered with those issued
after the barrier: all work-items in group g execute a release fence prior
to synchronizing at the barrier, all work-items in group g execute an
acquire fence afterwards, and there is an implicit synchronization of these
fences as if provided by an explicit atomic operation on an atomic object.
Args:
fence_scope (optional): scope of any memory consistency
operations that are performed by the barrier.
"""

# TODO: A pure Python simulation of a group_barrier will be added later.
raise NotImplementedError
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import dpnp

import numba_dpex as dpex
import numba_dpex.experimental as dpex_exp
from numba_dpex.experimental.kernel_iface import group_barrier


def test_group_barrier():
"""A test for group_barrier function."""

@dpex_exp.kernel
def _kernel(a, N):
i = dpex.get_global_id(0)

a[i] += 1
group_barrier()

if i == 0:
for idx in range(1, N):
a[0] += a[idx]

N = 8196
a = dpnp.ones(N, dtype=dpnp.int32)
b = dpnp.ones(N, dtype=dpnp.int32)

dpex_exp.call_kernel(_kernel, dpex.Range(N), a, N)

assert a[0] == N * 2

0 comments on commit 3b8a88c

Please sign in to comment.