Skip to content

Commit

Permalink
Remove atomic emulation
Browse files Browse the repository at this point in the history
  • Loading branch information
ZzEeKkAa authored and Diptorup Deb committed Jul 26, 2023
1 parent 67f06ee commit e751744
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 188 deletions.
3 changes: 0 additions & 3 deletions numba_dpex/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,6 @@ def __getattr__(name):
# Dump offload diagnostics
OFFLOAD_DIAGNOSTICS = _readenv("NUMBA_DPEX_OFFLOAD_DIAGNOSTICS", int, 0)

# Activate native floating point atomics support for supported devices.
# Requires llvm-spirv supporting the FP atomics extension
NATIVE_FP_ATOMICS = _readenv("NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE", int, 0)
# Emit debug info
DEBUG = _readenv("NUMBA_DPEX_DEBUG", int, config.DEBUG)
DEBUGINFO_DEFAULT = _readenv(
Expand Down
152 changes: 13 additions & 139 deletions numba_dpex/ocl/oclimpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,63 +143,6 @@ def sub_group_barrier_impl(context, builder, sig, args):
return _void_value


def insert_and_call_atomic_fn(
context, builder, sig, fn_type, dtype, ptr, val, addrspace
):
ll_p = None
name = ""
if dtype.name == "float32":
ll_val = llvmir.FloatType()
ll_p = ll_val.as_pointer()
if fn_type == "add":
name = "numba_dpex_atomic_add_f32"
elif fn_type == "sub":
name = "numba_dpex_atomic_sub_f32"
else:
raise TypeError("Operation type is not supported %s" % (fn_type))
elif dtype.name == "float64":
if True:
ll_val = llvmir.DoubleType()
ll_p = ll_val.as_pointer()
if fn_type == "add":
name = "numba_dpex_atomic_add_f64"
elif fn_type == "sub":
name = "numba_dpex_atomic_sub_f64"
else:
raise TypeError(
"Operation type is not supported %s" % (fn_type)
)
else:
raise TypeError(
"Atomic operation is not supported for type %s" % (dtype.name)
)

if addrspace == address_space.LOCAL:
name = name + "_local"
else:
name = name + "_global"

assert ll_p is not None
assert name != ""
ll_p.addrspace = address_space.GENERIC

mod = builder.module
if sig.return_type == types.void:
llretty = llvmir.VoidType()
else:
llretty = context.get_value_type(sig.return_type)

llargs = [ll_p, context.get_value_type(sig.args[2])]
fnty = llvmir.FunctionType(llretty, llargs)

fn = cgutils.get_or_insert_function(mod, fnty, name)
fn.calling_convention = kernel_target.CC_SPIR_FUNC

generic_ptr = context.addrspacecast(builder, ptr, address_space.GENERIC)

return builder.call(fn, [generic_ptr, val])


def native_atomic_add(context, builder, sig, args):
aryty, indty, valty = sig.args
ary, inds, val = args
Expand Down Expand Up @@ -286,20 +229,15 @@ def native_atomic_add(context, builder, sig, args):
@lower(stubs.atomic.add, types.Array, types.UniTuple, types.Any)
@lower(stubs.atomic.add, types.Array, types.Tuple, types.Any)
def atomic_add_tuple(context, builder, sig, args):
device_type = dpctl.get_current_queue().sycl_device.device_type
dtype = sig.args[0].dtype

if dtype == types.float32 or dtype == types.float64:
if (
device_type == dpctl.device_type.gpu
and config.NATIVE_FP_ATOMICS == 1
):
return native_atomic_add(context, builder, sig, args)
else:
# Currently, DPCPP only supports native floating point
# atomics for GPUs.
return atomic_add(context, builder, sig, args, "add")
elif dtype == types.int32 or dtype == types.int64:
# TODO: do we need this check, or should we just use native_atomic_add for everything?
if (
dtype == types.float32
or dtype == types.float64
or dtype == types.int32
or dtype == types.int64
):
return native_atomic_add(context, builder, sig, args)
else:
raise TypeError("Atomic operation on unsupported type %s" % dtype)
Expand Down Expand Up @@ -337,83 +275,19 @@ def atomic_sub_wrapper(context, builder, sig, args):
@lower(stubs.atomic.sub, types.Array, types.UniTuple, types.Any)
@lower(stubs.atomic.sub, types.Array, types.Tuple, types.Any)
def atomic_sub_tuple(context, builder, sig, args):
device_type = dpctl.get_current_queue().sycl_device.device_type
dtype = sig.args[0].dtype

if dtype == types.float32 or dtype == types.float64:
if (
device_type == dpctl.device_type.gpu
and config.NATIVE_FP_ATOMICS == 1
):
return atomic_sub_wrapper(context, builder, sig, args)
else:
# Currently, DPCPP only supports native floating point
# atomics for GPUs.
return atomic_add(context, builder, sig, args, "sub")
elif dtype == types.int32 or dtype == types.int64:
if (
dtype == types.float32
or dtype == types.float64
or dtype == types.int32
or dtype == types.int64
):
return atomic_sub_wrapper(context, builder, sig, args)
else:
raise TypeError("Atomic operation on unsupported type %s" % dtype)


def atomic_add(context, builder, sig, args, name):
from .atomics import atomic_support_present

if atomic_support_present():
context.extra_compile_options[kernel_target.LINK_ATOMIC] = True
aryty, indty, valty = sig.args
ary, inds, val = args
dtype = aryty.dtype

if indty == types.intp:
indices = [inds] # just a single integer
indty = [indty]
else:
indices = cgutils.unpack_tuple(builder, inds, count=len(indty))
indices = [
context.cast(builder, i, t, types.intp)
for t, i in zip(indty, indices)
]

if dtype != valty:
raise TypeError("expecting %s but got %s" % (dtype, valty))

if aryty.ndim != len(indty):
raise TypeError(
"indexing %d-D array with %d-D index" % (aryty.ndim, len(indty))
)

lary = context.make_array(aryty)(context, builder, ary)
ptr = cgutils.get_item_pointer(context, builder, aryty, lary, indices)

if isinstance(aryty, Array) and aryty.addrspace == address_space.LOCAL:
return insert_and_call_atomic_fn(
context,
builder,
sig,
name,
dtype,
ptr,
val,
address_space.LOCAL,
)
else:
return insert_and_call_atomic_fn(
context,
builder,
sig,
name,
dtype,
ptr,
val,
address_space.GLOBAL,
)
else:
raise ImportError(
"Atomic support is not present, can not perform atomic_add"
)


@lower(stubs.private.array, types.IntegerLiteral, types.Any)
def dpex_private_array_integer(context, builder, sig, args):
length = sig.args[0].literal_value
Expand Down
21 changes: 1 addition & 20 deletions numba_dpex/spirv_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
"""A wrapper to connect to the SPIR-V binaries (Tools, Translator)."""

import os
import shutil
import tempfile
from subprocess import CalledProcessError, check_call

Expand Down Expand Up @@ -75,14 +74,6 @@ def generate(self, llvm_spirv_args, ipath, opath):
if config.DEBUG:
llvm_spirv_flags.append("--spirv-debug-info-version=ocl-100")

if not config.NATIVE_FP_ATOMICS:
# Do NOT upgrade version unless you are 100% confident. Not all
# kernel outputs can be converted to higher version of spirv.
# That results in different spirv file versions. As next step
# requires linking of the result file and
# numba_dpex/ocl/atomics/atomic_ops.spir it will raise an error
# that two spirv files have different version and can't be linked
llvm_spirv_args = ["--spirv-max-version", "1.0"] + llvm_spirv_args
llvm_spirv_tool = self._llvm_spirv()

if config.DEBUG:
Expand Down Expand Up @@ -162,15 +153,9 @@ def finalize(self):
# Generate SPIR-V from "friendly" LLVM-based SPIR 2.0
spirv_path = self._track_temp_file("generated-spirv")

binary_paths = [spirv_path]

llvm_spirv_args = []
for key in list(self.context.extra_compile_options.keys()):
if key == LINK_ATOMIC:
from .ocl.atomics import get_atomic_spirv_path

binary_paths.append(get_atomic_spirv_path())
elif key == LLVM_SPIRV_ARGS:
if key == LLVM_SPIRV_ARGS:
llvm_spirv_args = self.context.extra_compile_options[key]
del self.context.extra_compile_options[key]

Expand All @@ -194,10 +179,6 @@ def finalize(self):
opath=spirv_path,
)

if len(binary_paths) > 1:
spirv_path = self._track_temp_file("linked-spirv")
self._cmd.link(spirv_path, binary_paths)

if config.SAVE_IR_FILES != 0:
# Dump the llvmir and llvmbc in file
with open("generated_spirv.spir", "wb") as f1:
Expand Down
40 changes: 14 additions & 26 deletions numba_dpex/tests/kernel_tests/test_atomic_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,21 +160,11 @@ def test_kernel_atomic_multi_dim(
assert a[0] == expected


skip_NATIVE_FP_ATOMICS_0 = pytest.mark.skipif(
not config.NATIVE_FP_ATOMICS, reason="Native FP atomics disabled"
)


def skip_if_disabled(*args):
return pytest.param(*args, marks=skip_NATIVE_FP_ATOMICS_0)


@skip_no_atomic_support
@pytest.mark.parametrize(
"NATIVE_FP_ATOMICS, expected_native_atomic_for_device",
"expected_native_atomic_for_device",
[
skip_if_disabled(1, lambda device: device != "opencl:cpu:0"),
(0, lambda device: False),
lambda device: True,
],
)
@pytest.mark.parametrize(
Expand All @@ -189,7 +179,6 @@ def skip_if_disabled(*args):
)
@pytest.mark.parametrize("dtype", list_of_f_dtypes)
def test_atomic_fp_native(
NATIVE_FP_ATOMICS,
expected_native_atomic_for_device,
function_generator,
operator_name,
Expand All @@ -206,16 +195,15 @@ def test_atomic_fp_native(
for arg in args
]

with override_config("NATIVE_FP_ATOMICS", NATIVE_FP_ATOMICS):
kernel.compile(
args=argtypes,
debug=False,
compile_flags=None,
target_ctx=dpex_kernel_target.target_context,
typing_ctx=dpex_kernel_target.typing_context,
)

is_native_atomic = expected_spirv_function in kernel._llvm_module
assert is_native_atomic == expected_native_atomic_for_device(
dpctl.select_default_device().filter_string
)
kernel.compile(
args=argtypes,
debug=False,
compile_flags=None,
target_ctx=dpex_kernel_target.target_context,
typing_ctx=dpex_kernel_target.typing_context,
)

is_native_atomic = expected_spirv_function in kernel._llvm_module
assert is_native_atomic == expected_native_atomic_for_device(
dpctl.select_default_device().filter_string
)

0 comments on commit e751744

Please sign in to comment.