Skip to content

Commit

Permalink
Adds support for prange reduction loops to numba-dpex.
Browse files Browse the repository at this point in the history
   - Adds a template to generate a tree-reduction kernel for
     reduction loops that use the "+" or "*" operators.
   - Adds code generation for nd-range kernels in the parfor
     lowerer.
   - Refactoring of the parfor lowerer and kernel builder modules.
   - New unit test cases.
  • Loading branch information
mingjie-intel authored and Diptorup Deb committed May 17, 2023
1 parent 65b06c9 commit c9f9bc7
Show file tree
Hide file tree
Showing 11 changed files with 1,783 additions and 80 deletions.
328 changes: 274 additions & 54 deletions numba_dpex/core/passes/parfor_lowering_pass.py

Large diffs are not rendered by default.

44 changes: 44 additions & 0 deletions numba_dpex/core/runtime/_dpexrt_python.c
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,47 @@ static void DpexrtQueue_SubmitRange(const void *KRef,
__FILE__, __LINE__));
}

static void DpexrtQueue_SubmitNDRange(const void *KRef,
const void *QRef,
void **Args,
const DPCTLKernelArgType *ArgTypes,
size_t NArgs,
const size_t gRange[3],
const size_t lRange[3],
size_t Ndims,
const void *DepEvents,
size_t NDepEvents)
{
DPCTLSyclEventRef eref = NULL;
DPCTLSyclQueueRef qref = NULL;

DPEXRT_DEBUG(drt_debug_print(
"DPEXRT-DEBUG: Inside DpexrtQueue_SubmitNDRange %s, line %d\n",
__FILE__, __LINE__));

qref = (DPCTLSyclQueueRef)QRef;

eref = DPCTLQueue_SubmitNDRange((DPCTLSyclKernelRef)KRef, qref, Args,
(DPCTLKernelArgType *)ArgTypes, NArgs,
gRange, lRange, Ndims,
(DPCTLSyclEventRef *)DepEvents, NDepEvents);
if (eref == NULL) {
DPEXRT_DEBUG(
drt_debug_print("DPEXRT-ERROR: Kernel submission using "
"DpexrtQueue_SubmitNDRange failed! %s, line %d\n",
__FILE__, __LINE__));
}
else {
DPCTLQueue_Wait(qref);
DPCTLEvent_Wait(eref);
DPCTLEvent_Delete(eref);
}

DPEXRT_DEBUG(drt_debug_print(
"DPEXRT-DEBUG: Done with DpexrtQueue_SubmitNDRange %s, line %d\n",
__FILE__, __LINE__));
}

/*----------------------------------------------------------------------------*/
/*---------------------- Functions for NRT_MemInfo allocation ----------------*/
/*----------------------------------------------------------------------------*/
Expand Down Expand Up @@ -1246,6 +1287,7 @@ static PyObject *build_c_helpers_dict(void)
_declpointer("DPEXRTQueue_CreateFromFilterString",
&DPEXRTQueue_CreateFromFilterString);
_declpointer("DpexrtQueue_SubmitRange", &DpexrtQueue_SubmitRange);
_declpointer("DpexrtQueue_SubmitNDRange", &DpexrtQueue_SubmitNDRange);
_declpointer("DPEXRT_MemInfo_alloc", &DPEXRT_MemInfo_alloc);
_declpointer("DPEXRT_MemInfo_fill", &DPEXRT_MemInfo_fill);
_declpointer("NRT_ExternalAllocator_new_for_usm",
Expand Down Expand Up @@ -1309,6 +1351,8 @@ MOD_INIT(_dpexrt_python)
PyLong_FromVoidPtr(&DPEXRTQueue_CreateFromFilterString));
PyModule_AddObject(m, "DpexrtQueue_SubmitRange",
PyLong_FromVoidPtr(&DpexrtQueue_SubmitRange));
PyModule_AddObject(m, "DpexrtQueue_SubmitNDRange",
PyLong_FromVoidPtr(&DpexrtQueue_SubmitNDRange));
PyModule_AddObject(m, "DPEXRT_MemInfo_alloc",
PyLong_FromVoidPtr(&DPEXRT_MemInfo_alloc));
PyModule_AddObject(m, "DPEXRT_MemInfo_fill",
Expand Down
64 changes: 59 additions & 5 deletions numba_dpex/core/runtime/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,11 +258,7 @@ def submit_range(
"""Calls DPEXRTQueue_CreateFromFilterString to create a new sycl::queue
from a given filter string.
Args:
device (llvmlite.ir.values.FormattedConstant): An LLVM ArrayType
storing a const string for a DPC++ filter selector string.
Returns: A DPCTLSyclQueueRef pointer.
Returns: A LLVM IR call inst.
"""
mod = builder.module
fnty = llvmir.FunctionType(
Expand Down Expand Up @@ -299,3 +295,61 @@ def submit_range(
)

return ret

def submit_ndrange(
self,
builder,
kref,
qref,
args,
argtys,
nargs,
grange,
lrange,
ndims,
depevents,
ndepevents,
):
"""Calls DPEXRTQueue_CreateFromFilterString to create a new sycl::queue
from a given filter string.
Returns: A LLVM IR call inst.
"""

mod = builder.module
fnty = llvmir.FunctionType(
llvmir.types.VoidType(),
[
cgutils.voidptr_t,
cgutils.voidptr_t,
cgutils.voidptr_t.as_pointer(),
cgutils.int32_t.as_pointer(),
llvmir.IntType(64),
llvmir.IntType(64).as_pointer(),
llvmir.IntType(64).as_pointer(),
llvmir.IntType(64),
cgutils.voidptr_t,
llvmir.IntType(64),
],
)
fn = cgutils.get_or_insert_function(
mod, fnty, "DpexrtQueue_SubmitNDRange"
)

ret = builder.call(
fn,
[
kref,
qref,
args,
argtys,
nargs,
grange,
lrange,
ndims,
depevents,
ndepevents,
],
)

return ret
64 changes: 61 additions & 3 deletions numba_dpex/core/utils/kernel_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
import warnings

import dpctl.program as dpctl_prog
import dpnp
import numba
from numba.core import compiler, ir, types
from numba.core import ir, types
from numba.core.errors import NumbaParallelSafetyWarning
from numba.core.ir_utils import (
add_offset_to_labels,
Expand Down Expand Up @@ -456,3 +454,63 @@ def create_kernel_for_parfor(
kernel_arg_types=func_arg_types,
queue=exec_queue,
)


def update_sentinel(kernel_ir, sentinel_name, kernel_body, new_label):
"""Searched all the blocks in the IR generated from a kernel template and
replaces the __sentinel__ instruction with the actual op for the parfor.
Args:
kernel_ir : Numba FunctionIR that was generated from a kernel template
sentinel_name : The name of the sentinel instruction that is to be
replaced.
kernel_body : The function body of the kernel template generated
numba_dpex.kernel function
new_label: The new label to be used for the basic block created to store
the instructions that replaced the sentinel
"""
for label, block in kernel_ir.blocks.items():
for i, inst in enumerate(block.body):
if (
isinstance(inst, ir.Assign)
and inst.target.name == sentinel_name
):
# We found the sentinel assignment.
loc = inst.loc
scope = block.scope
# split block across __sentinel__
# A new block is allocated for the statements prior to the
# sentinel but the new block maintains the current block label.
prev_block = ir.Block(scope, loc)
prev_block.body = block.body[:i]

# The current block is used for statements after the sentinel.
block.body = block.body[i + 1 :] # noqa: E203
# But the current block gets a new label.
body_first_label = min(kernel_body.keys())

# The previous block jumps to the minimum labelled block of the
# parfor body.
prev_block.append(ir.Jump(body_first_label, loc))

# Add all the parfor loop body blocks to the gufunc function's
# IR.
for loop, b in kernel_body.items():
kernel_ir.blocks[loop] = copy.copy(b)
kernel_ir.blocks[loop].body = copy.copy(
kernel_ir.blocks[loop].body
)

body_last_label = max(kernel_body.keys())
kernel_ir.blocks[new_label] = block
kernel_ir.blocks[label] = prev_block
# Add a jump from the last parfor body block to the block
# containing statements after the sentinel.
kernel_ir.blocks[body_last_label].append(
ir.Jump(new_label, loc)
)

break
else:
continue
break
34 changes: 17 additions & 17 deletions numba_dpex/core/utils/kernel_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,6 @@ def _create_sycl_range(self, idx_range):
Args:
idx_range (_type_): _description_
kernel_name_tag (_type_): _description_
"""
intp_t = utils.get_llvm_type(context=self.context, type=types.intp)
intp_ptr_t = utils.get_llvm_ptr_type(intp_t)
Expand Down Expand Up @@ -341,27 +340,20 @@ def _create_sycl_range(self, idx_range):

return self.builder.bitcast(global_range, intp_ptr_t)

def submit_sync_ranged_kernel(
def submit_sync_kernel(
self,
idx_range,
sycl_queue_val,
total_kernel_args,
arg_list,
arg_ty_list,
global_range,
local_range=None,
):
"""
submit_sync_ranged_kernel(dim_bounds, sycl_queue_val)
Submits the kernel to the specified queue, waits and then copies
back any results to the host.
Args:
idx_range: Tuple specifying the range over which the kernel is
to be submitted.
sycl_queue_val : The SYCL queue on which the kernel is
submitted.
Submits the kernel to the specified queue, waits.
"""
gr = self._create_sycl_range(idx_range)
args = [
gr = self._create_sycl_range(global_range)
args1 = [
self.builder.inttoptr(
self.context.get_constant(types.uintp, self.kernel_addr),
utils.get_llvm_type(context=self.context, type=types.voidptr),
Expand All @@ -371,7 +363,9 @@ def submit_sync_ranged_kernel(
arg_ty_list,
self.context.get_constant(types.uintp, total_kernel_args),
gr,
self.context.get_constant(types.uintp, len(idx_range)),
]
args2 = [
self.context.get_constant(types.uintp, len(global_range)),
self.builder.bitcast(
utils.create_null_ptr(
builder=self.builder, context=self.context
Expand All @@ -380,5 +374,11 @@ def submit_sync_ranged_kernel(
),
self.context.get_constant(types.uintp, 0),
]

self.rtctx.submit_range(self.builder, *args)
args = []
if len(local_range) == 0:
args = args1 + args2
self.rtctx.submit_range(self.builder, *args)
else:
lr = self._create_sycl_range(local_range)
args = args1 + [lr] + args2
self.rtctx.submit_ndrange(self.builder, *args)
45 changes: 45 additions & 0 deletions numba_dpex/core/utils/kernel_templates/kernel_template_iface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# SPDX-FileCopyrightText: 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

import abc


class KernelTemplateInterface(metaclass=abc.ABCMeta):
@classmethod
def __subclasshook__(cls, subclass):
return hasattr(
callable(subclass._generate_kernel_stub_as_string)
and callable(subclass._generate_kernel_ir)
and callable(subclass.dump_kernel_string)
and callable(subclass.dump_kernel_ir)
and hasattr(subclass, "kernel_ir")
and hasattr(subclass, "kernel_string")
)

@abc.abstractmethod
def _generate_kernel_stub_as_string(self):
"""Generates as a string a stub for a numba_dpex kernel function"""
raise NotImplementedError

@abc.abstractmethod
def _generate_kernel_ir(self):
raise NotImplementedError

@abc.abstractmethod
def dump_kernel_string(self):
raise NotImplementedError

@abc.abstractmethod
def dump_kernel_ir(self):
raise NotImplementedError

@property
@abc.abstractmethod
def kernel_ir(self):
raise NotImplementedError

@property
@abc.abstractmethod
def kernel_string(self):
raise NotImplementedError
Loading

0 comments on commit c9f9bc7

Please sign in to comment.