Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementing dpnp.zeros() and dpnp.ones() interface #923

Merged
merged 1 commit into from
Feb 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 163 additions & 11 deletions numba_dpex/core/runtime/_dpexrt_python.c
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,123 @@ DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device)
return NULL;
}

/**
* @brief Interface for the core.runtime.context.DpexRTContext.meminfo_alloc.
* This function takes an allocated memory as NRT_MemInfo and fills it with
* the value specified by `value`.
*
* @param mi An NRT_MemInfo object, should be found from memory
* allocation.
* @param itemsize The itemsize, the size of each item in the array.
* @param is_float Flag to specify if the data being float or not.
* @param value The value to be used to fill an array.
* @param device The device on which the memory was allocated.
* @return NRT_MemInfo* A new NRT_MemInfo object, NULL if no NRT_MemInfo
* object could be created.
*/
static NRT_MemInfo *DPEXRT_MemInfo_fill(NRT_MemInfo *mi,
size_t itemsize,
bool is_float,
uint8_t value,
const char *device)
{
DPCTLSyclDeviceSelectorRef dselector = NULL;
DPCTLSyclDeviceRef dref = NULL;
DPCTLSyclQueueRef qref = NULL;
DPCTLSyclEventRef eref = NULL;
size_t count = 0, size = 0, exp = 0;

size = mi->size;
while (itemsize >>= 1)
exp++;
count = (unsigned int)(size >> exp);

NRT_Debug(nrt_debug_print(
"DPEXRT-DEBUG: mi->size = %u, itemsize = %u, count = %u, "
"value = %u, Inside DPEXRT_MemInfo_fill %s, line %d\n",
mi->size, itemsize << exp, count, value, __FILE__, __LINE__));

if (mi->data == NULL) {
NRT_Debug(nrt_debug_print("DPEXRT-DEBUG: mi->data is NULL, "
"Inside DPEXRT_MemInfo_fill %s, line %d\n",
__FILE__, __LINE__));
goto error;
}

if (!(dselector = DPCTLFilterSelector_Create(device))) {
NRT_Debug(nrt_debug_print(
"DPEXRT-ERROR: Could not create a sycl::device_selector from "
"filter string: %s at %s %d.\n",
device, __FILE__, __LINE__));
goto error;
}

if (!(dref = DPCTLDevice_CreateFromSelector(dselector)))
goto error;

if (!(qref = DPCTLQueue_CreateForDevice(dref, NULL, 0)))
goto error;

DPCTLDeviceSelector_Delete(dselector);
DPCTLDevice_Delete(dref);

switch (exp) {
case 3:
{
uint64_t value_assign = (uint64_t)value;
if (is_float) {
double const_val = (double)value;
// To stop warning: dereferencing type-punned pointer
// will break strict-aliasing rules [-Wstrict-aliasing]
double *p = &const_val;
value_assign = *((uint64_t *)(p));
}
if (!(eref = DPCTLQueue_Fill64(qref, mi->data, value_assign, count)))
goto error;
break;
}
case 2:
{
uint32_t value_assign = (uint32_t)value;
if (is_float) {
float const_val = (float)value;
// To stop warning: dereferencing type-punned pointer
// will break strict-aliasing rules [-Wstrict-aliasing]
float *p = &const_val;
value_assign = *((uint32_t *)(p));
}
if (!(eref = DPCTLQueue_Fill32(qref, mi->data, value_assign, count)))
goto error;
break;
}
case 1:
if (!(eref = DPCTLQueue_Fill16(qref, mi->data, value, count)))
goto error;
break;
case 0:
if (!(eref = DPCTLQueue_Fill8(qref, mi->data, value, count)))
goto error;
break;
default:
goto error;
}

DPCTLEvent_Wait(eref);

DPCTLQueue_Delete(qref);
DPCTLEvent_Delete(eref);

return mi;

error:
DPCTLQueue_Delete(qref);
DPCTLEvent_Delete(eref);
DPCTLDeviceSelector_Delete(dselector);
DPCTLDevice_Delete(dref);

return NULL;
}

/*----------------------------------------------------------------------------*/
/*--------- Helpers to get attributes out of a dpnp.ndarray PyObject ---------*/
/*----------------------------------------------------------------------------*/
Expand Down Expand Up @@ -487,12 +604,13 @@ static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj,
arystruct_t *arystruct)
{
struct PyUSMArrayObject *arrayobj = NULL;
int i, ndim;
int i = 0, ndim = 0, exp = 0;
npy_intp *shape = NULL, *strides = NULL;
npy_intp *p = NULL, nitems, itemsize;
npy_intp *p = NULL, nitems;
void *data = NULL;
DPCTLSyclQueueRef qref = NULL;
PyGILState_STATE gstate;
npy_intp itemsize = 0;

// Increment the ref count on obj to prevent CPython from garbage
// collecting the array.
Expand Down Expand Up @@ -546,20 +664,29 @@ static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj,

p = arystruct->shape_and_strides;

// Calculate the exponent from the arystruct->itemsize as we know
// itemsize is a power of two
while (itemsize >>= 1)
exp++;

for (i = 0; i < ndim; ++i, ++p)
*p = shape[i];

// DPCTL returns a NULL pointer if the array is contiguous
// DPCTL returns a NULL pointer if the array is contiguous. dpctl stores
// strides as number of elements and Numba stores strides as bytes, for
// that reason we are multiplying stride by itemsize when unboxing the
// external array.

// FIXME: Stride computation should check order and adjust how strides are
// calculated. Right now strides are assuming that order is C contigous.
if (strides) {
for (i = 0; i < ndim; ++i, ++p) {
*p = strides[i];
*p = strides[i] << exp;
}
}
else {
for (i = 1; i < ndim; ++i, ++p) {
*p = shape[i];
*p = shape[i] << exp;
}
*p = 1;
}
Expand Down Expand Up @@ -598,11 +725,12 @@ static PyObject *box_from_arystruct_parent(arystruct_t *arystruct,
int ndim,
PyArray_Descr *descr)
{
int i;
npy_intp *p;
int i = 0, exp = 0;
npy_intp *p = NULL;
npy_intp *shape = NULL, *strides = NULL;
PyObject *array = arystruct->parent;
struct PyUSMArrayObject *arrayobj = NULL;
npy_intp itemsize = 0;

NRT_Debug(nrt_debug_print("DPEXRT-DEBUG: In try_to_return_parent.\n"));

Expand All @@ -623,9 +751,16 @@ static PyObject *box_from_arystruct_parent(arystruct_t *arystruct,
if (shape[i] != *p)
return NULL;
}

// Calculate the exponent from the arystruct->itemsize as we know
// itemsize is a power of two
itemsize = arystruct->itemsize;
while (itemsize >>= 1)
exp++;
// dpctl stores strides as number of elements and Numba stores strides as
// bytes, for that reason we are multiplying stride by itemsize when
// unboxing the external array.
if (strides) {
if (strides[i] != *p)
if (strides[i] << exp != *p)
return NULL;
}
else {
Expand Down Expand Up @@ -680,6 +815,8 @@ DPEXRT_sycl_usm_ndarray_to_python_acqref(arystruct_t *arystruct,
npy_intp *shape = NULL, *strides = NULL;
int typenum = 0;
int status = 0;
int exp = 0;
npy_intp itemsize = 0;

NRT_Debug(nrt_debug_print(
"DPEXRT-DEBUG: In DPEXRT_sycl_usm_ndarray_to_python_acqref.\n"));
Expand Down Expand Up @@ -750,7 +887,20 @@ DPEXRT_sycl_usm_ndarray_to_python_acqref(arystruct_t *arystruct,
}

shape = arystruct->shape_and_strides;
strides = shape + ndim;

// Calculate the exponent from the arystruct->itemsize as we know
// itemsize is a power of two
itemsize = arystruct->itemsize;
while (itemsize >>= 1)
exp++;

// Numba internally stores strides as bytes and not as elements. Divide
// the stride by itemsize to get number of elements.
for (size_t idx = ndim; idx < 2 * ((size_t)ndim); ++idx)
arystruct->shape_and_strides[idx] =
arystruct->shape_and_strides[idx] >> exp;
strides = (shape + ndim);

typenum = descr->type_num;
usm_ndarr_obj = UsmNDArray_MakeFromPtr(
ndim, shape, typenum, strides, (DPCTLSyclUSMRef)arystruct->data,
Expand Down Expand Up @@ -845,6 +995,7 @@ static PyObject *build_c_helpers_dict(void)
_declpointer("DPEXRT_sycl_usm_ndarray_to_python_acqref",
&DPEXRT_sycl_usm_ndarray_to_python_acqref);
_declpointer("DPEXRT_MemInfo_alloc", &DPEXRT_MemInfo_alloc);
_declpointer("DPEXRT_MemInfo_fill", &DPEXRT_MemInfo_fill);
_declpointer("NRT_ExternalAllocator_new_for_usm",
&NRT_ExternalAllocator_new_for_usm);

Expand Down Expand Up @@ -895,7 +1046,8 @@ MOD_INIT(_dpexrt_python)
PyLong_FromVoidPtr(&DPEXRT_sycl_usm_ndarray_to_python_acqref));
PyModule_AddObject(m, "DPEXRT_MemInfo_alloc",
PyLong_FromVoidPtr(&DPEXRT_MemInfo_alloc));

PyModule_AddObject(m, "DPEXRT_MemInfo_fill",
PyLong_FromVoidPtr(&DPEXRT_MemInfo_fill));
PyModule_AddObject(m, "c_helpers", build_c_helpers_dict());
return MOD_SUCCESS_VAL(m);
}
53 changes: 44 additions & 9 deletions numba_dpex/core/runtime/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@ def wrap(self, builder, *args, **kwargs):

@_check_null_result
def meminfo_alloc(self, builder, size, usm_type, device):
"""A wrapped caller for meminfo_alloc_unchecked() with null check."""
return self.meminfo_alloc_unchecked(builder, size, usm_type, device)

@_check_null_result
def meminfo_fill(self, builder, meminfo, itemsize, is_float, value, device):
"""A wrapped caller for meminfo_fill_unchecked() with null check."""
return self.meminfo_fill_unchecked(
builder, meminfo, itemsize, is_float, value, device
)

def meminfo_alloc_unchecked(self, builder, size, usm_type, device):
"""Allocate a new MemInfo with a data payload of `size` bytes.

The result of the call is checked and if it is NULL, i.e. allocation
Expand All @@ -49,26 +60,50 @@ def meminfo_alloc(self, builder, size, usm_type, device):

Returns: A pointer to the MemInfo is returned.
"""
mod = builder.module
u64 = ir.IntType(64)
fnty = ir.FunctionType(
cgutils.voidptr_t, [cgutils.intp_t, u64, cgutils.voidptr_t]
)
fn = cgutils.get_or_insert_function(mod, fnty, "DPEXRT_MemInfo_alloc")
fn.return_value.add_attribute("noalias")

return self.meminfo_alloc_unchecked(builder, size, usm_type, device)
ret = builder.call(fn, [size, usm_type, device])

def meminfo_alloc_unchecked(self, builder, size, usm_type, device):
"""
Allocate a new MemInfo with a data payload of `size` bytes.
return ret

A pointer to the MemInfo is returned.
def meminfo_fill_unchecked(
self, builder, meminfo, itemsize, is_float, value, device
):
"""Fills an allocated `MemInfo` with the value specified.

Returns NULL to indicate error/failure to allocate.
The result of the call is checked and if it is `NULL`, i.e. the fill
operation failed, then a `MemoryError` is raised. If the fill operation
is succeeded then a pointer to the `MemInfo` is returned.

Args:
builder (llvmlite.ir.builder.IRBuilder): LLVM IR builder
meminfo (llvmlite.ir.instructions.LoadInstr): LLVM uint64 value
specifying the size in bytes for the data payload.
itemsize (llvmlite.ir.values.Constant): An LLVM Constant value
specifying the size of the each data item allocated by the
usm allocator.
device (llvmlite.ir.values.FormattedConstant): An LLVM ArrayType
storing a const string for a DPC++ filter selector string.

Returns: A pointer to the `MemInfo` is returned.
"""
mod = builder.module
u64 = ir.IntType(64)
b = ir.IntType(1)
fnty = ir.FunctionType(
cgutils.voidptr_t, [cgutils.intp_t, u64, cgutils.voidptr_t]
cgutils.voidptr_t,
[cgutils.voidptr_t, u64, b, cgutils.int8_t, cgutils.voidptr_t],
)
fn = cgutils.get_or_insert_function(mod, fnty, "DPEXRT_MemInfo_alloc")
fn = cgutils.get_or_insert_function(mod, fnty, "DPEXRT_MemInfo_fill")
fn.return_value.add_attribute("noalias")

ret = builder.call(fn, [size, usm_type, device])
ret = builder.call(fn, [meminfo, itemsize, is_float, value, device])

return ret

Expand Down
Loading