From a3c8646b930c0a824742f37a396dfd7ca27a1c80 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Sat, 1 Jul 2023 01:36:59 -0500 Subject: [PATCH 1/8] New data model for DpnpNdArray type objects. - Creates a new data model DpnpNdArrayModel to represent DpnpNdArray type objects natively. The data model differs from numba's ArrayModel by having an extra member to store a sycl::queue pointer. - Introduces a _usmarraystruct.h header to define the C struct for the DpnpNdArrayModel. - Renames numba_dpex.core.datamodel.models.ArrayModel to USMArrayModel. - Updates kernel launcher and parfor lowering functions to account for the new data model. --- numba_dpex/core/datamodel/models.py | 65 +++++++++++++++++-- .../kernel_interface/arg_pack_unpacker.py | 2 + numba_dpex/core/parfors/parfor_lowerer.py | 13 ++-- numba_dpex/core/runtime/_dpexrt_python.c | 24 +++---- numba_dpex/core/runtime/_usmarraystruct.h | 27 ++++++++ numba_dpex/core/utils/kernel_launcher.py | 64 +++++++++++------- numba_dpex/dpctl_iface/_helpers.py | 18 ++--- .../dpctl_iface/legacy_kernel_launch_ops.py | 18 +++-- 8 files changed, 167 insertions(+), 64 deletions(-) create mode 100644 numba_dpex/core/runtime/_usmarraystruct.h diff --git a/numba_dpex/core/datamodel/models.py b/numba_dpex/core/datamodel/models.py index f052ca7e76..870df75fa0 100644 --- a/numba_dpex/core/datamodel/models.py +++ b/numba_dpex/core/datamodel/models.py @@ -3,10 +3,10 @@ # SPDX-License-Identifier: Apache-2.0 from numba.core import datamodel, types -from numba.core.datamodel.models import ArrayModel as DpnpNdArrayModel from numba.core.datamodel.models import PrimitiveModel, StructModel from numba.core.extending import register_model +from numba_dpex.core.exceptions import UnreachableError from numba_dpex.utils import address_space from ..types import Array, DpctlSyclQueue, DpnpNdArray, USMNdArray @@ -23,7 +23,7 @@ def __init__(self, dmm, fe_type): super(GenericPointerModel, self).__init__(dmm, fe_type, be_type) -class ArrayModel(StructModel): +class USMArrayModel(StructModel): """A data model to represent a Dpex's array types in LLVM IR. Dpex's ArrayModel is based on Numba's ArrayModel for NumPy arrays. The @@ -40,7 +40,7 @@ def __init__(self, dmm, fe_type): ), ( "parent", - types.CPointer(fe_type.dtype, addrspace=fe_type.addrspace), + types.CPointer(types.pyobject, addrspace=fe_type.addrspace), ), ("nitems", types.intp), ("itemsize", types.intp), @@ -48,10 +48,61 @@ def __init__(self, dmm, fe_type): "data", types.CPointer(fe_type.dtype, addrspace=fe_type.addrspace), ), + ("sycl_queue", types.voidptr), + ("shape", types.UniTuple(types.intp, ndim)), + ("strides", types.UniTuple(types.intp, ndim)), + ] + super(USMArrayModel, self).__init__(dmm, fe_type, members) + + +class DpnpNdArrayModel(StructModel): + """Data model for the DpnpNdArray type. + + The data model for DpnpNdArray is similar to numb's ArrayModel used for + the numba.types.Array type, with the additional field ``sycl_queue`. The + `sycl_queue` attribute stores the pointer to the C++ sycl::queue object + that was used to allocate memory for numba-dpex's native representation + for an Python object inferred as a DpnpNdArray. + """ + + def __init__(self, dmm, fe_type): + ndim = fe_type.ndim + members = [ + ("meminfo", types.MemInfoPointer(fe_type.dtype)), + ("parent", types.pyobject), + ("nitems", types.intp), + ("itemsize", types.intp), + ("data", types.CPointer(fe_type.dtype)), + ("sycl_queue", types.voidptr), ("shape", types.UniTuple(types.intp, ndim)), ("strides", types.UniTuple(types.intp, ndim)), ] - super(ArrayModel, self).__init__(dmm, fe_type, members) + super(DpnpNdArrayModel, self).__init__(dmm, fe_type, members) + + @property + def flattened_field_count(self): + """Return the number of fields in an instance of a DpnpNdArrayModel.""" + flattened_member_count = 0 + members = self._members + for member in members: + if isinstance(member, types.UniTuple): + flattened_member_count += member.count + elif isinstance( + member, + ( + types.scalars.Integer, + types.misc.PyObject, + types.misc.RawPointer, + types.misc.CPointer, + types.misc.MemInfoPointer, + ), + ): + flattened_member_count += 1 + else: + print(member, type(member)) + raise UnreachableError + + return flattened_member_count class SyclQueueModel(StructModel): @@ -84,7 +135,7 @@ def __init__(self, dmm, fe_type): def _init_data_model_manager(): dmm = datamodel.default_manager.copy() dmm.register(types.CPointer, GenericPointerModel) - dmm.register(Array, ArrayModel) + dmm.register(Array, USMArrayModel) return dmm @@ -103,8 +154,8 @@ def _init_data_model_manager(): # object. # Register the USMNdArray type with the dpex ArrayModel -register_model(USMNdArray)(ArrayModel) -dpex_data_model_manager.register(USMNdArray, ArrayModel) +register_model(USMNdArray)(USMArrayModel) +dpex_data_model_manager.register(USMNdArray, USMArrayModel) # Register the DpnpNdArray type with the Numba ArrayModel register_model(DpnpNdArray)(DpnpNdArrayModel) diff --git a/numba_dpex/core/kernel_interface/arg_pack_unpacker.py b/numba_dpex/core/kernel_interface/arg_pack_unpacker.py index f74a4406a9..bb99e11290 100644 --- a/numba_dpex/core/kernel_interface/arg_pack_unpacker.py +++ b/numba_dpex/core/kernel_interface/arg_pack_unpacker.py @@ -54,6 +54,8 @@ def _unpack_usm_array(self, val): unpacked_array_attrs.append(ctypes.c_longlong(size)) unpacked_array_attrs.append(ctypes.c_longlong(itemsize)) unpacked_array_attrs.append(buf) + # queue: unused and passed as void* + unpacked_array_attrs.append(ctypes.c_size_t(0)) for ax in range(ndim): unpacked_array_attrs.append(ctypes.c_longlong(shape[ax])) for ax in range(ndim): diff --git a/numba_dpex/core/parfors/parfor_lowerer.py b/numba_dpex/core/parfors/parfor_lowerer.py index 6efbb8dd4e..f41811a964 100644 --- a/numba_dpex/core/parfors/parfor_lowerer.py +++ b/numba_dpex/core/parfors/parfor_lowerer.py @@ -5,7 +5,7 @@ import copy from llvmlite import ir as llvmir -from numba.core import cgutils, ir, types +from numba.core import ir, types from numba.parfors.parfor import ( find_potential_aliases_parfor, get_parfor_outputs, @@ -26,6 +26,8 @@ create_reduction_remainder_kernel_for_parfor, ) +from numba_dpex.core.datamodel.models import dpex_data_model_manager as dpex_dmm + # A global list of kernels to keep the objects alive indefinitely. keep_alive_kernels = [] @@ -114,8 +116,8 @@ def _build_kernel_arglist(self, kernel_fn, lowerer): # kernel_fn.kernel_args as arrays get flattened. for arg_type in kernel_fn.kernel_arg_types: if isinstance(arg_type, DpnpNdArray): - # FIXME: Remove magic constants - num_flattened_args += 5 + (2 * arg_type.ndim) + datamodel = dpex_dmm.lookup(arg_type) + num_flattened_args += datamodel.flattened_field_count elif arg_type == types.complex64 or arg_type == types.complex128: num_flattened_args += 2 else: @@ -134,15 +136,16 @@ def _build_kernel_arglist(self, kernel_fn, lowerer): argtype = kernel_fn.kernel_arg_types[arg_num] llvm_val = _getvar(lowerer, arg) if isinstance(argtype, DpnpNdArray): + datamodel = dpex_dmm.lookup(arg_type) self.kernel_builder.build_array_arg( array_val=llvm_val, + array_data_model=datamodel, array_rank=argtype.ndim, arg_list=self.args_list, args_ty_list=self.args_ty_list, arg_num=self.kernel_arg_num, ) - # FIXME: Get rid of magic constants - self.kernel_arg_num += 5 + (2 * argtype.ndim) + self.kernel_arg_num += datamodel.flattened_field_count else: if argtype == types.complex64: self.kernel_builder.build_complex_arg( diff --git a/numba_dpex/core/runtime/_dpexrt_python.c b/numba_dpex/core/runtime/_dpexrt_python.c index 981f1a617d..b2340b799d 100644 --- a/numba_dpex/core/runtime/_dpexrt_python.c +++ b/numba_dpex/core/runtime/_dpexrt_python.c @@ -21,7 +21,7 @@ #include "_dbg_printer.h" #include "_queuestruct.h" -#include "numba/_arraystruct.h" +#include "_usmarraystruct.h" // forward declarations static struct PyUSMArrayObject *PyUSMNdArray_ARRAYOBJ(PyObject *obj); @@ -49,14 +49,14 @@ static NRT_MemInfo *DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const DPCTLSyclQueueRef qref); static void usmndarray_meminfo_dtor(void *ptr, size_t size, void *info); -static PyObject *box_from_arystruct_parent(arystruct_t *arystruct, +static PyObject *box_from_arystruct_parent(usmarystruct_t *arystruct, int ndim, PyArray_Descr *descr); static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj, - arystruct_t *arystruct); + usmarystruct_t *arystruct); static PyObject * -DPEXRT_sycl_usm_ndarray_to_python_acqref(arystruct_t *arystruct, +DPEXRT_sycl_usm_ndarray_to_python_acqref(usmarystruct_t *arystruct, PyTypeObject *retty, int ndim, int writeable, @@ -770,7 +770,7 @@ static npy_intp product_of_shape(npy_intp *shape, npy_intp ndim) * @return {return} Error code representing success (0) or failure (-1). */ static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj, - arystruct_t *arystruct) + usmarystruct_t *arystruct) { struct PyUSMArrayObject *arrayobj = NULL; int i = 0, j = 0, k = 0, ndim = 0, exp = 0; @@ -827,6 +827,7 @@ static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj, } arystruct->data = data; + arystruct->sycl_queue = qref; arystruct->nitems = nitems; arystruct->itemsize = itemsize; arystruct->parent = obj; @@ -892,7 +893,7 @@ static int DPEXRT_sycl_usm_ndarray_from_python(PyObject *obj, * @return {return} A PyObject created from the arystruct_t->parent, if * the PyObject could not be created return NULL. */ -static PyObject *box_from_arystruct_parent(arystruct_t *arystruct, +static PyObject *box_from_arystruct_parent(usmarystruct_t *arystruct, int ndim, PyArray_Descr *descr) { @@ -914,8 +915,10 @@ static PyObject *box_from_arystruct_parent(arystruct_t *arystruct, } if ((void *)UsmNDArray_GetData(arrayobj) != arystruct->data) { - DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: Arrayobj cannot be boxed " - "from parent as data pointer is NULL.\n")); + DPEXRT_DEBUG(drt_debug_print( + "DPEXRT-DEBUG: Arrayobj cannot be boxed " + "from parent as data pointer in the arystruct is not the same as " + "the data pointer in the parent object.\n")); return NULL; } @@ -978,7 +981,7 @@ static PyObject *box_from_arystruct_parent(arystruct_t *arystruct, * */ static PyObject * -DPEXRT_sycl_usm_ndarray_to_python_acqref(arystruct_t *arystruct, +DPEXRT_sycl_usm_ndarray_to_python_acqref(usmarystruct_t *arystruct, PyTypeObject *retty, int ndim, int writeable, @@ -1094,8 +1097,7 @@ DPEXRT_sycl_usm_ndarray_to_python_acqref(arystruct_t *arystruct, typenum = descr->type_num; usm_ndarr_obj = UsmNDArray_MakeFromPtr( ndim, shape, typenum, strides, (DPCTLSyclUSMRef)arystruct->data, - (DPCTLSyclQueueRef)miobj->meminfo->external_allocator->opaque_data, 0, - (PyObject *)miobj); + (DPCTLSyclQueueRef)arystruct->sycl_queue, 0, (PyObject *)miobj); if (usm_ndarr_obj == NULL || !PyObject_TypeCheck(usm_ndarr_obj, &PyUSMArrayType)) diff --git a/numba_dpex/core/runtime/_usmarraystruct.h b/numba_dpex/core/runtime/_usmarraystruct.h new file mode 100644 index 0000000000..695d54e23e --- /dev/null +++ b/numba_dpex/core/runtime/_usmarraystruct.h @@ -0,0 +1,27 @@ +// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines the numba-dpex native representation for a dpctl.tensor.usm_ndarray +/// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +typedef struct +{ + void *meminfo; + PyObject *parent; + npy_intp nitems; + npy_intp itemsize; + void *data; + void *sycl_queue; + + npy_intp shape_and_strides[]; +} usmarystruct_t; diff --git a/numba_dpex/core/utils/kernel_launcher.py b/numba_dpex/core/utils/kernel_launcher.py index 62f19bd22a..ace7e6d7d9 100644 --- a/numba_dpex/core/utils/kernel_launcher.py +++ b/numba_dpex/core/utils/kernel_launcher.py @@ -64,7 +64,9 @@ def _build_array_attr_arg( # FIXME: If pointer arg then load it to some value and pass that value. # We also most likely need an address space cast - if isinstance(array_attr_ty, types.misc.RawPointer): + if isinstance( + array_attr_ty, (types.misc.RawPointer, types.misc.CPointer) + ): array_attr = self.builder.load(array_attr) self.build_arg( @@ -75,7 +77,7 @@ def _build_array_attr_arg( arg_num=arg_num, ) - def _build_flattened_array_args( + def _build_unituple_member_arg( self, array_val, array_attr_pos, ndims, arg_list, args_ty_list, arg_num ): array_attr = self.builder.gep( @@ -151,7 +153,13 @@ def build_complex_arg(self, val, ty, arg_list, args_ty_list, arg_num): arg_num += 1 def build_array_arg( - self, array_val, array_rank, arg_list, args_ty_list, arg_num + self, + array_val, + array_data_model, + array_rank, + arg_list, + args_ty_list, + arg_num, ): """Creates a list of LLVM Values for an unpacked DpnpNdArray kernel argument. @@ -179,56 +187,68 @@ def build_array_arg( arg_num=arg_num, ) arg_num += 1 - # Argument 3: Array size + # Argument nitems + self._build_array_attr_arg( + array_val=array_val, + array_attr_pos=array_data_model.get_field_position("nitems"), + array_attr_ty=array_data_model.get_member_fe_type("nitems"), + arg_list=arg_list, + args_ty_list=args_ty_list, + arg_num=arg_num, + ) + arg_num += 1 + # Argument itemsize self._build_array_attr_arg( array_val=array_val, - array_attr_pos=2, - array_attr_ty=types.int64, + array_attr_pos=array_data_model.get_field_position("itemsize"), + array_attr_ty=array_data_model.get_member_fe_type("itemsize"), arg_list=arg_list, args_ty_list=args_ty_list, arg_num=arg_num, ) arg_num += 1 - # Argument 4: itemsize + # Argument data self._build_array_attr_arg( array_val=array_val, - array_attr_pos=3, - array_attr_ty=types.int64, + array_attr_pos=array_data_model.get_field_position("data"), + array_attr_ty=array_data_model.get_member_fe_type("data"), arg_list=arg_list, args_ty_list=args_ty_list, arg_num=arg_num, ) arg_num += 1 - # Argument 5: data pointer + # Argument sycl_queue self._build_array_attr_arg( array_val=array_val, - array_attr_pos=4, - array_attr_ty=types.voidptr, + array_attr_pos=array_data_model.get_field_position("sycl_queue"), + array_attr_ty=array_data_model.get_member_fe_type("sycl_queue"), arg_list=arg_list, args_ty_list=args_ty_list, arg_num=arg_num, ) arg_num += 1 - # Arguments for flattened shape - self._build_flattened_array_args( + # Arguments for shape + shape_member = array_data_model.get_member_fe_type("shape") + self._build_unituple_member_arg( array_val=array_val, - array_attr_pos=5, - ndims=array_rank, + array_attr_pos=array_data_model.get_field_position("shape"), + ndims=shape_member.count, arg_list=arg_list, args_ty_list=args_ty_list, arg_num=arg_num, ) - arg_num += array_rank - # Arguments for flattened stride - self._build_flattened_array_args( + arg_num += shape_member.count + # Arguments for strides + stride_member = array_data_model.get_member_fe_type("strides") + self._build_unituple_member_arg( array_val=array_val, - array_attr_pos=6, - ndims=array_rank, + array_attr_pos=array_data_model.get_field_position("strides"), + ndims=stride_member.count, arg_list=arg_list, args_ty_list=args_ty_list, arg_num=arg_num, ) - arg_num += array_rank + arg_num += stride_member.count def get_queue(self, exec_queue): """Allocates memory on the stack to store a DPCTLSyclQueueRef. diff --git a/numba_dpex/dpctl_iface/_helpers.py b/numba_dpex/dpctl_iface/_helpers.py index 637689fbce..604ba659bb 100644 --- a/numba_dpex/dpctl_iface/_helpers.py +++ b/numba_dpex/dpctl_iface/_helpers.py @@ -5,35 +5,35 @@ from numba.core import types -def numba_type_to_dpctl_typenum(context, type): +def numba_type_to_dpctl_typenum(context, ty): """ This function looks up the dpctl defined enum values from ``DPCTLKernelArgType``. """ val = None - if type == types.int32 or isinstance(type, types.scalars.IntegerLiteral): + if ty == types.int32 or isinstance(ty, types.scalars.IntegerLiteral): # DPCTL_LONG_LONG val = context.get_constant(types.int32, 9) - elif type == types.uint32: + elif ty == types.uint32: # DPCTL_UNSIGNED_LONG_LONG val = context.get_constant(types.int32, 10) - elif type == types.boolean: + elif ty == types.boolean: # DPCTL_UNSIGNED_INT val = context.get_constant(types.int32, 5) - elif type == types.int64: + elif ty == types.int64: # DPCTL_LONG_LONG val = context.get_constant(types.int32, 9) - elif type == types.uint64: + elif ty == types.uint64: # DPCTL_SIZE_T val = context.get_constant(types.int32, 11) - elif type == types.float32: + elif ty == types.float32: # DPCTL_FLOAT val = context.get_constant(types.int32, 12) - elif type == types.float64: + elif ty == types.float64: # DPCTL_DOUBLE val = context.get_constant(types.int32, 13) - elif type == types.voidptr: + elif ty == types.voidptr or isinstance(ty, types.CPointer): # DPCTL_VOID_PTR val = context.get_constant(types.int32, 15) else: diff --git a/numba_dpex/dpctl_iface/legacy_kernel_launch_ops.py b/numba_dpex/dpctl_iface/legacy_kernel_launch_ops.py index 078af6213c..63db1141a7 100644 --- a/numba_dpex/dpctl_iface/legacy_kernel_launch_ops.py +++ b/numba_dpex/dpctl_iface/legacy_kernel_launch_ops.py @@ -143,7 +143,7 @@ def process_kernel_arg( self.context.get_constant(types.int64, 0), storage ) ty = numba_type_to_dpctl_typenum( - context=self.context, type=types.int64 + context=self.context, ty=types.int64 ) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( @@ -160,7 +160,7 @@ def process_kernel_arg( self.context.get_constant(types.int64, 0), storage ) ty = numba_type_to_dpctl_typenum( - context=self.context, type=types.int64 + context=self.context, ty=types.int64 ) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( @@ -182,7 +182,7 @@ def process_kernel_arg( ) ty = numba_type_to_dpctl_typenum( - context=self.context, type=types.int64 + context=self.context, ty=types.int64 ) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( @@ -204,7 +204,7 @@ def process_kernel_arg( ) ty = numba_type_to_dpctl_typenum( - context=self.context, type=types.int64 + context=self.context, ty=types.int64 ) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( @@ -247,7 +247,7 @@ def process_kernel_arg( # here for them to match. legal_names = legalize_names([var]) ty = numba_type_to_dpctl_typenum( - context=self.context, type=types.voidptr + context=self.context, ty=types.voidptr ) malloc_fn = DpctlCAPIFnBuilder.get_dpctl_malloc_shared( @@ -323,7 +323,7 @@ def process_kernel_arg( ], ) ty = numba_type_to_dpctl_typenum( - context=self.context, type=types.int64 + context=self.context, ty=types.int64 ) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( @@ -354,7 +354,7 @@ def process_kernel_arg( ) ty = numba_type_to_dpctl_typenum( - context=self.context, type=types.int64 + context=self.context, ty=types.int64 ) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( @@ -367,9 +367,7 @@ def process_kernel_arg( ) else: - ty = numba_type_to_dpctl_typenum( - context=self.context, type=arg_type - ) + ty = numba_type_to_dpctl_typenum(context=self.context, ty=arg_type) self._form_kernel_arg_and_arg_ty( self.builder.bitcast( llvm_arg, From 3d394f5819ff71067a6426f18fee141fa5e412bc Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Sat, 1 Jul 2023 01:42:01 -0500 Subject: [PATCH 2/8] Add license header. --- numba_dpex/core/runtime/_queuestruct.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/numba_dpex/core/runtime/_queuestruct.h b/numba_dpex/core/runtime/_queuestruct.h index b2997411eb..ebb088d65a 100644 --- a/numba_dpex/core/runtime/_queuestruct.h +++ b/numba_dpex/core/runtime/_queuestruct.h @@ -1,10 +1,15 @@ -#ifndef NUMBA_DPEX_QUEUESTRUCT_H_ -#define NUMBA_DPEX_QUEUESTRUCT_H_ -/* - * Fill in the *queuestruct* with information from the Numpy array *obj*. - * *queuestruct*'s layout is defined in numba.targets.arrayobj (look - * for the ArrayTemplate class). - */ +// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines the numba-dpex native representation for a dpctl.SyclQueue +/// +//===----------------------------------------------------------------------===// + +#pragma once #include @@ -13,5 +18,3 @@ typedef struct PyObject *parent; void *queue_ref; } queuestruct_t; - -#endif /* NUMBA_DPEX_QUEUESTRUCT_H_ */ From 7a5a31759161a4804179e82b638c594565cfb96a Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Sat, 1 Jul 2023 01:42:15 -0500 Subject: [PATCH 3/8] Updates the dpnp array constructor overloads. - Updates the *_like overloads to extract the sycl_queue for the input array. It was not possible previously as the sycl_queue attribute was not present. - Update unit tests. - Add new unit tests. --- numba_dpex/dpnp_iface/_intrinsic.py | 91 +++++++++++++++---- numba_dpex/dpnp_iface/arrayobj.py | 84 ++++++++++++++++- .../tests/core/types/DpnpNdArray/test_bugs.py | 2 - .../core/types/DpnpNdArray/test_models.py | 14 +-- .../dpjit_tests/dpnp/test_dpnp_empty_like.py | 23 ++++- .../dpjit_tests/dpnp/test_dpnp_full_like.py | 27 ++++-- .../dpjit_tests/dpnp/test_dpnp_ones_like.py | 24 ++++- .../dpjit_tests/dpnp/test_dpnp_zeros_like.py | 27 +++++- 8 files changed, 243 insertions(+), 49 deletions(-) diff --git a/numba_dpex/dpnp_iface/_intrinsic.py b/numba_dpex/dpnp_iface/_intrinsic.py index f806f16ce8..d82c8a26c6 100644 --- a/numba_dpex/dpnp_iface/_intrinsic.py +++ b/numba_dpex/dpnp_iface/_intrinsic.py @@ -21,6 +21,7 @@ populate_array, ) +from numba_dpex.core.datamodel.models import dpex_data_model_manager as dpex_dmm from numba_dpex.core.runtime import context as dpexrt from numba_dpex.core.types import DpnpNdArray from numba_dpex.core.types.dpctl_types import DpctlSyclQueue @@ -80,7 +81,9 @@ def make_queue(context, builder, py_dpctl_sycl_queue): return ret -def _get_queue_ref(context, builder, sig, args): +def _get_queue_ref( + context, builder, sig, args, *, sycl_queue_arg_pos, array_arg_pos=None +): """Returns an LLVM IR Value pointer to a DpctlSyclQueueRef The _get_queue_ref function is used by the intinsic functions that implement @@ -118,25 +121,33 @@ def _get_queue_ref(context, builder, sig, args): """ - queue_arg = args[-2] - queue_arg_ty = sig.args[-2] + queue_arg = args[sycl_queue_arg_pos] + queue_arg_ty = sig.args[sycl_queue_arg_pos] queue_ref = None py_dpctl_sycl_queue_addr = None pyapi = None - if isinstance(queue_arg_ty, DpctlSyclQueue): + if not isinstance( + queue_arg_ty, (types.misc.NoneType, types.misc.Omitted) + ) and isinstance(queue_arg_ty, DpctlSyclQueue): if not isinstance(queue_arg.type, llvmir.LiteralStructType): raise AssertionError - queue_ref = builder.extract_value(queue_arg, 1) - - elif isinstance(queue_arg_ty, types.misc.NoneType) or isinstance( - queue_arg_ty, types.misc.Omitted - ): + sycl_queue_dm = dpex_dmm.lookup(queue_arg_ty) + queue_ref = builder.extract_value( + queue_arg, sycl_queue_dm.get_field_position("queue_ref") + ) + elif array_arg_pos is not None: + array_arg = args[array_arg_pos] + array_arg_ty = sig.args[array_arg_pos] + dpnp_ndarray_dm = dpex_dmm.lookup(array_arg_ty) + queue_ref = builder.extract_value( + array_arg, dpnp_ndarray_dm.get_field_position("sycl_queue") + ) + else: if not isinstance(queue_arg.type, llvmir.PointerType): # TODO: check if the pointer is null raise AssertionError - ty_sycl_queue = sig.return_type.queue py_dpctl_sycl_queue = get_device_cached_queue(ty_sycl_queue.sycl_device) (queue_ref, py_dpctl_sycl_queue_addr, pyapi) = make_queue( @@ -147,6 +158,14 @@ def _get_queue_ref(context, builder, sig, args): return ret +def _update_queue_attr(array, queue): + """Sets the sycl_queue member of an ArrayStruct.""" + + attr = dict(sycl_queue=queue) + for k, v in attr.items(): + setattr(array, k, v) + + def _empty_nd_impl(context, builder, arrtype, shapes, queue_ref): """Utility function used for allocating a new array. @@ -252,6 +271,7 @@ def _empty_nd_impl(context, builder, arrtype, shapes, queue_ref): shape_array = cgutils.pack_array(builder, shapes, ty=intp_t) strides_array = cgutils.pack_array(builder, strides, ty=intp_t) + _update_queue_attr(ary, queue=queue_ref_copy) populate_array( ary, data=builder.bitcast(data, datatype.as_pointer()), @@ -432,9 +452,11 @@ def impl_dpnp_empty( ty_retty_ref, ) + sycl_queue_arg_pos = -2 + def codegen(context, builder, sig, args): qref_payload: _QueueRefPayload = _get_queue_ref( - context, builder, sig, args + context, builder, sig, args, sycl_queue_arg_pos=sycl_queue_arg_pos ) ary = alloc_empty_arrayobj( @@ -496,10 +518,11 @@ def impl_dpnp_zeros( ty_sycl_queue, ty_retty_ref, ) + sycl_queue_arg_pos = -2 def codegen(context, builder, sig, args): qref_payload: _QueueRefPayload = _get_queue_ref( - context, builder, sig, args + context, builder, sig, args, sycl_queue_arg_pos=sycl_queue_arg_pos ) ary = alloc_empty_arrayobj( context, builder, sig, qref_payload.queue_ref, args @@ -569,9 +592,11 @@ def impl_dpnp_ones( ty_retty_ref, ) + sycl_queue_arg_pos = -2 + def codegen(context, builder, sig, args): qref_payload: _QueueRefPayload = _get_queue_ref( - context, builder, sig, args + context, builder, sig, args, sycl_queue_arg_pos=sycl_queue_arg_pos ) ary = alloc_empty_arrayobj( context, builder, sig, qref_payload.queue_ref, args @@ -647,10 +672,11 @@ def impl_dpnp_full( ty_sycl_queue, ty_retty_ref, ) + sycl_queue_arg_pos = -2 def codegen(context, builder, sig, args): qref_payload: _QueueRefPayload = _get_queue_ref( - context, builder, sig, args + context, builder, sig, args, sycl_queue_arg_pos=sycl_queue_arg_pos ) ary = alloc_empty_arrayobj( context, builder, sig, qref_payload.queue_ref, args @@ -726,10 +752,17 @@ def impl_dpnp_empty_like( ty_sycl_queue, ty_retty_ref, ) + sycl_queue_arg_pos = -2 + array_arg_pos = 0 def codegen(context, builder, sig, args): qref_payload: _QueueRefPayload = _get_queue_ref( - context, builder, sig, args + context, + builder, + sig, + args, + sycl_queue_arg_pos=sycl_queue_arg_pos, + array_arg_pos=array_arg_pos, ) ary = alloc_empty_arrayobj( @@ -799,9 +832,17 @@ def impl_dpnp_zeros_like( ty_retty_ref, ) + sycl_queue_arg_pos = -2 + array_arg_pos = 0 + def codegen(context, builder, sig, args): qref_payload: _QueueRefPayload = _get_queue_ref( - context, builder, sig, args + context, + builder, + sig, + args, + sycl_queue_arg_pos=sycl_queue_arg_pos, + array_arg_pos=array_arg_pos, ) ary = alloc_empty_arrayobj( context, builder, sig, qref_payload.queue_ref, args, is_like=True @@ -877,10 +918,17 @@ def impl_dpnp_ones_like( ty_sycl_queue, ty_retty_ref, ) + sycl_queue_arg_pos = -2 + array_arg_pos = 0 def codegen(context, builder, sig, args): qref_payload: _QueueRefPayload = _get_queue_ref( - context, builder, sig, args + context, + builder, + sig, + args, + sycl_queue_arg_pos=sycl_queue_arg_pos, + array_arg_pos=array_arg_pos, ) ary = alloc_empty_arrayobj( context, builder, sig, qref_payload.queue_ref, args, is_like=True @@ -960,10 +1008,17 @@ def impl_dpnp_full_like( ty_sycl_queue, ty_retty_ref, ) + sycl_queue_arg_pos = -2 + array_arg_pos = 0 def codegen(context, builder, sig, args): qref_payload: _QueueRefPayload = _get_queue_ref( - context, builder, sig, args + context, + builder, + sig, + args, + sycl_queue_arg_pos=sycl_queue_arg_pos, + array_arg_pos=array_arg_pos, ) ary = alloc_empty_arrayobj( context, builder, sig, qref_payload.queue_ref, args, is_like=True diff --git a/numba_dpex/dpnp_iface/arrayobj.py b/numba_dpex/dpnp_iface/arrayobj.py index da73ac8ac7..c87bfa8a7e 100644 --- a/numba_dpex/dpnp_iface/arrayobj.py +++ b/numba_dpex/dpnp_iface/arrayobj.py @@ -2,15 +2,20 @@ # # SPDX-License-Identifier: Apache-2.0 +import operator + import dpnp from numba import errors, types +from numba.core.imputils import lower_builtin from numba.core.types import scalars from numba.core.types.containers import UniTuple from numba.core.typing.npydecl import parse_dtype as _ty_parse_dtype from numba.core.typing.npydecl import parse_shape as _ty_parse_shape from numba.extending import overload +from numba.np.arrayobj import getitem_arraynd_intp as np_getitem_arraynd_intp from numba.np.numpy_support import is_nonelike +from numba_dpex.core.datamodel.models import dpex_data_model_manager as dpex_dmm from numba_dpex.core.types import DpnpNdArray from ._intrinsic import ( @@ -606,6 +611,7 @@ def ol_dpnp_empty_like( errors.TypingError: If both `device` and `sycl_queue` are provided. errors.TypingError: If couldn't parse input types to dpnp.empty_like(). errors.TypingError: If shape is provided. + errors.TypingError: If `x1` is not an instance of DpnpNdArray Returns: function: Local function `impl_dpnp_empty_like()`. @@ -617,19 +623,31 @@ def ol_dpnp_empty_like( + "inside overloaded dpnp.empty_like() function." ) + if not isinstance(x1, DpnpNdArray): + raise errors.TypingError( + "Only objects of dpnp.dpnp_array type are supported as " + "input array ``x1``." + ) + _ndim = _parse_dim(x1) _dtype = x1.dtype if isinstance(x1, types.Array) else _parse_dtype(dtype) _layout = x1.layout if order is None else order _usm_type = _parse_usm_type(usm_type) if usm_type else "device" _device = _parse_device_filter_string(device) if device else None + # If a sycl_queue or device argument was not explicitly provided get the + # queue from the array (x1) argument. + _queue = sycl_queue + if _queue is None and _device is None: + _queue = x1.queue + ret_ty = DpnpNdArray( ndim=_ndim, layout=_layout, dtype=_dtype, usm_type=_usm_type, device=_device, - queue=sycl_queue, + queue=_queue, ) if ret_ty: @@ -718,6 +736,7 @@ def ol_dpnp_zeros_like( errors.TypingError: If both `device` and `sycl_queue` are provided. errors.TypingError: If couldn't parse input types to dpnp.zeros_like(). errors.TypingError: If shape is provided. + errors.TypingError: If `x1` is not an instance of DpnpNdArray Returns: function: Local function `impl_dpnp_zeros_like()`. @@ -729,19 +748,31 @@ def ol_dpnp_zeros_like( + "inside overloaded dpnp.zeros_like() function." ) + if not isinstance(x1, DpnpNdArray): + raise errors.TypingError( + "Only objects of dpnp.dpnp_array type are supported as " + "input array ``x1``." + ) + _ndim = _parse_dim(x1) _dtype = x1.dtype if isinstance(x1, types.Array) else _parse_dtype(dtype) _layout = x1.layout if order is None else order _usm_type = _parse_usm_type(usm_type) if usm_type else "device" _device = _parse_device_filter_string(device) if device else None + # If a sycl_queue or device argument was not explicitly provided get the + # queue from the array (x1) argument. + _queue = sycl_queue + if _queue is None and _device is None: + _queue = x1.queue + ret_ty = DpnpNdArray( ndim=_ndim, layout=_layout, dtype=_dtype, usm_type=_usm_type, device=_device, - queue=sycl_queue, + queue=_queue, ) if ret_ty: @@ -829,6 +860,7 @@ def ol_dpnp_ones_like( errors.TypingError: If both `device` and `sycl_queue` are provided. errors.TypingError: If couldn't parse input types to dpnp.ones_like(). errors.TypingError: If shape is provided. + errors.TypingError: If `x1` is not an instance of DpnpNdArray Returns: function: Local function `impl_dpnp_ones_like()`. @@ -840,19 +872,31 @@ def ol_dpnp_ones_like( + "inside overloaded dpnp.ones_like() function." ) + if not isinstance(x1, DpnpNdArray): + raise errors.TypingError( + "Only objects of dpnp.dpnp_array type are supported as " + "input array ``x1``." + ) + _ndim = _parse_dim(x1) _dtype = x1.dtype if isinstance(x1, types.Array) else _parse_dtype(dtype) _layout = x1.layout if order is None else order _usm_type = _parse_usm_type(usm_type) if usm_type else "device" _device = _parse_device_filter_string(device) if device else None + # If a sycl_queue or device argument was not explicitly provided get the + # queue from the array (x1) argument. + _queue = sycl_queue + if _queue is None and _device is None: + _queue = x1.queue + ret_ty = DpnpNdArray( ndim=_ndim, layout=_layout, dtype=_dtype, usm_type=_usm_type, device=_device, - queue=sycl_queue, + queue=_queue, ) if ret_ty: @@ -946,6 +990,7 @@ def ol_dpnp_full_like( errors.TypingError: If both `device` and `sycl_queue` are provided. errors.TypingError: If couldn't parse input types to dpnp.full_like(). errors.TypingError: If shape is provided. + errors.TypingError: If `x1` is not an instance of DpnpNdArray Returns: function: Local function `impl_dpnp_full_like()`. @@ -967,13 +1012,19 @@ def ol_dpnp_full_like( _usm_type = _parse_usm_type(usm_type) if usm_type else "device" _device = _parse_device_filter_string(device) if device else None + # If a sycl_queue or device argument was not explicitly provided get the + # queue from the array (x1) argument. + _queue = sycl_queue + if _queue is None and _device is None: + _queue = x1.queue + ret_ty = DpnpNdArray( ndim=_ndim, layout=_layout, dtype=_dtype, usm_type=_usm_type, device=_device, - queue=sycl_queue, + queue=_queue, ) if ret_ty: @@ -1008,3 +1059,28 @@ def impl( "Cannot parse input types to " + f"function dpnp.full_like({x1}, {fill_value}, {dtype}, ...)." ) + + +@lower_builtin(operator.getitem, DpnpNdArray, types.Integer) +@lower_builtin(operator.getitem, DpnpNdArray, types.SliceType) +def getitem_arraynd_intp(context, builder, sig, args): + """ + Overrding the numba.np.arrayobj.getitem_arraynd_intp to support dpnp.ndarray + + The data model for numba.types.Array and numba_dpex.types.DpnpNdArray + are different. DpnpNdArray has an extra attribute to store a sycl::queue + pointer. For that reason, np_getitem_arraynd_intp needs to be overriden so + that when returning a view of a dpnp.ndarray the sycl::queue pointer + member in the LLVM IR struct gets properly updated. + """ + ret = np_getitem_arraynd_intp(context, builder, sig, args) + + array_val = args[0] + array_ty = sig.args[0] + sycl_queue_attr_pos = dpex_dmm.lookup(array_ty).get_field_position( + "sycl_queue" + ) + sycl_queue_attr = builder.extract_value(array_val, sycl_queue_attr_pos) + ret = builder.insert_value(ret, sycl_queue_attr, sycl_queue_attr_pos) + + return ret diff --git a/numba_dpex/tests/core/types/DpnpNdArray/test_bugs.py b/numba_dpex/tests/core/types/DpnpNdArray/test_bugs.py index ef2bc5b0f1..2b4055b96b 100644 --- a/numba_dpex/tests/core/types/DpnpNdArray/test_bugs.py +++ b/numba_dpex/tests/core/types/DpnpNdArray/test_bugs.py @@ -2,10 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 -import pytest from dpnp import ndarray as dpnp_ndarray -@pytest.mark.xfail(reason="dpnp.ndarray does not support flags yet") def test_dpnp_ndarray_flags(): assert hasattr(dpnp_ndarray([1]), "flags") diff --git a/numba_dpex/tests/core/types/DpnpNdArray/test_models.py b/numba_dpex/tests/core/types/DpnpNdArray/test_models.py index b578253feb..93b8eafe7b 100644 --- a/numba_dpex/tests/core/types/DpnpNdArray/test_models.py +++ b/numba_dpex/tests/core/types/DpnpNdArray/test_models.py @@ -3,9 +3,12 @@ # SPDX-License-Identifier: Apache-2.0 from numba import types -from numba.core.datamodel import default_manager, models -from numba.core.datamodel.models import ArrayModel +from numba.core.datamodel import models +from numba_dpex.core.datamodel.models import ( + DpnpNdArrayModel, + dpex_data_model_manager, +) from numba_dpex.core.types.dpnp_ndarray_type import DpnpNdArray @@ -16,10 +19,10 @@ def test_model_for_DpnpNdArray(): """ - model = default_manager.lookup( + model = dpex_data_model_manager.lookup( DpnpNdArray(ndim=1, dtype=types.float64, layout="C") ) - assert isinstance(model, ArrayModel) + assert isinstance(model, DpnpNdArrayModel) def test_dpnp_ndarray_Model(): @@ -28,5 +31,4 @@ def test_dpnp_ndarray_Model(): It is a subclass of models.StructModel and models.ArrayModel. """ - assert issubclass(ArrayModel, models.StructModel) - assert issubclass(ArrayModel, models.ArrayModel) + assert issubclass(DpnpNdArrayModel, models.StructModel) diff --git a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_like.py b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_like.py index b92a4b38be..9918f2c4dd 100644 --- a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_like.py +++ b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_like.py @@ -112,11 +112,26 @@ def func(x, queue): assert c.dtype == a.dtype assert c.usm_type == a.usm_type assert c.sycl_device == queue.sycl_device + assert c.sycl_queue == queue + assert c.sycl_queue == a.sycl_queue - if c.sycl_queue != queue: - pytest.xfail( - "Returned queue does not have the same queue as the one passed to the dpnp function." - ) + try: + queue = dpctl.SyclQueue() + a1 = dpnp.ones(shape, dtype=dtype, usm_type=usm_type) + c1 = func(a1, queue) + except Exception: + pytest.fail("Calling dpnp.empty_like() inside dpjit failed.") + + if len(c1.shape) == 1: + assert c1.shape[0] == a1.shape[0] + else: + assert c1.shape == a1.shape + + assert c1.dtype == a1.dtype + assert c1.usm_type == a1.usm_type + assert c1.sycl_device == queue.sycl_device + assert c1.sycl_queue == queue + assert c1.sycl_queue != a1.sycl_queue def test_dpnp_empty_like_exceptions(): diff --git a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_full_like.py b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_full_like.py index cc23319b92..b36dc0d41b 100644 --- a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_full_like.py +++ b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_full_like.py @@ -148,11 +148,8 @@ def func(x, fill_value, queue): assert c.dtype == dtype assert c.usm_type == usm_type assert c.sycl_device == queue.sycl_device - - if c.sycl_queue != queue: - pytest.xfail( - "Returned queue does not have the same queue as the one passed to the dpnp function." - ) + assert c.sycl_queue == a.sycl_queue + assert c.sycl_queue == queue # dummy = dpnp.full_like(a, fill_value, dtype=dtype) # dpnp can't cast 4294967295 into int32 and so on, @@ -160,6 +157,25 @@ def func(x, fill_value, queue): dummy = numpy.full_like(a.asnumpy(), fill_value, dtype=dtype) assert numpy.array_equal(c.asnumpy(), dummy) + try: + queue = dpctl.SyclQueue() + a1 = dpnp.zeros(shape, dtype=dtype, usm_type=usm_type) + c1 = func(a1, fill_value, queue) + + if len(c1.shape) == 1: + assert c1.shape[0] == shape + else: + assert c1.shape == shape + + assert c1.dtype == dtype + assert c1.usm_type == usm_type + assert c1.sycl_device == queue.sycl_device + assert c1.sycl_queue == queue + assert c1.sycl_queue != a1.sycl_queue + + except Exception: + pytest.fail("Calling dpnp.full_like() inside dpjit failed.") + def test_dpnp_full_like_exceptions(): """Test if exception is raised when both queue and device are specified.""" @@ -194,7 +210,6 @@ def func2(x, fill_value): ) -@pytest.mark.xfail def test_dpnp_full_like_from_numpy(): """Test if dpnp works with numpy array (it shouldn't)""" diff --git a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_ones_like.py b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_ones_like.py index 7356e9f278..8f5ce86441 100644 --- a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_ones_like.py +++ b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_ones_like.py @@ -116,11 +116,27 @@ def func(x, queue): assert c.usm_type == a.usm_type assert c.sycl_device == queue.sycl_device assert (c.asnumpy() == 1).all() + assert c.sycl_queue == queue + assert c.sycl_queue == a.sycl_queue - if c.sycl_queue != queue: - pytest.xfail( - "Returned queue does not have the same queue as the one passed to the dpnp function." - ) + try: + queue = dpctl.SyclQueue() + a1 = dpnp.zeros(shape, dtype=dtype, usm_type=usm_type) + c1 = func(a1, queue) + except Exception: + pytest.fail("Calling dpnp.ones_like() inside dpjit failed.") + + if len(c1.shape) == 1: + assert c1.shape[0] == a1.shape[0] + else: + assert c1.shape == a1.shape + + assert c1.dtype == a1.dtype + assert c1.usm_type == a1.usm_type + assert c1.sycl_device == queue.sycl_device + assert (c1.asnumpy() == 1).all() + assert c1.sycl_queue == queue + assert c1.sycl_queue != a1.sycl_queue def test_dpnp_ones_like_exceptions(): diff --git a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_zeros_like.py b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_zeros_like.py index d3c3bd97c2..ce2e5d5f78 100644 --- a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_zeros_like.py +++ b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_zeros_like.py @@ -84,7 +84,8 @@ def func(x): device ): pytest.xfail( - "Returned queue does not have the same queue as cached against the device." + "Returned queue does not have the same queue as cached against " + "the device." ) assert not c.asnumpy().any() @@ -116,11 +117,27 @@ def func(x, queue): assert c.usm_type == a.usm_type assert c.sycl_device == queue.sycl_device assert not c.asnumpy().any() + assert c.sycl_queue == queue + assert c.sycl_queue == a.sycl_queue - if c.sycl_queue != queue: - pytest.xfail( - "Returned queue does not have the same queue as the one passed to the dpnp function." - ) + try: + queue = dpctl.SyclQueue() + a1 = dpnp.zeros(shape, dtype=dtype, usm_type=usm_type) + c1 = func(a1, queue) + except Exception: + pytest.fail("Calling dpnp.ones_like() inside dpjit failed.") + + if len(c1.shape) == 1: + assert c1.shape[0] == a1.shape[0] + else: + assert c1.shape == a1.shape + + assert c1.dtype == a1.dtype + assert c1.usm_type == a1.usm_type + assert c1.sycl_device == queue.sycl_device + assert not c1.asnumpy().any() + assert c1.sycl_queue == queue + assert c1.sycl_queue != a1.sycl_queue def test_dpnp_zeros_like_exceptions(): From 35faa75cc77409f9128d5416466f4b076d2dbd2e Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Sat, 1 Jul 2023 15:44:10 -0500 Subject: [PATCH 4/8] Fix OpenCL GPU kernel execution. --- numba_dpex/core/datamodel/models.py | 5 ++++- numba_dpex/core/utils/kernel_launcher.py | 10 +++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/numba_dpex/core/datamodel/models.py b/numba_dpex/core/datamodel/models.py index 870df75fa0..f117204176 100644 --- a/numba_dpex/core/datamodel/models.py +++ b/numba_dpex/core/datamodel/models.py @@ -48,7 +48,10 @@ def __init__(self, dmm, fe_type): "data", types.CPointer(fe_type.dtype, addrspace=fe_type.addrspace), ), - ("sycl_queue", types.voidptr), + ( + "sycl_queue", + types.CPointer(types.void, addrspace=fe_type.addrspace), + ), ("shape", types.UniTuple(types.intp, ndim)), ("strides", types.UniTuple(types.intp, ndim)), ] diff --git a/numba_dpex/core/utils/kernel_launcher.py b/numba_dpex/core/utils/kernel_launcher.py index ace7e6d7d9..5865160c6f 100644 --- a/numba_dpex/core/utils/kernel_launcher.py +++ b/numba_dpex/core/utils/kernel_launcher.py @@ -217,11 +217,11 @@ def build_array_arg( arg_num=arg_num, ) arg_num += 1 - # Argument sycl_queue - self._build_array_attr_arg( - array_val=array_val, - array_attr_pos=array_data_model.get_field_position("sycl_queue"), - array_attr_ty=array_data_model.get_member_fe_type("sycl_queue"), + # Argument sycl_queue: as the queue pointer is not to be used in a + # kernel we always pass in a nullptr + self.build_arg( + val=nullptr, + ty=types.int64, arg_list=arg_list, args_ty_list=args_ty_list, arg_num=arg_num, From 27b36b9f5f3094b08db4ff4223515bf67d7c7518 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Sat, 1 Jul 2023 18:14:54 -0500 Subject: [PATCH 5/8] Remove xfail from fixed test cases. --- numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_like.py | 1 - numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_ones_like.py | 1 - numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_zeros_like.py | 1 - 3 files changed, 3 deletions(-) diff --git a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_like.py b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_like.py index 9918f2c4dd..7a1c0ec9d8 100644 --- a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_like.py +++ b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_empty_like.py @@ -167,7 +167,6 @@ def func2(x): ) -@pytest.mark.xfail def test_dpnp_empty_like_from_numpy(): """Test if dpnp works with numpy array (it shouldn't)""" diff --git a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_ones_like.py b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_ones_like.py index 8f5ce86441..fd8ad5056d 100644 --- a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_ones_like.py +++ b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_ones_like.py @@ -172,7 +172,6 @@ def func2(x): ) -@pytest.mark.xfail def test_dpnp_ones_like_from_numpy(): """Test if dpnp works with numpy array (it shouldn't)""" diff --git a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_zeros_like.py b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_zeros_like.py index ce2e5d5f78..2cb6e1daaf 100644 --- a/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_zeros_like.py +++ b/numba_dpex/tests/dpjit_tests/dpnp/test_dpnp_zeros_like.py @@ -173,7 +173,6 @@ def func2(x): ) -@pytest.mark.xfail def test_dpnp_zeros_like_from_numpy(): """Test if dpnp works with numpy array (it shouldn't)""" From e17a55bbe5b9752a0ace4096a73fb17c44b5d604 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Sat, 1 Jul 2023 18:22:12 -0500 Subject: [PATCH 6/8] Check for expected deprecation warning --- .../kernel_tests/test_kernel_launch_params.py | 66 +++++++++++-------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py b/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py index ed6fc7679b..55e383e44c 100644 --- a/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py +++ b/numba_dpex/tests/kernel_tests/test_kernel_launch_params.py @@ -22,9 +22,10 @@ def vecadd(a, b, c): def test_1D_global_range_as_int(): - k = vecadd[10] - assert k._global_range == [10] - assert k._local_range is None + with pytest.deprecated_call(): + k = vecadd[10] + assert k._global_range == [10] + assert k._local_range is None def test_1D_global_range_as_one_tuple(): @@ -34,43 +35,49 @@ def test_1D_global_range_as_one_tuple(): def test_1D_global_range_as_list(): - k = vecadd[[10]] - assert k._global_range == [10] - assert k._local_range is None + with pytest.deprecated_call(): + k = vecadd[[10]] + assert k._global_range == [10] + assert k._local_range is None def test_1D_global_range_and_1D_local_range1(): - k = vecadd[[10, 10]] - assert k._global_range == [10] - assert k._local_range == [10] + with pytest.deprecated_call(): + k = vecadd[[10, 10]] + assert k._global_range == [10] + assert k._local_range == [10] def test_1D_global_range_and_1D_local_range2(): - k = vecadd[(10,), (10,)] - assert k._global_range == [10] - assert k._local_range == [10] + with pytest.deprecated_call(): + k = vecadd[(10,), (10,)] + assert k._global_range == [10] + assert k._local_range == [10] def test_2D_global_range_and_2D_local_range1(): - k = vecadd[(10, 10), (10, 10)] - assert k._global_range == [10, 10] - assert k._local_range == [10, 10] + with pytest.deprecated_call(): + k = vecadd[(10, 10), (10, 10)] + assert k._global_range == [10, 10] + assert k._local_range == [10, 10] def test_2D_global_range_and_2D_local_range2(): - k = vecadd[[10, 10], (10, 10)] - assert k._global_range == [10, 10] - assert k._local_range == [10, 10] + with pytest.deprecated_call(): + k = vecadd[[10, 10], (10, 10)] + assert k._global_range == [10, 10] + assert k._local_range == [10, 10] def test_2D_global_range_and_2D_local_range3(): - k = vecadd[(10, 10), [10, 10]] - assert k._global_range == [10, 10] - assert k._local_range == [10, 10] + with pytest.deprecated_call(): + k = vecadd[(10, 10), [10, 10]] + assert k._global_range == [10, 10] + assert k._local_range == [10, 10] def test_2D_global_range_and_2D_local_range4(): - k = vecadd[[10, 10], [10, 10]] + k = vecadd[dpex.NdRange((10, 10), (10, 10))] assert k._global_range == [10, 10] assert k._local_range == [10, 10] @@ -119,27 +126,32 @@ def test_unknown_global_range_error(): def test_illegal_kernel_launch_arg1(): with pytest.raises(InvalidKernelLaunchArgsError): - vecadd[()] + with pytest.deprecated_call(): + vecadd[()] def test_illegal_kernel_launch_arg2(): with pytest.raises(InvalidKernelLaunchArgsError): - vecadd[10, 10, []] + with pytest.deprecated_call(): + vecadd[10, 10, []] def test_illegal_range_error1(): with pytest.raises(IllegalRangeValueError): - vecadd[[], []] + with pytest.deprecated_call(): + vecadd[[], []] def test_illegal_range_error2(): with pytest.raises(IllegalRangeValueError): - vecadd[[], 10] + with pytest.deprecated_call(): + vecadd[[], 10] def test_illegal_range_error3(): with pytest.raises(IllegalRangeValueError): - vecadd[(), 10] + with pytest.deprecated_call(): + vecadd[(), 10] if __name__ == "__main__": From 8aff32219272f5cee4ea09ce1241992c5f3cbe5c Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Thu, 6 Jul 2023 23:31:44 -0500 Subject: [PATCH 7/8] Address review comments. --- numba_dpex/core/datamodel/models.py | 15 +++++++++------ numba_dpex/dpnp_iface/_intrinsic.py | 18 +++++++++++++++--- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/numba_dpex/core/datamodel/models.py b/numba_dpex/core/datamodel/models.py index f117204176..1e97bd2b1b 100644 --- a/numba_dpex/core/datamodel/models.py +++ b/numba_dpex/core/datamodel/models.py @@ -61,11 +61,15 @@ def __init__(self, dmm, fe_type): class DpnpNdArrayModel(StructModel): """Data model for the DpnpNdArray type. - The data model for DpnpNdArray is similar to numb's ArrayModel used for - the numba.types.Array type, with the additional field ``sycl_queue`. The - `sycl_queue` attribute stores the pointer to the C++ sycl::queue object - that was used to allocate memory for numba-dpex's native representation - for an Python object inferred as a DpnpNdArray. + DpnpNdArrayModel is used by the numba_dpex.types.DpnpNdArray type and + abstracts the usmarystruct_t C type defined in + numba_dpex.core.runtime._usmarraystruct.h. + + The DpnpNdArrayModel differs from numba's ArrayModel by including an extra + member sycl_queue that maps to _usmarraystruct.sycl_queue pointer. The + _usmarraystruct.sycl_queue pointer stores the C++ sycl::queue pointer that + was used to allocate the data for the dpnp.ndarray represented by an + instance of _usmarraystruct. """ def __init__(self, dmm, fe_type): @@ -102,7 +106,6 @@ def flattened_field_count(self): ): flattened_member_count += 1 else: - print(member, type(member)) raise UnreachableError return flattened_member_count diff --git a/numba_dpex/dpnp_iface/_intrinsic.py b/numba_dpex/dpnp_iface/_intrinsic.py index d82c8a26c6..3e15de9265 100644 --- a/numba_dpex/dpnp_iface/_intrinsic.py +++ b/numba_dpex/dpnp_iface/_intrinsic.py @@ -132,7 +132,9 @@ def _get_queue_ref( queue_arg_ty, (types.misc.NoneType, types.misc.Omitted) ) and isinstance(queue_arg_ty, DpctlSyclQueue): if not isinstance(queue_arg.type, llvmir.LiteralStructType): - raise AssertionError + raise AssertionError( + "Expected the queue_arg to be an llvmir.LiteralStructType" + ) sycl_queue_dm = dpex_dmm.lookup(queue_arg_ty) queue_ref = builder.extract_value( queue_arg, sycl_queue_dm.get_field_position("queue_ref") @@ -147,7 +149,9 @@ def _get_queue_ref( else: if not isinstance(queue_arg.type, llvmir.PointerType): # TODO: check if the pointer is null - raise AssertionError + raise AssertionError( + "Expected the queue_arg to be an llvmir.PointerType" + ) ty_sycl_queue = sig.return_type.queue py_dpctl_sycl_queue = get_device_cached_queue(ty_sycl_queue.sycl_device) (queue_ref, py_dpctl_sycl_queue_addr, pyapi) = make_queue( @@ -159,7 +163,15 @@ def _get_queue_ref( def _update_queue_attr(array, queue): - """Sets the sycl_queue member of an ArrayStruct.""" + """Assigns the sycl_queue member of an usmarystruct_t instance. + + After creating a new usmarystruct_t struct (e.g. in _empty_nd_impl) the + members of the struct are populated by calling + numba.np.arrayobj.populate_array. The populate_array function does not + update the sycl_queue member as populate_array is written specifically for + numba's arystruct_t type that does not have a sycl_queue member. The + _update_queue_attr is a helper function to update the sycl_queue field. + """ attr = dict(sycl_queue=queue) for k, v in attr.items(): From 487735265584af7c6f5d4ea538182076bd811435 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Mon, 10 Jul 2023 13:16:03 -0500 Subject: [PATCH 8/8] Change the signature for _get_queue_ref. - Addresses the review comment to pass required arguments to _get_queue_ref explicitly. --- numba_dpex/dpnp_iface/_intrinsic.py | 162 +++++++++++++++++----------- 1 file changed, 101 insertions(+), 61 deletions(-) diff --git a/numba_dpex/dpnp_iface/_intrinsic.py b/numba_dpex/dpnp_iface/_intrinsic.py index 3e15de9265..a540a13cb0 100644 --- a/numba_dpex/dpnp_iface/_intrinsic.py +++ b/numba_dpex/dpnp_iface/_intrinsic.py @@ -30,6 +30,8 @@ "QueueRefPayload", ["queue_ref", "py_dpctl_sycl_queue_addr", "pyapi"] ) +_ArgTyAndValue = namedtuple("ArgTyAndValue", ["numba_ty", "llvmir_val"]) + # XXX: The function should be moved into DpexTargetContext def make_queue(context, builder, py_dpctl_sycl_queue): @@ -82,7 +84,11 @@ def make_queue(context, builder, py_dpctl_sycl_queue): def _get_queue_ref( - context, builder, sig, args, *, sycl_queue_arg_pos, array_arg_pos=None + context, + builder, + returned_sycl_queue_ty, + sycl_queue_arg: _ArgTyAndValue, + array_arg: _ArgTyAndValue = None, ): """Returns an LLVM IR Value pointer to a DpctlSyclQueueRef @@ -90,19 +96,15 @@ def _get_queue_ref( the overloads for dpnp array constructors: ``empty``, ``empty_like``, ``zeros``, ``zeros_like``, ``ones``, ``ones_like``, ``full``, ``full_like``. - The args contains the list of LLVM IR values passed in to the dpnp - overloads. The convention we follow is that the queue arg is always the - penultimate arg passed to the intrinsic. For that reason, we can extract the - queue argument as args[-2] and the type of the argument from the signature - as sig.args[-2]. - - Depending on whether the ``sycl_queue`` argument was explicitly specified, - or was omitted, the queue_arg will be either a DpctlSyclQueue type or a - numba NoneType/Omitted type. If a DpctlSyclQueue, then we directly extract - the queue_ref from the unboxed native struct representation of a - dpctl.SyclQueue. If a queue was not explicitly provided and the type is - NoneType/Omitted, we get a cached dpctl.SyclQueue from dpctl and unbox it - on the fly and return the queue_ref. + The function returns an LLVM IR Value corresponding to a dpctl.SyclQueue + Python object's underlying ``_queue_ref`` pointer. If a non-None + ``sycl_queue_arg`` is provided, then the ``_queue_ref`` attribute is + extracted from the ``sycl_queue_arg``. If the ``sycl_queue_arg`` is + None or omitted and an ``array_arg`` is provided, then the ``_queue_ref`` + is extracted from the unboxed representation of the ``array_arg``. If + nether a non-None ``sycl_queue_arg`` nor an ``array_arg`` is provided, + then a cached dpctl.SyclQueue is retreived from dpctl and unboxed on the fly + and the ``_queue_ref`` from that unboxed queue is returned to caller. Args: context (numba.core.base.BaseContext): Any of the context @@ -110,9 +112,11 @@ def _get_queue_ref( (e.g. `numba.core.cpu.CPUContext`). builder (llvmlite.ir.builder.IRBuilder): The IR builder from `llvmlite` for code generation. - sig: Signature of the overload function - args (list): LLVM IR values corresponding to the args passed to the LLVM - function created for a dpnp overload. + returned_sycl_queue_ty: An instance of numba_dpex.types.DpctlSyclQueue + sycl_queue_arg: A 2-tuple storing the numba inferred type and the + corresponding LLVM IR value for a dpctl.SyclQueue Python object. + array_arg: A 2-tuple storing the numba inferred type and the + corresponding LLVM IR value for a dpnp.ndarray Python object. Return: A namedtuple wrapping the queue_ref pointer, an optional address to @@ -121,39 +125,39 @@ def _get_queue_ref( """ - queue_arg = args[sycl_queue_arg_pos] - queue_arg_ty = sig.args[sycl_queue_arg_pos] - queue_ref = None py_dpctl_sycl_queue_addr = None pyapi = None if not isinstance( - queue_arg_ty, (types.misc.NoneType, types.misc.Omitted) - ) and isinstance(queue_arg_ty, DpctlSyclQueue): - if not isinstance(queue_arg.type, llvmir.LiteralStructType): + sycl_queue_arg.numba_ty, (types.misc.NoneType, types.misc.Omitted) + ) and isinstance(sycl_queue_arg.numba_ty, DpctlSyclQueue): + if not isinstance( + sycl_queue_arg.llvmir_val.type, llvmir.LiteralStructType + ): raise AssertionError( "Expected the queue_arg to be an llvmir.LiteralStructType" ) - sycl_queue_dm = dpex_dmm.lookup(queue_arg_ty) + sycl_queue_dm = dpex_dmm.lookup(sycl_queue_arg.numba_ty) queue_ref = builder.extract_value( - queue_arg, sycl_queue_dm.get_field_position("queue_ref") + sycl_queue_arg.llvmir_val, + sycl_queue_dm.get_field_position("queue_ref"), ) - elif array_arg_pos is not None: - array_arg = args[array_arg_pos] - array_arg_ty = sig.args[array_arg_pos] - dpnp_ndarray_dm = dpex_dmm.lookup(array_arg_ty) + elif array_arg is not None: + dpnp_ndarray_dm = dpex_dmm.lookup(array_arg.numba_ty) queue_ref = builder.extract_value( - array_arg, dpnp_ndarray_dm.get_field_position("sycl_queue") + array_arg.llvmir_val, + dpnp_ndarray_dm.get_field_position("sycl_queue"), ) else: - if not isinstance(queue_arg.type, llvmir.PointerType): + if not isinstance(sycl_queue_arg.llvmir_val.type, llvmir.PointerType): # TODO: check if the pointer is null raise AssertionError( "Expected the queue_arg to be an llvmir.PointerType" ) - ty_sycl_queue = sig.return_type.queue - py_dpctl_sycl_queue = get_device_cached_queue(ty_sycl_queue.sycl_device) + py_dpctl_sycl_queue = get_device_cached_queue( + returned_sycl_queue_ty.sycl_device + ) (queue_ref, py_dpctl_sycl_queue_addr, pyapi) = make_queue( context, builder, py_dpctl_sycl_queue ) @@ -467,8 +471,14 @@ def impl_dpnp_empty( sycl_queue_arg_pos = -2 def codegen(context, builder, sig, args): + sycl_queue_arg = _ArgTyAndValue( + sig.args[sycl_queue_arg_pos], args[sycl_queue_arg_pos] + ) qref_payload: _QueueRefPayload = _get_queue_ref( - context, builder, sig, args, sycl_queue_arg_pos=sycl_queue_arg_pos + context=context, + builder=builder, + returned_sycl_queue_ty=sig.return_type.queue, + sycl_queue_arg=sycl_queue_arg, ) ary = alloc_empty_arrayobj( @@ -533,8 +543,14 @@ def impl_dpnp_zeros( sycl_queue_arg_pos = -2 def codegen(context, builder, sig, args): + sycl_queue_arg = _ArgTyAndValue( + sig.args[sycl_queue_arg_pos], args[sycl_queue_arg_pos] + ) qref_payload: _QueueRefPayload = _get_queue_ref( - context, builder, sig, args, sycl_queue_arg_pos=sycl_queue_arg_pos + context=context, + builder=builder, + returned_sycl_queue_ty=sig.return_type.queue, + sycl_queue_arg=sycl_queue_arg, ) ary = alloc_empty_arrayobj( context, builder, sig, qref_payload.queue_ref, args @@ -607,8 +623,14 @@ def impl_dpnp_ones( sycl_queue_arg_pos = -2 def codegen(context, builder, sig, args): + sycl_queue_arg = _ArgTyAndValue( + sig.args[sycl_queue_arg_pos], args[sycl_queue_arg_pos] + ) qref_payload: _QueueRefPayload = _get_queue_ref( - context, builder, sig, args, sycl_queue_arg_pos=sycl_queue_arg_pos + context=context, + builder=builder, + returned_sycl_queue_ty=sig.return_type.queue, + sycl_queue_arg=sycl_queue_arg, ) ary = alloc_empty_arrayobj( context, builder, sig, qref_payload.queue_ref, args @@ -687,8 +709,14 @@ def impl_dpnp_full( sycl_queue_arg_pos = -2 def codegen(context, builder, sig, args): + sycl_queue_arg = _ArgTyAndValue( + sig.args[sycl_queue_arg_pos], args[sycl_queue_arg_pos] + ) qref_payload: _QueueRefPayload = _get_queue_ref( - context, builder, sig, args, sycl_queue_arg_pos=sycl_queue_arg_pos + context=context, + builder=builder, + returned_sycl_queue_ty=sig.return_type.queue, + sycl_queue_arg=sycl_queue_arg, ) ary = alloc_empty_arrayobj( context, builder, sig, qref_payload.queue_ref, args @@ -768,13 +796,16 @@ def impl_dpnp_empty_like( array_arg_pos = 0 def codegen(context, builder, sig, args): + sycl_queue_arg = _ArgTyAndValue( + sig.args[sycl_queue_arg_pos], args[sycl_queue_arg_pos] + ) + array_arg = _ArgTyAndValue(sig.args[array_arg_pos], args[array_arg_pos]) qref_payload: _QueueRefPayload = _get_queue_ref( - context, - builder, - sig, - args, - sycl_queue_arg_pos=sycl_queue_arg_pos, - array_arg_pos=array_arg_pos, + context=context, + builder=builder, + returned_sycl_queue_ty=sig.return_type.queue, + sycl_queue_arg=sycl_queue_arg, + array_arg=array_arg, ) ary = alloc_empty_arrayobj( @@ -848,13 +879,16 @@ def impl_dpnp_zeros_like( array_arg_pos = 0 def codegen(context, builder, sig, args): + sycl_queue_arg = _ArgTyAndValue( + sig.args[sycl_queue_arg_pos], args[sycl_queue_arg_pos] + ) + array_arg = _ArgTyAndValue(sig.args[array_arg_pos], args[array_arg_pos]) qref_payload: _QueueRefPayload = _get_queue_ref( - context, - builder, - sig, - args, - sycl_queue_arg_pos=sycl_queue_arg_pos, - array_arg_pos=array_arg_pos, + context=context, + builder=builder, + returned_sycl_queue_ty=sig.return_type.queue, + sycl_queue_arg=sycl_queue_arg, + array_arg=array_arg, ) ary = alloc_empty_arrayobj( context, builder, sig, qref_payload.queue_ref, args, is_like=True @@ -934,13 +968,16 @@ def impl_dpnp_ones_like( array_arg_pos = 0 def codegen(context, builder, sig, args): + sycl_queue_arg = _ArgTyAndValue( + sig.args[sycl_queue_arg_pos], args[sycl_queue_arg_pos] + ) + array_arg = _ArgTyAndValue(sig.args[array_arg_pos], args[array_arg_pos]) qref_payload: _QueueRefPayload = _get_queue_ref( - context, - builder, - sig, - args, - sycl_queue_arg_pos=sycl_queue_arg_pos, - array_arg_pos=array_arg_pos, + context=context, + builder=builder, + returned_sycl_queue_ty=sig.return_type.queue, + sycl_queue_arg=sycl_queue_arg, + array_arg=array_arg, ) ary = alloc_empty_arrayobj( context, builder, sig, qref_payload.queue_ref, args, is_like=True @@ -1024,13 +1061,16 @@ def impl_dpnp_full_like( array_arg_pos = 0 def codegen(context, builder, sig, args): + sycl_queue_arg = _ArgTyAndValue( + sig.args[sycl_queue_arg_pos], args[sycl_queue_arg_pos] + ) + array_arg = _ArgTyAndValue(sig.args[array_arg_pos], args[array_arg_pos]) qref_payload: _QueueRefPayload = _get_queue_ref( - context, - builder, - sig, - args, - sycl_queue_arg_pos=sycl_queue_arg_pos, - array_arg_pos=array_arg_pos, + context=context, + builder=builder, + returned_sycl_queue_ty=sig.return_type.queue, + sycl_queue_arg=sycl_queue_arg, + array_arg=array_arg, ) ary = alloc_empty_arrayobj( context, builder, sig, qref_payload.queue_ref, args, is_like=True