-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- The concept of a kernel was decoupled from the notion of dispatching a kernel. The present implementation in compiler.py intermixes both things, making it hard to separate the compute-follows-data based kernel launch from the legacy `dpctl.device_context` based behavior. - Deprecates support for numpy arrays as kernel args. - Deprecates support for the square bracket notation using `__getitem__` to provide global and local ranges for a kernel launch. - Changes the behavior of specializing a kernel using only a signature. The new way to specialize will require a device type and a backend. - Improves exception messages by using custom exceptions. - The new API is now inside `numba_dpex.core.kernel_interface`.
- Loading branch information
Diptorup Deb
committed
Oct 18, 2022
1 parent
b9098d1
commit 6b179af
Showing
4 changed files
with
577 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
"""Defines the Kernel classes that abstract a SYCL device kernel. | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,369 @@ | ||
# SPDX-FileCopyrightText: 2022 Intel Corporation | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
import copy | ||
from warnings import warn | ||
|
||
import dpctl | ||
|
||
from numba_dpex.core.descriptor import dpex_target | ||
from numba_dpex.core.exceptions import ( | ||
ComputeFollowsDataInferenceError, | ||
ExecutionQueueInferenceError, | ||
InvalidKernelLaunchArgsError, | ||
UnknownGlobalRangeError, | ||
UnsupportedBackendError, | ||
) | ||
from numba_dpex.core.types import Array as ArrayType | ||
from numba_dpex.dpctl_iface import USMNdArrayType | ||
|
||
|
||
class KernelLauncher(object):
    """Creates a Kernel object from a @kernel decorated function and enqueues
    the Kernel object on a specified device.

    Attributes:
        typingctx: The typing context of ``dpex_target``; used to resolve the
            Numba type of every kernel argument.
        pyfunc: The Python function that was decorated as a kernel.
        debug_flags: Debug flags stored as passed to the constructor.
        compile_flags: Compile flags stored as passed to the constructor.
        kernel_name (str): ``pyfunc.__name__``; used in exception messages.
        array_access_specifiers: Deprecated access specifiers, stored as
            passed to the constructor.
        global_range: Global range set through ``__getitem__``, or None.
        local_range: Local range set through ``__getitem__``, or None.
    """

    def __init__(
        self,
        pyfunc,
        debug_flags=None,
        compile_flags=None,
        array_access_specifiers=None,
    ):
        """Stores the kernel function and its compilation settings.

        Args:
            pyfunc: The Python function to be launched as a kernel.
            debug_flags: Optional debug flags. Defaults to None.
            compile_flags: Optional compile flags. Defaults to None.
            array_access_specifiers: Optional (deprecated) access specifiers
                for NumPy ndarray arguments. A ``DeprecationWarning`` is
                issued when a truthy value is passed. Defaults to None.
        """
        self.typingctx = dpex_target.typing_context
        self.pyfunc = pyfunc
        self.debug_flags = debug_flags
        self.compile_flags = compile_flags
        self.kernel_name = pyfunc.__name__

        # The launch configuration has to exist even when ``__getitem__`` was
        # never used, because ``__call__`` inspects both attributes.
        self.global_range = None
        self.local_range = None

        if array_access_specifiers:
            warn(
                "Access specifiers apply only to NumPy ndarrays. "
                + "Support for NumPy ndarray objects as kernel arguments "
                + "and access specifiers flags is deprecated. "
                + "Use dpctl.tensor.usm_ndarray based arrays instead.",
                DeprecationWarning,
                stacklevel=2,
            )
        self.array_access_specifiers = array_access_specifiers

    # TODO: add an ``_ensure_valid_work_item_grid(val, sycl_queue)`` helper
    # that rejects values that are not an int, tuple or list, wraps a bare int
    # into a list, checks the number of dimensions against the device maximum
    # (some way to query the max dimensions is still needed), and returns the
    # dimensions reversed to account for the SYCL and OpenCL interop kernel
    # range mismatch semantic.

    def _determine_compute_follows_data_queue(self, usm_array_list):
        """Determine the execution queue for the list of usm array args using
        compute follows data rules.

        Uses ``dpctl.utils.get_execution_queue()`` to check if the list of
        queues belonging to the usm_ndarrays are equivalent. If the queues are
        equivalent, then returns the queue. If the queues are not equivalent
        then returns None.

        Args:
            usm_array_list : A list of usm_ndarray objects

        Returns:
            The common queue used to allocate the arrays. If no such queue
            exists, then returns None.
        """
        queues = [usm_array.sycl_queue for usm_array in usm_array_list]
        return dpctl.utils.get_execution_queue(queues)

    def _determine_kernel_launch_queue(self, args):
        """Determines the queue where the kernel is to be launched.

        The execution queue is derived using the following algorithm. In
        future, support for ``numpy.ndarray`` and ``dpctl.device_context`` is
        to be removed and queue derivation will follow Python Array API's
        "compute follows data" logic.

        Check if there are array arguments.
        True:
            Check if all array arguments are of type numpy.ndarray
            (numba.types.Array)
                True:
                    Check if the kernel was invoked from within a
                    dpctl.device_context.
                    True:
                        Provide a deprecation warning for device_context use
                        and point to using dpctl.tensor.usm_ndarray or
                        dpnp.ndarray

                        return dpctl.get_current_queue
                    False:
                        Raise ExecutionQueueInferenceError
                False:
                    Check if all of the arrays are USMNdarray
                        True:
                            Check if execution queue could be inferred using
                            compute follows data rules
                            True:
                                return the compute follows data inferred queue
                            False:
                                Raise ComputeFollowsDataInferenceError
                        False:
                            Raise ComputeFollowsDataInferenceError
        False:
            Check if the kernel was invoked from within a
            dpctl.device_context.
            True:
                Provide a deprecation warning for device_context use and
                point to using dpctl.tensor.usm_ndarray or dpnp.ndarray

                return dpctl.get_current_queue
            False:
                Raise ExecutionQueueInferenceError

        Args:
            args : A sequence of the arguments passed to the kernel.

        Returns:
            The queue on which the kernel is to be launched.

        Raises:
            ComputeFollowsDataInferenceError: If the queue could not be
                inferred using compute follows data rules.
            ExecutionQueueInferenceError: If the queue could not be inferred
                using the dpctl queue manager.
        """
        # Resolve every argument type once and classify the positions.
        resolve_type = self.typingctx.resolve_argument_type
        arg_types = [resolve_type(arg) for arg in args]
        array_argnums = [
            i for i, ty in enumerate(arg_types) if isinstance(ty, ArrayType)
        ]
        usmarray_argnums = [
            i
            for i, ty in enumerate(arg_types)
            if isinstance(ty, USMNdArrayType)
        ]

        # if usm and non-usm array arguments are getting mixed, then the
        # execution queue cannot be inferred using compute follows data rules.
        if array_argnums and usmarray_argnums:
            # NOTE(review): this call and the one further below pass
            # differently shaped arguments to ComputeFollowsDataInferenceError
            # (no kernel name here); confirm against the exception signature.
            raise ComputeFollowsDataInferenceError(
                array_argnums, usmarray_argnums
            )

        if usmarray_argnums:
            if dpctl.is_in_device_context():
                warn(
                    "dpctl.device_context ignored as the kernel arguments "
                    + "are dpctl.tensor.usm_ndarray based array containers."
                )
            usm_array_args = [args[i] for i in usmarray_argnums]
            queue = self._determine_compute_follows_data_queue(usm_array_args)
            if queue is None:
                raise ComputeFollowsDataInferenceError(
                    self.kernel_name, usmarray_argnums
                )
            return queue

        # Only NumPy ndarray arguments, or no array arguments at all: the
        # queue can only come from the (deprecated) dpctl.device_context.
        if not dpctl.is_in_device_context():
            raise ExecutionQueueInferenceError(self.kernel_name)

        warn(
            "Support for dpctl.device_context to specify the "
            + "execution queue is deprecated. "
            + "Use dpctl.tensor.usm_ndarray based array "
            + "containers instead. ",
            DeprecationWarning,
            stacklevel=2,
        )
        if array_argnums:
            warn(
                "Support for NumPy ndarray objects as kernel arguments is "
                + "deprecated. Use dpctl.tensor.usm_ndarray based array "
                + "containers instead. ",
                DeprecationWarning,
                stacklevel=2,
            )
        return dpctl.get_current_queue()

    def __getitem__(self, args):
        """Mimics ``numba.cuda`` square-bracket notation for configuring the
        global_range and local_range settings when launching a kernel on a
        SYCL queue.

        When a Python function decorated with the @kernel decorator,
        is invoked it creates a KernelLauncher object. Calling the
        KernelLauncher object's ``__getitem__`` function in turn clones the
        object and sets the ``global_range`` and optionally the
        ``local_range`` attributes of the clone with the arguments passed to
        ``__getitem__``. The object being indexed is left unmodified.

        Args:
            args (tuple): A tuple of tuples that specify the global and
                optionally the local range for the kernel execution. If the
                argument is a two-tuple of tuple, then it is assumed that both
                global and local range options are specified. The first entry
                is considered to be the global range and the second the local
                range. If only a single value is provided, then the kernel is
                launched with only a global range and the local range
                configuration is decided by the SYCL runtime. An empty list
                as the second entry also means "no local range".

        Returns:
            KernelLauncher: A clone of the KernelLauncher object, but with the
            global_range and local_range attributes initialized.

        Raises:
            UnknownGlobalRangeError: If no range was provided.
            InvalidKernelLaunchArgsError: If more than two ranges were
                provided.

        .. deprecated:: 0.19
            Use :func:`KernelLauncher.execute` instead.
        """
        warn(
            "The [] (__getitem__) method to set global and local ranges for "
            + "launching a kernel is deprecated. "
            + "Use the execute function instead.",
            DeprecationWarning,
            stacklevel=2,
        )

        try:
            nargs = len(args)
        except TypeError:
            # A bare scalar index such as ``kernel[1024]`` is a lone global
            # range, not a sequence of ranges.
            args = (args,)
            nargs = 1

        # Check if the kernel launch arguments are sane.
        if nargs < 1:
            raise UnknownGlobalRangeError(kernel_name=self.kernel_name)
        elif nargs > 2:
            raise InvalidKernelLaunchArgsError(
                kernel_name=self.kernel_name, args=args
            )

        # Configure the clone only, so that the ranges of one launch do not
        # leak into later launches made through the original launcher.
        launcher = copy.copy(self)
        launcher.global_range = args[0]
        if nargs == 2 and args[1] != []:
            launcher.local_range = args[1]
        else:
            launcher.local_range = None

        return launcher

    def _resolve_launch_ranges(self, global_range, local_range):
        """Selects the ranges to launch with from call-site and stored values.

        Values given at the call site take precedence over the values stored
        by ``__getitem__``; a warning is issued whenever a stored value is
        overridden or used.

        Args:
            global_range: The global range given at the call site, or None.
            local_range: The local range given at the call site, or None.

        Returns:
            tuple: The ``(global_range, local_range)`` pair to use.
            ``local_range`` is None if it was not specified anywhere.

        Raises:
            UnknownGlobalRangeError: If no global range was specified at the
                call site or through ``__getitem__``.
        """
        if global_range:
            if self.global_range:
                warn(
                    "Ignoring the previously set value of global_range and "
                    + "using the value specified at the kernel call site."
                )
        elif self.global_range:
            warn(
                "Use of __getitem__ to set the global_range attribute is "
                + 'deprecated. Use the keyword argument "global_range" of '
                + "__call__ method to set the attribute."
            )
            global_range = self.global_range
        else:
            raise UnknownGlobalRangeError(self.kernel_name)

        if local_range:
            if self.local_range:
                warn(
                    "Ignoring the previously set value of local_range and "
                    + "using the value specified at the kernel call site.."
                )
        elif self.local_range:
            warn(
                "Use of __getitem__ to set the local_range attribute is "
                + 'deprecated. Use the keyword argument "local_range" of '
                + "__call__ method to set the attribute."
            )
            local_range = self.local_range
        else:
            local_range = None
            warn(
                "Kernel to be submitted without a local range letting "
                + "the SYCL runtime select a local range. The behavior "
                + "can lead to suboptimal performance in certain cases. "
                + "Consider setting the local range value for the kernel "
                + "execution.\n"
                + "The local_range keyword may be made a required argument "
                + "in the future."
            )

        return global_range, local_range

    def __call__(self, *args, global_range=None, local_range=None):
        """Validates the launch configuration for a kernel invocation.

        Resolves the global and local ranges, infers the execution queue from
        the kernel arguments and checks that the backend of the queue is
        supported. Compiling and submitting the kernel is not implemented yet
        (see the TODO notes at the end of the body).

        Args:
            *args: The arguments to be passed to the kernel.
            global_range (optional): The global range for the launch. If
                omitted, the value set through ``__getitem__`` is used.
            local_range (optional): The local range for the launch. If
                omitted, the value set through ``__getitem__`` is used, or
                None when no value was set. Defaults to None.

        Raises:
            UnknownGlobalRangeError: If no global range was specified.
            ComputeFollowsDataInferenceError: If the queue could not be
                inferred using compute follows data rules.
            ExecutionQueueInferenceError: If the queue could not be inferred
                using the dpctl queue manager.
            UnsupportedBackendError: If the backend of the inferred queue is
                neither OpenCL nor Level Zero.
        """
        global_range, local_range = self._resolve_launch_ranges(
            global_range, local_range
        )

        # TODO: Move out to separate function
        # The arguments are handed over as one sequence; the queue inference
        # enumerates them to find the positions of the array arguments.
        exec_queue = self._determine_kernel_launch_queue(args)
        backend = exec_queue.backend
        supported_backends = ["opencl", "level_zero"]
        if backend not in (
            dpctl.backend_type.opencl,
            dpctl.backend_type.level_zero,
        ):
            raise UnsupportedBackendError(
                self.kernel_name, backend, supported_backends
            )

        # TODO: Test global and local ranges to be valid for the device
        # TODO: Create a kernel (or check if we already have a cached version)
        # TODO: kernel.compile()
        # TODO: submit the kernel: unpack every argument for the queue, submit
        # the kernel with the unpacked arguments and the global and local
        # ranges, wait for the queue, and then pack the arguments back.
Oops, something went wrong.