Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removes the experimental parfor fallback feature. #817

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions numba_dpex/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,7 @@ def __getattr__(name):
# Dump offload diagnostics
OFFLOAD_DIAGNOSTICS = _readenv("NUMBA_DPEX_OFFLOAD_DIAGNOSTICS", int, 0)

FALLBACK_ON_CPU = _readenv("NUMBA_DPEX_FALLBACK_ON_CPU", int, 1)

# Activate Native floating point atomcis support for supported devices.
# Activate Native floating point atomics support for supported devices.
# Requires llvm-spirv supporting the FP atomics extension
NATIVE_FP_ATOMICS = _readenv("NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE", int, 0)
LLVM_SPIRV_ROOT = _readenv("NUMBA_DPEX_LLVM_SPIRV_ROOT", str, "")
Expand Down
8 changes: 8 additions & 0 deletions numba_dpex/core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,3 +218,11 @@ def __init__(self) -> None:
else:
self.message = "Unreachable code executed."
super().__init__(self.message)


class UnsupportedParforError(Exception):
    """Exception raised when a parfor node could not be compiled.

    Args:
        extra_msg: Optional detail describing why the expression could not
            be offloaded. When given and non-empty it is appended to the
            base message after a colon. Defaults to ``None``, which keeps
            the base message unchanged.

    Attributes:
        message: The full text passed to ``Exception.__init__``.
    """

    def __init__(self, extra_msg=None) -> None:
        self.message = "Expression cannot be offloaded"
        # Keep the zero-argument form byte-identical to the old message so
        # existing ``raise UnsupportedParforError()`` call sites are unchanged.
        if extra_msg:
            self.message = self.message + ": " + str(extra_msg)
        super().__init__(self.message)
275 changes: 9 additions & 266 deletions numba_dpex/core/passes/lowerer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,14 @@
replace_var_names,
visit_vars_inner,
)
from numba.core.lowering import Lower
from numba.core.typing import signature
from numba.parfors import parfor
from numba.parfors.parfor_lowering import _lower_parfor_parallel

import numba_dpex as dpex
from numba_dpex import config
from numba_dpex.core.exceptions import UnsupportedParforError
from numba_dpex.core.target import DpexTargetContext
from numba_dpex.core.types import Array
from numba_dpex.dpctl_iface import KernelLaunchOps
Expand Down Expand Up @@ -1036,217 +1038,6 @@ def load_range(v):
kernel_launcher.free_queue(sycl_queue_val=curr_queue)


from numba.core.lowering import Lower


class CopyIRException(RuntimeError):
    """Runtime error type for IR-copy failures.

    Construction is inherited unchanged from ``RuntimeError``: positional
    arguments are stored in ``args``. No code visible alongside this class
    raises it.
    """


def relatively_deep_copy(obj, memo):
# Recursively copy ``obj`` while sharing objects that should not (or cannot)
# be copied.
#
# ``memo`` maps ``id(original)`` to the copy already made for it; it is
# consulted first and updated whenever a new copy is created, so an object
# reached twice is copied once.
#
# Returns either ``obj`` itself (for the pass-through types listed below) or
# a newly built copy.
#
# WARNING: there are some issues with generators which were not investigated
# and root cause is not found. Though copied IR seems to work fine there are
# some extra references kept on generator objects which may result in a
# memory leak.

obj_id = id(obj)
if obj_id in memo:
return memo[obj_id]

from ctypes import _CFuncPtr
from types import ModuleType

from numba.core.bytecode import FunctionIdentity
from numba.core.compiler import CompileResult
from numba.core.dispatcher import _DispatcherBase
from numba.core.types.abstract import Type
from numba.core.types.functions import Dispatcher, Function
from numba.core.typing.templates import Signature
from numba.np.ufunc.dufunc import DUFunc

from numba_dpex.compiler import DpexFunctionTemplate

# Objects which shouldn't or can't be copied, and for which it is OK to
# return the original object unchanged.
if isinstance(
obj,
(
FunctionIdentity,
_DispatcherBase,
Function,
Type,
Dispatcher,
ModuleType,
Signature,
DpexFunctionTemplate,
CompileResult,
DUFunc,
_CFuncPtr,
type,
str,
bool,
type(None),
),
):
return obj

from numba.core.funcdesc import FunctionDescriptor
from numba.core.ir import FreeVar, FunctionIR, Global
from numba.core.postproc import PostProcessor

# FunctionDescriptor: rebuild through the constructor; typemap, calltypes and
# argtypes are copied recursively, the remaining fields are passed as-is.
if isinstance(obj, FunctionDescriptor):
cpy = FunctionDescriptor(
native=obj.native,
modname=obj.modname,
qualname=obj.qualname,
unique_name=obj.unique_name,
doc=obj.doc,
typemap=relatively_deep_copy(obj.typemap, memo),
restype=obj.restype,
calltypes=relatively_deep_copy(obj.calltypes, memo),
args=obj.args,
kws=obj.kws,
mangler=None,
argtypes=relatively_deep_copy(obj.argtypes, memo),
inline=obj.inline,
noalias=obj.noalias,
env_name=obj.env_name,
global_dict=obj.global_dict,
)
# The mangler parameter is not saved in FunctionDescriptor, but is used to
# generate the name. So pass None as the mangler parameter and then copy
# mangled_name by hand.
cpy.mangled_name = obj.mangled_name

memo[obj_id] = cpy

return cpy

if isinstance(obj, FunctionIR):
# PostProcessor does the following:
# 1. canonicalizes the CFG, modifying the IR
# 2. fills internal generator status
# 3. creates and fills the VariableLifetime object
# We can't copy these objects. So in order to have a copy we need to run
# PostProcessor on the copied IR.
# This means that if PostProcessor wasn't run on the original object, the
# copied object would differ.
# In order to avoid this we run PostProcessor on the original object first.
# This means that copying the IR actually has a side effect on the original.
pp = PostProcessor(obj)
pp.run()
cpy = FunctionIR(
blocks=relatively_deep_copy(obj.blocks, memo),
is_generator=relatively_deep_copy(obj.is_generator, memo),
func_id=relatively_deep_copy(obj.func_id, memo),
loc=obj.loc,
definitions=relatively_deep_copy(obj._definitions, memo),
arg_count=obj.arg_count,
arg_names=relatively_deep_copy(obj.arg_names, memo),
)
pp = PostProcessor(cpy)
pp.run()

memo[obj_id] = cpy

return cpy

# Global / FreeVar: build a new node that references the same ``value``
# object (the value itself is not copied).
if isinstance(obj, Global):
cpy = Global(name=obj.name, value=obj.value, loc=obj.loc)
memo[obj_id] = cpy

return cpy

if isinstance(obj, FreeVar):
cpy = FreeVar(
index=obj.index, name=obj.name, value=obj.value, loc=obj.loc
)
memo[obj_id] = cpy

return cpy

# For containers we need to copy the container itself first, and then fill it
# with copied items.
if isinstance(obj, list):
cpy = copy.copy(obj)
cpy.clear()
for item in obj:
cpy.append(relatively_deep_copy(item, memo))
memo[obj_id] = cpy
return cpy
elif isinstance(obj, dict):
cpy = copy.copy(obj)
cpy.clear()
for key, item in obj.items():
cpy[relatively_deep_copy(key, memo)] = relatively_deep_copy(
item, memo
)
memo[obj_id] = cpy
return cpy
elif isinstance(obj, tuple):
# Subclass constructors could have different parameters than the superclass,
# e.g. tuple and namedtuple constructors accept quite different parameters.
# It is better to have a separate section for namedtuple.
tpl = tuple([relatively_deep_copy(item, memo) for item in obj])
if type(obj) == tuple:
cpy = tpl
else:
cpy = type(obj)(*tpl)
memo[obj_id] = cpy
return cpy
elif isinstance(obj, set):
cpy = copy.copy(obj)
cpy.clear()
for item in obj:
cpy.add(relatively_deep_copy(item, memo))
memo[obj_id] = cpy
return cpy

# Some Python objects are not copyable. In such a case an exception would be
# raised; the re-raise below is just a convenient point to find such objects.
try:
cpy = copy.copy(obj)
except Exception as e:
raise e

# __slots__ for a subclass specifies only members declared in that subclass.
# So to get all members we need to go through all superclasses.
def get_slots_members(obj):
keys = []
typ = obj
if not isinstance(typ, type):
typ = type(obj)

try:
if len(typ.__slots__):
keys.extend(typ.__slots__)
if len(typ.__bases__):
for base in typ.__bases__:
keys.extend(get_slots_members(base))
except:
pass

return keys

memo[obj_id] = cpy
keys = []

# Objects have either __dict__ or __slots__ or neither.
# If the object has neither and it is copyable, we already made a copy; just
# return it.
# If the object is not copyable we shouldn't reach this point.
try:
keys = obj.__dict__.keys()
except:
try:
obj.__slots__
keys = get_slots_members(obj)
except:
return cpy

# Replace every attribute on the shallow copy with a recursive copy of the
# original's attribute.
for key in keys:
attr = getattr(obj, key)
attr_cpy = relatively_deep_copy(attr, memo)
setattr(cpy, key, attr_cpy)

return cpy


class WrapperDefaultLower(Lower):
@property
def _disable_sroa_like_opt(self):
Expand All @@ -1257,22 +1048,10 @@ def _disable_sroa_like_opt(self):
class DPEXLowerer(Lower):
def __init__(self, context, library, fndesc, func_ir, metadata=None):
Lower.__init__(self, context, library, fndesc, func_ir, metadata)
memo = {}

fndesc_cpu = relatively_deep_copy(fndesc, memo)
func_ir_cpu = relatively_deep_copy(func_ir, memo)

cpu_context = (
context.cpu_context
if isinstance(context, DpexTargetContext)
else context
)
self.gpu_lower = self._lower(
context, library, fndesc, func_ir, metadata
)
self.cpu_lower = self._lower(
cpu_context, library, fndesc_cpu, func_ir_cpu, metadata
)

def _lower(self, context, library, fndesc, func_ir, metadata):
"""Create Lower with changed linkageName in debug info"""
Expand All @@ -1299,42 +1078,20 @@ def _lower(self, context, library, fndesc, func_ir, metadata):
def lower(self):
"""Custom lowering function to support offloading of kernels.

The lowerer has a builtin fallback mechanism for parfor functions. We
first try to lower a parfor onto a SYCL device, if the lowering fails
then we fallback to the default Numba lowering to CPU. The lowering
follow the following steps:

1. Start lowering of parent function
2. Try to lower parfor onto the specified SYCL device
2.a. The ``lower_parfor_rollback`` function prepares function to
lower onto to the specified SYCL device and inserts the
``get_global_id`` intrinsic function.

2.a.a. Start lowering the parfor body and execute
``DpexLowerer.lower()`` again.
2.a.b. If the lowering fails, throw an exception.
2.b. The ``lower_parfor_rollback`` catches the exception and
restores the parfor body to its initial state.
2.c. Then throw an exception inside ``lower_parfor_rollback``
that will be caught inside ``DpexLowerer.lower()``.
3. Catch exception and start parfor lowering with the default Numba CPU
context.

TODO/FIXME The rollback approach only works in case no device specific
modifications were added to function containing the parfor node. If the
function has any device specific modifications, a different solution
should be used.
Lowering fails when some parfor node is not yet offloadable and an
UnsupportedParforError is raised.

Raises:
Exception: If a parfor node could not be lowered to a SYCL device.
UnsupportedParforError: If a parfor node could not be lowered to the
specified SYCL device.

"""
try:
context = self.gpu_lower.context
try:
# Only Numba's CPUContext has the `lower_extension` attribute
lower_extension_parfor = context.lower_extensions[parfor.Parfor]
context.lower_extensions[parfor.Parfor] = lower_parfor_rollback
context.lower_extensions[parfor.Parfor] = lower_parfor
except Exception as e:
if config.DEBUG:
print(e)
Expand Down Expand Up @@ -1373,14 +1130,7 @@ def lower(self):
)
print(traceback.format_exc())

if config.FALLBACK_ON_CPU == 1:
self.cpu_lower.context.lower_extensions[
parfor.Parfor
] = _lower_parfor_parallel
self.cpu_lower.lower()
self.base_lower = self.cpu_lower
else:
raise e
raise UnsupportedParforError()

self.env = self.base_lower.env
self.call_helper = self.base_lower.call_helper
Expand All @@ -1389,14 +1139,7 @@ def create_cpython_wrapper(self, release_gil=False):
return self.base_lower.create_cpython_wrapper(release_gil)


def copy_block(block):
    """Return a new ``ir.Block`` holding copies of ``block``'s statements.

    The new block is created with the same ``scope`` and ``loc`` as
    ``block``. Each statement in ``block.body`` is duplicated with
    ``relatively_deep_copy``, and a single memo dictionary is shared across
    all statements of the block.
    """
    shared_memo = {}
    duplicate = ir.Block(block.scope, block.loc)
    copied_stmts = []
    for stmt in block.body:
        copied_stmts.append(relatively_deep_copy(stmt, shared_memo))
    duplicate.body = copied_stmts
    return duplicate


def lower_parfor_rollback(lowerer, parfor):
def lower_parfor(lowerer, parfor):
try:
_lower_parfor_gufunc(lowerer, parfor)
if config.DEBUG:
Expand Down
7 changes: 4 additions & 3 deletions numba_dpex/dpctl_iface/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@
USMNdArrayType,
]

create_event_vector = dpctl_fn_ty.dpctl_event_vector_create()
event_wait = dpctl_fn_ty.dpctl_event_wait()
event_delete = dpctl_fn_ty.dpctl_event_delete()
free_with_queue = dpctl_fn_ty.dpctl_free_with_queue()
get_current_queue = dpctl_fn_ty.dpctl_get_current_queue()
malloc_shared = dpctl_fn_ty.dpctl_malloc_shared()
queue_memcpy = dpctl_fn_ty.dpctl_queue_memcpy()
free_with_queue = dpctl_fn_ty.dpctl_free_with_queue()
event_wait = dpctl_fn_ty.dpctl_event_wait()
event_delete = dpctl_fn_ty.dpctl_event_delete()
queue_wait = dpctl_fn_ty.dpctl_queue_wait()
8 changes: 7 additions & 1 deletion numba_dpex/dpctl_iface/dpctl_function_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def dpctl_queue_memcpy():


def dpctl_event_wait():
ret_type = types.voidptr
ret_type = types.void
sig = signature(ret_type, types.voidptr)
return types.ExternalFunction("DPCTLEvent_Wait", sig)

Expand All @@ -47,3 +47,9 @@ def dpctl_queue_wait():
ret_type = types.void
sig = signature(ret_type, types.voidptr)
return types.ExternalFunction("DPCTLQueue_Wait", sig)


def dpctl_event_vector_create():
    """Build the external-function type for ``DPCTLEventVector_Create``.

    The declared signature takes no arguments and returns ``types.voidptr``.
    """
    return types.ExternalFunction(
        "DPCTLEventVector_Create", signature(types.voidptr)
    )
Loading