From 8530953ec33f7008c1b29c3b963a65a633eed308 Mon Sep 17 00:00:00 2001 From: Johan Ericsson Date: Tue, 15 Oct 2024 11:28:54 +0200 Subject: [PATCH 01/12] Added new transformation for GPU data offloading with FIELD API --- loki/transformations/data_offload.py | 324 ++++++++++++++++++++++++++- 1 file changed, 318 insertions(+), 6 deletions(-) diff --git a/loki/transformations/data_offload.py b/loki/transformations/data_offload.py index ebd587280..bda0b9c91 100644 --- a/loki/transformations/data_offload.py +++ b/loki/transformations/data_offload.py @@ -7,15 +7,19 @@ from collections import defaultdict from itertools import chain +from enum import Enum from loki.analyse import dataflow_analysis_attached from loki.batch import Transformation, ProcedureItem, ModuleItem -from loki.expression import Scalar, Array +from loki.expression import Scalar, Array, symbols as sym from loki.ir import ( FindNodes, PragmaRegion, CallStatement, Pragma, Import, Comment, - Transformer, pragma_regions_attached, get_pragma_parameters, - FindInlineCalls, SubstituteExpressions + Transformer, pragma_regions_attached, get_pragma_parameters, pragmas_attached, + FindInlineCalls, FindVariables, SubstituteExpressions ) +import loki.ir as ir +from loki.scope import Scope +from loki.transformations.utilities import find_driver_loops from loki.logging import warning from loki.tools import as_tuple, flatten, CaseInsensitiveDict, CaseInsensitiveDefaultDict from loki.types import BasicType, DerivedType @@ -23,7 +27,8 @@ __all__ = [ 'DataOffloadTransformation', 'GlobalVariableAnalysis', - 'GlobalVarOffloadTransformation', 'GlobalVarHoistTransformation' + 'GlobalVarOffloadTransformation', 'GlobalVarHoistTransformation', + 'FieldOffloadTransformation' ] @@ -49,6 +54,7 @@ def __init__(self, **kwargs): self.has_data_regions = False self.remove_openmp = kwargs.get('remove_openmp', False) self.assume_deviceptr = kwargs.get('assume_deviceptr', False) + self.assume_acc_mapped = kwargs.get('assume_acc_mapped', 
False) def transform_subroutine(self, routine, **kwargs): """ @@ -131,8 +137,7 @@ def insert_data_offload_pragmas(self, routine, targets): continue for param, arg in call.arg_iter(): - if isinstance(param, Array) and param.type.intent.lower() == 'in': - inargs += (str(arg.name).lower(),) + if isinstance(param, Array) and param.type.intent.lower() == 'in': inargs += (str(arg.name).lower(),) if isinstance(param, Array) and param.type.intent.lower() == 'inout': inoutargs += (str(arg.name).lower(),) if isinstance(param, Array) and param.type.intent.lower() == 'out': @@ -156,6 +161,13 @@ def insert_data_offload_pragmas(self, routine, targets): else: deviceptr = '' pragma = Pragma(keyword='acc', content=f'data{deviceptr}') + elif self.assume_acc_mapped: + offload_args = inargs + outargs + inoutargs + if offload_args: + present = f' present({", ".join(offload_args)})' + else: + present = '' + pragma = Pragma(keyword='acc', content=f'data{present}') else: copyin = f'copyin({", ".join(inargs)})' if inargs else '' copy = f'copy({", ".join(inoutargs)})' if inoutargs else '' @@ -908,3 +920,303 @@ def _append_routine_arguments(self, routine, item): )) for arg in new_arguments ] routine.arguments += tuple(sorted(new_arguments, key=lambda symbol: symbol.name)) + + +################################################################################ +# Field API helper routines +################################################################################ +def get_field_type(a: sym.Array) -> sym.DerivedType: + """ + Returns the corresponding FIELD API type for an array. + + This transformation is IFS specific and assumes that the + type is an array declared with one of the IFS type specifiers, e.g. 
KIND=JPRB + """ + type_map = ["jprb", + "jpit", + "jpis", + "jpim", + "jpib", + "jpia", + "jprt", + "jprs", + "jprm", + "jprd", + "jplm"] + + type_name = a.type.kind.name + assert type_name.lower() in type_map, ('Error array type kind is: ' + f'"{type_name}" which is not a valid IFS type specifier') + rank = len(a.shape) + field_type = sym.DerivedType(name="field_" + str(rank) + type_name[2:4]) + return field_type + + +def field_new(field_ptr, data, scope): + return ir.CallStatement(sym.ProcedureSymbol('FIELD_NEW', scope=scope), + (field_ptr,), (('DATA', data),)) + + +def field_delete(field_ptr, scope): + return ir.CallStatement(sym.ProcedureSymbol('FIELD_DELETE', scope=scope), + (field_ptr,)) + + +class FieldAPITransferType(Enum): + READ_ONLY = 1 + READ_WRITE = 2 + WRITE_ONLY = 3 + + +def field_get_device_data(field_ptr, dev_ptr, transfer_type: FieldAPITransferType, scope: Scope): + assert isinstance(transfer_type, FieldAPITransferType) + if transfer_type == FieldAPITransferType.READ_ONLY: + suffix = 'RDONLY' + if transfer_type == FieldAPITransferType.READ_WRITE: + suffix = 'RDWR' + if transfer_type == FieldAPITransferType.WRITE_ONLY: + suffix = 'WRONLY' + procedure_name = 'GET_DEVICE_DATA_' + suffix + return ir.CallStatement(sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), + (dev_ptr.clone(dimensions=None),)) + + +def field_sync_host(field_ptr, scope): + procedure_name = 'SYNC_HOST_RDWR' + return ir.CallStatement(sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), ()) + + +def find_array_arguments(routine, calls): + """ + Finds all arguments and sorts them by intents + + Parameters + ---------- + routine : `Subroutine` + Subroutine to apply this transformation to. + calls : list of `CallStatement` + Calls to extract arguments from. + This transformation will only apply at the ``'driver'`` level. 
+ + + Returns + ------- + inargs : tuple of arguments that are only inargs + inoutargs : tuples of arguments that are both in and out, or inout + outargs : tuples of arguments that are only outargs + """ + inargs = () + inoutargs = () + outargs = () + + for call in calls: + if call.routine is BasicType.DEFERRED: + warning(f'[Loki] Data offload: Routine {routine.name} has not been enriched ' + + f'in {str(call.name).lower()}') + continue + for param, arg in call.arg_iter(): + if isinstance(param, Array) and param.type.intent.lower() == 'in': + inargs += (arg, ) + if isinstance(param, Array) and param.type.intent.lower() == 'inout': + inoutargs += (arg, ) + if isinstance(param, Array) and param.type.intent.lower() == 'out': + outargs += (arg, ) + + inoutargs += tuple(v for v in inargs if v in outargs) + inargs = tuple(v for v in inargs if v not in inoutargs) + outargs = tuple(v for v in outargs if v not in inoutargs) + + # Filter for duplicates TODO: What if we pass different slices of same array!? 
+ inargs = tuple(set(inargs)) + inoutargs = tuple(set(inoutargs)) + outargs = tuple(set(outargs)) + return inargs, inoutargs, outargs + + +def find_target_calls(region, targets): + """Returns a list of all calls to targets inside the region + + Parameters + ---------- + :region: :any:`PragmaRegion` + :targets: collection of :any:`Subroutine` + Iterable object of subroutines or functions called + :returns: list of :any:`CallStatement` + """ + calls = FindNodes(CallStatement).visit(region) + calls = [c for c in calls if str(c.name).lower() in targets] + return calls + + +class FieldOffloadTransformation(Transformation): + class FieldPointerMap: + def __init__(self, devptrs, inargs, inoutargs, outargs): + self.inargs = inargs + self.inoutargs = inoutargs + self.outargs = outargs + self.devptrs = devptrs + + @property + def in_pairs(self): + for i, inarg in enumerate(self.inargs): + yield inarg, self.devptrs[i] + + @property + def inout_pairs(self): + start = len(self.inargs) + for i, inoutarg in enumerate(self.inoutargs): + yield inoutarg, self.devptrs[i+start] + + @property + def out_pairs(self): + start = len(self.inargs)+len(self.inoutargs) + for i, outarg in enumerate(self.outargs): + yield outarg, self.devptrs[i+start] + + + def __init__(self, devptr_prefix='LOKI_DEVPTR_', **kwargs): + self.devptr_prefix = kwargs.get('devptr_prefix', 'loki_devptr') + field_group_types = kwargs.get('field_group_types', ['CLOUDSC_STATE_TYPE', 'CLOUDSC_AUX_TYPE', 'CLOUDSC_FLUX_TYPE']) + self.assume_deviceptr = kwargs.get('assume_deviceptr', False) + self.field_group_types = tuple(typename.lower() for typename in field_group_types) + + def transform_subroutine(self, routine, **kwargs): + role = kwargs['role'] + targets = as_tuple(kwargs.get('targets'), (None)) + if role == 'kernel': + self.process_kernel(routine) + if role == 'driver': + self.process_driver(routine, targets) + + def process_kernel(self, routine): + pass + + def process_driver(self, driver, targets): + with 
pragma_regions_attached(driver): + for region in FindNodes(PragmaRegion).visit(driver.body): + # Only work on active `!$loki data` regions + if not DataOffloadTransformation._is_active_loki_data_region(region, targets): + continue + kernel_calls = find_target_calls(region, targets) + offload_variables = self.find_offload_variables(driver, kernel_calls) + device_ptrs = self._declare_device_ptrs(driver, offload_variables) + offload_map = self.FieldPointerMap(device_ptrs, *offload_variables) + old_offload_calls = self._replace_data_offload_calls(driver, region, offload_map) + self._replace_kernel_args(kernel_calls, old_offload_calls, offload_map) + + def find_offload_variables(self, driver, calls): + inargs = () + inoutargs = () + outargs = () + + for call in calls: + if call.routine is BasicType.DEFERRED: + warning(f'[Loki] Data offload: Routine {driver.name} has not been enriched ' + + f'in {str(call.name).lower()}') + continue + for param, arg in call.arg_iter(): + if not isinstance(param, Array): + continue + try: + parent = arg.parent + if parent.type.dtype.name.lower() not in self.field_group_types: + warning(f'[Loki] The parent object {parent.name} of type {parent.type.dtype} is not in the list of' + + ' field wrapper types') + except AttributeError: + warning(f'[Loki] Field data offload: Raw array object {arg.name} encountered in' + + f'{driver.name} that is not wrapped by a Field API object') # ofc we cant know this for sure + continue + + if param.type.intent.lower() == 'in': + inargs += (arg, ) + if param.type.intent.lower() == 'inout': + inoutargs += (arg, ) + if param.type.intent.lower() == 'out': + outargs += (arg, ) + + inoutargs += tuple(v for v in inargs if v in outargs) + inargs = tuple(v for v in inargs if v not in inoutargs) + outargs = tuple(v for v in outargs if v not in inoutargs) + + # Filter for duplicates TODO: What if we pass different slices of same array!? 
+ inargs = tuple(set(inargs)) + inoutargs = tuple(set(inoutargs)) + outargs = tuple(set(outargs)) + return inargs, inoutargs, outargs + + + def _declare_device_ptrs(self, driver, offload_variables): + device_ptrs = tuple(self._devptr_from_array(driver, a) for a in chain(*offload_variables)) + driver.variables += device_ptrs + return device_ptrs + + def _devptr_from_array(self, driver, a: sym.Array) -> sym.Variable: + """ + Returns a contiguous pointer :any:`Variable` with types matching the array a + """ + shape = (sym.RangeIndex((None, None)),) * (len(a.shape)+1) + devptr_type = a.type.clone(pointer=True, contiguous=True, shape=shape, intent=None) + base_name = a.name if a.parent is None else '_'.join(a.name.split('%')) + devptr_name = self.devptr_prefix + base_name + try: + driver.variable_map[devptr_name] + warning(f'[Loki] Field data offload: The routine {driver.name} already has a' + + f'variable named {devptr_name}') + except KeyError: + pass + devptr = sym.Variable(name=devptr_name, type=devptr_type, dimensions=shape) + return devptr + + def _replace_data_offload_calls(self, driver, region, offload_map): + # remove calls to [field_group_type]%update_view + calls = FindNodes(CallStatement).visit(region) + field_group_updates = tuple(c for c in calls if self._is_field_group_update(driver, c)) + # c.arguments contains Scalar(IBL) + Transformer(dict.fromkeys(field_group_updates, None), inplace=True).visit(region.body) + host_to_device = tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, + FieldAPITransferType.READ_ONLY, driver) for inarg, devptr in offload_map.in_pairs) + host_to_device += tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, + FieldAPITransferType.READ_WRITE, driver) for inarg, devptr in offload_map.inout_pairs) + host_to_device += tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, + FieldAPITransferType.READ_WRITE, driver) for inarg, devptr in offload_map.out_pairs) + 
device_to_host = tuple(field_sync_host(self._get_field_ptr_from_view(inarg), driver) + for inarg, _ in chain(offload_map.inout_pairs, offload_map.out_pairs)) + # field_deletes = tuple(field_delete(field_ptr_map[var], routine) for var in blocking_arrays) + update_map = {region: host_to_device + (region,) + device_to_host} + Transformer(update_map, inplace=True).visit(driver.body) + return field_group_updates + + def _is_field_group_update(self, driver, call): + try: + *_, parent, call_name = call.name.name.split('%') + parent = driver.variable_map.get(parent) + if parent is not None and parent.type.dtype.name.lower() in self.field_group_types: + return True + except ValueError: + return False + return False + + def _get_field_ptr_from_view(self, field_view): + type_chain = field_view.name.split('%') + field_type_name = 'F_' + type_chain[-1] + return field_view.parent.get_derived_type_member(field_type_name) + + def _replace_kernel_args(self, kernel_calls, old_offload_calls, offload_map): + """TODO: Docstring for _replace_kernel_calls. 
+ + :kernel_calls: TODO + :old_offload_calls: TODO + :device_ptrs: TODO + :returns: TODO + """ + change_map = {} + for arg, devptr in chain(offload_map.in_pairs, offload_map.inout_pairs, offload_map.out_pairs): + group_update = next((c for c in old_offload_calls if c.name.parent == arg.parent), None) + assert group_update is not None, "Group update should not be none" + block_idx = group_update.arguments[0] + dims = (sym.RangeIndex((None, None)),) * (len(devptr.shape)-1) + (block_idx,) + change_map[arg] = devptr.clone(dimensions=dims) + arg_transformer = SubstituteExpressions(change_map, inplace=True) + for call in kernel_calls: + arg_transformer.visit(call) + From 60b0c9c1bc6f19107b5786f3f7929c1cb8826b5f Mon Sep 17 00:00:00 2001 From: Johan Ericsson Date: Tue, 29 Oct 2024 15:35:49 +0100 Subject: [PATCH 02/12] started adding tests for field offload functionality --- loki/transformations/data_offload.py | 4 +- .../tests/test_data_offload.py | 75 +++++++++++++++++++ 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/loki/transformations/data_offload.py b/loki/transformations/data_offload.py index bda0b9c91..ee0f907dc 100644 --- a/loki/transformations/data_offload.py +++ b/loki/transformations/data_offload.py @@ -1074,10 +1074,10 @@ def out_pairs(self): yield outarg, self.devptrs[i+start] - def __init__(self, devptr_prefix='LOKI_DEVPTR_', **kwargs): + def __init__(self, **kwargs): self.devptr_prefix = kwargs.get('devptr_prefix', 'loki_devptr') - field_group_types = kwargs.get('field_group_types', ['CLOUDSC_STATE_TYPE', 'CLOUDSC_AUX_TYPE', 'CLOUDSC_FLUX_TYPE']) self.assume_deviceptr = kwargs.get('assume_deviceptr', False) + field_group_types = kwargs.get('field_group_types', ['CLOUDSC_STATE_TYPE', 'CLOUDSC_AUX_TYPE', 'CLOUDSC_FLUX_TYPE']) self.field_group_types = tuple(typename.lower() for typename in field_group_types) def transform_subroutine(self, routine, **kwargs): diff --git a/loki/transformations/tests/test_data_offload.py 
b/loki/transformations/tests/test_data_offload.py index 81dad4dee..e97ca57da 100644 --- a/loki/transformations/tests/test_data_offload.py +++ b/loki/transformations/tests/test_data_offload.py @@ -20,6 +20,9 @@ DataOffloadTransformation, GlobalVariableAnalysis, GlobalVarOffloadTransformation, GlobalVarHoistTransformation ) +from loki.expression import symbols as sym +from loki.scope import Scope +from loki.transformations.data_offload import find_array_arguments, find_target_calls @pytest.fixture(scope='module', name='here') @@ -805,3 +808,75 @@ def test_transformation_global_var_derived_type_hoist(here, config, frontend, ho assert kernel.variable_map['p'].type.dtype.name == 'point' assert kernel.variable_map['p0'].type.intent == 'in' assert kernel.variable_map['p0'].type.dtype.name == 'point' + + +def test_get_field_type(): + type_map = ["jprb", + "jpit", + "jpis", + "jpim", + "jpib", + "jpia", + "jprt", + "jprs", + "jprm", + "jprd", + "jplm"] + type_map += [t.upper() for t in type_map] + scope = Scope() + for type_name in type_map: + for dim in range(1,4): + a = sym.Array(name='test_array', dimensions=(dim), scope=scope) + # Am I going about this wrong with shape? 
+ +@pytest.mark.parametrize('frontend', available_frontends()) +def test_find_array_arguments(frontend): + fcode_driver = """ + SUBROUTINE driver_routine(nlon, nlev, a, b, c, d, e, f) + INTEGER, INTENT(INOUT) :: nlon, nlev + REAL, INTENT(INOUT) :: a(nlon,nlev) + REAL, INTENT(INOUT) :: b(nlon) + REAL, INTENT(INOUT) :: c(nlon,nlev,nlon) + REAL, INTENT(INOUT) :: d + REAL, INTENT(INOUT) :: e + REAL, INTENT(INOUT) :: f + INTEGER :: nlon_copy, nlev_copy + REAL :: a_copy(nlon,nlev) + REAL :: b_copy(nlon) + REAL :: c_copy(nlon,nlev,nlon) + REAL :: d_copy + REAL :: e_copy + REAL :: f_copy + + call kernel_routine(nlon_copy, nlev_copy, a_copy, b_copy, c_copy, d_copy, e_copy, f_copy) + + END SUBROUTINE driver_routine + """ + fcode_kernel = """ + SUBROUTINE kernel_routine(nlon, nlev, a, b, c, d, e, f) + INTEGER, INTENT(IN) :: nlon, nlev + REAL, INTENT(IN) :: a(nlon,nlev) + REAL, INTENT(INOUT) :: b(nlon) + REAL, INTENT(OUT) :: c(nlon,nlev,nlon) + REAL, INTENT(IN) :: d + REAL, INTENT(IN) :: e + REAL, INTENT(IN) :: f + + do j=1, nlon + do i=1, nlev + b_copy(i) = a(i,j) + 0.1 + c(i,j,i) = 0.1 + end do + end do + END SUBROUTINE kernel_routine + """ + driver = Sourcefile.from_source(fcode_driver, frontend=frontend)['driver_routine'] + kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] + driver.enrich(kernel) + targets = find_target_calls(driver.body, [kernel.name]) + in_vars, inout_vars, out_vars = find_array_arguments(driver, targets) + + assert len(in_vars) == 1 and in_vars[0].name == 'a_copy' + assert len(inout_vars) == 1 and inout_vars[0].name == 'b_copy' + assert len(out_vars) == 1 and out_vars[0].name == 'c_copy' + From da415b5c326d4c5877f256046c0550a00f03169c Mon Sep 17 00:00:00 2001 From: Johan Ericsson Date: Tue, 29 Oct 2024 17:00:37 +0100 Subject: [PATCH 03/12] Added named arguments when creating Callstmts to work around bug in CallStatement pre_init method --- loki/transformations/data_offload.py | 14 +++++++------- 1 file changed, 7 
insertions(+), 7 deletions(-) diff --git a/loki/transformations/data_offload.py b/loki/transformations/data_offload.py index ee0f907dc..b86aa86a3 100644 --- a/loki/transformations/data_offload.py +++ b/loki/transformations/data_offload.py @@ -953,13 +953,13 @@ def get_field_type(a: sym.Array) -> sym.DerivedType: def field_new(field_ptr, data, scope): - return ir.CallStatement(sym.ProcedureSymbol('FIELD_NEW', scope=scope), - (field_ptr,), (('DATA', data),)) + return ir.CallStatement(name=sym.ProcedureSymbol('FIELD_NEW', scope=scope), + arguments=(field_ptr,), kwarguments=(('DATA', data),)) def field_delete(field_ptr, scope): - return ir.CallStatement(sym.ProcedureSymbol('FIELD_DELETE', scope=scope), - (field_ptr,)) + return ir.CallStatement(name=sym.ProcedureSymbol('FIELD_DELETE', scope=scope), + arguments=(field_ptr,)) class FieldAPITransferType(Enum): @@ -977,13 +977,13 @@ def field_get_device_data(field_ptr, dev_ptr, transfer_type: FieldAPITransferTyp if transfer_type == FieldAPITransferType.WRITE_ONLY: suffix = 'WRONLY' procedure_name = 'GET_DEVICE_DATA_' + suffix - return ir.CallStatement(sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), - (dev_ptr.clone(dimensions=None),)) + return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), + arguments=(dev_ptr.clone(dimensions=None),), ) def field_sync_host(field_ptr, scope): procedure_name = 'SYNC_HOST_RDWR' - return ir.CallStatement(sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), ()) + return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), arguments=()) def find_array_arguments(routine, calls): From 76765d165302c03ef4fdc680104f5a4e044c97b2 Mon Sep 17 00:00:00 2001 From: Johan Ericsson Date: Wed, 30 Oct 2024 16:02:21 +0100 Subject: [PATCH 04/12] Moved F-API helper functions from data_offload.py to parallel/field_api.py --- loki/transformations/data_offload.py | 70 +------------------- 
loki/transformations/parallel/field_api.py | 74 ++++++++++++++++++++-- 2 files changed, 72 insertions(+), 72 deletions(-) diff --git a/loki/transformations/data_offload.py b/loki/transformations/data_offload.py index b86aa86a3..876341937 100644 --- a/loki/transformations/data_offload.py +++ b/loki/transformations/data_offload.py @@ -7,23 +7,19 @@ from collections import defaultdict from itertools import chain -from enum import Enum from loki.analyse import dataflow_analysis_attached from loki.batch import Transformation, ProcedureItem, ModuleItem from loki.expression import Scalar, Array, symbols as sym from loki.ir import ( FindNodes, PragmaRegion, CallStatement, Pragma, Import, Comment, - Transformer, pragma_regions_attached, get_pragma_parameters, pragmas_attached, - FindInlineCalls, FindVariables, SubstituteExpressions + Transformer, pragma_regions_attached, get_pragma_parameters, + FindInlineCalls, SubstituteExpressions ) -import loki.ir as ir -from loki.scope import Scope -from loki.transformations.utilities import find_driver_loops from loki.logging import warning from loki.tools import as_tuple, flatten, CaseInsensitiveDict, CaseInsensitiveDefaultDict from loki.types import BasicType, DerivedType - +from loki.transformations.parallel import FieldAPITransferType, field_get_device_data, field_sync_host __all__ = [ 'DataOffloadTransformation', 'GlobalVariableAnalysis', @@ -925,66 +921,6 @@ def _append_routine_arguments(self, routine, item): ################################################################################ # Field API helper routines ################################################################################ -def get_field_type(a: sym.Array) -> sym.DerivedType: - """ - Returns the corresponding FIELD API type for an array. - - This transformation is IFS specific and assumes that the - type is an array declared with one of the IFS type specifiers, e.g. 
KIND=JPRB - """ - type_map = ["jprb", - "jpit", - "jpis", - "jpim", - "jpib", - "jpia", - "jprt", - "jprs", - "jprm", - "jprd", - "jplm"] - - type_name = a.type.kind.name - assert type_name.lower() in type_map, ('Error array type kind is: ' - f'"{type_name}" which is not a valid IFS type specifier') - rank = len(a.shape) - field_type = sym.DerivedType(name="field_" + str(rank) + type_name[2:4]) - return field_type - - -def field_new(field_ptr, data, scope): - return ir.CallStatement(name=sym.ProcedureSymbol('FIELD_NEW', scope=scope), - arguments=(field_ptr,), kwarguments=(('DATA', data),)) - - -def field_delete(field_ptr, scope): - return ir.CallStatement(name=sym.ProcedureSymbol('FIELD_DELETE', scope=scope), - arguments=(field_ptr,)) - - -class FieldAPITransferType(Enum): - READ_ONLY = 1 - READ_WRITE = 2 - WRITE_ONLY = 3 - - -def field_get_device_data(field_ptr, dev_ptr, transfer_type: FieldAPITransferType, scope: Scope): - assert isinstance(transfer_type, FieldAPITransferType) - if transfer_type == FieldAPITransferType.READ_ONLY: - suffix = 'RDONLY' - if transfer_type == FieldAPITransferType.READ_WRITE: - suffix = 'RDWR' - if transfer_type == FieldAPITransferType.WRITE_ONLY: - suffix = 'WRONLY' - procedure_name = 'GET_DEVICE_DATA_' + suffix - return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), - arguments=(dev_ptr.clone(dimensions=None),), ) - - -def field_sync_host(field_ptr, scope): - procedure_name = 'SYNC_HOST_RDWR' - return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), arguments=()) - def find_array_arguments(routine, calls): """ diff --git a/loki/transformations/parallel/field_api.py b/loki/transformations/parallel/field_api.py index e1e06b491..5ef3bc164 100644 --- a/loki/transformations/parallel/field_api.py +++ b/loki/transformations/parallel/field_api.py @@ -9,16 +9,19 @@ Transformation utilities to manage and inject FIELD-API boilerplate code. 
""" +from enum import Enum from loki.expression import symbols as sym from loki.ir import ( nodes as ir, FindNodes, FindVariables, Transformer ) +from loki.scope import Scope from loki.logging import warning from loki.tools import as_tuple - __all__ = [ - 'remove_field_api_view_updates', 'add_field_api_view_updates' + 'remove_field_api_view_updates', 'add_field_api_view_updates', 'get_field_type', + 'field_new', 'field_delete', 'field_get_device_data', 'field_sync_host', + 'FieldAPITransferType' ] @@ -26,9 +29,7 @@ def remove_field_api_view_updates(routine, field_group_types, dim_object=None): """ Remove FIELD API boilerplate calls for view updates of derived types. - This utility is intended to remove the IFS-specific group type - objects that provide block-scope view pointers to deep kernel - trees. It will remove all calls to ``UPDATE_VIEW`` on derive-type + This utility is intended to remove the IFS-specific group type objects that provide block-scope view pointers to deep kernel trees. It will remove all calls to ``UPDATE_VIEW`` on derive-type objects with the respective types. Parameters @@ -150,3 +151,66 @@ def visit_Loop(self, loop, **kwargs): # pylint: disable=unused-argument return loop routine.body = InsertFieldAPIViewsTransformer().visit(routine.body, scope=routine) + + +def get_field_type(a: sym.Array) -> sym.DerivedType: + """ + Returns the corresponding FIELD API type for an array. + + This transformation is IFS specific and assumes that the + type is an array declared with one of the IFS type specifiers, e.g. 
KIND=JPRB + """ + type_map = ["jprb", + "jpit", + "jpis", + "jpim", + "jpib", + "jpia", + "jprt", + "jprs", + "jprm", + "jprd", + "jplm"] + + type_name = a.type.kind.name + assert type_name.lower() in type_map, ('Error array type kind is: ' + f'"{type_name}" which is not a valid IFS type specifier') + rank = len(a.shape) + field_type = sym.DerivedType(name="field_" + str(rank) + type_name[2:4]) + return field_type + + +def field_new(field_ptr, data, scope): + return ir.CallStatement(name=sym.ProcedureSymbol('FIELD_NEW', scope=scope), + arguments=(field_ptr,), kwarguments=(('DATA', data),)) + + +def field_delete(field_ptr, scope): + return ir.CallStatement(name=sym.ProcedureSymbol('FIELD_DELETE', scope=scope), + arguments=(field_ptr,)) + + +class FieldAPITransferType(Enum): + READ_ONLY = 1 + READ_WRITE = 2 + WRITE_ONLY = 3 + + +def field_get_device_data(field_ptr, dev_ptr, transfer_type: FieldAPITransferType, scope: Scope): + assert isinstance(transfer_type, FieldAPITransferType) + if transfer_type == FieldAPITransferType.READ_ONLY: + suffix = 'RDONLY' + if transfer_type == FieldAPITransferType.READ_WRITE: + suffix = 'RDWR' + if transfer_type == FieldAPITransferType.WRITE_ONLY: + suffix = 'WRONLY' + procedure_name = 'GET_DEVICE_DATA_' + suffix + return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), + arguments=(dev_ptr.clone(dimensions=None),), ) + + +def field_sync_host(field_ptr, scope): + procedure_name = 'SYNC_HOST_RDWR' + return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), arguments=()) + + From 87850a10bb2a5607830ab73dd13f7553942b6706 Mon Sep 17 00:00:00 2001 From: Johan Ericsson Date: Mon, 4 Nov 2024 17:49:49 +0100 Subject: [PATCH 05/12] Added tests for FieldOffloadTransformation --- loki/transformations/data_offload.py | 7 +- loki/transformations/parallel/field_api.py | 7 +- .../parallel/tests/test_field_api.py | 57 +++- .../tests/test_data_offload.py | 271 
++++++++++++++++-- 4 files changed, 309 insertions(+), 33 deletions(-) diff --git a/loki/transformations/data_offload.py b/loki/transformations/data_offload.py index 876341937..3f6a43f32 100644 --- a/loki/transformations/data_offload.py +++ b/loki/transformations/data_offload.py @@ -1011,8 +1011,7 @@ def out_pairs(self): def __init__(self, **kwargs): - self.devptr_prefix = kwargs.get('devptr_prefix', 'loki_devptr') - self.assume_deviceptr = kwargs.get('assume_deviceptr', False) + self.deviceptr_prefix = kwargs.get('devptr_prefix', 'loki_devptr_') field_group_types = kwargs.get('field_group_types', ['CLOUDSC_STATE_TYPE', 'CLOUDSC_AUX_TYPE', 'CLOUDSC_FLUX_TYPE']) self.field_group_types = tuple(typename.lower() for typename in field_group_types) @@ -1086,14 +1085,14 @@ def _declare_device_ptrs(self, driver, offload_variables): driver.variables += device_ptrs return device_ptrs - def _devptr_from_array(self, driver, a: sym.Array) -> sym.Variable: + def _devptr_from_array(self, driver, a: sym.Array): """ Returns a contiguous pointer :any:`Variable` with types matching the array a """ shape = (sym.RangeIndex((None, None)),) * (len(a.shape)+1) devptr_type = a.type.clone(pointer=True, contiguous=True, shape=shape, intent=None) base_name = a.name if a.parent is None else '_'.join(a.name.split('%')) - devptr_name = self.devptr_prefix + base_name + devptr_name = self.deviceptr_prefix + base_name try: driver.variable_map[devptr_name] warning(f'[Loki] Field data offload: The routine {driver.name} already has a' + diff --git a/loki/transformations/parallel/field_api.py b/loki/transformations/parallel/field_api.py index 5ef3bc164..656ce70ce 100644 --- a/loki/transformations/parallel/field_api.py +++ b/loki/transformations/parallel/field_api.py @@ -171,12 +171,12 @@ def get_field_type(a: sym.Array) -> sym.DerivedType: "jprm", "jprd", "jplm"] - type_name = a.type.kind.name + assert type_name.lower() in type_map, ('Error array type kind is: ' f'"{type_name}" which is not a valid IFS 
type specifier') rank = len(a.shape) - field_type = sym.DerivedType(name="field_" + str(rank) + type_name[2:4]) + field_type = sym.DerivedType(name="field_" + str(rank) + type_name[2:4].lower()) return field_type @@ -197,7 +197,8 @@ class FieldAPITransferType(Enum): def field_get_device_data(field_ptr, dev_ptr, transfer_type: FieldAPITransferType, scope: Scope): - assert isinstance(transfer_type, FieldAPITransferType) + if not isinstance(transfer_type, FieldAPITransferType): + raise TypeError(f"transfer_type must be of type FieldAPITransferType, but is of type {type(transfer_type)}") if transfer_type == FieldAPITransferType.READ_ONLY: suffix = 'RDONLY' if transfer_type == FieldAPITransferType.READ_WRITE: diff --git a/loki/transformations/parallel/tests/test_field_api.py b/loki/transformations/parallel/tests/test_field_api.py index b752dec85..1441389b4 100644 --- a/loki/transformations/parallel/tests/test_field_api.py +++ b/loki/transformations/parallel/tests/test_field_api.py @@ -10,11 +10,13 @@ from loki import Subroutine, Module, Dimension from loki.frontend import available_frontends, OMNI from loki.ir import nodes as ir, FindNodes -from loki.logging import WARNING - +from loki.expression import symbols as sym +from loki.scope import Scope from loki.transformations.parallel import ( - remove_field_api_view_updates, add_field_api_view_updates + remove_field_api_view_updates, add_field_api_view_updates, get_field_type ) +from loki.types import BasicType, SymbolAttributes +from loki.logging import WARNING @pytest.mark.parametrize('frontend', available_frontends( @@ -136,3 +138,52 @@ def test_field_api_add_view_updates(frontend): assert calls[3].name == 'STATE%UPDATE_VIEW' and calls[3].arguments == ('IBL',) assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 + + +def test_get_field_type(): + type_map = ["jprb", + "jpit", + "jpis", + "jpim", + "jpib", + "jpia", + "jprt", + "jprs", + "jprm", + "jprd", + "jplm"] + field_types = [ + "field_1rb", "field_2rb", 
"field_3rb", + "field_1it", "field_2it", "field_3it", + "field_1is", "field_2is", "field_3is", + "field_1im", "field_2im", "field_3im", + "field_1ib", "field_2ib", "field_3ib", + "field_1ia", "field_2ia", "field_3ia", + "field_1rt", "field_2rt", "field_3rt", + "field_1rs", "field_2rs", "field_3rs", + "field_1rm", "field_2rm", "field_3rm", + "field_1rd", "field_2rd", "field_3rd", + "field_1lm", "field_2lm", "field_3lm", + ] + + def generate_fields(types): + generated = [] + for type_name in types: + for dim in range(1, 4): + shape = tuple(None for _ in range(dim)) + a = sym.Variable(name='test_array', + type=SymbolAttributes(BasicType.REAL, + shape=shape, + kind=sym.Variable(name=type_name))) + generated.append(get_field_type(a)) + return generated + + generated = generate_fields(type_map) + for field, field_name in zip(generated, field_types): + assert isinstance(field, sym.DerivedType) and field.name == field_name + + generated = generate_fields([t.upper() for t in type_map]) + for field, field_name in zip(generated, field_types): + assert isinstance(field, sym.DerivedType) and field.name == field_name + + diff --git a/loki/transformations/tests/test_data_offload.py b/loki/transformations/tests/test_data_offload.py index e97ca57da..5517b0a7c 100644 --- a/loki/transformations/tests/test_data_offload.py +++ b/loki/transformations/tests/test_data_offload.py @@ -16,12 +16,11 @@ FindNodes, Pragma, PragmaRegion, Loop, CallStatement, Import, pragma_regions_attached, get_pragma_parameters ) +import loki.expression.symbols as sym from loki.transformations import ( DataOffloadTransformation, GlobalVariableAnalysis, - GlobalVarOffloadTransformation, GlobalVarHoistTransformation + GlobalVarOffloadTransformation, GlobalVarHoistTransformation, FieldOffloadTransformation ) -from loki.expression import symbols as sym -from loki.scope import Scope from loki.transformations.data_offload import find_array_arguments, find_target_calls @@ -810,24 +809,6 @@ def 
test_transformation_global_var_derived_type_hoist(here, config, frontend, ho assert kernel.variable_map['p0'].type.dtype.name == 'point' -def test_get_field_type(): - type_map = ["jprb", - "jpit", - "jpis", - "jpim", - "jpib", - "jpia", - "jprt", - "jprs", - "jprm", - "jprd", - "jplm"] - type_map += [t.upper() for t in type_map] - scope = Scope() - for type_name in type_map: - for dim in range(1,4): - a = sym.Array(name='test_array', dimensions=(dim), scope=scope) - # Am I going about this wrong with shape? @pytest.mark.parametrize('frontend', available_frontends()) def test_find_array_arguments(frontend): @@ -847,7 +828,7 @@ def test_find_array_arguments(frontend): REAL :: d_copy REAL :: e_copy REAL :: f_copy - + call kernel_routine(nlon_copy, nlev_copy, a_copy, b_copy, c_copy, d_copy, e_copy, f_copy) END SUBROUTINE driver_routine @@ -861,7 +842,7 @@ def test_find_array_arguments(frontend): REAL, INTENT(IN) :: d REAL, INTENT(IN) :: e REAL, INTENT(IN) :: f - + do j=1, nlon do i=1, nlev b_copy(i) = a(i,j) + 0.1 @@ -880,3 +861,247 @@ def test_find_array_arguments(frontend): assert len(inout_vars) == 1 and inout_vars[0].name == 'b_copy' assert len(out_vars) == 1 and out_vars[0].name == 'c_copy' + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload(frontend): + fcode = """ + module driver_mod + use state_type_mod, only: state_type + use parkind1, only: jprb + use field_module, only: field_2rb, field_3rb + implicit none + + type state_type + real(kind=jprb), dimension(10,10), pointer :: a, b, c + class(field_3rb), pointer :: f_a, f_b, f_c + end type state_type + + + contains + + subroutine kernel_routine(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real(kind=jprb), intent(in) :: a(nlon,nlev) + real(kind=jprb), intent(inout) :: b(nlon,nlev) + real(kind=jprb), intent(out) :: c(nlon,nlev) + integer :: i, j + + do j=1, nlon + do i=1, nlev + b(i,j) = a(i,j) + 0.1 + c(i,j) = 0.1 + end do + end do + end subroutine kernel_routine + 
+ subroutine driver_routine(nlon, nlev, state) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i + + !$loki data + do i=1,nlev + call state%update_view(i) + call kernel_routine(nlon, nlev, state%a, state%b, state%c) + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + + driver_mod = Sourcefile.from_source(fcode)['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + field_group_types=['state_type']), + role='driver', + targets=['kernel_routine']) + + calls = FindNodes(CallStatement).visit(driver.body) + kernel_call = next(c for c in calls if c.name=='kernel_routine') + + # verify that field offloads are generated properly + in_calls = [c for c in calls if 'get_device_data_rdonly' in c.name.name.lower()] + assert len(in_calls) == 1 + inout_calls = [c for c in calls if 'get_device_data_rdwr' in c.name.name.lower()] + assert len(inout_calls) == 2 + # verify that field sync host calls are generated properly + sync_calls = [c for c in calls if 'sync_host_rdwr' in c.name.name.lower()] + assert len(sync_calls) == 2 + + # verify that data offload pragmas remain + pragmas = FindNodes(Pragma).visit(driver.body) + assert len(pragmas) == 2 + assert all(p.keyword=='loki' and p.content==c for p, c in zip(pragmas, ['data', 'end data'])) + + # verify that new pointer variables are created and used in driver calls + for var in ['state_a', 'state_b', 'state_c']: + name = deviceptr_prefix + var + assert name in driver.variable_map + devptr = driver.variable_map[name] + assert isinstance(devptr, sym.Array) + assert len(devptr.shape) == 3 + assert devptr.name in (arg.name for arg in kernel_call.arguments) + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload_multiple_calls(frontend): + fcode = """ + module driver_mod + use state_type_mod, only: 
state_type + use parkind1, only: jprb + use field_module, only: field_2rb, field_3rb + implicit none + + type state_type + real(kind=jprb), dimension(10,10), pointer :: a, b, c + class(field_3rb), pointer :: f_a, f_b, f_c + end type state_type + + + contains + + subroutine kernel_routine(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real(kind=jprb), intent(in) :: a(nlon,nlev) + real(kind=jprb), intent(inout) :: b(nlon,nlev) + real(kind=jprb), intent(out) :: c(nlon,nlev) + integer :: i, j + + do j=1, nlon + do i=1, nlev + b(i,j) = a(i,j) + 0.1 + c(i,j) = 0.1 + end do + end do + end subroutine kernel_routine + + subroutine driver_routine(nlon, nlev, state) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i + + !$loki data + do i=1,nlev + call state%update_view(i) + + call kernel_routine(nlon, nlev, state%a, state%b, state%c) + + call kernel_routine(nlon, nlev, state%a, state%b, state%c) + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + + driver_mod = Sourcefile.from_source(fcode)['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + field_group_types=['state_type']), + role='driver', + targets=['kernel_routine']) + calls = FindNodes(CallStatement).visit(driver.body) + kernel_calls = [c for c in calls if c.name=='kernel_routine'] + + # verify that field offloads are generated properly + in_calls = [c for c in calls if 'get_device_data_rdonly' in c.name.name.lower()] + assert len(in_calls) == 1 + inout_calls = [c for c in calls if 'get_device_data_rdwr' in c.name.name.lower()] + assert len(inout_calls) == 2 + # verify that field sync host calls are generated properly + sync_calls = [c for c in calls if 'sync_host_rdwr' in c.name.name.lower()] + assert len(sync_calls) == 2 + + # verify that data offload pragmas remain + pragmas = 
FindNodes(Pragma).visit(driver.body) + assert len(pragmas) == 2 + assert all(p.keyword=='loki' and p.content==c for p, c in zip(pragmas, ['data', 'end data'])) + + # verify that new pointer variables are created and used in driver calls + for var in ['state_a', 'state_b', 'state_c']: + name = deviceptr_prefix + var + assert name in driver.variable_map + devptr = driver.variable_map[name] + assert isinstance(devptr, sym.Array) + assert len(devptr.shape) == 3 + assert devptr.name in (arg.name for kernel_call in kernel_calls for arg in kernel_call.arguments) + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload_no_targets(frontend): + fcode = """ + module driver_mod + use state_type_mod, only: state_type + use parkind1, only: jprb + use field_module, only: field_2rb, field_3rb + use another_module, only: another_kernel + implicit none + + type state_type + real(kind=jprb), dimension(10,10), pointer :: a, b, c + class(field_3rb), pointer :: f_a, f_b, f_c + end type state_type + + + contains + + subroutine kernel_routine(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real(kind=jprb), intent(in) :: a(nlon,nlev) + real(kind=jprb), intent(inout) :: b(nlon,nlev) + real(kind=jprb), intent(out) :: c(nlon,nlev) + integer :: i, j + + do j=1, nlon + do i=1, nlev + b(i,j) = a(i,j) + 0.1 + c(i,j) = 0.1 + end do + end do + end subroutine kernel_routine + + subroutine driver_routine(nlon, nlev, state) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i + + !$loki data + do i=1,nlev + call state%update_view(i) + + call another_kernel() + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + + driver_mod = Sourcefile.from_source(fcode)['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + field_group_types=['state_type']), + role='driver', + 
targets=['kernel_routine']) + + calls = FindNodes(CallStatement).visit(driver.body) + assert not any(c for c in calls if c.name=='kernel_routine') + + # verify that no field offloads are generated + in_calls = [c for c in calls if 'get_device_data_rdonly' in c.name.name.lower()] + assert len(in_calls) == 0 + inout_calls = [c for c in calls if 'get_device_data_rdwr' in c.name.name.lower()] + assert len(inout_calls) == 0 + # verify that no field sync host calls are generated + sync_calls = [c for c in calls if 'sync_host_rdwr' in c.name.name.lower()] + assert len(sync_calls) == 0 + + # verify that data offload pragmas remain + pragmas = FindNodes(Pragma).visit(driver.body) + assert len(pragmas) == 2 + assert all(p.keyword=='loki' and p.content==c for p, c in zip(pragmas, ['data', 'end data'])) + From 742795d0a7c90b914002ddf1c5a569af14c65b6f Mon Sep 17 00:00:00 2001 From: Johan Ericsson Date: Wed, 6 Nov 2024 17:03:52 +0100 Subject: [PATCH 06/12] Removed automatic inferral of offload indexes from view updates --- loki/transformations/data_offload.py | 35 +++++++------------ .../tests/test_data_offload.py | 3 ++ 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/loki/transformations/data_offload.py b/loki/transformations/data_offload.py index 3f6a43f32..5382c7fbd 100644 --- a/loki/transformations/data_offload.py +++ b/loki/transformations/data_offload.py @@ -19,7 +19,9 @@ from loki.logging import warning from loki.tools import as_tuple, flatten, CaseInsensitiveDict, CaseInsensitiveDefaultDict from loki.types import BasicType, DerivedType -from loki.transformations.parallel import FieldAPITransferType, field_get_device_data, field_sync_host +from loki.transformations.parallel import ( + FieldAPITransferType, field_get_device_data, field_sync_host, remove_field_api_view_updates +) __all__ = [ 'DataOffloadTransformation', 'GlobalVariableAnalysis', @@ -1014,6 +1016,7 @@ def __init__(self, **kwargs): self.deviceptr_prefix = kwargs.get('devptr_prefix', 
'loki_devptr_') field_group_types = kwargs.get('field_group_types', ['CLOUDSC_STATE_TYPE', 'CLOUDSC_AUX_TYPE', 'CLOUDSC_FLUX_TYPE']) self.field_group_types = tuple(typename.lower() for typename in field_group_types) + self.offload_index = kwargs.get('offload_index', 'IBL') def transform_subroutine(self, routine, **kwargs): role = kwargs['role'] @@ -1032,12 +1035,14 @@ def process_driver(self, driver, targets): # Only work on active `!$loki data` regions if not DataOffloadTransformation._is_active_loki_data_region(region, targets): continue + # remove_field_api_view_updates(driver, self.field_group_types) # FIXME: if called here, driver.body will not be updated in subsequent routines kernel_calls = find_target_calls(region, targets) offload_variables = self.find_offload_variables(driver, kernel_calls) device_ptrs = self._declare_device_ptrs(driver, offload_variables) offload_map = self.FieldPointerMap(device_ptrs, *offload_variables) - old_offload_calls = self._replace_data_offload_calls(driver, region, offload_map) - self._replace_kernel_args(kernel_calls, old_offload_calls, offload_map) + self._add_field_offload_calls(driver, region, offload_map) + self._replace_kernel_args(driver, kernel_calls, offload_map) + remove_field_api_view_updates(driver, self.field_group_types) # if called here it works def find_offload_variables(self, driver, calls): inargs = () @@ -1102,12 +1107,7 @@ def _devptr_from_array(self, driver, a: sym.Array): devptr = sym.Variable(name=devptr_name, type=devptr_type, dimensions=shape) return devptr - def _replace_data_offload_calls(self, driver, region, offload_map): - # remove calls to [field_group_type]%update_view - calls = FindNodes(CallStatement).visit(region) - field_group_updates = tuple(c for c in calls if self._is_field_group_update(driver, c)) - # c.arguments contains Scalar(IBL) - Transformer(dict.fromkeys(field_group_updates, None), inplace=True).visit(region.body) + def _add_field_offload_calls(self, driver, region, offload_map): 
host_to_device = tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, FieldAPITransferType.READ_ONLY, driver) for inarg, devptr in offload_map.in_pairs) host_to_device += tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, @@ -1116,10 +1116,8 @@ def _replace_data_offload_calls(self, driver, region, offload_map): FieldAPITransferType.READ_WRITE, driver) for inarg, devptr in offload_map.out_pairs) device_to_host = tuple(field_sync_host(self._get_field_ptr_from_view(inarg), driver) for inarg, _ in chain(offload_map.inout_pairs, offload_map.out_pairs)) - # field_deletes = tuple(field_delete(field_ptr_map[var], routine) for var in blocking_arrays) update_map = {region: host_to_device + (region,) + device_to_host} Transformer(update_map, inplace=True).visit(driver.body) - return field_group_updates def _is_field_group_update(self, driver, call): try: @@ -1136,20 +1134,11 @@ def _get_field_ptr_from_view(self, field_view): field_type_name = 'F_' + type_chain[-1] return field_view.parent.get_derived_type_member(field_type_name) - def _replace_kernel_args(self, kernel_calls, old_offload_calls, offload_map): - """TODO: Docstring for _replace_kernel_calls. 
- - :kernel_calls: TODO - :old_offload_calls: TODO - :device_ptrs: TODO - :returns: TODO - """ + def _replace_kernel_args(self, driver, kernel_calls, offload_map): change_map = {} + offload_idx_expr = driver.variable_map[self.offload_index] for arg, devptr in chain(offload_map.in_pairs, offload_map.inout_pairs, offload_map.out_pairs): - group_update = next((c for c in old_offload_calls if c.name.parent == arg.parent), None) - assert group_update is not None, "Group update should not be none" - block_idx = group_update.arguments[0] - dims = (sym.RangeIndex((None, None)),) * (len(devptr.shape)-1) + (block_idx,) + dims = (sym.RangeIndex((None, None)),) * (len(devptr.shape)-1) + (offload_idx_expr,) change_map[arg] = devptr.clone(dimensions=dims) arg_transformer = SubstituteExpressions(change_map, inplace=True) for call in kernel_calls: diff --git a/loki/transformations/tests/test_data_offload.py b/loki/transformations/tests/test_data_offload.py index 5517b0a7c..8ce949dc9 100644 --- a/loki/transformations/tests/test_data_offload.py +++ b/loki/transformations/tests/test_data_offload.py @@ -914,6 +914,7 @@ def test_field_offload(frontend): driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + offload_index='i', field_group_types=['state_type']), role='driver', targets=['kernel_routine']) @@ -1000,6 +1001,7 @@ def test_field_offload_multiple_calls(frontend): driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + offload_index='i', field_group_types=['state_type']), role='driver', targets=['kernel_routine']) @@ -1084,6 +1086,7 @@ def test_field_offload_no_targets(frontend): driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + offload_index='i', field_group_types=['state_type']), 
role='driver', targets=['kernel_routine']) From ff5ddee22183a1e9586cbaea5396d11fbe6422f9 Mon Sep 17 00:00:00 2001 From: Johan Ericsson Date: Mon, 11 Nov 2024 10:50:06 +0100 Subject: [PATCH 07/12] Removed unused find_array_arg utility and FieldOffload clean --- loki/transformations/data_offload.py | 55 +------------------ .../tests/test_data_offload.py | 54 ------------------ 2 files changed, 1 insertion(+), 108 deletions(-) diff --git a/loki/transformations/data_offload.py b/loki/transformations/data_offload.py index 5382c7fbd..21ca50b1c 100644 --- a/loki/transformations/data_offload.py +++ b/loki/transformations/data_offload.py @@ -920,57 +920,6 @@ def _append_routine_arguments(self, routine, item): routine.arguments += tuple(sorted(new_arguments, key=lambda symbol: symbol.name)) -################################################################################ -# Field API helper routines -################################################################################ - -def find_array_arguments(routine, calls): - """ - Finds all arguments and sorts them by intents - - Parameters - ---------- - routine : `Subroutine` - Subroutine to apply this transformation to. - calls : list of `CallStatement` - Calls to extract arguments from. - This transformation will only apply at the ``'driver'`` level. 
- - - Returns - ------- - inargs : tuple of arguments that are only inargs - inoutargs : tuples of arguments that are both in and out, or inout - outargs : tuples of arguments that are only outargs - """ - inargs = () - inoutargs = () - outargs = () - - for call in calls: - if call.routine is BasicType.DEFERRED: - warning(f'[Loki] Data offload: Routine {routine.name} has not been enriched ' + - f'in {str(call.name).lower()}') - continue - for param, arg in call.arg_iter(): - if isinstance(param, Array) and param.type.intent.lower() == 'in': - inargs += (arg, ) - if isinstance(param, Array) and param.type.intent.lower() == 'inout': - inoutargs += (arg, ) - if isinstance(param, Array) and param.type.intent.lower() == 'out': - outargs += (arg, ) - - inoutargs += tuple(v for v in inargs if v in outargs) - inargs = tuple(v for v in inargs if v not in inoutargs) - outargs = tuple(v for v in outargs if v not in inoutargs) - - # Filter for duplicates TODO: What if we pass different slices of same array!? 
- inargs = tuple(set(inargs)) - inoutargs = tuple(set(inoutargs)) - outargs = tuple(set(outargs)) - return inargs, inoutargs, outargs - - def find_target_calls(region, targets): """Returns a list of all calls to targets inside the region @@ -1030,19 +979,18 @@ def process_kernel(self, routine): pass def process_driver(self, driver, targets): + remove_field_api_view_updates(driver, self.field_group_types + tuple(s.upper() for s in self.field_group_types)) with pragma_regions_attached(driver): for region in FindNodes(PragmaRegion).visit(driver.body): # Only work on active `!$loki data` regions if not DataOffloadTransformation._is_active_loki_data_region(region, targets): continue - # remove_field_api_view_updates(driver, self.field_group_types) # FIXME: if called here, driver.body will not be updated in subsequent routines kernel_calls = find_target_calls(region, targets) offload_variables = self.find_offload_variables(driver, kernel_calls) device_ptrs = self._declare_device_ptrs(driver, offload_variables) offload_map = self.FieldPointerMap(device_ptrs, *offload_variables) self._add_field_offload_calls(driver, region, offload_map) self._replace_kernel_args(driver, kernel_calls, offload_map) - remove_field_api_view_updates(driver, self.field_group_types) # if called here it works def find_offload_variables(self, driver, calls): inargs = () @@ -1078,7 +1026,6 @@ def find_offload_variables(self, driver, calls): inargs = tuple(v for v in inargs if v not in inoutargs) outargs = tuple(v for v in outargs if v not in inoutargs) - # Filter for duplicates TODO: What if we pass different slices of same array!? 
inargs = tuple(set(inargs)) inoutargs = tuple(set(inoutargs)) outargs = tuple(set(outargs)) diff --git a/loki/transformations/tests/test_data_offload.py b/loki/transformations/tests/test_data_offload.py index 8ce949dc9..73570dc4b 100644 --- a/loki/transformations/tests/test_data_offload.py +++ b/loki/transformations/tests/test_data_offload.py @@ -21,7 +21,6 @@ DataOffloadTransformation, GlobalVariableAnalysis, GlobalVarOffloadTransformation, GlobalVarHoistTransformation, FieldOffloadTransformation ) -from loki.transformations.data_offload import find_array_arguments, find_target_calls @pytest.fixture(scope='module', name='here') @@ -809,59 +808,6 @@ def test_transformation_global_var_derived_type_hoist(here, config, frontend, ho assert kernel.variable_map['p0'].type.dtype.name == 'point' - -@pytest.mark.parametrize('frontend', available_frontends()) -def test_find_array_arguments(frontend): - fcode_driver = """ - SUBROUTINE driver_routine(nlon, nlev, a, b, c, d, e, f) - INTEGER, INTENT(INOUT) :: nlon, nlev - REAL, INTENT(INOUT) :: a(nlon,nlev) - REAL, INTENT(INOUT) :: b(nlon) - REAL, INTENT(INOUT) :: c(nlon,nlev,nlon) - REAL, INTENT(INOUT) :: d - REAL, INTENT(INOUT) :: e - REAL, INTENT(INOUT) :: f - INTEGER :: nlon_copy, nlev_copy - REAL :: a_copy(nlon,nlev) - REAL :: b_copy(nlon) - REAL :: c_copy(nlon,nlev,nlon) - REAL :: d_copy - REAL :: e_copy - REAL :: f_copy - - call kernel_routine(nlon_copy, nlev_copy, a_copy, b_copy, c_copy, d_copy, e_copy, f_copy) - - END SUBROUTINE driver_routine - """ - fcode_kernel = """ - SUBROUTINE kernel_routine(nlon, nlev, a, b, c, d, e, f) - INTEGER, INTENT(IN) :: nlon, nlev - REAL, INTENT(IN) :: a(nlon,nlev) - REAL, INTENT(INOUT) :: b(nlon) - REAL, INTENT(OUT) :: c(nlon,nlev,nlon) - REAL, INTENT(IN) :: d - REAL, INTENT(IN) :: e - REAL, INTENT(IN) :: f - - do j=1, nlon - do i=1, nlev - b_copy(i) = a(i,j) + 0.1 - c(i,j,i) = 0.1 - end do - end do - END SUBROUTINE kernel_routine - """ - driver = Sourcefile.from_source(fcode_driver, 
frontend=frontend)['driver_routine'] - kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] - driver.enrich(kernel) - targets = find_target_calls(driver.body, [kernel.name]) - in_vars, inout_vars, out_vars = find_array_arguments(driver, targets) - - assert len(in_vars) == 1 and in_vars[0].name == 'a_copy' - assert len(inout_vars) == 1 and inout_vars[0].name == 'b_copy' - assert len(out_vars) == 1 and out_vars[0].name == 'c_copy' - - @pytest.mark.parametrize('frontend', available_frontends()) def test_field_offload(frontend): fcode = """ From b7f05127b3f8f32ac07f59ecc49b13bf2db0c681 Mon Sep 17 00:00:00 2001 From: Johan Ericsson Date: Fri, 15 Nov 2024 16:18:20 +0100 Subject: [PATCH 08/12] clean and linter error fixes --- loki/transformations/data_offload.py | 23 +++++++++++----------- loki/transformations/parallel/field_api.py | 12 ++++++----- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/loki/transformations/data_offload.py b/loki/transformations/data_offload.py index 21ca50b1c..ceae5317f 100644 --- a/loki/transformations/data_offload.py +++ b/loki/transformations/data_offload.py @@ -135,7 +135,8 @@ def insert_data_offload_pragmas(self, routine, targets): continue for param, arg in call.arg_iter(): - if isinstance(param, Array) and param.type.intent.lower() == 'in': inargs += (str(arg.name).lower(),) + if isinstance(param, Array) and param.type.intent.lower() == 'in': + inargs += (str(arg.name).lower(),) if isinstance(param, Array) and param.type.intent.lower() == 'inout': inoutargs += (str(arg.name).lower(),) if isinstance(param, Array) and param.type.intent.lower() == 'out': @@ -963,7 +964,9 @@ def out_pairs(self): def __init__(self, **kwargs): self.deviceptr_prefix = kwargs.get('devptr_prefix', 'loki_devptr_') - field_group_types = kwargs.get('field_group_types', ['CLOUDSC_STATE_TYPE', 'CLOUDSC_AUX_TYPE', 'CLOUDSC_FLUX_TYPE']) + field_group_types = kwargs.get('field_group_types', ['CLOUDSC_STATE_TYPE', + 
'CLOUDSC_AUX_TYPE', + 'CLOUDSC_FLUX_TYPE']) self.field_group_types = tuple(typename.lower() for typename in field_group_types) self.offload_index = kwargs.get('offload_index', 'IBL') @@ -1008,11 +1011,11 @@ def find_offload_variables(self, driver, calls): try: parent = arg.parent if parent.type.dtype.name.lower() not in self.field_group_types: - warning(f'[Loki] The parent object {parent.name} of type {parent.type.dtype} is not in the list of' + - ' field wrapper types') + warning(f'[Loki] The parent object {parent.name} of type ' + + f'{parent.type.dtype} is not in the list of field wrapper types') except AttributeError: - warning(f'[Loki] Field data offload: Raw array object {arg.name} encountered in' + - f'{driver.name} that is not wrapped by a Field API object') # ofc we cant know this for sure + warning(f'[Loki] Field data offload: Raw array object {arg.name} encountered in' + + f'{driver.name} that is not wrapped by a Field API object') continue if param.type.intent.lower() == 'in': @@ -1045,12 +1048,9 @@ def _devptr_from_array(self, driver, a: sym.Array): devptr_type = a.type.clone(pointer=True, contiguous=True, shape=shape, intent=None) base_name = a.name if a.parent is None else '_'.join(a.name.split('%')) devptr_name = self.deviceptr_prefix + base_name - try: - driver.variable_map[devptr_name] + if devptr_name in driver.variable_map: warning(f'[Loki] Field data offload: The routine {driver.name} already has a' + f'variable named {devptr_name}') - except KeyError: - pass devptr = sym.Variable(name=devptr_name, type=devptr_type, dimensions=shape) return devptr @@ -1068,7 +1068,7 @@ def _add_field_offload_calls(self, driver, region, offload_map): def _is_field_group_update(self, driver, call): try: - *_, parent, call_name = call.name.name.split('%') + *_, parent, _call_name = call.name.name.split('%') parent = driver.variable_map.get(parent) if parent is not None and parent.type.dtype.name.lower() in self.field_group_types: return True @@ -1090,4 +1090,3 @@ 
def _replace_kernel_args(self, driver, kernel_calls, offload_map): arg_transformer = SubstituteExpressions(change_map, inplace=True) for call in kernel_calls: arg_transformer.visit(call) - diff --git a/loki/transformations/parallel/field_api.py b/loki/transformations/parallel/field_api.py index 656ce70ce..687842e10 100644 --- a/loki/transformations/parallel/field_api.py +++ b/loki/transformations/parallel/field_api.py @@ -29,7 +29,9 @@ def remove_field_api_view_updates(routine, field_group_types, dim_object=None): """ Remove FIELD API boilerplate calls for view updates of derived types. - This utility is intended to remove the IFS-specific group type objects that provide block-scope view pointers to deep kernel trees. It will remove all calls to ``UPDATE_VIEW`` on derive-type + This utility is intended to remove the IFS-specific group type + objects that provide block-scope view pointers to deep kernel + trees. It will remove all calls to ``UPDATE_VIEW`` on derived-type objects with the respective types. 
Parameters @@ -201,10 +203,12 @@ def field_get_device_data(field_ptr, dev_ptr, transfer_type: FieldAPITransferTyp raise TypeError(f"transfer_type must be of type FieldAPITransferType, but is of type {type(transfer_type)}") if transfer_type == FieldAPITransferType.READ_ONLY: suffix = 'RDONLY' - if transfer_type == FieldAPITransferType.READ_WRITE: + elif transfer_type == FieldAPITransferType.READ_WRITE: suffix = 'RDWR' - if transfer_type == FieldAPITransferType.WRITE_ONLY: + elif transfer_type == FieldAPITransferType.WRITE_ONLY: suffix = 'WRONLY' + else: + suffix = '' procedure_name = 'GET_DEVICE_DATA_' + suffix return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), arguments=(dev_ptr.clone(dimensions=None),), ) @@ -213,5 +217,3 @@ def field_get_device_data(field_ptr, dev_ptr, transfer_type: FieldAPITransferTyp def field_sync_host(field_ptr, scope): procedure_name = 'SYNC_HOST_RDWR' return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), arguments=()) - - From 7e53319f3e99c5cd0fa4ddbf31d9fd7e305eb57f Mon Sep 17 00:00:00 2001 From: Johan Ericsson Date: Mon, 18 Nov 2024 12:00:33 +0100 Subject: [PATCH 09/12] Removed unused functions and added more complete tests of data offloads --- loki/transformations/data_offload.py | 37 +- loki/transformations/parallel/field_api.py | 12 +- .../parallel/tests/test_field_api.py | 14 +- .../tests/test_data_offload.py | 315 ++++++++++++++++-- 4 files changed, 315 insertions(+), 63 deletions(-) diff --git a/loki/transformations/data_offload.py b/loki/transformations/data_offload.py index ceae5317f..d5c4f4d72 100644 --- a/loki/transformations/data_offload.py +++ b/loki/transformations/data_offload.py @@ -16,7 +16,7 @@ Transformer, pragma_regions_attached, get_pragma_parameters, FindInlineCalls, SubstituteExpressions ) -from loki.logging import warning +from loki.logging import warning, error from loki.tools import as_tuple, flatten, CaseInsensitiveDict, 
CaseInsensitiveDefaultDict from loki.types import BasicType, DerivedType from loki.transformations.parallel import ( @@ -54,6 +54,11 @@ def __init__(self, **kwargs): self.assume_deviceptr = kwargs.get('assume_deviceptr', False) self.assume_acc_mapped = kwargs.get('assume_acc_mapped', False) + if self.assume_deviceptr and self.assume_acc_mapped: + error("[Loki] Data offload: Can't assume both acc_mapped and " + + "non-mapped device pointers for device data offload") + raise RuntimeError + def transform_subroutine(self, routine, **kwargs): """ Apply the transformation to a `Subroutine` object. @@ -152,7 +157,7 @@ def insert_data_offload_pragmas(self, routine, targets): outargs = tuple(dict.fromkeys(outargs)) inoutargs = tuple(dict.fromkeys(inoutargs)) - # Now geenerate the pre- and post pragmas (OpenACC) + # Now generate the pre- and post pragmas (OpenACC) if self.assume_deviceptr: offload_args = inargs + outargs + inoutargs if offload_args: @@ -973,14 +978,9 @@ def __init__(self, **kwargs): def transform_subroutine(self, routine, **kwargs): role = kwargs['role'] targets = as_tuple(kwargs.get('targets'), (None)) - if role == 'kernel': - self.process_kernel(routine) if role == 'driver': self.process_driver(routine, targets) - def process_kernel(self, routine): - pass - def process_driver(self, driver, targets): remove_field_api_view_updates(driver, self.field_group_types + tuple(s.upper() for s in self.field_group_types)) with pragma_regions_attached(driver): @@ -1002,20 +1002,21 @@ def find_offload_variables(self, driver, calls): for call in calls: if call.routine is BasicType.DEFERRED: - warning(f'[Loki] Data offload: Routine {driver.name} has not been enriched ' + + error(f'[Loki] Data offload: Routine {driver.name} has not been enriched ' + f'in {str(call.name).lower()}') - continue + raise RuntimeError for param, arg in call.arg_iter(): if not isinstance(param, Array): continue try: parent = arg.parent if parent.type.dtype.name.lower() not in 
self.field_group_types: - warning(f'[Loki] The parent object {parent.name} of type ' + + warning(f'[Loki] Data offload: The parent object {parent.name} of type ' + f'{parent.type.dtype} is not in the list of field wrapper types') + continue except AttributeError: - warning(f'[Loki] Field data offload: Raw array object {arg.name} encountered in' - + f'{driver.name} that is not wrapped by a Field API object') + warning(f'[Loki] Data offload: Raw array object {arg.name} encountered in' + + f' {driver.name} that is not wrapped by a Field API object') continue if param.type.intent.lower() == 'in': @@ -1049,7 +1050,7 @@ def _devptr_from_array(self, driver, a: sym.Array): base_name = a.name if a.parent is None else '_'.join(a.name.split('%')) devptr_name = self.deviceptr_prefix + base_name if devptr_name in driver.variable_map: - warning(f'[Loki] Field data offload: The routine {driver.name} already has a' + + warning(f'[Loki] Data offload: The routine {driver.name} already has a ' + f'variable named {devptr_name}') devptr = sym.Variable(name=devptr_name, type=devptr_type, dimensions=shape) return devptr @@ -1066,16 +1067,6 @@ def _add_field_offload_calls(self, driver, region, offload_map): update_map = {region: host_to_device + (region,) + device_to_host} Transformer(update_map, inplace=True).visit(driver.body) - def _is_field_group_update(self, driver, call): - try: - *_, parent, _call_name = call.name.name.split('%') - parent = driver.variable_map.get(parent) - if parent is not None and parent.type.dtype.name.lower() in self.field_group_types: - return True - except ValueError: - return False - return False - def _get_field_ptr_from_view(self, field_view): type_chain = field_view.name.split('%') field_type_name = 'F_' + type_chain[-1] diff --git a/loki/transformations/parallel/field_api.py b/loki/transformations/parallel/field_api.py index 687842e10..3aa529c8b 100644 --- a/loki/transformations/parallel/field_api.py +++ b/loki/transformations/parallel/field_api.py @@ 
-20,8 +20,7 @@ __all__ = [ 'remove_field_api_view_updates', 'add_field_api_view_updates', 'get_field_type', - 'field_new', 'field_delete', 'field_get_device_data', 'field_sync_host', - 'FieldAPITransferType' + 'field_get_device_data', 'field_sync_host', 'FieldAPITransferType' ] @@ -182,15 +181,6 @@ def get_field_type(a: sym.Array) -> sym.DerivedType: return field_type -def field_new(field_ptr, data, scope): - return ir.CallStatement(name=sym.ProcedureSymbol('FIELD_NEW', scope=scope), - arguments=(field_ptr,), kwarguments=(('DATA', data),)) - - -def field_delete(field_ptr, scope): - return ir.CallStatement(name=sym.ProcedureSymbol('FIELD_DELETE', scope=scope), - arguments=(field_ptr,)) - class FieldAPITransferType(Enum): READ_ONLY = 1 diff --git a/loki/transformations/parallel/tests/test_field_api.py b/loki/transformations/parallel/tests/test_field_api.py index 1441389b4..5e08f9c4c 100644 --- a/loki/transformations/parallel/tests/test_field_api.py +++ b/loki/transformations/parallel/tests/test_field_api.py @@ -13,7 +13,8 @@ from loki.expression import symbols as sym from loki.scope import Scope from loki.transformations.parallel import ( - remove_field_api_view_updates, add_field_api_view_updates, get_field_type + remove_field_api_view_updates, add_field_api_view_updates, get_field_type, + field_get_device_data, FieldAPITransferType ) from loki.types import BasicType, SymbolAttributes from loki.logging import WARNING @@ -187,3 +188,14 @@ def generate_fields(types): assert isinstance(field, sym.DerivedType) and field.name == field_name +def test_field_get_device_data(): + scope = Scope() + fptr = sym.Variable(name='fptr_var') + dev_ptr = sym.Variable(name='data_var') + for fttype in FieldAPITransferType: + get_dev_data_call = field_get_device_data(fptr, dev_ptr, fttype, scope) + assert isinstance(get_dev_data_call, ir.CallStatement) + assert get_dev_data_call.name.parent == fptr + with pytest.raises(TypeError): + _ = field_get_device_data(fptr, dev_ptr, 
"none_transfer_type", scope) + diff --git a/loki/transformations/tests/test_data_offload.py b/loki/transformations/tests/test_data_offload.py index 73570dc4b..458169dfe 100644 --- a/loki/transformations/tests/test_data_offload.py +++ b/loki/transformations/tests/test_data_offload.py @@ -9,25 +9,25 @@ import pytest from loki import ( - Sourcefile, Scheduler, FindInlineCalls + Sourcefile, Scheduler, FindInlineCalls, warning ) from loki.frontend import available_frontends, OMNI +from loki.logging import log_levels, logger from loki.ir import ( FindNodes, Pragma, PragmaRegion, Loop, CallStatement, Import, pragma_regions_attached, get_pragma_parameters ) import loki.expression.symbols as sym +from loki.module import Module from loki.transformations import ( DataOffloadTransformation, GlobalVariableAnalysis, GlobalVarOffloadTransformation, GlobalVarHoistTransformation, FieldOffloadTransformation ) - @pytest.fixture(scope='module', name='here') def fixture_here(): return Path(__file__).parent - @pytest.fixture(name='config') def fixture_config(): """ @@ -43,10 +43,45 @@ def fixture_config(): }, } +@pytest.fixture(name="parkind_mod") +def fixture_parkind_mod(tmp_path, frontend): + fcode = """ + module parkind1 + integer, parameter :: jprb=4 + end module + """ + return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + +@pytest.fixture(name="field_module") +def fixture_field_module(tmp_path, frontend): + fcode = """ + module field_module + implicit none + + type field_2rb + real, pointer :: f_ptr(:,:,:) + end type field_2rb + + type field_3rb + real, pointer :: f_ptr(:,:,:) + contains + procedure :: update_view + end type field_3rb + + contains + subroutine update_view(self, idx) + class(field_3rb), intent(in) :: self + integer, intent(in) :: idx + end subroutine + end module + """ + return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + @pytest.mark.parametrize('frontend', available_frontends()) @pytest.mark.parametrize('assume_deviceptr', 
[True, False]) -def test_data_offload_region_openacc(frontend, assume_deviceptr): +@pytest.mark.parametrize('assume_acc_mapped', [True, False]) +def test_data_offload_region_openacc(caplog, frontend, assume_deviceptr, assume_acc_mapped): """ Test the creation of a simple device data offload region (`!$acc update`) from a `!$loki data` region with a single @@ -82,11 +117,25 @@ def test_data_offload_region_openacc(frontend, assume_deviceptr): end do END SUBROUTINE kernel_routine """ - driver = Sourcefile.from_source(fcode_driver, frontend=frontend)['driver_routine'] - kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] - driver.enrich(kernel) - - driver.apply(DataOffloadTransformation(assume_deviceptr=assume_deviceptr), role='driver', + with caplog.at_level(log_levels['ERROR']): + driver = Sourcefile.from_source(fcode_driver, frontend=frontend)['driver_routine'] + kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] + driver.enrich(kernel) + + if assume_deviceptr and assume_acc_mapped: + with pytest.raises(RuntimeError): + driver.apply(DataOffloadTransformation(assume_deviceptr=assume_deviceptr, + assume_acc_mapped=assume_acc_mapped), + role='driver', + targets=['kernel_routine']) + assert len(caplog.records) == 1 + assert ("[Loki] Data offload: Can't assume both acc_mapped and non-mapped device pointers" + + " for device data offload") in caplog.records[0].message + return + + driver.apply(DataOffloadTransformation(assume_deviceptr=assume_deviceptr, + assume_acc_mapped=assume_acc_mapped), + role='driver', targets=['kernel_routine']) pragmas = FindNodes(Pragma).visit(driver.body) @@ -96,6 +145,10 @@ def test_data_offload_region_openacc(frontend, assume_deviceptr): assert 'deviceptr' in pragmas[0].content params = get_pragma_parameters(pragmas[0], only_loki_pragmas=False) assert all(var in params['deviceptr'] for var in ('a', 'b', 'c')) + elif assume_acc_mapped: + assert 'present' in pragmas[0].content + 
params = get_pragma_parameters(pragmas[0], only_loki_pragmas=False) + assert all(var in params['present'] for var in ('a', 'b', 'c')) else: transformed = driver.to_fortran() assert 'copyin( a )' in transformed @@ -809,10 +862,9 @@ def test_transformation_global_var_derived_type_hoist(here, config, frontend, ho @pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload(frontend): +def test_field_offload(frontend, parkind_mod, field_module, tmp_path): fcode = """ module driver_mod - use state_type_mod, only: state_type use parkind1, only: jprb use field_module, only: field_2rb, field_3rb implicit none @@ -820,11 +872,17 @@ def test_field_offload(frontend): type state_type real(kind=jprb), dimension(10,10), pointer :: a, b, c class(field_3rb), pointer :: f_a, f_b, f_c + contains + procedure :: update_view => state_update_view end type state_type - contains + subroutine state_update_view(self, idx) + class(state_type), intent(in) :: self + integer, intent(in) :: idx + end subroutine + subroutine kernel_routine(nlon, nlev, a, b, c) integer, intent(in) :: nlon, nlev real(kind=jprb), intent(in) :: a(nlon,nlev) @@ -855,12 +913,11 @@ def test_field_offload(frontend): end subroutine driver_routine end module driver_mod """ - - driver_mod = Sourcefile.from_source(fcode)['driver_mod'] + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, - offload_index='i', + offload_index='i', field_group_types=['state_type']), role='driver', targets=['kernel_routine']) @@ -893,10 +950,9 @@ def test_field_offload(frontend): @pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_multiple_calls(frontend): +def test_field_offload_multiple_calls(frontend, parkind_mod, field_module, tmp_path): fcode = """ module driver_mod - use state_type_mod, only: 
state_type use parkind1, only: jprb use field_module, only: field_2rb, field_3rb implicit none @@ -904,10 +960,16 @@ def test_field_offload_multiple_calls(frontend): type state_type real(kind=jprb), dimension(10,10), pointer :: a, b, c class(field_3rb), pointer :: f_a, f_b, f_c + contains + procedure :: update_view => state_update_view end type state_type contains + subroutine state_update_view(self, idx) + class(state_type), intent(in) :: self + integer, intent(in) :: idx + end subroutine subroutine kernel_routine(nlon, nlev, a, b, c) integer, intent(in) :: nlon, nlev @@ -943,17 +1005,17 @@ def test_field_offload_multiple_calls(frontend): end module driver_mod """ - driver_mod = Sourcefile.from_source(fcode)['driver_mod'] + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, - offload_index='i', + offload_index='i', field_group_types=['state_type']), role='driver', targets=['kernel_routine']) calls = FindNodes(CallStatement).visit(driver.body) kernel_calls = [c for c in calls if c.name=='kernel_routine'] - + # verify that field offloads are generated properly in_calls = [c for c in calls if 'get_device_data_rdonly' in c.name.name.lower()] assert len(in_calls) == 1 @@ -967,7 +1029,7 @@ def test_field_offload_multiple_calls(frontend): pragmas = FindNodes(Pragma).visit(driver.body) assert len(pragmas) == 2 assert all(p.keyword=='loki' and p.content==c for p, c in zip(pragmas, ['data', 'end data'])) - + # verify that new pointer variables are created and used in driver calls for var in ['state_a', 'state_b', 'state_c']: name = deviceptr_prefix + var @@ -979,23 +1041,42 @@ def test_field_offload_multiple_calls(frontend): @pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_no_targets(frontend): +def test_field_offload_no_targets(frontend, 
parkind_mod, field_module, tmp_path): + fother = """ + module another_module + implicit none + contains + subroutine another_kernel(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real, intent(in) :: a(nlon,nlev) + real, intent(inout) :: b(nlon,nlev) + real, intent(out) :: c(nlon,nlev) + integer :: i, j + end subroutine + end module + """ fcode = """ module driver_mod - use state_type_mod, only: state_type use parkind1, only: jprb use field_module, only: field_2rb, field_3rb use another_module, only: another_kernel + implicit none type state_type real(kind=jprb), dimension(10,10), pointer :: a, b, c class(field_3rb), pointer :: f_a, f_b, f_c + contains + procedure :: update_view => state_update_view end type state_type - contains + subroutine state_update_view(self, idx) + class(state_type), intent(in) :: self + integer, intent(in) :: idx + end subroutine + subroutine kernel_routine(nlon, nlev, a, b, c) integer, intent(in) :: nlon, nlev real(kind=jprb), intent(in) :: a(nlon,nlev) @@ -1019,8 +1100,7 @@ def test_field_offload_no_targets(frontend): !$loki data do i=1,nlev call state%update_view(i) - - call another_kernel() + call another_kernel(nlon, state%a, state%b, state%c) end do !$loki end data @@ -1028,11 +1108,12 @@ def test_field_offload_no_targets(frontend): end module driver_mod """ - driver_mod = Sourcefile.from_source(fcode)['driver_mod'] + Sourcefile.from_source(fother, frontend=frontend, xmods=[tmp_path]) + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, - offload_index='i', + offload_index='i', field_group_types=['state_type']), role='driver', targets=['kernel_routine']) @@ -1054,3 +1135,181 @@ def test_field_offload_no_targets(frontend): assert len(pragmas) == 2 assert all(p.keyword=='loki' and p.content==c for p, c in zip(pragmas, ['data', 'end 
data'])) + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload_unknown_kernel(caplog, frontend, parkind_mod, field_module, tmp_path): + fother = """ + module another_module + implicit none + contains + subroutine another_kernel(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real, intent(in) :: a(nlon,nlev) + real, intent(inout) :: b(nlon,nlev) + real, intent(out) :: c(nlon,nlev) + integer :: i, j + end subroutine + end module + """ + fcode = """ + module driver_mod + use parkind1, only: jprb + use another_module, only: another_kernel + implicit none + + type state_type + real(kind=jprb), dimension(10,10), pointer :: a, b, c + class(field_3rb), pointer :: f_a, f_b, f_c + contains + procedure :: update_view => state_update_view + end type state_type + + contains + + subroutine state_update_view(self, idx) + class(state_type), intent(in) :: self + integer, intent(in) :: idx + end subroutine + + subroutine driver_routine(nlon, nlev, state) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i + + !$loki data + do i=1,nlev + call state%update_view(i) + call another_kernel(nlon, nlev, state%a, state%b, state%c) + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + + with caplog.at_level(log_levels['ERROR']): + Sourcefile.from_source(fother, frontend=frontend, xmods=[tmp_path]) + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + + with pytest.raises(RuntimeError): + driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + offload_index='i', + field_group_types=['state_type']), + role='driver', + targets=['another_kernel']) + assert len(caplog.records) == 1 + assert ('[Loki] Data offload: Routine driver_routine has not been enriched '+ + 'in another_kernel') in caplog.records[0].message + + 
+@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload_warnings(caplog, frontend, parkind_mod, field_module, tmp_path): + fother_state = """ + module state_type_mod + implicit none + type state_type2 + real, dimension(10,10), pointer :: a, b, c + contains + procedure :: update_view => state_update_view + end type state_type2 + + contains + + subroutine state_update_view(self, idx) + class(state_type2), intent(in) :: self + integer, intent(in) :: idx + end subroutine + end module + """ + fother_mod= """ + module another_module + implicit none + contains + subroutine another_kernel(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real, intent(in) :: a(nlon,nlev) + real, intent(inout) :: b(nlon,nlev) + real, intent(out) :: c(nlon,nlev) + integer :: i, j + end subroutine + end module + """ + fcode = """ + module driver_mod + use state_type_mod, only: state_type2 + use parkind1, only: jprb + use field_module, only: field_2rb, field_3rb + use another_module, only: another_kernel + + implicit none + + type state_type + real(kind=jprb), dimension(10,10), pointer :: a, b, c + class(field_3rb), pointer :: f_a, f_b, f_c + contains + procedure :: update_view => state_update_view + end type state_type + + contains + + subroutine state_update_view(self, idx) + class(state_type), intent(in) :: self + integer, intent(in) :: idx + end subroutine + + subroutine kernel_routine(nlon, nlev, a, b, c) + integer, intent(in) :: nlon, nlev + real(kind=jprb), intent(in) :: a(nlon,nlev) + real(kind=jprb), intent(inout) :: b(nlon,nlev) + real(kind=jprb), intent(out) :: c(nlon,nlev) + integer :: i, j + + do j=1, nlon + do i=1, nlev + b(i,j) = a(i,j) + 0.1 + c(i,j) = 0.1 + end do + end do + end subroutine kernel_routine + + subroutine driver_routine(nlon, nlev, state, state2) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + type(state_type2), intent(inout) :: state2 + + integer :: i + real(kind=jprb) :: a(nlon,nlev) + real, 
pointer :: loki_devptr_prefix_state_b + + !$loki data + do i=1,nlev + call state%update_view(i) + call kernel_routine(nlon, nlev, a, state%b, state2%c) + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + with caplog.at_level(log_levels['WARNING']): + Sourcefile.from_source(fother_state, frontend=frontend, xmods=[tmp_path]) + Sourcefile.from_source(fother_mod, frontend=frontend, xmods=[tmp_path]) + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + offload_index='i', + field_group_types=['state_type']), + role='driver', + targets=['kernel_routine']) + + assert len(caplog.records) == 3 + assert (('[Loki] Data offload: Raw array object a encountered in' + +' driver_routine that is not wrapped by a Field API object') + in caplog.records[0].message) + assert ('[Loki] Data offload: The parent object state2 of type state_type2 is not in the' + + ' list of field wrapper types') in caplog.records[1].message + assert ('[Loki] Data offload: The routine driver_routine already has a' + + ' variable named loki_devptr_prefix_state_b') in caplog.records[2].message From 5ee7c8c59de57576c525604865bf66cf4fc5fd37 Mon Sep 17 00:00:00 2001 From: Johan Ericsson Date: Tue, 19 Nov 2024 16:49:11 +0100 Subject: [PATCH 10/12] Added missing docstrings, updated caplog usage in tests, and changed kwargs to named kwargs in FieldOffload init --- loki/transformations/data_offload.py | 80 +++++++++++++++++-- loki/transformations/parallel/field_api.py | 29 ++++++- .../tests/test_data_offload.py | 72 ++++++++--------- 3 files changed, 135 insertions(+), 46 deletions(-) diff --git a/loki/transformations/data_offload.py b/loki/transformations/data_offload.py index d5c4f4d72..c72cc1ee6 100644 --- a/loki/transformations/data_offload.py +++ 
b/loki/transformations/data_offload.py @@ -927,7 +927,8 @@ def _append_routine_arguments(self, routine, item): def find_target_calls(region, targets): - """Returns a list of all calls to targets inside the region + """ + Returns a list of all calls to targets inside the region. Parameters ---------- @@ -942,38 +943,101 @@ def find_target_calls(region, targets): class FieldOffloadTransformation(Transformation): + """ + + Transformation to offload arrays owned by Field API fields to the device. **This transformation is IFS specific.** + + The transformation assumes that fields are wrapped in derived types specified in + ``field_group_types`` and will only offload arrays that are members of such derived types. + In the process this transformation removes calls to Field API ``update_view`` and adds + declarations for the device pointers to the driver subroutine. + + The transformation acts on ``!$loki data`` regions and offloads all :any:`Array` + symbols that satisfy the following conditions: + + 1. The array is a member of an object that is of a type specified in ``field_group_types``. + + 2. The array is passed as a parameter to at least one of the kernel targets passed to ``transform_subroutine``. + + Parameters + ---------- + devptr_prefix: str, optional + The prefix of device pointers added by this transformation (defaults to ``'loki_devptr_'``). + field_group_types: list or tuple of str, optional + Names of the field group types with members that may be offloaded (defaults to ``['']``). + offload_index: str, optional + Name of the index variable to inject in the outermost dimension of offloaded arrays in the kernel + calls (defaults to ``'IBL'``). + """ + class FieldPointerMap: + """ + Helper class to :any:`FieldOffloadTransformation` that is used to store arrays passed to + target kernel calls and the corresponding device pointers added by the transformation. 
+ The pointer/array variable pairs are exposed through the class properties, based on + the intent of the kernel argument. + """ def __init__(self, devptrs, inargs, inoutargs, outargs): self.inargs = inargs self.inoutargs = inoutargs self.outargs = outargs self.devptrs = devptrs + @property def in_pairs(self): + """ + Iterator that yields array/pointer pairs for kernel arguments of intent(in). + + Yields + ------ + :any:`Array` + Original kernel call argument + :any:`Array` + Corresponding device pointer added by the transformation. + """ for i, inarg in enumerate(self.inargs): yield inarg, self.devptrs[i] @property def inout_pairs(self): + """ + Iterator that yields array/pointer pairs for arguments with intent(inout). + + Yields + ------ + :any:`Array` + Original kernel call argument + :any:`Array` + Corresponding device pointer added by the transformation. + """ start = len(self.inargs) for i, inoutarg in enumerate(self.inoutargs): yield inoutarg, self.devptrs[i+start] @property def out_pairs(self): + """ + Iterator that yields array/pointer pairs for arguments with intent(out). + + Yields + ------ + :any:`Array` + Original kernel call argument + :any:`Array` + Corresponding device pointer added by the transformation. 
+ """ + start = len(self.inargs)+len(self.inoutargs) for i, outarg in enumerate(self.outargs): yield outarg, self.devptrs[i+start] - def __init__(self, **kwargs): - self.deviceptr_prefix = kwargs.get('devptr_prefix', 'loki_devptr_') - field_group_types = kwargs.get('field_group_types', ['CLOUDSC_STATE_TYPE', - 'CLOUDSC_AUX_TYPE', - 'CLOUDSC_FLUX_TYPE']) + def __init__(self, devptr_prefix=None, field_group_types=None, offload_index=None): + self.deviceptr_prefix = 'loki_devptr_' if devptr_prefix is None else devptr_prefix + field_group_types = [''] if field_group_types is None else field_group_types self.field_group_types = tuple(typename.lower() for typename in field_group_types) - self.offload_index = kwargs.get('offload_index', 'IBL') + self.offload_index = 'IBL' if offload_index is None else offload_index def transform_subroutine(self, routine, **kwargs): role = kwargs['role'] @@ -982,7 +1046,7 @@ def transform_subroutine(self, routine, **kwargs): self.process_driver(routine, targets) def process_driver(self, driver, targets): - remove_field_api_view_updates(driver, self.field_group_types + tuple(s.upper() for s in self.field_group_types)) + remove_field_api_view_updates(driver, self.field_group_types) with pragma_regions_attached(driver): for region in FindNodes(PragmaRegion).visit(driver.body): # Only work on active `!$loki data` regions diff --git a/loki/transformations/parallel/field_api.py b/loki/transformations/parallel/field_api.py index 3aa529c8b..4b5705770 100644 --- a/loki/transformations/parallel/field_api.py +++ b/loki/transformations/parallel/field_api.py @@ -158,7 +158,7 @@ def get_field_type(a: sym.Array) -> sym.DerivedType: """ Returns the corresponding FIELD API type for an array. - This transformation is IFS specific and assumes that the + This function is IFS specific and assumes that the type is an array declared with one of the IFS type specifiers, e.g. 
KIND=JPRB """ type_map = ["jprb", @@ -189,6 +189,21 @@ class FieldAPITransferType(Enum): def field_get_device_data(field_ptr, dev_ptr, transfer_type: FieldAPITransferType, scope: Scope): + """ + Utility function to generate a :any:`CallStatement` corresponding to a Field API + ``GET_DEVICE_DATA`` call. + + Parameters + ---------- + field_ptr: pointer to field object + Pointer to the field to call ``GET_DEVICE_DATA`` from. + dev_ptr: :any:`Array` + Device pointer array + transfer_type: :any:`FieldAPITransferType` + Field API transfer type to determine which ``GET_DEVICE_DATA`` method to call. + scope: :any:`Scope` + Scope of the created :any:`CallStatement` + """ if not isinstance(transfer_type, FieldAPITransferType): raise TypeError(f"transfer_type must be of type FieldAPITransferType, but is of type {type(transfer_type)}") if transfer_type == FieldAPITransferType.READ_ONLY: @@ -205,5 +220,17 @@ def field_get_device_data(field_ptr, dev_ptr, transfer_type: FieldAPITransferTyp def field_sync_host(field_ptr, scope): + """ + Utility function to generate a :any:`CallStatement` corresponding to a Field API + ``SYNC_HOST`` call. + + Parameters + ---------- + field_ptr: pointer to field object + Pointer to the field to call ``SYNC_HOST`` from. 
+ scope: :any:`Scope` + Scope of the created :any:`CallStatement` + """ + procedure_name = 'SYNC_HOST_RDWR' return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), arguments=()) diff --git a/loki/transformations/tests/test_data_offload.py b/loki/transformations/tests/test_data_offload.py index 458169dfe..18a2063db 100644 --- a/loki/transformations/tests/test_data_offload.py +++ b/loki/transformations/tests/test_data_offload.py @@ -117,26 +117,24 @@ def test_data_offload_region_openacc(caplog, frontend, assume_deviceptr, assume_ end do END SUBROUTINE kernel_routine """ - with caplog.at_level(log_levels['ERROR']): - driver = Sourcefile.from_source(fcode_driver, frontend=frontend)['driver_routine'] - kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] - driver.enrich(kernel) - - if assume_deviceptr and assume_acc_mapped: + driver = Sourcefile.from_source(fcode_driver, frontend=frontend)['driver_routine'] + kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] + driver.enrich(kernel) + + if assume_deviceptr and assume_acc_mapped: + caplog.clear() + with caplog.at_level(log_levels['ERROR']): with pytest.raises(RuntimeError): - driver.apply(DataOffloadTransformation(assume_deviceptr=assume_deviceptr, - assume_acc_mapped=assume_acc_mapped), - role='driver', - targets=['kernel_routine']) + data_offload_trafo = DataOffloadTransformation(assume_deviceptr=assume_deviceptr, + assume_acc_mapped=assume_acc_mapped) assert len(caplog.records) == 1 assert ("[Loki] Data offload: Can't assume both acc_mapped and non-mapped device pointers" + " for device data offload") in caplog.records[0].message return - driver.apply(DataOffloadTransformation(assume_deviceptr=assume_deviceptr, - assume_acc_mapped=assume_acc_mapped), - role='driver', - targets=['kernel_routine']) + data_offload_trafo = DataOffloadTransformation(assume_deviceptr=assume_deviceptr, + assume_acc_mapped=assume_acc_mapped) 
+ driver.apply(data_offload_trafo, role='driver', targets=['kernel_routine']) pragmas = FindNodes(Pragma).visit(driver.body) assert len(pragmas) == 2 @@ -964,8 +962,8 @@ def test_field_offload_multiple_calls(frontend, parkind_mod, field_module, tmp_p procedure :: update_view => state_update_view end type state_type - contains + subroutine state_update_view(self, idx) class(state_type), intent(in) :: self integer, intent(in) :: idx @@ -1187,21 +1185,21 @@ def test_field_offload_unknown_kernel(caplog, frontend, parkind_mod, field_modul end module driver_mod """ + Sourcefile.from_source(fother, frontend=frontend, xmods=[tmp_path]) + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + + field_offload_trafo = FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + offload_index='i', + field_group_types=['state_type']) + caplog.clear() with caplog.at_level(log_levels['ERROR']): - Sourcefile.from_source(fother, frontend=frontend, xmods=[tmp_path]) - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] - driver = driver_mod['driver_routine'] - deviceptr_prefix = 'loki_devptr_prefix_' - with pytest.raises(RuntimeError): - driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, - offload_index='i', - field_group_types=['state_type']), - role='driver', - targets=['another_kernel']) + driver.apply(field_offload_trafo, role='driver', targets=['another_kernel']) assert len(caplog.records) == 1 assert ('[Loki] Data offload: Routine driver_routine has not been enriched '+ - 'in another_kernel') in caplog.records[0].message + 'in another_kernel') in caplog.records[0].message @pytest.mark.parametrize('frontend', available_frontends()) @@ -1293,18 +1291,18 @@ def test_field_offload_warnings(caplog, frontend, parkind_mod, field_module, tmp end subroutine driver_routine end module driver_mod """ + 
Sourcefile.from_source(fother_state, frontend=frontend, xmods=[tmp_path]) + Sourcefile.from_source(fother_mod, frontend=frontend, xmods=[tmp_path]) + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + + field_offload_trafo = FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + offload_index='i', + field_group_types=['state_type']) + caplog.clear() with caplog.at_level(log_levels['WARNING']): - Sourcefile.from_source(fother_state, frontend=frontend, xmods=[tmp_path]) - Sourcefile.from_source(fother_mod, frontend=frontend, xmods=[tmp_path]) - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] - driver = driver_mod['driver_routine'] - deviceptr_prefix = 'loki_devptr_prefix_' - driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, - offload_index='i', - field_group_types=['state_type']), - role='driver', - targets=['kernel_routine']) - + driver.apply(field_offload_trafo, role='driver', targets=['kernel_routine']) assert len(caplog.records) == 3 assert (('[Loki] Data offload: Raw array object a encountered in' +' driver_routine that is not wrapped by a Field API object') From 67d1a302b1a2b6120105d91d5cd5219d40d2d0c3 Mon Sep 17 00:00:00 2001 From: Johan Ericsson Date: Wed, 20 Nov 2024 10:32:44 +0100 Subject: [PATCH 11/12] Data Offload: renamed assume_acc_mapped to present_on_device and added requirement of present_on_device to assume_deviceptr --- loki/transformations/data_offload.py | 37 ++++++++++--------- .../tests/test_data_offload.py | 16 ++++---- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/loki/transformations/data_offload.py b/loki/transformations/data_offload.py index c72cc1ee6..5f963f25a 100644 --- a/loki/transformations/data_offload.py +++ b/loki/transformations/data_offload.py @@ -41,6 +41,8 @@ class DataOffloadTransformation(Transformation): 
---------- remove_openmp : bool Remove any existing OpenMP pragmas inside the marked region. + present_on_device : bool + Assume arrays are already offloaded and present on device. assume_deviceptr : bool Mark all offloaded arrays as true device-pointers if data offload is being managed outside of structured OpenACC data regions. @@ -52,11 +54,11 @@ def __init__(self, **kwargs): self.has_data_regions = False self.remove_openmp = kwargs.get('remove_openmp', False) self.assume_deviceptr = kwargs.get('assume_deviceptr', False) - self.assume_acc_mapped = kwargs.get('assume_acc_mapped', False) + self.present_on_device = kwargs.get('present_on_device', False) - if self.assume_deviceptr and self.assume_acc_mapped: - error("[Loki] Data offload: Can't assume both acc_mapped and " + - "non-mapped device pointers for device data offload") + if self.assume_deviceptr and not self.present_on_device: + error("[Loki] Data offload: Can't assume device pointer arrays without arrays being marked" + + "present on device.") raise RuntimeError def transform_subroutine(self, routine, **kwargs): @@ -158,20 +160,21 @@ def insert_data_offload_pragmas(self, routine, targets): inoutargs = tuple(dict.fromkeys(inoutargs)) # Now generate the pre- and post pragmas (OpenACC) - if self.assume_deviceptr: - offload_args = inargs + outargs + inoutargs - if offload_args: - deviceptr = f' deviceptr({", ".join(offload_args)})' + if self.present_on_device: + if self.assume_deviceptr: + offload_args = inargs + outargs + inoutargs + if offload_args: + deviceptr = f' deviceptr({", ".join(offload_args)})' + else: + deviceptr = '' + pragma = Pragma(keyword='acc', content=f'data{deviceptr}') else: - deviceptr = '' - pragma = Pragma(keyword='acc', content=f'data{deviceptr}') - elif self.assume_acc_mapped: - offload_args = inargs + outargs + inoutargs - if offload_args: - present = f' present({", ".join(offload_args)})' - else: - present = '' - pragma = Pragma(keyword='acc', content=f'data{present}') +
offload_args = inargs + outargs + inoutargs + if offload_args: + present = f' present({", ".join(offload_args)})' + else: + present = '' + pragma = Pragma(keyword='acc', content=f'data{present}') else: copyin = f'copyin({", ".join(inargs)})' if inargs else '' copy = f'copy({", ".join(inoutargs)})' if inoutargs else '' diff --git a/loki/transformations/tests/test_data_offload.py b/loki/transformations/tests/test_data_offload.py index 18a2063db..4dbf915c5 100644 --- a/loki/transformations/tests/test_data_offload.py +++ b/loki/transformations/tests/test_data_offload.py @@ -80,8 +80,8 @@ def fixture_field_module(tmp_path, frontend): @pytest.mark.parametrize('frontend', available_frontends()) @pytest.mark.parametrize('assume_deviceptr', [True, False]) -@pytest.mark.parametrize('assume_acc_mapped', [True, False]) -def test_data_offload_region_openacc(caplog, frontend, assume_deviceptr, assume_acc_mapped): +@pytest.mark.parametrize('present_on_device', [True, False]) +def test_data_offload_region_openacc(caplog, frontend, assume_deviceptr, present_on_device): """ Test the creation of a simple device data offload region (`!$acc update`) from a `!$loki data` region with a single @@ -121,19 +121,19 @@ def test_data_offload_region_openacc(caplog, frontend, assume_deviceptr, assume_ kernel = Sourcefile.from_source(fcode_kernel, frontend=frontend)['kernel_routine'] driver.enrich(kernel) - if assume_deviceptr and assume_acc_mapped: + if assume_deviceptr and not present_on_device: caplog.clear() with caplog.at_level(log_levels['ERROR']): with pytest.raises(RuntimeError): data_offload_trafo = DataOffloadTransformation(assume_deviceptr=assume_deviceptr, - assume_acc_mapped=assume_acc_mapped) + present_on_device=present_on_device) assert len(caplog.records) == 1 - assert ("[Loki] Data offload: Can't assume both acc_mapped and non-mapped device pointers" + - " for device data offload") in caplog.records[0].message + assert ("[Loki] Data offload: Can't assume device pointer arrays 
without arrays being marked" + + "present on device.") in caplog.records[0].message return data_offload_trafo = DataOffloadTransformation(assume_deviceptr=assume_deviceptr, - assume_acc_mapped=assume_acc_mapped) + present_on_device=present_on_device) driver.apply(data_offload_trafo, role='driver', targets=['kernel_routine']) pragmas = FindNodes(Pragma).visit(driver.body) @@ -143,7 +143,7 @@ def test_data_offload_region_openacc(caplog, frontend, assume_deviceptr, assume_ assert 'deviceptr' in pragmas[0].content params = get_pragma_parameters(pragmas[0], only_loki_pragmas=False) assert all(var in params['deviceptr'] for var in ('a', 'b', 'c')) - elif assume_acc_mapped: + elif present_on_device: assert 'present' in pragmas[0].content params = get_pragma_parameters(pragmas[0], only_loki_pragmas=False) assert all(var in params['present'] for var in ('a', 'b', 'c')) From ac0aa752f359d1d1e4296d5a2b1c5ee745445072 Mon Sep 17 00:00:00 2001 From: Johan Ericsson Date: Wed, 20 Nov 2024 12:08:00 +0100 Subject: [PATCH 12/12] Field Offload, added missing support for sliced arguments to calls --- loki/transformations/data_offload.py | 8 +- .../tests/test_data_offload.py | 77 +++++++++++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/loki/transformations/data_offload.py b/loki/transformations/data_offload.py index 5f963f25a..1c662d36c 100644 --- a/loki/transformations/data_offload.py +++ b/loki/transformations/data_offload.py @@ -996,7 +996,7 @@ def in_pairs(self): ______ :any:`Array` Original kernel call argument - :any: `Array` + :any:`Array` Corresponding device pointer added by the transformation. 
""" for i, inarg in enumerate(self.inargs): @@ -1143,8 +1143,12 @@ def _replace_kernel_args(self, driver, kernel_calls, offload_map): change_map = {} offload_idx_expr = driver.variable_map[self.offload_index] for arg, devptr in chain(offload_map.in_pairs, offload_map.inout_pairs, offload_map.out_pairs): - dims = (sym.RangeIndex((None, None)),) * (len(devptr.shape)-1) + (offload_idx_expr,) + if len(arg.dimensions) != 0: + dims = arg.dimensions + (offload_idx_expr,) + else: + dims = (sym.RangeIndex((None, None)),) * (len(devptr.shape)-1) + (offload_idx_expr,) change_map[arg] = devptr.clone(dimensions=dims) + arg_transformer = SubstituteExpressions(change_map, inplace=True) for call in kernel_calls: arg_transformer.visit(call) diff --git a/loki/transformations/tests/test_data_offload.py b/loki/transformations/tests/test_data_offload.py index 4dbf915c5..c6cb5cf11 100644 --- a/loki/transformations/tests/test_data_offload.py +++ b/loki/transformations/tests/test_data_offload.py @@ -67,6 +67,12 @@ def fixture_field_module(tmp_path, frontend): contains procedure :: update_view end type field_3rb + + type field_4rb + real, pointer :: f_ptr(:,:,:) + contains + procedure :: update_view + end type field_4rb contains subroutine update_view(self, idx) @@ -947,6 +953,77 @@ def test_field_offload(frontend, parkind_mod, field_module, tmp_path): assert devptr.name in (arg.name for arg in kernel_call.arguments) +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload_slices(frontend, parkind_mod, field_module, tmp_path): + fcode = """ + module driver_mod + use parkind1, only: jprb + use field_module, only: field_4rb + implicit none + + type state_type + real(kind=jprb), dimension(10,10,10), pointer :: a, b, c, d + class(field_4rb), pointer :: f_a, f_b, f_c, f_d + contains + procedure :: update_view => state_update_view + end type state_type + + contains + + subroutine state_update_view(self, idx) + class(state_type), intent(in) :: self + integer, 
intent(in) :: idx + end subroutine + + subroutine kernel_routine(nlon, nlev, a, b, c, d) + integer, intent(in) :: nlon, nlev + real(kind=jprb), intent(in) :: a(nlon,nlev,nlon) + real(kind=jprb), intent(inout) :: b(nlon,nlev) + real(kind=jprb), intent(out) :: c(nlon) + real(kind=jprb), intent(in) :: d(nlon,nlev,nlon) + integer :: i, j + end subroutine kernel_routine + + subroutine driver_routine(nlon, nlev, state) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i + !$loki data + do i=1,nlev + call kernel_routine(nlon, nlev, state%a(:,:,1), state%b(:,1,1), state%c(1,1,1), state%d) + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver = driver_mod['driver_routine'] + deviceptr_prefix = 'loki_devptr_prefix_' + driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, + offload_index='i', + field_group_types=['state_type']), + role='driver', + targets=['kernel_routine']) + + calls = FindNodes(CallStatement).visit(driver.body) + kernel_call = next(c for c in calls if c.name=='kernel_routine') + # verify that new pointer variables are created and used in driver calls + for var, rank in zip(['state_d', 'state_a', 'state_b', 'state_c',], [4, 3, 2, 1]): + name = deviceptr_prefix + var + assert name in driver.variable_map + devptr = driver.variable_map[name] + assert isinstance(devptr, sym.Array) + assert len(devptr.shape) == 4 + assert devptr.name in (arg.name for arg in kernel_call.arguments) + arg = next(arg for arg in kernel_call.arguments if devptr.name in arg.name) + assert arg.dimensions == ((sym.RangeIndex((None,None)),)*(rank-1) + + (sym.IntLiteral(1),)*(4-rank) + + (sym.Scalar(name='i'),)) + + + + @pytest.mark.parametrize('frontend', available_frontends()) def test_field_offload_multiple_calls(frontend, parkind_mod, field_module, tmp_path): fcode = """