diff --git a/loki/ir/pragma_utils.py b/loki/ir/pragma_utils.py index 3a17a822a..ae1fab732 100644 --- a/loki/ir/pragma_utils.py +++ b/loki/ir/pragma_utils.py @@ -532,7 +532,7 @@ def visit_tuple(self, o, **kwargs): if start in o: # If a pair does not live in the same tuple we have a problem. if stop not in o: - warning('[Loki::IR] Cannot find matching end for pragma {start} at same IR level!') + warning(f'[Loki::IR] Cannot find matching end for pragma {start} at same IR level!') continue # Create the PragmaRegion node and replace in tuple diff --git a/loki/transformations/__init__.py b/loki/transformations/__init__.py index 11c8bdbf0..236607132 100644 --- a/loki/transformations/__init__.py +++ b/loki/transformations/__init__.py @@ -36,3 +36,4 @@ from loki.transformations.split_read_write import * # noqa from loki.transformations.loop_blocking import * # noqa from loki.transformations.routine_signatures import * # noqa +from loki.transformations.parallel import * # noqa diff --git a/loki/transformations/parallel/__init__.py b/loki/transformations/parallel/__init__.py new file mode 100644 index 000000000..0071a9814 --- /dev/null +++ b/loki/transformations/parallel/__init__.py @@ -0,0 +1,15 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +""" +Sub-package with utilities to remove, generate and manipulate parallel +regions. +""" + +from loki.transformations.parallel.block_loop import * # noqa +from loki.transformations.parallel.field_api import * # noqa +from loki.transformations.parallel.openmp_region import * # noqa diff --git a/loki/transformations/parallel/block_loop.py b/loki/transformations/parallel/block_loop.py new file mode 100644 index 000000000..1926152b6 --- /dev/null +++ b/loki/transformations/parallel/block_loop.py @@ -0,0 +1,137 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +""" +Transformation utilities to remove and generate parallel block loops. +""" + +from loki.expression import symbols as sym +from loki.ir import ( + nodes as ir, FindNodes, Transformer, pragma_regions_attached, + is_loki_pragma +) +from loki.tools import as_tuple +from loki.types import BasicType, SymbolAttributes + + +__all__ = ['remove_block_loops', 'add_block_loops'] + + +def remove_block_loops(routine, dimension): + """ + Remove any outer block :any:`Loop` from a given :any:`Subroutine. + + The loops are identified according to a given :any:`Dimension` + object, and will remove auxiliary assignments of index and bound + variables, as commonly used in IFS-style block loops. + + Parameters + ---------- + routine: :any:`Subroutine` + Subroutine from which to remove block loops + dimension : :any:`Dimension` + The dimension object describing loop variables + """ + idx = dimension.index + variables = as_tuple(dimension.indices) + variables += as_tuple(dimension.lower) + variables += as_tuple(dimension.upper) + + class RemoveBlockLoopTransformer(Transformer): + """ + :any:`Transformer` to remove driver-level block loops. + """ + + def visit_Loop(self, loop, **kwargs): # pylint: disable=unused-argument + body = self.visit(loop.body, **kwargs) + + if not loop.variable == idx: + return loop._rebuild(body=body) + + to_remove = tuple( + a for a in FindNodes(ir.Assignment).visit(body) + if a.lhs in variables + ) + return tuple(n for n in body if n not in to_remove) + + routine.body = RemoveBlockLoopTransformer().visit(routine.body) + + +def add_block_loops(routine, dimension, default_type=None): + """ + Insert IFS-style (NPROMA) driver block-loops in ``!$loki + parallel`` regions. + + The provided :any:`Dimension` object describes the variables to + used when generating the loop and default assignments. It + encapsulates IFS-specific convention, where a strided loop over + points, defined by ``dimension.index``, ``dimension.bounds`` and + ``dimension.step`` is created, alongside assignments that define + the corresponding block index and upper bound, defined by + ``dimension.indices[1]`` and ``dimension.upper[1]`` respectively. + + Parameters + ---------- + routine : :any:`Subroutine` + The routine in which to add block loops. + dimension : :any:`Dimension` + The dimension object describing the block loop variables. + default_type : :any:`SymbolAttributes`, optional + Default type to use when creating variables; defaults to + ``integer(kind=JPIM)``. + """ + + _default = SymbolAttributes(BasicType.INTEGER, kind='JPIM') + dtype = default_type if default_type else _default + + lidx = routine.parse_expr(dimension.index) + bidx = routine.parse_expr(dimension.indices[1]) + bupper = routine.parse_expr(dimension.upper[1]) + + # Ensure that local integer variables are declared + for v in (lidx, bupper, bidx): + if not v in routine.variable_map: + routine.variables += (v.clone(type=dtype),) + + def _create_block_loop(body, scope): + """ + Generate block loop object, including indexing preamble + """ + + bsize = scope.parse_expr(dimension.step) + lupper = scope.parse_expr(dimension.upper[0]) + lrange = sym.LoopRange((sym.Literal(1), lupper, bsize)) + + expr_tail = scope.parse_expr(f'{lupper}-{lidx}+1') + expr_max = sym.InlineCall( + function=sym.ProcedureSymbol('MIN', scope=scope), parameters=(bsize, expr_tail) + ) + preamble = (ir.Assignment(lhs=bupper, rhs=expr_max),) + preamble += (ir.Assignment( + lhs=bidx, rhs=scope.parse_expr(f'({lidx}-1)/{bsize}+1') + ),) + + return ir.Loop(variable=lidx, bounds=lrange, body=preamble + body) + + class InsertBlockLoopTransformer(Transformer): + + def visit_PragmaRegion(self, region, **kwargs): + """ + (Re-)insert driver-level block loops into marked parallel region. + """ + if not is_loki_pragma(region.pragma, starts_with='parallel'): + return region + + scope = kwargs.get('scope') + + loop = _create_block_loop(body=region.body, scope=scope) + + region._update(body=(ir.Comment(''), loop)) + return region + + with pragma_regions_attached(routine): + routine.body = InsertBlockLoopTransformer().visit(routine.body, scope=routine) diff --git a/loki/transformations/parallel/field_api.py b/loki/transformations/parallel/field_api.py new file mode 100644 index 000000000..e1e06b491 --- /dev/null +++ b/loki/transformations/parallel/field_api.py @@ -0,0 +1,152 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +""" +Transformation utilities to manage and inject FIELD-API boilerplate code. +""" + +from loki.expression import symbols as sym +from loki.ir import ( + nodes as ir, FindNodes, FindVariables, Transformer +) +from loki.logging import warning +from loki.tools import as_tuple + + +__all__ = [ + 'remove_field_api_view_updates', 'add_field_api_view_updates' +] + + +def remove_field_api_view_updates(routine, field_group_types, dim_object=None): + """ + Remove FIELD API boilerplate calls for view updates of derived types. + + This utility is intended to remove the IFS-specific group type + objects that provide block-scope view pointers to deep kernel + trees. It will remove all calls to ``UPDATE_VIEW`` on derive-type + objects with the respective types. + + Parameters + ---------- + routine : :any:`Subroutine` + The routine from which to remove FIELD API update calls + field_group_types : tuple of str + List of names of the derived types of "field group" objects to remove + dim_object : str, optional + Optional name of the "dimension" object; if provided it will remove the + call to ``%UPDATE(...)`` accordingly. + """ + field_group_types = as_tuple(str(fgt).lower() for fgt in field_group_types) + + class RemoveFieldAPITransformer(Transformer): + + def visit_CallStatement(self, call, **kwargs): # pylint: disable=unused-argument + + if '%update_view' in str(call.name).lower(): + if not str(call.name.parent.type.dtype).lower() in field_group_types: + warning(f'[Loki::ControlFlow] Removing {call.name} call, but not in field group types!') + + return None + + if dim_object and f'{dim_object}%update'.lower() in str(call.name).lower(): + return None + + return call + + def visit_Assignment(self, assign, **kwargs): # pylint: disable=unused-argument + if str(assign.lhs.type.dtype).lower() in field_group_types: + warning(f'[Loki::ControlFlow] Found LHS field group assign: {assign}') + return assign + + def visit_Loop(self, loop, **kwargs): + loop = self.visit_Node(loop, **kwargs) + return loop if loop.body else None + + def visit_Conditional(self, cond, **kwargs): + cond = super().visit_Node(cond, **kwargs) + return cond if cond.body else None + + routine.body = RemoveFieldAPITransformer().visit(routine.body) + + +def add_field_api_view_updates(routine, dimension, field_group_types, dim_object=None): + """ + Adds FIELD API boilerplate calls for view updates. + + The provided :any:`Dimension` object describes the local loop variables to + pass to the respective update calls. In particular, ``dimension.indices[1]`` + is used to denote the block loop index that is passed to ``UPDATE_VIEW()`` + calls on field group object. The list of type names ``field_group_types`` + is used to identify for which objcets the view update calls get added. + + Parameters + ---------- + routine : :any:`Subroutine` + The routine from which to remove FIELD API update calls + dimension : :any:`Dimension` + The dimension object describing the block loop variables. + field_group_types : tuple of str + List of names of the derived types of "field group" objects to remove + dim_object : str, optional + Optional name of the "dimension" object; if provided it will remove the + call to ``%UPDATE(...)`` accordingly. + """ + + def _create_dim_update(scope, dim_object): + index = scope.parse_expr(dimension.index) + upper = scope.parse_expr(dimension.upper[1]) + bindex = scope.parse_expr(dimension.indices[1]) + idims = scope.get_symbol(dim_object) + csym = sym.ProcedureSymbol(name='UPDATE', parent=idims, scope=idims.scope) + return ir.CallStatement(name=csym, arguments=(bindex, upper, index), kwarguments=()) + + def _create_view_updates(section, scope): + bindex = scope.parse_expr(dimension.indices[1]) + + fgroup_vars = sorted(tuple( + v for v in FindVariables(unique=True).visit(section) + if str(v.type.dtype) in field_group_types + ), key=str) + calls = () + for fgvar in fgroup_vars: + fgsym = scope.get_symbol(fgvar.name) + csym = sym.ProcedureSymbol(name='UPDATE_VIEW', parent=fgsym, scope=fgsym.scope) + calls += (ir.CallStatement(name=csym, arguments=(bindex,), kwarguments=()),) + + return calls + + class InsertFieldAPIViewsTransformer(Transformer): + """ Injects FIELD-API view updates into block loops """ + + def visit_Loop(self, loop, **kwargs): # pylint: disable=unused-argument + if not loop.variable == 'JKGLO': + return loop + + scope = kwargs.get('scope') + + # Find the loop-setup assignments + _loop_symbols = dimension.indices + _loop_symbols += as_tuple(dimension.lower) + as_tuple(dimension.upper) + loop_setup = tuple( + a for a in FindNodes(ir.Assignment).visit(loop.body) + if a.lhs in _loop_symbols + ) + idx = max(loop.body.index(a) for a in loop_setup) + 1 + + # Prepend FIELD API boilerplate + preamble = ( + ir.Comment(''), ir.Comment('! Set up thread-local view pointers') + ) + if dim_object: + preamble += (_create_dim_update(scope, dim_object=dim_object),) + preamble += _create_view_updates(loop.body, scope) + + loop._update(body=loop.body[:idx] + preamble + loop.body[idx:]) + return loop + + routine.body = InsertFieldAPIViewsTransformer().visit(routine.body, scope=routine) diff --git a/loki/transformations/parallel/openmp_region.py b/loki/transformations/parallel/openmp_region.py new file mode 100644 index 000000000..d3a50f996 --- /dev/null +++ b/loki/transformations/parallel/openmp_region.py @@ -0,0 +1,282 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +""" +Sub-package with utilities to remove and manipulate parallel OpenMP regions. +""" + +from loki.analyse import dataflow_analysis_attached +from loki.expression import symbols as sym, parse_expr +from loki.ir import ( + nodes as ir, FindNodes, FindVariables, Transformer, + SubstituteStringExpressions, is_loki_pragma, pragmas_attached, + pragma_regions_attached +) +from loki.tools import dict_override, flatten +from loki.types import DerivedType + + +__all__ = [ + 'remove_openmp_regions', 'add_openmp_regions', + 'remove_firstprivate_copies', 'add_firstprivate_copies' +] + + +def remove_openmp_regions(routine, insert_loki_parallel=False): + """ + Remove any OpenMP parallel annotations (``!$omp parallel``). + + Optionally, this can replace ``!$omp parallel`` with ``!$loki + parallel`` pragmas. + + Parameters + ---------- + routine : :any:`Subroutine` + The routine from which to strip all OpenMP annotations. + insert_loki_parallel : bool + Flag for the optional insertion of ``!$loki parallel` pragmas + """ + + class RemoveOpenMPRegionTransformer(Transformer): + """ + Remove OpenMP pragmas from "parallel" regions and remove all + contained OpenMP pragmas and pragma regions. + + Optionally replaces outer ``!$omp parallel`` region with + ``!$loki parallel`` region. + """ + + def visit_PragmaRegion(self, region, **kwargs): + """ + Perform the filtering and removal of OpenMP pragma regions. + + Parameters + ---------- + active : tuple + Flag to indicate whether we're actively traversing an + outer OpenMP region. + """ + if not region.pragma.keyword.lower() == 'omp': + return region + + if kwargs['active'] and region.pragma.keyword.lower() == 'omp': + # Remove other OpenMP pragma regions when in active mode + region._update(pragma=None, pragma_post=None) + return region + + if 'parallel' in region.pragma.content.lower(): + # Replace or remove pragmas + pragma = None + pragma_post = None + if insert_loki_parallel: + pragma = ir.Pragma(keyword='loki', content='parallel') + pragma_post = ir.Pragma(keyword='loki', content='end parallel') + + with dict_override(kwargs, {'active': True}): + body = self.visit(region.body, **kwargs) + + region._update(body=body, pragma=pragma, pragma_post=pragma_post) + + return region + + def visit_Pragma(self, pragma, **kwargs): + """ Remove other OpenMP pragmas if in active region """ + + if kwargs['active'] and pragma.keyword.lower() == 'omp': + return None + + return pragma + + with pragma_regions_attached(routine): + routine.body = RemoveOpenMPRegionTransformer().visit(routine.body, active=False) + + +def add_openmp_regions( + routine, dimension, shared_variables=None, field_group_types=None +): + """ + Add the OpenMP directives for a parallel driver region with an + outer block loop. + + Parameters + ---------- + routine : :any:`Subroutine` + The routine to which to add OpenMP parallel regions. + dimension : :any:`Dimension` + The dimension object describing the block loop variables. + shared_variables : tuple of str + Names of variables that should neither be private nor firstprivate + field_group_types : tuple of str + Names of types designating "field groups", which should be + treated as firstprivate + """ + shared_variables = shared_variables or {} + field_group_types = field_group_types or {} + + # First get local variables and separate scalars and arrays + routine_arguments = routine.arguments + local_variables = tuple( + v for v in routine.variables if v not in routine_arguments + ) + local_scalars = tuple( + v for v in local_variables if isinstance(v, sym.Scalar) + ) + # Filter arrays by block-dim size, as these are global + local_arrays = tuple( + v for v in local_variables + if isinstance(v, sym.Array) and not v.dimensions[-1] == dimension.size + ) + + with pragma_regions_attached(routine): + with dataflow_analysis_attached(routine): + for region in FindNodes(ir.PragmaRegion).visit(routine.body): + if not is_loki_pragma(region.pragma, starts_with='parallel'): + return + + # Accumulate the set of locally used symbols and chase parents + symbols = tuple(region.uses_symbols | region.defines_symbols) + symbols = tuple(dict.fromkeys(flatten( + s.parents if s.parent else s for s in symbols + ))) + + # Start with loop variables and add local scalars and arrays + local_vars = tuple(dict.fromkeys(flatten( + loop.variable for loop in FindNodes(ir.Loop).visit(region.body) + ))) + + local_vars += tuple(v for v in local_scalars if v.name in symbols) + local_vars += tuple(v for v in local_arrays if v.name in symbols ) + + # Also add used symbols that might be field groups + local_vars += tuple(dict.fromkeys( + v for v in routine_arguments + if v.name in symbols and str(v.type.dtype) in field_group_types + )) + + # Filter out known global variables + local_vars = tuple(v for v in local_vars if v.name not in shared_variables) + + # Make field group types firstprivate + firstprivates = tuple(dict.fromkeys( + v.name for v in local_vars if v.type.dtype.name in field_group_types + )) + # Also make values that have an initial value firstprivate + firstprivates += tuple(v.name for v in local_vars if v.type.initial) + + # Mark all other variables as private + privates = tuple(dict.fromkeys( + v.name for v in local_vars if v.name not in firstprivates + )) + + s_fp_vars = ", ".join(str(v) for v in firstprivates) + s_firstprivate = f'FIRSTPRIVATE({s_fp_vars})' if firstprivates else '' + s_private = f'PRIVATE({", ".join(str(v) for v in privates)})' if privates else '' + pragma_parallel = ir.Pragma( + keyword='OMP', content=f'PARALLEL DEFAULT(SHARED) {s_private} {s_firstprivate}' + ) + region._update( + pragma=pragma_parallel, + pragma_post=ir.Pragma(keyword='OMP', content='END PARALLEL') + ) + + # And finally mark all block-dimension loops as parallel + with pragmas_attached(routine, node_type=ir.Loop): + for loop in FindNodes(ir.Loop).visit(region.body): + # Add OpenMP DO directives onto block loops + if loop.variable == dimension.index: + loop._update( + pragma=ir.Pragma(keyword='OMP', content='DO SCHEDULE(DYNAMIC,1)'), + pragma_post=ir.Pragma(keyword='OMP', content='END DO'), + ) + + +def remove_firstprivate_copies(region, fprivate_map, scope): + """ + Removes an IFS-specific workaround, where complex derived-type + objects are explicitly copied into a local copy of the object to + avoid erroneous firstprivatisation in OpenMP loops. + + Parameters + ---------- + region : tuple of :any:`Node` + The code region from which to remove firstprivate copies + fprivate_map : dict of (str, str) + String mapping of local-to-global names for explicitly + privatised objects + scope : :any:`Scope` + Scope to use for symbol susbtitution + """ + + class RemoveExplicitCopyTransformer(Transformer): + """ Remove assignments that match the firstprivatisation map """ + + def visit_Assignment(self, assign, **kwargs): # pylint: disable=unused-argument + if not isinstance(assign.lhs.type.dtype, DerivedType): + return assign + + lhs = assign.lhs.name + if lhs in fprivate_map and assign.rhs == fprivate_map[lhs]: + return None + return assign + + # Strip assignments of local copies + region = RemoveExplicitCopyTransformer().visit(region) + + # Invert the local use of the private copy + return SubstituteStringExpressions(fprivate_map, scope=scope).visit(region) + + +def add_firstprivate_copies(routine, fprivate_map): + """ + Injects IFS-specific thread-local copies of named complex derived + type objects in parallel regions. This is to prevent issues with + firstprivate variables in OpenMP loops. + + Parameters + ---------- + routine : :any:`Subroutine` + Subroutine in which to insert privatisation copies + fprivate_map : dict of (str, str) + String mapping of local-to-global names for explicitly + privatised objects + """ + inverse_map = {v: k for k, v in fprivate_map.items()} + + # Ensure the local object copies are declared + for lcl, gbl in fprivate_map.items(): + lhs = parse_expr(lcl, scope=routine) + rhs = parse_expr(gbl, scope=routine) + if not lhs in routine.variable_map: + routine.variables += (lhs.clone(type=rhs.type.clone(intent=None)),) + + class InjectExplicitCopyTransformer(Transformer): + """" Inject assignments that match the firstprivate map in parallel regions """ + + def visit_PragmaRegion(self, region, **kwargs): # pylint: disable=unused-argument + # Apply to pragma-marked "parallel" regions only + if not is_loki_pragma(region.pragma, starts_with='parallel'): + return region + + # Collect the explicit privatisation copies + lvars = FindVariables(unique=True).visit(region.body) + assigns = () + for lcl, gbl in fprivate_map.items(): + lhs = parse_expr(lcl, scope=routine) + rhs = parse_expr(gbl, scope=routine) + if rhs in lvars: + assigns += (ir.Assignment(lhs=lhs, rhs=rhs),) + + # Remap from global to local name in marked regions + region = SubstituteStringExpressions(inverse_map, scope=routine).visit(region) + + # Add the copies and return + region.prepend(assigns) + return region + + with pragma_regions_attached(routine): + # Inject assignments of local copies + routine.body = InjectExplicitCopyTransformer().visit(routine.body) diff --git a/loki/transformations/parallel/tests/test_block_loop.py b/loki/transformations/parallel/tests/test_block_loop.py new file mode 100644 index 000000000..0612855ea --- /dev/null +++ b/loki/transformations/parallel/tests/test_block_loop.py @@ -0,0 +1,167 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import pytest + +from loki import Subroutine, Module, Dimension +from loki.frontend import available_frontends, OMNI +from loki.ir import nodes as ir, FindNodes +from loki.tools import flatten + +from loki.transformations.parallel import ( + remove_block_loops, add_block_loops +) + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_remove_block_loops(tmp_path, frontend): + """ + A simple test for :any:`remove_block_loops` + """ + fcode_type = """ +module geom_mod + type geom_type + integer :: nproma, ngptot + end type geom_type +end module geom_mod +""" + + fcode = """ +subroutine test_remove_block_loop(ydgeom, npoints, nlev, arr) + use geom_mod, only: geom_type + implicit none + type(geom_type), intent(in) :: ydgeom + integer(kind=4), intent(in) :: npoints, nlev + real(kind=8), intent(inout) :: arr(:,:,:) + integer :: JKGLO, IBL, ICEND, JK, JL + + DO JKGLO=1,YDGEOM%NGPTOT,YDGEOM%NPROMA + ICEND = MIN(YDGEOM%NPROMA, YDGEOM%NGPTOT - JKGLO + 1) + IBL = (JKGLO - 1) / YDGEOM%NPROMA + 1 + + CALL MY_KERNEL(ARR(:,:,IBL)) + END DO + + + DO JKGLO=1,YDGEOM%NGPTOT,YDGEOM%NPROMA + ICEND = MIN(YDGEOM%NPROMA, YDGEOM%NGPTOT - JKGLO + 1) + IBL = (JKGLO - 1) / YDGEOM%NPROMA + 1 + + DO JK=1, NLEV + DO JL=1, NPOINTS + ARR(JL, JK, IBL) = 42.0 + END DO + END DO + END DO +end subroutine test_remove_block_loop +""" + _ = Module.from_source(fcode_type, frontend=frontend, xmods=[tmp_path]) + routine = Subroutine.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + + loops = FindNodes(ir.Loop).visit(routine.body) + assert len(loops) == 4 + assert loops[0].variable == 'jkglo' + assert loops[1].variable == 'jkglo' + assert loops[2].variable == 'jk' + assert loops[3].variable == 'jl' + assert len(FindNodes(ir.Assignment).visit(loops[0].body)) == 2 + assert len(FindNodes(ir.Assignment).visit(loops[1].body)) == 3 + + block = Dimension( + 'block', index=('jkglo', 'ibl'), step='YDGEOM%NPROMA', + lower=('1', 'ICST'), upper=('YDGEOM%NGPTOT', 'ICEND') + ) + remove_block_loops(routine, dimension=block) + + loops = FindNodes(ir.Loop).visit(routine.body) + assert len(loops) == 2 + assert loops[0].variable == 'jk' + assert loops[1].variable == 'jl' + assert len(FindNodes(ir.Assignment).visit(routine.body)) == 1 + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_add_block_loops(frontend): + """ + A simple test for :any:`add_block_loops` + """ + fcode = """ +subroutine test_add_block_loop(ydgeom, npoints, nlev, arr) + integer(kind=4), intent(in) :: npoints, nlev + real(kind=8), intent(inout) :: arr(:,:,:) + integer :: JKGLO + +!$loki parallel + call my_kernel(arr(:,:,ibl)) +!$loki end parallel + +!$loki parallel + do jk=1, nlev + do jl=1, npoints + arr(jl, jk, ibl) = 42.0 + end do + end do +!$loki end parallel + +!$omp parallel + do jk=1, nlev + do jl=1, npoints + arr(jl, jk, ibl) = 42.0 + end do + end do +!$omp end parallel + +end subroutine test_add_block_loop +""" + routine = Subroutine.from_source(fcode, frontend=frontend) + + loops = FindNodes(ir.Loop).visit(routine.body) + assert len(loops) == 4 + assert loops[0].variable == 'jk' + assert loops[1].variable == 'jl' + assert len(FindNodes(ir.Assignment).visit(routine.body)) == 2 + + blocking = Dimension( + name='block', index=('JKGLO', 'IBL'), + lower=('1', 'ICST'), + upper=('YDGEOM%NGPTOT', 'ICEND'), + step='YDGEOM%NPROMA', + size='YDGEOM%NGPBLKS', + ) + + add_block_loops(routine, dimension=blocking) + + loops = FindNodes(ir.Loop).visit(routine.body) + assert len(loops) == 6 + assert loops[0].variable == 'jkglo' + assert loops[1].variable == 'jkglo' + assert loops[2].variable == 'jk' + assert loops[3].variable == 'jl' + assert loops[4].variable == 'jk' + assert loops[5].variable == 'jl' + + assigns1 = FindNodes(ir.Assignment).visit(loops[0].body) + assert len(assigns1) == 2 + assert assigns1[0].lhs == 'ICEND' + assert str(assigns1[0].rhs) == 'MIN(YDGEOM%NPROMA, YDGEOM%NGPTOT - JKGLO + 1)' + assert assigns1[1].lhs == 'IBL' + assert assigns1[1].rhs == '(JKGLO - 1) / YDGEOM%NPROMA + 1' + + assigns2 = FindNodes(ir.Assignment).visit(loops[1].body) + assert len(assigns2) == 3 + assert assigns2[0].lhs == 'ICEND' + assert str(assigns2[0].rhs) == 'MIN(YDGEOM%NPROMA, YDGEOM%NGPTOT - JKGLO + 1)' + assert assigns2[1].lhs == 'IBL' + assert assigns2[1].rhs == '(JKGLO - 1) / YDGEOM%NPROMA + 1' + assert assigns2[2].lhs == 'arr(jl, jk, ibl)' + assert assigns2[2].rhs == 42.0 + + decls = FindNodes(ir.VariableDeclaration).visit(routine.spec) + assert len(decls) == 9 if frontend == OMNI else 5 + decl_symbols = tuple(flatten(d.symbols for d in decls)) + for v in ['JKGLO', 'IBL', 'ICEND']: + assert v in decl_symbols diff --git a/loki/transformations/parallel/tests/test_field_api.py b/loki/transformations/parallel/tests/test_field_api.py new file mode 100644 index 000000000..b752dec85 --- /dev/null +++ b/loki/transformations/parallel/tests/test_field_api.py @@ -0,0 +1,138 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import pytest + +from loki import Subroutine, Module, Dimension +from loki.frontend import available_frontends, OMNI +from loki.ir import nodes as ir, FindNodes +from loki.logging import WARNING + +from loki.transformations.parallel import ( + remove_field_api_view_updates, add_field_api_view_updates +) + + +@pytest.mark.parametrize('frontend', available_frontends( + skip=[(OMNI, 'OMNI needs full type definitions for derived types')] +)) +def test_field_api_remove_view_updates(caplog, frontend): + """ + A simple test for :any:`remove_field_api_view_updates` + """ + + fcode = """ +subroutine test_remove_block_loop(ngptot, nproma, nflux, dims, state, aux_fields, fluxes, ricks_fields) + use type_module, only: dimension_type, state_type, aux_type, flux_type, ricks_type + implicit none + integer(kind=4), intent(in) :: ngptot, nproma, nflux + type(dimension_type), intent(inout) :: dims + type(STATE_TYPE), intent(inout) :: state + type(aux_type), intent(inout) :: aux_fields + type(FLUX_type), intent(inout) :: fluxes(nflux) + type(ricks_type), intent(inout) :: ricks_fields + + integer :: JKGLO, IBL, ICEND, JK, JL, JF + + DO jkglo=1, ngptot, nproma + icend = min(nproma, ngptot - JKGLO + 1) + ibl = (jkglo - 1) / nproma + 1 + + STATE = STATE%CLONE() + + CALL DIMS%UPDATE(IBL, ICEND, JKGLO) + CALL STATE%update_VIEW(IBL) + CALL AUX_FIELDS%UPDATE_VIEW(block_index=IBL) + IF (NFLUX > 0) THEN + DO jf=1, nflux + CALL FLUXES(JF)%UPDATE_VIEW(IBL) + END DO + END IF + CALL RICKS_FIELDS%UPDATE_VIEW(IBL) + + CALL MY_KERNEL(STATE%U, STATE%V, AUX_FIELDS%STUFF, FLUXES(1)%FOO, FLUXES(2)%BAR) + END DO +end subroutine test_remove_block_loop +""" + routine = Subroutine.from_source(fcode, frontend=frontend) + + assert len(FindNodes(ir.CallStatement).visit(routine.body)) == 6 + assert len(FindNodes(ir.Conditional).visit(routine.body)) == 1 + assert len(FindNodes(ir.Loop).visit(routine.body)) == 2 + + with caplog.at_level(WARNING): + field_group_types = ['state_type', 'aux_type', 'flux_type'] + remove_field_api_view_updates( + routine, field_group_types=field_group_types, dim_object='DIMS' + ) + + assert len(caplog.records) == 2 + assert '[Loki::ControlFlow] Found LHS field group assign: Assignment:: STATE = STATE%CLONE()'\ + in caplog.records[0].message + assert '[Loki::ControlFlow] Removing RICKS_FIELDS%UPDATE_VIEW call, but not in field group types!'\ + in caplog.records[1].message + + calls = FindNodes(ir.CallStatement).visit(routine.body) + assert len(calls) == 1 + assert calls[0].name == 'MY_KERNEL' + + assert len(FindNodes(ir.Conditional).visit(routine.body)) == 0 + loops = FindNodes(ir.Loop).visit(routine.body) + assert len(loops) == 1 + assert loops[0].variable == 'jkglo' + + +@pytest.mark.parametrize('frontend', available_frontends( + skip=[(OMNI, 'OMNI needs full type definitions for derived types')] +)) +def test_field_api_add_view_updates(frontend): + """ + A simple test for :any:`add_field_api_view_updates`. + """ + + fcode = """ +subroutine test_remove_block_loop(ngptot, nproma, nflux, dims, state, aux_fields, fluxes) + implicit none + integer(kind=4), intent(in) :: ngptot, nproma, nflux + type(dimension_type), intent(inout) :: dims + type(state_type), intent(inout) :: state + type(aux_type), intent(inout) :: aux_fields + type(flux_type), intent(inout) :: fluxes + + integer :: JKGLO, IBL, ICEND, JK, JL, JF + + DO jkglo=1, ngptot, nproma + icend = min(nproma, ngptot - jkglo + 1) + ibl = (jkglo - 1) / nproma + 1 + + CALL MY_KERNEL(STATE%U, STATE%V, AUX_FIELDS%STUFF, FLUXES%FOO, FLUXES%BAR) + END DO +end subroutine test_remove_block_loop +""" + routine = Subroutine.from_source(fcode, frontend=frontend) + + assert len(FindNodes(ir.CallStatement).visit(routine.body)) == 1 + assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 + + block = Dimension( + index=('jkglo', 'ibl'), step='NPROMA', + lower=('1', 'ICST'), upper=('NGPTOT', 'ICEND') + ) + field_group_types = ['state_type', 'aux_type', 'flux_type'] + add_field_api_view_updates( + routine, dimension=block, field_group_types=field_group_types, + dim_object='DIMS' + ) + + calls = FindNodes(ir.CallStatement).visit(routine.body) + assert len(calls) == 5 + assert calls[0].name == 'DIMS%UPDATE' and calls[0].arguments == ('IBL', 'ICEND', 'JKGLO') + assert calls[1].name == 'AUX_FIELDS%UPDATE_VIEW' and calls[1].arguments == ('IBL',) + assert calls[2].name == 'FLUXES%UPDATE_VIEW' and calls[2].arguments == ('IBL',) + assert calls[3].name == 'STATE%UPDATE_VIEW' and calls[3].arguments == ('IBL',) + + assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 diff --git a/loki/transformations/parallel/tests/test_openmp_region.py b/loki/transformations/parallel/tests/test_openmp_region.py new file mode 100644 index 000000000..e9a05705b --- /dev/null +++ b/loki/transformations/parallel/tests/test_openmp_region.py @@ -0,0 +1,328 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import pytest + +from loki import Subroutine, Module, Dimension +from loki.frontend import available_frontends, OMNI +from loki.ir import ( + nodes as ir, FindNodes, pragmas_attached, pragma_regions_attached, + is_loki_pragma +) + +from loki.transformations.parallel import ( + remove_openmp_regions, add_openmp_regions, + remove_firstprivate_copies, add_firstprivate_copies +) + + +@pytest.mark.parametrize('frontend', available_frontends()) +@pytest.mark.parametrize('insert_loki_parallel', (True, False)) +def test_remove_openmp_regions(frontend, insert_loki_parallel): + """ + A simple test for :any:`remove_openmp_regions` + """ + fcode = """ +subroutine test_driver_openmp(n, arr) + integer, intent(in) :: n + real(kind=8), intent(inout) :: arr(n) + integer :: i + + !$omp parallel private(i) + !$omp do schedule dynamic(1) + do i=1, n + !$loki foo-bar + arr(i) = arr(i) + 1.0 + end do + !$omp end do + !$omp end parallel + + + !$OMP PARALLEL PRIVATE(i) + !$OMP DO SCHEDULE DYNAMIC(1) + do i=1, n + !$loki foo-baz + arr(i) = arr(i) + 1.0 + !$loki end foo-baz + end do + !$OMP END DO + !$OMP END PARALLEL + + + !$omp parallel do private(i) + do i=1, n + !$omp simd + arr(i) = arr(i) + 1.0 + end do + !$omp end parallel +end subroutine test_driver_openmp +""" + routine = Subroutine.from_source(fcode, frontend=frontend) + + assert len(FindNodes(ir.Loop).visit(routine.body)) == 3 + assert len(FindNodes(ir.Pragma).visit(routine.body)) == 14 + + with pragma_regions_attached(routine): + # Without attaching Loop-pragmas, all are recognised as regions + assert len(FindNodes(ir.PragmaRegion).visit(routine.body)) == 6 + + remove_openmp_regions(routine, insert_loki_parallel=insert_loki_parallel) + + assert len(FindNodes(ir.Loop).visit(routine.body)) == 3 + pragmas = FindNodes(ir.Pragma).visit(routine.body) + assert len(pragmas) == (9 if insert_loki_parallel else 3) + + if insert_loki_parallel: + with pragma_regions_attached(routine): + pragma_regions = FindNodes(ir.PragmaRegion).visit(routine.body) + assert len(pragma_regions) == 4 + assert is_loki_pragma(pragma_regions[0].pragma, starts_with='parallel') + assert is_loki_pragma(pragma_regions[0].pragma_post, starts_with='end parallel') + assert is_loki_pragma(pragma_regions[1].pragma, starts_with='parallel') + assert is_loki_pragma(pragma_regions[1].pragma_post, starts_with='end parallel') + assert is_loki_pragma(pragma_regions[2].pragma, starts_with='foo-baz') + assert is_loki_pragma(pragma_regions[2].pragma_post, starts_with='end foo-baz') + assert is_loki_pragma(pragma_regions[3].pragma, starts_with='parallel') + assert is_loki_pragma(pragma_regions[3].pragma_post, starts_with='end parallel') + + +@pytest.mark.parametrize('frontend', available_frontends( + skip=[(OMNI, 'OMNI has trouble mixing Loop and Section pragmas')] +)) +def test_add_openmp_regions(tmp_path, frontend): + """ + A simple test for :any:`add_openmp_regions` + """ + fcode_type = """ +module geom_mod + type geom_type + integer :: nproma, ngptot + end type geom_type +end module geom_mod +""" + + fcode = """ +subroutine test_add_openmp_loop(ydgeom, ydfields, arr) + use geom_mod, only: geom_type, fld_type + use kernel_mod, only: my_kernel, my_non_kernel + implicit none + type(geom_type), intent(in) :: ydgeom + type(fld_type), intent(inout) :: ydfields + type(fld_type), intent(inout) :: ylfields + real(kind=8), intent(inout) :: arr(:,:,:) + integer :: JKGLO, IBL, ICEND + + !$loki parallel + + ylfields = ydfields + + DO JKGLO=1,YDGEOM%NGPTOT,YDGEOM%NPROMA + ICEND = MIN(YDGEOM%NPROMA, YDGEOM%NGPTOT - JKGLO + 1) + IBL = (JKGLO - 1) / YDGEOM%NPROMA + 1 + + CALL YDFIELDS%UPDATE_STUFF() + + CALL MY_KERNEL(ARR(:,:,IBL)) + END DO + + !$loki end parallel + + !$loki not-so-parallel + + DO JKGLO=1,YDGEOM%NGPTOT,YDGEOM%NPROMA + call my_non_kernel(arr(1,1,1)) + END DO + + !$loki end not-so-parallel + +end subroutine test_add_openmp_loop +""" + _ = Module.from_source(fcode_type, frontend=frontend, xmods=[tmp_path]) + routine = Subroutine.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + + assert len(FindNodes(ir.Pragma).visit(routine.body)) == 4 + with pragma_regions_attached(routine): + regions = FindNodes(ir.PragmaRegion).visit(routine.body) + assert len(regions) == 2 + assert is_loki_pragma(regions[0].pragma, starts_with='parallel') + assert is_loki_pragma(regions[0].pragma_post, starts_with='end parallel') + assert is_loki_pragma(regions[1].pragma, starts_with='not-so-parallel') + assert is_loki_pragma(regions[1].pragma_post, starts_with='end not-so-parallel') + + block_dim = Dimension(index='JKGLO', size='YDGEOM%NGPBLK') + add_openmp_regions( + routine, dimension=block_dim, + field_group_types=('fld_type',), + shared_variables=('ydfields',) + ) + + # Ensure pragmas have been inserted + pragmas = FindNodes(ir.Pragma).visit(routine.body) + assert len(pragmas) == 6 + assert all(p.keyword == 'OMP' for p in pragmas[0:4]) + assert all(p.keyword == 'loki' for p in pragmas[5:6]) + + with pragmas_attached(routine, node_type=ir.Loop): + with pragma_regions_attached(routine): + # Ensure pragma region has been created + regions = FindNodes(ir.PragmaRegion).visit(routine.body) + assert len(regions) == 2 + assert regions[0].pragma.keyword == 'OMP' + assert regions[0].pragma.content.startswith('PARALLEL') + assert regions[0].pragma_post.keyword == 'OMP' + assert regions[0].pragma_post.content == 'END PARALLEL' + assert is_loki_pragma(regions[1].pragma, starts_with='not-so-parallel') + assert is_loki_pragma(regions[1].pragma_post, starts_with='end not-so-parallel') + + # Ensure shared, private and firstprivate have been set right + assert 'PARALLEL DEFAULT(SHARED)' in regions[0].pragma.content + assert 'PRIVATE(JKGLO, IBL, ICEND)' in regions[0].pragma.content + assert 'FIRSTPRIVATE(ylfields)' in regions[0].pragma.content + + # Ensure loops has been annotated + loops = FindNodes(ir.Loop).visit(routine.body) + assert len(loops) == 2 + assert loops[0].pragma[0].keyword == 'OMP' + assert loops[0].pragma[0].content == 'DO SCHEDULE(DYNAMIC,1)' + assert not loops[1].pragma + + +@pytest.mark.parametrize('frontend', available_frontends( + skip=[(OMNI, 'OMNI needs full type definitions for derived types')] +)) +def test_remove_firstprivate_copies(frontend): + """ + A simple test for :any:`remove_firstprivate_copies` + """ + fcode = """ +subroutine test_add_openmp_loop(ydgeom, state, arr) + use geom_mod, only: geom_type + use type_mod, only: state_type, flux_type, NewFlux + implicit none + type(geom_type), intent(in) :: ydgeom + real(kind=8), intent(inout) :: arr(:,:,:) + type(state_type), intent(in) :: state + type(state_type) :: ydstate + type(flux_type) :: ydflux + integer :: jkglo, ibl, icend + + !$loki parallel + + ydstate = state + + ydflux = NewFlux() + + do jkglo=1,ydgeom%ngptot,ydgeom%nproma + icend = min(ydgeom%nproma, ydgeom%ngptot - jkglo + 1) + ibl = (jkglo - 1) / ydgeom%nproma + 1 + + call ydstate%update_view(ibl) + + call my_kernel(ydstate%u(:,:), arr(:,:,ibl)) + end do + + !$loki end parallel +end subroutine test_add_openmp_loop +""" + routine = Subroutine.from_source(fcode, frontend=frontend) + + fprivate_map = {'ydstate' : 'state'} + + assigns = FindNodes(ir.Assignment).visit(routine.body) + assert len(assigns) == 4 + assert assigns[0].lhs == 'ydstate' and assigns[0].rhs == 'state' + calls = FindNodes(ir.CallStatement).visit(routine.body) + assert len(calls) == 2 + assert str(calls[0].name).startswith('ydstate%') + assert calls[1].arguments[0].parent == 'ydstate' + assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 + + # Remove the explicit copy of `ydstate = state` and adjust symbols + routine.body = remove_firstprivate_copies( + region=routine.body, fprivate_map=fprivate_map, scope=routine + ) + + # Check removal and symbol replacement + assigns = FindNodes(ir.Assignment).visit(routine.body) + assert len(assigns) == 3 + assert assigns[0].lhs == 'ydflux' + assert assigns[1].lhs == 'icend' + assert assigns[2].lhs == 'ibl' + calls = FindNodes(ir.CallStatement).visit(routine.body) + assert len(calls) == 2 + assert str(calls[0].name).startswith('state%') + assert calls[1].arguments[0].parent == 'state' + assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 + + +@pytest.mark.parametrize('frontend', available_frontends( + skip=[(OMNI, 'OMNI needs full type definitions for derived types')] +)) +def test_add_firstprivate_copies(tmp_path, frontend): + """ + A simple test for :any:`add_firstprivate_copies` + """ + + fcode = """ +subroutine test_add_openmp_loop(ydgeom, state, arr) + use geom_mod, only: geom_type + implicit none + type(geom_type), intent(in) :: ydgeom + real(kind=8), intent(inout) :: arr(:,:,:) + type(state_type), intent(in) :: state + integer :: jkglo, ibl, icend + + !$loki parallel + + do jkglo=1,ydgeom%ngptot,ydgeom%nproma + icend = min(ydgeom%nproma, ydgeom%ngptot - jkglo + 1) + ibl = (jkglo - 1) / ydgeom%nproma + 1 + + call state%update_view(ibl) + + call my_kernel(state%u(:,:), arr(:,:,ibl)) + end do + + !$loki end parallel + + !$loki not-so-parallel + + do jkglo=1,ydgeom%ngptot,ydgeom%nproma + icend = min(ydgeom%nproma, ydgeom%ngptot - jkglo + 1) + ibl = (jkglo - 1) / ydgeom%nproma + 1 + end do + + !$loki end not-so-parallel +end subroutine test_add_openmp_loop +""" + routine = Subroutine.from_source(fcode, frontend=frontend) + + fprivate_map = {'ydstate' : 'state'} + + assigns = FindNodes(ir.Assignment).visit(routine.body) + assert len(assigns) == 4 + assert assigns[0].lhs == 'icend' + assert assigns[1].lhs == 'ibl' + calls = FindNodes(ir.CallStatement).visit(routine.body) + assert len(calls) == 2 + assert str(calls[0].name).startswith('state%') + assert calls[1].arguments[0].parent == 'state' + assert len(FindNodes(ir.Loop).visit(routine.body)) == 2 + + # Put the explicit firstprivate copies back in + add_firstprivate_copies( + routine=routine, fprivate_map=fprivate_map + ) + + assigns = FindNodes(ir.Assignment).visit(routine.body) + assert len(assigns) == 5 + assert assigns[0].lhs == 'ydstate' and assigns[0].rhs == 'state' + calls = FindNodes(ir.CallStatement).visit(routine.body) + assert len(calls) == 2 + assert str(calls[0].name).startswith('ydstate%') + assert calls[1].arguments[0].parent == 'ydstate' + assert len(FindNodes(ir.Loop).visit(routine.body)) == 2