Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transformations: "Parallel" sub-module with driver-level parallelisation utilities #415

Merged
merged 18 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
153176a
IR: Small fix for an erroneous warning message
mlange05 Oct 16, 2024
3a5ae2f
Parallel: Add utility to remove existing OpenMP parallel region
mlange05 Oct 18, 2024
29dc638
Parallel: Add utility to insert host-side OpenMP thread pragmas
mlange05 Oct 18, 2024
8e3b8db
Parallel: Ensure reproducible variable ordering in pragmas
mlange05 May 17, 2024
474681a
Parallel: Add utility to remove parallel block loops
mlange05 Oct 18, 2024
167a9ba
Parallel: Add utility to insert block loops into routines
mlange05 Oct 19, 2024
2fdb580
Parallel: Add utility to add/remove FIELD-API view updates
mlange05 Oct 19, 2024
efe4be8
Parallel: Add utility to manage explicit firstprivate copies
mlange05 Oct 19, 2024
180fd83
Parallel: Add `DEFAULT(SHARED)` to OpenMP parallel clause creation
mlange05 Nov 3, 2024
b973bbb
Parallel: Disable OMNI for OpenMP region insertion test
mlange05 Nov 11, 2024
db6df94
Parallel: Add camel-case testing and check generated warnings
mlange05 Nov 11, 2024
6e60ce1
Parallel: Fixing typos, comments and imports
mlange05 Nov 13, 2024
4a1fada
Parallel: Better test coverage and case sensitivity for F-API removal
mlange05 Nov 13, 2024
c011e71
Parallel: Use `Dimension` and improve tests for add_openmp_region
mlange05 Nov 13, 2024
c4f0bc5
Parallel: Rename argument `global_variables` => `shared_variables`
mlange05 Nov 13, 2024
a3b85d2
Parallel: Add more test coverage for corner cases
mlange05 Nov 14, 2024
efea62f
Parallel: Rename to `add/remove_firstprivate_copies`
mlange05 Nov 14, 2024
2d361bc
Parallel: Add missing recursion on remove_block_loops
mlange05 Nov 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion loki/ir/pragma_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@ def visit_tuple(self, o, **kwargs):
if start in o:
# If a pair does not live in the same tuple we have a problem.
if stop not in o:
warning('[Loki::IR] Cannot find matching end for pragma {start} at same IR level!')
warning(f'[Loki::IR] Cannot find matching end for pragma {start} at same IR level!')
continue

# Create the PragmaRegion node and replace in tuple
Expand Down
1 change: 1 addition & 0 deletions loki/transformations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@
from loki.transformations.split_read_write import * # noqa
from loki.transformations.loop_blocking import * # noqa
from loki.transformations.routine_signatures import * # noqa
from loki.transformations.parallel import * # noqa
15 changes: 15 additions & 0 deletions loki/transformations/parallel/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# (C) Copyright 2018- ECMWF.
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

"""
Sub-package with utilities to remove, generate and manipulate parallel
regions.
"""

from loki.transformations.parallel.block_loop import * # noqa
from loki.transformations.parallel.field_api import * # noqa
from loki.transformations.parallel.openmp_region import * # noqa
137 changes: 137 additions & 0 deletions loki/transformations/parallel/block_loop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# (C) Copyright 2018- ECMWF.
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

"""
Transformation utilities to remove and generate parallel block loops.
"""

from loki.expression import symbols as sym
from loki.ir import (
nodes as ir, FindNodes, Transformer, pragma_regions_attached,
is_loki_pragma
)
from loki.tools import as_tuple
from loki.types import BasicType, SymbolAttributes


__all__ = ['remove_block_loops', 'add_block_loops']


def remove_block_loops(routine, dimension):
    """
    Strip outer block :any:`Loop` nodes from a given :any:`Subroutine`.

    A loop counts as a block loop when its loop variable equals
    ``dimension.index``. Each such loop is replaced by the nodes of its
    body, with the auxiliary assignments to index and bound variables
    (as commonly used in IFS-style block loops) filtered out.

    Parameters
    ----------
    routine : :any:`Subroutine`
        Subroutine from which to remove block loops; its ``body`` is
        replaced with the transformed IR.
    dimension : :any:`Dimension`
        The dimension object describing the loop variables
    """
    block_index = dimension.index

    # Every symbol whose assignment is considered loop-setup boilerplate
    aux_symbols = (
        as_tuple(dimension.indices)
        + as_tuple(dimension.lower)
        + as_tuple(dimension.upper)
    )

    class RemoveBlockLoopTransformer(Transformer):
        """
        :any:`Transformer` to remove driver-level block loops.
        """

        def visit_Loop(self, loop, **kwargs):  # pylint: disable=unused-argument
            # Recurse into the body first, so that block loops nested
            # inside other loops are handled as well (this recursion was
            # added following review feedback).
            new_body = self.visit(loop.body, **kwargs)

            if loop.variable == block_index:
                # Collect the index/bound assignments and splice the
                # remaining body nodes in place of the loop
                aux_assigns = tuple(
                    assign for assign in FindNodes(ir.Assignment).visit(new_body)
                    if assign.lhs in aux_symbols
                )
                return tuple(node for node in new_body if node not in aux_assigns)

            # Any other loop is kept, but carries the transformed body
            return loop._rebuild(body=new_body)

    routine.body = RemoveBlockLoopTransformer().visit(routine.body)


def add_block_loops(routine, dimension, default_type=None):
    """
    Insert IFS-style (NPROMA) driver block-loops in ``!$loki
    parallel`` regions.

    The provided :any:`Dimension` object describes the variables to be
    used when generating the loop and its default assignments. A
    strided loop over points is created with ``dimension.index`` as
    loop variable, running from ``1`` to ``dimension.upper[0]`` in
    steps of ``dimension.step``. Two assignments are prepended to the
    loop body: one setting ``dimension.upper[1]`` to the size of the
    current block, and one setting the block index
    ``dimension.indices[1]``.

    Parameters
    ----------
    routine : :any:`Subroutine`
        The routine in which to add block loops.
    dimension : :any:`Dimension`
        The dimension object describing the block loop variables.
    default_type : :any:`SymbolAttributes`, optional
        Default type to use when creating variables; defaults to
        ``integer(kind=JPIM)``.
    """

    dtype = default_type or SymbolAttributes(BasicType.INTEGER, kind='JPIM')

    lidx = routine.parse_expr(dimension.index)
    bidx = routine.parse_expr(dimension.indices[1])
    bupper = routine.parse_expr(dimension.upper[1])

    # Declare any of the local integer variables that the routine does
    # not know yet.
    # NOTE(review): the declaration path was flagged as untested in review.
    for var in (lidx, bupper, bidx):
        if var in routine.variable_map:
            continue
        routine.variables += (var.clone(type=dtype),)

    def _create_block_loop(body, scope):
        """
        Generate block loop object, including indexing preamble
        """
        bsize = scope.parse_expr(dimension.step)
        lupper = scope.parse_expr(dimension.upper[0])
        bounds = sym.LoopRange((sym.Literal(1), lupper, bsize))

        # <upper[1]> = MIN(<step>, <upper[0]> - <index> + 1)
        remaining = scope.parse_expr(f'{lupper}-{lidx}+1')
        block_extent = sym.InlineCall(
            function=sym.ProcedureSymbol('MIN', scope=scope),
            parameters=(bsize, remaining)
        )
        set_block_upper = ir.Assignment(lhs=bupper, rhs=block_extent)

        # <indices[1]> = (<index> - 1) / <step> + 1
        set_block_index = ir.Assignment(
            lhs=bidx, rhs=scope.parse_expr(f'({lidx}-1)/{bsize}+1')
        )

        return ir.Loop(
            variable=lidx, bounds=bounds,
            body=(set_block_upper, set_block_index) + body
        )

    class InsertBlockLoopTransformer(Transformer):
        """
        :any:`Transformer` that wraps the body of marked parallel
        regions in a block loop.
        """

        def visit_PragmaRegion(self, region, **kwargs):
            """
            (Re-)insert driver-level block loops into marked parallel region.
            """
            # Only ``!$loki parallel`` regions are touched; every other
            # pragma region is returned as-is.
            if is_loki_pragma(region.pragma, starts_with='parallel'):
                block_loop = _create_block_loop(
                    body=region.body, scope=kwargs.get('scope')
                )
                region._update(body=(ir.Comment(''), block_loop))

            return region

    with pragma_regions_attached(routine):
        routine.body = InsertBlockLoopTransformer().visit(routine.body, scope=routine)
152 changes: 152 additions & 0 deletions loki/transformations/parallel/field_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# (C) Copyright 2018- ECMWF.
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

"""
Transformation utilities to manage and inject FIELD-API boilerplate code.
"""

from loki.expression import symbols as sym
from loki.ir import (
nodes as ir, FindNodes, FindVariables, Transformer
)
from loki.logging import warning
from loki.tools import as_tuple


__all__ = [
'remove_field_api_view_updates', 'add_field_api_view_updates'
]


def remove_field_api_view_updates(routine, field_group_types, dim_object=None):
    """
    Remove FIELD API boilerplate calls for view updates of derived types.

    This utility is intended to remove the IFS-specific group type
    objects that provide block-scope view pointers to deep kernel
    trees. It will remove all calls to ``UPDATE_VIEW`` on derived-type
    objects, and warn if the object's type is not one of the given
    field group types. Loops and conditionals that are left without
    any content are removed as well.

    All name and type comparisons are case-insensitive.

    Parameters
    ----------
    routine : :any:`Subroutine`
        The routine from which to remove FIELD API update calls
    field_group_types : str or tuple of str
        Name(s) of the derived types of "field group" objects to remove
    dim_object : str, optional
        Optional name of the "dimension" object; if provided it will remove the
        call to ``<dim>%UPDATE(...)`` accordingly.
    """
    # Wrap with ``as_tuple`` first, so that a single type name given as a
    # plain string is not iterated character by character.
    field_group_types = tuple(
        str(fgt).lower() for fgt in as_tuple(field_group_types)
    )
    dim_update = f'{dim_object}%update'.lower() if dim_object else None

    class RemoveFieldAPITransformer(Transformer):
        """
        :any:`Transformer` that drops FIELD API update calls and any
        loop or conditional emptied by doing so.
        """

        def visit_CallStatement(self, call, **kwargs):  # pylint: disable=unused-argument
            call_name = str(call.name).lower()

            if '%update_view' in call_name:
                # The call is removed regardless; the warning only flags
                # objects whose type was not expected
                if str(call.name.parent.type.dtype).lower() not in field_group_types:
                    warning(f'[Loki::ControlFlow] Removing {call.name} call, but not in field group types!')

                return None

            if dim_update and dim_update in call_name:
                return None

            return call

        def visit_Assignment(self, assign, **kwargs):  # pylint: disable=unused-argument
            # Assignments are never removed, only reported
            if str(assign.lhs.type.dtype).lower() in field_group_types:
                warning(f'[Loki::ControlFlow] Found LHS field group assign: {assign}')
            return assign

        def visit_Loop(self, loop, **kwargs):
            loop = self.visit_Node(loop, **kwargs)
            return loop if loop.body else None

        def visit_Conditional(self, cond, **kwargs):
            cond = self.visit_Node(cond, **kwargs)
            # Only drop the conditional when both branches are empty; an
            # empty body alone must not discard a non-empty else branch.
            return cond if (cond.body or cond.else_body) else None

    routine.body = RemoveFieldAPITransformer().visit(routine.body)


def add_field_api_view_updates(routine, dimension, field_group_types, dim_object=None):
    """
    Adds FIELD API boilerplate calls for view updates.

    The provided :any:`Dimension` object describes the local loop variables to
    pass to the respective update calls. In particular, ``dimension.index``
    identifies the block loops to modify, and ``dimension.indices[1]``
    denotes the block loop index that is passed to ``UPDATE_VIEW()``
    calls on field group objects. The list of type names ``field_group_types``
    is used to identify for which objects the view update calls get added;
    type names are compared case-insensitively.

    The calls are inserted directly after the last loop-setup assignment
    (to any of the dimension's index or bound variables) found at the top
    level of the loop body, or at the start of the body if there is none.

    Parameters
    ----------
    routine : :any:`Subroutine`
        The routine to which to add FIELD API update calls
    dimension : :any:`Dimension`
        The dimension object describing the block loop variables.
    field_group_types : str or tuple of str
        Name(s) of the derived types of "field group" objects for which
        to add view updates
    dim_object : str, optional
        Optional name of the "dimension" object; if provided, a call to
        ``<dim>%UPDATE(...)`` is inserted ahead of the view updates.
    """
    # Normalise the type names once, so matching is case-insensitive and
    # a single name given as a plain string is handled correctly.
    field_group_types = tuple(
        str(fgt).lower() for fgt in as_tuple(field_group_types)
    )

    # Symbols whose assignments make up the loop-setup preamble
    loop_symbols = (
        as_tuple(dimension.indices)
        + as_tuple(dimension.lower)
        + as_tuple(dimension.upper)
    )

    def _create_dim_update(scope, dim_object):
        """ Create the ``<dim>%UPDATE(bindex, upper, index)`` call """
        index = scope.parse_expr(dimension.index)
        upper = scope.parse_expr(dimension.upper[1])
        bindex = scope.parse_expr(dimension.indices[1])
        idims = scope.get_symbol(dim_object)
        csym = sym.ProcedureSymbol(name='UPDATE', parent=idims, scope=idims.scope)
        return ir.CallStatement(name=csym, arguments=(bindex, upper, index), kwarguments=())

    def _create_view_updates(section, scope):
        """ Create one ``UPDATE_VIEW(bindex)`` call per field group variable """
        bindex = scope.parse_expr(dimension.indices[1])

        # Sort by name to get a reproducible ordering of the calls
        fgroup_vars = sorted(
            (
                v for v in FindVariables(unique=True).visit(section)
                if str(v.type.dtype).lower() in field_group_types
            ),
            key=str
        )

        calls = ()
        for fgvar in fgroup_vars:
            fgsym = scope.get_symbol(fgvar.name)
            csym = sym.ProcedureSymbol(name='UPDATE_VIEW', parent=fgsym, scope=fgsym.scope)
            calls += (ir.CallStatement(name=csym, arguments=(bindex,), kwarguments=()),)

        return calls

    class InsertFieldAPIViewsTransformer(Transformer):
        """ Injects FIELD-API view updates into block loops """

        def visit_Loop(self, loop, **kwargs):
            # Block loops are identified via the dimension's index rather
            # than a hard-coded variable name
            if not loop.variable == dimension.index:
                return loop

            scope = kwargs.get('scope')

            # Locate the loop-setup assignments at the top level of the
            # loop body; if there are none, insert at the very start.
            setup_positions = [
                pos for pos, node in enumerate(loop.body)
                if isinstance(node, ir.Assignment) and node.lhs in loop_symbols
            ]
            idx = max(setup_positions, default=-1) + 1

            # Prepend FIELD API boilerplate
            preamble = (
                ir.Comment(''), ir.Comment('! Set up thread-local view pointers')
            )
            if dim_object:
                preamble += (_create_dim_update(scope, dim_object=dim_object),)
            preamble += _create_view_updates(loop.body, scope)

            loop._update(body=loop.body[:idx] + preamble + loop.body[idx:])
            return loop

    routine.body = InsertFieldAPIViewsTransformer().visit(routine.body, scope=routine)
Loading
Loading