Skip to content

Commit

Permalink
Parallel: Add utility to manage explicit firstprivate copies
Browse files Browse the repository at this point in the history
  • Loading branch information
mlange05 committed Oct 25, 2024
1 parent 77f0fee commit 8d95562
Show file tree
Hide file tree
Showing 2 changed files with 225 additions and 6 deletions.
102 changes: 98 additions & 4 deletions loki/transformations/parallel/openmp_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,21 @@
"""

from loki.analyse import dataflow_analysis_attached
from loki.expression import symbols as sym
from loki.expression import symbols as sym, parse_expr
from loki.ir import (
nodes as ir, FindNodes, Transformer, is_loki_pragma,
pragmas_attached, pragma_regions_attached
nodes as ir, FindNodes, FindVariables, Transformer,
SubstituteStringExpressions, is_loki_pragma, pragmas_attached,
pragma_regions_attached
)
from loki.tools import dict_override, flatten
from loki.types import DerivedType


__all__ = ['remove_openmp_regions', 'add_openmp_regions']
__all__ = [
'remove_openmp_regions', 'add_openmp_regions',
'remove_explicit_firstprivatisation',
'create_explicit_firstprivatisation'
]


def remove_openmp_regions(routine, insert_loki_parallel=False):
Expand Down Expand Up @@ -175,3 +181,91 @@ def add_openmp_regions(routine, global_variables=None, field_group_types=None):
pragma=ir.Pragma(keyword='OMP', content='DO SCHEDULE(DYNAMIC,1)'),
pragma_post=ir.Pragma(keyword='OMP', content='END DO'),
)


def remove_explicit_firstprivatisation(region, fprivate_map, scope):
    """
    Undo an IFS-specific workaround in which complex derived-type
    objects are explicitly copied into a local object to avoid
    erroneous firstprivatisation in OpenMP loops.

    Copy assignments that match :data:`fprivate_map` are dropped from
    the region; the map is then applied as a string-expression
    substitution to what remains.

    Parameters
    ----------
    region : tuple of :any:`Node`
        The code region from which to remove firstprivate copies
    fprivate_map : dict of (str, str)
        String mapping of local-to-global names for explicitly
        privatised objects
    scope : :any:`Scope`
        Scope to use for symbol substitution

    Returns
    -------
    The result of visiting the stripped region with
    :any:`SubstituteStringExpressions`
    """

    class RemoveExplicitCopyTransformer(Transformer):
        """ Drop assignments that match the firstprivatisation map """

        def visit_Assignment(self, assign, **kwargs):  # pylint: disable=unused-argument
            target = assign.lhs
            # Only a derived-type assignment ``<local> = <global>`` that is
            # listed in the map counts as an explicit privatisation copy
            is_private_copy = (
                isinstance(target.type.dtype, DerivedType)
                and target.name in fprivate_map
                and assign.rhs == fprivate_map[target.name]
            )
            # Returning ``None`` removes the node from the rebuilt tree
            return None if is_private_copy else assign

    # First strip the copy assignments themselves ...
    stripped = RemoveExplicitCopyTransformer().visit(region)

    # ... then replace the remaining uses of the local copy
    substitute = SubstituteStringExpressions(fprivate_map, scope=scope)
    return substitute.visit(stripped)


def create_explicit_firstprivatisation(routine, fprivate_map):
    """
    Injects IFS-specific thread-local copies of named complex derived
    type objects in parallel regions. This is to prevent issues with
    firstprivate variables in OpenMP loops.

    The routine is modified in place: missing local copies are added
    to ``routine.variables`` and ``routine.body`` is replaced with the
    transformed IR. Nothing is returned.

    Parameters
    ----------
    routine : :any:`Subroutine`
        Subroutine in which to insert privatisation copies
    fprivate_map : dict of (str, str)
        String mapping of local-to-global names for explicitly
        privatised objects
    """
    # Substitution map for the opposite direction (global -> local name)
    inverse_map = {v: k for k, v in fprivate_map.items()}

    # Ensure the local object copies are declared
    for lcl, gbl in fprivate_map.items():
        lhs = parse_expr(lcl, scope=routine)
        rhs = parse_expr(gbl, scope=routine)
        if lhs not in routine.variable_map:
            # The copy takes the type of the global object, with the
            # intent reset to ``None``
            routine.variables += (lhs.clone(type=rhs.type.clone(intent=None)),)

    class InjectExplicitCopyTransformer(Transformer):
        """ Inject assignments that match the firstprivate map in parallel regions """

        def visit_PragmaRegion(self, region, **kwargs):  # pylint: disable=unused-argument
            # Apply to pragma-marked "parallel" regions only
            if not is_loki_pragma(region.pragma, starts_with='parallel'):
                return region

            # Collect the explicit privatisation copies; only objects that
            # are found among the region's variables get a copy assignment
            lvars = FindVariables(unique=True).visit(region.body)
            assigns = ()
            for lcl, gbl in fprivate_map.items():
                # Parsed afresh per region, so every injected assignment
                # gets its own expression nodes
                lhs = parse_expr(lcl, scope=routine)
                rhs = parse_expr(gbl, scope=routine)
                if rhs in lvars:
                    assigns += (ir.Assignment(lhs=lhs, rhs=rhs),)

            # Remap from global to local name in marked regions
            region = SubstituteStringExpressions(inverse_map, scope=routine).visit(region)

            # Add the copies after the remapping, so that the right-hand
            # side of each copy keeps the global name
            region.prepend(assigns)
            return region

    with pragma_regions_attached(routine):
        # Inject assignments of local copies
        routine.body = InjectExplicitCopyTransformer().visit(routine.body)
129 changes: 127 additions & 2 deletions loki/transformations/parallel/tests/test_openmp_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@
import pytest

from loki import Subroutine, Module
from loki.frontend import available_frontends
from loki.frontend import available_frontends, OMNI
from loki.ir import (
nodes as ir, FindNodes, pragmas_attached, pragma_regions_attached,
is_loki_pragma
)

from loki.transformations.parallel import (
remove_openmp_regions, add_openmp_regions
remove_openmp_regions, add_openmp_regions,
remove_explicit_firstprivatisation,
create_explicit_firstprivatisation
)


Expand Down Expand Up @@ -149,3 +151,126 @@ def test_add_openmp_regions(tmp_path, frontend):
assert loops[0].pragma[0].content == 'DO SCHEDULE(DYNAMIC,1)'

# TODO: Test field_group_types and known global variables


@pytest.mark.parametrize('frontend', available_frontends(
    skip=[(OMNI, 'OMNI needs full type definitions for derived types')]
))
def test_remove_explicit_firstprivatisation(frontend):
    """
    Check that :any:`remove_explicit_firstprivatisation` drops the
    explicit ``ydstate = state`` copy and renames uses of the copy.
    """
    fcode = """
subroutine test_add_openmp_loop(ydgeom, state, arr)
use geom_mod, only: geom_type
implicit none
type(geom_type), intent(in) :: ydgeom
real(kind=8), intent(inout) :: arr(:,:,:)
type(state_type), intent(in) :: state
type(state_type) :: ydstate
integer :: jkglo, ibl, icend
!$loki parallel
ydstate = state
do jkglo=1,ydgeom%ngptot,ydgeom%nproma
icend = min(ydgeom%nproma, ydgeom%ngptot - jkglo + 1)
ibl = (jkglo - 1) / ydgeom%nproma + 1
call ydstate%update_view(ibl)
call my_kernel(ydstate%u(:,:), arr(:,:,ibl))
end do
!$loki end parallel
end subroutine test_add_openmp_loop
"""
    routine = Subroutine.from_source(fcode, frontend=frontend)

    fprivate_map = {'ydstate' : 'state'}

    # Before: the explicit copy is present and the calls use the local copy
    assignments = FindNodes(ir.Assignment).visit(routine.body)
    assert len(assignments) == 3
    first_copy = assignments[0]
    assert first_copy.lhs == 'ydstate'
    assert first_copy.rhs == 'state'
    call_stmts = FindNodes(ir.CallStatement).visit(routine.body)
    assert len(call_stmts) == 2
    assert str(call_stmts[0].name).startswith('ydstate%')
    assert call_stmts[1].arguments[0].parent == 'ydstate'
    loops = FindNodes(ir.Loop).visit(routine.body)
    assert len(loops) == 1

    # Remove the explicit copy of `ydstate = state` and adjust symbols
    routine.body = remove_explicit_firstprivatisation(
        region=routine.body, fprivate_map=fprivate_map, scope=routine
    )

    # After: the copy is gone and the calls refer to the global object
    assignments = FindNodes(ir.Assignment).visit(routine.body)
    assert len(assignments) == 2
    assert assignments[0].lhs == 'icend'
    assert assignments[1].lhs == 'ibl'
    call_stmts = FindNodes(ir.CallStatement).visit(routine.body)
    assert len(call_stmts) == 2
    assert str(call_stmts[0].name).startswith('state%')
    assert call_stmts[1].arguments[0].parent == 'state'
    loops = FindNodes(ir.Loop).visit(routine.body)
    assert len(loops) == 1


@pytest.mark.parametrize('frontend', available_frontends(
    skip=[(OMNI, 'OMNI needs full type definitions for derived types')]
))
def test_create_explicit_firstprivatisation(tmp_path, frontend):
    """
    Check that :any:`create_explicit_firstprivatisation` injects the
    explicit ``ydstate = state`` copy and renames uses of ``state``.
    """

    fcode = """
subroutine test_add_openmp_loop(ydgeom, state, arr)
use geom_mod, only: geom_type
implicit none
type(geom_type), intent(in) :: ydgeom
real(kind=8), intent(inout) :: arr(:,:,:)
type(state_type), intent(in) :: state
integer :: jkglo, ibl, icend
!$loki parallel
do jkglo=1,ydgeom%ngptot,ydgeom%nproma
icend = min(ydgeom%nproma, ydgeom%ngptot - jkglo + 1)
ibl = (jkglo - 1) / ydgeom%nproma + 1
call state%update_view(ibl)
call my_kernel(state%u(:,:), arr(:,:,ibl))
end do
!$loki end parallel
end subroutine test_add_openmp_loop
"""
    routine = Subroutine.from_source(fcode, frontend=frontend)

    fprivate_map = {'ydstate' : 'state'}

    # Before: no explicit copy, and the calls use the global object
    assignments = FindNodes(ir.Assignment).visit(routine.body)
    assert len(assignments) == 2
    assert assignments[0].lhs == 'icend'
    assert assignments[1].lhs == 'ibl'
    call_stmts = FindNodes(ir.CallStatement).visit(routine.body)
    assert len(call_stmts) == 2
    assert str(call_stmts[0].name).startswith('state%')
    assert call_stmts[1].arguments[0].parent == 'state'
    loops = FindNodes(ir.Loop).visit(routine.body)
    assert len(loops) == 1

    # Put the explicit firstprivate copies back in
    create_explicit_firstprivatisation(
        routine=routine, fprivate_map=fprivate_map
    )

    # After: the copy comes first and the calls use the local copy
    assignments = FindNodes(ir.Assignment).visit(routine.body)
    assert len(assignments) == 3
    first_copy = assignments[0]
    assert first_copy.lhs == 'ydstate'
    assert first_copy.rhs == 'state'
    call_stmts = FindNodes(ir.CallStatement).visit(routine.body)
    assert len(call_stmts) == 2
    assert str(call_stmts[0].name).startswith('ydstate%')
    assert call_stmts[1].arguments[0].parent == 'ydstate'
    loops = FindNodes(ir.Loop).visit(routine.body)
    assert len(loops) == 1

0 comments on commit 8d95562

Please sign in to comment.