From 153176a8cefe66b150c62b0675197be263011277 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Wed, 16 Oct 2024 08:15:24 +0000 Subject: [PATCH 01/18] IR: Small fix for an erroneous warning message --- loki/ir/pragma_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loki/ir/pragma_utils.py b/loki/ir/pragma_utils.py index 3a17a822a..ae1fab732 100644 --- a/loki/ir/pragma_utils.py +++ b/loki/ir/pragma_utils.py @@ -532,7 +532,7 @@ def visit_tuple(self, o, **kwargs): if start in o: # If a pair does not live in the same tuple we have a problem. if stop not in o: - warning('[Loki::IR] Cannot find matching end for pragma {start} at same IR level!') + warning(f'[Loki::IR] Cannot find matching end for pragma {start} at same IR level!') continue # Create the PragmaRegion node and replace in tuple From 3a5ae2f0599742510edc56a96e0720586c5f1a51 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Fri, 18 Oct 2024 13:54:20 +0000 Subject: [PATCH 02/18] Parallel: Add utility to remove existing OpenMP parallel region Optionally, this utility can also replace these outer pragmas with `!$loki parallal` pragma markers. --- loki/transformations/__init__.py | 1 + loki/transformations/parallel/__init__.py | 13 +++ .../transformations/parallel/openmp_region.py | 85 +++++++++++++++++++ .../parallel/tests/test_openmp_region.py | 78 +++++++++++++++++ 4 files changed, 177 insertions(+) create mode 100644 loki/transformations/parallel/__init__.py create mode 100644 loki/transformations/parallel/openmp_region.py create mode 100644 loki/transformations/parallel/tests/test_openmp_region.py diff --git a/loki/transformations/__init__.py b/loki/transformations/__init__.py index 11c8bdbf0..236607132 100644 --- a/loki/transformations/__init__.py +++ b/loki/transformations/__init__.py @@ -36,3 +36,4 @@ from loki.transformations.split_read_write import * # noqa from loki.transformations.loop_blocking import * # noqa from loki.transformations.routine_signatures import * # noqa +from loki.transformations.parallel import * # noqa diff --git a/loki/transformations/parallel/__init__.py b/loki/transformations/parallel/__init__.py new file mode 100644 index 000000000..9020b472d --- /dev/null +++ b/loki/transformations/parallel/__init__.py @@ -0,0 +1,13 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +""" +Sub-package with utilities to remove, generate and manipulate parallel +regions. +""" + +from loki.transformations.parallel.openmp_region import * # noqa diff --git a/loki/transformations/parallel/openmp_region.py b/loki/transformations/parallel/openmp_region.py new file mode 100644 index 000000000..332a976d7 --- /dev/null +++ b/loki/transformations/parallel/openmp_region.py @@ -0,0 +1,85 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +""" +Sub-package with utilities to remove and manipulate parallel OpenMP regions. +""" + +from loki.ir import nodes as ir, Transformer, pragma_regions_attached +from loki.tools import dict_override + + +__all__ = ['remove_openmp_regions'] + + +def remove_openmp_regions(routine, insert_loki_parallel=False): + """ + Remove any OpenMP parallel annotations (``!$omp parallel``). + + Optionally, this can replace ``!$omp parallel`` with ``!$loki + parallel`` pragmas. + + Parameters + ---------- + routine : :any:`Subroutine` + The routine from which to strip all OpenMP annotations. + insert_loki_parallel : bool + Flag for the optional insertion of ``!$loki parallel` pragmas + """ + + class RemoveOpenMPRegionTransformer(Transformer): + """ + Remove OpenMP pragmas from "parallel" regions and remove all + contained OpenMP pragmas and pragma regions. + + Optionally replaces outer ``!$omp parallel`` region with + ``!$loki parallel`` region. + """ + + def visit_PragmaRegion(self, region, **kwargs): + """ + Perform the fileterin and removeal of OpenMP pragma regions. + + Parameters + ---------- + active : tuple + Flag to indicate whether we're actively traversing an + outer OpenMP region. + """ + if not region.pragma.keyword.lower() == 'omp': + return region + + if kwargs['active'] and region.pragma.keyword.lower() == 'omp': + # Remove other OpenMP pragam regions when in active mode + region._update(pragma=None, pragma_post=None) + return region + + if 'parallel' in region.pragma.content.lower(): + # Replace or remove pragmas + pragma = None + pragma_post = None + if insert_loki_parallel: + pragma = ir.Pragma(keyword='loki', content='parallel') + pragma_post = ir.Pragma(keyword='loki', content='end parallel') + + with dict_override(kwargs, {'active': True}): + body = self.visit(region.body, **kwargs) + + region._update(body=body, pragma=pragma, pragma_post=pragma_post) + + return region + + def visit_Pragma(self, pragma, **kwargs): + """ Remove other OpenMP pragmas if in active region """ + + if kwargs['active'] and pragma.keyword.lower() == 'omp': + return None + + return pragma + + with pragma_regions_attached(routine): + routine.body = RemoveOpenMPRegionTransformer().visit(routine.body, active=False) diff --git a/loki/transformations/parallel/tests/test_openmp_region.py b/loki/transformations/parallel/tests/test_openmp_region.py new file mode 100644 index 000000000..75b9952ba --- /dev/null +++ b/loki/transformations/parallel/tests/test_openmp_region.py @@ -0,0 +1,78 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import pytest + +from loki import Subroutine +from loki.frontend import available_frontends +from loki.ir import ( + nodes as ir, FindNodes, pragma_regions_attached, is_loki_pragma +) + +from loki.transformations.parallel import remove_openmp_regions + + +@pytest.mark.parametrize('frontend', available_frontends()) +@pytest.mark.parametrize('insert_loki_parallel', (True, False)) +def test_remove_openmp_regions(frontend, insert_loki_parallel): + """ + A simple test for :any:`remove_openmp_regions` + """ + fcode = """ +subroutine test_driver_openmp(n, arr) + integer, intent(in) :: n + real(kind=8), intent(inout) :: arr(n) + integer :: i + + !$omp parallel private(i) + !$omp do schedule dynamic(1) + do i=1, n + arr(i) = arr(i) + 1.0 + end do + !$omp end do + !$omp end parallel + + + !$OMP PARALLEL PRIVATE(i) + !$OMP DO SCHEDULE DYNAMIC(1) + do i=1, n + arr(i) = arr(i) + 1.0 + end do + !$OMP END DO + !$OMP END PARALLEL + + + !$omp parallel do private(i) + do i=1, n + !$omp simd + arr(i) = arr(i) + 1.0 + end do + !$omp end parallel +end subroutine test_driver_openmp +""" + routine = Subroutine.from_source(fcode, frontend=frontend) + + assert len(FindNodes(ir.Loop).visit(routine.body)) == 3 + assert len(FindNodes(ir.Pragma).visit(routine.body)) == 11 + + with pragma_regions_attached(routine): + # Without attaching Loop-pragmas, all are recognised as regions + assert len(FindNodes(ir.PragmaRegion).visit(routine.body)) == 5 + + remove_openmp_regions(routine, insert_loki_parallel=insert_loki_parallel) + + assert len(FindNodes(ir.Loop).visit(routine.body)) == 3 + pragmas = FindNodes(ir.Pragma).visit(routine.body) + assert len(pragmas) == (6 if insert_loki_parallel else 0) + + if insert_loki_parallel: + with pragma_regions_attached(routine): + pragma_regions = FindNodes(ir.PragmaRegion).visit(routine.body) + assert len(pragma_regions) == 3 + for region in pragma_regions: + assert is_loki_pragma(region.pragma, starts_with='parallel') + assert is_loki_pragma(region.pragma_post, starts_with='end parallel') From 29dc638bb709e3ca463eaa81b4d452107a6a27d8 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Fri, 18 Oct 2024 14:11:22 +0000 Subject: [PATCH 03/18] Parallel: Add utility to insert host-side OpenMP thread pragmas --- .../transformations/parallel/openmp_region.py | 98 ++++++++++++++++++- .../parallel/tests/test_openmp_region.py | 79 ++++++++++++++- 2 files changed, 171 insertions(+), 6 deletions(-) diff --git a/loki/transformations/parallel/openmp_region.py b/loki/transformations/parallel/openmp_region.py index 332a976d7..71f1603f0 100644 --- a/loki/transformations/parallel/openmp_region.py +++ b/loki/transformations/parallel/openmp_region.py @@ -9,11 +9,16 @@ Sub-package with utilities to remove and manipulate parallel OpenMP regions. """ -from loki.ir import nodes as ir, Transformer, pragma_regions_attached -from loki.tools import dict_override +from loki.analyse import dataflow_analysis_attached +from loki.expression import symbols as sym +from loki.ir import ( + nodes as ir, FindNodes, Transformer, is_loki_pragma, + pragmas_attached, pragma_regions_attached +) +from loki.tools import dict_override, flatten -__all__ = ['remove_openmp_regions'] +__all__ = ['remove_openmp_regions', 'add_openmp_regions'] def remove_openmp_regions(routine, insert_loki_parallel=False): @@ -83,3 +88,90 @@ def visit_Pragma(self, pragma, **kwargs): with pragma_regions_attached(routine): routine.body = RemoveOpenMPRegionTransformer().visit(routine.body, active=False) + + +def add_openmp_regions(routine, global_variables=None, field_group_types=None): + """ + Add the OpenMP directives for a parallel driver region with an + outer block loop. + """ + block_dim_size = 'YDGEOMETRY%YRDIM%NGPBLKS' + + global_variables = global_variables or {} + field_group_types = field_group_types or {} + + # First get local variables and separate scalars and arrays + routine_arguments = routine.arguments + local_variables = tuple( + v for v in routine.variables if v not in routine_arguments + ) + local_scalars = tuple( + v.name for v in local_variables if isinstance(v, sym.Scalar) + ) + # Filter arrays by block-dim size, as these are global + local_arrays = tuple( + v.name for v in local_variables + if isinstance(v, sym.Array) and not v.dimensions[-1] == block_dim_size + ) + + with pragma_regions_attached(routine): + with dataflow_analysis_attached(routine): + for region in FindNodes(ir.PragmaRegion).visit(routine.body): + if not is_loki_pragma(region.pragma, starts_with='parallel'): + return + + # Accumulate the set of locally used symbols and chase parents + symbols = tuple(region.uses_symbols | region.defines_symbols) + symbols = tuple(dict.fromkeys(flatten( + s.parents if s.parent else s for s in symbols + ))) + + # Start with loop variables and add local scalars and arrays + local_vars = tuple(dict.fromkeys(flatten( + loop.variable for loop in FindNodes(ir.Loop).visit(region.body) + ))) + + local_vars += tuple(v for v in symbols if v.name in local_scalars) + local_vars += tuple(v for v in symbols if v.name in local_arrays) + + # Also add used symbols that might be field groups + local_vars += tuple(dict.fromkeys( + v for v in symbols + if isinstance(v, sym.Scalar) and str(v.type.dtype) in field_group_types + )) + + # Filter out known global variables + local_vars = tuple(v for v in local_vars if v not in global_variables) + + # Make field group types firstprivate + firstprivates = tuple(dict.fromkeys( + v.name for v in local_vars if v.type.dtype.name in field_group_types + )) + # Also make values that have an initial value firstprivate + firstprivates += tuple(v.name for v in local_vars if v.type.initial) + + # Mark all other variables as private + privates = tuple(dict.fromkeys( + v.name for v in local_vars if v.name not in firstprivates + )) + + s_fp_vars = ", ".join(str(v) for v in firstprivates) + s_firstprivate = f'FIRSTPRIVATE({s_fp_vars})' if firstprivates else '' + s_private = f'PRIVATE({", ".join(str(v) for v in privates)})' if privates else '' + pragma_parallel = ir.Pragma( + keyword='OMP', content=f'PARALLEL {s_private} {s_firstprivate}' + ) + region._update( + pragma=pragma_parallel, + pragma_post=ir.Pragma(keyword='OMP', content='END PARALLEL') + ) + + # And finally mark all block-dimension loops as parallel + with pragmas_attached(routine, node_type=ir.Loop): + for loop in FindNodes(ir.Loop).visit(region.body): + # Add OpenMP DO directives onto block loops + if loop.variable == 'JKGLO': + loop._update( + pragma=ir.Pragma(keyword='OMP', content='DO SCHEDULE(DYNAMIC,1)'), + pragma_post=ir.Pragma(keyword='OMP', content='END DO'), + ) diff --git a/loki/transformations/parallel/tests/test_openmp_region.py b/loki/transformations/parallel/tests/test_openmp_region.py index 75b9952ba..22d15a18d 100644 --- a/loki/transformations/parallel/tests/test_openmp_region.py +++ b/loki/transformations/parallel/tests/test_openmp_region.py @@ -7,13 +7,16 @@ import pytest -from loki import Subroutine +from loki import Subroutine, Module from loki.frontend import available_frontends from loki.ir import ( - nodes as ir, FindNodes, pragma_regions_attached, is_loki_pragma + nodes as ir, FindNodes, pragmas_attached, pragma_regions_attached, + is_loki_pragma ) -from loki.transformations.parallel import remove_openmp_regions +from loki.transformations.parallel import ( + remove_openmp_regions, add_openmp_regions +) @pytest.mark.parametrize('frontend', available_frontends()) @@ -76,3 +79,73 @@ def test_remove_openmp_regions(frontend, insert_loki_parallel): for region in pragma_regions: assert is_loki_pragma(region.pragma, starts_with='parallel') assert is_loki_pragma(region.pragma_post, starts_with='end parallel') + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_add_openmp_regions(tmp_path, frontend): + """ + A simple test for :any:`add_openmp_regions` + """ + fcode_type = """ +module geom_mod + type geom_type + integer :: nproma, ngptot + end type geom_type +end module geom_mod +""" + + fcode = """ +subroutine test_add_openmp_loop(ydgeom, arr) + use geom_mod, only: geom_type + implicit none + type(geom_type), intent(in) :: ydgeom + real(kind=8), intent(inout) :: arr(:,:,:) + integer :: JKGLO, IBL, ICEND + + !$loki parallel + + DO JKGLO=1,YDGEOM%NGPTOT,YDGEOM%NPROMA + ICEND = MIN(YDGEOM%NPROMA, YDGEOM%NGPTOT - JKGLO + 1) + IBL = (JKGLO - 1) / YDGEOM%NPROMA + 1 + + CALL MY_KERNEL(ARR(:,:,IBL)) + END DO + + !$loki end parallel + +end subroutine test_add_openmp_loop +""" + _ = Module.from_source(fcode_type, frontend=frontend, xmods=[tmp_path]) + routine = Subroutine.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + + assert len(FindNodes(ir.Pragma).visit(routine.body)) == 2 + with pragma_regions_attached(routine): + regions = FindNodes(ir.PragmaRegion).visit(routine.body) + assert len(regions) == 1 + assert regions[0].pragma.keyword == 'loki' and regions[0].pragma.content == 'parallel' + assert regions[0].pragma_post.keyword == 'loki' and regions[0].pragma_post.content == 'end parallel' + + add_openmp_regions(routine) + + # Ensure pragmas have been inserted + pragmas = FindNodes(ir.Pragma).visit(routine.body) + assert len(pragmas) == 4 + assert all(p.keyword == 'OMP' for p in pragmas) + + with pragmas_attached(routine, node_type=ir.Loop): + with pragma_regions_attached(routine): + # Ensure pragma region has been created + regions = FindNodes(ir.PragmaRegion).visit(routine.body) + assert len(regions) == 1 + assert regions[0].pragma.keyword == 'OMP' + assert regions[0].pragma.content.startswith('PARALLEL') + assert regions[0].pragma_post.keyword == 'OMP' + assert regions[0].pragma_post.content == 'END PARALLEL' + + # Ensure loops has been annotated + loops = FindNodes(ir.Loop).visit(routine.body) + assert len(loops) == 1 + assert loops[0].pragma[0].keyword == 'OMP' + assert loops[0].pragma[0].content == 'DO SCHEDULE(DYNAMIC,1)' + + # TODO: Test field_group_types and known global variables From 8e3b8db29985cc5a886fff247fe260d0c2e56a0e Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Fri, 17 May 2024 16:06:51 +0000 Subject: [PATCH 04/18] Parallel: Ensure reproducible variable ordering in pragmas --- loki/transformations/parallel/openmp_region.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/loki/transformations/parallel/openmp_region.py b/loki/transformations/parallel/openmp_region.py index 71f1603f0..efbdf75ae 100644 --- a/loki/transformations/parallel/openmp_region.py +++ b/loki/transformations/parallel/openmp_region.py @@ -106,11 +106,11 @@ def add_openmp_regions(routine, global_variables=None, field_group_types=None): v for v in routine.variables if v not in routine_arguments ) local_scalars = tuple( - v.name for v in local_variables if isinstance(v, sym.Scalar) + v for v in local_variables if isinstance(v, sym.Scalar) ) # Filter arrays by block-dim size, as these are global local_arrays = tuple( - v.name for v in local_variables + v for v in local_variables if isinstance(v, sym.Array) and not v.dimensions[-1] == block_dim_size ) @@ -131,17 +131,17 @@ def add_openmp_regions(routine, global_variables=None, field_group_types=None): loop.variable for loop in FindNodes(ir.Loop).visit(region.body) ))) - local_vars += tuple(v for v in symbols if v.name in local_scalars) - local_vars += tuple(v for v in symbols if v.name in local_arrays) + local_vars += tuple(v for v in local_scalars if v.name in symbols) + local_vars += tuple(v for v in local_arrays if v.name in symbols ) # Also add used symbols that might be field groups local_vars += tuple(dict.fromkeys( - v for v in symbols - if isinstance(v, sym.Scalar) and str(v.type.dtype) in field_group_types + v for v in routine_arguments + if v.name in symbols and str(v.type.dtype) in field_group_types )) # Filter out known global variables - local_vars = tuple(v for v in local_vars if v not in global_variables) + local_vars = tuple(v for v in local_vars if v.name not in global_variables) # Make field group types firstprivate firstprivates = tuple(dict.fromkeys( From 474681a31c5513e50c6d446ecc9297843af0e6f9 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Fri, 18 Oct 2024 18:51:22 +0000 Subject: [PATCH 05/18] Parallel: Add utility to remove parallel block loops --- loki/transformations/parallel/__init__.py | 1 + loki/transformations/parallel/block_loop.py | 54 +++++++++++++ .../parallel/tests/test_block_loop.py | 81 +++++++++++++++++++ 3 files changed, 136 insertions(+) create mode 100644 loki/transformations/parallel/block_loop.py create mode 100644 loki/transformations/parallel/tests/test_block_loop.py diff --git a/loki/transformations/parallel/__init__.py b/loki/transformations/parallel/__init__.py index 9020b472d..825921022 100644 --- a/loki/transformations/parallel/__init__.py +++ b/loki/transformations/parallel/__init__.py @@ -10,4 +10,5 @@ regions. """ +from loki.transformations.parallel.block_loop import * # noqa from loki.transformations.parallel.openmp_region import * # noqa diff --git a/loki/transformations/parallel/block_loop.py b/loki/transformations/parallel/block_loop.py new file mode 100644 index 000000000..8671c3f44 --- /dev/null +++ b/loki/transformations/parallel/block_loop.py @@ -0,0 +1,54 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +""" +Transformation utilities to remove and generate parallel block loops. +""" + +from loki.ir import nodes as ir, FindNodes, Transformer +from loki.tools import as_tuple + + +__all__ = ['remove_block_loops'] + + +def remove_block_loops(routine, dimension): + """ + Remove any outer block :any:`Loop` from a given :any:`Subroutine. + + The loops are identified accoerding to a given :any:`Dimension` + object, and will remove auxiliary assignments of index and bound + variables, as commongly used in IFS-style block loops. + + Parameters + ---------- + routine: :any:`Subroutine` + Subroutine from which to remove block loops + dimension : :any:`Dimension` + The dimension object describing loop variables + """ + idx = dimension.index + variables = as_tuple(dimension.indices) + variables += as_tuple(dimension.lower) + variables += as_tuple(dimension.upper) + + class RemoveBlockLoopTransformer(Transformer): + """ + :any:`Transformer` to remove driver-level block loops. + """ + + def visit_Loop(self, loop, **kwargs): # pylint: disable=unused-argument + if not loop.variable == idx: + return loop + + to_remove = tuple( + a for a in FindNodes(ir.Assignment).visit(loop.body) + if a.lhs in variables + ) + return tuple(n for n in loop.body if n not in to_remove) + + routine.body = RemoveBlockLoopTransformer().visit(routine.body) diff --git a/loki/transformations/parallel/tests/test_block_loop.py b/loki/transformations/parallel/tests/test_block_loop.py new file mode 100644 index 000000000..29c6d327e --- /dev/null +++ b/loki/transformations/parallel/tests/test_block_loop.py @@ -0,0 +1,81 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import pytest + +from loki import Subroutine, Module, Dimension +from loki.frontend import available_frontends +from loki.ir import nodes as ir, FindNodes + +from loki.transformations.parallel import remove_block_loops + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_remove_block_loops(tmp_path, frontend): + """ + A simple test for :any:`remove_block_loops` + """ + fcode_type = """ +module geom_mod + type geom_type + integer :: nproma, ngptot + end type geom_type +end module geom_mod +""" + + fcode = """ +subroutine test_remove_block_loop(ydgeom, npoints, nlev, arr) + use geom_mod, only: geom_type + implicit none + type(geom_type), intent(in) :: ydgeom + integer(kind=4), intent(in) :: npoints, nlev + real(kind=8), intent(inout) :: arr(:,:,:) + integer :: JKGLO, IBL, ICEND, JK, JL + + DO JKGLO=1,YDGEOM%NGPTOT,YDGEOM%NPROMA + ICEND = MIN(YDGEOM%NPROMA, YDGEOM%NGPTOT - JKGLO + 1) + IBL = (JKGLO - 1) / YDGEOM%NPROMA + 1 + + CALL MY_KERNEL(ARR(:,:,IBL)) + END DO + + + DO JKGLO=1,YDGEOM%NGPTOT,YDGEOM%NPROMA + ICEND = MIN(YDGEOM%NPROMA, YDGEOM%NGPTOT - JKGLO + 1) + IBL = (JKGLO - 1) / YDGEOM%NPROMA + 1 + + DO JK=1, NLEV + DO JL=1, NPOINTS + ARR(JL, JK, IBL) = 42.0 + END DO + END DO + END DO +end subroutine test_remove_block_loop +""" + _ = Module.from_source(fcode_type, frontend=frontend, xmods=[tmp_path]) + routine = Subroutine.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + + loops = FindNodes(ir.Loop).visit(routine.body) + assert len(loops) == 4 + assert loops[0].variable == 'jkglo' + assert loops[1].variable == 'jkglo' + assert loops[2].variable == 'jk' + assert loops[3].variable == 'jl' + assert len(FindNodes(ir.Assignment).visit(loops[0].body)) == 2 + assert len(FindNodes(ir.Assignment).visit(loops[1].body)) == 3 + + block = Dimension( + 'block', index=('jkglo', 'ibl'), step='YDGEOM%NPROMA', + lower=('1', 'ICST'), upper=('YDGEOM%NGPTOT', 'ICEND') + ) + remove_block_loops(routine, dimension=block) + + loops = FindNodes(ir.Loop).visit(routine.body) + assert len(loops) == 2 + assert loops[0].variable == 'jk' + assert loops[1].variable == 'jl' + assert len(FindNodes(ir.Assignment).visit(routine.body)) == 1 From 167a9ba25ec99993b5bc2a34305d750aa0534d48 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Sat, 19 Oct 2024 03:46:51 +0000 Subject: [PATCH 06/18] Parallel: Add utility to insert block loops into routines --- loki/transformations/parallel/block_loop.py | 87 ++++++++++++++++++- .../parallel/tests/test_block_loop.py | 70 ++++++++++++++- 2 files changed, 154 insertions(+), 3 deletions(-) diff --git a/loki/transformations/parallel/block_loop.py b/loki/transformations/parallel/block_loop.py index 8671c3f44..0203ead18 100644 --- a/loki/transformations/parallel/block_loop.py +++ b/loki/transformations/parallel/block_loop.py @@ -9,11 +9,17 @@ Transformation utilities to remove and generate parallel block loops. """ -from loki.ir import nodes as ir, FindNodes, Transformer +from loki.expression import symbols as sym +from loki.ir import ( + nodes as ir, FindNodes, Transformer, pragma_regions_attached, + is_loki_pragma +) +from loki.scope import SymbolAttributes from loki.tools import as_tuple +from loki.types import BasicType -__all__ = ['remove_block_loops'] +__all__ = ['remove_block_loops', 'add_block_loops'] def remove_block_loops(routine, dimension): @@ -52,3 +58,80 @@ def visit_Loop(self, loop, **kwargs): # pylint: disable=unused-argument return tuple(n for n in loop.body if n not in to_remove) routine.body = RemoveBlockLoopTransformer().visit(routine.body) + + +def add_block_loops(routine, dimension, default_type=None): + """ + Insert IFS-style (NPROMA) driver block-loops in ``!$loki + parallel`` regions. + + The provided :any:`Dimension` object describes the variables to + used when generating the loop and default assignments. It + encapsulates IFS-specific convention, where a strided loop over + points, defined by ``dimension.index``, ``dimension.bounds`` and + ``dimension.step`` is created, alongside assignments that define + the corresponding block index and upper bound, defined by + ``dimension.indices[1]`` and ``dimension.upper[1]`` repsectively. + + Parameters + ---------- + routine : :any:`Subroutine` + The routine in which to add block loops. + dimension : :any:`Dimension` + The dimension object describing the block loop variables. + default_type : :any:`SymbolAttributes`, optional + Default type to use when creating variables; defaults to + ``integer(kind=JPIM)``. + """ + + _default = SymbolAttributes(BasicType.INTEGER, kind='JPIM') + dtype = default_type if default_type else _default + + # TODO: Explain convention in docstring + lidx = routine.parse_expr(dimension.index) + bidx = routine.parse_expr(dimension.indices[1]) + bupper = routine.parse_expr(dimension.upper[1]) + + # Ensure that local integer variables are declared + for v in (lidx, bupper, bidx): + if not v in routine.variable_map: + routine.variables += (v.clone(type=dtype),) + + def _create_block_loop(body, scope): + """ + Generate block loop object, including indexing preamble + """ + + bsize = scope.parse_expr(dimension.step) + lupper = scope.parse_expr(dimension.upper[0]) + lrange = sym.LoopRange((sym.Literal(1), lupper, bsize)) + + expr_tail = scope.parse_expr(f'{lupper}-{lidx}+1') + expr_max = sym.InlineCall( + function=sym.ProcedureSymbol('MIN', scope=scope), parameters=(bsize, expr_tail) + ) + preamble = (ir.Assignment(lhs=bupper, rhs=expr_max),) + preamble += (ir.Assignment( + lhs=bidx, rhs=scope.parse_expr(f'({lidx}-1)/{bsize}+1') + ),) + + return ir.Loop(variable=lidx, bounds=lrange, body=preamble + body) + + class InsertBlockLoopTransformer(Transformer): + + def visit_PragmaRegion(self, region, **kwargs): + """ + (Re-)insert driver-level block loops into marked parallel region. + """ + if not is_loki_pragma(region.pragma, starts_with='parallel'): + return region + + scope = kwargs.get('scope') + + loop = _create_block_loop(body=region.body, scope=scope) + + region._update(body=(ir.Comment(''), loop)) + return region + + with pragma_regions_attached(routine): + routine.body = InsertBlockLoopTransformer().visit(routine.body, scope=routine) diff --git a/loki/transformations/parallel/tests/test_block_loop.py b/loki/transformations/parallel/tests/test_block_loop.py index 29c6d327e..46dabe267 100644 --- a/loki/transformations/parallel/tests/test_block_loop.py +++ b/loki/transformations/parallel/tests/test_block_loop.py @@ -11,7 +11,9 @@ from loki.frontend import available_frontends from loki.ir import nodes as ir, FindNodes -from loki.transformations.parallel import remove_block_loops +from loki.transformations.parallel import ( + remove_block_loops, add_block_loops +) @pytest.mark.parametrize('frontend', available_frontends()) @@ -79,3 +81,69 @@ def test_remove_block_loops(tmp_path, frontend): assert loops[0].variable == 'jk' assert loops[1].variable == 'jl' assert len(FindNodes(ir.Assignment).visit(routine.body)) == 1 + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_add_block_loops(frontend): + """ + A simple test for :any:`add_block_loops` + """ + fcode = """ +subroutine test_add_block_loop(ydgeom, npoints, nlev, arr) + integer(kind=4), intent(in) :: npoints, nlev + real(kind=8), intent(inout) :: arr(:,:,:) + integer :: JKGLO, IBL, ICEND + +!$loki parallel + call my_kernel(arr(:,:,ibl)) +!$loki end parallel + +!$loki parallel + do jk=1, nlev + do jl=1, npoints + arr(jl, jk, ibl) = 42.0 + end do + end do +!$loki end parallel +end subroutine test_add_block_loop +""" + routine = Subroutine.from_source(fcode, frontend=frontend) + + loops = FindNodes(ir.Loop).visit(routine.body) + assert len(loops) == 2 + assert loops[0].variable == 'jk' + assert loops[1].variable == 'jl' + assert len(FindNodes(ir.Assignment).visit(routine.body)) == 1 + + blocking = Dimension( + name='block', index=('JKGLO', 'IBL'), + lower=('1', 'ICST'), + upper=('YDGEOM%NGPTOT', 'ICEND'), + step='YDGEOM%NPROMA', + size='YDGEOM%NGPBLKS', + ) + + add_block_loops(routine, dimension=blocking) + + loops = FindNodes(ir.Loop).visit(routine.body) + assert len(loops) == 4 + assert loops[0].variable == 'jkglo' + assert loops[1].variable == 'jkglo' + assert loops[2].variable == 'jk' + assert loops[3].variable == 'jl' + + assigns1 = FindNodes(ir.Assignment).visit(loops[0].body) + assert len(assigns1) == 2 + assert assigns1[0].lhs == 'ICEND' + assert str(assigns1[0].rhs) == 'MIN(YDGEOM%NPROMA, YDGEOM%NGPTOT - JKGLO + 1)' + assert assigns1[1].lhs == 'IBL' + assert assigns1[1].rhs == '(JKGLO - 1) / YDGEOM%NPROMA + 1' + + assigns2 = FindNodes(ir.Assignment).visit(loops[1].body) + assert len(assigns2) == 3 + assert assigns2[0].lhs == 'ICEND' + assert str(assigns2[0].rhs) == 'MIN(YDGEOM%NPROMA, YDGEOM%NGPTOT - JKGLO + 1)' + assert assigns2[1].lhs == 'IBL' + assert assigns2[1].rhs == '(JKGLO - 1) / YDGEOM%NPROMA + 1' + assert assigns2[2].lhs == 'arr(jl, jk, ibl)' + assert assigns2[2].rhs == 42.0 From 2fdb5801e4c2de4dec0d4f22ed00061be0445b4e Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Sat, 19 Oct 2024 04:05:33 +0000 Subject: [PATCH 07/18] Parallel: Add utility to add/remove FIELD-API view updates --- loki/transformations/parallel/__init__.py | 1 + loki/transformations/parallel/field_api.py | 154 ++++++++++++++++++ .../parallel/tests/test_field_api.py | 121 ++++++++++++++ 3 files changed, 276 insertions(+) create mode 100644 loki/transformations/parallel/field_api.py create mode 100644 loki/transformations/parallel/tests/test_field_api.py diff --git a/loki/transformations/parallel/__init__.py b/loki/transformations/parallel/__init__.py index 825921022..0071a9814 100644 --- a/loki/transformations/parallel/__init__.py +++ b/loki/transformations/parallel/__init__.py @@ -11,4 +11,5 @@ """ from loki.transformations.parallel.block_loop import * # noqa +from loki.transformations.parallel.field_api import * # noqa from loki.transformations.parallel.openmp_region import * # noqa diff --git a/loki/transformations/parallel/field_api.py b/loki/transformations/parallel/field_api.py new file mode 100644 index 000000000..2648276a2 --- /dev/null +++ b/loki/transformations/parallel/field_api.py @@ -0,0 +1,154 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +""" +Transformation utilities to manage and inject FIELD-API boilerplate code. +""" + +from loki.expression import symbols as sym +from loki.ir import ( + nodes as ir, FindNodes, FindVariables, Transformer +) +from loki.logging import warning +from loki.tools import as_tuple + + +__all__ = [ + 'remove_field_api_view_updates', 'add_field_api_view_updates' +] + + +def remove_field_api_view_updates(routine, field_group_types, dim_object=None): + """ + Remove FIELD API boilerplate calls for view updates of derived types. + + This utility is intended to remove the IFS-specific group type + objects that provide block-scope view pointers to deep kernel + trees. It will remove all calls to ``UPDATE_VIEW`` on derive-type + objects with the respective types. + + Parameters + ---------- + routine : :any:`Subroutine` + The routine from which to remove FIELD API update calls + field_group_types : tuple of str + List of names of the derived types of "field group" objects to remove + dim_object : str, optional + Optional name of the "dimension" object; if provided it will remove the + call to ``%UPDATE(...)`` accordingly. + """ + field_group_types = as_tuple(field_group_types) + + class RemoveFieldAPITransformer(Transformer): + + def visit_CallStatement(self, call, **kwargs): # pylint: disable=unused-argument + + if '%update_view' in str(call.name).lower(): + if not call.name.parent: + warning(f'[Loki::ControlFlow] Removing {call.name} call without parent!') + if not str(call.name.parent.type.dtype) in field_group_types: + warning(f'[Loki::ControlFlow] Removing {call.name} call, but not in field group types!') + + return None + + if dim_object and f'{dim_object}%update'.lower() in str(call.name).lower(): + return None + + return call + + def visit_Assignment(self, assign, **kwargs): # pylint: disable=unused-argument + if assign.lhs.type.dtype in field_group_types: + warning(f'[Loki::ControlFlow] Found LHS field group assign: {assign}') + return assign + + def visit_Loop(self, loop, **kwargs): + loop = self.visit_Node(loop, **kwargs) + return loop if loop.body else None + + def visit_Conditional(self, cond, **kwargs): + cond = super().visit_Node(cond, **kwargs) + return cond if cond.body else None + + routine.body = RemoveFieldAPITransformer().visit(routine.body) + + +def add_field_api_view_updates(routine, dimension, field_group_types, dim_object=None): + """ + Adds FIELD API boilerplate calls for view updates. + + The provided :any:`Dimension` object describes the local loop variables to + pass to the respective update calls. In particular, ``dimension.indices[1]`` + is used to denote the block loop index that is passed to ``UPDATE_VIEW()`` + calls on field group object. The list of type names ``field_group_types`` + is used to identify for which objcets the view update calls get added. + + Parameters + ---------- + routine : :any:`Subroutine` + The routine from which to remove FIELD API update calls + dimension : :any:`Dimension` + The dimension object describing the block loop variables. + field_group_types : tuple of str + List of names of the derived types of "field group" objects to remove + dim_object : str, optional + Optional name of the "dimension" object; if provided it will remove the + call to ``%UPDATE(...)`` accordingly. + """ + + def _create_dim_update(scope, dim_object): + index = scope.parse_expr(dimension.index) + upper = scope.parse_expr(dimension.upper[1]) + bindex = scope.parse_expr(dimension.indices[1]) + idims = scope.get_symbol(dim_object) + csym = sym.ProcedureSymbol(name='UPDATE', parent=idims, scope=idims.scope) + return ir.CallStatement(name=csym, arguments=(bindex, upper, index), kwarguments=()) + + def _create_view_updates(section, scope): + bindex = scope.parse_expr(dimension.indices[1]) + + fgroup_vars = sorted(tuple( + v for v in FindVariables(unique=True).visit(section) + if str(v.type.dtype) in field_group_types + ), key=str) + calls = () + for fgvar in fgroup_vars: + fgsym = scope.get_symbol(fgvar.name) + csym = sym.ProcedureSymbol(name='UPDATE_VIEW', parent=fgsym, scope=fgsym.scope) + calls += (ir.CallStatement(name=csym, arguments=(bindex,), kwarguments=()),) + + return calls + + class InsertFieldAPIViewsTransformer(Transformer): + """ Injects FIELD-API view updates into block loops """ + + def visit_Loop(self, loop, **kwargs): # pylint: disable=unused-argument + if not loop.variable == 'JKGLO': + return loop + + scope = kwargs.get('scope') + + # Find the loop-setup assignments + _loop_symbols = dimension.indices + _loop_symbols += as_tuple(dimension.lower) + as_tuple(dimension.upper) + loop_setup = tuple( + a for a in FindNodes(ir.Assignment).visit(loop.body) + if a.lhs in _loop_symbols + ) + idx = max(loop.body.index(a) for a in loop_setup) + 1 + + # Prepend FIELD API boilerplate + preamble = ( + ir.Comment(''), ir.Comment('! Set up thread-local view pointers') + ) + if dim_object: + preamble += (_create_dim_update(scope, dim_object=dim_object),) + preamble += _create_view_updates(loop.body, scope) + + loop._update(body=loop.body[:idx] + preamble + loop.body[idx:]) + return loop + + routine.body = InsertFieldAPIViewsTransformer().visit(routine.body, scope=routine) diff --git a/loki/transformations/parallel/tests/test_field_api.py b/loki/transformations/parallel/tests/test_field_api.py new file mode 100644 index 000000000..1dbc76d10 --- /dev/null +++ b/loki/transformations/parallel/tests/test_field_api.py @@ -0,0 +1,121 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import pytest + +from loki import Subroutine, Module, Dimension +from loki.frontend import available_frontends, OMNI +from loki.ir import nodes as ir, FindNodes + +from loki.transformations.parallel import ( + remove_field_api_view_updates, add_field_api_view_updates +) + + +@pytest.mark.parametrize('frontend', available_frontends( + skip=[(OMNI, 'OMNI needs full type definitions for derived types')] +)) +def test_field_api_remove_view_updates(frontend): + """ + A simple test for :any:`remove_field_api_view_updates` + """ + + fcode = """ +subroutine test_remove_block_loop(ngptot, nproma, nflux, dims, state, aux_fields, fluxes) + implicit none + integer(kind=4), intent(in) :: ngptot, nproma, nflux + type(dimension_type), intent(inout) :: dims + type(state_type), intent(inout) :: state + type(aux_type), intent(inout) :: aux_fields + type(flux_type), intent(inout) :: fluxes(nflux) + + integer :: JKGLO, IBL, ICEND, JK, JL, JF + + DO jkglo=1, ngptot, nproma + icend = min(nproma, ngptot - JKGLO + 1) + ibl = (jkglo - 1) / nproma + 1 + + CALL DIMS%UPDATE(IBL, ICEND, JKGLO) + CALL STATE%UPDATE_VIEW(IBL) + CALL AUX_FIELDS%UPDATE_VIEW(block_index=IBL) + DO jf=1, nflux + CALL FLUXES(JF)%UPDATE_VIEW(IBL) + END DO + + CALL MY_KERNEL(STATE%U, STATE%V, AUX_FIELDS%STUFF, FLUXES(1)%FOO, FLUXES(2)%BAR) + END DO +end subroutine test_remove_block_loop +""" + routine = Subroutine.from_source(fcode, frontend=frontend) + + assert len(FindNodes(ir.CallStatement).visit(routine.body)) == 5 + assert len(FindNodes(ir.Loop).visit(routine.body)) == 2 + + field_group_types = ['state_type', 'aux_type', 'flux_type'] + remove_field_api_view_updates( + routine, field_group_types=field_group_types, dim_object='DIMS' + ) + + calls = FindNodes(ir.CallStatement).visit(routine.body) + assert len(calls) == 1 + assert calls[0].name == 'MY_KERNEL' + + loops = FindNodes(ir.Loop).visit(routine.body) + assert len(loops) == 1 + assert loops[0].variable == 'jkglo' + + +@pytest.mark.parametrize('frontend', available_frontends( + skip=[(OMNI, 'OMNI needs full type definitions for derived types')] +)) +def test_field_api_add_view_updates(frontend): + """ + A simple test for :any:`add_field_api_view_updates`. + """ + + fcode = """ +subroutine test_remove_block_loop(ngptot, nproma, nflux, dims, state, aux_fields, fluxes) + implicit none + integer(kind=4), intent(in) :: ngptot, nproma, nflux + type(dimension_type), intent(inout) :: dims + type(state_type), intent(inout) :: state + type(aux_type), intent(inout) :: aux_fields + type(flux_type), intent(inout) :: fluxes + + integer :: JKGLO, IBL, ICEND, JK, JL, JF + + DO jkglo=1, ngptot, nproma + icend = min(nproma, ngptot - jkglo + 1) + ibl = (jkglo - 1) / nproma + 1 + + CALL MY_KERNEL(STATE%U, STATE%V, AUX_FIELDS%STUFF, FLUXES%FOO, FLUXES%BAR) + END DO +end subroutine test_remove_block_loop +""" + routine = Subroutine.from_source(fcode, frontend=frontend) + + assert len(FindNodes(ir.CallStatement).visit(routine.body)) == 1 + assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 + + block = Dimension( + index=('jkglo', 'ibl'), step='NPROMA', + lower=('1', 'ICST'), upper=('NGPTOT', 'ICEND') + ) + field_group_types = ['state_type', 'aux_type', 'flux_type'] + add_field_api_view_updates( + routine, dimension=block, field_group_types=field_group_types, + dim_object='DIMS' + ) + + calls = FindNodes(ir.CallStatement).visit(routine.body) + assert len(calls) == 5 + assert calls[0].name == 'DIMS%UPDATE' and calls[0].arguments == ('IBL', 'ICEND', 'JKGLO') + assert calls[1].name == 'AUX_FIELDS%UPDATE_VIEW' and calls[1].arguments == ('IBL',) + assert calls[2].name == 'FLUXES%UPDATE_VIEW' and calls[2].arguments == ('IBL',) + assert calls[3].name == 'STATE%UPDATE_VIEW' and calls[3].arguments == ('IBL',) + + assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 From efe4be83104c474a04c74cb1c0ee6368ece1f7b1 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Sat, 19 Oct 2024 04:13:27 +0000 Subject: [PATCH 08/18] Parallel: Add utility to manage explicit firstprivate copies --- .../transformations/parallel/openmp_region.py | 102 +++++++++++++- .../parallel/tests/test_openmp_region.py | 129 +++++++++++++++++- 2 files changed, 225 insertions(+), 6 deletions(-) diff --git a/loki/transformations/parallel/openmp_region.py b/loki/transformations/parallel/openmp_region.py index efbdf75ae..69b111401 100644 --- a/loki/transformations/parallel/openmp_region.py +++ b/loki/transformations/parallel/openmp_region.py @@ -10,15 +10,21 @@ """ from loki.analyse import dataflow_analysis_attached -from loki.expression import symbols as sym +from loki.expression import symbols as sym, parse_expr from loki.ir import ( - nodes as ir, FindNodes, Transformer, is_loki_pragma, - pragmas_attached, pragma_regions_attached + nodes as ir, FindNodes, FindVariables, Transformer, + SubstituteStringExpressions, is_loki_pragma, pragmas_attached, + pragma_regions_attached ) from loki.tools import dict_override, flatten +from loki.types import DerivedType -__all__ = ['remove_openmp_regions', 'add_openmp_regions'] +__all__ = [ + 'remove_openmp_regions', 'add_openmp_regions', + 'remove_explicit_firstprivatisation', + 'create_explicit_firstprivatisation' +] def remove_openmp_regions(routine, insert_loki_parallel=False): @@ -175,3 +181,91 @@ def add_openmp_regions(routine, global_variables=None, field_group_types=None): pragma=ir.Pragma(keyword='OMP', content='DO SCHEDULE(DYNAMIC,1)'), pragma_post=ir.Pragma(keyword='OMP', content='END DO'), ) + + +def remove_explicit_firstprivatisation(region, fprivate_map, scope): + """ + Removes an IFS-specific workaround, where complex derived-type + objects are explicitly copied into a local copy of the object to + avoid erroneous firstprivatisation in OpenMP loops. + + Parameters + ---------- + region : tuple of :any:`Node` + The code region from which to remove firstprivate copies + fprivate_map : dict of (str, str) + String mapping of local-to-global names for explicitly + privatised objects + scope : :any:`Scope` + Scope to use for symbol susbtitution + """ + + class RemoveExplicitCopyTransformer(Transformer): + """ Remove assignments that match the firstprivatisation map """ + + def visit_Assignment(self, assign, **kwargs): # pylint: disable=unused-argument + if not isinstance(assign.lhs.type.dtype, DerivedType): + return assign + + lhs = assign.lhs.name + if lhs in fprivate_map and assign.rhs == fprivate_map[lhs]: + return None + return assign + + # Strip assignments of local copies + region = RemoveExplicitCopyTransformer().visit(region) + + # Invert the local use of the private copy + return SubstituteStringExpressions(fprivate_map, scope=scope).visit(region) + + +def create_explicit_firstprivatisation(routine, fprivate_map): + """ + Injects IFS-specific thread-local copies of named complex derived + type objects in parallel regions. This is to prevent issues with + firstprivate variables in OpenMP loops. + + Parameters + ---------- + routine : :any:`Subroutine` + Subroutine in which to insert privatisation copies + fprivate_map : dict of (str, str) + String mapping of local-to-global names for explicitly + privatised objects + """ + inverse_map = {v: k for k, v in fprivate_map.items()} + + # Ensure the local object copies are declared + for lcl, gbl in fprivate_map.items(): + lhs = parse_expr(lcl, scope=routine) + rhs = parse_expr(gbl, scope=routine) + if not lhs in routine.variable_map: + routine.variables += (lhs.clone(type=rhs.type.clone(intent=None)),) + + class InjectExplicitCopyTransformer(Transformer): + """" Inject assignments that match the firstprivate map in parallel regions """ + + def visit_PragmaRegion(self, region, **kwargs): # pylint: disable=unused-argument + # Apply to pragma-marked "parallel" regions only + if not is_loki_pragma(region.pragma, starts_with='parallel'): + return region + + # Collect the explicit privatisation copies + lvars = FindVariables(unique=True).visit(region.body) + assigns = () + for lcl, gbl in fprivate_map.items(): + lhs = parse_expr(lcl, scope=routine) + rhs = parse_expr(gbl, scope=routine) + if rhs in lvars: + assigns += (ir.Assignment(lhs=lhs, rhs=rhs),) + + # Remap from global to local name in marked regions + region = SubstituteStringExpressions(inverse_map, scope=routine).visit(region) + + # Add the copies and return + region.prepend(assigns) + return region + + with pragma_regions_attached(routine): + # Inject assignments of local copies + routine.body = InjectExplicitCopyTransformer().visit(routine.body) diff --git a/loki/transformations/parallel/tests/test_openmp_region.py b/loki/transformations/parallel/tests/test_openmp_region.py index 22d15a18d..61fd7d742 100644 --- a/loki/transformations/parallel/tests/test_openmp_region.py +++ b/loki/transformations/parallel/tests/test_openmp_region.py @@ -8,14 +8,16 @@ import pytest from loki import Subroutine, Module -from loki.frontend import available_frontends +from loki.frontend import available_frontends, OMNI from loki.ir import ( nodes as ir, FindNodes, pragmas_attached, pragma_regions_attached, is_loki_pragma ) from loki.transformations.parallel import ( - remove_openmp_regions, add_openmp_regions + remove_openmp_regions, add_openmp_regions, + remove_explicit_firstprivatisation, + create_explicit_firstprivatisation ) @@ -149,3 +151,126 @@ def test_add_openmp_regions(tmp_path, frontend): assert loops[0].pragma[0].content == 'DO SCHEDULE(DYNAMIC,1)' # TODO: Test field_group_types and known global variables + + +@pytest.mark.parametrize('frontend', available_frontends( + skip=[(OMNI, 'OMNI needs full type definitions for derived types')] +)) +def test_remove_explicit_firstprivatisation(frontend): + """ + A simple test for :any:`remove_explicit_firstprivatisation` + """ + fcode = """ +subroutine test_add_openmp_loop(ydgeom, state, arr) + use geom_mod, only: geom_type + implicit none + type(geom_type), intent(in) :: ydgeom + real(kind=8), intent(inout) :: arr(:,:,:) + type(state_type), intent(in) :: state + type(state_type) :: ydstate + integer :: jkglo, ibl, icend + + !$loki parallel + + ydstate = state + + do jkglo=1,ydgeom%ngptot,ydgeom%nproma + icend = min(ydgeom%nproma, ydgeom%ngptot - jkglo + 1) + ibl = (jkglo - 1) / ydgeom%nproma + 1 + + call ydstate%update_view(ibl) + + call my_kernel(ydstate%u(:,:), arr(:,:,ibl)) + end do + + !$loki end parallel +end subroutine test_add_openmp_loop +""" + routine = Subroutine.from_source(fcode, frontend=frontend) + + fprivate_map = {'ydstate' : 'state'} + + assigns = FindNodes(ir.Assignment).visit(routine.body) + assert len(assigns) == 3 + assert assigns[0].lhs == 'ydstate' and assigns[0].rhs == 'state' + calls = FindNodes(ir.CallStatement).visit(routine.body) + assert len(calls) == 2 + assert str(calls[0].name).startswith('ydstate%') + assert calls[1].arguments[0].parent == 'ydstate' + assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 + + # Remove the explicit copy of `ydstate = state` and adjust symbols + routine.body = remove_explicit_firstprivatisation( + region=routine.body, fprivate_map=fprivate_map, scope=routine + ) + + # Check removal and symbol replacement + assigns = FindNodes(ir.Assignment).visit(routine.body) + assert len(assigns) == 2 + assert assigns[0].lhs == 'icend' + assert assigns[1].lhs == 'ibl' + calls = FindNodes(ir.CallStatement).visit(routine.body) + assert len(calls) == 2 + assert str(calls[0].name).startswith('state%') + assert calls[1].arguments[0].parent == 'state' + assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 + + +@pytest.mark.parametrize('frontend', available_frontends( + skip=[(OMNI, 'OMNI needs full type definitions for derived types')] +)) +def test_create_explicit_firstprivatisation(tmp_path, frontend): + """ + A simple test for :any:`create_explicit_firstprivatisation` + """ + + fcode = """ +subroutine test_add_openmp_loop(ydgeom, state, arr) + use geom_mod, only: geom_type + implicit none + type(geom_type), intent(in) :: ydgeom + real(kind=8), intent(inout) :: arr(:,:,:) + type(state_type), intent(in) :: state + integer :: jkglo, ibl, icend + + !$loki parallel + + do jkglo=1,ydgeom%ngptot,ydgeom%nproma + icend = min(ydgeom%nproma, ydgeom%ngptot - jkglo + 1) + ibl = (jkglo - 1) / ydgeom%nproma + 1 + + call state%update_view(ibl) + + call my_kernel(state%u(:,:), arr(:,:,ibl)) + end do + + !$loki end parallel +end subroutine test_add_openmp_loop +""" + routine = Subroutine.from_source(fcode, frontend=frontend) + + fprivate_map = {'ydstate' : 'state'} + + assigns = FindNodes(ir.Assignment).visit(routine.body) + assert len(assigns) == 2 + assert assigns[0].lhs == 'icend' + assert assigns[1].lhs == 'ibl' + calls = FindNodes(ir.CallStatement).visit(routine.body) + assert len(calls) == 2 + assert str(calls[0].name).startswith('state%') + assert calls[1].arguments[0].parent == 'state' + assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 + + # Put the explicit firstprivate copies back in + create_explicit_firstprivatisation( + routine=routine, fprivate_map=fprivate_map + ) + + assigns = FindNodes(ir.Assignment).visit(routine.body) + assert len(assigns) == 3 + assert assigns[0].lhs == 'ydstate' and assigns[0].rhs == 'state' + calls = FindNodes(ir.CallStatement).visit(routine.body) + assert len(calls) == 2 + assert str(calls[0].name).startswith('ydstate%') + assert calls[1].arguments[0].parent == 'ydstate' + assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 From 180fd83dbb251cbf74daddbaa1fcdcc8ccb6fded Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Sun, 3 Nov 2024 05:29:00 +0000 Subject: [PATCH 09/18] Parallel: Add `DEFAULT(SHARED)` to OpenMP parallel clause creation --- loki/transformations/parallel/openmp_region.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loki/transformations/parallel/openmp_region.py b/loki/transformations/parallel/openmp_region.py index 69b111401..3a8d08633 100644 --- a/loki/transformations/parallel/openmp_region.py +++ b/loki/transformations/parallel/openmp_region.py @@ -165,7 +165,7 @@ def add_openmp_regions(routine, global_variables=None, field_group_types=None): s_firstprivate = f'FIRSTPRIVATE({s_fp_vars})' if firstprivates else '' s_private = f'PRIVATE({", ".join(str(v) for v in privates)})' if privates else '' pragma_parallel = ir.Pragma( - keyword='OMP', content=f'PARALLEL {s_private} {s_firstprivate}' + keyword='OMP', content=f'PARALLEL DEFAULT(SHARED) {s_private} {s_firstprivate}' ) region._update( pragma=pragma_parallel, From b973bbb10bf9a5b1dc43a4037740b156f381e19b Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Mon, 11 Nov 2024 13:55:47 +0000 Subject: [PATCH 10/18] Parallel: Disable OMNI for OpenMP region insertion test OMNI has trouble when combining Loop and region pragmas, since it swallows the empy lines we use to differentiate them. --- loki/transformations/parallel/tests/test_openmp_region.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/loki/transformations/parallel/tests/test_openmp_region.py b/loki/transformations/parallel/tests/test_openmp_region.py index 61fd7d742..df1018a39 100644 --- a/loki/transformations/parallel/tests/test_openmp_region.py +++ b/loki/transformations/parallel/tests/test_openmp_region.py @@ -83,7 +83,9 @@ def test_remove_openmp_regions(frontend, insert_loki_parallel): assert is_loki_pragma(region.pragma_post, starts_with='end parallel') -@pytest.mark.parametrize('frontend', available_frontends()) +@pytest.mark.parametrize('frontend', available_frontends( + skip=[(OMNI, 'OMNI has trouble mixing Loop and Section pragmas')] +)) def test_add_openmp_regions(tmp_path, frontend): """ A simple test for :any:`add_openmp_regions` From db6df942320da36296be24e8c7b18b210f91619f Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Mon, 11 Nov 2024 13:56:50 +0000 Subject: [PATCH 11/18] Parallel: Add camel-case testing and check generated warnings --- .../parallel/tests/test_field_api.py | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/loki/transformations/parallel/tests/test_field_api.py b/loki/transformations/parallel/tests/test_field_api.py index 1dbc76d10..be154e32b 100644 --- a/loki/transformations/parallel/tests/test_field_api.py +++ b/loki/transformations/parallel/tests/test_field_api.py @@ -10,6 +10,7 @@ from loki import Subroutine, Module, Dimension from loki.frontend import available_frontends, OMNI from loki.ir import nodes as ir, FindNodes +from loki.logging import WARNING from loki.transformations.parallel import ( remove_field_api_view_updates, add_field_api_view_updates @@ -19,7 +20,7 @@ @pytest.mark.parametrize('frontend', available_frontends( skip=[(OMNI, 'OMNI needs full type definitions for derived types')] )) -def test_field_api_remove_view_updates(frontend): +def test_field_api_remove_view_updates(caplog, frontend): """ A simple test for :any:`remove_field_api_view_updates` """ @@ -29,9 +30,9 @@ def test_field_api_remove_view_updates(frontend): implicit none integer(kind=4), intent(in) :: ngptot, nproma, nflux type(dimension_type), intent(inout) :: dims - type(state_type), intent(inout) :: state + type(STATE_TYPE), intent(inout) :: state type(aux_type), intent(inout) :: aux_fields - type(flux_type), intent(inout) :: fluxes(nflux) + type(FLUX_type), intent(inout) :: fluxes(nflux) integer :: JKGLO, IBL, ICEND, JK, JL, JF @@ -40,7 +41,7 @@ def test_field_api_remove_view_updates(frontend): ibl = (jkglo - 1) / nproma + 1 CALL DIMS%UPDATE(IBL, ICEND, JKGLO) - CALL STATE%UPDATE_VIEW(IBL) + CALL STATE%update_VIEW(IBL) CALL AUX_FIELDS%UPDATE_VIEW(block_index=IBL) DO jf=1, nflux CALL FLUXES(JF)%UPDATE_VIEW(IBL) @@ -55,10 +56,17 @@ def test_field_api_remove_view_updates(frontend): assert len(FindNodes(ir.CallStatement).visit(routine.body)) == 5 assert len(FindNodes(ir.Loop).visit(routine.body)) == 2 - field_group_types = ['state_type', 'aux_type', 'flux_type'] - remove_field_api_view_updates( - routine, field_group_types=field_group_types, dim_object='DIMS' - ) + with caplog.at_level(WARNING): + field_group_types = ['state_type', 'aux_type', 'flux_type'] + remove_field_api_view_updates( + routine, field_group_types=field_group_types, dim_object='DIMS' + ) + + assert len(caplog.records) == 2 + assert '[Loki::ControlFlow] Removing STATE%update_VIEW call, but not in field group types!'\ + in caplog.records[0].message + assert '[Loki::ControlFlow] Removing FLUXES(JF)%UPDATE_VIEW call, but not in field group types!'\ + in caplog.records[1].message calls = FindNodes(ir.CallStatement).visit(routine.body) assert len(calls) == 1 From 6e60ce1a28142c6f75bdd66cc5cdc78f1dd8b0ea Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Wed, 13 Nov 2024 13:13:02 +0000 Subject: [PATCH 12/18] Parallel: Fixing typos, comments and imports --- loki/transformations/parallel/block_loop.py | 10 ++++------ loki/transformations/parallel/openmp_region.py | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/loki/transformations/parallel/block_loop.py b/loki/transformations/parallel/block_loop.py index 0203ead18..033e27a8a 100644 --- a/loki/transformations/parallel/block_loop.py +++ b/loki/transformations/parallel/block_loop.py @@ -14,9 +14,8 @@ nodes as ir, FindNodes, Transformer, pragma_regions_attached, is_loki_pragma ) -from loki.scope import SymbolAttributes from loki.tools import as_tuple -from loki.types import BasicType +from loki.types import BasicType, SymbolAttributes __all__ = ['remove_block_loops', 'add_block_loops'] @@ -26,9 +25,9 @@ def remove_block_loops(routine, dimension): """ Remove any outer block :any:`Loop` from a given :any:`Subroutine. - The loops are identified accoerding to a given :any:`Dimension` + The loops are identified according to a given :any:`Dimension` object, and will remove auxiliary assignments of index and bound - variables, as commongly used in IFS-style block loops. + variables, as commonly used in IFS-style block loops. Parameters ---------- @@ -71,7 +70,7 @@ def add_block_loops(routine, dimension, default_type=None): points, defined by ``dimension.index``, ``dimension.bounds`` and ``dimension.step`` is created, alongside assignments that define the corresponding block index and upper bound, defined by - ``dimension.indices[1]`` and ``dimension.upper[1]`` repsectively. + ``dimension.indices[1]`` and ``dimension.upper[1]`` respectively. Parameters ---------- @@ -87,7 +86,6 @@ def add_block_loops(routine, dimension, default_type=None): _default = SymbolAttributes(BasicType.INTEGER, kind='JPIM') dtype = default_type if default_type else _default - # TODO: Explain convention in docstring lidx = routine.parse_expr(dimension.index) bidx = routine.parse_expr(dimension.indices[1]) bupper = routine.parse_expr(dimension.upper[1]) diff --git a/loki/transformations/parallel/openmp_region.py b/loki/transformations/parallel/openmp_region.py index 3a8d08633..3d63e8c74 100644 --- a/loki/transformations/parallel/openmp_region.py +++ b/loki/transformations/parallel/openmp_region.py @@ -53,7 +53,7 @@ class RemoveOpenMPRegionTransformer(Transformer): def visit_PragmaRegion(self, region, **kwargs): """ - Perform the fileterin and removeal of OpenMP pragma regions. + Perform the filtering and removal of OpenMP pragma regions. Parameters ---------- @@ -65,7 +65,7 @@ def visit_PragmaRegion(self, region, **kwargs): return region if kwargs['active'] and region.pragma.keyword.lower() == 'omp': - # Remove other OpenMP pragam regions when in active mode + # Remove other OpenMP pragma regions when in active mode region._update(pragma=None, pragma_post=None) return region From 4a1fadabdfc0de1f98a70a25d5585f6e721ecb26 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Wed, 13 Nov 2024 15:37:29 +0000 Subject: [PATCH 13/18] Parallel: Better test coverage and case sensitivity for F-API removal --- loki/transformations/parallel/field_api.py | 8 +++---- .../parallel/tests/test_field_api.py | 23 +++++++++++++------ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/loki/transformations/parallel/field_api.py b/loki/transformations/parallel/field_api.py index 2648276a2..e1e06b491 100644 --- a/loki/transformations/parallel/field_api.py +++ b/loki/transformations/parallel/field_api.py @@ -41,16 +41,14 @@ def remove_field_api_view_updates(routine, field_group_types, dim_object=None): Optional name of the "dimension" object; if provided it will remove the call to ``%UPDATE(...)`` accordingly. """ - field_group_types = as_tuple(field_group_types) + field_group_types = as_tuple(str(fgt).lower() for fgt in field_group_types) class RemoveFieldAPITransformer(Transformer): def visit_CallStatement(self, call, **kwargs): # pylint: disable=unused-argument if '%update_view' in str(call.name).lower(): - if not call.name.parent: - warning(f'[Loki::ControlFlow] Removing {call.name} call without parent!') - if not str(call.name.parent.type.dtype) in field_group_types: + if not str(call.name.parent.type.dtype).lower() in field_group_types: warning(f'[Loki::ControlFlow] Removing {call.name} call, but not in field group types!') return None @@ -61,7 +59,7 @@ def visit_CallStatement(self, call, **kwargs): # pylint: disable=unused-argumen return call def visit_Assignment(self, assign, **kwargs): # pylint: disable=unused-argument - if assign.lhs.type.dtype in field_group_types: + if str(assign.lhs.type.dtype).lower() in field_group_types: warning(f'[Loki::ControlFlow] Found LHS field group assign: {assign}') return assign diff --git a/loki/transformations/parallel/tests/test_field_api.py b/loki/transformations/parallel/tests/test_field_api.py index be154e32b..b752dec85 100644 --- a/loki/transformations/parallel/tests/test_field_api.py +++ b/loki/transformations/parallel/tests/test_field_api.py @@ -26,13 +26,15 @@ def test_field_api_remove_view_updates(caplog, frontend): """ fcode = """ -subroutine test_remove_block_loop(ngptot, nproma, nflux, dims, state, aux_fields, fluxes) +subroutine test_remove_block_loop(ngptot, nproma, nflux, dims, state, aux_fields, fluxes, ricks_fields) + use type_module, only: dimension_type, state_type, aux_type, flux_type, ricks_type implicit none integer(kind=4), intent(in) :: ngptot, nproma, nflux type(dimension_type), intent(inout) :: dims type(STATE_TYPE), intent(inout) :: state type(aux_type), intent(inout) :: aux_fields type(FLUX_type), intent(inout) :: fluxes(nflux) + type(ricks_type), intent(inout) :: ricks_fields integer :: JKGLO, IBL, ICEND, JK, JL, JF @@ -40,12 +42,17 @@ def test_field_api_remove_view_updates(caplog, frontend): icend = min(nproma, ngptot - JKGLO + 1) ibl = (jkglo - 1) / nproma + 1 + STATE = STATE%CLONE() + CALL DIMS%UPDATE(IBL, ICEND, JKGLO) CALL STATE%update_VIEW(IBL) CALL AUX_FIELDS%UPDATE_VIEW(block_index=IBL) - DO jf=1, nflux - CALL FLUXES(JF)%UPDATE_VIEW(IBL) - END DO + IF (NFLUX > 0) THEN + DO jf=1, nflux + CALL FLUXES(JF)%UPDATE_VIEW(IBL) + END DO + END IF + CALL RICKS_FIELDS%UPDATE_VIEW(IBL) CALL MY_KERNEL(STATE%U, STATE%V, AUX_FIELDS%STUFF, FLUXES(1)%FOO, FLUXES(2)%BAR) END DO @@ -53,7 +60,8 @@ def test_field_api_remove_view_updates(caplog, frontend): """ routine = Subroutine.from_source(fcode, frontend=frontend) - assert len(FindNodes(ir.CallStatement).visit(routine.body)) == 5 + assert len(FindNodes(ir.CallStatement).visit(routine.body)) == 6 + assert len(FindNodes(ir.Conditional).visit(routine.body)) == 1 assert len(FindNodes(ir.Loop).visit(routine.body)) == 2 with caplog.at_level(WARNING): @@ -63,15 +71,16 @@ def test_field_api_remove_view_updates(caplog, frontend): ) assert len(caplog.records) == 2 - assert '[Loki::ControlFlow] Removing STATE%update_VIEW call, but not in field group types!'\ + assert '[Loki::ControlFlow] Found LHS field group assign: Assignment:: STATE = STATE%CLONE()'\ in caplog.records[0].message - assert '[Loki::ControlFlow] Removing FLUXES(JF)%UPDATE_VIEW call, but not in field group types!'\ + assert '[Loki::ControlFlow] Removing RICKS_FIELDS%UPDATE_VIEW call, but not in field group types!'\ in caplog.records[1].message calls = FindNodes(ir.CallStatement).visit(routine.body) assert len(calls) == 1 assert calls[0].name == 'MY_KERNEL' + assert len(FindNodes(ir.Conditional).visit(routine.body)) == 0 loops = FindNodes(ir.Loop).visit(routine.body) assert len(loops) == 1 assert loops[0].variable == 'jkglo' From c011e714f4da8025760ec4da46d097fb71c9c8a0 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Wed, 13 Nov 2024 18:33:38 +0000 Subject: [PATCH 14/18] Parallel: Use `Dimension` and improve tests for add_openmp_region --- .../transformations/parallel/openmp_region.py | 22 +++++++++--- .../parallel/tests/test_openmp_region.py | 36 +++++++++++++------ 2 files changed, 43 insertions(+), 15 deletions(-) diff --git a/loki/transformations/parallel/openmp_region.py b/loki/transformations/parallel/openmp_region.py index 3d63e8c74..136f43eab 100644 --- a/loki/transformations/parallel/openmp_region.py +++ b/loki/transformations/parallel/openmp_region.py @@ -96,13 +96,25 @@ def visit_Pragma(self, pragma, **kwargs): routine.body = RemoveOpenMPRegionTransformer().visit(routine.body, active=False) -def add_openmp_regions(routine, global_variables=None, field_group_types=None): +def add_openmp_regions( + routine, dimension, global_variables=None, field_group_types=None +): """ Add the OpenMP directives for a parallel driver region with an outer block loop. - """ - block_dim_size = 'YDGEOMETRY%YRDIM%NGPBLKS' + Parameters + ---------- + routine : :any:`Subroutine` + The routine to which to add OpenMP parallel regions. + dimension : :any:`Dimension` + The dimension object describing the block loop variables. + global_variables : tuple of str + Names of variables that should neither be private nor firstprivate + field_group_types : tuple of str + Names of types designating "field groups", which should be + treated as firstprivate + """ global_variables = global_variables or {} field_group_types = field_group_types or {} @@ -117,7 +129,7 @@ def add_openmp_regions(routine, global_variables=None, field_group_types=None): # Filter arrays by block-dim size, as these are global local_arrays = tuple( v for v in local_variables - if isinstance(v, sym.Array) and not v.dimensions[-1] == block_dim_size + if isinstance(v, sym.Array) and not v.dimensions[-1] == dimension.size ) with pragma_regions_attached(routine): @@ -176,7 +188,7 @@ def add_openmp_regions(routine, global_variables=None, field_group_types=None): with pragmas_attached(routine, node_type=ir.Loop): for loop in FindNodes(ir.Loop).visit(region.body): # Add OpenMP DO directives onto block loops - if loop.variable == 'JKGLO': + if loop.variable == dimension.index: loop._update( pragma=ir.Pragma(keyword='OMP', content='DO SCHEDULE(DYNAMIC,1)'), pragma_post=ir.Pragma(keyword='OMP', content='END DO'), diff --git a/loki/transformations/parallel/tests/test_openmp_region.py b/loki/transformations/parallel/tests/test_openmp_region.py index df1018a39..7bb88539f 100644 --- a/loki/transformations/parallel/tests/test_openmp_region.py +++ b/loki/transformations/parallel/tests/test_openmp_region.py @@ -7,7 +7,7 @@ import pytest -from loki import Subroutine, Module +from loki import Subroutine, Module, Dimension from loki.frontend import available_frontends, OMNI from loki.ir import ( nodes as ir, FindNodes, pragmas_attached, pragma_regions_attached, @@ -101,6 +101,7 @@ def test_add_openmp_regions(tmp_path, frontend): fcode = """ subroutine test_add_openmp_loop(ydgeom, arr) use geom_mod, only: geom_type + use kernel_mod, only: my_kernel, my_non_kernel implicit none type(geom_type), intent(in) :: ydgeom real(kind=8), intent(inout) :: arr(:,:,:) @@ -117,40 +118,55 @@ def test_add_openmp_regions(tmp_path, frontend): !$loki end parallel + !$loki not-so-parallel + + DO JKGLO=1,YDGEOM%NGPTOT,YDGEOM%NPROMA + call my_non_kernel(arr(1,1,1)) + END DO + + !$loki end not-so-parallel + end subroutine test_add_openmp_loop """ _ = Module.from_source(fcode_type, frontend=frontend, xmods=[tmp_path]) routine = Subroutine.from_source(fcode, frontend=frontend, xmods=[tmp_path]) - assert len(FindNodes(ir.Pragma).visit(routine.body)) == 2 + assert len(FindNodes(ir.Pragma).visit(routine.body)) == 4 with pragma_regions_attached(routine): regions = FindNodes(ir.PragmaRegion).visit(routine.body) - assert len(regions) == 1 - assert regions[0].pragma.keyword == 'loki' and regions[0].pragma.content == 'parallel' - assert regions[0].pragma_post.keyword == 'loki' and regions[0].pragma_post.content == 'end parallel' + assert len(regions) == 2 + assert is_loki_pragma(regions[0].pragma, starts_with='parallel') + assert is_loki_pragma(regions[0].pragma_post, starts_with='end parallel') + assert is_loki_pragma(regions[1].pragma, starts_with='not-so-parallel') + assert is_loki_pragma(regions[1].pragma_post, starts_with='end not-so-parallel') - add_openmp_regions(routine) + block_dim = Dimension(index='JKGLO', size='YDGEOM%NGPBLK') + add_openmp_regions(routine, dimension=block_dim) # Ensure pragmas have been inserted pragmas = FindNodes(ir.Pragma).visit(routine.body) - assert len(pragmas) == 4 - assert all(p.keyword == 'OMP' for p in pragmas) + assert len(pragmas) == 6 + assert all(p.keyword == 'OMP' for p in pragmas[0:4]) + assert all(p.keyword == 'loki' for p in pragmas[5:6]) with pragmas_attached(routine, node_type=ir.Loop): with pragma_regions_attached(routine): # Ensure pragma region has been created regions = FindNodes(ir.PragmaRegion).visit(routine.body) - assert len(regions) == 1 + assert len(regions) == 2 assert regions[0].pragma.keyword == 'OMP' assert regions[0].pragma.content.startswith('PARALLEL') assert regions[0].pragma_post.keyword == 'OMP' assert regions[0].pragma_post.content == 'END PARALLEL' + assert is_loki_pragma(regions[1].pragma, starts_with='not-so-parallel') + assert is_loki_pragma(regions[1].pragma_post, starts_with='end not-so-parallel') # Ensure loops has been annotated loops = FindNodes(ir.Loop).visit(routine.body) - assert len(loops) == 1 + assert len(loops) == 2 assert loops[0].pragma[0].keyword == 'OMP' assert loops[0].pragma[0].content == 'DO SCHEDULE(DYNAMIC,1)' + assert not loops[1].pragma # TODO: Test field_group_types and known global variables From c4f0bc5d0454b49eaeb1ca6c36fc2e7cd6899a8e Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Wed, 13 Nov 2024 18:38:44 +0000 Subject: [PATCH 15/18] Parallel: Rename argument `global_variables` => `shared_variables` --- loki/transformations/parallel/openmp_region.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/loki/transformations/parallel/openmp_region.py b/loki/transformations/parallel/openmp_region.py index 136f43eab..4233b096e 100644 --- a/loki/transformations/parallel/openmp_region.py +++ b/loki/transformations/parallel/openmp_region.py @@ -97,7 +97,7 @@ def visit_Pragma(self, pragma, **kwargs): def add_openmp_regions( - routine, dimension, global_variables=None, field_group_types=None + routine, dimension, shared_variables=None, field_group_types=None ): """ Add the OpenMP directives for a parallel driver region with an @@ -109,13 +109,13 @@ def add_openmp_regions( The routine to which to add OpenMP parallel regions. dimension : :any:`Dimension` The dimension object describing the block loop variables. - global_variables : tuple of str + shared_variables : tuple of str Names of variables that should neither be private nor firstprivate field_group_types : tuple of str Names of types designating "field groups", which should be treated as firstprivate """ - global_variables = global_variables or {} + shared_variables = shared_variables or {} field_group_types = field_group_types or {} # First get local variables and separate scalars and arrays @@ -159,7 +159,7 @@ def add_openmp_regions( )) # Filter out known global variables - local_vars = tuple(v for v in local_vars if v.name not in global_variables) + local_vars = tuple(v for v in local_vars if v.name not in shared_variables) # Make field group types firstprivate firstprivates = tuple(dict.fromkeys( From a3b85d25d54b3cc25074dc1c6641b39a4bf7a81e Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Thu, 14 Nov 2024 05:00:55 +0000 Subject: [PATCH 16/18] Parallel: Add more test coverage for corner cases --- .../parallel/tests/test_block_loop.py | 28 +++++-- .../parallel/tests/test_openmp_region.py | 77 ++++++++++++++----- 2 files changed, 79 insertions(+), 26 deletions(-) diff --git a/loki/transformations/parallel/tests/test_block_loop.py b/loki/transformations/parallel/tests/test_block_loop.py index 46dabe267..0612855ea 100644 --- a/loki/transformations/parallel/tests/test_block_loop.py +++ b/loki/transformations/parallel/tests/test_block_loop.py @@ -8,8 +8,9 @@ import pytest from loki import Subroutine, Module, Dimension -from loki.frontend import available_frontends +from loki.frontend import available_frontends, OMNI from loki.ir import nodes as ir, FindNodes +from loki.tools import flatten from loki.transformations.parallel import ( remove_block_loops, add_block_loops @@ -92,7 +93,7 @@ def test_add_block_loops(frontend): subroutine test_add_block_loop(ydgeom, npoints, nlev, arr) integer(kind=4), intent(in) :: npoints, nlev real(kind=8), intent(inout) :: arr(:,:,:) - integer :: JKGLO, IBL, ICEND + integer :: JKGLO !$loki parallel call my_kernel(arr(:,:,ibl)) @@ -105,15 +106,24 @@ def test_add_block_loops(frontend): end do end do !$loki end parallel + +!$omp parallel + do jk=1, nlev + do jl=1, npoints + arr(jl, jk, ibl) = 42.0 + end do + end do +!$omp end parallel + end subroutine test_add_block_loop """ routine = Subroutine.from_source(fcode, frontend=frontend) loops = FindNodes(ir.Loop).visit(routine.body) - assert len(loops) == 2 + assert len(loops) == 4 assert loops[0].variable == 'jk' assert loops[1].variable == 'jl' - assert len(FindNodes(ir.Assignment).visit(routine.body)) == 1 + assert len(FindNodes(ir.Assignment).visit(routine.body)) == 2 blocking = Dimension( name='block', index=('JKGLO', 'IBL'), @@ -126,11 +136,13 @@ def test_add_block_loops(frontend): add_block_loops(routine, dimension=blocking) loops = FindNodes(ir.Loop).visit(routine.body) - assert len(loops) == 4 + assert len(loops) == 6 assert loops[0].variable == 'jkglo' assert loops[1].variable == 'jkglo' assert loops[2].variable == 'jk' assert loops[3].variable == 'jl' + assert loops[4].variable == 'jk' + assert loops[5].variable == 'jl' assigns1 = FindNodes(ir.Assignment).visit(loops[0].body) assert len(assigns1) == 2 @@ -147,3 +159,9 @@ def test_add_block_loops(frontend): assert assigns2[1].rhs == '(JKGLO - 1) / YDGEOM%NPROMA + 1' assert assigns2[2].lhs == 'arr(jl, jk, ibl)' assert assigns2[2].rhs == 42.0 + + decls = FindNodes(ir.VariableDeclaration).visit(routine.spec) + assert len(decls) == 9 if frontend == OMNI else 5 + decl_symbols = tuple(flatten(d.symbols for d in decls)) + for v in ['JKGLO', 'IBL', 'ICEND']: + assert v in decl_symbols diff --git a/loki/transformations/parallel/tests/test_openmp_region.py b/loki/transformations/parallel/tests/test_openmp_region.py index 7bb88539f..ede1490e9 100644 --- a/loki/transformations/parallel/tests/test_openmp_region.py +++ b/loki/transformations/parallel/tests/test_openmp_region.py @@ -36,6 +36,7 @@ def test_remove_openmp_regions(frontend, insert_loki_parallel): !$omp parallel private(i) !$omp do schedule dynamic(1) do i=1, n + !$loki foo-bar arr(i) = arr(i) + 1.0 end do !$omp end do @@ -45,7 +46,9 @@ def test_remove_openmp_regions(frontend, insert_loki_parallel): !$OMP PARALLEL PRIVATE(i) !$OMP DO SCHEDULE DYNAMIC(1) do i=1, n + !$loki foo-baz arr(i) = arr(i) + 1.0 + !$loki end foo-baz end do !$OMP END DO !$OMP END PARALLEL @@ -62,25 +65,30 @@ def test_remove_openmp_regions(frontend, insert_loki_parallel): routine = Subroutine.from_source(fcode, frontend=frontend) assert len(FindNodes(ir.Loop).visit(routine.body)) == 3 - assert len(FindNodes(ir.Pragma).visit(routine.body)) == 11 + assert len(FindNodes(ir.Pragma).visit(routine.body)) == 14 with pragma_regions_attached(routine): # Without attaching Loop-pragmas, all are recognised as regions - assert len(FindNodes(ir.PragmaRegion).visit(routine.body)) == 5 + assert len(FindNodes(ir.PragmaRegion).visit(routine.body)) == 6 remove_openmp_regions(routine, insert_loki_parallel=insert_loki_parallel) assert len(FindNodes(ir.Loop).visit(routine.body)) == 3 pragmas = FindNodes(ir.Pragma).visit(routine.body) - assert len(pragmas) == (6 if insert_loki_parallel else 0) + assert len(pragmas) == (9 if insert_loki_parallel else 3) if insert_loki_parallel: with pragma_regions_attached(routine): pragma_regions = FindNodes(ir.PragmaRegion).visit(routine.body) - assert len(pragma_regions) == 3 - for region in pragma_regions: - assert is_loki_pragma(region.pragma, starts_with='parallel') - assert is_loki_pragma(region.pragma_post, starts_with='end parallel') + assert len(pragma_regions) == 4 + assert is_loki_pragma(pragma_regions[0].pragma, starts_with='parallel') + assert is_loki_pragma(pragma_regions[0].pragma_post, starts_with='end parallel') + assert is_loki_pragma(pragma_regions[1].pragma, starts_with='parallel') + assert is_loki_pragma(pragma_regions[1].pragma_post, starts_with='end parallel') + assert is_loki_pragma(pragma_regions[2].pragma, starts_with='foo-baz') + assert is_loki_pragma(pragma_regions[2].pragma_post, starts_with='end foo-baz') + assert is_loki_pragma(pragma_regions[3].pragma, starts_with='parallel') + assert is_loki_pragma(pragma_regions[3].pragma_post, starts_with='end parallel') @pytest.mark.parametrize('frontend', available_frontends( @@ -99,20 +107,26 @@ def test_add_openmp_regions(tmp_path, frontend): """ fcode = """ -subroutine test_add_openmp_loop(ydgeom, arr) - use geom_mod, only: geom_type +subroutine test_add_openmp_loop(ydgeom, ydfields, arr) + use geom_mod, only: geom_type, fld_type use kernel_mod, only: my_kernel, my_non_kernel implicit none type(geom_type), intent(in) :: ydgeom + type(fld_type), intent(inout) :: ydfields + type(fld_type), intent(inout) :: ylfields real(kind=8), intent(inout) :: arr(:,:,:) integer :: JKGLO, IBL, ICEND !$loki parallel + ylfields = ydfields + DO JKGLO=1,YDGEOM%NGPTOT,YDGEOM%NPROMA ICEND = MIN(YDGEOM%NPROMA, YDGEOM%NGPTOT - JKGLO + 1) IBL = (JKGLO - 1) / YDGEOM%NPROMA + 1 + CALL YDFIELDS%UPDATE_STUFF() + CALL MY_KERNEL(ARR(:,:,IBL)) END DO @@ -141,7 +155,11 @@ def test_add_openmp_regions(tmp_path, frontend): assert is_loki_pragma(regions[1].pragma_post, starts_with='end not-so-parallel') block_dim = Dimension(index='JKGLO', size='YDGEOM%NGPBLK') - add_openmp_regions(routine, dimension=block_dim) + add_openmp_regions( + routine, dimension=block_dim, + field_group_types=('fld_type',), + shared_variables=('ydfields',) + ) # Ensure pragmas have been inserted pragmas = FindNodes(ir.Pragma).visit(routine.body) @@ -161,6 +179,11 @@ def test_add_openmp_regions(tmp_path, frontend): assert is_loki_pragma(regions[1].pragma, starts_with='not-so-parallel') assert is_loki_pragma(regions[1].pragma_post, starts_with='end not-so-parallel') + # Ensure shared, private and firstprivate have been set right + assert 'PARALLEL DEFAULT(SHARED)' in regions[0].pragma.content + assert 'PRIVATE(JKGLO, IBL, ICEND)' in regions[0].pragma.content + assert 'FIRSTPRIVATE(ylfields)' in regions[0].pragma.content + # Ensure loops has been annotated loops = FindNodes(ir.Loop).visit(routine.body) assert len(loops) == 2 @@ -168,8 +191,6 @@ def test_add_openmp_regions(tmp_path, frontend): assert loops[0].pragma[0].content == 'DO SCHEDULE(DYNAMIC,1)' assert not loops[1].pragma - # TODO: Test field_group_types and known global variables - @pytest.mark.parametrize('frontend', available_frontends( skip=[(OMNI, 'OMNI needs full type definitions for derived types')] @@ -181,17 +202,21 @@ def test_remove_explicit_firstprivatisation(frontend): fcode = """ subroutine test_add_openmp_loop(ydgeom, state, arr) use geom_mod, only: geom_type + use type_mod, only: state_type, flux_type, NewFlux implicit none type(geom_type), intent(in) :: ydgeom real(kind=8), intent(inout) :: arr(:,:,:) type(state_type), intent(in) :: state type(state_type) :: ydstate + type(flux_type) :: ydflux integer :: jkglo, ibl, icend !$loki parallel ydstate = state + ydflux = NewFlux() + do jkglo=1,ydgeom%ngptot,ydgeom%nproma icend = min(ydgeom%nproma, ydgeom%ngptot - jkglo + 1) ibl = (jkglo - 1) / ydgeom%nproma + 1 @@ -209,7 +234,7 @@ def test_remove_explicit_firstprivatisation(frontend): fprivate_map = {'ydstate' : 'state'} assigns = FindNodes(ir.Assignment).visit(routine.body) - assert len(assigns) == 3 + assert len(assigns) == 4 assert assigns[0].lhs == 'ydstate' and assigns[0].rhs == 'state' calls = FindNodes(ir.CallStatement).visit(routine.body) assert len(calls) == 2 @@ -224,9 +249,10 @@ def test_remove_explicit_firstprivatisation(frontend): # Check removal and symbol replacement assigns = FindNodes(ir.Assignment).visit(routine.body) - assert len(assigns) == 2 - assert assigns[0].lhs == 'icend' - assert assigns[1].lhs == 'ibl' + assert len(assigns) == 3 + assert assigns[0].lhs == 'ydflux' + assert assigns[1].lhs == 'icend' + assert assigns[2].lhs == 'ibl' calls = FindNodes(ir.CallStatement).visit(routine.body) assert len(calls) == 2 assert str(calls[0].name).startswith('state%') @@ -263,6 +289,15 @@ def test_create_explicit_firstprivatisation(tmp_path, frontend): end do !$loki end parallel + + !$loki not-so-parallel + + do jkglo=1,ydgeom%ngptot,ydgeom%nproma + icend = min(ydgeom%nproma, ydgeom%ngptot - jkglo + 1) + ibl = (jkglo - 1) / ydgeom%nproma + 1 + end do + + !$loki end not-so-parallel end subroutine test_add_openmp_loop """ routine = Subroutine.from_source(fcode, frontend=frontend) @@ -270,25 +305,25 @@ def test_create_explicit_firstprivatisation(tmp_path, frontend): fprivate_map = {'ydstate' : 'state'} assigns = FindNodes(ir.Assignment).visit(routine.body) - assert len(assigns) == 2 + assert len(assigns) == 4 assert assigns[0].lhs == 'icend' assert assigns[1].lhs == 'ibl' calls = FindNodes(ir.CallStatement).visit(routine.body) assert len(calls) == 2 assert str(calls[0].name).startswith('state%') assert calls[1].arguments[0].parent == 'state' - assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 + assert len(FindNodes(ir.Loop).visit(routine.body)) == 2 # Put the explicit firstprivate copies back in create_explicit_firstprivatisation( routine=routine, fprivate_map=fprivate_map ) - + assigns = FindNodes(ir.Assignment).visit(routine.body) - assert len(assigns) == 3 + assert len(assigns) == 5 assert assigns[0].lhs == 'ydstate' and assigns[0].rhs == 'state' calls = FindNodes(ir.CallStatement).visit(routine.body) assert len(calls) == 2 assert str(calls[0].name).startswith('ydstate%') assert calls[1].arguments[0].parent == 'ydstate' - assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 + assert len(FindNodes(ir.Loop).visit(routine.body)) == 2 From efea62feb259c7c32e017720166b3ad6cf4522ed Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Thu, 14 Nov 2024 05:05:10 +0000 Subject: [PATCH 17/18] Parallel: Rename to `add/remove_firstprivate_copies` This is to shorten the method names and bring them in line with the naming convention of the sub-package. --- loki/transformations/parallel/openmp_region.py | 7 +++---- .../parallel/tests/test_openmp_region.py | 15 +++++++-------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/loki/transformations/parallel/openmp_region.py b/loki/transformations/parallel/openmp_region.py index 4233b096e..d3a50f996 100644 --- a/loki/transformations/parallel/openmp_region.py +++ b/loki/transformations/parallel/openmp_region.py @@ -22,8 +22,7 @@ __all__ = [ 'remove_openmp_regions', 'add_openmp_regions', - 'remove_explicit_firstprivatisation', - 'create_explicit_firstprivatisation' + 'remove_firstprivate_copies', 'add_firstprivate_copies' ] @@ -195,7 +194,7 @@ def add_openmp_regions( ) -def remove_explicit_firstprivatisation(region, fprivate_map, scope): +def remove_firstprivate_copies(region, fprivate_map, scope): """ Removes an IFS-specific workaround, where complex derived-type objects are explicitly copied into a local copy of the object to @@ -231,7 +230,7 @@ def visit_Assignment(self, assign, **kwargs): # pylint: disable=unused-argument return SubstituteStringExpressions(fprivate_map, scope=scope).visit(region) -def create_explicit_firstprivatisation(routine, fprivate_map): +def add_firstprivate_copies(routine, fprivate_map): """ Injects IFS-specific thread-local copies of named complex derived type objects in parallel regions. This is to prevent issues with diff --git a/loki/transformations/parallel/tests/test_openmp_region.py b/loki/transformations/parallel/tests/test_openmp_region.py index ede1490e9..e9a05705b 100644 --- a/loki/transformations/parallel/tests/test_openmp_region.py +++ b/loki/transformations/parallel/tests/test_openmp_region.py @@ -16,8 +16,7 @@ from loki.transformations.parallel import ( remove_openmp_regions, add_openmp_regions, - remove_explicit_firstprivatisation, - create_explicit_firstprivatisation + remove_firstprivate_copies, add_firstprivate_copies ) @@ -195,9 +194,9 @@ def test_add_openmp_regions(tmp_path, frontend): @pytest.mark.parametrize('frontend', available_frontends( skip=[(OMNI, 'OMNI needs full type definitions for derived types')] )) -def test_remove_explicit_firstprivatisation(frontend): +def test_remove_firstprivate_copies(frontend): """ - A simple test for :any:`remove_explicit_firstprivatisation` + A simple test for :any:`remove_firstprivate_copies` """ fcode = """ subroutine test_add_openmp_loop(ydgeom, state, arr) @@ -243,7 +242,7 @@ def test_remove_explicit_firstprivatisation(frontend): assert len(FindNodes(ir.Loop).visit(routine.body)) == 1 # Remove the explicit copy of `ydstate = state` and adjust symbols - routine.body = remove_explicit_firstprivatisation( + routine.body = remove_firstprivate_copies( region=routine.body, fprivate_map=fprivate_map, scope=routine ) @@ -263,9 +262,9 @@ def test_remove_explicit_firstprivatisation(frontend): @pytest.mark.parametrize('frontend', available_frontends( skip=[(OMNI, 'OMNI needs full type definitions for derived types')] )) -def test_create_explicit_firstprivatisation(tmp_path, frontend): +def test_add_firstprivate_copies(tmp_path, frontend): """ - A simple test for :any:`create_explicit_firstprivatisation` + A simple test for :any:`add_firstprivate_copies` """ fcode = """ @@ -315,7 +314,7 @@ def test_create_explicit_firstprivatisation(tmp_path, frontend): assert len(FindNodes(ir.Loop).visit(routine.body)) == 2 # Put the explicit firstprivate copies back in - create_explicit_firstprivatisation( + add_firstprivate_copies( routine=routine, fprivate_map=fprivate_map ) From 2d361bc0b2a231b658b4bfdbd4fe7500f17b3083 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Thu, 14 Nov 2024 09:07:52 +0000 Subject: [PATCH 18/18] Parallel: Add missing recursion on remove_block_loops --- loki/transformations/parallel/block_loop.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/loki/transformations/parallel/block_loop.py b/loki/transformations/parallel/block_loop.py index 033e27a8a..1926152b6 100644 --- a/loki/transformations/parallel/block_loop.py +++ b/loki/transformations/parallel/block_loop.py @@ -47,14 +47,16 @@ class RemoveBlockLoopTransformer(Transformer): """ def visit_Loop(self, loop, **kwargs): # pylint: disable=unused-argument + body = self.visit(loop.body, **kwargs) + if not loop.variable == idx: - return loop + return loop._rebuild(body=body) to_remove = tuple( - a for a in FindNodes(ir.Assignment).visit(loop.body) + a for a in FindNodes(ir.Assignment).visit(body) if a.lhs in variables ) - return tuple(n for n in loop.body if n not in to_remove) + return tuple(n for n in body if n not in to_remove) routine.body = RemoveBlockLoopTransformer().visit(routine.body)