From 2bf5cc3316c9f0ca71e4fcf4b09c556c0cc7afca Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 6 Nov 2024 13:29:38 +0100 Subject: [PATCH 1/5] TransformLoopsTransformation: add trafo to call various loop transform utilities --- loki/transformations/remove_code.py | 2 +- loki/transformations/transform_loop.py | 59 +++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/loki/transformations/remove_code.py b/loki/transformations/remove_code.py index bd0b42e74..6e55d7eb5 100644 --- a/loki/transformations/remove_code.py +++ b/loki/transformations/remove_code.py @@ -29,7 +29,7 @@ class RemoveCodeTransformation(Transformation): """ A :any:`Transformation` that provides named call and import removal, code removal of pragma-marked regions and Dead Code - Elimination for batch processing vis the :any:`Scheduler`. + Elimination for batch processing via the :any:`Scheduler`. The transformation will apply the following methods in order: diff --git a/loki/transformations/transform_loop.py b/loki/transformations/transform_loop.py index 29299b4eb..b38d7d974 100644 --- a/loki/transformations/transform_loop.py +++ b/loki/transformations/transform_loop.py @@ -36,9 +36,10 @@ from loki.transformations.array_indexing import ( promotion_dimensions_from_loop_nest, promote_nonmatching_variables ) +from loki.batch import Transformation -__all__ = ['loop_interchange', 'loop_fusion', 'loop_fission', 'loop_unroll'] +__all__ = ['loop_interchange', 'loop_fusion', 'loop_fission', 'loop_unroll', 'TransformLoopsTransformation'] from loki.analyse.util_polyhedron import Polyhedron @@ -752,3 +753,59 @@ def visit_Loop(self, o, *args, **kwargs): with pragmas_attached(routine, Loop): routine.body = PragmaLoopUnrollTransformer(warn_iterations_length=warn_iterations_length).visit(routine.body) + + +class TransformLoopsTransformation(Transformation): + """ + A :any:`Transformation` that provides a common location for the various loop transformations to be called + in a :any:`Scheduler` pipeline. + + The transformation applies the following methods in order: + + * :any:`loop_interchange` + * :any:`loop_fusion` + * :any:`loop_fission` + * :any:`loop_unroll` + + Parameters + ---------- + project_bounds : bool + Project loop bounds whilst performing loop interchange. Default: ``False``. + promote : bool + Try to automatically detect read-after-write across fission points + and promote corresponding variables. Note that this does not affect + promotion of variables listed directly in the pragma's ``promote`` + option. Default: ``True``. + warn_loop_carries : bool + Try to automatically detect loop-carried dependencies and warn + when the fission point sits after the initial read and before the + final write. Default: ``True``. + warn_iterations_length : bool + This specifies if warnings should be generated when unrolling + loops with a large number of iterations (32). It's mainly to + disable warnings when loops are being unrolled for internal + transformations and analysis. Default: ``True``. + """ + + def __init__( + self, project_bounds=False, promote=True, warn_loop_carries=True, + warn_iterations_length=True + ): + self.project_bounds = project_bounds + self.promote = promote + self.warn_loop_carries = warn_loop_carries + self.warn_iterations_length = warn_iterations_length + + def transform_subroutine(self, routine, **kwargs): + + # Interchange loops + loop_interchange(routine, project_bounds=self.project_bounds) + + # Fuse loops + loop_fusion(routine) + + # Split loops + loop_fission(routine, promote=self.promote, warn_loop_carries=self.warn_loop_carries) + + # Unroll loops + loop_unroll(routine, warn_iterations_length=self.warn_iterations_length) From ca204e6f2f526015c85f2e636f65301f68e75901 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 8 Nov 2024 12:25:08 +0100 Subject: [PATCH 2/5] TransformLoopsTransformation: make individual components optional --- .../transformations/single_column/vertical.py | 8 +- .../tests/test_transform_loop.py | 80 +++++++++---------- loki/transformations/transform_loop.py | 46 +++++++---- 3 files changed, 76 insertions(+), 58 deletions(-) diff --git a/loki/transformations/single_column/vertical.py b/loki/transformations/single_column/vertical.py index b92e7ceef..4301bae64 100644 --- a/loki/transformations/single_column/vertical.py +++ b/loki/transformations/single_column/vertical.py @@ -15,7 +15,7 @@ get_pragma_parameters, FindVariables ) from loki.tools import as_tuple, CaseInsensitiveDict -from loki.transformations.transform_loop import loop_fusion, loop_interchange +from loki.transformations.transform_loop import do_loop_fusion, do_loop_interchange from loki.transformations.array_indexing import demote_variables from loki.transformations.utilities import get_local_arrays from loki.logging import info @@ -43,7 +43,7 @@ class SCCFuseVerticalLoops(Transformation): :any:`Dimension` object describing the variable conventions used in code to define the vertical data dimension and iteration space. apply_to : list of str, optional - list of routines to apply this transformation to, if not provided or None + list of routines to apply this transformation to, if not provided or None apply to all routines (default: None) """ @@ -85,12 +85,12 @@ def process_kernel(self, routine): # find "multilevel" thus "jk +/- 1" arrays multilevel_relevant_local_arrays = self.identify_multilevel_arrays(relevant_local_arrays) # loop interchange to expose vertical loops as outermost loops - loop_interchange(routine) + do_loop_interchange(routine) # handle initialization of arrays "jk +/- 1" arrays multilevel_relevant_local_arrays_names = set(arr.name.lower() for arr in multilevel_relevant_local_arrays) self.correct_init_of_multilevel_arrays(routine, multilevel_relevant_local_arrays_names) # fuse vertical loops - loop_fusion(routine) + do_loop_fusion(routine) # demote in vertical dimension if possible relevant_local_arrays_names = set(arr.name.lower() for arr in relevant_local_arrays) demote_candidates = relevant_local_arrays_names - multilevel_relevant_local_arrays_names diff --git a/loki/transformations/tests/test_transform_loop.py b/loki/transformations/tests/test_transform_loop.py index b7e04c607..f916f177b 100644 --- a/loki/transformations/tests/test_transform_loop.py +++ b/loki/transformations/tests/test_transform_loop.py @@ -20,7 +20,7 @@ ) from loki.transformations.transform_loop import ( - loop_interchange, loop_fusion, loop_fission, loop_unroll + do_loop_interchange, do_loop_fusion, do_loop_fission, do_loop_unroll ) @@ -66,7 +66,7 @@ def test_transform_loop_interchange_plain(tmp_path, frontend): assert np.all(a == ref) # Apply transformation - loop_interchange(routine) + do_loop_interchange(routine) interchanged_filepath = tmp_path/(f'{routine.name}_interchanged_{frontend}.f90') interchanged_function = jit_compile(routine, filepath=interchanged_filepath, objname=routine.name) @@ -137,7 +137,7 @@ def test_transform_loop_interchange(tmp_path, frontend): assert np.all(a == ref) # Apply transformation - loop_interchange(routine) + do_loop_interchange(routine) interchanged_filepath = tmp_path/(f'{routine.name}_interchanged_{frontend}.f90') interchanged_function = jit_compile(routine, filepath=interchanged_filepath, objname=routine.name) @@ -197,7 +197,7 @@ def test_transform_loop_interchange_project(tmp_path, frontend): assert np.all(a == ref) # Apply transformation - loop_interchange(routine, project_bounds=True) + do_loop_interchange(routine, project_bounds=True) interchanged_filepath = tmp_path/(f'{routine.name}_interchanged_{frontend}.f90') interchanged_function = jit_compile(routine, filepath=interchanged_filepath, objname=routine.name) @@ -241,7 +241,7 @@ def test_transform_loop_fuse_ordering(frontend, insert_loc): a(j, i) = i + j enddo end do - + do j=1,m c(j) = j enddo @@ -256,8 +256,8 @@ def test_transform_loop_fuse_ordering(frontend, insert_loc): """ routine = Subroutine.from_source(fcode, frontend=frontend) assert len(FindNodes(Loop).visit(routine.body)) == 7 - loop_interchange(routine) - loop_fusion(routine) + do_loop_interchange(routine) + do_loop_fusion(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 5 loop_0_vars = [var.name.lower() for var in FindVariables().visit(loops[0].body)] @@ -304,7 +304,7 @@ def test_transform_loop_fuse_matching(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 - loop_fusion(routine) + do_loop_fusion(routine) assert len(FindNodes(Loop).visit(routine.body)) == 1 fused_filepath = tmp_path/(f'{routine.name}_fused_{frontend}.f90') @@ -366,7 +366,7 @@ def test_transform_loop_fuse_subranges(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 3 - loop_fusion(routine) + do_loop_fusion(routine) assert len(FindNodes(Loop).visit(routine.body)) == 1 fused_filepath = tmp_path/(f'{routine.name}_fused_{frontend}.f90') @@ -438,7 +438,7 @@ def test_transform_loop_fuse_groups(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 5 - loop_fusion(routine) + do_loop_fusion(routine) assert len(FindNodes(Loop).visit(routine.body)) == 2 fused_filepath = tmp_path/(f'{routine.name}_fused_{frontend}.f90') @@ -481,7 +481,7 @@ def test_transform_loop_fuse_failures(frontend): """ routine = Subroutine.from_source(fcode, frontend=frontend) with pytest.raises(RuntimeError): - loop_fusion(routine) + do_loop_fusion(routine) @pytest.mark.parametrize('frontend', available_frontends()) @@ -517,7 +517,7 @@ def test_transform_loop_fuse_alignment(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 - loop_fusion(routine) + do_loop_fusion(routine) assert len(FindNodes(Loop).visit(routine.body)) == 1 fused_filepath = tmp_path/(f'{routine.name}_fused_{frontend}.f90') @@ -567,7 +567,7 @@ def test_transform_loop_fuse_nonmatching_lower(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 - loop_fusion(routine) + do_loop_fusion(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 1 @@ -623,7 +623,7 @@ def test_transform_loop_fuse_nonmatching_lower_annotated(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 - loop_fusion(routine) + do_loop_fusion(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 1 @@ -679,7 +679,7 @@ def test_transform_loop_fuse_nonmatching_upper(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 - loop_fusion(routine) + do_loop_fusion(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 1 @@ -740,7 +740,7 @@ def test_transform_loop_fuse_collapse(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 4 - loop_fusion(routine) + do_loop_fusion(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 2 assert all(loop.bounds.start == '1' for loop in loops) @@ -801,7 +801,7 @@ def test_transform_loop_fuse_collapse_nonmatching(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 4 - loop_fusion(routine) + do_loop_fusion(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 2 assert all(loop.bounds.start == '1' for loop in loops) @@ -863,7 +863,7 @@ def test_transform_loop_fuse_collapse_range(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 4 - loop_fusion(routine) + do_loop_fusion(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 2 assert all(loop.bounds.start == '1' for loop in loops) @@ -916,7 +916,7 @@ def test_transform_loop_fission_single(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 1 - loop_fission(routine) + do_loop_fission(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 2 @@ -971,7 +971,7 @@ def test_transform_loop_fission_nested(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 1 assert len(FindNodes(Conditional).visit(routine.body)) == 1 - loop_fission(routine) + do_loop_fission(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 2 @@ -1031,7 +1031,7 @@ def test_transform_loop_fission_nested_promote(tmp_path, frontend): assert len(FindNodes(Loop).visit(routine.body)) == 1 assert len(FindNodes(Conditional).visit(routine.body)) == 2 assert len(FindNodes(Assignment).visit(routine.body)) == 3 - loop_fission(routine) + do_loop_fission(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 2 @@ -1096,7 +1096,7 @@ def test_transform_loop_fission_collapse(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 assert len(FindNodes(Assignment).visit(routine.body)) == 8 - loop_fission(routine) + do_loop_fission(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 8 @@ -1151,7 +1151,7 @@ def test_transform_loop_fission_multiple(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 1 - loop_fission(routine) + do_loop_fission(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 3 @@ -1206,7 +1206,7 @@ def test_transform_loop_fission_promote(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 1 - loop_fission(routine) + do_loop_fission(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 2 @@ -1265,7 +1265,7 @@ def test_transform_loop_fission_promote_conflicting_lengths(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 - loop_fission(routine) + do_loop_fission(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 4 @@ -1322,7 +1322,7 @@ def test_transform_loop_fission_promote_array(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 - loop_fission(routine) + do_loop_fission(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 3 @@ -1375,7 +1375,7 @@ def test_transform_loop_fission_promote_multiple(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 - loop_fission(routine) + do_loop_fission(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 3 @@ -1441,7 +1441,7 @@ def test_transform_loop_fission_multiple_promote(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 5 - loop_fission(routine) + do_loop_fission(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 8 @@ -1500,7 +1500,7 @@ def test_transform_loop_fission_promote_read_after_write(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 - loop_fission(routine) + do_loop_fission(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 3 @@ -1567,7 +1567,7 @@ def test_transform_loop_fission_promote_multiple_read_after_write(tmp_path, fron # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 5 - loop_fission(routine) + do_loop_fission(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 8 @@ -1634,9 +1634,9 @@ def test_transform_loop_fusion_fission(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 4 - loop_fusion(routine) + do_loop_fusion(routine) assert len(FindNodes(Loop).visit(routine.body)) == 3 - loop_fission(routine) + do_loop_fission(routine) loops = FindNodes(Loop).visit(routine.body) assert len(loops) == 4 @@ -1688,7 +1688,7 @@ def test_transform_loop_unroll(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 1 - loop_unroll(routine) + do_loop_unroll(routine) assert len(FindNodes(Loop).visit(routine.body)) == 0 and len(FindNodes(Assignment).visit(routine.body)) == 10 unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90' @@ -1730,7 +1730,7 @@ def test_transform_loop_unroll_step(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 1 - loop_unroll(routine) + do_loop_unroll(routine) assert len(FindNodes(Loop).visit(routine.body)) == 0 and len(FindNodes(Assignment).visit(routine.body)) == 5 unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90' @@ -1774,7 +1774,7 @@ def test_transform_loop_unroll_non_literal_range(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 1 - loop_unroll(routine) + do_loop_unroll(routine) assert len(FindNodes(Loop).visit(routine.body)) == 1 and len(FindNodes(Assignment).visit(routine.body)) == 2 unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90' @@ -1819,7 +1819,7 @@ def test_transform_loop_unroll_nested(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 - loop_unroll(routine) + do_loop_unroll(routine) assert len(FindNodes(Loop).visit(routine.body)) == 0 and len(FindNodes(Assignment).visit(routine.body)) == 50 unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90' @@ -1864,7 +1864,7 @@ def test_transform_loop_unroll_nested_restricted_depth(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 - loop_unroll(routine) + do_loop_unroll(routine) assert len(FindNodes(Loop).visit(routine.body)) == 10 and len(FindNodes(Assignment).visit(routine.body)) == 10 unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90' @@ -1911,7 +1911,7 @@ def test_transform_loop_unroll_nested_restricted_depth_unrollable(tmp_path, fron # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 - loop_unroll(routine) + do_loop_unroll(routine) assert len(FindNodes(Loop).visit(routine.body)) == 1 and len(FindNodes(Assignment).visit(routine.body)) == 6 unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90' @@ -1958,7 +1958,7 @@ def test_transform_loop_unroll_nested_counters(tmp_path, frontend): # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 2 - loop_unroll(routine) + do_loop_unroll(routine) assert len(FindNodes(Loop).visit(routine.body)) == 0 and \ len(FindNodes(Assignment).visit(routine.body)) == len(tuples) @@ -2009,7 +2009,7 @@ def test_transform_loop_unroll_nested_neighbours(tmp_path, frontend): assert s == 2 * sum(a + b + 1 for (a, b) in itertools.product(range(1, 11), range(1, 6))) # Apply transformation assert len(FindNodes(Loop).visit(routine.body)) == 3 - loop_unroll(routine) + do_loop_unroll(routine) assert len(FindNodes(Loop).visit(routine.body)) == 10 and len(FindNodes(Assignment).visit(routine.body)) == 60 unrolled_filepath = tmp_path / f'{routine.name}_unrolled_{frontend}.f90' diff --git a/loki/transformations/transform_loop.py b/loki/transformations/transform_loop.py index b38d7d974..c09531136 100644 --- a/loki/transformations/transform_loop.py +++ b/loki/transformations/transform_loop.py @@ -39,7 +39,8 @@ from loki.batch import Transformation -__all__ = ['loop_interchange', 'loop_fusion', 'loop_fission', 'loop_unroll', 'TransformLoopsTransformation'] +__all__ = ['do_loop_interchange', 'do_loop_fusion', 'do_loop_fission', 'do_loop_unroll', + 'TransformLoopsTransformation'] from loki.analyse.util_polyhedron import Polyhedron @@ -182,7 +183,7 @@ def get_loop_components(loops): return (as_tuple(loop_variables), as_tuple(loop_ranges), as_tuple(loop_bodies)) -def loop_interchange(routine, project_bounds=False): +def do_loop_interchange(routine, project_bounds=False): """ Search for loops annotated with the `loki loop-interchange` pragma and attempt to reorder them. @@ -280,7 +281,7 @@ def pragma_ranges_to_loop_ranges(parameters, scope): return as_tuple(ranges) -def loop_fusion(routine): +def do_loop_fusion(routine): """ Search for loops annotated with the `loki loop-fusion` pragma and attempt to fuse them into a single loop. @@ -524,7 +525,7 @@ def rebuild_fission_branch(start_node, stop_node, **kwargs): return as_tuple(i for i in rebuilt if i) -def loop_fission(routine, promote=True, warn_loop_carries=True): +def do_loop_fission(routine, promote=True, warn_loop_carries=True): """ Search for ``!$loki loop-fission`` pragmas in loops and split them. @@ -687,7 +688,7 @@ def visit_Loop(self, o, depth=None): ) -def loop_unroll(routine, warn_iterations_length=True): +def do_loop_unroll(routine, warn_iterations_length=True): """ Search for ``!$loki loop-unroll`` pragmas in loops and unroll them. @@ -762,13 +763,21 @@ class TransformLoopsTransformation(Transformation): The transformation applies the following methods in order: - * :any:`loop_interchange` - * :any:`loop_fusion` - * :any:`loop_fission` - * :any:`loop_unroll` + * :any:`do_loop_interchange` + * :any:`do_loop_fusion` + * :any:`do_loop_fission` + * :any:`do_loop_unroll` Parameters ---------- + loop_interchange : bool + Run the ``do_loop_interchange`` utility. Default: ``False``. + loop_fusion : bool + Run the ``do_loop_fusion`` utility. Default: ``False``. + loop_fission : bool + Run the ``do_loop_fission`` utility. Default: ``False``. + loop_unroll : bool + Run the ``do_loop_unroll`` utility. Default: ``False``. project_bounds : bool Project loop bounds whilst performing loop interchange. Default: ``False``. promote : bool @@ -788,9 +797,14 @@ class TransformLoopsTransformation(Transformation): """ def __init__( - self, project_bounds=False, promote=True, warn_loop_carries=True, + self, loop_interchange=False, loop_fusion=False, loop_fission=False, + loop_unroll=False, project_bounds=False, promote=True, warn_loop_carries=True, warn_iterations_length=True ): + self.loop_interchange = loop_interchange + self.loop_fusion = loop_fusion + self.loop_fission = loop_fission + self.loop_unroll = loop_unroll self.project_bounds = project_bounds self.promote = promote self.warn_loop_carries = warn_loop_carries @@ -799,13 +813,17 @@ def __init__( def transform_subroutine(self, routine, **kwargs): # Interchange loops - loop_interchange(routine, project_bounds=self.project_bounds) + if self.loop_interchange: + do_loop_interchange(routine, project_bounds=self.project_bounds) # Fuse loops - loop_fusion(routine) + if self.loop_fusion: + do_loop_fusion(routine) # Split loops - loop_fission(routine, promote=self.promote, warn_loop_carries=self.warn_loop_carries) + if self.loop_fission: + do_loop_fission(routine, promote=self.promote, warn_loop_carries=self.warn_loop_carries) # Unroll loops - loop_unroll(routine, warn_iterations_length=self.warn_iterations_length) + if self.loop_unroll: + do_loop_unroll(routine, warn_iterations_length=self.warn_iterations_length) From 1437de644b0544ad127d343aba154696cbcfb33f Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 8 Nov 2024 13:43:14 +0100 Subject: [PATCH 3/5] TransformLoopsTransformation: clarify transformation options --- .../tests/test_transform_loop.py | 86 ++++++++++++++++++- loki/transformations/transform_loop.py | 26 +++--- 2 files changed, 97 insertions(+), 15 deletions(-) diff --git a/loki/transformations/tests/test_transform_loop.py b/loki/transformations/tests/test_transform_loop.py index f916f177b..b29ca8cde 100644 --- a/loki/transformations/tests/test_transform_loop.py +++ b/loki/transformations/tests/test_transform_loop.py @@ -16,11 +16,12 @@ from loki.frontend import available_frontends from loki.ir import ( is_loki_pragma, pragmas_attached, FindNodes, Loop, Conditional, - Assignment, FindVariables + Assignment, FindVariables, nodes as ir ) from loki.transformations.transform_loop import ( - do_loop_interchange, do_loop_fusion, do_loop_fission, do_loop_unroll + do_loop_interchange, do_loop_fusion, do_loop_fission, do_loop_unroll, + TransformLoopsTransformation ) @@ -2022,3 +2023,84 @@ def test_transform_loop_unroll_nested_neighbours(tmp_path, frontend): clean_test(filepath) clean_test(unrolled_filepath) + + +@pytest.mark.parametrize('frontend', available_frontends()) +@pytest.mark.parametrize('loop_trafo', ['loop_interchange', 'loop_fusion', 'loop_fission', + 'loop_unroll']) +def test_transform_loop_transformation(frontend, loop_trafo): + fcode = """ +subroutine transform_loop() + integer, parameter :: m = 8 + integer, parameter :: n = 16 + + integer :: array(m,n) + integer :: a(n), b(n) + integer :: i, j, s + + !$loki loop-interchange + do i=1,n + do j=1,m + array(j, i) = i + j + end do + end do + + !$loki loop-fusion + do i=1,n + a(i) = i + end do + + !$loki loop-fusion + do i=1,n + b(i) = n-i+1 + end do + + do j=1,n + a(j) = j + !$loki loop-fission + b(j) = n-j + end do + + !$loki loop-unroll + do i=1, 10 + s = s + i + 1 + end do +end subroutine transform_loop + """ + + routine = Subroutine.from_source(fcode, frontend=frontend) + + option = { + 'loop_interchange': 'loop_interchange' == loop_trafo, + 'loop_fusion': 'loop_fusion' == loop_trafo, + 'loop_fission': 'loop_fission' == loop_trafo, + 'loop_unroll': 'loop_unroll' == loop_trafo + } + transform = TransformLoopsTransformation(loop_interchange=option['loop_interchange'], + loop_fusion=option['loop_fusion'], + loop_fission=option['loop_fission'], + loop_unroll=option['loop_unroll']) + + # ensure only the correct transformation is enabled + transform.apply(routine) + pragmas = FindNodes(ir.Pragma).visit(routine.body) + assert len(pragmas) == 4 + assert not any(loop_trafo.replace('_', '-') in pragma.content for pragma in pragmas) + assert all(any(opt.replace('_', '-') in pragma.content for pragma in pragmas) + for opt in option if not opt == loop_trafo) + + loops = FindNodes(ir.Loop).visit(routine.body) + if loop_trafo == 'loop_interchange': + assert loops[0].variable == 'j' + inner_loops = FindNodes(ir.Loop).visit(loops[0].body) + assert inner_loops[0].variable == 'i' + + elif loop_trafo == 'loop_fusion': + assigns = FindNodes(ir.Assignment).visit(loops[2].body) + assert len(assigns) == 2 + + elif loop_trafo == 'loop_fission': + assert len(loops) == 7 + + elif loop_trafo == 'loop_unroll': + assert len(loops) == 5 diff --git a/loki/transformations/transform_loop.py b/loki/transformations/transform_loop.py index c09531136..6a8b83a73 100644 --- a/loki/transformations/transform_loop.py +++ b/loki/transformations/transform_loop.py @@ -778,18 +778,18 @@ class TransformLoopsTransformation(Transformation): Run the ``do_loop_fission`` utility. Default: ``False``. loop_unroll : bool Run the ``do_loop_unroll`` utility. Default: ``False``. - project_bounds : bool + interchange_project_bounds : bool Project loop bounds whilst performing loop interchange. Default: ``False``. - promote : bool + fission_promote : bool Try to automatically detect read-after-write across fission points and promote corresponding variables. Note that this does not affect promotion of variables listed directly in the pragma's ``promote`` option. Default: ``True``. - warn_loop_carries : bool + fission_warn_loop_carries : bool Try to automatically detect loop-carried dependencies and warn when the fission point sits after the initial read and before the final write. Default: ``True``. - warn_iterations_length : bool + unroll_warn_iterations_length : bool This specifies if warnings should be generated when unrolling loops with a large number of iterations (32). It's mainly to disable warnings when loops are being unrolled for internal @@ -798,23 +798,23 @@ class TransformLoopsTransformation(Transformation): def __init__( self, loop_interchange=False, loop_fusion=False, loop_fission=False, - loop_unroll=False, project_bounds=False, promote=True, warn_loop_carries=True, - warn_iterations_length=True + loop_unroll=False, interchange_project_bounds=False, fission_promote=True, + fission_warn_loop_carries=True, unroll_warn_iterations_length=True ): self.loop_interchange = loop_interchange self.loop_fusion = loop_fusion self.loop_fission = loop_fission self.loop_unroll = loop_unroll - self.project_bounds = project_bounds - self.promote = promote - self.warn_loop_carries = warn_loop_carries - self.warn_iterations_length = warn_iterations_length + self.interchange_project_bounds = interchange_project_bounds + self.fission_promote = fission_promote + self.fission_warn_loop_carries = fission_warn_loop_carries + self.unroll_warn_iterations_length = unroll_warn_iterations_length def transform_subroutine(self, routine, **kwargs): # Interchange loops if self.loop_interchange: - do_loop_interchange(routine, project_bounds=self.project_bounds) + do_loop_interchange(routine, project_bounds=self.interchange_project_bounds) # Fuse loops if self.loop_fusion: @@ -822,8 +822,8 @@ def transform_subroutine(self, routine, **kwargs): # Split loops if self.loop_fission: - do_loop_fission(routine, promote=self.promote, warn_loop_carries=self.warn_loop_carries) + do_loop_fission(routine, promote=self.fission_promote, warn_loop_carries=self.fission_warn_loop_carries) # Unroll loops if self.loop_unroll: - do_loop_unroll(routine, warn_iterations_length=self.warn_iterations_length) + do_loop_unroll(routine, warn_iterations_length=self.unroll_warn_iterations_length) From f3032e265d80860426e21a7a176a0d9138def314 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 8 Nov 2024 13:49:32 +0100 Subject: [PATCH 4/5] Update example notebook to use do_loop_fusion --- example/03_loop_fusion.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/example/03_loop_fusion.ipynb b/example/03_loop_fusion.ipynb index 12661f44d..5d2822fcb 100644 --- a/example/03_loop_fusion.ipynb +++ b/example/03_loop_fusion.ipynb @@ -923,7 +923,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "164b5054", "metadata": {}, "outputs": [ @@ -982,8 +982,8 @@ } ], "source": [ - "from loki import loop_fusion\n", - "loop_fusion(routine_copy)\n", + "from loki import do_loop_fusion\n", + "do_loop_fusion(routine_copy)\n", "pragma_map = {pragma: None for pragma in FindNodes(Pragma).visit(routine_copy.body)}\n", "routine_copy.body = Transformer(pragma_map).visit(routine_copy.body)\n", "print(routine_copy.to_fortran())" From d0fb25327a2d84803a2ce511c71f2f431ea3af6f Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 8 Nov 2024 14:24:05 +0100 Subject: [PATCH 5/5] TransformLoopsTransformation: rewrite test as matrix of sub-components --- .../tests/test_transform_loop.py | 67 ++++++++++--------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/loki/transformations/tests/test_transform_loop.py b/loki/transformations/tests/test_transform_loop.py index b29ca8cde..d0cd9a6cd 100644 --- a/loki/transformations/tests/test_transform_loop.py +++ b/loki/transformations/tests/test_transform_loop.py @@ -2026,9 +2026,11 @@ def test_transform_loop_unroll_nested_neighbours(tmp_path, frontend): @pytest.mark.parametrize('frontend', available_frontends()) -@pytest.mark.parametrize('loop_trafo', ['loop_interchange', 'loop_fusion', 'loop_fission', - 'loop_unroll']) -def test_transform_loop_transformation(frontend, loop_trafo): +@pytest.mark.parametrize('loop_interchange', [False, True]) +@pytest.mark.parametrize('loop_fusion', [False, True]) +@pytest.mark.parametrize('loop_fission', [False, True]) +@pytest.mark.parametrize('loop_unroll', [False, True]) +def test_transform_loop_transformation(frontend, loop_interchange, loop_fusion, loop_fission, loop_unroll): fcode = """ subroutine transform_loop() integer, parameter :: m = 8 @@ -2069,38 +2071,37 @@ def test_transform_loop_transformation(frontend, loop_trafo): """ routine = Subroutine.from_source(fcode, frontend=frontend) + transform = TransformLoopsTransformation(loop_interchange=loop_interchange, loop_fusion=loop_fusion, + loop_fission=loop_fission, loop_unroll=loop_unroll) + + num_pragmas = len(FindNodes(ir.Pragma).visit(routine.body)) + num_loops = len(FindNodes(ir.Loop).visit(routine.body)) - option = { - 'loop_interchange': 'loop_interchange' == loop_trafo, - 'loop_fusion': 'loop_fusion' == loop_trafo, - 'loop_fission': 'loop_fission' == loop_trafo, - 'loop_unroll': 'loop_unroll' == loop_trafo - } - transform = TransformLoopsTransformation(loop_interchange=option['loop_interchange'], - loop_fusion=option['loop_fusion'], - loop_fission=option['loop_fission'], - loop_unroll=option['loop_unroll']) - - # ensure only the correct transformation is enabled transform.apply(routine) pragmas = FindNodes(ir.Pragma).visit(routine.body) - assert len(pragmas) == 4 - assert not any(loop_trafo.replace('_', '-') in pragma.content for pragma in pragmas) - assert all(any(opt.replace('_', '-') in pragma.content for pragma in pragmas) - for opt in option if not opt == loop_trafo) - loops = FindNodes(ir.Loop).visit(routine.body) - if loop_trafo == 'loop_interchange': - assert loops[0].variable == 'j' - inner_loops = FindNodes(ir.Loop).visit(loops[0].body) - assert inner_loops[0].variable == 'i' - elif loop_trafo == 'loop_fusion': - assigns = FindNodes(ir.Assignment).visit(loops[2].body) - assert len(assigns) == 2 - - elif loop_trafo == 'loop_fission': - assert len(loops) == 7 - - elif loop_trafo == 'loop_unroll': - assert len(loops) == 5 + if loop_interchange: + num_pragmas -= 1 + assert loops[0].variable == 'j' + assert not any('loop-interchange' in pragma.content for pragma in pragmas) + assert FindNodes(ir.Loop).visit(loops[0].body)[0].variable == 'i' + + if loop_fusion: + num_pragmas -= 1 + num_loops -= 1 + assert not any('loop-fusion' in pragma.content for pragma in pragmas) + assert len(FindNodes(ir.Assignment).visit(loops[2].body)) == 2 + + if loop_fission: + num_pragmas -= 1 + num_loops += 1 + assert not any('loop-fission' in pragma.content for pragma in pragmas) + + if loop_unroll: + num_pragmas -= 1 + num_loops -= 1 + assert not any('loop-unroll' in pragma.content for pragma in pragmas) + + assert len(loops) == num_loops + assert len(pragmas) == num_pragmas