From face5820a0eea13b23637ea4f0e16049e30de097 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Tue, 16 Apr 2024 14:33:00 +0000 Subject: [PATCH 1/2] 'HoistTemporaryArraysPragmaOffloadTransformation' - hoisting arrays via pragma offload for low-level GPU implementations (no block index) --- transformations/tests/test_scc_cuf.py | 40 +++++++++++++++++++--- transformations/transformations/scc_cuf.py | 32 ++++++++++++++++- 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/transformations/tests/test_scc_cuf.py b/transformations/tests/test_scc_cuf.py index 6913bcb5c..fabbd51d4 100644 --- a/transformations/tests/test_scc_cuf.py +++ b/transformations/tests/test_scc_cuf.py @@ -11,11 +11,15 @@ from conftest import available_frontends from loki import ( Scheduler, Subroutine, Dimension, FindNodes, Loop, Assignment, - CallStatement, Allocation, Deallocation, VariableDeclaration, Import, FindVariables + CallStatement, Allocation, Deallocation, VariableDeclaration, Import, FindVariables, + Pragma ) from loki.transform import HoistTemporaryArraysAnalysis, ParametriseTransformation from loki.expression import symbols as sym -from transformations import SccCufTransformation, HoistTemporaryArraysDeviceAllocatableTransformation +from transformations import ( + SccCufTransformation, HoistTemporaryArraysDeviceAllocatableTransformation, + HoistTemporaryArraysPragmaOffloadTransformation +) @pytest.fixture(scope='module', name='horizontal') @@ -278,7 +282,11 @@ def test_scc_cuf_parametrise(here, frontend, config, horizontal, vertical, block @pytest.mark.parametrize('frontend', available_frontends()) -def test_scc_cuf_hoist(here, frontend, config, horizontal, vertical, blocking): +@pytest.mark.parametrize('hoist_synthesis', ( + HoistTemporaryArraysDeviceAllocatableTransformation(), + HoistTemporaryArraysPragmaOffloadTransformation()) +) +def test_scc_cuf_hoist(here, frontend, config, horizontal, vertical, blocking, hoist_synthesis): """ Test SCC-CUF transformation type 1, 
thus including host side hoisting """ @@ -296,7 +304,7 @@ def test_scc_cuf_hoist(here, frontend, config, horizontal, vertical, blocking): # Transformation: Analysis scheduler.process(transformation=HoistTemporaryArraysAnalysis()) # Transformation: Synthesis - scheduler.process(transformation=HoistTemporaryArraysDeviceAllocatableTransformation()) + scheduler.process(transformation=hoist_synthesis) check_subroutine_driver(routine=scheduler["driver_mod#driver"].ir, blocking=blocking) check_subroutine_kernel(routine=scheduler["kernel_mod#kernel"].ir, horizontal=horizontal, @@ -306,6 +314,30 @@ def test_scc_cuf_hoist(here, frontend, config, horizontal, vertical, blocking): check_subroutine_elemental_device(routine=scheduler["kernel_mod#elemental_device"].ir) # check driver + driver_routine = scheduler["driver_mod#driver"].ir + assert 'kernel_local_z' in driver_routine.variable_map + assert 'device_local_x' in driver_routine.variable_map + if isinstance(hoist_synthesis, HoistTemporaryArraysDeviceAllocatableTransformation): + assert driver_routine.variable_map['kernel_local_z'].type.device + assert driver_routine.variable_map['device_local_x'].type.device + assert driver_routine.variable_map['kernel_local_z'].shape == ('nlon', 'nz', 'nb') + assert driver_routine.variable_map['device_local_x'].shape == ('nlon', 'nz', 'nb') + elif isinstance(hoist_synthesis, HoistTemporaryArraysPragmaOffloadTransformation): + assert driver_routine.variable_map['kernel_local_z'].type.device is None + assert driver_routine.variable_map['device_local_x'].type.device is None + assert driver_routine.variable_map['kernel_local_z'].shape == ('nlon', 'nz', 'nb') + assert driver_routine.variable_map['device_local_x'].shape == ('nlon', 'nz', 'nb') + pragmas = FindNodes(Pragma).visit(driver_routine.body) + assert pragmas[0].keyword == 'acc' + assert 'enter data create' in pragmas[0].content.lower() + assert 'kernel_local_z' in pragmas[0].content.lower() + assert 'device_local_x' in 
pragmas[0].content.lower() + assert pragmas[1].keyword == 'acc' + assert 'exit data delete' in pragmas[1].content.lower() + assert 'kernel_local_z' in pragmas[1].content.lower() + assert 'device_local_x' in pragmas[1].content.lower() + else: + raise ValueError for call in FindNodes(CallStatement).visit(scheduler["driver_mod#driver"].ir.body): argnames = [arg.name.lower() for arg in call.arguments] assert 'kernel_local_z' in argnames diff --git a/transformations/transformations/scc_cuf.py b/transformations/transformations/scc_cuf.py index 191bf778d..d284f619c 100644 --- a/transformations/transformations/scc_cuf.py +++ b/transformations/transformations/scc_cuf.py @@ -21,7 +21,8 @@ from transformations.single_column_base import SCCBaseTransformation from transformations.single_column_coalesced_vector import SCCDevectorTransformation -__all__ = ['SccCufTransformation', 'HoistTemporaryArraysDeviceAllocatableTransformation'] +__all__ = ['SccCufTransformation', 'HoistTemporaryArraysDeviceAllocatableTransformation', + 'HoistTemporaryArraysPragmaOffloadTransformation'] class HoistTemporaryArraysDeviceAllocatableTransformation(HoistVariablesTransformation): @@ -60,6 +61,35 @@ def driver_variable_declaration(self, routine, variables): routine.body.append(ir.Deallocation((var.clone(dimensions=None),))) +class HoistTemporaryArraysPragmaOffloadTransformation(HoistVariablesTransformation): + """ + Synthesis part for variable/array hoisting, offload via pragmas e.g., OpenACC. + """ + + def driver_variable_declaration(self, routine, variables): + """ + Standard Variable/Array declaration including + device offload via pragmas. 
+ + Parameters + ---------- + routine: :any:`Subroutine` + The subroutine to add the variable declaration + variables: sequence of :any:`Variable` + The variables to be declared + """ + for var in variables: + routine.variables += tuple([var.clone(scope=routine)]) + + vnames = ', '.join(v.name for v in variables) + pragma = ir.Pragma(keyword='acc', content=f'enter data create({vnames})') + pragma_post = ir.Pragma(keyword='acc', content=f'exit data delete({vnames})') + + # Add comments around standalone pragmas to avoid false attachment + routine.body.prepend((ir.Comment(''), pragma, ir.Comment(''))) + routine.body.append((ir.Comment(''), pragma_post, ir.Comment(''))) + + def dynamic_local_arrays(routine, vertical): """ Declaring local arrays with the ``vertical`` :any:`Dimension` to be From a6bd653eae4af7f4b422f4283c0536ba1d8ed118 Mon Sep 17 00:00:00 2001 From: Michael Staneker <50531288+MichaelSt98@users.noreply.github.com> Date: Thu, 18 Apr 2024 14:04:34 +0200 Subject: [PATCH 2/2] add hoisted variables at once, thus edit variables list once instead of multiple times Co-authored-by: Balthasar Reuter <6384870+reuterbal@users.noreply.github.com> --- transformations/transformations/scc_cuf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/transformations/transformations/scc_cuf.py b/transformations/transformations/scc_cuf.py index d284f619c..891146993 100644 --- a/transformations/transformations/scc_cuf.py +++ b/transformations/transformations/scc_cuf.py @@ -78,8 +78,7 @@ def driver_variable_declaration(self, routine, variables): variables: sequence of :any:`Variable` The variables to be declared """ - for var in variables: - routine.variables += tuple([var.clone(scope=routine)]) + routine.variables += tuple(var.clone(scope=routine) for var in variables) vnames = ', '.join(v.name for v in variables) pragma = ir.Pragma(keyword='acc', content=f'enter data create({vnames})')