From 6dec22ac648208ce90df41de0b78780ab9664966 Mon Sep 17 00:00:00 2001 From: jasonjunweilyu Date: Wed, 17 Dec 2025 14:57:23 +1100 Subject: [PATCH 1/4] Merging NGARCH stochastic physics development from fcm to git based on git_migration tag --- .../physics_constants_mod.py | 197 +++++++++++++++++ .../psykal/algorithm/skeb_main_alg_mod.py | 196 +++++++++++++++++ .../psykal/algorithm/spt_main_alg_mod.py | 202 ++++++++++++++++++ .../physics_constants_mod.py | 1 + .../psykal/algorithm/skeb_main_alg_mod.py | 1 + .../psykal/algorithm/spt_main_alg_mod.py | 1 + .../source/algorithm/skeb_main_alg_mod.x90 | 3 +- .../source/algorithm/spt_main_alg_mod.x90 | 40 +++- .../stph/skeb_biharm_diss_kernel_mod.F90 | 22 +- ...pt_convection_cfl_limit_cap_kernel_mod.F90 | 28 ++- .../kernel/stph/spt_levels_cap_kernel_mod.F90 | 28 ++- .../spt_moisture_conservation_kernel_mod.F90 | 15 +- .../kernel/stph/spt_orog_cap_kernel_mod.F90 | 26 ++- .../stph/skeb_biharm_diss_kernel_mod_test.pf | 10 +- 14 files changed, 724 insertions(+), 46 deletions(-) create mode 100644 applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py create mode 100644 applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py create mode 100644 applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py create mode 120000 applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/runtime_constants/physics_constants_mod.py create mode 120000 applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/skeb_main_alg_mod.py create mode 120000 applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/spt_main_alg_mod.py diff --git a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py new file mode 100644 index 000000000..afe83744b --- /dev/null +++ 
b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py
@@ -0,0 +1,197 @@
+##############################################################################
+# (C) Crown copyright Met Office. All rights reserved.
+# The file LICENCE, distributed with this code, contains details of the terms
+# under which the code may be used.
+##############################################################################
+
+
+'''PSyclone transformation script for physics_constants_mod to apply colouring
+and GPU offloading/CPU parallelization. Also adds redundant computation to
+the level-1 halo for setval_* generically. This is based on
+https://github.com/stfc/PSyclone/blob/master/examples/lfric/
+scripts/gpu_offloading.py .
+
+'''
+
+import os
+import sys
+from psyclone.domain.lfric import LFRicConstants
+from psyclone.psyir.nodes import Directive, Loop, Routine
+from psyclone.psyir.transformations import (
+    ACCKernelsTrans, TransformationError, OMPTargetTrans)
+from psyclone.transformations import (
+    Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans,
+    Dynamo0p3RedundantComputationTrans, OMPParallelTrans,
+    ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans,
+    OMPDeclareTargetTrans, OMPLoopTrans, ACCEnterDataTrans)
+from psyclone.domain.common.transformations import KernelModuleInlineTrans
+
+
+# Names of any invoke that we won't add any GPU offloading
+INVOKE_EXCLUSIONS = [
+]
+
+# Names of any kernel that we won't add parallelization
+KERNEL_EXCLUSIONS = ["get_pnm_star_code",]
+# get_Pnm_star_code has data dependencies in its loops and was found unsuitable
+# for parallelization. Listed in lower case: names are matched via .lower().
+
+# Names of any kernels that we won't offload to GPU
+GPU_KERNEL_EXCLUSIONS = [
+]
+
+OFFLOAD_DIRECTIVES = os.getenv('LFRIC_OFFLOAD_DIRECTIVES', "none")
+
+
+def trans(psyir):
+    '''Applies PSyclone colouring and GPU offloading transformations. Any
+    kernels that cannot be offloaded to GPU are parallelised using OpenMP
+    on the CPU if they can be parallelised. Any setval_* kernels are
+    transformed so as to compute into the L1 halos.
+
+    :param psyir: the PSyIR of the PSy-layer.
+    :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer`
+
+    '''
+    inline_trans = KernelModuleInlineTrans()
+    rtrans = Dynamo0p3RedundantComputationTrans()
+    ctrans = Dynamo0p3ColourTrans()
+    otrans = Dynamo0p3OMPLoopTrans()
+    const = LFRicConstants()
+    cpu_parallel = OMPParallelTrans()
+
+    if OFFLOAD_DIRECTIVES == "omp":
+        # Use OpenMP offloading
+        loop_offloading_trans = OMPLoopTrans(
+            omp_directive="teamsdistributeparalleldo",
+            omp_schedule="none"
+        )
+        # OpenMP does not have a kernels parallelism directive equivalent
+        # to OpenACC 'kernels'
+        kernels_trans = None
+        gpu_region_trans = OMPTargetTrans()
+        gpu_annotation_trans = OMPDeclareTargetTrans()
+    elif OFFLOAD_DIRECTIVES == "acc":
+        # Use OpenACC offloading
+        enter_data_trans = ACCEnterDataTrans()
+        loop_offloading_trans = ACCLoopTrans()
+        kernels_trans = ACCKernelsTrans()
+        gpu_region_trans = ACCParallelTrans(default_present=False)
+        gpu_annotation_trans = ACCRoutineTrans()
+    elif OFFLOAD_DIRECTIVES == "none":
+        pass
+    else:
+        print(f"The PSyclone transformation script expects the "
+              f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or 'none' "
+              f"but found '{OFFLOAD_DIRECTIVES}'.")
+        sys.exit(-1)
+
+    print(f"PSy name = '{psyir.name}'")
+
+    for subroutine in psyir.walk(Routine):
+
+        print(f"Transforming invoke '{subroutine.name}' ...")
+
+        # Make setval_* compute redundantly to the level 1 halo if it
+        # is in its own loop
+        for loop in subroutine.loops():
+            if loop.iteration_space == "dof":
+                if len(loop.kernels()) == 1:
+                    if loop.kernels()[0].name in ["setval_c"]:
+                        rtrans.apply(loop, options={"depth": 1})
+
+        if (psyir.name.lower() in INVOKE_EXCLUSIONS) or (OFFLOAD_DIRECTIVES == "none"):
+            print(f"Not adding GPU offloading to invoke '{subroutine.name}'")
+            offload = False
+        else:
+            offload = True
+
+        # Keep a record of any kernels we fail and succeed to offload
+        succeeded_offload = set()
+        failed_to_offload = set()
+
+        # Colour loops over cells unless they are on discontinuous spaces
+        # (alternatively we could annotate the kernels with atomics)
+        for loop in subroutine.loops():
+            if loop.iteration_space.endswith("cell_column"):
+                if (loop.field_space.orig_name not in
+                        const.VALID_DISCONTINUOUS_NAMES):
+                    ctrans.apply(loop)
+
+        # Mark kernels inside the loops over cells as GPU-enabled
+        # and inline them.
+        for loop in subroutine.loops():
+            if loop.iteration_space.endswith("cell_column"):
+                if offload:
+                    for kern in loop.kernels():
+                        if kern.name.lower() in (GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS + list(succeeded_offload)):
+                            continue
+                        else:
+                            try:
+                                gpu_annotation_trans.apply(kern, options={'force': True})
+                                print(f"GPU-annotated kernel '{kern.name}'")
+                                try:
+                                    inline_trans.apply(kern)
+                                    print(f"Module-inlined kernel '{kern.name}'")
+                                    succeeded_offload.add(kern.name.lower())
+                                except TransformationError as err:
+                                    print(f"Failed to module-inline '{kern.name}' due "
+                                          f"to:\n{err.value}")
+                            except TransformationError as err:
+                                failed_to_offload.add(kern.name.lower())
+                                print(f"Failed to annotate '{kern.name}' with "
+                                      f"GPU-enabled directive due to:\n"
+                                      f"{err.value}")
+                            # For annotated or inlined kernels we could attempt to
+                            # provide compile-time dimensions for the temporary
+                            # arrays and convert to code unsupported intrinsics.
+
+        # Add GPU offloading to loops unless they are over colours or are null.
+        for loop in subroutine.walk(Loop):
+            kernel_names = [k.name.lower() for k in loop.kernels()]
+            if offload and all(name not in (list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS +
+                                            KERNEL_EXCLUSIONS) for name in kernel_names):
+                try:
+                    if loop.loop_type == "colours":
+                        pass
+                    if loop.loop_type == "colour":
+                        loop_offloading_trans.apply(
+                            loop, options={"independent": True})
+                        gpu_region_trans.apply(loop.ancestor(Directive))
+                    if loop.loop_type == "":
+                        loop_offloading_trans.apply(
+                            loop, options={"independent": True})
+                        gpu_region_trans.apply(loop.ancestor(Directive))
+                    if loop.loop_type == "dof":
+                        # Loops over dofs can contain reductions
+                        if kernels_trans:
+                            # If kernel offloading is available it should
+                            # manage them
+                            kernels_trans.apply(loop)
+                        else:
+                            # Otherwise, if reductions exist, they will
+                            # be detected by the dependencyAnalysis and raise
+                            # a TransformationError captured below
+                            loop_offloading_trans.apply(
+                                loop, options={"independent": True})
+                            gpu_region_trans.apply(loop.ancestor(Directive))
+                            # Alternatively we could use loop parallelism with
+                            # reduction clauses
+                    print(f"Successfully offloaded loop with {kernel_names}")
+                except TransformationError as err:
+                    print(f"Failed to offload loop with {kernel_names} "
+                          f"because: {err}")
+
+        # Apply OpenMP thread parallelism for any kernels we've not been able
+        # to offload to GPU.
+        for loop in subroutine.walk(Loop):
+            if any(kern.name.lower() in KERNEL_EXCLUSIONS for kern in loop.kernels()):
+                continue
+            if not offload or any(kern.name.lower() in (list(failed_to_offload) +
+                                  GPU_KERNEL_EXCLUSIONS) for
+                                  kern in loop.kernels()):
+                if loop.loop_type not in ["colours", "null"]:
+                    cpu_parallel.apply(loop)
+                    otrans.apply(loop, options={"reprod": True})
+
+        print(subroutine.view())
diff --git a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py
new file mode 100644
index 000000000..8d926f517
--- /dev/null
+++ b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py
@@ -0,0 +1,196 @@
+##############################################################################
+# (C) Crown copyright Met Office. All rights reserved.
+# The file LICENCE, distributed with this code, contains details of the terms
+# under which the code may be used.
+##############################################################################
+
+
+'''PSyclone transformation script for skeb_main_alg_mod to apply colouring
+and GPU offloading/CPU parallelization. Also adds redundant computation to the
+level-1 halo for setval_* generically. This is based on
+https://github.com/stfc/PSyclone/blob/master/examples/lfric/
+scripts/gpu_offloading.py .
+
+'''
+
+import os
+import sys
+from psyclone.domain.lfric import LFRicConstants
+from psyclone.psyir.nodes import Directive, Loop, Routine
+from psyclone.psyir.transformations import (
+    ACCKernelsTrans, TransformationError, OMPTargetTrans)
+from psyclone.transformations import (
+    Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans,
+    Dynamo0p3RedundantComputationTrans, OMPParallelTrans,
+    ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans,
+    OMPDeclareTargetTrans, OMPLoopTrans, ACCEnterDataTrans)
+from psyclone.domain.common.transformations import KernelModuleInlineTrans
+
+
+# Names of any invoke that we won't add any GPU offloading
+INVOKE_EXCLUSIONS = [
+]
+
+# Names of any kernel that we won't add parallelization
+KERNEL_EXCLUSIONS = [
+]
+
+# Names of any kernels that we won't offload to GPU
+GPU_KERNEL_EXCLUSIONS = [
+]
+
+OFFLOAD_DIRECTIVES = os.getenv('LFRIC_OFFLOAD_DIRECTIVES', "none")
+
+
+def trans(psyir):
+    '''Applies PSyclone colouring and GPU offloading transformations. Any
+    kernels that cannot be offloaded to GPU are parallelised using OpenMP
+    on the CPU if they can be parallelised. Any setval_* kernels are
+    transformed so as to compute into the L1 halos.
+
+    :param psyir: the PSyIR of the PSy-layer.
+    :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer`
+
+    '''
+    inline_trans = KernelModuleInlineTrans()
+    rtrans = Dynamo0p3RedundantComputationTrans()
+    ctrans = Dynamo0p3ColourTrans()
+    otrans = Dynamo0p3OMPLoopTrans()
+    const = LFRicConstants()
+    cpu_parallel = OMPParallelTrans()
+
+    if OFFLOAD_DIRECTIVES == "omp":
+        # Use OpenMP offloading
+        loop_offloading_trans = OMPLoopTrans(
+            omp_directive="teamsdistributeparalleldo",
+            omp_schedule="none"
+        )
+        # OpenMP does not have a kernels parallelism directive equivalent
+        # to OpenACC 'kernels'
+        kernels_trans = None
+        gpu_region_trans = OMPTargetTrans()
+        gpu_annotation_trans = OMPDeclareTargetTrans()
+    elif OFFLOAD_DIRECTIVES == "acc":
+        # Use OpenACC offloading
+        enter_data_trans = ACCEnterDataTrans()
+        loop_offloading_trans = ACCLoopTrans()
+        kernels_trans = ACCKernelsTrans()
+        gpu_region_trans = ACCParallelTrans(default_present=False)
+        gpu_annotation_trans = ACCRoutineTrans()
+    elif OFFLOAD_DIRECTIVES == "none":
+        pass
+    else:
+        print(f"The PSyclone transformation script expects the "
+              f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or 'none' "
+              f"but found '{OFFLOAD_DIRECTIVES}'.")
+        sys.exit(-1)
+
+    print(f"PSy name = '{psyir.name}'")
+
+    for subroutine in psyir.walk(Routine):
+
+        print(f"Transforming invoke '{subroutine.name}' ...")
+
+        # Make setval_* compute redundantly to the level 1 halo if it
+        # is in its own loop
+        for loop in subroutine.loops():
+            if loop.iteration_space == "dof":
+                if len(loop.kernels()) == 1:
+                    if loop.kernels()[0].name in ["setval_c"]:
+                        rtrans.apply(loop, options={"depth": 1})
+
+        if (psyir.name.lower() in INVOKE_EXCLUSIONS) or (OFFLOAD_DIRECTIVES == "none"):
+            print(f"Not adding GPU offloading to invoke '{subroutine.name}'")
+            offload = False
+        else:
+            offload = True
+
+        # Keep a record of any kernels we fail and succeed to offload
+        succeeded_offload = set()
+        failed_to_offload = set()
+
+        # Colour loops over cells unless they are on discontinuous spaces
+        # (alternatively we could annotate the kernels with atomics)
+        for loop in subroutine.loops():
+            if loop.iteration_space.endswith("cell_column"):
+                if (loop.field_space.orig_name not in
+                        const.VALID_DISCONTINUOUS_NAMES):
+                    ctrans.apply(loop)
+
+        # Mark kernels inside the loops over cells as GPU-enabled
+        # and inline them.
+        for loop in subroutine.loops():
+            if loop.iteration_space.endswith("cell_column"):
+                if offload:
+                    for kern in loop.kernels():
+                        if kern.name.lower() in (GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS + list(succeeded_offload)):
+                            continue
+                        else:
+                            try:
+                                gpu_annotation_trans.apply(kern, options={'force': True})
+                                print(f"GPU-annotated kernel '{kern.name}'")
+                                try:
+                                    inline_trans.apply(kern)
+                                    print(f"Module-inlined kernel '{kern.name}'")
+                                    succeeded_offload.add(kern.name.lower())
+                                except TransformationError as err:
+                                    print(f"Failed to module-inline '{kern.name}' due "
+                                          f"to:\n{err.value}")
+                            except TransformationError as err:
+                                failed_to_offload.add(kern.name.lower())
+                                print(f"Failed to annotate '{kern.name}' with "
+                                      f"GPU-enabled directive due to:\n"
+                                      f"{err.value}")
+                            # For annotated or inlined kernels we could attempt to
+                            # provide compile-time dimensions for the temporary
+                            # arrays and convert to code unsupported intrinsics.
+
+        # Add GPU offloading to loops unless they are over colours or are null.
+        for loop in subroutine.walk(Loop):
+            kernel_names = [k.name.lower() for k in loop.kernels()]
+            if offload and all(name not in (list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS +
+                                            KERNEL_EXCLUSIONS) for name in kernel_names):
+                try:
+                    if loop.loop_type == "colours":
+                        pass
+                    if loop.loop_type == "colour":
+                        loop_offloading_trans.apply(
+                            loop, options={"independent": True})
+                        gpu_region_trans.apply(loop.ancestor(Directive))
+                    if loop.loop_type == "":
+                        loop_offloading_trans.apply(
+                            loop, options={"independent": True})
+                        gpu_region_trans.apply(loop.ancestor(Directive))
+                    if loop.loop_type == "dof":
+                        # Loops over dofs can contain reductions
+                        if kernels_trans:
+                            # If kernel offloading is available it should
+                            # manage them
+                            kernels_trans.apply(loop)
+                        else:
+                            # Otherwise, if reductions exist, they will
+                            # be detected by the dependencyAnalysis and raise
+                            # a TransformationError captured below
+                            loop_offloading_trans.apply(
+                                loop, options={"independent": True})
+                            gpu_region_trans.apply(loop.ancestor(Directive))
+                            # Alternatively we could use loop parallelism with
+                            # reduction clauses
+                    print(f"Successfully offloaded loop with {kernel_names}")
+                except TransformationError as err:
+                    print(f"Failed to offload loop with {kernel_names} "
+                          f"because: {err}")
+
+        # Apply OpenMP thread parallelism for any kernels we've not been able
+        # to offload to GPU.
+        for loop in subroutine.walk(Loop):
+            if any(kern.name.lower() in KERNEL_EXCLUSIONS for kern in loop.kernels()):
+                continue
+            if not offload or any(kern.name.lower() in (list(failed_to_offload) +
+                                  GPU_KERNEL_EXCLUSIONS) for
+                                  kern in loop.kernels()):
+                if loop.loop_type not in ["colours", "null"]:
+                    cpu_parallel.apply(loop)
+                    otrans.apply(loop, options={"reprod": True})
+
+        print(subroutine.view())
diff --git a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py
new file mode 100644
index 000000000..3872b2539
--- /dev/null
+++ b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py
@@ -0,0 +1,202 @@
+##############################################################################
+# (C) Crown copyright Met Office. All rights reserved.
+# The file LICENCE, distributed with this code, contains details of the terms
+# under which the code may be used.
+##############################################################################
+
+
+'''PSyclone transformation script for spt_main_alg_mod to apply colouring
+and GPU offloading/CPU parallelization. Also adds redundant computation to
+the level-1 halo for setval_* generically. This is based on
+https://github.com/stfc/PSyclone/blob/master/examples/lfric/
+scripts/gpu_offloading.py .
+
+'''
+
+import os
+import sys
+from psyclone.domain.lfric import LFRicConstants
+from psyclone.psyir.nodes import Directive, Loop, Routine
+from psyclone.psyir.transformations import (
+    ACCKernelsTrans, TransformationError, OMPTargetTrans)
+from psyclone.transformations import (
+    Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans,
+    Dynamo0p3RedundantComputationTrans, OMPParallelTrans,
+    ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans,
+    OMPDeclareTargetTrans, OMPLoopTrans, ACCEnterDataTrans)
+from psyclone.domain.common.transformations import KernelModuleInlineTrans
+
+
+# Names of any invoke that we won't add any GPU offloading
+INVOKE_EXCLUSIONS = [
+]
+
+# Names of any kernel that we won't add parallelization
+KERNEL_EXCLUSIONS = [
+]
+
+# Names of any kernels that we won't offload to GPU
+GPU_KERNEL_EXCLUSIONS = ["spt_saturation_cap_code",]
+# spt_saturation_cap_code: GPU transformation cannot be applied because of
+# using qsat_wat_mix from qsat_mod. As qsat_mod is going to be modified in future,
+# this falls out of the scope of the NGARCH project.
+# Error message: Transformation Error: Kernel 'spt_saturation_cap_code' accesses
+# the symbol 'qsat_wat_mix: RoutineSymbol'
+# which is imported. If this symbol represents data then it must first be converted
+# to a Kernel argument using the KernelImportsToArguments transformation.
+
+OFFLOAD_DIRECTIVES = os.getenv('LFRIC_OFFLOAD_DIRECTIVES', "none")
+
+
+def trans(psyir):
+    '''Applies PSyclone colouring and GPU offloading transformations. Any
+    kernels that cannot be offloaded to GPU are parallelised using OpenMP
+    on the CPU if they can be parallelised. Any setval_* kernels are
+    transformed so as to compute into the L1 halos.
+
+    :param psyir: the PSyIR of the PSy-layer.
+    :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer`
+
+    '''
+    inline_trans = KernelModuleInlineTrans()
+    rtrans = Dynamo0p3RedundantComputationTrans()
+    ctrans = Dynamo0p3ColourTrans()
+    otrans = Dynamo0p3OMPLoopTrans()
+    const = LFRicConstants()
+    cpu_parallel = OMPParallelTrans()
+
+    if OFFLOAD_DIRECTIVES == "omp":
+        # Use OpenMP offloading
+        loop_offloading_trans = OMPLoopTrans(
+            omp_directive="teamsdistributeparalleldo",
+            omp_schedule="none"
+        )
+        # OpenMP does not have a kernels parallelism directive equivalent
+        # to OpenACC 'kernels'
+        kernels_trans = None
+        gpu_region_trans = OMPTargetTrans()
+        gpu_annotation_trans = OMPDeclareTargetTrans()
+    elif OFFLOAD_DIRECTIVES == "acc":
+        # Use OpenACC offloading
+        enter_data_trans = ACCEnterDataTrans()
+        loop_offloading_trans = ACCLoopTrans()
+        kernels_trans = ACCKernelsTrans()
+        gpu_region_trans = ACCParallelTrans(default_present=False)
+        gpu_annotation_trans = ACCRoutineTrans()
+    elif OFFLOAD_DIRECTIVES == "none":
+        pass
+    else:
+        print(f"The PSyclone transformation script expects the "
+              f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or 'none' "
+              f"but found '{OFFLOAD_DIRECTIVES}'.")
+        sys.exit(-1)
+
+    print(f"PSy name = '{psyir.name}'")
+
+    for subroutine in psyir.walk(Routine):
+
+        print(f"Transforming invoke '{subroutine.name}' ...")
+
+        # Make setval_* compute redundantly to the level 1 halo if it
+        # is in its own loop
+        for loop in subroutine.loops():
+            if loop.iteration_space == "dof":
+                if len(loop.kernels()) == 1:
+                    if loop.kernels()[0].name in ["setval_c"]:
+                        rtrans.apply(loop, options={"depth": 1})
+
+        if (psyir.name.lower() in INVOKE_EXCLUSIONS) or (OFFLOAD_DIRECTIVES == "none"):
+            print(f"Not adding GPU offloading to invoke '{subroutine.name}'")
+            offload = False
+        else:
+            offload = True
+
+        # Keep a record of any kernels we fail and succeed to offload
+        succeeded_offload = set()
+        failed_to_offload = set()
+
+        # Colour loops over cells unless they are on discontinuous spaces
+        # (alternatively we could annotate the kernels with atomics)
+        for loop in subroutine.loops():
+            if loop.iteration_space.endswith("cell_column"):
+                if (loop.field_space.orig_name not in
+                        const.VALID_DISCONTINUOUS_NAMES):
+                    ctrans.apply(loop)
+
+        # Mark kernels inside the loops over cells as GPU-enabled
+        # and inline them.
+        for loop in subroutine.loops():
+            if loop.iteration_space.endswith("cell_column"):
+                if offload:
+                    for kern in loop.kernels():
+                        if kern.name.lower() in (GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS + list(succeeded_offload)):
+                            continue
+                        else:
+                            try:
+                                gpu_annotation_trans.apply(kern, options={'force': True})
+                                print(f"GPU-annotated kernel '{kern.name}'")
+                                try:
+                                    inline_trans.apply(kern)
+                                    print(f"Module-inlined kernel '{kern.name}'")
+                                    succeeded_offload.add(kern.name.lower())
+                                except TransformationError as err:
+                                    print(f"Failed to module-inline '{kern.name}' due "
+                                          f"to:\n{err.value}")
+                            except TransformationError as err:
+                                failed_to_offload.add(kern.name.lower())
+                                print(f"Failed to annotate '{kern.name}' with "
+                                      f"GPU-enabled directive due to:\n"
+                                      f"{err.value}")
+                            # For annotated or inlined kernels we could attempt to
+                            # provide compile-time dimensions for the temporary
+                            # arrays and convert to code unsupported intrinsics.
+
+        # Add GPU offloading to loops unless they are over colours or are null.
+        for loop in subroutine.walk(Loop):
+            kernel_names = [k.name.lower() for k in loop.kernels()]
+            if offload and all(name not in (list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS +
+                                            KERNEL_EXCLUSIONS) for name in kernel_names):
+                try:
+                    if loop.loop_type == "colours":
+                        pass
+                    if loop.loop_type == "colour":
+                        loop_offloading_trans.apply(
+                            loop, options={"independent": True})
+                        gpu_region_trans.apply(loop.ancestor(Directive))
+                    if loop.loop_type == "":
+                        loop_offloading_trans.apply(
+                            loop, options={"independent": True})
+                        gpu_region_trans.apply(loop.ancestor(Directive))
+                    if loop.loop_type == "dof":
+                        # Loops over dofs can contain reductions
+                        if kernels_trans:
+                            # If kernel offloading is available it should
+                            # manage them
+                            kernels_trans.apply(loop)
+                        else:
+                            # Otherwise, if reductions exist, they will
+                            # be detected by the dependencyAnalysis and raise
+                            # a TransformationError captured below
+                            loop_offloading_trans.apply(
+                                loop, options={"independent": True})
+                            gpu_region_trans.apply(loop.ancestor(Directive))
+                            # Alternatively we could use loop parallelism with
+                            # reduction clauses
+                    print(f"Successfully offloaded loop with {kernel_names}")
+                except TransformationError as err:
+                    print(f"Failed to offload loop with {kernel_names} "
+                          f"because: {err}")
+
+        # Apply OpenMP thread parallelism for any kernels we've not been able
+        # to offload to GPU.
+        for loop in subroutine.walk(Loop):
+            if any(kern.name.lower() in KERNEL_EXCLUSIONS for kern in loop.kernels()):
+                continue
+            if not offload or any(kern.name.lower() in (list(failed_to_offload) +
+                                  GPU_KERNEL_EXCLUSIONS) for
+                                  kern in loop.kernels()):
+                if loop.loop_type not in ["colours", "null"]:
+                    cpu_parallel.apply(loop)
+                    otrans.apply(loop, options={"reprod": True})
+
+        print(subroutine.view())
diff --git a/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/runtime_constants/physics_constants_mod.py b/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/runtime_constants/physics_constants_mod.py
new file mode 120000
index 000000000..7a552184e
--- /dev/null
+++ b/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/runtime_constants/physics_constants_mod.py
@@ -0,0 +1 @@
+../../../../meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py
\ No newline at end of file
diff --git a/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/skeb_main_alg_mod.py b/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/skeb_main_alg_mod.py
new file mode 120000
index 000000000..52e0e341a
--- /dev/null
+++ b/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/skeb_main_alg_mod.py
@@ -0,0 +1 @@
+../../../meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py
\ No newline at end of file
diff --git a/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/spt_main_alg_mod.py b/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/spt_main_alg_mod.py
new file mode 120000
index 000000000..7dfc313ea
--- /dev/null
+++ b/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/spt_main_alg_mod.py
@@ -0,0 +1 @@
+../../../meto-ex1a/psykal/algorithm/spt_main_alg_mod.py
\ No newline at end of file
diff --git a/interfaces/physics_schemes_interface/source/algorithm/skeb_main_alg_mod.x90 b/interfaces/physics_schemes_interface/source/algorithm/skeb_main_alg_mod.x90
index b49ad7f82..fd8a48eba 100644
--- 
a/interfaces/physics_schemes_interface/source/algorithm/skeb_main_alg_mod.x90 +++ b/interfaces/physics_schemes_interface/source/algorithm/skeb_main_alg_mod.x90 @@ -458,7 +458,8 @@ module skeb_main_alg_mod skeb_biharm_diss_kernel_type(ndisp, vorticity, divergence, & stencil_extent, dx_at_w2, & skeb_level_bottom, skeb_level_top, & - dt, norm_xi, norm_div) ) + dt, norm_xi, norm_div, & + norm_xi_flag, norm_div_flag) ) if (write_diag .and. use_xios_io) then if (norm_div_flag) & diff --git a/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90 b/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90 index 0f61d1267..bbc398fae 100644 --- a/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90 +++ b/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90 @@ -8,7 +8,7 @@ module spt_main_alg_mod - use constants_mod, only: r_def, i_def, l_def + use constants_mod, only: r_def, i_def, l_def, r_second use fs_continuity_mod, only: W0, Wtheta ! define types use clock_mod, only: clock_type @@ -79,6 +79,9 @@ module spt_main_alg_mod microphysics_fields, radiation_fields, & derived_fields, orography_fields, clock) + ! Timestepping parameters + use timestepping_config_mod, only: dt_timestep => dt + ! SPT parameters use stochastic_physics_config_mod, only: & ! Switches to use different @@ -102,6 +105,10 @@ module spt_main_alg_mod ! SPT levels spt_level_bottom, & spt_level_top, & + spt_level_begin_tapering_bottom, & + spt_level_begin_tapering_top, & + spt_orog_forcing_pattern_thresh, & + spt_stddev_orog_thres, & ! Stoch Phy wavenumbers stph_n_max, & stph_spectral_dim, & @@ -222,6 +229,9 @@ module spt_main_alg_mod real(kind=r_def) :: mlcrcp ! iterators in for loops integer(i_def) :: n,n_row, m + ! 
Timestepping_config_mod scalar (for PSyclone to know data type) + real(kind=r_second) :: timestepping_config_mod_dt + timestepping_config_mod_dt = dt_timestep if ( subroutine_timers ) call timer("spt_main_alg") @@ -362,10 +372,12 @@ module spt_main_alg_mod if (spt_convection_cfl_limit) then if (.not. spt_mse_conservation) then call invoke(spt_convection_cfl_limit_cap_kernel_type(dt_conv_cfl, massflux_up, & - fp_spt, pressure)) + fp_spt, pressure, spt_level_bottom, & + spt_level_top, timestepping_config_mod_dt)) end if call invoke(spt_convection_cfl_limit_cap_kernel_type(dmv_conv_cfl, massflux_up, & - fp_spt, pressure)) + fp_spt, pressure, spt_level_bottom, & + spt_level_top, timestepping_config_mod_dt)) end if ! Apply tendencies to dX_spt, conver dt to theta @@ -393,18 +405,28 @@ module spt_main_alg_mod !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if (.not. spt_mse_conservation) then - call invoke(spt_levels_cap_kernel_type(dtheta_spt)) + call invoke(spt_levels_cap_kernel_type(dtheta_spt, spt_level_bottom, & + spt_level_top, spt_level_begin_tapering_bottom, & + spt_level_begin_tapering_top)) end if - call invoke(spt_levels_cap_kernel_type(dmv_spt)) + call invoke(spt_levels_cap_kernel_type(dmv_spt, spt_level_bottom, & + spt_level_top, spt_level_begin_tapering_bottom, & + spt_level_begin_tapering_top)) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !! 5) Apply orographic capping !! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if (.not. 
spt_mse_conservation) then - call invoke(spt_orog_cap_kernel_type(dtheta_spt,fp_spt,sd_orog)) + call invoke(spt_orog_cap_kernel_type(dtheta_spt,fp_spt,sd_orog, & + spt_level_bottom, spt_level_top, & + spt_orog_forcing_pattern_thresh, & + spt_stddev_orog_thres)) end if - call invoke(spt_orog_cap_kernel_type(dmv_spt,fp_spt,sd_orog)) + call invoke(spt_orog_cap_kernel_type(dmv_spt,fp_spt,sd_orog, & + spt_level_bottom, spt_level_top, & + spt_orog_forcing_pattern_thresh, & + spt_stddev_orog_thres)) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !! 6) Remove points where perturbations cause super-saturation !! @@ -462,7 +484,9 @@ module spt_main_alg_mod if (spt_moisture_conservation) then mesh => dtheta%get_mesh() dz_wth => get_dz_at_wtheta(mesh%get_id()) - call invoke(spt_moisture_conservation_kernel_type(dmv_spt,mv,dz_wth,rho_in_wth)) + call invoke(spt_moisture_conservation_kernel_type(dmv_spt,mv,dz_wth,& + rho_in_wth,spt_level_bottom,& + spt_level_top)) end if ! Apply MSE conservation in the column if requested diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90 index c10d72912..6d66ec22d 100644 --- a/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90 +++ b/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90 @@ -9,18 +9,18 @@ module skeb_biharm_diss_kernel_mod use argument_mod, only: arg_type, GH_FIELD, & GH_REAL, GH_WRITE, GH_READ, & CELL_COLUMN, GH_INTEGER, & - GH_SCALAR, STENCIL, CROSS + GH_SCALAR, STENCIL, CROSS, & + GH_LOGICAL use fs_continuity_mod, only: W3, Wtheta, W1, W2 - use constants_mod, only: r_def, i_def + use constants_mod, only: r_def, i_def, l_def use kernel_mod, only: kernel_type - use empty_data_mod, only: empty_real_data implicit none !> Kernel metadata for Psyclone type, public, extends(kernel_type) :: 
skeb_biharm_diss_kernel_type private - type(arg_type) :: meta_args(9) = (/ & + type(arg_type) :: meta_args(11) = (/ & arg_type(GH_FIELD, GH_REAL, GH_WRITE, W3), & ! ndisp arg_type(GH_FIELD, GH_REAL, GH_READ, W1), & ! vorticity arg_type(GH_FIELD, GH_REAL, GH_READ, W3, STENCIL(CROSS)), & ! divergence @@ -29,7 +29,9 @@ module skeb_biharm_diss_kernel_mod arg_type(GH_SCALAR, GH_INTEGER, GH_READ ), & ! skeb_level_top arg_type(GH_SCALAR, GH_REAL, GH_READ ), & ! dt arg_type(GH_FIELD, GH_REAL, GH_WRITE, W3), & ! norm_xi - arg_type(GH_FIELD, GH_REAL, GH_WRITE, W3) & ! norm_div + arg_type(GH_FIELD, GH_REAL, GH_WRITE, W3), & ! norm_div + arg_type(GH_SCALAR, GH_LOGICAL, GH_READ), & ! norm_xi_flag + arg_type(GH_SCALAR, GH_LOGICAL, GH_READ) & ! norm_div_flag /) integer :: operates_on = CELL_COLUMN @@ -60,6 +62,8 @@ module skeb_biharm_diss_kernel_mod !> @param[in] ndf_w2 Number of DOFs per cell for w2 space !> @param[in] undf_w2 Number of unique DOFs for w2 space !> @param[in] map_w2 dofmap for the cell at the base of the column for w2 space + !> @param[in] norm_xi_flag Control whether norm_xi calculation is needed + !> @param[in] norm_div_flag Control whether norm_div calculation is needed subroutine skeb_biharm_diss_code(nlayers, & ndisp, & @@ -73,6 +77,8 @@ subroutine skeb_biharm_diss_code(nlayers, & dt, & norm_xi, & norm_div, & + norm_xi_flag, & + norm_div_flag, & ndf_w3, & undf_w3, & map_w3, & @@ -94,6 +100,8 @@ subroutine skeb_biharm_diss_code(nlayers, & integer(kind=i_def), intent(in), dimension(ndf_w2) :: map_w2 integer(kind=i_def), intent(in), dimension(ndf_w3,map_w3_sten_size) :: map_w3_sten integer(kind=i_def), intent(in), dimension(ndf_w1) :: map_w1 + logical(kind=l_def), intent(in) :: norm_xi_flag + logical(kind=l_def), intent(in) :: norm_div_flag ! 
Fields real(kind=r_def), intent(in), dimension(undf_w1) :: vorticity @@ -156,10 +164,10 @@ subroutine skeb_biharm_diss_code(nlayers, & ndisp(map_w3(1)+k-1) = (biharmonic_x_div + biharmonic_y_div + & biharmonic_x_xi + biharmonic_y_xi) * amp_K - if (.not. associated(norm_xi, empty_real_data)) then + if (norm_xi_flag) then norm_xi(map_w3(1)+k-1) = biharmonic_x_xi + biharmonic_y_xi end if - if (.not. associated(norm_div, empty_real_data)) then + if (norm_div_flag) then norm_div(map_w3(1)+k-1) = biharmonic_x_div + biharmonic_y_div end if diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90 index 71f156fec..f7f82dbcd 100644 --- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90 +++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90 @@ -8,9 +8,11 @@ module spt_convection_cfl_limit_cap_kernel_mod use argument_mod, only: arg_type, GH_FIELD, & GH_WRITE, GH_REAL, & + GH_SCALAR, GH_INTEGER, & GH_READ, CELL_COLUMN use fs_continuity_mod, only: Wtheta - use constants_mod, only: r_def, i_def, l_def + use constants_mod, only: r_def, i_def, l_def, & + r_second use kernel_mod, only: kernel_type implicit none @@ -24,11 +26,14 @@ module spt_convection_cfl_limit_cap_kernel_mod !> type, public, extends(kernel_type) :: spt_convection_cfl_limit_cap_kernel_type private - type(arg_type) :: meta_args(4) = (/ & + type(arg_type) :: meta_args(7) = (/ & arg_type(GH_FIELD, GH_REAL, GH_WRITE, WTHETA), & ! dX_conv_cfl arg_type(GH_FIELD, GH_REAL, GH_READ, WTHETA), & ! massflux_up arg_type(GH_FIELD, GH_REAL, GH_READ, WTHETA), & ! fp_spt - arg_type(GH_FIELD, GH_REAL, GH_READ, WTHETA) & ! pressure + arg_type(GH_FIELD, GH_REAL, GH_READ, WTHETA), & ! pressure + arg_type(GH_SCALAR, GH_INTEGER, GH_READ), & ! 
spt_level_bottom + arg_type(GH_SCALAR, GH_INTEGER, GH_READ), & ! spt_level_top + arg_type(GH_SCALAR, GH_REAL, GH_READ) & ! dt /) integer :: operates_on = CELL_COLUMN contains @@ -52,20 +57,20 @@ module spt_convection_cfl_limit_cap_kernel_mod !> @param[in] ndf_wth Number of degrees of freedom per cell for wtheta !> @param[in] undf_wth Number of total degrees of freedom for wtheta !> @param[in] map_wth Dofmap for the cell at the base of the column + !> @param[in] spt_level_bottom Bottom level of the stochastic scheme + !> @param[in] spt_level_top Top level of the stochastic scheme + !> @param[in] dt Timestep from timestepping_config_mod subroutine spt_convection_cfl_limit_cap_code(nlayers, & dX_conv_cfl, & massflux_up, & fp_spt, & pressure, & + spt_level_bottom, & + spt_level_top, & + dt, & ndf_wth, & undf_wth, & - map_wth & - ) - - use stochastic_physics_config_mod, only: spt_level_bottom, & - spt_level_top - - use timestepping_config_mod, only: dt + map_wth) implicit none @@ -74,6 +79,9 @@ subroutine spt_convection_cfl_limit_cap_code(nlayers, & integer(kind=i_def), intent(in) :: ndf_wth integer(kind=i_def), intent(in) :: undf_wth integer(kind=i_def), intent(in), dimension(ndf_wth) :: map_wth + integer(kind=i_def), intent(in) :: spt_level_bottom + integer(kind=i_def), intent(in) :: spt_level_top + real(kind=r_second), intent(in) :: dt ! 
Fields perturbations + tendencies real(kind=r_def), intent(inout), dimension(undf_wth) :: dX_conv_cfl diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90 index ac3b44fbc..d4fc83913 100644 --- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90 +++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90 @@ -8,7 +8,8 @@ module spt_levels_cap_kernel_mod use argument_mod, only: arg_type, GH_FIELD, & GH_WRITE, GH_REAL, & - CELL_COLUMN + GH_SCALAR, GH_INTEGER, & + GH_READ, CELL_COLUMN use fs_continuity_mod, only: Wtheta @@ -26,8 +27,12 @@ module spt_levels_cap_kernel_mod !> type, public, extends(kernel_type) :: spt_levels_cap_kernel_type private - type(arg_type) :: meta_args(1) = (/ & - arg_type(GH_FIELD, GH_REAL, GH_WRITE, WTHETA) & !dX + type(arg_type) :: meta_args(5) = (/ & + arg_type(GH_FIELD, GH_REAL, GH_WRITE, WTHETA), & !dX + arg_type(GH_SCALAR, GH_INTEGER, GH_READ), & ! spt_level_bottom + arg_type(GH_SCALAR, GH_INTEGER, GH_READ), & ! spt_level_top + arg_type(GH_SCALAR, GH_INTEGER, GH_READ), & ! spt_level_begin_tapering_bottom + arg_type(GH_SCALAR, GH_INTEGER, GH_READ) & ! 
spt_level_begin_tapering_top /) integer :: operates_on = CELL_COLUMN contains @@ -49,19 +54,22 @@ module spt_levels_cap_kernel_mod !> @param[in] ndf_wth Number of degrees of freedom per cell for wtheta !> @param[in] undf_wth Number of total degrees of freedom for wtheta !> @param[in] map_wth Dofmap for the cell at the base of the column + !> @param[in] spt_level_bottom Bottom level of the stochastic scheme + !> @param[in] spt_level_top Top level of the stochastic scheme + !> @param[in] spt_level_begin_tapering_bottom spt_level_begin_tapering_bottom in stochastic_physics_config_mod + !> @param[in] spt_level_begin_tapering_top spt_level_begin_tapering_top in stochastic_physics_config_mod subroutine spt_levels_cap_code(nlayers, & dX, & + spt_level_bottom, & + spt_level_top, & + spt_level_begin_tapering_bottom, & + spt_level_begin_tapering_top, & ndf_wth, & undf_wth, & map_wth & ) - use stochastic_physics_config_mod, only: spt_level_bottom, & - spt_level_top, & - spt_level_begin_tapering_bottom, & - spt_level_begin_tapering_top - implicit none !Arguments @@ -69,6 +77,10 @@ subroutine spt_levels_cap_code(nlayers, & integer(kind=i_def), intent(in) :: ndf_wth integer(kind=i_def), intent(in) :: undf_wth integer(kind=i_def), intent(in), dimension(ndf_wth) :: map_wth + integer(kind=i_def), intent(in) :: spt_level_bottom + integer(kind=i_def), intent(in) :: spt_level_top + integer(kind=i_def), intent(in) :: spt_level_begin_tapering_bottom + integer(kind=i_def), intent(in) :: spt_level_begin_tapering_top ! 
field with perturbation real(kind=r_def), intent(inout), dimension(undf_wth) :: dX diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90 index d4bf26dd9..107075419 100644 --- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90 +++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90 @@ -8,6 +8,7 @@ module spt_moisture_conservation_kernel_mod use argument_mod, only: arg_type, GH_FIELD, & GH_WRITE, GH_REAL, & + GH_SCALAR, GH_INTEGER, & GH_READ, CELL_COLUMN use fs_continuity_mod, only: Wtheta use constants_mod, only: r_def, i_def @@ -24,11 +25,13 @@ module spt_moisture_conservation_kernel_mod !> type, public, extends(kernel_type) :: spt_moisture_conservation_kernel_type private - type(arg_type) :: meta_args(4) = (/ & + type(arg_type) :: meta_args(6) = (/ & arg_type(GH_FIELD, GH_REAL, GH_WRITE, WTHETA), & !dmv arg_type(GH_FIELD, GH_REAL, GH_READ, WTHETA), & !mv arg_type(GH_FIELD, GH_REAL, GH_READ, WTHETA), & !dz_wth - arg_type(GH_FIELD, GH_REAL, GH_READ, WTHETA) & !rho_wth + arg_type(GH_FIELD, GH_REAL, GH_READ, WTHETA), & !rho_wth + arg_type(GH_SCALAR, GH_INTEGER, GH_READ), & !spt_level_bottom + arg_type(GH_SCALAR, GH_INTEGER, GH_READ) & !spt_level_top /) integer :: operates_on = CELL_COLUMN @@ -53,19 +56,21 @@ module spt_moisture_conservation_kernel_mod !> @param[in] ndf_wth Number of DOFs per cell for potential temperature space !> @param[in] undf_wth Number of unique DOFs for potential temperature space !> @param[in] map_wth dofmap for the cell at the base of the column for potential temperature space + !> @param[in] spt_level_bottom Bottom level of the stochastic scheme + !> @param[in] spt_level_top Top level of the stochastic scheme subroutine spt_moisture_conservation_code(nlayers, & dmv, & mv, & rho_wth, & dz_wth, & + 
spt_level_bottom, & + spt_level_top, & ndf_wth, & undf_wth, & map_wth & ) - use stochastic_physics_config_mod, only: spt_level_bottom, spt_level_top - implicit none !Arguments @@ -73,6 +78,8 @@ subroutine spt_moisture_conservation_code(nlayers, & integer(kind=i_def), intent(in) :: ndf_wth integer(kind=i_def), intent(in) :: undf_wth integer(kind=i_def), intent(in), dimension(ndf_wth) :: map_wth + integer(kind=i_def), intent(in) :: spt_level_bottom + integer(kind=i_def), intent(in) :: spt_level_top ! Fields real(kind=r_def), intent(inout), dimension(undf_wth) :: dmv diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90 index 3e51f5e18..2f8d4e70b 100644 --- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90 +++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90 @@ -8,6 +8,7 @@ module spt_orog_cap_kernel_mod use argument_mod, only: arg_type, GH_FIELD, & GH_WRITE, GH_REAL, & + GH_SCALAR, GH_INTEGER, & GH_READ, & ANY_DISCONTINUOUS_SPACE_1, & CELL_COLUMN @@ -27,10 +28,14 @@ module spt_orog_cap_kernel_mod !> type, public, extends(kernel_type) :: spt_orog_cap_kernel_type private - type(arg_type) :: meta_args(3) = (/ & + type(arg_type) :: meta_args(7) = (/ & arg_type(GH_FIELD, GH_REAL, GH_WRITE, WTHETA), & !dX arg_type(GH_FIELD, GH_REAL, GH_READ, WTHETA), & !fp_spt - arg_type(GH_FIELD, GH_REAL, GH_READ, ANY_DISCONTINUOUS_SPACE_1) & !sd_orog + arg_type(GH_FIELD, GH_REAL, GH_READ, ANY_DISCONTINUOUS_SPACE_1),& !sd_orog + arg_type(GH_SCALAR, GH_INTEGER, GH_READ), & ! spt_level_bottom + arg_type(GH_SCALAR, GH_INTEGER, GH_READ), & ! spt_level_top + arg_type(GH_SCALAR, GH_REAL, GH_READ), & ! spt_orog_forcing_pattern_thresh + arg_type(GH_SCALAR, GH_REAL, GH_READ) & ! 
spt_stddev_orog_thres /) integer :: operates_on = CELL_COLUMN @@ -58,11 +63,19 @@ module spt_orog_cap_kernel_mod !> @param[in] ndf_2d Number of degrees of freedom per cell for density space !> @param[in] undf_2d Number of unique degrees of freedom for density space !> @param[in] map_2d Dofmap for the cell at the base of the column for density space + !> @param[in] spt_level_bottom Bottom level of the stochastic scheme + !> @param[in] spt_level_top Top level of the stochastic scheme + !> @param[in] spt_orog_forcing_pattern_thresh spt_orog_forcing_pattern_thresh in stochastic_physics_config_mod + !> @param[in] spt_stddev_orog_thres spt_stddev_orog_thres in stochastic_physics_config_mod subroutine spt_orog_cap_code(nlayers, & dX, & fp_spt, & sd_orog, & + spt_level_bottom, & + spt_level_top, & + spt_orog_forcing_pattern_thresh, & + spt_stddev_orog_thres, & ndf_wth, & undf_wth, & map_wth, & @@ -71,11 +84,6 @@ subroutine spt_orog_cap_code(nlayers, & map_2d & ) - use stochastic_physics_config_mod, only: spt_level_bottom, & - spt_level_top, & - spt_orog_forcing_pattern_thresh, & - spt_stddev_orog_thres - implicit none !Arguments @@ -84,6 +92,10 @@ subroutine spt_orog_cap_code(nlayers, & integer(kind=i_def), intent(in) :: undf_wth, undf_2d integer(kind=i_def), intent(in), dimension(ndf_wth) :: map_wth integer(kind=i_def), intent(in), dimension(ndf_2d) :: map_2d + integer(kind=i_def), intent(in) :: spt_level_bottom + integer(kind=i_def), intent(in) :: spt_level_top + real(kind=r_def), intent(in) :: spt_orog_forcing_pattern_thresh + real(kind=r_def), intent(in) :: spt_stddev_orog_thres ! 
Fields perturbations + tendencies real(kind=r_def), intent(inout), dimension(undf_wth) :: dX diff --git a/interfaces/physics_schemes_interface/unit-test/kernel/stph/skeb_biharm_diss_kernel_mod_test.pf b/interfaces/physics_schemes_interface/unit-test/kernel/stph/skeb_biharm_diss_kernel_mod_test.pf index 1840b2fee..2060c58b9 100644 --- a/interfaces/physics_schemes_interface/unit-test/kernel/stph/skeb_biharm_diss_kernel_mod_test.pf +++ b/interfaces/physics_schemes_interface/unit-test/kernel/stph/skeb_biharm_diss_kernel_mod_test.pf @@ -7,7 +7,7 @@ !> module skeb_biharm_diss_kernel_mod_test - use constants_mod, only : i_def, r_def + use constants_mod, only : i_def, r_def, l_def use funit use get_unit_test_m3x3_dofmap_mod, only : get_w2_m3x3_dofmap, & get_w1_m3x3_dofmap, & @@ -69,6 +69,9 @@ contains divergence(:), vorticity(:), ndisp(:) real(r_def), pointer :: norm_xi(:), norm_div(:) + ! Logicals controlling whether norm_xi and norm_div need calculating + logical(kind=l_def) :: norm_xi_flag, norm_div_flag + ! Dofmaps integer(i_def), allocatable :: map_w2(:,:), map_w1(:,:), map_w3(:,:), & map_w3_stencil(:,:,:) @@ -112,6 +115,9 @@ contains norm_xi => empty_real_data norm_div => empty_real_data + + norm_xi_flag = .false. + norm_div_flag = .false. 
dx_at_w2 = 10.0_r_def dt = 3.0_r_def/128.0_r_def @@ -144,6 +150,8 @@ contains 2, 2, dt, & norm_xi, & norm_div, & + norm_xi_flag, & + norm_div_flag, & ndf_w3, & undf_w3, & map_w3(:, cell), & From 65efd75ffdcac98ee7c44acc13bf1aaeb3843aa4 Mon Sep 17 00:00:00 2001 From: jasonjunweilyu Date: Wed, 17 Dec 2025 16:09:09 +1100 Subject: [PATCH 2/4] Added my name to contributor list --- CONTRIBUTORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index d0f7ae14d..525917292 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -4,3 +4,4 @@ | ----------- | --------- | ----------- | ---- | | james-bruten-mo | James Bruten | Met Office | 2025-12-09 | | jennyhickson | Jenny Hickson | Met Office | 2025-12-10 | +| jasonjunweilyu | Junwei (Jason) Lyu | Bureau of Meteorology, Australia | 2025-12-17 | From aaa25ec4b03082d86715cd32256d91c0cff2d8ee Mon Sep 17 00:00:00 2001 From: jasonjunweilyu Date: Thu, 18 Dec 2025 13:47:55 +1100 Subject: [PATCH 3/4] Fix Fortran style issues (aligning ampersands and spaces) --- .../source/algorithm/spt_main_alg_mod.x90 | 44 +++++++++---------- .../stph/skeb_biharm_diss_kernel_mod.F90 | 2 +- ...pt_convection_cfl_limit_cap_kernel_mod.F90 | 20 ++++----- .../kernel/stph/spt_levels_cap_kernel_mod.F90 | 20 ++++----- .../spt_moisture_conservation_kernel_mod.F90 | 20 ++++----- .../kernel/stph/spt_orog_cap_kernel_mod.F90 | 28 ++++++------ 6 files changed, 67 insertions(+), 67 deletions(-) diff --git a/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90 b/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90 index bbc398fae..9484edcc7 100644 --- a/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90 +++ b/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90 @@ -83,35 +83,35 @@ module spt_main_alg_mod use timestepping_config_mod, only: dt_timestep => dt ! 
SPT parameters - use stochastic_physics_config_mod, only: & + use stochastic_physics_config_mod, only: & ! Switches to use different ! parametrizations - spt_use_radiation, & - spt_use_microphysics,& - spt_use_convection, & + spt_use_radiation, & + spt_use_microphysics, & + spt_use_convection, & ! Std dev of each param. - spt_stddev_radiation, & - spt_stddev_microphysics, & - spt_stddev_convection, & + spt_stddev_radiation, & + spt_stddev_microphysics, & + spt_stddev_convection, & ! CFL criteria - spt_convection_cfl_limit, & + spt_convection_cfl_limit, & ! conservation - spt_mse_conservation, & - spt_moisture_conservation, & + spt_mse_conservation, & + spt_moisture_conservation, & ! 1-2-1 smoothing - spt_n_smoothing_iters, & + spt_n_smoothing_iters, & ! Add increments - spt_add_increments, & + spt_add_increments, & ! SPT levels - spt_level_bottom, & - spt_level_top, & - spt_level_begin_tapering_bottom, & - spt_level_begin_tapering_top, & - spt_orog_forcing_pattern_thresh, & - spt_stddev_orog_thres, & + spt_level_bottom, & + spt_level_top, & + spt_level_begin_tapering_bottom, & + spt_level_begin_tapering_top, & + spt_orog_forcing_pattern_thresh, & + spt_stddev_orog_thres, & ! Stoch Phy wavenumbers - stph_n_max, & - stph_spectral_dim, & + stph_n_max, & + stph_spectral_dim, & ! 
power law spt_decorrelation_time @@ -484,8 +484,8 @@ module spt_main_alg_mod if (spt_moisture_conservation) then mesh => dtheta%get_mesh() dz_wth => get_dz_at_wtheta(mesh%get_id()) - call invoke(spt_moisture_conservation_kernel_type(dmv_spt,mv,dz_wth,& - rho_in_wth,spt_level_bottom,& + call invoke(spt_moisture_conservation_kernel_type(dmv_spt,mv,dz_wth, & + rho_in_wth,spt_level_bottom, & spt_level_top)) end if diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90 index 6d66ec22d..7bd349755 100644 --- a/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90 +++ b/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90 @@ -20,7 +20,7 @@ module skeb_biharm_diss_kernel_mod !> Kernel metadata for Psyclone type, public, extends(kernel_type) :: skeb_biharm_diss_kernel_type private - type(arg_type) :: meta_args(11) = (/ & + type(arg_type) :: meta_args(11) = (/ & arg_type(GH_FIELD, GH_REAL, GH_WRITE, W3), & ! ndisp arg_type(GH_FIELD, GH_REAL, GH_READ, W1), & ! vorticity arg_type(GH_FIELD, GH_REAL, GH_READ, W3, STENCIL(CROSS)), & ! 
divergence diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90 index f7f82dbcd..7a4ca2a7a 100644 --- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90 +++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90 @@ -60,16 +60,16 @@ module spt_convection_cfl_limit_cap_kernel_mod !> @param[in] spt_level_bottom Bottom level of the stochastic scheme !> @param[in] spt_level_top Top level of the stochastic scheme !> @param[in] dt Timestep from timestepping_config_mod - subroutine spt_convection_cfl_limit_cap_code(nlayers, & - dX_conv_cfl, & - massflux_up, & - fp_spt, & - pressure, & - spt_level_bottom, & - spt_level_top, & - dt, & - ndf_wth, & - undf_wth, & + subroutine spt_convection_cfl_limit_cap_code(nlayers, & + dX_conv_cfl, & + massflux_up, & + fp_spt, & + pressure, & + spt_level_bottom, & + spt_level_top, & + dt, & + ndf_wth, & + undf_wth, & map_wth) implicit none diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90 index d4fc83913..a359f5009 100644 --- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90 +++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90 @@ -27,7 +27,7 @@ module spt_levels_cap_kernel_mod !> type, public, extends(kernel_type) :: spt_levels_cap_kernel_type private - type(arg_type) :: meta_args(5) = (/ & + type(arg_type) :: meta_args(5) = (/ & arg_type(GH_FIELD, GH_REAL, GH_WRITE, WTHETA), & !dX arg_type(GH_SCALAR, GH_INTEGER, GH_READ), & ! spt_level_bottom arg_type(GH_SCALAR, GH_INTEGER, GH_READ), & ! 
spt_level_top @@ -59,15 +59,15 @@ module spt_levels_cap_kernel_mod !> @param[in] spt_level_begin_tapering_bottom spt_level_begin_tapering_bottom in stochastic_physics_config_mod !> @param[in] spt_level_begin_tapering_top spt_level_begin_tapering_top in stochastic_physics_config_mod - subroutine spt_levels_cap_code(nlayers, & - dX, & - spt_level_bottom, & - spt_level_top, & - spt_level_begin_tapering_bottom, & - spt_level_begin_tapering_top, & - ndf_wth, & - undf_wth, & - map_wth & + subroutine spt_levels_cap_code(nlayers, & + dX, & + spt_level_bottom, & + spt_level_top, & + spt_level_begin_tapering_bottom, & + spt_level_begin_tapering_top, & + ndf_wth, & + undf_wth, & + map_wth & ) implicit none diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90 index 107075419..001c90bdc 100644 --- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90 +++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90 @@ -59,16 +59,16 @@ module spt_moisture_conservation_kernel_mod !> @param[in] spt_level_bottom Bottom level of the stochastic scheme !> @param[in] spt_level_top Top level of the stochastic scheme - subroutine spt_moisture_conservation_code(nlayers, & - dmv, & - mv, & - rho_wth, & - dz_wth, & - spt_level_bottom, & - spt_level_top, & - ndf_wth, & - undf_wth, & - map_wth & + subroutine spt_moisture_conservation_code(nlayers, & + dmv, & + mv, & + rho_wth, & + dz_wth, & + spt_level_bottom, & + spt_level_top, & + ndf_wth, & + undf_wth, & + map_wth & ) implicit none diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90 index 2f8d4e70b..6b15c66c5 100644 --- 
a/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90 +++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90 @@ -68,20 +68,20 @@ module spt_orog_cap_kernel_mod !> @param[in] spt_orog_forcing_pattern_thresh spt_orog_forcing_pattern_thresh in stochastic_physics_config_mod !> @param[in] spt_stddev_orog_thres spt_stddev_orog_thres in stochastic_physics_config_mod - subroutine spt_orog_cap_code(nlayers, & - dX, & - fp_spt, & - sd_orog, & - spt_level_bottom, & - spt_level_top, & - spt_orog_forcing_pattern_thresh, & - spt_stddev_orog_thres, & - ndf_wth, & - undf_wth, & - map_wth, & - ndf_2d, & - undf_2d, & - map_2d & + subroutine spt_orog_cap_code(nlayers, & + dX, & + fp_spt, & + sd_orog, & + spt_level_bottom, & + spt_level_top, & + spt_orog_forcing_pattern_thresh, & + spt_stddev_orog_thres, & + ndf_wth, & + undf_wth, & + map_wth, & + ndf_2d, & + undf_2d, & + map_2d & ) implicit none From 2628ff82923a73e129b036484e7ac0184afae4c5 Mon Sep 17 00:00:00 2001 From: jasonjunweilyu Date: Thu, 18 Dec 2025 16:34:32 +1100 Subject: [PATCH 4/4] Fixed flake8 issues with transformation scripts --- .../physics_constants_mod.py | 105 +++++++++----- .../psykal/algorithm/skeb_main_alg_mod.py | 132 ++++++++++++------ .../psykal/algorithm/spt_main_alg_mod.py | 113 +++++++++------ 3 files changed, 227 insertions(+), 123 deletions(-) diff --git a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py index afe83744b..d54f4e149 100644 --- a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py +++ b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py @@ -23,22 +23,23 @@ Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans, Dynamo0p3RedundantComputationTrans, OMPParallelTrans, 
ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans, - OMPDeclareTargetTrans, OMPLoopTrans, ACCEnterDataTrans) + OMPDeclareTargetTrans, OMPLoopTrans) from psyclone.domain.common.transformations import KernelModuleInlineTrans # Names of any invoke that we won't add any GPU offloading -INVOKE_EXCLUSIONS = [ +INVOKE_EXCLUSIONS = [ ] # Names of any kernel that we won't add parallelization KERNEL_EXCLUSIONS = ["get_Pnm_star_code",] -# get_Pnm_star_code has data dependencies in the loops and is tested to be not suitable +# get_Pnm_star_code has data dependencies in the loops +# and is tested to be not suitable # for parallelization # Names of any kernels that we won't offload to GPU GPU_KERNEL_EXCLUSIONS = [ -] +] OFFLOAD_DIRECTIVES = os.getenv('LFRIC_OFFLOAD_DIRECTIVES', "none") @@ -46,7 +47,7 @@ def trans(psyir): '''Applies PSyclone colouring and GPU offloading transformations. Any kernels that cannot be offloaded to GPU are parallelised using OpenMP - on the CPU if they can be parallelised. Any setval_* kernels are + on the CPU if they can be parallelised. Any setval_* kernels are transformed so as to compute into the L1 halos. :param psyir: the PSyIR of the PSy-layer. @@ -73,7 +74,6 @@ def trans(psyir): gpu_annotation_trans = OMPDeclareTargetTrans() elif OFFLOAD_DIRECTIVES == "acc": # Use OpenACC offloading - enter_data_trans = ACCEnterDataTrans() loop_offloading_trans = ACCLoopTrans() kernels_trans = ACCKernelsTrans() gpu_region_trans = ACCParallelTrans(default_present=False) @@ -81,9 +81,11 @@ def trans(psyir): elif OFFLOAD_DIRECTIVES == "none": pass else: - print(f"The PSyclone transformation script expects the " - f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or 'none'" - f"but found '{OFFLOAD_DIRECTIVES}'.") + print( + f"The PSyclone transformation script expects the " + f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or " + f"'none' but found '{OFFLOAD_DIRECTIVES}'." 
+ ) sys.exit(-1) print(f"PSy name = '{psyir.name}'") @@ -100,8 +102,13 @@ def trans(psyir): if loop.kernels()[0].name in ["setval_c"]: rtrans.apply(loop, options={"depth": 1}) - if (psyir.name.lower() in INVOKE_EXCLUSIONS) or (OFFLOAD_DIRECTIVES == "none"): - print(f"Not adding GPU offloading to invoke '{subroutine.name}'") + if ( + psyir.name.lower() in INVOKE_EXCLUSIONS + or OFFLOAD_DIRECTIVES == "none" + ): + print( + f"Not adding GPU offloading to invoke '{subroutine.name}'" + ) offload = False else: offload = True @@ -124,24 +131,33 @@ def trans(psyir): if loop.iteration_space.endswith("cell_column"): if offload: for kern in loop.kernels(): - if kern.name.lower() in (GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS + list(succeeded_offload)): - continue - else: + if kern.name.lower() in ( + GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS + + list(succeeded_offload) + ): + continue + + try: + gpu_annotation_trans.apply( + kern, options={'force': True} + ) + print(f"GPU-annotated kernel '{kern.name}'") + try: - gpu_annotation_trans.apply(kern, options={'force': True}) - print(f"GPU-annotated kernel '{kern.name}'") - try: - inline_trans.apply(kern) - print(f"Module-inlined kernel '{kern.name}'") - succeeded_offload.add(kern.name.lower()) - except TransformationError as err: - print(f"Failed to module-inline '{kern.name}' due " - f"to:\n{err.value}") + inline_trans.apply(kern) + print(f"Module-inlined kernel '{kern.name}'") + succeeded_offload.add(kern.name.lower()) except TransformationError as err: - failed_to_offload.add(kern.name.lower()) - print(f"Failed to annotate '{kern.name}' with " - f"GPU-enabled directive due to:\n" - f"{err.value}") + print( + f"Failed to module-inline '{kern.name}'" + f" due to:\n{err.value}" + ) + except TransformationError as err: + failed_to_offload.add(kern.name.lower()) + print( + f"Failed to annotate '{kern.name}' with " + f"GPU-enabled directive due to:\n{err.value}" + ) # For annotated or inlined kernels we could attempt to # provide 
compile-time dimensions for the temporary # arrays and convert to code unsupported intrinsics. @@ -149,8 +165,13 @@ def trans(psyir): # Add GPU offloading to loops unless they are over colours or are null. for loop in subroutine.walk(Loop): kernel_names = [k.name.lower() for k in loop.kernels()] - if offload and all(name not in (list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS - + KERNEL_EXCLUSIONS) for name in kernel_names): + if offload and all( + name not in ( + list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS + + KERNEL_EXCLUSIONS + ) + for name in kernel_names + ): try: if loop.loop_type == "colours": pass @@ -160,8 +181,9 @@ def trans(psyir): gpu_region_trans.apply(loop.ancestor(Directive)) if loop.loop_type == "": loop_offloading_trans.apply( - loop, options={"independent": True}) - gpu_region_trans.apply(loop.ancestor(Directive)) + loop, options={"independent": True} + ) + gpu_region_trans.apply(loop.ancestor(Directive)) if loop.loop_type == "dof": # Loops over dofs can contains reductions if kernels_trans: @@ -185,13 +207,22 @@ def trans(psyir): # Apply OpenMP thread parallelism for any kernels we've not been able # to offload to GPU. 
for loop in subroutine.walk(Loop): - if any(kern.name.lower() in KERNEL_EXCLUSIONS for kern in loop.kernels()): - continue - if not offload or any(kern.name.lower() in (list(failed_to_offload) + - GPU_KERNEL_EXCLUSIONS) for - kern in loop.kernels()): + if any( + kern.name.lower() in KERNEL_EXCLUSIONS + for kern in loop.kernels() + ): + continue + + if ( + not offload + or any( + kern.name.lower() in ( + list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS + ) + for kern in loop.kernels() + ) + ): if loop.loop_type not in ["colours", "null"]: cpu_parallel.apply(loop) otrans.apply(loop, options={"reprod": True}) - print(subroutine.view()) diff --git a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py index 8d926f517..d2e17951a 100644 --- a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py +++ b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py @@ -23,12 +23,12 @@ Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans, Dynamo0p3RedundantComputationTrans, OMPParallelTrans, ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans, - OMPDeclareTargetTrans, OMPLoopTrans, ACCEnterDataTrans) + OMPDeclareTargetTrans, OMPLoopTrans) from psyclone.domain.common.transformations import KernelModuleInlineTrans # Names of any invoke that we won't add any GPU offloading -INVOKE_EXCLUSIONS = [ +INVOKE_EXCLUSIONS = [ ] # Names of any kernel that we won't add parallelization @@ -45,7 +45,7 @@ def trans(psyir): '''Applies PSyclone colouring and GPU offloading transformations. Any kernels that cannot be offloaded to GPU are parallelised using OpenMP - on the CPU if they can be parallelised. Any setval_* kernels are + on the CPU if they can be parallelised. Any setval_* kernels are transformed so as to compute into the L1 halos. :param psyir: the PSyIR of the PSy-layer. 
@@ -72,7 +72,6 @@ def trans(psyir): gpu_annotation_trans = OMPDeclareTargetTrans() elif OFFLOAD_DIRECTIVES == "acc": # Use OpenACC offloading - enter_data_trans = ACCEnterDataTrans() loop_offloading_trans = ACCLoopTrans() kernels_trans = ACCKernelsTrans() gpu_region_trans = ACCParallelTrans(default_present=False) @@ -80,9 +79,11 @@ def trans(psyir): elif OFFLOAD_DIRECTIVES == "none": pass else: - print(f"The PSyclone transformation script expects the " - f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or 'none'" - f"but found '{OFFLOAD_DIRECTIVES}'.") + print( + f"The PSyclone transformation script expects the " + f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or " + f"'none' but found '{OFFLOAD_DIRECTIVES}'." + ) sys.exit(-1) print(f"PSy name = '{psyir.name}'") @@ -99,8 +100,13 @@ def trans(psyir): if loop.kernels()[0].name in ["setval_c"]: rtrans.apply(loop, options={"depth": 1}) - if (psyir.name.lower() in INVOKE_EXCLUSIONS) or (OFFLOAD_DIRECTIVES == "none"): - print(f"Not adding GPU offloading to invoke '{subroutine.name}'") + if ( + psyir.name.lower() in INVOKE_EXCLUSIONS + or OFFLOAD_DIRECTIVES == "none" + ): + print( + f"Not adding GPU offloading to invoke '{subroutine.name}'" + ) offload = False else: offload = True @@ -123,44 +129,65 @@ def trans(psyir): if loop.iteration_space.endswith("cell_column"): if offload: for kern in loop.kernels(): - if kern.name.lower() in (GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS + list(succeeded_offload)): - continue - else: + if kern.name.lower() in ( + GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS + + list(succeeded_offload) + ): + continue + + try: + gpu_annotation_trans.apply( + kern, options={'force': True} + ) + print(f"GPU-annotated kernel '{kern.name}'") + try: - gpu_annotation_trans.apply(kern, options={'force': True}) - print(f"GPU-annotated kernel '{kern.name}'") - try: - inline_trans.apply(kern) - print(f"Module-inlined kernel '{kern.name}'") - succeeded_offload.add(kern.name.lower()) - except 
TransformationError as err: - print(f"Failed to module-inline '{kern.name}' due " - f"to:\n{err.value}") + inline_trans.apply(kern) + print(f"Module-inlined kernel '{kern.name}'") + succeeded_offload.add(kern.name.lower()) except TransformationError as err: - failed_to_offload.add(kern.name.lower()) - print(f"Failed to annotate '{kern.name}' with " - f"GPU-enabled directive due to:\n" - f"{err.value}") + print( + f"Failed to module-inline '{kern.name}'" + f" due to:\n{err.value}" + ) + except TransformationError as err: + failed_to_offload.add(kern.name.lower()) + print( + f"Failed to annotate '{kern.name}' with " + f"GPU-enabled directive due to:\n{err.value}" + ) # For annotated or inlined kernels we could attempt to # provide compile-time dimensions for the temporary # arrays and convert to code unsupported intrinsics. # Add GPU offloading to loops unless they are over colours or are null. for loop in subroutine.walk(Loop): - kernel_names = [k.name.lower() for k in loop.kernels()] - if offload and all(name not in (list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS - + KERNEL_EXCLUSIONS) for name in kernel_names): + kernel_names = [ + k.name.lower() for k in loop.kernels() + ] + if offload and all( + name not in ( + list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS + + KERNEL_EXCLUSIONS + ) + for name in kernel_names + ): try: if loop.loop_type == "colours": pass + if loop.loop_type == "colour": loop_offloading_trans.apply( - loop, options={"independent": True}) + loop, options={"independent": True} + ) gpu_region_trans.apply(loop.ancestor(Directive)) + if loop.loop_type == "": loop_offloading_trans.apply( - loop, options={"independent": True}) - gpu_region_trans.apply(loop.ancestor(Directive)) + loop, options={"independent": True} + ) + gpu_region_trans.apply(loop.ancestor(Directive)) + if loop.loop_type == "dof": # Loops over dofs can contains reductions if kernels_trans: @@ -168,29 +195,44 @@ def trans(psyir): # manage them kernels_trans.apply(loop) else: - # 
Otherwise, if the reductions exists, they will - # be detected by the dependencyAnalysis and raise - # a TransformationError captured below + # Otherwise, if the reductions exists, they + # will be detected by the dependencyAnalysis + # and raise a TransformationError captured + # below loop_offloading_trans.apply( - loop, options={"independent": True}) + loop, options={"independent": True} + ) gpu_region_trans.apply(loop.ancestor(Directive)) - # Alternatively we could use loop parallelism with - # reduction clauses + + # Alternatively we could use loop parallelism with + # reduction clauses print(f"Successfully offloaded loop with {kernel_names}") except TransformationError as err: - print(f"Failed to offload loop with {kernel_names} " - f"because: {err}") + print( + f"Failed to offload loop with {kernel_names} " + f"because: {err}" + ) # Apply OpenMP thread parallelism for any kernels we've not been able # to offload to GPU. for loop in subroutine.walk(Loop): - if any(kern.name.lower() in KERNEL_EXCLUSIONS for kern in loop.kernels()): - continue - if not offload or any(kern.name.lower() in (list(failed_to_offload) + - GPU_KERNEL_EXCLUSIONS) for - kern in loop.kernels()): + if any( + kern.name.lower() in KERNEL_EXCLUSIONS + for kern in loop.kernels() + ): + continue + + if ( + not offload + or any( + kern.name.lower() in ( + list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS + ) + for kern in loop.kernels() + ) + ): if loop.loop_type not in ["colours", "null"]: cpu_parallel.apply(loop) otrans.apply(loop, options={"reprod": True}) - + print(subroutine.view()) diff --git a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py index 3872b2539..1b5cfea8a 100644 --- a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py +++ b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py @@ -23,12 +23,12 @@ 
Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans, Dynamo0p3RedundantComputationTrans, OMPParallelTrans, ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans, - OMPDeclareTargetTrans, OMPLoopTrans, ACCEnterDataTrans) + OMPDeclareTargetTrans, OMPLoopTrans) from psyclone.domain.common.transformations import KernelModuleInlineTrans # Names of any invoke that we won't add any GPU offloading -INVOKE_EXCLUSIONS = [ +INVOKE_EXCLUSIONS = [ ] # Names of any kernel that we won't add parallelization @@ -36,14 +36,15 @@ ] # Names of any kernels that we won't offload to GPU -GPU_KERNEL_EXCLUSIONS = ["spt_saturation_cap_code",] +GPU_KERNEL_EXCLUSIONS = ["spt_saturation_cap_code",] # spt_saturation_cap_code: GPU transformation cannot be applied because of -# using qsat_wat_mix from qsat_mod. As qsat_mod is going to be modified in future, -# this falls out of the scope of the NGARCH project. -# Error message: Transformation Error: Kernel 'spt_saturation_cap_code' accesses -# the symbol 'qsat_wat_mix: RoutineSymbol' -# which is imported. If this symbol represents data then it must first be converted -# to a Kernel argument using the KernelImportsToArguments transformation. +# using qsat_wat_mix from qsat_mod. As qsat_mod is going to be modified in +# future, this falls out of the scope of the NGARCH project. +# Error message: Transformation Error: Kernel 'spt_saturation_cap_code' +# accesses the symbol 'qsat_wat_mix: RoutineSymbol' which is imported. +# If this symbol represents data then it must first be converted to a +# Kernel argument using the KernelImportsToArguments transformation. OFFLOAD_DIRECTIVES = os.getenv('LFRIC_OFFLOAD_DIRECTIVES', "none") @@ -51,7 +52,7 @@ def trans(psyir): '''Applies PSyclone colouring and GPU offloading transformations. Any kernels that cannot be offloaded to GPU are parallelised using OpenMP - on the CPU if they can be parallelised. Any setval_* kernels are + on the CPU if they can be parallelised. 
Any setval_* kernels are transformed so as to compute into the L1 halos. :param psyir: the PSyIR of the PSy-layer. @@ -78,7 +79,6 @@ def trans(psyir): gpu_annotation_trans = OMPDeclareTargetTrans() elif OFFLOAD_DIRECTIVES == "acc": # Use OpenACC offloading - enter_data_trans = ACCEnterDataTrans() loop_offloading_trans = ACCLoopTrans() kernels_trans = ACCKernelsTrans() gpu_region_trans = ACCParallelTrans(default_present=False) @@ -86,9 +86,11 @@ def trans(psyir): elif OFFLOAD_DIRECTIVES == "none": pass else: - print(f"The PSyclone transformation script expects the " - f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or 'none'" - f"but found '{OFFLOAD_DIRECTIVES}'.") + print( + f"The PSyclone transformation script expects the " + f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or " + f"'none' but found '{OFFLOAD_DIRECTIVES}'." + ) sys.exit(-1) print(f"PSy name = '{psyir.name}'") @@ -105,8 +107,13 @@ def trans(psyir): if loop.kernels()[0].name in ["setval_c"]: rtrans.apply(loop, options={"depth": 1}) - if (psyir.name.lower() in INVOKE_EXCLUSIONS) or (OFFLOAD_DIRECTIVES == "none"): - print(f"Not adding GPU offloading to invoke '{subroutine.name}'") + if ( + psyir.name.lower() in INVOKE_EXCLUSIONS + or OFFLOAD_DIRECTIVES == "none" + ): + print( + f"Not adding GPU offloading to invoke '{subroutine.name}'" + ) offload = False else: offload = True @@ -129,24 +136,33 @@ def trans(psyir): if loop.iteration_space.endswith("cell_column"): if offload: for kern in loop.kernels(): - if kern.name.lower() in (GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS + list(succeeded_offload)): - continue - else: + if kern.name.lower() in ( + GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS + + list(succeeded_offload) + ): + continue + + try: + gpu_annotation_trans.apply( + kern, options={'force': True} + ) + print(f"GPU-annotated kernel '{kern.name}'") + try: - gpu_annotation_trans.apply(kern, options={'force': True}) - print(f"GPU-annotated kernel '{kern.name}'") - try: - 
inline_trans.apply(kern) - print(f"Module-inlined kernel '{kern.name}'") - succeeded_offload.add(kern.name.lower()) - except TransformationError as err: - print(f"Failed to module-inline '{kern.name}' due " - f"to:\n{err.value}") + inline_trans.apply(kern) + print(f"Module-inlined kernel '{kern.name}'") + succeeded_offload.add(kern.name.lower()) except TransformationError as err: - failed_to_offload.add(kern.name.lower()) - print(f"Failed to annotate '{kern.name}' with " - f"GPU-enabled directive due to:\n" - f"{err.value}") + print( + f"Failed to module-inline '{kern.name}'" + f" due to:\n{err.value}" + ) + except TransformationError as err: + failed_to_offload.add(kern.name.lower()) + print( + f"Failed to annotate '{kern.name}' with " + f"GPU-enabled directive due to:\n{err.value}" + ) # For annotated or inlined kernels we could attempt to # provide compile-time dimensions for the temporary # arrays and convert to code unsupported intrinsics. @@ -154,8 +170,13 @@ def trans(psyir): # Add GPU offloading to loops unless they are over colours or are null. for loop in subroutine.walk(Loop): kernel_names = [k.name.lower() for k in loop.kernels()] - if offload and all(name not in (list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS - + KERNEL_EXCLUSIONS) for name in kernel_names): + if offload and all( + name not in ( + list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS + + KERNEL_EXCLUSIONS + ) + for name in kernel_names + ): try: if loop.loop_type == "colours": pass @@ -166,7 +187,7 @@ def trans(psyir): if loop.loop_type == "": loop_offloading_trans.apply( loop, options={"independent": True}) - gpu_region_trans.apply(loop.ancestor(Directive)) + gpu_region_trans.apply(loop.ancestor(Directive)) if loop.loop_type == "dof": # Loops over dofs can contains reductions if kernels_trans: @@ -190,13 +211,23 @@ def trans(psyir): # Apply OpenMP thread parallelism for any kernels we've not been able # to offload to GPU. 
for loop in subroutine.walk(Loop): - if any(kern.name.lower() in KERNEL_EXCLUSIONS for kern in loop.kernels()): - continue - if not offload or any(kern.name.lower() in (list(failed_to_offload) + - GPU_KERNEL_EXCLUSIONS) for - kern in loop.kernels()): + if any( + kern.name.lower() in KERNEL_EXCLUSIONS + for kern in loop.kernels() + ): + continue + + if ( + not offload + or any( + kern.name.lower() in ( + list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS + ) + for kern in loop.kernels() + ) + ): if loop.loop_type not in ["colours", "null"]: cpu_parallel.apply(loop) otrans.apply(loop, options={"reprod": True}) - + print(subroutine.view())