From 6dec22ac648208ce90df41de0b78780ab9664966 Mon Sep 17 00:00:00 2001
From: jasonjunweilyu <junwei.lyu@uq.net.au>
Date: Wed, 17 Dec 2025 14:57:23 +1100
Subject: [PATCH 1/4] Merging NGARCH stochastic physics development from fcm to
 git based on git_migration tag

---
 .../physics_constants_mod.py                  | 197 +++++++++++++++++
 .../psykal/algorithm/skeb_main_alg_mod.py     | 196 +++++++++++++++++
 .../psykal/algorithm/spt_main_alg_mod.py      | 202 ++++++++++++++++++
 .../physics_constants_mod.py                  |   1 +
 .../psykal/algorithm/skeb_main_alg_mod.py     |   1 +
 .../psykal/algorithm/spt_main_alg_mod.py      |   1 +
 .../source/algorithm/skeb_main_alg_mod.x90    |   3 +-
 .../source/algorithm/spt_main_alg_mod.x90     |  40 +++-
 .../stph/skeb_biharm_diss_kernel_mod.F90      |  22 +-
 ...pt_convection_cfl_limit_cap_kernel_mod.F90 |  28 ++-
 .../kernel/stph/spt_levels_cap_kernel_mod.F90 |  28 ++-
 .../spt_moisture_conservation_kernel_mod.F90  |  15 +-
 .../kernel/stph/spt_orog_cap_kernel_mod.F90   |  26 ++-
 .../stph/skeb_biharm_diss_kernel_mod_test.pf  |  10 +-
 14 files changed, 724 insertions(+), 46 deletions(-)
 create mode 100644 applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py
 create mode 100644 applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py
 create mode 100644 applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py
 create mode 120000 applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/runtime_constants/physics_constants_mod.py
 create mode 120000 applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/skeb_main_alg_mod.py
 create mode 120000 applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/spt_main_alg_mod.py

diff --git a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py
new file mode 100644
index 000000000..afe83744b
--- /dev/null
+++ b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py
@@ -0,0 +1,197 @@
+##############################################################################
+# (C) Crown copyright Met Office. All rights reserved.
+# The file LICENCE, distributed with this code, contains details of the terms
+# under which the code may be used.
+##############################################################################
+
+
+'''PSyclone transformation script for physics_constants_mod to apply colouring
+and GPU offloading/CPU parallelization. Also adds redundant computation to
+the level-1 halo for setval_* generically. This is based on
+https://github.com/stfc/PSyclone/blob/master/examples/lfric/
+scripts/gpu_offloading.py .
+
+'''
+
+import os
+import sys
+from psyclone.domain.lfric import LFRicConstants
+from psyclone.psyir.nodes import Directive, Loop, Routine
+from psyclone.psyir.transformations import (
+    ACCKernelsTrans, TransformationError, OMPTargetTrans)
+from psyclone.transformations import (
+    Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans,
+    Dynamo0p3RedundantComputationTrans, OMPParallelTrans,
+    ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans,
+    OMPDeclareTargetTrans, OMPLoopTrans, ACCEnterDataTrans)
+from psyclone.domain.common.transformations import KernelModuleInlineTrans
+
+
+# Invokes that are never GPU-offloaded. NOTE(review): trans() tests the
+# lower-cased PSy-layer name (psyir.name) against this list.
+INVOKE_EXCLUSIONS = []
+
+# Kernels that are never parallelised. Entries must be lower case because
+# trans() compares them with kern.name.lower().
+# get_Pnm_star_code has data dependencies in its loops and was tested to be
+# unsuitable for parallelisation.
+KERNEL_EXCLUSIONS = ["get_pnm_star_code"]
+
+# Kernels that are never offloaded to GPU (lower-case names, as above).
+GPU_KERNEL_EXCLUSIONS = []
+
+OFFLOAD_DIRECTIVES = os.getenv('LFRIC_OFFLOAD_DIRECTIVES', "none")
+
+
+def trans(psyir):
+    '''Colour, GPU-offload and/or OpenMP-thread every invoke in a PSy-layer.
+
+    The offload flavour is read from the LFRIC_OFFLOAD_DIRECTIVES environment
+    variable ('omp', 'acc' or 'none'); any other value prints an error and
+    exits with status -1. For each routine in turn:
+
+    * a setval_c that is alone in its dof loop computes into the L1 halo;
+    * cell-column loops not on a discontinuous space are coloured;
+    * kernels are GPU-annotated and module-inlined, then loops offloaded;
+    * loops whose kernels could not be offloaded are OpenMP-threaded on the
+      CPU, except those with a kernel listed in KERNEL_EXCLUSIONS.
+
+    :param psyir: the PSyIR of the PSy-layer.
+    :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer`
+    '''
+    inline_trans = KernelModuleInlineTrans()
+    rtrans = Dynamo0p3RedundantComputationTrans()
+    ctrans = Dynamo0p3ColourTrans()
+    otrans = Dynamo0p3OMPLoopTrans()
+    const = LFRicConstants()
+    cpu_parallel = OMPParallelTrans()
+
+    # Lower-case the exclusion names: they are compared with kern.name.lower()
+    no_parallel = {name.lower() for name in KERNEL_EXCLUSIONS}
+    no_gpu = {name.lower() for name in GPU_KERNEL_EXCLUSIONS}
+    never_offload = no_parallel | no_gpu
+
+    if OFFLOAD_DIRECTIVES == "omp":
+        # Use OpenMP offloading
+        loop_offloading_trans = OMPLoopTrans(
+            omp_directive="teamsdistributeparalleldo",
+            omp_schedule="none"
+        )
+        # OpenMP does not have a kernels parallelism directive equivalent
+        # to OpenACC 'kernels'
+        kernels_trans = None
+        gpu_region_trans = OMPTargetTrans()
+        gpu_annotation_trans = OMPDeclareTargetTrans()
+    elif OFFLOAD_DIRECTIVES == "acc":
+        # Use OpenACC offloading
+        loop_offloading_trans = ACCLoopTrans()
+        kernels_trans = ACCKernelsTrans()
+        gpu_region_trans = ACCParallelTrans(default_present=False)
+        gpu_annotation_trans = ACCRoutineTrans()
+    elif OFFLOAD_DIRECTIVES == "none":
+        pass
+    else:
+        print("The PSyclone transformation script expects the "
+              "LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or 'none' "
+              f"but found '{OFFLOAD_DIRECTIVES}'.")
+        sys.exit(-1)
+
+    print(f"PSy name = '{psyir.name}'")
+
+    for subroutine in psyir.walk(Routine):
+
+        print(f"Transforming invoke '{subroutine.name}' ...")
+
+        # Make a setval_c that sits alone in its dof loop compute
+        # redundantly into the level-1 halo
+        for loop in subroutine.loops():
+            kernels = loop.kernels()
+            if (loop.iteration_space == "dof" and len(kernels) == 1
+                    and kernels[0].name == "setval_c"):
+                rtrans.apply(loop, options={"depth": 1})
+
+        # NOTE(review): INVOKE_EXCLUSIONS is matched against the PSy-layer
+        # name (psyir.name), not subroutine.name - confirm this is intended.
+        offload = (OFFLOAD_DIRECTIVES != "none"
+                   and psyir.name.lower() not in INVOKE_EXCLUSIONS)
+        if not offload:
+            print(f"Not adding GPU offloading to invoke '{subroutine.name}'")
+
+        # Keep a record of any kernels we fail and succeed to offload
+        succeeded_offload = set()
+        failed_to_offload = set()
+
+        # Colour loops over cells unless they are on discontinuous spaces
+        # (alternatively we could annotate the kernels with atomics)
+        for loop in subroutine.loops():
+            if (loop.iteration_space.endswith("cell_column")
+                    and loop.field_space.orig_name
+                    not in const.VALID_DISCONTINUOUS_NAMES):
+                ctrans.apply(loop)
+
+        # Mark kernels inside the loops over cells as GPU-enabled
+        # and inline them.
+        for loop in subroutine.loops():
+            if not (offload and loop.iteration_space.endswith("cell_column")):
+                continue
+            for kern in loop.kernels():
+                name = kern.name.lower()
+                if name in never_offload or name in succeeded_offload:
+                    continue
+                try:
+                    gpu_annotation_trans.apply(kern, options={'force': True})
+                except TransformationError as err:
+                    failed_to_offload.add(name)
+                    print(f"Failed to annotate '{kern.name}' with "
+                          f"GPU-enabled directive due to:\n"
+                          f"{err.value}")
+                    continue
+                print(f"GPU-annotated kernel '{kern.name}'")
+                try:
+                    inline_trans.apply(kern)
+                except TransformationError as err:
+                    print(f"Failed to module-inline '{kern.name}' due "
+                          f"to:\n{err.value}")
+                else:
+                    print(f"Module-inlined kernel '{kern.name}'")
+                    succeeded_offload.add(name)
+                # For annotated or inlined kernels we could attempt to
+                # provide compile-time dimensions for the temporary
+                # arrays and convert to code unsupported intrinsics.
+
+        # Add GPU offloading to loops unless they are over colours or are null.
+        blocked = never_offload | failed_to_offload
+        for loop in subroutine.walk(Loop):
+            kernel_names = [k.name.lower() for k in loop.kernels()]
+            if not offload or not blocked.isdisjoint(kernel_names):
+                continue
+            try:
+                if loop.loop_type == "dof" and kernels_trans:
+                    # Loops over dofs can contain reductions; where a
+                    # 'kernels' directive is available it should manage them
+                    kernels_trans.apply(loop)
+                elif loop.loop_type in ("colour", "", "dof"):
+                    # Any reductions will be detected by the dependency
+                    # analysis and raise a TransformationError captured
+                    # below (alternatively we could use loop parallelism
+                    # with reduction clauses)
+                    loop_offloading_trans.apply(
+                        loop, options={"independent": True})
+                    gpu_region_trans.apply(loop.ancestor(Directive))
+                print(f"Successfully offloaded loop with {kernel_names}")
+            except TransformationError as err:
+                print(f"Failed to offload loop with {kernel_names} "
+                      f"because: {err}")
+
+        # Apply OpenMP thread parallelism for any kernels we've not been able
+        # to offload to GPU, skipping kernels excluded from parallelisation.
+        cpu_only = no_gpu | failed_to_offload
+        for loop in subroutine.walk(Loop):
+            names = {kern.name.lower() for kern in loop.kernels()}
+            if loop.loop_type in ("colours", "null") or names & no_parallel:
+                continue
+            if not offload or names & cpu_only:
+                cpu_parallel.apply(loop)
+                otrans.apply(loop, options={"reprod": True})
+
+        print(subroutine.view())
diff --git a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py
new file mode 100644
index 000000000..8d926f517
--- /dev/null
+++ b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py
@@ -0,0 +1,196 @@
+##############################################################################
+# (C) Crown copyright Met Office. All rights reserved.
+# The file LICENCE, distributed with this code, contains details of the terms
+# under which the code may be used.
+##############################################################################
+
+
+'''PSyclone transformation script for skeb_main_alg_mod to apply colouring
+and GPU offloading/CPU parallelization. Also adds redundant computation to the
+level-1 halo for setval_* generically. This is based on
+https://github.com/stfc/PSyclone/blob/master/examples/lfric/
+scripts/gpu_offloading.py .
+
+'''
+
+import os
+import sys
+from psyclone.domain.lfric import LFRicConstants
+from psyclone.psyir.nodes import Directive, Loop, Routine
+from psyclone.psyir.transformations import (
+    ACCKernelsTrans, TransformationError, OMPTargetTrans)
+from psyclone.transformations import (
+    Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans,
+    Dynamo0p3RedundantComputationTrans, OMPParallelTrans,
+    ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans,
+    OMPDeclareTargetTrans, OMPLoopTrans, ACCEnterDataTrans)
+from psyclone.domain.common.transformations import KernelModuleInlineTrans
+
+
+# Invokes that are never GPU-offloaded. NOTE(review): trans() tests the
+# lower-cased PSy-layer name (psyir.name) against this list.
+INVOKE_EXCLUSIONS = []
+
+# Kernels that are never parallelised. Use lower-case names: trans()
+# compares entries with kern.name.lower().
+KERNEL_EXCLUSIONS = []
+
+# Kernels that are never offloaded to GPU (lower-case names, as above).
+GPU_KERNEL_EXCLUSIONS = []
+
+# Offload flavour requested through the environment; defaults to "none".
+OFFLOAD_DIRECTIVES = os.getenv("LFRIC_OFFLOAD_DIRECTIVES", "none")
+
+
+def trans(psyir):
+    '''Colour, GPU-offload and/or OpenMP-thread every invoke in a PSy-layer.
+
+    The offload flavour is read from the LFRIC_OFFLOAD_DIRECTIVES environment
+    variable ('omp', 'acc' or 'none'); any other value prints an error and
+    exits with status -1. For each routine in turn:
+
+    * a setval_c that is alone in its dof loop computes into the L1 halo;
+    * cell-column loops not on a discontinuous space are coloured;
+    * kernels are GPU-annotated and module-inlined, then loops offloaded;
+    * loops whose kernels could not be offloaded are OpenMP-threaded on the
+      CPU, except those with a kernel listed in KERNEL_EXCLUSIONS.
+
+    :param psyir: the PSyIR of the PSy-layer.
+    :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer`
+    '''
+    inline_trans = KernelModuleInlineTrans()
+    rtrans = Dynamo0p3RedundantComputationTrans()
+    ctrans = Dynamo0p3ColourTrans()
+    otrans = Dynamo0p3OMPLoopTrans()
+    const = LFRicConstants()
+    cpu_parallel = OMPParallelTrans()
+
+    # Lower-case the exclusion names: they are compared with kern.name.lower()
+    no_parallel = {name.lower() for name in KERNEL_EXCLUSIONS}
+    no_gpu = {name.lower() for name in GPU_KERNEL_EXCLUSIONS}
+    never_offload = no_parallel | no_gpu
+
+    if OFFLOAD_DIRECTIVES == "omp":
+        # Use OpenMP offloading
+        loop_offloading_trans = OMPLoopTrans(
+            omp_directive="teamsdistributeparalleldo",
+            omp_schedule="none"
+        )
+        # OpenMP does not have a kernels parallelism directive equivalent
+        # to OpenACC 'kernels'
+        kernels_trans = None
+        gpu_region_trans = OMPTargetTrans()
+        gpu_annotation_trans = OMPDeclareTargetTrans()
+    elif OFFLOAD_DIRECTIVES == "acc":
+        # Use OpenACC offloading
+        loop_offloading_trans = ACCLoopTrans()
+        kernels_trans = ACCKernelsTrans()
+        gpu_region_trans = ACCParallelTrans(default_present=False)
+        gpu_annotation_trans = ACCRoutineTrans()
+    elif OFFLOAD_DIRECTIVES == "none":
+        pass
+    else:
+        print("The PSyclone transformation script expects the "
+              "LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or 'none' "
+              f"but found '{OFFLOAD_DIRECTIVES}'.")
+        sys.exit(-1)
+
+    print(f"PSy name = '{psyir.name}'")
+
+    for subroutine in psyir.walk(Routine):
+
+        print(f"Transforming invoke '{subroutine.name}' ...")
+
+        # Make a setval_c that sits alone in its dof loop compute
+        # redundantly into the level-1 halo
+        for loop in subroutine.loops():
+            kernels = loop.kernels()
+            if (loop.iteration_space == "dof" and len(kernels) == 1
+                    and kernels[0].name == "setval_c"):
+                rtrans.apply(loop, options={"depth": 1})
+
+        # NOTE(review): INVOKE_EXCLUSIONS is matched against the PSy-layer
+        # name (psyir.name), not subroutine.name - confirm this is intended.
+        offload = (OFFLOAD_DIRECTIVES != "none"
+                   and psyir.name.lower() not in INVOKE_EXCLUSIONS)
+        if not offload:
+            print(f"Not adding GPU offloading to invoke '{subroutine.name}'")
+
+        # Keep a record of any kernels we fail and succeed to offload
+        succeeded_offload = set()
+        failed_to_offload = set()
+
+        # Colour loops over cells unless they are on discontinuous spaces
+        # (alternatively we could annotate the kernels with atomics)
+        for loop in subroutine.loops():
+            if (loop.iteration_space.endswith("cell_column")
+                    and loop.field_space.orig_name
+                    not in const.VALID_DISCONTINUOUS_NAMES):
+                ctrans.apply(loop)
+
+        # Mark kernels inside the loops over cells as GPU-enabled
+        # and inline them.
+        for loop in subroutine.loops():
+            if not (offload and loop.iteration_space.endswith("cell_column")):
+                continue
+            for kern in loop.kernels():
+                name = kern.name.lower()
+                if name in never_offload or name in succeeded_offload:
+                    continue
+                try:
+                    gpu_annotation_trans.apply(kern, options={'force': True})
+                except TransformationError as err:
+                    failed_to_offload.add(name)
+                    print(f"Failed to annotate '{kern.name}' with "
+                          f"GPU-enabled directive due to:\n"
+                          f"{err.value}")
+                    continue
+                print(f"GPU-annotated kernel '{kern.name}'")
+                try:
+                    inline_trans.apply(kern)
+                except TransformationError as err:
+                    print(f"Failed to module-inline '{kern.name}' due "
+                          f"to:\n{err.value}")
+                else:
+                    print(f"Module-inlined kernel '{kern.name}'")
+                    succeeded_offload.add(name)
+                # For annotated or inlined kernels we could attempt to
+                # provide compile-time dimensions for the temporary
+                # arrays and convert to code unsupported intrinsics.
+
+        # Add GPU offloading to loops unless they are over colours or are null.
+        blocked = never_offload | failed_to_offload
+        for loop in subroutine.walk(Loop):
+            kernel_names = [k.name.lower() for k in loop.kernels()]
+            if not offload or not blocked.isdisjoint(kernel_names):
+                continue
+            try:
+                if loop.loop_type == "dof" and kernels_trans:
+                    # Loops over dofs can contain reductions; where a
+                    # 'kernels' directive is available it should manage them
+                    kernels_trans.apply(loop)
+                elif loop.loop_type in ("colour", "", "dof"):
+                    # Any reductions will be detected by the dependency
+                    # analysis and raise a TransformationError captured
+                    # below (alternatively we could use loop parallelism
+                    # with reduction clauses)
+                    loop_offloading_trans.apply(
+                        loop, options={"independent": True})
+                    gpu_region_trans.apply(loop.ancestor(Directive))
+                print(f"Successfully offloaded loop with {kernel_names}")
+            except TransformationError as err:
+                print(f"Failed to offload loop with {kernel_names} "
+                      f"because: {err}")
+
+        # Apply OpenMP thread parallelism for any kernels we've not been able
+        # to offload to GPU, skipping kernels excluded from parallelisation.
+        cpu_only = no_gpu | failed_to_offload
+        for loop in subroutine.walk(Loop):
+            names = {kern.name.lower() for kern in loop.kernels()}
+            if loop.loop_type in ("colours", "null") or names & no_parallel:
+                continue
+            if not offload or names & cpu_only:
+                cpu_parallel.apply(loop)
+                otrans.apply(loop, options={"reprod": True})
+
+        print(subroutine.view())
diff --git a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py
new file mode 100644
index 000000000..3872b2539
--- /dev/null
+++ b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py
@@ -0,0 +1,202 @@
+##############################################################################
+# (C) Crown copyright Met Office. All rights reserved.
+# The file LICENCE, distributed with this code, contains details of the terms
+# under which the code may be used.
+##############################################################################
+
+
+'''PSyclone transformation script for spt_main_alg_mod to apply colouring
+and GPU offloading/CPU parallelization. Also adds redundant computation to
+the level-1 halo for setval_* generically. This is based on
+https://github.com/stfc/PSyclone/blob/master/examples/lfric/
+scripts/gpu_offloading.py .
+
+'''
+
+import os
+import sys
+from psyclone.domain.lfric import LFRicConstants
+from psyclone.psyir.nodes import Directive, Loop, Routine
+from psyclone.psyir.transformations import (
+    ACCKernelsTrans, TransformationError, OMPTargetTrans)
+from psyclone.transformations import (
+    Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans,
+    Dynamo0p3RedundantComputationTrans, OMPParallelTrans,
+    ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans,
+    OMPDeclareTargetTrans, OMPLoopTrans, ACCEnterDataTrans)
+from psyclone.domain.common.transformations import KernelModuleInlineTrans
+
+
+# Invokes that are never GPU-offloaded (matched on psyir.name.lower()).
+INVOKE_EXCLUSIONS = []
+
+# Kernels that are never parallelised. Use lower-case names: trans()
+# compares entries with kern.name.lower().
+KERNEL_EXCLUSIONS = []
+
+# Kernels that are never offloaded to GPU (lower-case names, as above).
+# spt_saturation_cap_code: the GPU transformation cannot be applied because
+# the kernel uses qsat_wat_mix from qsat_mod. As qsat_mod is going to be
+# modified in future, this falls outside the scope of the NGARCH project.
+# Error message: Transformation Error: Kernel 'spt_saturation_cap_code'
+# accesses the symbol 'qsat_wat_mix: RoutineSymbol<NoType, pure=unknown,
+# elemental=unknown>' which is imported. If this symbol represents data
+# then it must first be converted to a Kernel argument using the
+# KernelImportsToArguments transformation.
+GPU_KERNEL_EXCLUSIONS = ["spt_saturation_cap_code"]
+
+OFFLOAD_DIRECTIVES = os.getenv("LFRIC_OFFLOAD_DIRECTIVES", "none")
+
+
+def trans(psyir):
+    '''Colour, GPU-offload and/or OpenMP-thread every invoke in a PSy-layer.
+
+    The offload flavour is read from the LFRIC_OFFLOAD_DIRECTIVES environment
+    variable ('omp', 'acc' or 'none'); any other value prints an error and
+    exits with status -1. For each routine in turn:
+
+    * a setval_c that is alone in its dof loop computes into the L1 halo;
+    * cell-column loops not on a discontinuous space are coloured;
+    * kernels are GPU-annotated and module-inlined, then loops offloaded;
+    * loops whose kernels could not be offloaded are OpenMP-threaded on the
+      CPU, except those with a kernel listed in KERNEL_EXCLUSIONS.
+
+    :param psyir: the PSyIR of the PSy-layer.
+    :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer`
+    '''
+    inline_trans = KernelModuleInlineTrans()
+    rtrans = Dynamo0p3RedundantComputationTrans()
+    ctrans = Dynamo0p3ColourTrans()
+    otrans = Dynamo0p3OMPLoopTrans()
+    const = LFRicConstants()
+    cpu_parallel = OMPParallelTrans()
+
+    # Lower-case the exclusion names: they are compared with kern.name.lower()
+    no_parallel = {name.lower() for name in KERNEL_EXCLUSIONS}
+    no_gpu = {name.lower() for name in GPU_KERNEL_EXCLUSIONS}
+    never_offload = no_parallel | no_gpu
+
+    if OFFLOAD_DIRECTIVES == "omp":
+        # Use OpenMP offloading
+        loop_offloading_trans = OMPLoopTrans(
+            omp_directive="teamsdistributeparalleldo",
+            omp_schedule="none"
+        )
+        # OpenMP does not have a kernels parallelism directive equivalent
+        # to OpenACC 'kernels'
+        kernels_trans = None
+        gpu_region_trans = OMPTargetTrans()
+        gpu_annotation_trans = OMPDeclareTargetTrans()
+    elif OFFLOAD_DIRECTIVES == "acc":
+        # Use OpenACC offloading
+        loop_offloading_trans = ACCLoopTrans()
+        kernels_trans = ACCKernelsTrans()
+        gpu_region_trans = ACCParallelTrans(default_present=False)
+        gpu_annotation_trans = ACCRoutineTrans()
+    elif OFFLOAD_DIRECTIVES == "none":
+        pass
+    else:
+        print("The PSyclone transformation script expects the "
+              "LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or 'none' "
+              f"but found '{OFFLOAD_DIRECTIVES}'.")
+        sys.exit(-1)
+
+    print(f"PSy name = '{psyir.name}'")
+
+    for subroutine in psyir.walk(Routine):
+
+        print(f"Transforming invoke '{subroutine.name}' ...")
+
+        # Make a setval_c that sits alone in its dof loop compute
+        # redundantly into the level-1 halo
+        for loop in subroutine.loops():
+            kernels = loop.kernels()
+            if (loop.iteration_space == "dof" and len(kernels) == 1
+                    and kernels[0].name == "setval_c"):
+                rtrans.apply(loop, options={"depth": 1})
+
+        # NOTE(review): INVOKE_EXCLUSIONS is matched against the PSy-layer
+        # name (psyir.name), not subroutine.name - confirm this is intended.
+        offload = (OFFLOAD_DIRECTIVES != "none"
+                   and psyir.name.lower() not in INVOKE_EXCLUSIONS)
+        if not offload:
+            print(f"Not adding GPU offloading to invoke '{subroutine.name}'")
+
+        # Keep a record of any kernels we fail and succeed to offload
+        succeeded_offload = set()
+        failed_to_offload = set()
+
+        # Colour loops over cells unless they are on discontinuous spaces
+        # (alternatively we could annotate the kernels with atomics)
+        for loop in subroutine.loops():
+            if (loop.iteration_space.endswith("cell_column")
+                    and loop.field_space.orig_name
+                    not in const.VALID_DISCONTINUOUS_NAMES):
+                ctrans.apply(loop)
+
+        # Mark kernels inside the loops over cells as GPU-enabled
+        # and inline them.
+        for loop in subroutine.loops():
+            if not (offload and loop.iteration_space.endswith("cell_column")):
+                continue
+            for kern in loop.kernels():
+                name = kern.name.lower()
+                if name in never_offload or name in succeeded_offload:
+                    continue
+                try:
+                    gpu_annotation_trans.apply(kern, options={'force': True})
+                except TransformationError as err:
+                    failed_to_offload.add(name)
+                    print(f"Failed to annotate '{kern.name}' with "
+                          f"GPU-enabled directive due to:\n"
+                          f"{err.value}")
+                    continue
+                print(f"GPU-annotated kernel '{kern.name}'")
+                try:
+                    inline_trans.apply(kern)
+                except TransformationError as err:
+                    print(f"Failed to module-inline '{kern.name}' due "
+                          f"to:\n{err.value}")
+                else:
+                    print(f"Module-inlined kernel '{kern.name}'")
+                    succeeded_offload.add(name)
+                # For annotated or inlined kernels we could attempt to
+                # provide compile-time dimensions for the temporary
+                # arrays and convert to code unsupported intrinsics.
+
+        # Add GPU offloading to loops unless they are over colours or are null.
+        blocked = never_offload | failed_to_offload
+        for loop in subroutine.walk(Loop):
+            kernel_names = [k.name.lower() for k in loop.kernels()]
+            if not offload or not blocked.isdisjoint(kernel_names):
+                continue
+            try:
+                if loop.loop_type == "dof" and kernels_trans:
+                    # Loops over dofs can contain reductions; where a
+                    # 'kernels' directive is available it should manage them
+                    kernels_trans.apply(loop)
+                elif loop.loop_type in ("colour", "", "dof"):
+                    # Any reductions will be detected by the dependency
+                    # analysis and raise a TransformationError captured
+                    # below (alternatively we could use loop parallelism
+                    # with reduction clauses)
+                    loop_offloading_trans.apply(
+                        loop, options={"independent": True})
+                    gpu_region_trans.apply(loop.ancestor(Directive))
+                print(f"Successfully offloaded loop with {kernel_names}")
+            except TransformationError as err:
+                print(f"Failed to offload loop with {kernel_names} "
+                      f"because: {err}")
+
+        # Apply OpenMP thread parallelism for any kernels we've not been able
+        # to offload to GPU, skipping kernels excluded from parallelisation.
+        cpu_only = no_gpu | failed_to_offload
+        for loop in subroutine.walk(Loop):
+            names = {kern.name.lower() for kern in loop.kernels()}
+            if loop.loop_type in ("colours", "null") or names & no_parallel:
+                continue
+            if not offload or names & cpu_only:
+                cpu_parallel.apply(loop)
+                otrans.apply(loop, options={"reprod": True})
+
+        print(subroutine.view())
diff --git a/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/runtime_constants/physics_constants_mod.py b/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/runtime_constants/physics_constants_mod.py
new file mode 120000
index 000000000..7a552184e
--- /dev/null
+++ b/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/runtime_constants/physics_constants_mod.py
@@ -0,0 +1 @@
+../../../../meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py
\ No newline at end of file
diff --git a/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/skeb_main_alg_mod.py b/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/skeb_main_alg_mod.py
new file mode 120000
index 000000000..52e0e341a
--- /dev/null
+++ b/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/skeb_main_alg_mod.py
@@ -0,0 +1 @@
+../../../meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py
\ No newline at end of file
diff --git a/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/spt_main_alg_mod.py b/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/spt_main_alg_mod.py
new file mode 120000
index 000000000..7dfc313ea
--- /dev/null
+++ b/applications/lfric_atm/optimisation/nci-gadi/psykal/algorithm/spt_main_alg_mod.py
@@ -0,0 +1 @@
+../../../meto-ex1a/psykal/algorithm/spt_main_alg_mod.py
\ No newline at end of file
diff --git a/interfaces/physics_schemes_interface/source/algorithm/skeb_main_alg_mod.x90 b/interfaces/physics_schemes_interface/source/algorithm/skeb_main_alg_mod.x90
index b49ad7f82..fd8a48eba 100644
--- a/interfaces/physics_schemes_interface/source/algorithm/skeb_main_alg_mod.x90
+++ b/interfaces/physics_schemes_interface/source/algorithm/skeb_main_alg_mod.x90
@@ -458,7 +458,8 @@ module skeb_main_alg_mod
                  skeb_biharm_diss_kernel_type(ndisp, vorticity, divergence,      &
                                               stencil_extent, dx_at_w2,          &
                                               skeb_level_bottom, skeb_level_top, &
-                                              dt, norm_xi, norm_div) )
+                                              dt, norm_xi, norm_div,             &
+                                              norm_xi_flag, norm_div_flag) )
 
     if (write_diag .and. use_xios_io) then
       if (norm_div_flag) &
diff --git a/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90 b/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90
index 0f61d1267..bbc398fae 100644
--- a/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90
+++ b/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90
@@ -8,7 +8,7 @@
 
 module spt_main_alg_mod
 
-    use constants_mod,        only: r_def, i_def, l_def
+    use constants_mod,        only: r_def, i_def, l_def, r_second
     use fs_continuity_mod,    only: W0, Wtheta
     ! define types
     use clock_mod,            only: clock_type
@@ -79,6 +79,9 @@ module spt_main_alg_mod
                           microphysics_fields, radiation_fields,     &
                           derived_fields, orography_fields, clock)
 
+  ! Timestepping parameters
+  use timestepping_config_mod,    only: dt_timestep => dt
+
   ! SPT parameters
   use stochastic_physics_config_mod, only:                            &
                                            ! Switches to use different
@@ -102,6 +105,10 @@ module spt_main_alg_mod
                                            ! SPT levels
                                            spt_level_bottom,          &
                                            spt_level_top,             &
+                                           spt_level_begin_tapering_bottom,          &
+                                           spt_level_begin_tapering_top,             &
+                                           spt_orog_forcing_pattern_thresh,          &
+                                           spt_stddev_orog_thres,                    &
                                            ! Stoch Phy wavenumbers
                                            stph_n_max,                &
                                            stph_spectral_dim,         &
@@ -222,6 +229,9 @@ module spt_main_alg_mod
   real(kind=r_def) :: mlcrcp
   ! iterators in for loops
   integer(i_def) :: n,n_row, m
+  ! Local copy of timestepping_config_mod dt (so PSyclone knows its data type)
+  real(kind=r_second) :: timestepping_config_mod_dt
+  timestepping_config_mod_dt = dt_timestep
 
   if ( subroutine_timers ) call timer("spt_main_alg")
 
@@ -362,10 +372,12 @@ module spt_main_alg_mod
     if (spt_convection_cfl_limit) then
       if (.not. spt_mse_conservation) then
         call invoke(spt_convection_cfl_limit_cap_kernel_type(dt_conv_cfl,  massflux_up, &
-                                            fp_spt, pressure))
+                                            fp_spt, pressure, spt_level_bottom, &
+                                            spt_level_top, timestepping_config_mod_dt))
       end if
       call invoke(spt_convection_cfl_limit_cap_kernel_type(dmv_conv_cfl,  massflux_up,  &
-                                          fp_spt, pressure))
+                                          fp_spt, pressure, spt_level_bottom, &
+                                          spt_level_top, timestepping_config_mod_dt))
     end if
 
     ! Apply tendencies to dX_spt, conver dt to theta
@@ -393,18 +405,28 @@ module spt_main_alg_mod
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
   if (.not. spt_mse_conservation) then
-    call invoke(spt_levels_cap_kernel_type(dtheta_spt))
+    call invoke(spt_levels_cap_kernel_type(dtheta_spt, spt_level_bottom,  &
+                              spt_level_top, spt_level_begin_tapering_bottom,  &
+                              spt_level_begin_tapering_top))
   end if
-  call invoke(spt_levels_cap_kernel_type(dmv_spt))
+  call invoke(spt_levels_cap_kernel_type(dmv_spt, spt_level_bottom,  &
+                              spt_level_top, spt_level_begin_tapering_bottom,  &
+                              spt_level_begin_tapering_top))
 
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !!  5)  Apply orographic capping  !!
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
   if (.not. spt_mse_conservation) then
-    call invoke(spt_orog_cap_kernel_type(dtheta_spt,fp_spt,sd_orog))
+    call invoke(spt_orog_cap_kernel_type(dtheta_spt,fp_spt,sd_orog,  &
+                            spt_level_bottom, spt_level_top,  &
+                            spt_orog_forcing_pattern_thresh,  &
+                            spt_stddev_orog_thres))
   end if
-  call invoke(spt_orog_cap_kernel_type(dmv_spt,fp_spt,sd_orog))
+  call invoke(spt_orog_cap_kernel_type(dmv_spt,fp_spt,sd_orog,  &
+                            spt_level_bottom, spt_level_top,  &
+                            spt_orog_forcing_pattern_thresh,  &
+                            spt_stddev_orog_thres))
 
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !!  6)  Remove points where perturbations cause super-saturation  !!
@@ -462,7 +484,9 @@ module spt_main_alg_mod
   if (spt_moisture_conservation) then
     mesh   => dtheta%get_mesh()
     dz_wth => get_dz_at_wtheta(mesh%get_id())
-    call invoke(spt_moisture_conservation_kernel_type(dmv_spt,mv,dz_wth,rho_in_wth))
+    call invoke(spt_moisture_conservation_kernel_type(dmv_spt,mv,dz_wth,&
+                                            rho_in_wth,spt_level_bottom,&
+                                            spt_level_top))
   end if
 
   ! Apply MSE conservation in the column if requested
diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90
index c10d72912..6d66ec22d 100644
--- a/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90
+++ b/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90
@@ -9,18 +9,18 @@ module skeb_biharm_diss_kernel_mod
   use argument_mod,      only: arg_type, GH_FIELD,          &
                                GH_REAL, GH_WRITE, GH_READ,  &
                                CELL_COLUMN, GH_INTEGER,     &
-                               GH_SCALAR, STENCIL, CROSS
+                               GH_SCALAR, STENCIL, CROSS,   &
+                               GH_LOGICAL
   use fs_continuity_mod, only: W3, Wtheta, W1, W2
-  use constants_mod,     only: r_def, i_def
+  use constants_mod,     only: r_def, i_def, l_def
   use kernel_mod,        only: kernel_type
-  use empty_data_mod,    only: empty_real_data
 
   implicit none
 
   !> Kernel metadata for Psyclone
   type, public, extends(kernel_type) :: skeb_biharm_diss_kernel_type
     private
-    type(arg_type) :: meta_args(9) = (/                       &
+    type(arg_type) :: meta_args(11) = (/                       &
     arg_type(GH_FIELD, GH_REAL, GH_WRITE, W3),                & ! ndisp
     arg_type(GH_FIELD, GH_REAL, GH_READ, W1),                 & ! vorticity
     arg_type(GH_FIELD, GH_REAL, GH_READ, W3, STENCIL(CROSS)), & ! divergence
@@ -29,7 +29,9 @@ module skeb_biharm_diss_kernel_mod
     arg_type(GH_SCALAR, GH_INTEGER, GH_READ ),                & ! skeb_level_top
     arg_type(GH_SCALAR, GH_REAL, GH_READ ),                   & ! dt
     arg_type(GH_FIELD, GH_REAL, GH_WRITE, W3),                & ! norm_xi
-    arg_type(GH_FIELD, GH_REAL, GH_WRITE, W3)                 & ! norm_div
+    arg_type(GH_FIELD, GH_REAL, GH_WRITE, W3),                & ! norm_div
+    arg_type(GH_SCALAR, GH_LOGICAL, GH_READ),                 & ! norm_xi_flag
+    arg_type(GH_SCALAR, GH_LOGICAL, GH_READ)                  & ! norm_div_flag
 
     /)
     integer :: operates_on = CELL_COLUMN
@@ -60,6 +62,8 @@ module skeb_biharm_diss_kernel_mod
   !> @param[in]    ndf_w2      Number of DOFs per cell for w2 space
   !> @param[in]    undf_w2     Number of unique DOFs  for w2 space
   !> @param[in]    map_w2      dofmap for the cell at the base of the column for w2 space
+  !> @param[in]    norm_xi_flag  Control whether norm_xi calculation is needed
+  !> @param[in]    norm_div_flag  Control whether norm_div calculation is needed
 
   subroutine skeb_biharm_diss_code(nlayers,           &
                                    ndisp,             &
@@ -73,6 +77,8 @@ subroutine skeb_biharm_diss_code(nlayers,           &
                                    dt,                &
                                    norm_xi,           &
                                    norm_div,          &
+                                   norm_xi_flag,      &
+                                   norm_div_flag,     &
                                    ndf_w3,            &
                                    undf_w3,           &
                                    map_w3,            &
@@ -94,6 +100,8 @@ subroutine skeb_biharm_diss_code(nlayers,           &
     integer(kind=i_def), intent(in),  dimension(ndf_w2)   :: map_w2
     integer(kind=i_def), intent(in),  dimension(ndf_w3,map_w3_sten_size) :: map_w3_sten
     integer(kind=i_def), intent(in),  dimension(ndf_w1)   :: map_w1
+    logical(kind=l_def), intent(in) :: norm_xi_flag
+    logical(kind=l_def), intent(in) :: norm_div_flag
 
     ! Fields
     real(kind=r_def),    intent(in),    dimension(undf_w1)  :: vorticity
@@ -156,10 +164,10 @@ subroutine skeb_biharm_diss_code(nlayers,           &
       ndisp(map_w3(1)+k-1) = (biharmonic_x_div + biharmonic_y_div + &
                               biharmonic_x_xi + biharmonic_y_xi) * amp_K
 
-      if (.not. associated(norm_xi, empty_real_data)) then
+      if (norm_xi_flag) then
         norm_xi(map_w3(1)+k-1) = biharmonic_x_xi + biharmonic_y_xi
       end if
-      if (.not. associated(norm_div, empty_real_data)) then
+      if (norm_div_flag) then
         norm_div(map_w3(1)+k-1) = biharmonic_x_div + biharmonic_y_div
       end if
 
diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90
index 71f156fec..f7f82dbcd 100644
--- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90
+++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90
@@ -8,9 +8,11 @@ module spt_convection_cfl_limit_cap_kernel_mod
 
   use argument_mod,      only: arg_type, GH_FIELD, &
                                GH_WRITE, GH_REAL,  &
+                               GH_SCALAR, GH_INTEGER, &
                                GH_READ, CELL_COLUMN
   use fs_continuity_mod, only: Wtheta
-  use constants_mod,     only: r_def, i_def, l_def
+  use constants_mod,     only: r_def, i_def, l_def, &
+                               r_second
   use kernel_mod,        only: kernel_type
 
   implicit none
@@ -24,11 +26,14 @@ module spt_convection_cfl_limit_cap_kernel_mod
   !>
   type, public, extends(kernel_type) :: spt_convection_cfl_limit_cap_kernel_type
     private
-    type(arg_type) :: meta_args(4) = (/                 &
+    type(arg_type) :: meta_args(7) = (/                 &
          arg_type(GH_FIELD, GH_REAL, GH_WRITE, WTHETA), & ! dX_conv_cfl
          arg_type(GH_FIELD, GH_REAL, GH_READ,  WTHETA), & ! massflux_up
          arg_type(GH_FIELD, GH_REAL, GH_READ,  WTHETA), & ! fp_spt
-         arg_type(GH_FIELD, GH_REAL, GH_READ,  WTHETA)  & ! pressure
+         arg_type(GH_FIELD, GH_REAL, GH_READ,  WTHETA), & ! pressure
+         arg_type(GH_SCALAR, GH_INTEGER, GH_READ),      & ! spt_level_bottom
+         arg_type(GH_SCALAR, GH_INTEGER, GH_READ),      & ! spt_level_top
+         arg_type(GH_SCALAR, GH_REAL, GH_READ)          & ! dt
          /)
     integer :: operates_on = CELL_COLUMN
   contains
@@ -52,20 +57,20 @@ module spt_convection_cfl_limit_cap_kernel_mod
   !> @param[in]      ndf_wth      Number of degrees of freedom per cell for wtheta
   !> @param[in]      undf_wth     Number of total degrees of freedom for wtheta
   !> @param[in]      map_wth      Dofmap for the cell at the base of the column
+  !> @param[in]      spt_level_bottom      Bottom level of the stochastic scheme
+  !> @param[in]      spt_level_top         Top level of the stochastic scheme
+  !> @param[in]      dt                    Timestep from timestepping_config_mod
   subroutine spt_convection_cfl_limit_cap_code(nlayers,     &
                                                dX_conv_cfl, &
                                                massflux_up, &
                                                fp_spt,      &
                                                pressure,    &
+                                               spt_level_bottom,   &
+                                               spt_level_top,      &
+                                               dt,                 &
                                                ndf_wth,     &
                                                undf_wth,    &
-                                               map_wth      &
-                                               )
-
-    use stochastic_physics_config_mod,   only: spt_level_bottom, &
-                                               spt_level_top
-
-    use timestepping_config_mod,    only: dt
+                                               map_wth)
 
     implicit none
 
@@ -74,6 +79,9 @@ subroutine spt_convection_cfl_limit_cap_code(nlayers,     &
     integer(kind=i_def), intent(in) :: ndf_wth
     integer(kind=i_def), intent(in) :: undf_wth
     integer(kind=i_def), intent(in), dimension(ndf_wth)  :: map_wth
+    integer(kind=i_def), intent(in) :: spt_level_bottom
+    integer(kind=i_def), intent(in) :: spt_level_top
+    real(kind=r_second), intent(in) :: dt
 
     ! Fields perturbations + tendencies
     real(kind=r_def), intent(inout), dimension(undf_wth) :: dX_conv_cfl
diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90
index ac3b44fbc..d4fc83913 100644
--- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90
+++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90
@@ -8,7 +8,8 @@ module spt_levels_cap_kernel_mod
 
   use argument_mod,      only: arg_type, GH_FIELD, &
                                GH_WRITE, GH_REAL,  &
-                               CELL_COLUMN
+                               GH_SCALAR, GH_INTEGER, &
+                               GH_READ, CELL_COLUMN
 
 
   use fs_continuity_mod, only: Wtheta
@@ -26,8 +27,12 @@ module spt_levels_cap_kernel_mod
   !>
   type, public, extends(kernel_type) :: spt_levels_cap_kernel_type
     private
-    type(arg_type) :: meta_args(1) = (/                &
-         arg_type(GH_FIELD, GH_REAL, GH_WRITE, WTHETA) & !dX
+    type(arg_type) :: meta_args(5) = (/                &
+         arg_type(GH_FIELD, GH_REAL, GH_WRITE, WTHETA), & !dX
+         arg_type(GH_SCALAR, GH_INTEGER, GH_READ),      & ! spt_level_bottom
+         arg_type(GH_SCALAR, GH_INTEGER, GH_READ),      & ! spt_level_top
+         arg_type(GH_SCALAR, GH_INTEGER, GH_READ),      & ! spt_level_begin_tapering_bottom
+         arg_type(GH_SCALAR, GH_INTEGER, GH_READ)       & ! spt_level_begin_tapering_top
          /)
     integer :: operates_on = CELL_COLUMN
   contains
@@ -49,19 +54,22 @@ module spt_levels_cap_kernel_mod
   !> @param[in]     ndf_wth        Number of degrees of freedom per cell for wtheta
   !> @param[in]     undf_wth       Number of total degrees of freedom for wtheta
   !> @param[in]     map_wth        Dofmap for the cell at the base of the column
+  !> @param[in]     spt_level_bottom      Bottom level of the stochastic scheme
+  !> @param[in]     spt_level_top         Top level of the stochastic scheme
+  !> @param[in]     spt_level_begin_tapering_bottom        spt_level_begin_tapering_bottom in stochastic_physics_config_mod
+  !> @param[in]     spt_level_begin_tapering_top           spt_level_begin_tapering_top in stochastic_physics_config_mod
 
   subroutine spt_levels_cap_code(nlayers,  &
                                  dX,       &
+                                 spt_level_bottom,                 &
+                                 spt_level_top,                    &
+                                 spt_level_begin_tapering_bottom,  &
+                                 spt_level_begin_tapering_top,     &
                                  ndf_wth,  &
                                  undf_wth, &
                                  map_wth   &
                                  )
 
-    use stochastic_physics_config_mod, only: spt_level_bottom, &
-                                             spt_level_top, &
-                                             spt_level_begin_tapering_bottom, &
-                                             spt_level_begin_tapering_top
-
     implicit none
 
     !Arguments
@@ -69,6 +77,10 @@ subroutine spt_levels_cap_code(nlayers,  &
     integer(kind=i_def), intent(in) :: ndf_wth
     integer(kind=i_def), intent(in) :: undf_wth
     integer(kind=i_def), intent(in), dimension(ndf_wth)  :: map_wth
+    integer(kind=i_def), intent(in) :: spt_level_bottom
+    integer(kind=i_def), intent(in) :: spt_level_top
+    integer(kind=i_def), intent(in) :: spt_level_begin_tapering_bottom
+    integer(kind=i_def), intent(in) :: spt_level_begin_tapering_top
 
     ! field with perturbation
     real(kind=r_def), intent(inout), dimension(undf_wth) :: dX
diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90
index d4bf26dd9..107075419 100644
--- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90
+++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90
@@ -8,6 +8,7 @@ module spt_moisture_conservation_kernel_mod
 
     use argument_mod,      only: arg_type, GH_FIELD, &
                                  GH_WRITE, GH_REAL,  &
+                                 GH_SCALAR, GH_INTEGER, &
                                  GH_READ, CELL_COLUMN
     use fs_continuity_mod, only: Wtheta
     use constants_mod,     only: r_def, i_def
@@ -24,11 +25,13 @@ module spt_moisture_conservation_kernel_mod
     !>
     type, public, extends(kernel_type) :: spt_moisture_conservation_kernel_type
       private
-      type(arg_type) :: meta_args(4) = (/                 &
+      type(arg_type) :: meta_args(6) = (/                 &
            arg_type(GH_FIELD, GH_REAL, GH_WRITE, WTHETA), & !dmv
            arg_type(GH_FIELD, GH_REAL, GH_READ,  WTHETA), & !mv
            arg_type(GH_FIELD, GH_REAL, GH_READ,  WTHETA), & !dz_wth
-           arg_type(GH_FIELD, GH_REAL, GH_READ,  WTHETA)  & !rho_wth
+           arg_type(GH_FIELD, GH_REAL, GH_READ,  WTHETA), & !rho_wth
+           arg_type(GH_SCALAR, GH_INTEGER, GH_READ),      & !spt_level_bottom
+           arg_type(GH_SCALAR, GH_INTEGER, GH_READ)       & !spt_level_top
            /)
            integer :: operates_on = CELL_COLUMN
 
@@ -53,19 +56,21 @@ module spt_moisture_conservation_kernel_mod
     !> @param[in]     ndf_wth     Number of DOFs per cell for potential temperature space
     !> @param[in]     undf_wth    Number of unique DOFs for potential temperature space
     !> @param[in]     map_wth     dofmap for the cell at the base of the column for potential temperature space
+    !> @param[in]     spt_level_bottom      Bottom level of the stochastic scheme
+    !> @param[in]     spt_level_top         Top level of the stochastic scheme
 
     subroutine spt_moisture_conservation_code(nlayers,  &
                                               dmv,      &
                                               mv,       &
                                               rho_wth,  &
                                               dz_wth,   &
+                                              spt_level_bottom,   &
+                                              spt_level_top,      &
                                               ndf_wth,  &
                                               undf_wth, &
                                               map_wth   &
                                               )
 
-      use stochastic_physics_config_mod,   only: spt_level_bottom, spt_level_top
-
       implicit none
 
       !Arguments
@@ -73,6 +78,8 @@ subroutine spt_moisture_conservation_code(nlayers,  &
       integer(kind=i_def), intent(in) :: ndf_wth
       integer(kind=i_def), intent(in) :: undf_wth
       integer(kind=i_def), intent(in), dimension(ndf_wth)  :: map_wth
+      integer(kind=i_def), intent(in) :: spt_level_bottom
+      integer(kind=i_def), intent(in) :: spt_level_top
 
       ! Fields
       real(kind=r_def), intent(inout), dimension(undf_wth) :: dmv
diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90
index 3e51f5e18..2f8d4e70b 100644
--- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90
+++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90
@@ -8,6 +8,7 @@ module spt_orog_cap_kernel_mod
 
   use argument_mod,      only: arg_type, GH_FIELD,        &
                                GH_WRITE, GH_REAL,         &
+                               GH_SCALAR, GH_INTEGER,     &
                                GH_READ,                   &
                                ANY_DISCONTINUOUS_SPACE_1, &
                                CELL_COLUMN
@@ -27,10 +28,14 @@ module spt_orog_cap_kernel_mod
   !>
   type, public, extends(kernel_type) :: spt_orog_cap_kernel_type
     private
-    type(arg_type) :: meta_args(3) = (/                                   &
+    type(arg_type) :: meta_args(7) = (/                                   &
          arg_type(GH_FIELD, GH_REAL, GH_WRITE, WTHETA),                   & !dX
          arg_type(GH_FIELD, GH_REAL, GH_READ,  WTHETA),                   & !fp_spt
-         arg_type(GH_FIELD, GH_REAL, GH_READ,  ANY_DISCONTINUOUS_SPACE_1) & !sd_orog
+         arg_type(GH_FIELD, GH_REAL, GH_READ,  ANY_DISCONTINUOUS_SPACE_1),& !sd_orog
+         arg_type(GH_SCALAR, GH_INTEGER, GH_READ),      & ! spt_level_bottom
+         arg_type(GH_SCALAR, GH_INTEGER, GH_READ),      & ! spt_level_top
+         arg_type(GH_SCALAR, GH_REAL, GH_READ),         & ! spt_orog_forcing_pattern_thresh
+         arg_type(GH_SCALAR, GH_REAL, GH_READ)          & ! spt_stddev_orog_thres
          /)
          integer :: operates_on = CELL_COLUMN
 
@@ -58,11 +63,19 @@ module spt_orog_cap_kernel_mod
   !> @param[in]     ndf_2d      Number of degrees of freedom per cell for density space
   !> @param[in]     undf_2d     Number of unique degrees of freedom for density space
   !> @param[in]     map_2d      Dofmap for the cell at the base of the column for density space
+  !> @param[in]     spt_level_bottom      Bottom level of the stochastic scheme
+  !> @param[in]     spt_level_top         Top level of the stochastic scheme
+  !> @param[in]     spt_orog_forcing_pattern_thresh        spt_orog_forcing_pattern_thresh in stochastic_physics_config_mod
+  !> @param[in]     spt_stddev_orog_thres                  spt_stddev_orog_thres in stochastic_physics_config_mod
 
   subroutine spt_orog_cap_code(nlayers,  &
                                dX,       &
                                fp_spt,   &
                                sd_orog,  &
+                               spt_level_bottom,                 &
+                               spt_level_top,                    &
+                               spt_orog_forcing_pattern_thresh,  &
+                               spt_stddev_orog_thres,            &
                                ndf_wth,  &
                                undf_wth, &
                                map_wth,  &
@@ -71,11 +84,6 @@ subroutine spt_orog_cap_code(nlayers,  &
                                map_2d    &
                                )
 
-    use stochastic_physics_config_mod,    only: spt_level_bottom, &
-                                                spt_level_top, &
-                                                spt_orog_forcing_pattern_thresh, &
-                                                spt_stddev_orog_thres
-
     implicit none
 
     !Arguments
@@ -84,6 +92,10 @@ subroutine spt_orog_cap_code(nlayers,  &
     integer(kind=i_def), intent(in) :: undf_wth, undf_2d
     integer(kind=i_def), intent(in), dimension(ndf_wth)  :: map_wth
     integer(kind=i_def), intent(in), dimension(ndf_2d)  ::  map_2d
+    integer(kind=i_def), intent(in) :: spt_level_bottom
+    integer(kind=i_def), intent(in) :: spt_level_top
+    real(kind=r_def), intent(in) :: spt_orog_forcing_pattern_thresh
+    real(kind=r_def), intent(in) :: spt_stddev_orog_thres
 
     ! Fields perturbations + tendencies
     real(kind=r_def), intent(inout), dimension(undf_wth) :: dX
diff --git a/interfaces/physics_schemes_interface/unit-test/kernel/stph/skeb_biharm_diss_kernel_mod_test.pf b/interfaces/physics_schemes_interface/unit-test/kernel/stph/skeb_biharm_diss_kernel_mod_test.pf
index 1840b2fee..2060c58b9 100644
--- a/interfaces/physics_schemes_interface/unit-test/kernel/stph/skeb_biharm_diss_kernel_mod_test.pf
+++ b/interfaces/physics_schemes_interface/unit-test/kernel/stph/skeb_biharm_diss_kernel_mod_test.pf
@@ -7,7 +7,7 @@
 !>
 module skeb_biharm_diss_kernel_mod_test
 
-  use constants_mod,                 only : i_def, r_def
+  use constants_mod,                 only : i_def, r_def, l_def
   use funit
   use get_unit_test_m3x3_dofmap_mod, only : get_w2_m3x3_dofmap, &
                                             get_w1_m3x3_dofmap, &
@@ -69,6 +69,9 @@ contains
          divergence(:), vorticity(:), ndisp(:)
     real(r_def), pointer :: norm_xi(:), norm_div(:)
 
+    ! Logicals controlling whether norm_xi and norm_div need calculating
+    logical(kind=l_def) :: norm_xi_flag, norm_div_flag
+
     ! Dofmaps
     integer(i_def), allocatable :: map_w2(:,:), map_w1(:,:), map_w3(:,:), &
          map_w3_stencil(:,:,:)
@@ -112,6 +115,9 @@ contains
 
     norm_xi => empty_real_data
     norm_div => empty_real_data
+ 
+    norm_xi_flag  = .false.
+    norm_div_flag = .false.
 
     dx_at_w2 = 10.0_r_def
     dt = 3.0_r_def/128.0_r_def
@@ -144,6 +150,8 @@ contains
                                 2, 2, dt,                 &
                                 norm_xi,                  &
                                 norm_div,                 &
+                                norm_xi_flag,             &
+                                norm_div_flag,            &
                                 ndf_w3,                   &
                                 undf_w3,                  &
                                 map_w3(:, cell),          &

From 65efd75ffdcac98ee7c44acc13bf1aaeb3843aa4 Mon Sep 17 00:00:00 2001
From: jasonjunweilyu <junwei.lyu@uq.net.au>
Date: Wed, 17 Dec 2025 16:09:09 +1100
Subject: [PATCH 2/4] Added my name to contributor list

---
 CONTRIBUTORS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index d0f7ae14d..525917292 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -4,3 +4,4 @@
 | ----------- | --------- | ----------- | ---- |
 | james-bruten-mo | James Bruten | Met Office | 2025-12-09 |
 | jennyhickson | Jenny Hickson | Met Office | 2025-12-10 |
+| jasonjunweilyu | Junwei (Jason) Lyu | Bureau of Meteorology, Australia | 2025-12-17 |

From aaa25ec4b03082d86715cd32256d91c0cff2d8ee Mon Sep 17 00:00:00 2001
From: jasonjunweilyu <junwei.lyu@uq.net.au>
Date: Thu, 18 Dec 2025 13:47:55 +1100
Subject: [PATCH 3/4] Fix Fortran style issues (aligning ampersands and spaces)

---
 .../source/algorithm/spt_main_alg_mod.x90     | 44 +++++++++----------
 .../stph/skeb_biharm_diss_kernel_mod.F90      |  2 +-
 ...pt_convection_cfl_limit_cap_kernel_mod.F90 | 20 ++++-----
 .../kernel/stph/spt_levels_cap_kernel_mod.F90 | 20 ++++-----
 .../spt_moisture_conservation_kernel_mod.F90  | 20 ++++-----
 .../kernel/stph/spt_orog_cap_kernel_mod.F90   | 28 ++++++------
 6 files changed, 67 insertions(+), 67 deletions(-)

diff --git a/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90 b/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90
index bbc398fae..9484edcc7 100644
--- a/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90
+++ b/interfaces/physics_schemes_interface/source/algorithm/spt_main_alg_mod.x90
@@ -83,35 +83,35 @@ module spt_main_alg_mod
   use timestepping_config_mod,    only: dt_timestep => dt
 
   ! SPT parameters
-  use stochastic_physics_config_mod, only:                            &
+  use stochastic_physics_config_mod, only:                                  &
                                            ! Switches to use different
                                            ! parametrizations
-                                           spt_use_radiation,         &
-                                           spt_use_microphysics,&
-                                           spt_use_convection, &
+                                           spt_use_radiation,               &
+                                           spt_use_microphysics,            &
+                                           spt_use_convection,              &
                                            ! Std dev of each param.
-                                           spt_stddev_radiation,      &
-                                           spt_stddev_microphysics,   &
-                                           spt_stddev_convection,     &
+                                           spt_stddev_radiation,            &
+                                           spt_stddev_microphysics,         &
+                                           spt_stddev_convection,           &
                                            ! CFL criteria
-                                           spt_convection_cfl_limit, &
+                                           spt_convection_cfl_limit,        &
                                            ! conservation
-                                           spt_mse_conservation, &
-                                           spt_moisture_conservation, &
+                                           spt_mse_conservation,            &
+                                           spt_moisture_conservation,       &
                                            ! 1-2-1 smoothing
-                                           spt_n_smoothing_iters, &
+                                           spt_n_smoothing_iters,           &
                                            ! Add increments
-                                           spt_add_increments,        &
+                                           spt_add_increments,              &
                                            ! SPT levels
-                                           spt_level_bottom,          &
-                                           spt_level_top,             &
-                                           spt_level_begin_tapering_bottom,          &
-                                           spt_level_begin_tapering_top,             &
-                                           spt_orog_forcing_pattern_thresh,          &
-                                           spt_stddev_orog_thres,                    &
+                                           spt_level_bottom,                &
+                                           spt_level_top,                   &
+                                           spt_level_begin_tapering_bottom, &
+                                           spt_level_begin_tapering_top,    &
+                                           spt_orog_forcing_pattern_thresh, &
+                                           spt_stddev_orog_thres,           &
                                            ! Stoch Phy wavenumbers
-                                           stph_n_max,                &
-                                           stph_spectral_dim,         &
+                                           stph_n_max,                      &
+                                           stph_spectral_dim,               &
                                            ! power law
                                            spt_decorrelation_time
 
@@ -484,8 +484,8 @@ module spt_main_alg_mod
   if (spt_moisture_conservation) then
     mesh   => dtheta%get_mesh()
     dz_wth => get_dz_at_wtheta(mesh%get_id())
-    call invoke(spt_moisture_conservation_kernel_type(dmv_spt,mv,dz_wth,&
-                                            rho_in_wth,spt_level_bottom,&
+    call invoke(spt_moisture_conservation_kernel_type(dmv_spt,mv,dz_wth, &
+                                            rho_in_wth,spt_level_bottom, &
                                             spt_level_top))
   end if
 
diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90
index 6d66ec22d..7bd349755 100644
--- a/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90
+++ b/interfaces/physics_schemes_interface/source/kernel/stph/skeb_biharm_diss_kernel_mod.F90
@@ -20,7 +20,7 @@ module skeb_biharm_diss_kernel_mod
   !> Kernel metadata for Psyclone
   type, public, extends(kernel_type) :: skeb_biharm_diss_kernel_type
     private
-    type(arg_type) :: meta_args(11) = (/                       &
+    type(arg_type) :: meta_args(11) = (/                      &
     arg_type(GH_FIELD, GH_REAL, GH_WRITE, W3),                & ! ndisp
     arg_type(GH_FIELD, GH_REAL, GH_READ, W1),                 & ! vorticity
     arg_type(GH_FIELD, GH_REAL, GH_READ, W3, STENCIL(CROSS)), & ! divergence
diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90
index f7f82dbcd..7a4ca2a7a 100644
--- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90
+++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_convection_cfl_limit_cap_kernel_mod.F90
@@ -60,16 +60,16 @@ module spt_convection_cfl_limit_cap_kernel_mod
   !> @param[in]      spt_level_bottom      Bottom level of the stochastic scheme
   !> @param[in]      spt_level_top         Top level of the stochastic scheme
   !> @param[in]      dt                    Timestep from timestepping_config_mod
-  subroutine spt_convection_cfl_limit_cap_code(nlayers,     &
-                                               dX_conv_cfl, &
-                                               massflux_up, &
-                                               fp_spt,      &
-                                               pressure,    &
-                                               spt_level_bottom,   &
-                                               spt_level_top,      &
-                                               dt,                 &
-                                               ndf_wth,     &
-                                               undf_wth,    &
+  subroutine spt_convection_cfl_limit_cap_code(nlayers,          &
+                                               dX_conv_cfl,      &
+                                               massflux_up,      &
+                                               fp_spt,           &
+                                               pressure,         &
+                                               spt_level_bottom, &
+                                               spt_level_top,    &
+                                               dt,               &
+                                               ndf_wth,          &
+                                               undf_wth,         &
                                                map_wth)
 
     implicit none
diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90
index d4fc83913..a359f5009 100644
--- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90
+++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_levels_cap_kernel_mod.F90
@@ -27,7 +27,7 @@ module spt_levels_cap_kernel_mod
   !>
   type, public, extends(kernel_type) :: spt_levels_cap_kernel_type
     private
-    type(arg_type) :: meta_args(5) = (/                &
+    type(arg_type) :: meta_args(5) = (/                 &
          arg_type(GH_FIELD, GH_REAL, GH_WRITE, WTHETA), & !dX
          arg_type(GH_SCALAR, GH_INTEGER, GH_READ),      & ! spt_level_bottom
          arg_type(GH_SCALAR, GH_INTEGER, GH_READ),      & ! spt_level_top
@@ -59,15 +59,15 @@ module spt_levels_cap_kernel_mod
   !> @param[in]     spt_level_begin_tapering_bottom        spt_level_begin_tapering_bottom in stochastic_physics_config_mod
   !> @param[in]     spt_level_begin_tapering_top           spt_level_begin_tapering_top in stochastic_physics_config_mod
 
-  subroutine spt_levels_cap_code(nlayers,  &
-                                 dX,       &
-                                 spt_level_bottom,                 &
-                                 spt_level_top,                    &
-                                 spt_level_begin_tapering_bottom,  &
-                                 spt_level_begin_tapering_top,     &
-                                 ndf_wth,  &
-                                 undf_wth, &
-                                 map_wth   &
+  subroutine spt_levels_cap_code(nlayers,                         &
+                                 dX,                              &
+                                 spt_level_bottom,                &
+                                 spt_level_top,                   &
+                                 spt_level_begin_tapering_bottom, &
+                                 spt_level_begin_tapering_top,    &
+                                 ndf_wth,                         &
+                                 undf_wth,                        &
+                                 map_wth                          &
                                  )
 
     implicit none
diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90
index 107075419..001c90bdc 100644
--- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90
+++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_moisture_conservation_kernel_mod.F90
@@ -59,16 +59,16 @@ module spt_moisture_conservation_kernel_mod
     !> @param[in]     spt_level_bottom      Bottom level of the stochastic scheme
     !> @param[in]     spt_level_top         Top level of the stochastic scheme
 
-    subroutine spt_moisture_conservation_code(nlayers,  &
-                                              dmv,      &
-                                              mv,       &
-                                              rho_wth,  &
-                                              dz_wth,   &
-                                              spt_level_bottom,   &
-                                              spt_level_top,      &
-                                              ndf_wth,  &
-                                              undf_wth, &
-                                              map_wth   &
+    subroutine spt_moisture_conservation_code(nlayers,          &
+                                              dmv,              &
+                                              mv,               &
+                                              rho_wth,          &
+                                              dz_wth,           &
+                                              spt_level_bottom, &
+                                              spt_level_top,    &
+                                              ndf_wth,          &
+                                              undf_wth,         &
+                                              map_wth           &
                                               )
 
       implicit none
diff --git a/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90 b/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90
index 2f8d4e70b..6b15c66c5 100644
--- a/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90
+++ b/interfaces/physics_schemes_interface/source/kernel/stph/spt_orog_cap_kernel_mod.F90
@@ -68,20 +68,20 @@ module spt_orog_cap_kernel_mod
   !> @param[in]     spt_orog_forcing_pattern_thresh        spt_orog_forcing_pattern_thresh in stochastic_physics_config_mod
   !> @param[in]     spt_stddev_orog_thres                  spt_stddev_orog_thres in stochastic_physics_config_mod
 
-  subroutine spt_orog_cap_code(nlayers,  &
-                               dX,       &
-                               fp_spt,   &
-                               sd_orog,  &
-                               spt_level_bottom,                 &
-                               spt_level_top,                    &
-                               spt_orog_forcing_pattern_thresh,  &
-                               spt_stddev_orog_thres,            &
-                               ndf_wth,  &
-                               undf_wth, &
-                               map_wth,  &
-                               ndf_2d,   &
-                               undf_2d,  &
-                               map_2d    &
+  subroutine spt_orog_cap_code(nlayers,                         &
+                               dX,                              &
+                               fp_spt,                          &
+                               sd_orog,                         &
+                               spt_level_bottom,                &
+                               spt_level_top,                   &
+                               spt_orog_forcing_pattern_thresh, &
+                               spt_stddev_orog_thres,           &
+                               ndf_wth,                         &
+                               undf_wth,                        &
+                               map_wth,                         &
+                               ndf_2d,                          &
+                               undf_2d,                         &
+                               map_2d                           &
                                )
 
     implicit none

From 2628ff82923a73e129b036484e7ac0184afae4c5 Mon Sep 17 00:00:00 2001
From: jasonjunweilyu <junwei.lyu@uq.net.au>
Date: Thu, 18 Dec 2025 16:34:32 +1100
Subject: [PATCH 4/4] Fixed flake8 issues with transformation scripts

---
 .../physics_constants_mod.py                  | 105 +++++++++-----
 .../psykal/algorithm/skeb_main_alg_mod.py     | 132 ++++++++++++------
 .../psykal/algorithm/spt_main_alg_mod.py      | 113 +++++++++------
 3 files changed, 227 insertions(+), 123 deletions(-)

diff --git a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py
index afe83744b..d54f4e149 100644
--- a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py
+++ b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/runtime_constants/physics_constants_mod.py
@@ -23,22 +23,23 @@
     Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans,
     Dynamo0p3RedundantComputationTrans, OMPParallelTrans,
     ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans,
-    OMPDeclareTargetTrans, OMPLoopTrans, ACCEnterDataTrans)
+    OMPDeclareTargetTrans, OMPLoopTrans)
 from psyclone.domain.common.transformations import KernelModuleInlineTrans
 
 
 # Names of any invoke that we won't add any GPU offloading
-INVOKE_EXCLUSIONS = [ 
+INVOKE_EXCLUSIONS = [
 ]
 
 # Names of any kernel that we won't add parallelization
 KERNEL_EXCLUSIONS = ["get_Pnm_star_code",]
-# get_Pnm_star_code has data dependencies in the loops and is tested to be not suitable
+# get_Pnm_star_code has data dependencies in its loops
+# and has been tested and found unsuitable
 # for parallelization
 
 # Names of any kernels that we won't offload to GPU
 GPU_KERNEL_EXCLUSIONS = [
-] 
+]
 
 OFFLOAD_DIRECTIVES = os.getenv('LFRIC_OFFLOAD_DIRECTIVES', "none")
 
@@ -46,7 +47,7 @@
 def trans(psyir):
     '''Applies PSyclone colouring and GPU offloading transformations. Any
     kernels that cannot be offloaded to GPU are parallelised using OpenMP
-    on the CPU if they can be parallelised. Any setval_* kernels are 
+    on the CPU if they can be parallelised. Any setval_* kernels are
     transformed so as to compute into the L1 halos.
 
     :param psyir: the PSyIR of the PSy-layer.
@@ -73,7 +74,6 @@ def trans(psyir):
         gpu_annotation_trans = OMPDeclareTargetTrans()
     elif OFFLOAD_DIRECTIVES == "acc":
         # Use OpenACC offloading
-        enter_data_trans = ACCEnterDataTrans()
         loop_offloading_trans = ACCLoopTrans()
         kernels_trans = ACCKernelsTrans()
         gpu_region_trans = ACCParallelTrans(default_present=False)
@@ -81,9 +81,11 @@ def trans(psyir):
     elif OFFLOAD_DIRECTIVES == "none":
         pass
     else:
-        print(f"The PSyclone transformation script expects the "
-              f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or 'none'"
-              f"but found '{OFFLOAD_DIRECTIVES}'.")
+        print(
+            f"The PSyclone transformation script expects the "
+            f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or "
+            f"'none' but found '{OFFLOAD_DIRECTIVES}'."
+        )
         sys.exit(-1)
 
     print(f"PSy name = '{psyir.name}'")
@@ -100,8 +102,13 @@ def trans(psyir):
                     if loop.kernels()[0].name in ["setval_c"]:
                         rtrans.apply(loop, options={"depth": 1})
 
-        if (psyir.name.lower() in INVOKE_EXCLUSIONS) or (OFFLOAD_DIRECTIVES == "none"):
-            print(f"Not adding GPU offloading to invoke '{subroutine.name}'")
+        if (
+            psyir.name.lower() in INVOKE_EXCLUSIONS
+            or OFFLOAD_DIRECTIVES == "none"
+        ):
+            print(
+                f"Not adding GPU offloading to invoke '{subroutine.name}'"
+            )
             offload = False
         else:
             offload = True
@@ -124,24 +131,33 @@ def trans(psyir):
             if loop.iteration_space.endswith("cell_column"):
                 if offload:
                     for kern in loop.kernels():
-                        if kern.name.lower() in (GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS + list(succeeded_offload)):
-                           continue
-                        else:
+                        if kern.name.lower() in (
+                            GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS +
+                            list(succeeded_offload)
+                        ):
+                            continue
+
+                        try:
+                            gpu_annotation_trans.apply(
+                                kern, options={'force': True}
+                            )
+                            print(f"GPU-annotated kernel '{kern.name}'")
+
                             try:
-                                gpu_annotation_trans.apply(kern, options={'force': True})
-                                print(f"GPU-annotated kernel '{kern.name}'")
-                                try:
-                                    inline_trans.apply(kern)
-                                    print(f"Module-inlined kernel '{kern.name}'")
-                                    succeeded_offload.add(kern.name.lower())
-                                except TransformationError as err:
-                                    print(f"Failed to module-inline '{kern.name}' due "
-                                        f"to:\n{err.value}")
+                                inline_trans.apply(kern)
+                                print(f"Module-inlined kernel '{kern.name}'")
+                                succeeded_offload.add(kern.name.lower())
                             except TransformationError as err:
-                                failed_to_offload.add(kern.name.lower())
-                                print(f"Failed to annotate '{kern.name}' with "
-                                    f"GPU-enabled directive due to:\n"
-                                    f"{err.value}")
+                                print(
+                                    f"Failed to module-inline '{kern.name}'"
+                                    f" due to:\n{err.value}"
+                                )
+                        except TransformationError as err:
+                            failed_to_offload.add(kern.name.lower())
+                            print(
+                                f"Failed to annotate '{kern.name}' with "
+                                f"GPU-enabled directive due to:\n{err.value}"
+                            )
                         # For annotated or inlined kernels we could attempt to
                         # provide compile-time dimensions for the temporary
                         # arrays and convert to code unsupported intrinsics.
@@ -149,8 +165,13 @@ def trans(psyir):
         # Add GPU offloading to loops unless they are over colours or are null.
         for loop in subroutine.walk(Loop):
             kernel_names = [k.name.lower() for k in loop.kernels()]
-            if offload and all(name not in (list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS 
-                               + KERNEL_EXCLUSIONS) for name in kernel_names):
+            if offload and all(
+                name not in (
+                    list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS +
+                    KERNEL_EXCLUSIONS
+                )
+                for name in kernel_names
+            ):
                 try:
                     if loop.loop_type == "colours":
                         pass
@@ -160,8 +181,9 @@ def trans(psyir):
                         gpu_region_trans.apply(loop.ancestor(Directive))
                     if loop.loop_type == "":
                         loop_offloading_trans.apply(
-                            loop, options={"independent": True})
-                        gpu_region_trans.apply(loop.ancestor(Directive))                        
+                            loop, options={"independent": True}
+                        )
+                        gpu_region_trans.apply(loop.ancestor(Directive))
                     if loop.loop_type == "dof":
                         # Loops over dofs can contains reductions
                         if kernels_trans:
@@ -185,13 +207,22 @@ def trans(psyir):
         # Apply OpenMP thread parallelism for any kernels we've not been able
         # to offload to GPU.
         for loop in subroutine.walk(Loop):
-            if any(kern.name.lower() in KERNEL_EXCLUSIONS for kern in loop.kernels()):
-               continue 
-            if not offload or any(kern.name.lower() in (list(failed_to_offload) + 
-                                  GPU_KERNEL_EXCLUSIONS) for
-                                  kern in loop.kernels()):
+            if any(
+                kern.name.lower() in KERNEL_EXCLUSIONS
+                for kern in loop.kernels()
+            ):
+                continue
+
+            if (
+                not offload
+                or any(
+                    kern.name.lower() in (
+                        list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS
+                    )
+                    for kern in loop.kernels()
+                )
+            ):
                 if loop.loop_type not in ["colours", "null"]:
                     cpu_parallel.apply(loop)
                     otrans.apply(loop, options={"reprod": True})
-        
         print(subroutine.view())
diff --git a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py
index 8d926f517..d2e17951a 100644
--- a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py
+++ b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/skeb_main_alg_mod.py
@@ -23,12 +23,12 @@
     Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans,
     Dynamo0p3RedundantComputationTrans, OMPParallelTrans,
     ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans,
-    OMPDeclareTargetTrans, OMPLoopTrans, ACCEnterDataTrans)
+    OMPDeclareTargetTrans, OMPLoopTrans)
 from psyclone.domain.common.transformations import KernelModuleInlineTrans
 
 
 # Names of any invoke that we won't add any GPU offloading
-INVOKE_EXCLUSIONS = [ 
+INVOKE_EXCLUSIONS = [
 ]
 
 # Names of any kernel that we won't add parallelization
@@ -45,7 +45,7 @@
 def trans(psyir):
     '''Applies PSyclone colouring and GPU offloading transformations. Any
     kernels that cannot be offloaded to GPU are parallelised using OpenMP
-    on the CPU if they can be parallelised. Any setval_* kernels are 
+    on the CPU if they can be parallelised. Any setval_* kernels are
     transformed so as to compute into the L1 halos.
 
     :param psyir: the PSyIR of the PSy-layer.
@@ -72,7 +72,6 @@ def trans(psyir):
         gpu_annotation_trans = OMPDeclareTargetTrans()
     elif OFFLOAD_DIRECTIVES == "acc":
         # Use OpenACC offloading
-        enter_data_trans = ACCEnterDataTrans()
         loop_offloading_trans = ACCLoopTrans()
         kernels_trans = ACCKernelsTrans()
         gpu_region_trans = ACCParallelTrans(default_present=False)
@@ -80,9 +79,11 @@ def trans(psyir):
     elif OFFLOAD_DIRECTIVES == "none":
         pass
     else:
-        print(f"The PSyclone transformation script expects the "
-              f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or 'none'"
-              f"but found '{OFFLOAD_DIRECTIVES}'.")
+        print(
+            f"The PSyclone transformation script expects the "
+            f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or "
+            f"'none' but found '{OFFLOAD_DIRECTIVES}'."
+        )
         sys.exit(-1)
 
     print(f"PSy name = '{psyir.name}'")
@@ -99,8 +100,13 @@ def trans(psyir):
                     if loop.kernels()[0].name in ["setval_c"]:
                         rtrans.apply(loop, options={"depth": 1})
 
-        if (psyir.name.lower() in INVOKE_EXCLUSIONS) or (OFFLOAD_DIRECTIVES == "none"):
-            print(f"Not adding GPU offloading to invoke '{subroutine.name}'")
+        if (
+            psyir.name.lower() in INVOKE_EXCLUSIONS
+            or OFFLOAD_DIRECTIVES == "none"
+        ):
+            print(
+                f"Not adding GPU offloading to invoke '{subroutine.name}'"
+            )
             offload = False
         else:
             offload = True
@@ -123,44 +129,65 @@ def trans(psyir):
             if loop.iteration_space.endswith("cell_column"):
                 if offload:
                     for kern in loop.kernels():
-                        if kern.name.lower() in (GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS + list(succeeded_offload)):
-                           continue
-                        else:
+                        if kern.name.lower() in (
+                            GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS +
+                            list(succeeded_offload)
+                        ):
+                            continue
+
+                        try:
+                            gpu_annotation_trans.apply(
+                                kern, options={'force': True}
+                            )
+                            print(f"GPU-annotated kernel '{kern.name}'")
+
                             try:
-                                gpu_annotation_trans.apply(kern, options={'force': True})
-                                print(f"GPU-annotated kernel '{kern.name}'")
-                                try:
-                                    inline_trans.apply(kern)
-                                    print(f"Module-inlined kernel '{kern.name}'")
-                                    succeeded_offload.add(kern.name.lower())
-                                except TransformationError as err:
-                                    print(f"Failed to module-inline '{kern.name}' due "
-                                        f"to:\n{err.value}")
+                                inline_trans.apply(kern)
+                                print(f"Module-inlined kernel '{kern.name}'")
+                                succeeded_offload.add(kern.name.lower())
                             except TransformationError as err:
-                                failed_to_offload.add(kern.name.lower())
-                                print(f"Failed to annotate '{kern.name}' with "
-                                    f"GPU-enabled directive due to:\n"
-                                    f"{err.value}")
+                                print(
+                                    f"Failed to module-inline '{kern.name}'"
+                                    f" due to:\n{err.value}"
+                                )
+                        except TransformationError as err:
+                            failed_to_offload.add(kern.name.lower())
+                            print(
+                                f"Failed to annotate '{kern.name}' with "
+                                f"GPU-enabled directive due to:\n{err.value}"
+                            )
                         # For annotated or inlined kernels we could attempt to
                         # provide compile-time dimensions for the temporary
                         # arrays and convert to code unsupported intrinsics.
 
         # Add GPU offloading to loops unless they are over colours or are null.
         for loop in subroutine.walk(Loop):
-            kernel_names = [k.name.lower() for k in loop.kernels()]
-            if offload and all(name not in (list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS 
-                               + KERNEL_EXCLUSIONS) for name in kernel_names):
+            kernel_names = [
+                k.name.lower() for k in loop.kernels()
+            ]
+            if offload and all(
+                name not in (
+                    list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS +
+                    KERNEL_EXCLUSIONS
+                )
+                for name in kernel_names
+            ):
                 try:
                     if loop.loop_type == "colours":
                         pass
+
                     if loop.loop_type == "colour":
                         loop_offloading_trans.apply(
-                            loop, options={"independent": True})
+                            loop, options={"independent": True}
+                        )
                         gpu_region_trans.apply(loop.ancestor(Directive))
+
                     if loop.loop_type == "":
                         loop_offloading_trans.apply(
-                            loop, options={"independent": True})
-                        gpu_region_trans.apply(loop.ancestor(Directive))                        
+                            loop, options={"independent": True}
+                        )
+                        gpu_region_trans.apply(loop.ancestor(Directive))
+
                     if loop.loop_type == "dof":
                         # Loops over dofs can contains reductions
                         if kernels_trans:
@@ -168,29 +195,44 @@ def trans(psyir):
                             # manage them
                             kernels_trans.apply(loop)
                         else:
-                            # Otherwise, if the reductions exists, they will
-                            # be detected by the dependencyAnalysis and raise
-                            # a TransformationError captured below
+                            # Otherwise, if any reductions exist, they
+                            # will be detected by the dependencyAnalysis
+                            # and raise a TransformationError captured
+                            # below
                             loop_offloading_trans.apply(
-                                loop, options={"independent": True})
+                                loop, options={"independent": True}
+                            )
                             gpu_region_trans.apply(loop.ancestor(Directive))
-                        # Alternatively we could use loop parallelism with
-                        # reduction clauses
+
+                    # Alternatively we could use loop parallelism with
+                    # reduction clauses
                     print(f"Successfully offloaded loop with {kernel_names}")
                 except TransformationError as err:
-                    print(f"Failed to offload loop with {kernel_names} "
-                          f"because: {err}")
+                    print(
+                        f"Failed to offload loop with {kernel_names} "
+                        f"because: {err}"
+                    )
 
         # Apply OpenMP thread parallelism for any kernels we've not been able
         # to offload to GPU.
         for loop in subroutine.walk(Loop):
-            if any(kern.name.lower() in KERNEL_EXCLUSIONS for kern in loop.kernels()):
-               continue 
-            if not offload or any(kern.name.lower() in (list(failed_to_offload) + 
-                                  GPU_KERNEL_EXCLUSIONS) for
-                                  kern in loop.kernels()):
+            if any(
+                kern.name.lower() in KERNEL_EXCLUSIONS
+                for kern in loop.kernels()
+            ):
+                continue
+
+            if (
+                not offload
+                or any(
+                    kern.name.lower() in (
+                        list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS
+                        )
+                    for kern in loop.kernels()
+                )
+            ):
                 if loop.loop_type not in ["colours", "null"]:
                     cpu_parallel.apply(loop)
                     otrans.apply(loop, options={"reprod": True})
-        
+
         print(subroutine.view())
diff --git a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py
index 3872b2539..1b5cfea8a 100644
--- a/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py
+++ b/applications/lfric_atm/optimisation/meto-ex1a/psykal/algorithm/spt_main_alg_mod.py
@@ -23,12 +23,12 @@
     Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans,
     Dynamo0p3RedundantComputationTrans, OMPParallelTrans,
     ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans,
-    OMPDeclareTargetTrans, OMPLoopTrans, ACCEnterDataTrans)
+    OMPDeclareTargetTrans, OMPLoopTrans)
 from psyclone.domain.common.transformations import KernelModuleInlineTrans
 
 
 # Names of any invoke that we won't add any GPU offloading
-INVOKE_EXCLUSIONS = [ 
+INVOKE_EXCLUSIONS = [
 ]
 
 # Names of any kernel that we won't add parallelization
@@ -36,14 +36,15 @@
 ]
 
 # Names of any kernels that we won't offload to GPU
-GPU_KERNEL_EXCLUSIONS = ["spt_saturation_cap_code",] 
+GPU_KERNEL_EXCLUSIONS = ["spt_saturation_cap_code",]
 # spt_saturation_cap_code: GPU transformation cannot be applied because of
-# using qsat_wat_mix from qsat_mod. As qsat_mod is going to be modified in future,
-# this falls out of the scope of the NGARCH project.
-# Error message: Transformation Error: Kernel 'spt_saturation_cap_code' accesses
-# the symbol 'qsat_wat_mix: RoutineSymbol<NoType, pure=unknown, elemental=unknown>'
-# which is imported. If this symbol represents data then it must first be converted
-# to a Kernel argument using the KernelImportsToArguments transformation.
+# using qsat_wat_mix from qsat_mod. As qsat_mod is going to be modified in
+# the future, this falls outside the scope of the NGARCH project.
+# Error message: Transformation Error: Kernel 'spt_saturation_cap_code'
+# accesses the symbol 'qsat_wat_mix: RoutineSymbol<NoType, pure=unknown,
+# elemental=unknown>' which is imported.
+# If this symbol represents data then it must first be converted to a
+# Kernel argument using the KernelImportsToArguments transformation.
 
 OFFLOAD_DIRECTIVES = os.getenv('LFRIC_OFFLOAD_DIRECTIVES', "none")
 
@@ -51,7 +52,7 @@
 def trans(psyir):
     '''Applies PSyclone colouring and GPU offloading transformations. Any
     kernels that cannot be offloaded to GPU are parallelised using OpenMP
-    on the CPU if they can be parallelised. Any setval_* kernels are 
+    on the CPU if they can be parallelised. Any setval_* kernels are
     transformed so as to compute into the L1 halos.
 
     :param psyir: the PSyIR of the PSy-layer.
@@ -78,7 +79,6 @@ def trans(psyir):
         gpu_annotation_trans = OMPDeclareTargetTrans()
     elif OFFLOAD_DIRECTIVES == "acc":
         # Use OpenACC offloading
-        enter_data_trans = ACCEnterDataTrans()
         loop_offloading_trans = ACCLoopTrans()
         kernels_trans = ACCKernelsTrans()
         gpu_region_trans = ACCParallelTrans(default_present=False)
@@ -86,9 +86,11 @@ def trans(psyir):
     elif OFFLOAD_DIRECTIVES == "none":
         pass
     else:
-        print(f"The PSyclone transformation script expects the "
-              f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or 'none'"
-              f"but found '{OFFLOAD_DIRECTIVES}'.")
+        print(
+            f"The PSyclone transformation script expects the "
+            f"LFRIC_OFFLOAD_DIRECTIVES to be set to 'omp' or 'acc' or "
+            f"'none' but found '{OFFLOAD_DIRECTIVES}'."
+        )
         sys.exit(-1)
 
     print(f"PSy name = '{psyir.name}'")
@@ -105,8 +107,13 @@ def trans(psyir):
                     if loop.kernels()[0].name in ["setval_c"]:
                         rtrans.apply(loop, options={"depth": 1})
 
-        if (psyir.name.lower() in INVOKE_EXCLUSIONS) or (OFFLOAD_DIRECTIVES == "none"):
-            print(f"Not adding GPU offloading to invoke '{subroutine.name}'")
+        if (
+            psyir.name.lower() in INVOKE_EXCLUSIONS
+            or OFFLOAD_DIRECTIVES == "none"
+        ):
+            print(
+                f"Not adding GPU offloading to invoke '{subroutine.name}'"
+            )
             offload = False
         else:
             offload = True
@@ -129,24 +136,33 @@ def trans(psyir):
             if loop.iteration_space.endswith("cell_column"):
                 if offload:
                     for kern in loop.kernels():
-                        if kern.name.lower() in (GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS + list(succeeded_offload)):
-                           continue
-                        else:
+                        if kern.name.lower() in (
+                            GPU_KERNEL_EXCLUSIONS + KERNEL_EXCLUSIONS +
+                            list(succeeded_offload)
+                        ):
+                            continue
+
+                        try:
+                            gpu_annotation_trans.apply(
+                                kern, options={'force': True}
+                            )
+                            print(f"GPU-annotated kernel '{kern.name}'")
+
                             try:
-                                gpu_annotation_trans.apply(kern, options={'force': True})
-                                print(f"GPU-annotated kernel '{kern.name}'")
-                                try:
-                                    inline_trans.apply(kern)
-                                    print(f"Module-inlined kernel '{kern.name}'")
-                                    succeeded_offload.add(kern.name.lower())
-                                except TransformationError as err:
-                                    print(f"Failed to module-inline '{kern.name}' due "
-                                        f"to:\n{err.value}")
+                                inline_trans.apply(kern)
+                                print(f"Module-inlined kernel '{kern.name}'")
+                                succeeded_offload.add(kern.name.lower())
                             except TransformationError as err:
-                                failed_to_offload.add(kern.name.lower())
-                                print(f"Failed to annotate '{kern.name}' with "
-                                    f"GPU-enabled directive due to:\n"
-                                    f"{err.value}")
+                                print(
+                                    f"Failed to module-inline '{kern.name}'"
+                                    f" due to:\n{err.value}"
+                                )
+                        except TransformationError as err:
+                            failed_to_offload.add(kern.name.lower())
+                            print(
+                                f"Failed to annotate '{kern.name}' with "
+                                f"GPU-enabled directive due to:\n{err.value}"
+                            )
                         # For annotated or inlined kernels we could attempt to
                         # provide compile-time dimensions for the temporary
                         # arrays and convert to code unsupported intrinsics.
@@ -154,8 +170,13 @@ def trans(psyir):
         # Add GPU offloading to loops unless they are over colours or are null.
         for loop in subroutine.walk(Loop):
             kernel_names = [k.name.lower() for k in loop.kernels()]
-            if offload and all(name not in (list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS 
-                               + KERNEL_EXCLUSIONS) for name in kernel_names):
+            if offload and all(
+                name not in (
+                    list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS +
+                    KERNEL_EXCLUSIONS
+                )
+                for name in kernel_names
+            ):
                 try:
                     if loop.loop_type == "colours":
                         pass
@@ -166,7 +187,7 @@ def trans(psyir):
                     if loop.loop_type == "":
                         loop_offloading_trans.apply(
                             loop, options={"independent": True})
-                        gpu_region_trans.apply(loop.ancestor(Directive))                        
+                        gpu_region_trans.apply(loop.ancestor(Directive))
                     if loop.loop_type == "dof":
                         # Loops over dofs can contains reductions
                         if kernels_trans:
@@ -190,13 +211,23 @@ def trans(psyir):
         # Apply OpenMP thread parallelism for any kernels we've not been able
         # to offload to GPU.
         for loop in subroutine.walk(Loop):
-            if any(kern.name.lower() in KERNEL_EXCLUSIONS for kern in loop.kernels()):
-               continue 
-            if not offload or any(kern.name.lower() in (list(failed_to_offload) + 
-                                  GPU_KERNEL_EXCLUSIONS) for
-                                  kern in loop.kernels()):
+            if any(
+                kern.name.lower() in KERNEL_EXCLUSIONS
+                for kern in loop.kernels()
+            ):
+                continue
+
+            if (
+                not offload
+                or any(
+                    kern.name.lower() in (
+                        list(failed_to_offload) + GPU_KERNEL_EXCLUSIONS
+                    )
+                    for kern in loop.kernels()
+                )
+            ):
                 if loop.loop_type not in ["colours", "null"]:
                     cpu_parallel.apply(loop)
                     otrans.apply(loop, options={"reprod": True})
-        
+
         print(subroutine.view())