From 2b3d35d7ab47c84290094d3efa6811423e8a8f46 Mon Sep 17 00:00:00 2001
From: anandrdbz <62814442+anandrdbz@users.noreply.github.com>
Date: Fri, 5 Apr 2024 23:18:05 -0400
Subject: [PATCH] OpenACC + Cray CCE + AMD MI200+ (#368)

Co-authored-by: Henry Le Berre <hberre3@gatech.edu>
Co-authored-by: Steve Abbott <stephen.abbott@hpe.com>
Co-authored-by: Anand Radhakrishnan <aradhakr34@login2.crusher.olcf.ornl.gov>
Co-authored-by: Anand Radhakrishnan <aradhakr34@frontier10195.frontier.olcf.ornl.gov>
Co-authored-by: Anand Radhakrishnan <aradhakr34@login03.frontier.olcf.ornl.gov>
Co-authored-by: Anand Radhakrishnan <aradhakr34@frontier00516.frontier.olcf.ornl.gov>
Co-authored-by: wilfonba <bwilfong3@gatech.edu>
Co-authored-by: Anand Radhakrishnan <aradhakr34@login08.frontier.olcf.ornl.gov>
Co-authored-by: Anand Radhakrishnan <aradhakr34@frontier00518.frontier.olcf.ornl.gov>
Co-authored-by: Anand Radhakrishnan <aradhakr34@frontier00517.frontier.olcf.ornl.gov>
Co-authored-by: Steve Abbott <steve.abt@gmail.com>
Co-authored-by: Abbott, Stephen R <abbottsr@ornl.gov>
Co-authored-by: Spencer Bryngelson <shb@gatech.edu>
Co-authored-by: Anand Radhakrishnan <aradhakr34@login13.frontier.olcf.ornl.gov>
Co-authored-by: Ben Wilfong <48168887+wilfonba@users.noreply.github.com>
Co-authored-by: Anand Radhakrishnan <aradhakr34@login05.frontier.olcf.ornl.gov>
Co-authored-by: Anand Radhakrishnan <aradhakr34@login07.frontier.olcf.ornl.gov>
Co-authored-by: Anand Radhakrishnan <aradhakr34@frontier06151.frontier.olcf.ornl.gov>
Co-authored-by: Anand Radhakrishnan <aradhakr34@frontier08146.frontier.olcf.ornl.gov>
Co-authored-by: Anand Radhakrishnan <aradhakr34@login14.frontier.olcf.ornl.gov>
Co-authored-by: Anand Radhakrishnan <aradhakr34@login12.frontier.olcf.ornl.gov>
Co-authored-by: Anand Radhakrishnan <aradhakr34@login04.frontier.olcf.ornl.gov>
Co-authored-by: Anand Radhakrishnan <aradhakr34@login-phoenix-slurm-3.pace.gatech.edu>
Co-authored-by: Anand Radhakrishnan <aradhakr34@atl1-1-02-006-31-0.pace.gatech.edu>
Co-authored-by: Anand Radhakrishnan <aradhakr34@login-phoenix-slurm-1.pace.gatech.edu>
Co-authored-by: Anand Radhakrishnan <aradhakr34@atl1-1-02-008-35-0.pace.gatech.edu>
Co-authored-by: Anand Radhakrishnan <aradhakr34@atl1-1-02-005-30-0.pace.gatech.edu>
Co-authored-by: Anand <anand@ipsec-10-2-194-67.vpn.gatech.edu>
Co-authored-by: Anand Radhakrishnan <aradhakr34@frontier00081.frontier.olcf.ornl.gov>
Co-authored-by: Anand <anand@Anands-MacBook-Pro.local>
Co-authored-by: Spencer Bryngelson <sbryngelson@gmail.com>
---
 .github/workflows/frontier/build.sh       |   4 +
 .github/workflows/frontier/submit.sh      |  43 ++
 .github/workflows/frontier/test.sh        |   3 +
 .github/workflows/test.yml                |  17 +-
 .typos.toml                               |   4 +-
 CMakeLists.txt                            |  65 ++-
 docs/Doxyfile.in                          |   2 +-
 examples/3D_weak_scaling/analyze.sh       |   5 +
 misc/profiling_amdgpu.txt                 |  23 ++
 src/common/include/inline_conversions.fpp |   8 +-
 src/common/include/macros.fpp             | 102 ++++-
 src/common/m_derived_types.fpp            |   2 +-
 src/common/m_helper.fpp                   |   7 +-
 src/common/m_mpi_common.fpp               |   2 +-
 src/common/m_phase_change.fpp             |  24 +-
 src/common/m_variables_conversion.fpp     |  59 ++-
 src/post_process/m_start_up.f90           |   2 +-
 src/pre_process/m_checker.f90             |  35 +-
 src/pre_process/m_start_up.fpp            |   2 +-
 src/simulation/include/case.fpp           |   5 +-
 src/simulation/m_bubbles.fpp              |  42 +-
 src/simulation/m_cbc.fpp                  | 236 ++++++-----
 src/simulation/m_compute_cbc.fpp          |  48 ++-
 src/simulation/m_data_output.fpp          |  56 +--
 src/simulation/m_derived_variables.f90    |   3 +-
 src/simulation/m_fftw.fpp                 | 114 ++++--
 src/simulation/m_global_parameters.fpp    | 155 ++++++--
 src/simulation/m_hypoelastic.fpp          |  28 +-
 src/simulation/m_ibm.fpp                  |  58 +--
 src/simulation/m_monopole.fpp             |  35 +-
 src/simulation/m_mpi_proxy.fpp            | 178 ++++-----
 src/simulation/m_qbmm.fpp                 |  67 +++-
 src/simulation/m_rhs.fpp                  | 323 ++++++++-------
 src/simulation/m_riemann_solvers.fpp      | 222 +++++++----
 src/simulation/m_start_up.fpp             |  39 +-
 src/simulation/m_time_steppers.fpp        | 114 ++++--
 src/simulation/m_viscous.fpp              | 459 +++++++++++-----------
 src/simulation/m_weno.fpp                 | 253 +++++++-----
 toolchain/bootstrap/format.sh             |  10 +-
 toolchain/bootstrap/format_file.sh        |  37 ++
 toolchain/bootstrap/modules.sh            |   8 +-
 toolchain/cce_simulation_workgroup_256.sh |  48 +++
 toolchain/dependencies/CMakeLists.txt     |  77 ++--
 toolchain/mfc/args.py                     |   8 +-
 toolchain/mfc/build.py                    |  27 +-
 toolchain/mfc/common.py                   |   1 -
 toolchain/mfc/run/input.py                |   2 +-
 toolchain/mfc/state.py                    |   9 +-
 toolchain/mfc/test/test.py                |   4 +-
 toolchain/modules                         |  12 +-
 toolchain/requirements.txt                |   2 +
 toolchain/templates/frontier.mako         |  54 +++
 toolchain/templates/include/helpers.mako  |   8 +-
 53 files changed, 2064 insertions(+), 1087 deletions(-)
 create mode 100644 .github/workflows/frontier/build.sh
 create mode 100644 .github/workflows/frontier/submit.sh
 create mode 100644 .github/workflows/frontier/test.sh
 create mode 100755 examples/3D_weak_scaling/analyze.sh
 create mode 100644 misc/profiling_amdgpu.txt
 create mode 100644 toolchain/bootstrap/format_file.sh
 create mode 100755 toolchain/cce_simulation_workgroup_256.sh
 create mode 100644 toolchain/templates/frontier.mako

diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh
new file mode 100644
index 000000000..a6a51b65f
--- /dev/null
+++ b/.github/workflows/frontier/build.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+. ./mfc.sh load -c f -m g
+./mfc.sh build -j 8 --gpu
diff --git a/.github/workflows/frontier/submit.sh b/.github/workflows/frontier/submit.sh
new file mode 100644
index 000000000..706d7e468
--- /dev/null
+++ b/.github/workflows/frontier/submit.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+set -e
+
+usage() {
+    echo "Usage: $0 [script.sh] [cpu|gpu]"
+}
+
+if [ ! -z "$1" ]; then
+    sbatch_script_contents=`cat $1`
+else
+    usage
+    exit 1
+fi
+
+job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2"
+
+sbatch <<EOT
+#!/bin/bash
+#SBATCH -JMFC-$job_slug            # Job name
+#SBATCH -A CFD154                  # charge account
+#SBATCH -N 1                       # Number of nodes required
+#SBATCH -n 8                       # Number of cores required
+#SBATCH -t 02:00:00                # Duration of the job (Ex: 15 mins)
+#SBATCH -q debug                   # QOS Name
+#SBATCH -o$job_slug.out            # Combined output and error messages file
+#SBATCH -W                         # Do not exit until the submitted job terminates.
+
+set -e
+set -x
+
+cd "\$SLURM_SUBMIT_DIR"
+echo "Running in $(pwd):"
+
+job_slug="$job_slug"
+job_device="$2"
+
+. ./mfc.sh load -c f -m g
+
+$sbatch_script_contents
+
+EOT
+
diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh
new file mode 100644
index 000000000..fab53ef8e
--- /dev/null
+++ b/.github/workflows/frontier/test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+./mfc.sh test -j 4 -a -- -c frontier
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index d4e14c519..ee9de140c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -99,22 +99,35 @@ jobs:
       run:  sudo ./mfc.sh docker ./mfc.sh test -j $(nproc) -a
 
   self:
-    name: Georgia Tech | Phoenix (NVHPC)
+    name: Self Hosted
     if: github.repository == 'MFlowCode/MFC'
     continue-on-error: true
     strategy:
       matrix:
         device: ['cpu', 'gpu']
+        lbl: ['gt', 'frontier']
+        exclude:
+          - device:   cpu
+            lbl: frontier
     runs-on:
       group:  phoenix
-      labels: gt
+      labels: ${{ matrix.lbl }}
     steps:
       - name: Clone
         uses: actions/checkout@v3
 
       - name: Build & Test
+        if:   matrix.lbl == 'gt'
         run:  bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }}
 
+      - name: Build
+        if:   matrix.lbl == 'frontier'
+        run:  bash .github/workflows/frontier/build.sh
+
+      - name: Test
+        if:   matrix.lbl == 'frontier'
+        run:  bash .github/workflows/frontier/submit.sh .github/workflows/frontier/test.sh ${{matrix.device}}
+
       - name: Print Logs
         if:   always()
         run:  cat test-${{ matrix.device }}.out
diff --git a/.typos.toml b/.typos.toml
index c786a50b8..db23ab691 100644
--- a/.typos.toml
+++ b/.typos.toml
@@ -10,6 +10,8 @@ AttributeIDSupressMenu = "AttributeIDSupressMenu"
 INOUT = "INOUT"
 WRONLY = "WRONLY"
 nd = "nd"
+iy = "iy"
+HPE = "HPE"
 
 [files]
-extend-exclude = ["docs/documentation/references*", "tests/"]
+extend-exclude = ["docs/documentation/references*", "tests/", "toolchain/cce_simulation_workgroup_256.sh"]
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 06dbb3a25..fd15f0cf6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,8 +12,7 @@ cmake_minimum_required(VERSION 3.20)
 # We include C as a language because - for some reason -
 # FIND_LIBRARY_USE_LIB64_PATHS is otherwise ignored.
 
-project(MFC LANGUAGES C Fortran)
-
+project(MFC LANGUAGES C CXX Fortran)
 
 # Build options exposed to users and their default values.
 
@@ -109,7 +108,7 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
 
     if (CMAKE_BUILD_TYPE STREQUAL "Debug")
         add_compile_options(
-            -fcheck=all,no-array-temps 
+            -fcheck=all,no-array-temps
             -fbacktrace
             -fimplicit-none
             #-ffpe-trap=invalid,zero,denormal,overflow
@@ -125,11 +124,22 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
 elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
     add_compile_options(
         "SHELL:-h nomessage=296:878:1391:1069"
-        "SHELL:-h msgs" "SHELL:-h static" "SHELL:-h keepfiles"
+        "SHELL:-h static" "SHELL:-h keepfiles"
+        "SHELL:-h acc_model=auto_async_none"
+        "SHELL: -h acc_model=no_fast_addr"
+        "SHELL: -h list=adm" "-DCRAY_ACC_SIMPLIFY" "-DCRAY_ACC_WAR"
     )
 
+    add_link_options("SHELL:-hkeepfiles")
+    
     if (CMAKE_BUILD_TYPE STREQUAL "Debug")
-        add_compile_options(-e D "SHELL:-h func_trace")
+        add_compile_options(
+                "SHELL:-h acc_model=auto_async_none"
+                "SHELL: -h acc_model=no_fast_addr"
+                "SHELL: -K trap=fp" "SHELL: -G2"
+
+        )
+        add_link_options("SHELL: -K trap=fp" "SHELL: -G2")
     endif()
 elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Flang")
     add_compile_options(
@@ -197,13 +207,14 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug")
 endif()
 
 
+
 # HANDLE_SOURCES: Given a target (herein <target>):
 #
 # *  Locate all source files for <target> of the type
 #
 #               src/[<target>,common]/[.,include]/*.[f90,fpp].
-# 
-# *  For each .fpp file found with filepath <dirpath>/<filename>.fpp, using a 
+#
+# *  For each .fpp file found with filepath <dirpath>/<filename>.fpp, using a
 #    custom command, instruct CMake how to generate a file with path
 #
 #                     src/<target>/fypp/<filename>.f90
@@ -224,7 +235,7 @@ endif()
 #    file is modified, but also when any file with filepath of the form
 #
 #                     src/[<target>,common]/include/*.fpp
-#    
+#
 #    is modified. This is a reasonable compromise as modifications to .fpp files
 #    in the include directories will be rare - by design. Other approaches would
 #    have required a more complex CMakeLists.txt file (perhaps parsing the .fpp
@@ -333,11 +344,11 @@ function(MFC_SETUP_TARGET)
 
     set_target_properties(${ARGS_TARGET} PROPERTIES Fortran_PREPROCESS ON)
 
-    target_include_directories(${ARGS_TARGET} PRIVATE 
+    target_include_directories(${ARGS_TARGET} PRIVATE
         "${CMAKE_SOURCE_DIR}/src/common"
         "${CMAKE_SOURCE_DIR}/src/common/include"
         "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}")
-    
+
     if (EXISTS "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
         target_include_directories(${ARGS_TARGET} PRIVATE
             "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
@@ -367,9 +378,14 @@ function(MFC_SETUP_TARGET)
     endif()
 
     if (ARGS_FFTW)
-        if (MFC_OpenACC AND ARGS_OpenACC AND (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI"))
-            find_package(CUDAToolkit REQUIRED)
-            target_link_libraries(${ARGS_TARGET} PRIVATE CUDA::cudart CUDA::cufft)
+        if (MFC_OpenACC AND ARGS_OpenACC)
+            if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
+                find_package(CUDAToolkit REQUIRED)
+                target_link_libraries(${ARGS_TARGET} PRIVATE CUDA::cudart CUDA::cufft)
+            else()
+                find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED)
+                target_link_libraries(${ARGS_TARGET} PRIVATE hipfort::hipfft)
+            endif()
         else()
             find_package(FFTW REQUIRED)
             target_link_libraries(${ARGS_TARGET} PRIVATE FFTW::FFTW)
@@ -420,6 +436,9 @@ function(MFC_SETUP_TARGET)
                     PRIVATE -gpu=autocompare,debug
                 )
             endif()
+        elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
+            find_package(hipfort COMPONENTS hip CONFIG REQUIRED)
+            target_link_libraries(${ARGS_TARGET} PRIVATE hipfort::hip hipfort::hipfort-amdgcn)
         endif()
     elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
         target_compile_options(${ARGS_TARGET} PRIVATE "SHELL:-h noacc" "SHELL:-x acc")
@@ -438,19 +457,31 @@ if (MFC_PRE_PROCESS)
     MFC_SETUP_TARGET(TARGET  pre_process
                      SOURCES "${pre_process_SRCs}"
                      MPI)
+    if(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
+        target_compile_options(pre_process PRIVATE -hfp0)
+    endif()
 endif()
 
 if (MFC_SIMULATION)
     MFC_SETUP_TARGET(TARGET  simulation
                      SOURCES "${simulation_SRCs}"
                      MPI OpenACC FFTW)
+
+    if (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray" AND MFC_OpenACC)
+        add_custom_command(TARGET simulation POST_BUILD
+            COMMAND           "${CMAKE_CURRENT_SOURCE_DIR}/toolchain/cce_simulation_workgroup_256.sh"
+                              "${CMAKE_CURRENT_BINARY_DIR}"
+            WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+            COMMENT           "Patching & Rebuilding with Cray hacks"
+        )
+    endif()
 endif()
 
 if (MFC_POST_PROCESS)
     MFC_SETUP_TARGET(TARGET  post_process
                      SOURCES "${post_process_SRCs}"
                      MPI SILO HDF5 FFTW)
-    
+
     # -O0 is in response to https://github.com/MFlowCode/MFC-develop/issues/95
     target_compile_options(post_process PRIVATE -O0)
 endif()
@@ -468,7 +499,7 @@ if (MFC_DOCUMENTATION)
     add_custom_command(
         OUTPUT  "${CMAKE_CURRENT_SOURCE_DIR}/docs/documentation/examples.md"
         DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/docs/examples.sh;${examples_DOCs}"
-        COMMAND "bash" "${CMAKE_CURRENT_SOURCE_DIR}/docs/examples.sh" 
+        COMMAND "bash" "${CMAKE_CURRENT_SOURCE_DIR}/docs/examples.sh"
                        "${CMAKE_CURRENT_SOURCE_DIR}"
         COMMENT "Generating examples.md"
         VERBATIM
@@ -500,7 +531,7 @@ if (MFC_DOCUMENTATION)
         set(DOXYGEN_IMAGE_PATH       "\"${CMAKE_CURRENT_SOURCE_DIR}/docs/res\"\
                                       \"${CMAKE_CURRENT_SOURCE_DIR}/docs/${target}\"")
 
-        file(MAKE_DIRECTORY "${DOXYGEN_OUTPUT_DIRECTORY}") 
+        file(MAKE_DIRECTORY "${DOXYGEN_OUTPUT_DIRECTORY}")
 
         configure_file(
             "${CMAKE_CURRENT_SOURCE_DIR}/docs/Doxyfile.in"
@@ -537,7 +568,7 @@ if (MFC_DOCUMENTATION)
     endmacro()
 
     add_custom_target(documentation)
-    
+
     find_package(Doxygen REQUIRED dot REQUIRED)
 
     # > Fetch CSS Theme
diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in
index 67b87a704..26e1a0156 100644
--- a/docs/Doxyfile.in
+++ b/docs/Doxyfile.in
@@ -548,7 +548,7 @@ INTERNAL_DOCS          = YES
 # names in lower-case letters. If set to YES, upper-case letters are also
 # allowed. This is useful if you have classes or files whose names only differ
 # in case and if your file system supports case sensitive file names. Windows
-# (including Cygwin) ands Mac users are advised to set this option to NO.
+# (including Cygwin) and Mac users are advised to set this option to NO.
 # The default value is: system dependent.
 
 CASE_SENSE_NAMES       = YES
diff --git a/examples/3D_weak_scaling/analyze.sh b/examples/3D_weak_scaling/analyze.sh
new file mode 100755
index 000000000..a6cd72428
--- /dev/null
+++ b/examples/3D_weak_scaling/analyze.sh
@@ -0,0 +1,5 @@
+# This script is ran from the 3D_weak_scaling case directory after running
+# MFC with --omni -n <name>. To analyze, run chmod u+x ./analyze.sh followed
+# by ./analyze.sh <name>
+
+omniperf analyze -p workloads/$1/mi200 --metric 0 7.1.5 7.1.6 7.1.7 7.1.8 7.1.9 16.3.1 16.3.2 16.3.7 17.3.2 17.3.3 17.3.8
diff --git a/misc/profiling_amdgpu.txt b/misc/profiling_amdgpu.txt
new file mode 100644
index 000000000..7421ac514
--- /dev/null
+++ b/misc/profiling_amdgpu.txt
@@ -0,0 +1,23 @@
+Profile MFC using omniperf
+
+0) Start an interactive session with the desired number of nodes and total tasks using `salloc -A [account] -J interactive -t 2:00:00 -p batch -N [nnodes] -n [total tasks]`
+
+1) Generate MFC input files by running `./mfc.sh run [path to casefile] -N [nnodes] -n  [total tasks] --gpu -t pre_process simulation --case-optimization`
+
+2) Move to the simulation directory using `cd [path to casefile]`
+
+3) - `module load` the following modules:
+    - rocm/5.5.1
+    - cray-python
+    - omniperf
+    These must be loaded in the order that they are listed.
+
+4) Run `omniperf profile -n [profile name] -- [path to MFC beginning with /]/build/install/bin/simulation`
+
+5) Run `omniperf analyze --gui -p [path to casefile]/workloads/[profile name]/mi200`
+
+6) Determine what login node you're on, call it [node name]
+
+7) Open a new terminal window and log into Frontier using `ssh -L8050:localhost:8050 username@[node name].frontier.olcf.ornl.gov`
+
+8) Open a web browser and navigate to `http://localhost:8050/`
diff --git a/src/common/include/inline_conversions.fpp b/src/common/include/inline_conversions.fpp
index 9811290a8..81ce0fa9b 100644
--- a/src/common/include/inline_conversions.fpp
+++ b/src/common/include/inline_conversions.fpp
@@ -1,6 +1,10 @@
 #:def s_compute_speed_of_sound()
     subroutine s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv, vel_sum, c)
-
+#ifdef CRAY_ACC_WAR
+!DIR$ INLINEALWAYS s_compute_speed_of_sound
+#else
+!$acc routine seq
+#endif
         real(kind(0d0)), intent(IN) :: pres
         real(kind(0d0)), intent(IN) :: rho, gamma, pi_inf
         real(kind(0d0)), intent(IN) :: H
@@ -39,7 +43,6 @@
                     (pres + pi_inf/(gamma + 1d0))/ &
                     (rho*(1d0 - adv(num_fluids)))
             end if
-
         else
             c = ((H - 5d-1*vel_sum)/gamma)
         end if
@@ -49,7 +52,6 @@
         else
             c = sqrt(c)
         end if
-
     end subroutine s_compute_speed_of_sound
 #:enddef
 
diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp
index 2f2d2fcf6..10f1a678f 100644
--- a/src/common/include/macros.fpp
+++ b/src/common/include/macros.fpp
@@ -4,6 +4,7 @@
         use iso_fortran_env, only: output_unit
 
         print *, '${_FILE_.split('/')[-1]}$:${_LINE_}$: ', ${expr}$
+
         call flush (output_unit)
     end block
 #endif
@@ -12,18 +13,107 @@
 #:def ALLOCATE(*args)
     @:LOG({'@:ALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'})
     allocate (${', '.join(args)}$)
-    #:if MFC_COMPILER == 'Cray'
-        !$acc enter data create(${', '.join([ arg.split('(')[0] for arg in args ])}$)
-    #:else
-        !$acc enter data create(${', '.join(args)}$)
-    #:endif
+#ifndef CRAY_ACC_WAR
+!$acc enter data create(${', '.join(args)}$)
+#endif
 #:enddef ALLOCATE
 
 #:def DEALLOCATE(*args)
     @:LOG({'@:DEALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'})
     deallocate (${', '.join(args)}$)
-    !$acc exit data delete(${', '.join(args)}$)
+#ifndef CRAY_ACC_WAR
+!$acc exit data delete(${', '.join(args)}$)
+#endif
 #:enddef DEALLOCATE
 
+#:def ALLOCATE_GLOBAL(*args)
+    @:LOG({'@:ALLOCATE_GLOBAL(${re.sub(' +', ' ', ', '.join(args))}$)'})
+#ifdef CRAY_ACC_WAR
+    allocate (${', '.join(('p_' + arg.strip() for arg in args))}$)
+    #:for arg in args
+        ${re.sub('\(.*\)','',arg)}$ => ${ 'p_' + re.sub('\(.*\)','',arg.strip()) }$
+    #:endfor
+!$acc enter data create(${', '.join(('p_' + re.sub('\(.*\)','',arg.strip()) for arg in args))}$) &
+!$acc& attach(${', '.join(map(lambda x: re.sub('\(.*\)','',x), args))}$)
+#else
+    allocate (${', '.join(args)}$)
+!$acc enter data create(${', '.join(args)}$)
+#endif
+
+#:enddef ALLOCATE_GLOBAL
+
+#:def DEALLOCATE_GLOBAL(*args)
+    @:LOG({'@:DEALLOCATE_GLOBAL(${re.sub(' +', ' ', ', '.join(args))}$)'})
+#ifdef CRAY_ACC_WAR
+    !$acc exit data delete(${', '.join(('p_' + arg.strip() for arg in args))}$) &
+    !$acc& detach(${', '.join(args)}$)
+    #:for arg in args
+        nullify (${arg}$)
+    #:endfor
+    deallocate (${', '.join(('p_' + arg.strip() for arg in args))}$)
+#else
+    deallocate (${', '.join(args)}$)
+!$acc exit data delete(${', '.join(args)}$)
+#endif
+
+#:enddef DEALLOCATE_GLOBAL
+
+#:def CRAY_DECLARE_GLOBAL(intype, dim, *args)
+#ifdef CRAY_ACC_WAR
+    ${intype}$, ${dim}$, allocatable, target :: ${', '.join(('p_' + arg.strip() for arg in args))}$
+    ${intype}$, ${dim}$, pointer :: ${', '.join(args)}$
+#else
+    ${intype}$, ${dim}$, allocatable :: ${', '.join(args)}$
+#endif
+#:enddef CRAY_DECLARE_GLOBAL
+
+#:def CRAY_DECLARE_GLOBAL_SCALAR(intype, *args)
+#ifdef CRAY_ACC_WAR
+    ${intype}$, target :: ${', '.join(('p_' + arg.strip() for arg in args))}$
+    ${intype}$, pointer :: ${', '.join(args)}$
+#else
+    ${intype}$::${', '.join(args)}$
+#endif
+#:enddef CRAY_DECLARE_GLOBAL_SCALAR
+
+#:def ACC_SETUP_VFs(*args)
+#ifdef CRAY_ACC_WAR
+    block
+        integer :: macros_setup_vfs_i
+
+        @:LOG({'@:ACC_SETUP_VFs(${', '.join(args)}$)'})
+
+        #:for arg in args
+            !$acc enter data copyin(${arg}$)
+            !$acc enter data copyin(${arg}$%vf)
+            if (allocated(${arg}$%vf)) then
+                do macros_setup_vfs_i = lbound(${arg}$%vf, 1), ubound(${arg}$%vf, 1)
+                    if (associated(${arg}$%vf(macros_setup_vfs_i)%sf)) then
+                        !$acc enter data copyin(${arg}$%vf(macros_setup_vfs_i))
+                        !$acc enter data create(${arg}$%vf(macros_setup_vfs_i)%sf)
+                    end if
+                end do
+            end if
+        #:endfor
+    end block
+#endif
+#:enddef
+
+#:def ACC_SETUP_SFs(*args)
+#ifdef CRAY_ACC_WAR
+    block
+
+        @:LOG({'@:ACC_SETUP_SFs(${', '.join(args)}$)'})
+
+        #:for arg in args
+            !$acc enter data copyin(${arg}$)
+            if (associated(${arg}$%sf)) then
+                !$acc enter data create(${arg}$%sf)
+            end if
+        #:endfor
+    end block
+#endif
+#:enddef
+
 #define t_vec3   real(kind(0d0)), dimension(1:3)
 #define t_mat4x4 real(kind(0d0)), dimension(1:4,1:4)
diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp
index 05346f69f..5dfa90dd7 100644
--- a/src/common/m_derived_types.fpp
+++ b/src/common/m_derived_types.fpp
@@ -272,7 +272,7 @@ module m_derived_types
 
         real(kind(0d0)), dimension(3) :: loc !< Physical location of the ghost point
         real(kind(0d0)), dimension(3) :: ip_loc !< Physical location of the image point
-        real(kind(0d0)), dimension(3) :: ip_grid !< Top left grid point of IP
+        integer, dimension(3) :: ip_grid !< Top left grid point of IP
         real(kind(0d0)), dimension(2, 2, 2) :: interp_coeffs !< Interpolation Coefficients of image point
         integer :: ib_patch_id !< ID of the IB Patch the ghost point is part of
         logical :: slip
diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp
index 528d79a9d..1fb72389a 100644
--- a/src/common/m_helper.fpp
+++ b/src/common/m_helper.fpp
@@ -184,10 +184,15 @@ contains
 
         rhol0 = rhoref
         pl0 = pref
-
+#ifdef MFC_SIMULATION
+        @:ALLOCATE_GLOBAL(pb0(nb), mass_n0(nb), mass_v0(nb), Pe_T(nb))
+        @:ALLOCATE_GLOBAL(k_n(nb), k_v(nb), omegaN(nb))
+        @:ALLOCATE_GLOBAL(Re_trans_T(nb), Re_trans_c(nb), Im_trans_T(nb), Im_trans_c(nb))
+#else
         @:ALLOCATE(pb0(nb), mass_n0(nb), mass_v0(nb), Pe_T(nb))
         @:ALLOCATE(k_n(nb), k_v(nb), omegaN(nb))
         @:ALLOCATE(Re_trans_T(nb), Re_trans_c(nb), Im_trans_T(nb), Im_trans_c(nb))
+#endif
 
         pb0(:) = dflt_real
         mass_n0(:) = dflt_real
diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp
index 3c5596edb..348d6b903 100644
--- a/src/common/m_mpi_common.fpp
+++ b/src/common/m_mpi_common.fpp
@@ -134,7 +134,7 @@ contains
 #ifndef MFC_POST_PROCESS
         if (present(ib_markers)) then
 
-#ifdef MPI_SIMULATION
+#ifdef MFC_PRE_PROCESS
             MPI_IO_IB_DATA%var%sf => ib_markers%sf
 #else
             MPI_IO_IB_DATA%var%sf => ib_markers%sf(0:m, 0:n, 0:p)
diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp
index 0c636c799..1dada8cb4 100644
--- a/src/common/m_phase_change.fpp
+++ b/src/common/m_phase_change.fpp
@@ -74,7 +74,6 @@ contains
         !!      selecting the phase change module that will be used
         !!      (pT- or pTg-equilibrium)
     subroutine s_initialize_phasechange_module()
-
         ! variables used in the calculation of the saturation curves for fluids 1 and 2
         A = (gs_min(lp)*cvs(lp) - gs_min(vp)*cvs(vp) &
              + qvps(vp) - qvps(lp))/((gs_min(vp) - 1.0d0)*cvs(vp))
@@ -87,14 +86,6 @@ contains
         D = ((gs_min(lp) - 1.0d0)*cvs(lp)) &
             /((gs_min(vp) - 1.0d0)*cvs(vp))
 
-        ! Associating procedural pointer to the subroutine that will be
-        ! utilized to calculate the solution to the selected relaxation system
-        if ((relax_model == 5) .or. (relax_model == 6)) then
-            s_relaxation_solver => s_infinite_relaxation_k
-        else
-            call s_mpi_abort('relaxation solver was not set!')
-        end if
-
     end subroutine s_initialize_phasechange_module !-------------------------------
 
     !>  This subroutine is created to activate either the pT- (N fluids) or the
@@ -103,6 +94,7 @@ contains
         !!      state conditions.
         !!  @param q_cons_vf Cell-average conservative variables
     subroutine s_infinite_relaxation_k(q_cons_vf) ! ----------------
+
         type(scalar_field), dimension(sys_size), intent(INOUT) :: q_cons_vf
         real(kind(0.0d0)) :: pS, pSOV, pSSL !< equilibrium pressure for mixture, overheated vapor, and subcooled liquid
         real(kind(0.0d0)) :: TS, TSOV, TSSL, TSatOV, TSatSL !< equilibrium temperature for mixture, overheated vapor, and subcooled liquid. Saturation Temperatures at overheated vapor and subcooled liquid
@@ -305,6 +297,7 @@ contains
         !!  @param rhoe mixture energy
         !!  @param TS equilibrium temperature at the interface
     subroutine s_infinite_pt_relaxation_k(j, k, l, MFL, pS, p_infpT, rM, q_cons_vf, rhoe, TS)
+
         !$acc routine seq
 
         ! initializing variables
@@ -321,10 +314,7 @@ contains
         integer :: i, ns !< generic loop iterators
 
         ! auxiliary variables for the pT-equilibrium solver
-        mCP = 0.0d0; mQ = 0.0d0; p_infpT = ps_inf; pk(1:num_fluids) = 0.0d0
-
-        ig(1:num_fluids) = 0
-
+        mCP = 0.0d0; mQ = 0.0d0; p_infpT = ps_inf; 
         ! Performing tests before initializing the pT-equilibrium
         !$acc loop seq
         do i = 1, num_fluids
@@ -409,6 +399,7 @@ contains
         !!  @param q_cons_vf Cell-average conservative variables
         !!  @param TS equilibrium temperature at the interface
     subroutine s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS)
+
         !$acc routine seq
 
         type(scalar_field), dimension(sys_size), intent(INOUT) :: q_cons_vf
@@ -424,7 +415,6 @@ contains
 
         !< Generic loop iterators
         integer :: i, ns
-
         ! pTg-equilibrium solution procedure
         ! Newton Solver parameters
         ! counter
@@ -517,7 +507,6 @@ contains
 
         ! common temperature
         TS = (rhoe + pS - mQ)/mCP
-
     end subroutine s_infinite_ptg_relaxation_k ! -----------------------
 
     !>  This auxiliary subroutine corrects the partial densities of the REACTING fluids in case one of them is negative
@@ -538,7 +527,6 @@ contains
         real(kind(0.0d0)), intent(OUT) :: MCT
         integer, intent(IN) :: j, k, l
         !> @}
-
         if (rM < 0.0d0) then
 
             if ((q_cons_vf(lp + contxb - 1)%sf(j, k, l) >= -1.0d0*mixM) .and. &
@@ -572,7 +560,6 @@ contains
             q_cons_vf(vp + contxb - 1)%sf(j, k, l) = MCT*rM
 
         end if
-
     end subroutine s_correct_partial_densities
 
     !>  This auxiliary subroutine calculates the 2 x 2 Jacobian and, its inverse and transpose
@@ -785,9 +772,6 @@ contains
 
     !>  This subroutine finalizes the phase change module
     subroutine s_finalize_relaxation_solver_module()
-
-        s_relaxation_solver => null()
-
     end subroutine
 
 #endif
diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp
index bc7fa2911..d1b9a008f 100644
--- a/src/common/m_variables_conversion.fpp
+++ b/src/common/m_variables_conversion.fpp
@@ -89,11 +89,17 @@ module m_variables_conversion
 !$acc declare create(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps)
 #endif
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), Gs)
+    @:CRAY_DECLARE_GLOBAL(integer,         dimension(:), bubrs)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), Res)
+!$acc declare link(bubrs, Gs, Res)
+#else
     real(kind(0d0)), allocatable, dimension(:) :: Gs
     integer, allocatable, dimension(:) :: bubrs
     real(kind(0d0)), allocatable, dimension(:, :) :: Res
-    !$acc declare create(bubrs, Gs, Res)
-
+!$acc declare create(bubrs, Gs, Res)
+#endif
     integer :: is1b, is2b, is3b, is1e, is2e, is3e
     !$acc declare create(is1b, is2b, is3b, is1e, is2e, is3e)
 
@@ -449,7 +455,11 @@ contains
                                                           gamma_K, pi_inf_K, qv_K, &
                                                           alpha_K, alpha_rho_K, Re_K, k, l, r, &
                                                           G_K, G)
-        !$acc routine seq
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_acc
+#else
+!$acc routine seq
+#endif
 
         real(kind(0d0)), intent(OUT) :: rho_K, gamma_K, pi_inf_K, qv_K
 
@@ -527,7 +537,11 @@ contains
     subroutine s_convert_species_to_mixture_variables_bubbles_acc(rho_K, &
                                                                   gamma_K, pi_inf_K, qv_K, &
                                                                   alpha_K, alpha_rho_K, Re_K, k, l, r)
-        !$acc routine seq
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS s_convert_species_to_mixture_variables_bubbles_acc
+#else
+!$acc routine seq
+#endif
 
         real(kind(0d0)), intent(INOUT) :: rho_K, gamma_K, pi_inf_K, qv_K
 
@@ -612,8 +626,20 @@ contains
         end if
 #endif
 
-        !$acc update device(ixb, ixe, iyb, iye, izb, ize)
+!$acc enter data copyin(ixb, ixe, iyb, iye, izb, ize)
+!$acc enter data copyin(is1b, is1e, is2b, is2e, is3b, is3e)
+!$acc update device(ixb, ixe, iyb, iye, izb, ize)
 
+#ifdef MFC_SIMULATION
+        @:ALLOCATE_GLOBAL(gammas (1:num_fluids))
+        @:ALLOCATE_GLOBAL(gs_min (1:num_fluids))
+        @:ALLOCATE_GLOBAL(pi_infs(1:num_fluids))
+        @:ALLOCATE_GLOBAL(ps_inf(1:num_fluids))
+        @:ALLOCATE_GLOBAL(cvs    (1:num_fluids))
+        @:ALLOCATE_GLOBAL(qvs    (1:num_fluids))
+        @:ALLOCATE_GLOBAL(qvps    (1:num_fluids))
+        @:ALLOCATE_GLOBAL(Gs     (1:num_fluids))
+#else
         @:ALLOCATE(gammas (1:num_fluids))
         @:ALLOCATE(gs_min (1:num_fluids))
         @:ALLOCATE(pi_infs(1:num_fluids))
@@ -622,6 +648,7 @@ contains
         @:ALLOCATE(qvs    (1:num_fluids))
         @:ALLOCATE(qvps    (1:num_fluids))
         @:ALLOCATE(Gs     (1:num_fluids))
+#endif
 
         do i = 1, num_fluids
             gammas(i) = fluid_pp(i)%gamma
@@ -638,8 +665,7 @@ contains
 #ifdef MFC_SIMULATION
 
         if (any(Re_size > 0)) then
-            @:ALLOCATE(Res(1:2, 1:maxval(Re_size)))
-
+            @:ALLOCATE_GLOBAL(Res(1:2, 1:maxval(Re_size)))
             do i = 1, 2
                 do j = 1, Re_size(i)
                     Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i)
@@ -651,7 +677,11 @@ contains
 #endif
 
         if (bubbles) then
+#ifdef MFC_SIMULATION
+            @:ALLOCATE_GLOBAL(bubrs(1:nb))
+#else
             @:ALLOCATE(bubrs(1:nb))
+#endif
 
             do i = 1, nb
                 bubrs(i) = bub_idx%rs(i)
@@ -837,7 +867,7 @@ contains
 
         real(kind(0d0)) :: pres
 
-        integer :: i, j, k, l !< Generic loop iterators
+        integer :: i, j, k, l, q !< Generic loop iterators
 
         real(kind(0.d0)) :: ntmp
 
@@ -978,6 +1008,7 @@ contains
                     do i = advxb, advxe
                         qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)
                     end do
+
                 end do
             end do
         end do
@@ -1014,7 +1045,7 @@ contains
         real(kind(0d0)) :: dyn_pres
         real(kind(0d0)) :: nbub, R3, vftmp, R3tmp
         real(kind(0d0)), dimension(nb) :: Rtmp
-        real(kind(0d0)) :: G
+        real(kind(0d0)) :: G = 0d0
         real(kind(0d0)), dimension(2) :: Re_K
 
         integer :: i, j, k, l, q !< Generic loop iterators
@@ -1245,7 +1276,6 @@ contains
                     ! energy flux, u(E+p)
                     FK_vf(j, k, l, E_idx) = vel_K(dir_idx(1))*(E_K + pres_K)
 
-                    ! have been using == 2
                     if (riemann_solver == 1) then
                         !$acc loop seq
                         do i = advxb, advxe
@@ -1266,6 +1296,7 @@ contains
                         end do
 
                     end if
+
                 end do
             end do
         end do
@@ -1281,11 +1312,17 @@ contains
         deallocate (rho_sf, gamma_sf, pi_inf_sf, qv_sf)
 #endif
 
+#ifdef MFC_SIMULATION
+        @:DEALLOCATE_GLOBAL(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps, Gs)
+        if (bubbles) then
+            @:DEALLOCATE_GLOBAL(bubrs)
+        end if
+#else
         @:DEALLOCATE(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps, Gs)
-
         if (bubbles) then
             @:DEALLOCATE(bubrs)
         end if
+#endif
 
         ! Nullifying the procedure pointer to the subroutine transferring/
         ! computing the mixture/species variables to the mixture variables
diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90
index db871c955..01320658d 100644
--- a/src/post_process/m_start_up.f90
+++ b/src/post_process/m_start_up.f90
@@ -25,7 +25,7 @@ module m_start_up
     use m_data_output           !< Procedures that write the grid and chosen flow
                                 !! variable(s) to the formatted database file(s)
 
-    use m_derived_variables     !< Procedures used to compute quantites derived
+    use m_derived_variables     !< Procedures used to compute quantities derived
                                 !! from the conservative and primitive variables
     use m_helper
 
diff --git a/src/pre_process/m_checker.f90 b/src/pre_process/m_checker.f90
index cc728e313..7d18b3020 100644
--- a/src/pre_process/m_checker.f90
+++ b/src/pre_process/m_checker.f90
@@ -165,16 +165,31 @@ subroutine s_check_inputs()
 
         if (cyl_coord) then ! Cartesian coordinates
 
-            ! Constraints on domain boundaries for cylindrical coordinates
-            if (n == 0 &
-                .or. &
-                y_domain%beg /= 0d0 &
-                .or. &
-                y_domain%end == dflt_real &
-                .or. &
-                y_domain%end < 0d0 &
-                .or. &
-                y_domain%beg >= y_domain%end) then
+            ! in case restart of a simulation
+            if (old_grid .and. old_ic) then
+                ! checking of there is any input to the domains
+                if ((x_domain%beg /= dflt_real .or. x_domain%end /= dflt_real) &
+                    .or. &
+                    (y_domain%beg /= dflt_real .or. y_domain%end /= dflt_real) &
+                    .or. &
+                    (y_domain%beg /= dflt_real .or. y_domain%end /= dflt_real)) then
+                    call s_mpi_abort('domain are not dflt_real.'// &
+                                     'Please, correct them')
+                elseif (m == dflt_int .or. n == dflt_int .or. p == dflt_int) then
+                    call s_mpi_abort('m, n, and/or p are set to dflt_int.'// &
+                                     'Please, correct them')
+                end if
+                ! in case it is NOT restart
+                ! Constraints on domain boundaries for cylindrical coordinates
+            elseif (n == 0 &
+                    .or. &
+                    y_domain%beg /= 0d0 &
+                    .or. &
+                    y_domain%end == dflt_real &
+                    .or. &
+                    y_domain%end < 0d0 &
+                    .or. &
+                    y_domain%beg >= y_domain%end) then
                 call s_mpi_abort('Unsupported choice of the combination of '// &
                                  'cyl_coord and n, y_domain%beg, or         '// &
                                  'y_domain%end. Exiting ...')
diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp
index 5d4746b81..14a7f2a4d 100644
--- a/src/pre_process/m_start_up.fpp
+++ b/src/pre_process/m_start_up.fpp
@@ -837,7 +837,7 @@ contains
 &                pTg-equilirium (relax = "T" activated)'
             end if
 
-            call s_relaxation_solver(q_cons_vf)
+            call s_infinite_relaxation_k(q_cons_vf)
         end if
 
         call s_write_data_files(q_cons_vf, ib_markers)
diff --git a/src/simulation/include/case.fpp b/src/simulation/include/case.fpp
index 2fb832db3..b75ab9152 100644
--- a/src/simulation/include/case.fpp
+++ b/src/simulation/include/case.fpp
@@ -1,2 +1,3 @@
-! This file is purposefully empty. It is only important for builds that make use
-! of --case-optimization.
+! This file was generated by MFC. It is only used if the --case-optimization
+! option is passed to ./mfc.sh run or test, enabling a GPU-oriented optimization
+! that hard-codes certain case parameters from the input file.
diff --git a/src/simulation/m_bubbles.fpp b/src/simulation/m_bubbles.fpp
index 47f195b62..971541770 100644
--- a/src/simulation/m_bubbles.fpp
+++ b/src/simulation/m_bubbles.fpp
@@ -24,10 +24,24 @@ module m_bubbles
     real(kind(0.d0)) :: chi_vw  !< Bubble wall properties (Ando 2010)
     real(kind(0.d0)) :: k_mw    !< Bubble wall properties (Ando 2010)
     real(kind(0.d0)) :: rho_mw  !< Bubble wall properties (Ando 2010)
-    !$acc declare create(chi_vw, k_mw, rho_mw)
+!$acc declare create(chi_vw, k_mw, rho_mw)
 
+#ifdef CRAY_ACC_WAR
     !> @name Bubble dynamic source terms
     !> @{
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), bub_adv_src)
+    !$acc declare link(bub_adv_src)
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), bub_r_src, bub_v_src, bub_p_src, bub_m_src)
+    !$acc declare link(bub_r_src, bub_v_src, bub_p_src, bub_m_src)
+
+    type(scalar_field) :: divu !< matrix for div(u)
+    !$acc declare create(divu)
+
+    @:CRAY_DECLARE_GLOBAL(integer, dimension(:), rs, vs, ms, ps)
+!$acc declare link(rs, vs, ms, ps)
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :) :: bub_adv_src
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: bub_r_src, bub_v_src, bub_p_src, bub_m_src
     !$acc declare create(bub_adv_src, bub_r_src, bub_v_src, bub_p_src, bub_m_src)
@@ -36,7 +50,8 @@ module m_bubbles
     !$acc declare create(divu)
 
     integer, allocatable, dimension(:) :: rs, vs, ms, ps
-    !$acc declare create(rs, vs, ms, ps)
+!$acc declare create(rs, vs, ms, ps)
+#endif
 
 contains
 
@@ -53,11 +68,11 @@ contains
         ix%end = m - ix%beg; iy%end = n - iy%beg; iz%end = p - iz%beg
         ! ==================================================================
 
-        @:ALLOCATE(rs(1:nb))
-        @:ALLOCATE(vs(1:nb))
+        @:ALLOCATE_GLOBAL(rs(1:nb))
+        @:ALLOCATE_GLOBAL(vs(1:nb))
         if (.not. polytropic) then
-            @:ALLOCATE(ps(1:nb))
-            @:ALLOCATE(ms(1:nb))
+            @:ALLOCATE_GLOBAL(ps(1:nb))
+            @:ALLOCATE_GLOBAL(ms(1:nb))
         end if
 
         do l = 1, nb
@@ -75,14 +90,15 @@ contains
         end if
 
         @:ALLOCATE(divu%sf(ix%beg:ix%end, iy%beg:iy%end, iz%beg:iz%end))
+        @:ACC_SETUP_SFs(divu)
 
-        @:ALLOCATE(bub_adv_src(0:m, 0:n, 0:p))
-        @:ALLOCATE(bub_r_src(0:m, 0:n, 0:p, 1:nb))
-        @:ALLOCATE(bub_v_src(0:m, 0:n, 0:p, 1:nb))
-        @:ALLOCATE(bub_p_src(0:m, 0:n, 0:p, 1:nb))
-        @:ALLOCATE(bub_m_src(0:m, 0:n, 0:p, 1:nb))
+        @:ALLOCATE_GLOBAL(bub_adv_src(0:m, 0:n, 0:p))
+        @:ALLOCATE_GLOBAL(bub_r_src(0:m, 0:n, 0:p, 1:nb))
+        @:ALLOCATE_GLOBAL(bub_v_src(0:m, 0:n, 0:p, 1:nb))
+        @:ALLOCATE_GLOBAL(bub_p_src(0:m, 0:n, 0:p, 1:nb))
+        @:ALLOCATE_GLOBAL(bub_m_src(0:m, 0:n, 0:p, 1:nb))
 
-    end subroutine
+    end subroutine s_initialize_bubbles_module
 
     subroutine s_compute_bubbles_rhs(idir, q_prim_vf)
 
@@ -137,7 +153,7 @@ contains
 
         end if
 
-    end subroutine
+    end subroutine s_compute_bubbles_rhs
 
     !>  The purpose of this procedure is to compute the source terms
         !!      that are needed for the bubble modeling
diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp
index 822e66dfd..febf24695 100644
--- a/src/simulation/m_cbc.fpp
+++ b/src/simulation/m_cbc.fpp
@@ -18,6 +18,7 @@
 !!                           8) Supersonic Outflow
 !!              Please refer to Thompson (1987, 1990) for detailed descriptions.
 
+#:include 'macros.fpp'
 #:include 'inline_conversions.fpp'
 
 module m_cbc
@@ -39,42 +40,87 @@ module m_cbc
     !! The cell-average primitive variables. They are obtained by reshaping (RS)
     !! q_prim_vf in the coordinate direction normal to the domain boundary along
     !! which the CBC is applied.
-
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), q_prim_rsx_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), q_prim_rsy_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), q_prim_rsz_vf)
+!$acc declare link(q_prim_rsx_vf, q_prim_rsy_vf, q_prim_rsz_vf)
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: q_prim_rsx_vf
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: q_prim_rsy_vf
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: q_prim_rsz_vf
+#endif
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(type(scalar_field), dimension(:), F_rs_vf, F_src_rs_vf)
+!$acc declare link(F_rs_vf, F_src_rs_vf)
+#else
     type(scalar_field), allocatable, dimension(:) :: F_rs_vf, F_src_rs_vf !<
+#endif
     !! Cell-average fluxes (src - source). These are directly determined from the
     !! cell-average primitive variables, q_prims_rs_vf, and not a Riemann solver.
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), F_rsx_vf, F_src_rsx_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), F_rsy_vf, F_src_rsy_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), F_rsz_vf, F_src_rsz_vf)
+!$acc declare link(F_rsx_vf, F_src_rsx_vf, F_rsy_vf, F_src_rsy_vf, F_rsz_vf, F_src_rsz_vf)
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: F_rsx_vf, F_src_rsx_vf !<
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: F_rsy_vf, F_src_rsy_vf !<
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: F_rsz_vf, F_src_rsz_vf !<
-
+#endif
+
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), flux_rsx_vf, flux_src_rsx_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), flux_rsy_vf, flux_src_rsy_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), flux_rsz_vf, flux_src_rsz_vf)
+!$acc declare link(flux_rsx_vf, flux_src_rsx_vf, flux_rsy_vf, flux_src_rsy_vf, flux_rsz_vf, flux_src_rsz_vf)
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: flux_rsx_vf, flux_src_rsx_vf !<
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: flux_rsy_vf, flux_src_rsy_vf
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: flux_rsz_vf, flux_src_rsz_vf
+#endif
 
     real(kind(0d0)) :: c           !< Cell averaged speed of sound
     real(kind(0d0)), dimension(2) :: Re          !< Cell averaged Reynolds numbers
+    !$acc declare create(c, Re)
 
     real(kind(0d0)) :: dpres_ds !< Spatial derivatives in s-dir of pressure
+!$acc declare create(dpres_ds)
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), ds)
+!$acc declare link(ds)
+#else
     real(kind(0d0)), allocatable, dimension(:) :: ds !< Cell-width distribution in the s-direction
+#endif
 
     ! CBC Coefficients =========================================================
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), fd_coef_x)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), fd_coef_y)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), fd_coef_z)
+!$acc declare link(fd_coef_x, fd_coef_y, fd_coef_z)
+#else
     real(kind(0d0)), allocatable, dimension(:, :) :: fd_coef_x !< Finite diff. coefficients x-dir
     real(kind(0d0)), allocatable, dimension(:, :) :: fd_coef_y !< Finite diff. coefficients y-dir
     real(kind(0d0)), allocatable, dimension(:, :) :: fd_coef_z !< Finite diff. coefficients z-dir
+#endif
     !! The first dimension identifies the location of a coefficient in the FD
     !! formula, while the last dimension denotes the location of the CBC.
 
     ! Bug with NVHPC when using nullified pointers in a declare create
     !    real(kind(0d0)), pointer, dimension(:, :) :: fd_coef => null()
-
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), pi_coef_x)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), pi_coef_y)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), pi_coef_z)
+!$acc declare link(pi_coef_x, pi_coef_y, pi_coef_z)
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :) :: pi_coef_x !< Polynominal interpolant coefficients in x-dir
     real(kind(0d0)), allocatable, dimension(:, :, :) :: pi_coef_y !< Polynominal interpolant coefficients in y-dir
     real(kind(0d0)), allocatable, dimension(:, :, :) :: pi_coef_z !< Polynominal interpolant coefficients in z-dir
+#endif
     !! The first dimension of the array identifies the polynomial, the
     !! second dimension identifies the position of its coefficients and the last
     !! dimension denotes the location of the CBC.
@@ -82,17 +128,19 @@ module m_cbc
     ! ==========================================================================
 
     type(int_bounds_info) :: is1, is2, is3 !< Indical bounds in the s1-, s2- and s3-directions
+    !$acc declare create(is1, is2, is3)
 
     integer :: dj
-
     integer :: bcxb, bcxe, bcyb, bcye, bczb, bcze
-
     integer :: cbc_dir, cbc_loc
+!$acc declare create(dj, bcxb, bcxe, bcyb, bcye, bczb, bcze, cbc_dir, cbc_loc)
 
-    !$acc declare create(q_prim_rsx_vf, q_prim_rsy_vf, q_prim_rsz_vf,  F_rsx_vf, F_src_rsx_vf,flux_rsx_vf, flux_src_rsx_vf, &
-    !$acc                 F_rsy_vf, F_src_rsy_vf,flux_rsy_vf, flux_src_rsy_vf, F_rsz_vf, F_src_rsz_vf,flux_rsz_vf, flux_src_rsz_vf,Re, &
-    !$acc                 ds,fd_coef_x,fd_coef_y,fd_coef_z,      &
-    !$acc                 pi_coef_x,pi_coef_y,pi_coef_z,  bcxb, bcxe, bcyb, bcye, bczb, bcze, is1, is2, is3, dj, cbc_dir, cbc_loc)
+#ifndef CRAY_ACC_WAR
+!$acc declare create(q_prim_rsx_vf, q_prim_rsy_vf, q_prim_rsz_vf,  F_rsx_vf, F_src_rsx_vf,flux_rsx_vf, flux_src_rsx_vf, &
+!$acc                 F_rsy_vf, F_src_rsy_vf,flux_rsy_vf, flux_src_rsy_vf, F_rsz_vf, F_src_rsz_vf,flux_rsz_vf, flux_src_rsz_vf,Re, &
+!$acc                 ds,fd_coef_x,fd_coef_y,fd_coef_z,      &
+!$acc                 pi_coef_x,pi_coef_y,pi_coef_z)
+#endif
 
 contains
 
@@ -127,29 +175,29 @@ contains
         end if
         is3%end = p - is3%beg
 
-        allocate (q_prim_rsx_vf(0:buff_size, &
-                                is2%beg:is2%end, &
-                                is3%beg:is3%end, 1:sys_size))
+        @:ALLOCATE_GLOBAL(q_prim_rsx_vf(0:buff_size, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, 1:sys_size))
 
         if (weno_order > 1) then
 
-            allocate (F_rsx_vf(0:buff_size, &
-                               is2%beg:is2%end, &
-                               is3%beg:is3%end, 1:adv_idx%end))
+            @:ALLOCATE_GLOBAL(F_rsx_vf(0:buff_size, &
+                is2%beg:is2%end, &
+                is3%beg:is3%end, 1:adv_idx%end))
 
-            allocate (F_src_rsx_vf(0:buff_size, &
-                                   is2%beg:is2%end, &
-                                   is3%beg:is3%end, adv_idx%beg:adv_idx%end))
+            @:ALLOCATE_GLOBAL(F_src_rsx_vf(0:buff_size, &
+                is2%beg:is2%end, &
+                is3%beg:is3%end, adv_idx%beg:adv_idx%end))
 
         end if
 
-        allocate (flux_rsx_vf(-1:buff_size, &
-                              is2%beg:is2%end, &
-                              is3%beg:is3%end, 1:adv_idx%end))
+        @:ALLOCATE_GLOBAL(flux_rsx_vf(-1:buff_size, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, 1:adv_idx%end))
 
-        allocate (flux_src_rsx_vf(-1:buff_size, &
-                                  is2%beg:is2%end, &
-                                  is3%beg:is3%end, adv_idx%beg:adv_idx%end))
+        @:ALLOCATE_GLOBAL(flux_src_rsx_vf(-1:buff_size, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, adv_idx%beg:adv_idx%end))
 
         if (n > 0) then
 
@@ -170,29 +218,29 @@ contains
             end if
             is3%end = p - is3%beg
 
-            allocate (q_prim_rsy_vf(0:buff_size, &
-                                    is2%beg:is2%end, &
-                                    is3%beg:is3%end, 1:sys_size))
+            @:ALLOCATE_GLOBAL(q_prim_rsy_vf(0:buff_size, &
+                is2%beg:is2%end, &
+                is3%beg:is3%end, 1:sys_size))
 
             if (weno_order > 1) then
 
-                allocate (F_rsy_vf(0:buff_size, &
-                                   is2%beg:is2%end, &
-                                   is3%beg:is3%end, 1:adv_idx%end))
+                @:ALLOCATE_GLOBAL(F_rsy_vf(0:buff_size, &
+                    is2%beg:is2%end, &
+                    is3%beg:is3%end, 1:adv_idx%end))
 
-                allocate (F_src_rsy_vf(0:buff_size, &
-                                       is2%beg:is2%end, &
-                                       is3%beg:is3%end, adv_idx%beg:adv_idx%end))
+                @:ALLOCATE_GLOBAL(F_src_rsy_vf(0:buff_size, &
+                    is2%beg:is2%end, &
+                    is3%beg:is3%end, adv_idx%beg:adv_idx%end))
 
             end if
 
-            allocate (flux_rsy_vf(-1:buff_size, &
-                                  is2%beg:is2%end, &
-                                  is3%beg:is3%end, 1:adv_idx%end))
+            @:ALLOCATE_GLOBAL(flux_rsy_vf(-1:buff_size, &
+                is2%beg:is2%end, &
+                is3%beg:is3%end, 1:adv_idx%end))
 
-            allocate (flux_src_rsy_vf(-1:buff_size, &
-                                      is2%beg:is2%end, &
-                                      is3%beg:is3%end, adv_idx%beg:adv_idx%end))
+            @:ALLOCATE_GLOBAL(flux_src_rsy_vf(-1:buff_size, &
+                is2%beg:is2%end, &
+                is3%beg:is3%end, adv_idx%beg:adv_idx%end))
 
         end if
 
@@ -215,42 +263,42 @@ contains
             end if
             is3%end = m - is3%beg
 
-            allocate (q_prim_rsz_vf(0:buff_size, &
-                                    is2%beg:is2%end, &
-                                    is3%beg:is3%end, 1:sys_size))
+            @:ALLOCATE_GLOBAL(q_prim_rsz_vf(0:buff_size, &
+                is2%beg:is2%end, &
+                is3%beg:is3%end, 1:sys_size))
 
             if (weno_order > 1) then
 
-                allocate (F_rsz_vf(0:buff_size, &
-                                   is2%beg:is2%end, &
-                                   is3%beg:is3%end, 1:adv_idx%end))
+                @:ALLOCATE_GLOBAL(F_rsz_vf(0:buff_size, &
+                    is2%beg:is2%end, &
+                    is3%beg:is3%end, 1:adv_idx%end))
 
-                allocate (F_src_rsz_vf(0:buff_size, &
-                                       is2%beg:is2%end, &
-                                       is3%beg:is3%end, adv_idx%beg:adv_idx%end))
+                @:ALLOCATE_GLOBAL(F_src_rsz_vf(0:buff_size, &
+                    is2%beg:is2%end, &
+                    is3%beg:is3%end, adv_idx%beg:adv_idx%end))
 
             end if
 
-            allocate (flux_rsz_vf(-1:buff_size, &
-                                  is2%beg:is2%end, &
-                                  is3%beg:is3%end, 1:adv_idx%end))
+            @:ALLOCATE_GLOBAL(flux_rsz_vf(-1:buff_size, &
+                is2%beg:is2%end, &
+                is3%beg:is3%end, 1:adv_idx%end))
 
-            allocate (flux_src_rsz_vf(-1:buff_size, &
-                                      is2%beg:is2%end, &
-                                      is3%beg:is3%end, adv_idx%beg:adv_idx%end))
+            @:ALLOCATE_GLOBAL(flux_src_rsz_vf(-1:buff_size, &
+                is2%beg:is2%end, &
+                is3%beg:is3%end, adv_idx%beg:adv_idx%end))
 
         end if
 
         ! Allocating the cell-width distribution in the s-direction
-        allocate (ds(0:buff_size))
+        @:ALLOCATE_GLOBAL(ds(0:buff_size))
 
         ! Allocating/Computing CBC Coefficients in x-direction =============
         if (all((/bc_x%beg, bc_x%end/) <= -5) .and. all((/bc_x%beg, bc_x%end/) >= -13)) then
 
-            allocate (fd_coef_x(0:buff_size, -1:1))
+            @:ALLOCATE_GLOBAL(fd_coef_x(0:buff_size, -1:1))
 
             if (weno_order > 1) then
-                allocate (pi_coef_x(0:weno_polyn - 1, 0:weno_order - 3, -1:1))
+                @:ALLOCATE_GLOBAL(pi_coef_x(0:weno_polyn - 1, 0:weno_order - 3, -1:1))
             end if
 
             call s_compute_cbc_coefficients(1, -1)
@@ -258,20 +306,20 @@ contains
 
         elseif (bc_x%beg <= -5 .and. bc_x%beg >= -13) then
 
-            allocate (fd_coef_x(0:buff_size, -1:-1))
+            @:ALLOCATE_GLOBAL(fd_coef_x(0:buff_size, -1:-1))
 
             if (weno_order > 1) then
-                allocate (pi_coef_x(0:weno_polyn - 1, 0:weno_order - 3, -1:-1))
+                @:ALLOCATE_GLOBAL(pi_coef_x(0:weno_polyn - 1, 0:weno_order - 3, -1:-1))
             end if
 
             call s_compute_cbc_coefficients(1, -1)
 
         elseif (bc_x%end <= -5 .and. bc_x%end >= -13) then
 
-            allocate (fd_coef_x(0:buff_size, 1:1))
+            @:ALLOCATE_GLOBAL(fd_coef_x(0:buff_size, 1:1))
 
             if (weno_order > 1) then
-                allocate (pi_coef_x(0:weno_polyn - 1, 0:weno_order - 3, 1:1))
+                @:ALLOCATE_GLOBAL(pi_coef_x(0:weno_polyn - 1, 0:weno_order - 3, 1:1))
             end if
 
             call s_compute_cbc_coefficients(1, 1)
@@ -284,10 +332,10 @@ contains
 
             if (all((/bc_y%beg, bc_y%end/) <= -5) .and. all((/bc_y%beg, bc_y%end/) >= -13)) then
 
-                allocate (fd_coef_y(0:buff_size, -1:1))
+                @:ALLOCATE_GLOBAL(fd_coef_y(0:buff_size, -1:1))
 
                 if (weno_order > 1) then
-                    allocate (pi_coef_y(0:weno_polyn - 1, 0:weno_order - 3, -1:1))
+                    @:ALLOCATE_GLOBAL(pi_coef_y(0:weno_polyn - 1, 0:weno_order - 3, -1:1))
                 end if
 
                 call s_compute_cbc_coefficients(2, -1)
@@ -295,20 +343,20 @@ contains
 
             elseif (bc_y%beg <= -5 .and. bc_y%beg >= -13) then
 
-                allocate (fd_coef_y(0:buff_size, -1:-1))
+                @:ALLOCATE_GLOBAL(fd_coef_y(0:buff_size, -1:-1))
 
                 if (weno_order > 1) then
-                    allocate (pi_coef_y(0:weno_polyn - 1, 0:weno_order - 3, -1:-1))
+                    @:ALLOCATE_GLOBAL(pi_coef_y(0:weno_polyn - 1, 0:weno_order - 3, -1:-1))
                 end if
 
                 call s_compute_cbc_coefficients(2, -1)
 
             elseif (bc_y%end <= -5 .and. bc_y%end >= -13) then
 
-                allocate (fd_coef_y(0:buff_size, 1:1))
+                @:ALLOCATE_GLOBAL(fd_coef_y(0:buff_size, 1:1))
 
                 if (weno_order > 1) then
-                    allocate (pi_coef_y(0:weno_polyn - 1, 0:weno_order - 3, 1:1))
+                    @:ALLOCATE_GLOBAL(pi_coef_y(0:weno_polyn - 1, 0:weno_order - 3, 1:1))
                 end if
 
                 call s_compute_cbc_coefficients(2, 1)
@@ -323,10 +371,10 @@ contains
 
             if (all((/bc_z%beg, bc_z%end/) <= -5) .and. all((/bc_z%beg, bc_z%end/) >= -13)) then
 
-                allocate (fd_coef_z(0:buff_size, -1:1))
+                @:ALLOCATE_GLOBAL(fd_coef_z(0:buff_size, -1:1))
 
                 if (weno_order > 1) then
-                    allocate (pi_coef_z(0:weno_polyn - 1, 0:weno_order - 3, -1:1))
+                    @:ALLOCATE_GLOBAL(pi_coef_z(0:weno_polyn - 1, 0:weno_order - 3, -1:1))
                 end if
 
                 call s_compute_cbc_coefficients(3, -1)
@@ -334,20 +382,20 @@ contains
 
             elseif (bc_z%beg <= -5 .and. bc_z%beg >= -13) then
 
-                allocate (fd_coef_z(0:buff_size, -1:-1))
+                @:ALLOCATE_GLOBAL(fd_coef_z(0:buff_size, -1:-1))
 
                 if (weno_order > 1) then
-                    allocate (pi_coef_z(0:weno_polyn - 1, 0:weno_order - 3, -1:-1))
+                    @:ALLOCATE_GLOBAL(pi_coef_z(0:weno_polyn - 1, 0:weno_order - 3, -1:-1))
                 end if
 
                 call s_compute_cbc_coefficients(3, -1)
 
             elseif (bc_z%end <= -5 .and. bc_z%end >= -13) then
 
-                allocate (fd_coef_z(0:buff_size, 1:1))
+                @:ALLOCATE_GLOBAL(fd_coef_z(0:buff_size, 1:1))
 
                 if (weno_order > 1) then
-                    allocate (pi_coef_z(0:weno_polyn - 1, 0:weno_order - 3, 1:1))
+                    @:ALLOCATE_GLOBAL(pi_coef_z(0:weno_polyn - 1, 0:weno_order - 3, 1:1))
                 end if
 
                 call s_compute_cbc_coefficients(3, 1)
@@ -748,7 +796,6 @@ contains
 
                         if (bubbles) then
                             call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv, adv, alpha_rho, Re_cbc, 0, k, r)
-
                         else
                             call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv, alpha_rho, Re_cbc, 0, k, r)
                         end if
@@ -958,10 +1005,8 @@ contains
         ! The reshaping of outputted data and disssociation of the FD and PI
         ! coefficients, or CBC coefficients, respectively, based on selected
         ! CBC coordinate direction.
-
         call s_finalize_cbc(flux_vf, flux_src_vf, &
                             ix, iy, iz)
-
     end subroutine s_cbc ! -------------------------------------------------
 
     !>  The computation of parameters, the allocation of memory,
@@ -1007,8 +1052,8 @@ contains
         end if
 
         dj = max(0, cbc_loc)
-
-        !$acc update device(is1, is2, is3, dir_idx, dir_flg, dj)
+        !$acc update device(is1, is2, is3, dj)
+        !$acc update device( dir_idx, dir_flg)
 
         ! Reshaping Inputted Data in x-direction ===========================
         if (cbc_dir == 1) then
@@ -1461,46 +1506,55 @@ contains
         if (is_cbc .eqv. .false.) return
 
         ! Deallocating the cell-average primitive variables
-        deallocate (q_prim_rsx_vf)
+        @:DEALLOCATE_GLOBAL(q_prim_rsx_vf)
         if (weno_order > 1) then
-            deallocate (F_rsx_vf, F_src_rsx_vf)
+            @:DEALLOCATE_GLOBAL(F_rsx_vf, F_src_rsx_vf)
         end if
-        deallocate (flux_rsx_vf, flux_src_rsx_vf)
+        @:DEALLOCATE_GLOBAL(flux_rsx_vf, flux_src_rsx_vf)
 
         if (n > 0) then
-            deallocate (q_prim_rsy_vf)
+            @:DEALLOCATE_GLOBAL(q_prim_rsy_vf)
             if (weno_order > 1) then
-                deallocate (F_rsy_vf, F_src_rsy_vf)
+                @:DEALLOCATE_GLOBAL(F_rsy_vf, F_src_rsy_vf)
             end if
-            deallocate (flux_rsy_vf, flux_src_rsy_vf)
+            @:DEALLOCATE_GLOBAL(flux_rsy_vf, flux_src_rsy_vf)
         end if
         if (p > 0) then
-            deallocate (q_prim_rsz_vf)
+            @:DEALLOCATE_GLOBAL(q_prim_rsz_vf)
             if (weno_order > 1) then
-                deallocate (F_rsz_vf, F_src_rsz_vf)
+                @:DEALLOCATE_GLOBAL(F_rsz_vf, F_src_rsz_vf)
             end if
-            deallocate (flux_rsz_vf, flux_src_rsz_vf)
+            @:DEALLOCATE_GLOBAL(flux_rsz_vf, flux_src_rsz_vf)
         end if
 
         ! Deallocating the cell-width distribution in the s-direction
-        deallocate (ds)
+        @:DEALLOCATE_GLOBAL(ds)
 
         ! Deallocating CBC Coefficients in x-direction =====================
         if (any((/bc_x%beg, bc_x%end/) <= -5) .and. any((/bc_x%beg, bc_x%end/) >= -13)) then
-            deallocate (fd_coef_x); if (weno_order > 1) deallocate (pi_coef_x)
+            @:DEALLOCATE_GLOBAL(fd_coef_x)
+            if (weno_order > 1) then
+                @:DEALLOCATE_GLOBAL(pi_coef_x)
+            end if
         end if
         ! ==================================================================
 
         ! Deallocating CBC Coefficients in y-direction =====================
         if (n > 0 .and. any((/bc_y%beg, bc_y%end/) <= -5) .and. &
             any((/bc_y%beg, bc_y%end/) >= -13 .and. bc_y%beg /= -14)) then
-            deallocate (fd_coef_y); if (weno_order > 1) deallocate (pi_coef_y)
+            @:DEALLOCATE_GLOBAL(fd_coef_y)
+            if (weno_order > 1) then
+                @:DEALLOCATE_GLOBAL(pi_coef_y)
+            end if
         end if
         ! ==================================================================
 
         ! Deallocating CBC Coefficients in z-direction =====================
         if (p > 0 .and. any((/bc_z%beg, bc_z%end/) <= -5) .and. any((/bc_z%beg, bc_z%end/) >= -13)) then
-            deallocate (fd_coef_z); if (weno_order > 1) deallocate (pi_coef_z)
+            @:DEALLOCATE_GLOBAL(fd_coef_z)
+            if (weno_order > 1) then
+                @:DEALLOCATE_GLOBAL(pi_coef_z)
+            end if
         end if
         ! ==================================================================
 
diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp
index 4b3a98528..0b893d34f 100644
--- a/src/simulation/m_compute_cbc.fpp
+++ b/src/simulation/m_compute_cbc.fpp
@@ -28,7 +28,11 @@ contains
         !!      the normal component of velocity is zero at all times,
         !!      while the transverse velocities may be nonzero.
     subroutine s_compute_slip_wall_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) ! --------------
-        !$acc routine seq
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS s_compute_slip_wall_L
+#else
+!$acc routine seq
+#endif
         real(kind(0d0)), dimension(3), intent(IN) :: lambda
         real(kind(0d0)), dimension(num_fluids), intent(IN) :: mf, dalpha_rho_ds, dadv_ds
         real(kind(0d0)), dimension(num_dims), intent(IN) :: dvel_ds
@@ -52,7 +56,11 @@ contains
         !!      buffer reduces the amplitude of any reflections caused by
         !!      outgoing waves.
     subroutine s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) ! --------------
-        !$acc routine seq
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_buffer_L
+#else
+!$acc routine seq
+#endif
         real(kind(0d0)), dimension(3), intent(IN) :: lambda
         real(kind(0d0)), dimension(num_fluids), intent(IN) :: mf, dalpha_rho_ds, dadv_ds
         real(kind(0d0)), dimension(num_dims), intent(IN) :: dvel_ds
@@ -89,7 +97,11 @@ contains
         !!      CBC assumes an incoming flow and reduces the amplitude of
         !!      any reflections caused by outgoing waves.
     subroutine s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) ! --------------
-        !$acc routine seq
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS ss_compute_nonreflecting_subsonic_inflow_L
+#else
+!$acc routine seq
+#endif
         real(kind(0d0)), dimension(3), intent(IN) :: lambda
         real(kind(0d0)), dimension(num_fluids), intent(IN) :: mf, dalpha_rho_ds, dadv_ds
         real(kind(0d0)), dimension(num_dims), intent(IN) :: dvel_ds
@@ -111,7 +123,11 @@ contains
         !!      subsonic CBC presumes an outgoing flow and reduces the
         !!      amplitude of any reflections caused by outgoing waves.
     subroutine s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) ! --------------
-        !$acc routine seq
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS s_compute_nonreflecting_subsonic_outflow_L
+#else
+!$acc routine seq
+#endif
         real(kind(0d0)), dimension(3), intent(IN) :: lambda
         real(kind(0d0)), dimension(num_fluids), intent(IN) :: mf, dalpha_rho_ds, dadv_ds
         real(kind(0d0)), dimension(num_dims), intent(IN) :: dvel_ds
@@ -147,7 +163,11 @@ contains
         !!      at the boundary is simply advected outward at the fluid
         !!      velocity.
     subroutine s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) ! --------------
-        !$acc routine seq
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS s_compute_force_free_subsonic_outflow_L
+#else
+!$acc routine seq
+#endif
         real(kind(0d0)), dimension(3), intent(IN) :: lambda
         real(kind(0d0)), dimension(num_fluids), intent(IN) :: mf, dalpha_rho_ds, dadv_ds
         real(kind(0d0)), dimension(num_dims), intent(IN) :: dvel_ds
@@ -179,7 +199,11 @@ contains
         !!      subsonic outflow maintains a fixed pressure at the CBC
         !!      boundary in absence of any transverse effects.
     subroutine s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) ! --------------
-        !$acc routine seq
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS s_compute_constant_pressure_subsonic_outflow_L
+#else
+!$acc routine seq
+#endif
         real(kind(0d0)), dimension(3), intent(IN) :: lambda
         real(kind(0d0)), dimension(num_fluids), intent(IN) :: mf, dalpha_rho_ds, dadv_ds
         real(kind(0d0)), dimension(num_dims), intent(IN) :: dvel_ds
@@ -212,7 +236,11 @@ contains
         !!      transverse terms may generate a time dependence at the
         !!      inflow boundary.
     subroutine s_compute_supersonic_inflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) ! --------------
-        !$acc routine seq
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS s_compute_supersonic_inflow_L
+#else
+!$acc routine seq
+#endif
         real(kind(0d0)), dimension(3), intent(IN) :: lambda
         real(kind(0d0)), dimension(num_fluids), intent(IN) :: mf, dalpha_rho_ds, dadv_ds
         real(kind(0d0)), dimension(num_dims), intent(IN) :: dvel_ds
@@ -232,7 +260,11 @@ contains
         !!      flow evolution at the boundary is determined completely
         !!      by the interior data.
     subroutine s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) ! --------------
-        !$acc routine seq
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS s_compute_supersonic_outflow_L
+#else
+!$acc routine seq
+#endif
         real(kind(0d0)), dimension(3), intent(IN) :: lambda
         real(kind(0d0)), dimension(num_fluids), intent(IN) :: mf, dalpha_rho_ds, dadv_ds
         real(kind(0d0)), dimension(num_dims), intent(IN) :: dvel_ds
diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp
index 9e525f0a2..2eca61fbb 100644
--- a/src/simulation/m_data_output.fpp
+++ b/src/simulation/m_data_output.fpp
@@ -67,19 +67,24 @@ module m_data_output
 
         end subroutine s_write_abstract_data_files ! -------------------
     end interface ! ========================================================
-
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), icfl_sf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), vcfl_sf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), ccfl_sf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), Rc_sf)
+!$acc declare link(icfl_sf, vcfl_sf, ccfl_sf, Rc_sf)
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :) :: icfl_sf  !< ICFL stability criterion
     real(kind(0d0)), allocatable, dimension(:, :, :) :: vcfl_sf  !< VCFL stability criterion
     real(kind(0d0)), allocatable, dimension(:, :, :) :: ccfl_sf  !< CCFL stability criterion
     real(kind(0d0)), allocatable, dimension(:, :, :) :: Rc_sf  !< Rc stability criterion
-
-    !$acc declare create(icfl_sf, vcfl_sf, ccfl_sf, Rc_sf)
+!$acc declare create(icfl_sf, vcfl_sf, ccfl_sf, Rc_sf)
+#endif
 
     real(kind(0d0)) :: icfl_max_loc, icfl_max_glb !< ICFL stability extrema on local and global grids
     real(kind(0d0)) :: vcfl_max_loc, vcfl_max_glb !< VCFL stability extrema on local and global grids
     real(kind(0d0)) :: ccfl_max_loc, ccfl_max_glb !< CCFL stability extrema on local and global grids
     real(kind(0d0)) :: Rc_min_loc, Rc_min_glb !< Rc   stability extrema on local and global grids
-
     !$acc declare create(icfl_max_loc, icfl_max_glb, vcfl_max_loc, vcfl_max_glb, ccfl_max_loc, ccfl_max_glb, Rc_min_loc, Rc_min_glb)
 
     !> @name ICFL, VCFL, CCFL and Rc stability criteria extrema over all the time-steps
@@ -94,6 +99,8 @@ module m_data_output
 
 contains
 
+    @:s_compute_speed_of_sound()
+
     !>  The purpose of this subroutine is to open a new or pre-
         !!          existing run-time information file and append to it the
         !!      basic header information relevant to current simulation.
@@ -249,7 +256,7 @@ contains
             !! Modified dtheta accounting for Fourier filtering in azimuthal direction.
 
         ! Computing Stability Criteria at Current Time-step ================
-        !$acc parallel loop collapse(3) gang vector default(present) private(alpha_rho, vel, alpha, Re)
+        !$acc parallel loop collapse(3) gang vector default(present) private(alpha_rho, vel, alpha, Re, fltr_dtheta, Nfq)
         do l = 0, p
             do k = 0, n
                 do j = 0, m
@@ -364,6 +371,20 @@ contains
 
         ! Determining local stability criteria extrema at current time-step
 
+#ifdef CRAY_ACC_WAR
+        !$acc update host(icfl_sf)
+
+        if (any(Re_size > 0)) then
+            !$acc update host(vcfl_sf, Rc_sf)
+        end if
+
+        icfl_max_loc = maxval(icfl_sf)
+
+        if (any(Re_size > 0)) then
+            vcfl_max_loc = maxval(vcfl_sf)
+            Rc_min_loc = minval(Rc_sf)
+        end if
+#else
         !$acc kernels
         icfl_max_loc = maxval(icfl_sf)
         !$acc end kernels
@@ -374,6 +395,7 @@ contains
             Rc_min_loc = minval(Rc_sf)
             !$acc end kernels
         end if
+#endif
 
         ! Determining global stability criteria extrema at current time-step
         if (num_procs > 1) then
@@ -980,18 +1002,6 @@ contains
             call MPI_FILE_CLOSE(ifile, ierr)
         end if
 
-        if (ib) then
-            var_MOK = int(sys_size + 1, MPI_OFFSET_KIND)
-
-            ! Initial displacement to skip at beginning of file
-            disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1)
-
-            call MPI_FILE_SET_VIEW(ifile, disp, MPI_INTEGER, MPI_IO_IB_DATA%view, &
-                                   'native', mpi_info_int, ierr)
-            call MPI_FILE_WRITE_ALL(ifile, MPI_IO_IB_DATA%var%sf, data_size, &
-                                    MPI_DOUBLE_PRECISION, status, ierr)
-        end if
-
         call MPI_FILE_CLOSE(ifile, ierr)
 
 #endif
@@ -1598,8 +1608,6 @@ contains
 
     end subroutine s_write_probe_files ! -----------------------------------
 
-    @:s_compute_speed_of_sound()
-
     !>  The goal of this subroutine is to write to the run-time
         !!      information file basic footer information applicable to
         !!      the current computation and to close the file when done.
@@ -1649,12 +1657,12 @@ contains
         integer :: i !< Generic loop iterator
 
         ! Allocating/initializing ICFL, VCFL, CCFL and Rc stability criteria
-        @:ALLOCATE(icfl_sf(0:m, 0:n, 0:p))
+        @:ALLOCATE_GLOBAL(icfl_sf(0:m, 0:n, 0:p))
         icfl_max = 0d0
 
         if (any(Re_size > 0)) then
-            @:ALLOCATE(vcfl_sf(0:m, 0:n, 0:p))
-            @:ALLOCATE(Rc_sf  (0:m, 0:n, 0:p))
+            @:ALLOCATE_GLOBAL(vcfl_sf(0:m, 0:n, 0:p))
+            @:ALLOCATE_GLOBAL(Rc_sf  (0:m, 0:n, 0:p))
 
             vcfl_max = 0d0
             Rc_min = 1d3
@@ -1688,9 +1696,9 @@ contains
         integer :: i !< Generic loop iterator
 
         ! Deallocating the ICFL, VCFL, CCFL, and Rc stability criteria
-        @:DEALLOCATE(icfl_sf)
+        @:DEALLOCATE_GLOBAL(icfl_sf)
         if (any(Re_size > 0)) then
-            @:DEALLOCATE(vcfl_sf, Rc_sf)
+            @:DEALLOCATE_GLOBAL(vcfl_sf, Rc_sf)
         end if
 
         ! Disassociating the pointer to the procedure that was utilized to
diff --git a/src/simulation/m_derived_variables.f90 b/src/simulation/m_derived_variables.f90
index 459193a5b..ce615ea80 100644
--- a/src/simulation/m_derived_variables.f90
+++ b/src/simulation/m_derived_variables.f90
@@ -177,7 +177,7 @@ end subroutine s_compute_derived_variables ! ---------------------------
         !!  @param q_sf Acceleration component
     subroutine s_derive_acceleration_component(i, q_prim_vf0, q_prim_vf1, &
                                                q_prim_vf2, q_prim_vf3, q_sf) ! ----------
-
+!DIR$ INLINEALWAYS s_derive_acceleration_component
         integer, intent(IN) :: i
 
         type(scalar_field), dimension(sys_size), intent(IN) :: q_prim_vf0
@@ -202,6 +202,7 @@ subroutine s_derive_acceleration_component(i, q_prim_vf0, q_prim_vf1, &
 
                         do r = -fd_number, fd_number
                             if (n == 0) then ! 1D simulation
+                                print *, q_sf(j, k, l), q_prim_vf0(mom_idx%beg)%sf(j, k, l), fd_coeff_x(r, j), q_prim_vf0(mom_idx%beg)%sf(r + j, k, l)
                                 q_sf(j, k, l) = q_sf(j, k, l) &
                                                 + q_prim_vf0(mom_idx%beg)%sf(j, k, l)*fd_coeff_x(r, j)* &
                                                 q_prim_vf0(mom_idx%beg)%sf(r + j, k, l)
diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp
index db45a2993..9dd955c22 100644
--- a/src/simulation/m_fftw.fpp
+++ b/src/simulation/m_fftw.fpp
@@ -18,7 +18,12 @@ module m_fftw
 
 #if defined(MFC_OpenACC) && defined(__PGI)
     use cufft
+#elif defined(MFC_OpenACC)
+    use hipfort
+    use hipfort_check
+    use hipfort_hipfft
 #endif
+
     ! ==========================================================================
 
     implicit none
@@ -27,13 +32,13 @@ module m_fftw
  s_apply_fourier_filter, &
  s_finalize_fftw_module
 
-#if !(defined(MFC_OpenACC) && defined(__PGI))
+#if !defined(MFC_OpenACC)
     include 'fftw3.f03'
 #endif
 
     type(c_ptr) :: fwd_plan, bwd_plan
     type(c_ptr) :: fftw_real_data, fftw_cmplx_data, fftw_fltr_cmplx_data
-    integer :: real_size, cmplx_size, x_size, batch_size
+    integer :: real_size, cmplx_size, x_size, batch_size, Nfq
 
     real(c_double), pointer :: data_real(:) !< Real data
 
@@ -43,17 +48,29 @@ module m_fftw
     complex(c_double_complex), pointer :: data_fltr_cmplx(:) !<
     !! Filtered complex data in Fourier space
 
-#if defined(MFC_OpenACC) && defined(__PGI)
-    !$acc declare create(real_size, cmplx_size, x_size, batch_size)
+#if defined(MFC_OpenACC)
+!$acc declare create(real_size, cmplx_size, x_size, batch_size, Nfq)
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:),  data_real_gpu)
+    @:CRAY_DECLARE_GLOBAL(complex(kind(0d0)), dimension(:), data_cmplx_gpu)
+    @:CRAY_DECLARE_GLOBAL(complex(kind(0d0)), dimension(:), data_fltr_cmplx_gpu)
+!$acc declare link(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
+#else
     real(kind(0d0)), allocatable :: data_real_gpu(:)
     complex(kind(0d0)), allocatable :: data_cmplx_gpu(:)
     complex(kind(0d0)), allocatable :: data_fltr_cmplx_gpu(:)
-    !$acc declare create(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
+!$acc declare create(data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu)
+#endif
 
-    integer :: fwd_plan_gpu, bwd_plan_gpu, ierr
+#if defined(__PGI)
+    integer :: fwd_plan_gpu, bwd_plan_gpu
+#else
+    type(c_ptr) :: fwd_plan_gpu, bwd_plan_gpu
+#endif
+    integer :: ierr
 
-    integer, allocatable :: cufft_size(:), iembed(:), oembed(:)
+    integer, allocatable :: gpu_fft_size(:), iembed(:), oembed(:)
 
     integer :: istride, ostride, idist, odist, rank
 #endif
@@ -65,13 +82,6 @@ contains
         !!      applying the Fourier filter in the azimuthal direction.
     subroutine s_initialize_fftw_module() ! ----------------------------------
 
-#if defined(MFC_OpenACC) && !defined(__PGI)
-
-        print *, "The FFTW module is not supported when using OpenACC with a compiler other than NVHPC/PGI."
-        stop 1
-
-#endif
-
         ! Size of input array going into DFT
         real_size = p + 1
         ! Size of output array coming out of DFT
@@ -81,15 +91,15 @@ contains
 
         batch_size = x_size*sys_size
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
         rank = 1; istride = 1; ostride = 1
 
-        allocate (cufft_size(1:rank), iembed(1:rank), oembed(1:rank))
+        allocate (gpu_fft_size(1:rank), iembed(1:rank), oembed(1:rank))
 
-        cufft_size(1) = real_size; 
+        gpu_fft_size(1) = real_size; 
         iembed(1) = 0
         oembed(1) = 0
-
+!$acc enter data copyin(real_size, cmplx_size, x_size, sys_size, batch_size, Nfq)
 !$acc update device(real_size, cmplx_size, x_size, sys_size, batch_size)
 #else
         ! Allocate input and output DFT data sizes
@@ -106,13 +116,19 @@ contains
         bwd_plan = fftw_plan_dft_c2r_1d(real_size, data_fltr_cmplx, data_real, FFTW_ESTIMATE)
 #endif
 
-#if defined(MFC_OpenACC) && defined(__PGI)
-        @:ALLOCATE(data_real_gpu(1:real_size*x_size*sys_size))
-        @:ALLOCATE(data_cmplx_gpu(1:cmplx_size*x_size*sys_size))
-        @:ALLOCATE(data_fltr_cmplx_gpu(1:cmplx_size*x_size*sys_size))
+#if defined(MFC_OpenACC)
+        @:ALLOCATE_GLOBAL(data_real_gpu(1:real_size*x_size*sys_size))
+        @:ALLOCATE_GLOBAL(data_cmplx_gpu(1:cmplx_size*x_size*sys_size))
+        @:ALLOCATE_GLOBAL(data_fltr_cmplx_gpu(1:cmplx_size*x_size*sys_size))
+
+#if defined(__PGI)
+        ierr = cufftPlanMany(fwd_plan_gpu, rank, gpu_fft_size, iembed, istride, real_size, oembed, ostride, cmplx_size, CUFFT_D2Z, batch_size)
+        ierr = cufftPlanMany(bwd_plan_gpu, rank, gpu_fft_size, iembed, istride, cmplx_size, oembed, ostride, real_size, CUFFT_Z2D, batch_size)
+#else
+        ierr = hipfftPlanMany(fwd_plan_gpu, rank, gpu_fft_size, iembed, istride, real_size, oembed, ostride, cmplx_size, HIPFFT_D2Z, batch_size)
+        ierr = hipfftPlanMany(bwd_plan_gpu, rank, gpu_fft_size, iembed, istride, cmplx_size, oembed, ostride, real_size, HIPFFT_Z2D, batch_size)
+#endif
 
-        ierr = cufftPlanMany(fwd_plan_gpu, rank, cufft_size, iembed, istride, real_size, oembed, ostride, cmplx_size, CUFFT_D2Z, batch_size)
-        ierr = cufftPlanMany(bwd_plan_gpu, rank, cufft_size, iembed, istride, cmplx_size, oembed, ostride, real_size, CUFFT_Z2D, batch_size)
 #endif
 
     end subroutine s_initialize_fftw_module ! ------------------------------
@@ -125,14 +141,11 @@ contains
 
         type(scalar_field), dimension(sys_size), intent(INOUT) :: q_cons_vf
 
-        integer :: Nfq !< Number of kept modes
-
         integer :: i, j, k, l !< Generic loop iterators
 
         ! Restrict filter to processors that have cells adjacent to axis
         if (bc_y%beg >= 0) return
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
 
         !$acc parallel loop collapse(3) gang vector default(present)
         do k = 1, sys_size
@@ -152,13 +165,18 @@ contains
             end do
         end do
 
-        !$acc host_data use_device(data_real_gpu, data_cmplx_gpu)
+!$acc host_data use_device(data_real_gpu, data_cmplx_gpu)
+#if defined(__PGI)
         ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
+#else
+        ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(data_real_gpu), c_loc(data_cmplx_gpu))
+        call hipCheck(hipDeviceSynchronize())
+#endif
         !$acc end host_data
-
         Nfq = 3
+        !$acc update device(Nfq)
 
-        !$acc parallel loop collapse(3) gang vector default(present) firstprivate(Nfq)
+        !$acc parallel loop collapse(3) gang vector default(present)
         do k = 1, sys_size
             do j = 0, m
                 do l = 1, Nfq
@@ -167,8 +185,13 @@ contains
             end do
         end do
 
-        !$acc host_data use_device(data_real_gpu, data_fltr_cmplx_gpu)
+!$acc host_data use_device(data_real_gpu, data_fltr_cmplx_gpu)
+#if defined(__PGI)
         ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
+#else
+        ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(data_fltr_cmplx_gpu), c_loc(data_real_gpu))
+        call hipCheck(hipDeviceSynchronize())
+#endif
         !$acc end host_data
 
         !$acc parallel loop collapse(3) gang vector default(present)
@@ -201,13 +224,19 @@ contains
                 end do
             end do
 
-            !$acc host_data use_device(data_real_gpu, data_cmplx_gpu)
+!$acc host_data use_device(data_real_gpu, data_cmplx_gpu)
+#if defined(__PGI)
             ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu)
+#else
+            ierr = hipfftExecD2Z(fwd_plan_gpu, c_loc(data_real_gpu), c_loc(data_cmplx_gpu))
+            call hipCheck(hipDeviceSynchronize())
+#endif
             !$acc end host_data
 
             Nfq = min(floor(2d0*real(i, kind(0d0))*pi), cmplx_size)
+            !$acc update device(Nfq)
 
-            !$acc parallel loop collapse(3) gang vector default(present) firstprivate(Nfq)
+            !$acc parallel loop collapse(3) gang vector default(present)
             do k = 1, sys_size
                 do j = 0, m
                     do l = 1, Nfq
@@ -216,8 +245,13 @@ contains
                 end do
             end do
 
-            !$acc host_data use_device(data_real_gpu, data_fltr_cmplx_gpu)
+!$acc host_data use_device(data_real_gpu, data_fltr_cmplx_gpu)
+#if defined(__PGI)
             ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu)
+#else
+            ierr = hipfftExecZ2D(bwd_plan_gpu, c_loc(data_fltr_cmplx_gpu), c_loc(data_real_gpu))
+            call hipCheck(hipDeviceSynchronize())
+#endif
             !$acc end host_data
 
             !$acc parallel loop collapse(3) gang vector default(present) firstprivate(i)
@@ -270,10 +304,16 @@ contains
         !!      applying the Fourier filter in the azimuthal direction.
     subroutine s_finalize_fftw_module() ! ------------------------------------
 
-#if defined(MFC_OpenACC) && defined(__PGI)
-        @:DEALLOCATE(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu)
+#if defined(MFC_OpenACC)
+        @:DEALLOCATE_GLOBAL(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu)
+#if defined(__PGI)
+
         ierr = cufftDestroy(fwd_plan_gpu)
         ierr = cufftDestroy(bwd_plan_gpu)
+#else
+        ierr = hipfftDestroy(fwd_plan_gpu)
+        ierr = hipfftDestroy(bwd_plan_gpu)
+#endif
 #else
         call fftw_free(fftw_real_data)
         call fftw_free(fftw_cmplx_data)
diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp
index e9d7e81b0..64a56c5e5 100644
--- a/src/simulation/m_global_parameters.fpp
+++ b/src/simulation/m_global_parameters.fpp
@@ -59,24 +59,40 @@ module m_global_parameters
 
     !> @name Cell-boundary (CB) locations in the x-, y- and z-directions, respectively
     !> @{
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), x_cb, y_cb, z_cb)
+#else
     real(kind(0d0)), target, allocatable, dimension(:) :: x_cb, y_cb, z_cb
+#endif
     !> @}
 
     !> @name Cell-center (CC) locations in the x-, y- and z-directions, respectively
     !> @{
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), x_cc, y_cc, z_cc)
+#else
     real(kind(0d0)), target, allocatable, dimension(:) :: x_cc, y_cc, z_cc
+#endif
     !> @}
     !type(bounds_info) :: x_domain, y_domain, z_domain !<
     !! Locations of the domain bounds in the x-, y- and z-coordinate directions
     !> @name Cell-width distributions in the x-, y- and z-directions, respectively
     !> @{
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), dx, dy, dz)
+#else
     real(kind(0d0)), target, allocatable, dimension(:) :: dx, dy, dz
+#endif
     !> @}
 
     real(kind(0d0)) :: dt !< Size of the time-step
 
-    !$acc declare create(x_cb, y_cb, z_cb, x_cc, y_cc, z_cc, dx, dy, dz, dt, m, n, p)
-
+#ifdef CRAY_ACC_WAR
+!$acc declare link(x_cb, y_cb, z_cb, x_cc, y_cc, z_cc, dx, dy, dz)
+!$acc declare create(m, n, p, dt)
+#else
+!$acc declare create(x_cb, y_cb, z_cb, x_cc, y_cc, z_cc, dx, dy, dz, dt, m, n, p)
+#endif
     !> @name Starting time-step iteration, stopping time-step iteration and the number
     !! of time-step iterations between successive solution backups, respectively
     !> @{
@@ -94,7 +110,7 @@ module m_global_parameters
     #:else
         integer :: num_dims       !< Number of spatial dimensions
     #:endif
-    integer :: num_fluids     !< Number of fluids in the flow
+    integer :: num_fluids
     logical :: adv_alphan     !< Advection of the last volume fraction
     logical :: mpp_lim        !< Mixture physical parameters (MPP) limits
     integer :: time_stepper   !< Time-stepper algorithm
@@ -116,10 +132,6 @@ module m_global_parameters
     integer :: riemann_solver !< Riemann solver algorithm
     integer :: wave_speeds    !< Wave speeds estimation method
     integer :: avg_state      !< Average state evaluation method
-    logical :: relax          !< activate phase change
-    integer :: relax_model    !< Relaxation model
-    real(kind(0d0)) :: palpha_eps     !< trigger parameter for the p relaxation procedure, phase change model
-    real(kind(0d0)) :: ptgalpha_eps   !< trigger parameter for the pTg relaxation procedure, phase change model
     logical :: alt_soundspeed !< Alternate mixture sound speed
     logical :: null_weights   !< Null undesired WENO weights
     logical :: mixture_err    !< Mixture properties correction
@@ -132,7 +144,16 @@ module m_global_parameters
         !$acc declare create(num_dims, weno_polyn, weno_order)
     #:endif
 
-    !$acc declare create(mpp_lim, num_fluids, model_eqns, mixture_err, alt_soundspeed, avg_state, mapped_weno, mp_weno, weno_eps, hypoelasticity, relax, palpha_eps,ptgalpha_eps)
+    !$acc declare create(mpp_lim, num_fluids, model_eqns, mixture_err, alt_soundspeed, avg_state, mapped_weno, mp_weno, weno_eps, hypoelasticity)
+
+    logical :: relax          !< activate phase change
+    integer :: relax_model    !< Relaxation model
+    real(kind(0d0)) :: palpha_eps     !< trigger parameter for the p relaxation procedure, phase change model
+    real(kind(0d0)) :: ptgalpha_eps   !< trigger parameter for the pTg relaxation procedure, phase change model
+
+!#ifndef _CRAYFTN
+!$acc declare create(relax, relax_model, palpha_eps,ptgalpha_eps)
+!#endif
 
     !> @name Boundary conditions (BC) in the x-, y- and z-directions, respectively
     !> @{
@@ -184,10 +205,18 @@ module m_global_parameters
     !! numbers, will be non-negligible.
     !> @{
     integer, dimension(2) :: Re_size
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(integer, dimension(:, :), Re_idx)
+#else
     integer, allocatable, dimension(:, :) :: Re_idx
-    !> @{
-
-    !$acc declare create(Re_size, Re_idx)
+#endif
+    !> @}
+#ifdef CRAY_ACC_WAR
+!$acc declare create(Re_size)
+!$acc declare link(Re_idx)
+#else
+!$acc declare create(Re_size, Re_idx)
+#endif
 
     ! The WENO average (WA) flag regulates whether the calculation of any cell-
     ! average spatial derivatives is carried out in each cell by utilizing the
@@ -288,16 +317,30 @@ module m_global_parameters
     real(kind(0d0)) :: Ca       !< Cavitation number
     real(kind(0d0)) :: Web      !< Weber number
     real(kind(0d0)) :: Re_inv   !< Inverse Reynolds number
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), weight)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), R0)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), V0)
+!$acc declare link(weight, R0, V0)
+#else
     real(kind(0d0)), dimension(:), allocatable :: weight !< Simpson quadrature weights
     real(kind(0d0)), dimension(:), allocatable :: R0     !< Bubble sizes
     real(kind(0d0)), dimension(:), allocatable :: V0     !< Bubble velocities
+!$acc declare create(weight, R0, V0)
+#endif
     logical :: bubbles      !< Bubbles on/off
     logical :: polytropic   !< Polytropic  switch
     logical :: polydisperse !< Polydisperse bubbles
 
     integer :: bubble_model !< Gilmore or Keller--Miksis bubble model
     integer :: thermal      !< Thermal behavior. 1 = adiabatic, 2 = isotherm, 3 = transfer
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), ptil)
+!$acc declare link(ptil)
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :) :: ptil  !< Pressure modification
+!$acc declare create(ptil)
+#endif
     real(kind(0d0)) :: poly_sigma  !< log normal sigma for polydisperse PDF
 
     logical :: qbmm      !< Quadrature moment method
@@ -310,23 +353,39 @@ module m_global_parameters
         !$acc declare create(nb)
     #:endif
 
-    !$acc declare create(R0ref, Ca, Web, Re_inv, weight, R0, V0, bubbles, polytropic, polydisperse, qbmm, nmomsp, nmomtot, R0_type, ptil, bubble_model, thermal, poly_sigma)
+!$acc declare create(R0ref, Ca, Web, Re_inv, bubbles, polytropic, polydisperse, qbmm, nmomsp, nmomtot, R0_type, bubble_model, thermal, poly_sigma)
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(type(scalar_field), dimension(:), mom_sp)
+    @:CRAY_DECLARE_GLOBAL(type(scalar_field), dimension(:, :, :), mom_3d)
+!$acc declare link(mom_sp, mom_3d)
+#else
     type(scalar_field), allocatable, dimension(:) :: mom_sp
     type(scalar_field), allocatable, dimension(:, :, :) :: mom_3d
+!$acc declare create(mom_sp, mom_3d)
+#endif
     !> @}
-    !$acc declare create(mom_sp, mom_3d)
 
     !> @name Physical bubble parameters (see Ando 2010, Preston 2007)
     !> @{
     real(kind(0d0)) :: R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v
+!$acc declare create(R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v)
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), k_n, k_v, pb0, mass_n0, mass_v0, Pe_T)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN)
+!$acc declare link( k_n, k_v, pb0, mass_n0, mass_v0, Pe_T, Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN)
+#else
     real(kind(0d0)), dimension(:), allocatable :: k_n, k_v, pb0, mass_n0, mass_v0, Pe_T
     real(kind(0d0)), dimension(:), allocatable :: Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN
+!$acc declare create( k_n, k_v, pb0, mass_n0, mass_v0, Pe_T, Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN)
+#endif
     real(kind(0d0)) :: mul0, ss, gamma_v, mu_v
     real(kind(0d0)) :: gamma_m, gamma_n, mu_n
     real(kind(0d0)) :: gam
     !> @}
-    !$acc declare create(R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v, k_n, k_v, pb0, mass_n0, mass_v0, Pe_T, Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN , mul0, ss, gamma_v, mu_v, gamma_m, gamma_n, mu_n, gam)
+
+    !$acc declare create(mul0, ss, gamma_v, mu_v, gamma_m, gamma_n, mu_n, gam)
+
     !> @name Acoustic monopole parameters
     !> @{
     logical :: monopole !< Monopole switch
@@ -341,19 +400,33 @@ module m_global_parameters
     integer :: intxb, intxe
     integer :: bubxb, bubxe
     integer :: strxb, strxe
-    !$acc declare create(momxb, momxe, advxb, advxe, contxb, contxe, intxb, intxe, bubxb, bubxe, strxb, strxe)
+!$acc declare create(momxb, momxe, advxb, advxe, contxb, contxe, intxb, intxe, bubxb, bubxe, strxb, strxe)
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps)
+!$acc declare link(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps)
+#else
     real(kind(0d0)), allocatable, dimension(:) :: gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps
-    !$acc declare create(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps)
+!$acc declare create(gammas, gs_min, pi_infs, ps_inf, cvs, qvs, qvps)
+#endif
 
     real(kind(0d0)) :: mytime       !< Current simulation time
     real(kind(0d0)) :: finaltime    !< Final simulation time
 
     logical :: weno_flat, riemann_flat, cu_mpi
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(type(pres_field), dimension(:), pb_ts)
 
+    @:CRAY_DECLARE_GLOBAL(type(pres_field), dimension(:), mv_ts)
+
+!$acc declare link(pb_ts, mv_ts)
+#else
     type(pres_field), allocatable, dimension(:) :: pb_ts
+
     type(pres_field), allocatable, dimension(:) :: mv_ts
-    !$acc declare create(pb_ts, mv_ts)
+
+!$acc declare create(pb_ts, mv_ts)
+#endif
     ! ======================================================================
 
 contains
@@ -538,6 +611,7 @@ contains
             weno_polyn = (weno_order - 1)/2
             !$acc update device(weno_polyn)
             !$acc update device(nb)
+            !$acc update device(num_dims, num_fluids)
         #:endif
 
         ! Initializing the number of fluids for which viscous effects will
@@ -612,7 +686,7 @@ contains
                     ! print*, 'alf idx', alf_idx
                     ! print*, 'bub -idx beg end', bub_idx%beg, bub_idx%end
 
-                    @:ALLOCATE(weight(nb), R0(nb), V0(nb))
+                    @:ALLOCATE_GLOBAL(weight(nb), R0(nb), V0(nb))
                     @:ALLOCATE(bub_idx%rs(nb), bub_idx%vs(nb))
                     @:ALLOCATE(bub_idx%ps(nb), bub_idx%ms(nb))
 
@@ -674,7 +748,7 @@ contains
                         if (polytropic) then
                             pv = fluid_pp(1)%pv
                             pv = pv/pref
-                            @:ALLOCATE(pb0(nb))
+                            @:ALLOCATE_GLOBAL(pb0(nb))
                             if (Web == dflt_real) then
                                 pb0 = pref
                                 pb0 = pb0/pref
@@ -683,7 +757,6 @@ contains
                             rhoref = 1d0
                         end if
                     end if
-
                 end if
 
                 if (hypoelasticity) then
@@ -726,7 +799,7 @@ contains
 
                     @:ALLOCATE(bub_idx%rs(nb), bub_idx%vs(nb))
                     @:ALLOCATE(bub_idx%ps(nb), bub_idx%ms(nb))
-                    @:ALLOCATE(weight(nb), R0(nb), V0(nb))
+                    @:ALLOCATE_GLOBAL(weight(nb), R0(nb), V0(nb))
 
                     do i = 1, nb
                         if (polytropic) then
@@ -773,7 +846,7 @@ contains
             ! fluids whose interface will support effects of surface tension
             if (any(Re_size > 0)) then
 
-                @:ALLOCATE(Re_idx(1:2, 1:maxval(Re_size)))
+                @:ALLOCATE_GLOBAL(Re_idx(1:2, 1:maxval(Re_size)))
 
                 k = 0
                 do i = 1, num_fluids
@@ -849,7 +922,7 @@ contains
 
             ix%end = m - ix%beg; iy%end = n - iy%beg; iz%end = p - iz%beg
 
-            @:ALLOCATE(ptil(ix%beg:ix%end, iy%beg:iy%end, iz%beg:iz%end))
+            @:ALLOCATE_GLOBAL(ptil(ix%beg:ix%end, iy%beg:iy%end, iz%beg:iz%end))
         end if
 
         if (probe_wrt) then
@@ -891,21 +964,31 @@ contains
         intxe = internalEnergies_idx%end
 
         !$acc update device(momxb, momxe, advxb, advxe, contxb, contxe, bubxb, bubxe, intxb, intxe, sys_size, buff_size, E_idx, alf_idx, strxb, strxe)
+        !$acc update device(m, n, p)
+
+        !$acc update device(alt_soundspeed, monopole, num_mono)
+        !$acc update device(dt, sys_size, buff_size, pref, rhoref, gamma_idx, pi_inf_idx, E_idx, alf_idx, stress_idx, mpp_lim, bubbles, hypoelasticity, alt_soundspeed, avg_state, num_fluids, model_eqns, num_dims, mixture_err, grid_geometry, cyl_coord, mapped_weno, mp_weno, weno_eps)
+
+        !$acc enter data copyin(nb, R0ref, Ca, Web, Re_inv, weight, R0, V0, bubbles, polytropic, polydisperse, qbmm, R0_type, ptil, bubble_model, thermal, poly_sigma)
+        !$acc enter data copyin(R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v, k_n, k_v, pb0, mass_n0, mass_v0, Pe_T, Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN , mul0, ss, gamma_v, mu_v, gamma_m, gamma_n, mu_n, gam)
+        !$acc enter data copyin(dir_idx, dir_flg, dir_idx_tau)
+
+        !$acc enter data copyin(relax, relax_model, palpha_eps,ptgalpha_eps)
 
         ! Allocating grid variables for the x-, y- and z-directions
-        @:ALLOCATE(x_cb(-1 - buff_size:m + buff_size))
-        @:ALLOCATE(x_cc(-buff_size:m + buff_size))
-        @:ALLOCATE(dx(-buff_size:m + buff_size))
+        @:ALLOCATE_GLOBAL(x_cb(-1 - buff_size:m + buff_size))
+        @:ALLOCATE_GLOBAL(x_cc(-buff_size:m + buff_size))
+        @:ALLOCATE_GLOBAL(dx(-buff_size:m + buff_size))
 
         if (n == 0) return; 
-        @:ALLOCATE(y_cb(-1 - buff_size:n + buff_size))
-        @:ALLOCATE(y_cc(-buff_size:n + buff_size))
-        @:ALLOCATE(dy(-buff_size:n + buff_size))
+        @:ALLOCATE_GLOBAL(y_cb(-1 - buff_size:n + buff_size))
+        @:ALLOCATE_GLOBAL(y_cc(-buff_size:n + buff_size))
+        @:ALLOCATE_GLOBAL(dy(-buff_size:n + buff_size))
 
         if (p == 0) return; 
-        @:ALLOCATE(z_cb(-1 - buff_size:p + buff_size))
-        @:ALLOCATE(z_cc(-buff_size:p + buff_size))
-        @:ALLOCATE(dz(-buff_size:p + buff_size))
+        @:ALLOCATE_GLOBAL(z_cb(-1 - buff_size:p + buff_size))
+        @:ALLOCATE_GLOBAL(z_cc(-buff_size:p + buff_size))
+        @:ALLOCATE_GLOBAL(dz(-buff_size:p + buff_size))
 
     end subroutine s_initialize_global_parameters_module ! -----------------
 
@@ -949,17 +1032,17 @@ contains
         ! fluids and any pairs of fluids whose interfaces supported effects
         ! of surface tension
         if (any(Re_size > 0)) then
-            @:DEALLOCATE(Re_idx)
+            @:DEALLOCATE_GLOBAL(Re_idx)
         end if
 
         ! Deallocating grid variables for the x-, y- and z-directions
-        @:DEALLOCATE(x_cb, x_cc, dx)
+        @:DEALLOCATE_GLOBAL(x_cb, x_cc, dx)
 
         if (n == 0) return; 
-        @:DEALLOCATE(y_cb, y_cc, dy)
+        @:DEALLOCATE_GLOBAL(y_cb, y_cc, dy)
 
         if (p == 0) return; 
-        @:DEALLOCATE(z_cb, z_cc, dz)
+        @:DEALLOCATE_GLOBAL(z_cb, z_cc, dz)
 
         deallocate (proc_coords)
         if (parallel_io) then
diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp
index c799cb367..5e8f9d2e9 100644
--- a/src/simulation/m_hypoelastic.fpp
+++ b/src/simulation/m_hypoelastic.fpp
@@ -22,6 +22,18 @@ module m_hypoelastic
     private; public :: s_initialize_hypoelastic_module, &
  s_compute_hypoelastic_rhs
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), Gs)
+    !$acc declare link(Gs)
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), du_dx, du_dy, du_dz)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), dv_dx, dv_dy, dv_dz)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), dw_dx, dw_dy, dw_dz)
+    !$acc declare link(du_dx,du_dy,du_dz,dv_dx,dv_dy,dv_dz,dw_dx,dw_dy,dw_dz)
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), rho_K_field, G_K_field)
+!$acc declare link(rho_K_field, G_K_field)
+#else
     real(kind(0d0)), allocatable, dimension(:) :: Gs
     !$acc declare create(Gs)
 
@@ -31,7 +43,9 @@ module m_hypoelastic
     !$acc declare create(du_dx,du_dy,du_dz,dv_dx,dv_dy,dv_dz,dw_dx,dw_dy,dw_dz)
 
     real(kind(0d0)), allocatable, dimension(:, :, :) :: rho_K_field, G_K_field
-    !$acc declare create(rho_K_field, G_K_field)
+!$acc declare create(rho_K_field, G_K_field)
+
+#endif
 
 contains
 
@@ -39,14 +53,14 @@ contains
 
         integer :: i
 
-        @:ALLOCATE(Gs(1:num_fluids))
-        @:ALLOCATE(rho_K_field(0:m,0:n,0:p), G_K_field(0:m,0:n,0:p))
-        @:ALLOCATE(du_dx(0:m,0:n,0:p))
+        @:ALLOCATE_GLOBAL(Gs(1:num_fluids))
+        @:ALLOCATE_GLOBAL(rho_K_field(0:m,0:n,0:p), G_K_field(0:m,0:n,0:p))
+        @:ALLOCATE_GLOBAL(du_dx(0:m,0:n,0:p))
         if (n > 0) then
-            @:ALLOCATE(du_dy(0:m,0:n,0:p), dv_dx(0:m,0:n,0:p), dv_dy(0:m,0:n,0:p))
+            @:ALLOCATE_GLOBAL(du_dy(0:m,0:n,0:p), dv_dx(0:m,0:n,0:p), dv_dy(0:m,0:n,0:p))
             if (p > 0) then
-                @:ALLOCATE(du_dz(0:m,0:n,0:p), dv_dz(0:m,0:n,0:p))
-                @:ALLOCATE(dw_dx(0:m,0:n,0:p), dw_dy(0:m,0:n,0:p), dw_dz(0:m,0:n,0:p))
+                @:ALLOCATE_GLOBAL(du_dz(0:m,0:n,0:p), dv_dz(0:m,0:n,0:p))
+                @:ALLOCATE_GLOBAL(dw_dx(0:m,0:n,0:p), dw_dy(0:m,0:n,0:p), dw_dz(0:m,0:n,0:p))
             end if
         end if
 
diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp
index 342a1de63..e9620352b 100644
--- a/src/simulation/m_ibm.fpp
+++ b/src/simulation/m_ibm.fpp
@@ -7,7 +7,6 @@
 !> @brief This module is used to handle all operations related to immersed
 !!              boundary methods (IBMs)
 module m_ibm
-
     ! Dependencies =============================================================
 
     use m_derived_types        !< Definitions of the derived types
@@ -38,22 +37,30 @@ module m_ibm
  s_finalize_ibm_module
 
     type(integer_field), public :: ib_markers
-    !! Marker for solid cells. 0 if liquid, the patch id of its IB if solid
+!$acc declare create(ib_markers)
+
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), levelset)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :, :), levelset_norm)
+    @:CRAY_DECLARE_GLOBAL(type(ghost_point), dimension(:), ghost_points)
 
+!$acc declare link(levelset, levelset_norm, ghost_points)
+#else
+
+    !! Marker for solid cells. 0 if liquid, the patch id of its IB if solid
     real(kind(0d0)), dimension(:, :, :, :), allocatable :: levelset
     !! Matrix of distance to IB
-
     real(kind(0d0)), dimension(:, :, :, :, :), allocatable :: levelset_norm
     !! Matrix of normal vector to IB
-
     type(ghost_point), dimension(:), allocatable :: ghost_points
     !! Matrix of normal vector to IB
 
+!$acc declare create(levelset, levelset_norm, ghost_points)
+#endif
+
     integer :: gp_layers !< Number of ghost point layers
     integer :: num_gps !< Number of ghost points
-
-    !$acc declare create(ib_markers, levelset, levelset_norm)
-    !$acc declare create(ghost_points, gp_layers, num_gps)
+    !$acc declare create(gp_layers, num_gps)
 
 contains
 
@@ -69,16 +76,21 @@ contains
             @:ALLOCATE(ib_markers%sf(-gp_layers:m+gp_layers, &
                 -gp_layers:n+gp_layers, 0:0))
         end if
+        @:ACC_SETUP_SFs(ib_markers)
 
         ! @:ALLOCATE(ib_markers%sf(0:m, 0:n, 0:p))
-        @:ALLOCATE(levelset(0:m, 0:n, 0:p, num_ibs))
-        @:ALLOCATE(levelset_norm(0:m, 0:n, 0:p, num_ibs, 3))
+        @:ALLOCATE_GLOBAL(levelset(0:m, 0:n, 0:p, num_ibs))
+        @:ALLOCATE_GLOBAL(levelset_norm(0:m, 0:n, 0:p, num_ibs, 3))
+
+        !$acc enter data copyin(gp_layers, num_gps)
 
     end subroutine s_initialize_ibm_module
 
     subroutine s_ibm_setup()
 
-        integer :: i, j
+        integer :: i, j, k
+
+        !$acc update device(ib_markers%sf)
 
         ! Get neighboring IB variables from other processors
         call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers)
@@ -86,7 +98,8 @@ contains
         call s_find_num_ghost_points()
 
         !$acc update device(num_gps)
-        @:ALLOCATE(ghost_points(num_gps))
+        @:ALLOCATE_GLOBAL(ghost_points(num_gps))
+        !$acc enter data copyin(ghost_points)
 
         call s_find_ghost_points(ghost_points)
         !$acc update device(ghost_points)
@@ -116,7 +129,7 @@ contains
 
         real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), optional, intent(INOUT) :: pb, mv
 
-        integer :: i, j, k, l, q, r !< Iterator variables
+        integer :: i, j, k, l, q, r!< Iterator variables
         integer :: patch_id !< Patch ID of ghost point
         real(kind(0d0)) :: rho, gamma, pi_inf, dyn_pres !< Mixture variables
         real(kind(0d0)), dimension(2) :: Re_K
@@ -124,7 +137,8 @@ contains
         real(kind(0d0)) :: qv_K
         real(kind(0d0)), dimension(num_fluids) :: Gs
 
-        real(kind(0d0)) :: pres_IP, vel_IP(3), vel_norm_IP(3)
+        real(kind(0d0)) :: pres_IP, coeff
+        real(kind(0d0)), dimension(3) :: vel_IP, vel_norm_IP
         real(kind(0d0)), dimension(num_fluids) :: alpha_rho_IP, alpha_IP
         real(kind(0d0)), dimension(nb) :: r_IP, v_IP, pb_IP, mv_IP
         real(kind(0d0)), dimension(nb*nmom) :: nmom_IP
@@ -140,14 +154,14 @@ contains
         real(kind(0d0)) :: buf
         type(ghost_point) :: gp
 
-        !$acc parallel loop gang vector private(physical_loc, dyn_pres, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, vel_g, vel_norm_IP, r_IP, v_IP, pb_IP, mv_IP, nmom_IP, presb_IP, massv_IP, rho, gamma, pi_inf, Re_K, G_K, Gs, gp, norm, buf)
+        !$acc parallel loop gang vector private(physical_loc, dyn_pres, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, vel_g, vel_norm_IP, r_IP, v_IP, pb_IP, mv_IP, nmom_IP, presb_IP, massv_IP, rho, gamma, pi_inf, Re_K, G_K, Gs, gp, norm, buf, j, k, l, q, coeff)
         do i = 1, num_gps
 
             gp = ghost_points(i)
             j = gp%loc(1)
             k = gp%loc(2)
             l = gp%loc(3)
-            patch_id = gp%ib_patch_id
+            patch_id = ghost_points(i)%ib_patch_id
 
             ! Calculate physical location of GP
             if (p > 0) then
@@ -156,7 +170,7 @@ contains
                 physical_loc = [x_cc(j), y_cc(k), 0d0]
             end if
 
-            ! Interpolate primitive variables at image point associated w/ GP
+            !Interpolate primitive variables at image point associated w/ GP
             if (bubbles .and. .not. qbmm) then
                 call s_interpolate_image_point(q_prim_vf, gp, &
                                                alpha_rho_IP, alpha_IP, pres_IP, vel_IP, &
@@ -174,9 +188,10 @@ contains
                                                alpha_rho_IP, alpha_IP, pres_IP, vel_IP)
             end if
 
-            dyn_pres = 0
+            dyn_pres = 0d0
 
             ! Set q_prim_vf params at GP so that mixture vars calculated properly
+            !$acc loop seq
             do q = 1, num_fluids
                 q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q)
                 q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q)
@@ -383,7 +398,6 @@ contains
     end subroutine s_compute_image_points
 
     subroutine s_find_num_ghost_points()
-
         integer, dimension(2*gp_layers + 1, 2*gp_layers + 1) &
             :: subsection_2D
         integer, dimension(2*gp_layers + 1, 2*gp_layers + 1, 2*gp_layers + 1) &
@@ -629,7 +643,6 @@ contains
 
     subroutine s_interpolate_image_point(q_prim_vf, gp, alpha_rho_IP, alpha_IP, pres_IP, vel_IP, r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb, mv, presb_IP, massv_IP)
         !$acc routine seq
-
         type(scalar_field), &
             dimension(sys_size), &
             intent(IN) :: q_prim_vf !< Primitive Variables
@@ -777,9 +790,10 @@ contains
         !!  @param fV Current bubble velocity
         !!  @param fpb Internal bubble pressure
     subroutine s_finalize_ibm_module()
-        deallocate (ib_markers%sf)
-        deallocate (levelset)
-        deallocate (levelset_norm)
+
+        @:DEALLOCATE(ib_markers%sf)
+        @:DEALLOCATE_GLOBAL(levelset)
+        @:DEALLOCATE_GLOBAL(levelset_norm)
     end subroutine s_finalize_ibm_module
 
 end module m_ibm
diff --git a/src/simulation/m_monopole.fpp b/src/simulation/m_monopole.fpp
index 519ff38cf..431e14a18 100644
--- a/src/simulation/m_monopole.fpp
+++ b/src/simulation/m_monopole.fpp
@@ -20,10 +20,30 @@ module m_monopole
     private; public :: s_initialize_monopole_module, s_monopole_calculations, &
  s_compute_monopole_rhs
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(integer, dimension(:), pulse, support)
+    !$acc declare link(pulse, support)
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), loc_mono)
+    !$acc declare link(loc_mono)
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), foc_length, aperture, support_width)
+    !$acc declare link(foc_length, aperture, support_width)
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), mag, length, npulse, dir, delay)
+    !$acc declare link(mag, length, npulse, dir, delay)
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), mono_mass_src, mono_e_src)
+    !$acc declare link(mono_mass_src, mono_e_src)
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), mono_mom_src)
+!$acc declare link(mono_mom_src)
+
+#else
     integer, allocatable, dimension(:) :: pulse, support
     !$acc declare create(pulse, support)
 
-    real(kind(0d0)), allocatable, dimension(:, :) :: loc_mono
+    real(kind(0d0)), allocatable, target, dimension(:, :) :: loc_mono
     !$acc declare create(loc_mono)
 
     real(kind(0d0)), allocatable, dimension(:) :: foc_length, aperture, support_width
@@ -37,14 +57,16 @@ module m_monopole
     real(kind(0d0)), allocatable, dimension(:, :, :) :: mono_mass_src, mono_e_src
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: mono_mom_src
     !> @}
-    !$acc declare create(mono_mass_src, mono_e_src, mono_mom_src)
+!$acc declare create(mono_mass_src, mono_e_src, mono_mom_src)
+
+#endif
 
 contains
 
     subroutine s_initialize_monopole_module()
         integer :: i, j !< generic loop variables
 
-        @:ALLOCATE(mag(1:num_mono), support(1:num_mono), length(1:num_mono), npulse(1:num_mono), pulse(1:num_mono), dir(1:num_mono), delay(1:num_mono), loc_mono(1:3, 1:num_mono), foc_length(1:num_mono), aperture(1:num_mono), support_width(1:num_mono))
+        @:ALLOCATE_GLOBAL(mag(1:num_mono), support(1:num_mono), length(1:num_mono), npulse(1:num_mono), pulse(1:num_mono), dir(1:num_mono), delay(1:num_mono), loc_mono(1:3, 1:num_mono), foc_length(1:num_mono), aperture(1:num_mono), support_width(1:num_mono))
 
         do i = 1, num_mono
             mag(i) = mono(i)%mag
@@ -63,9 +85,9 @@ contains
         end do
         !$acc update device(mag, support, length, npulse, pulse, dir, delay, foc_length, aperture, loc_mono, support_width)
 
-        @:ALLOCATE(mono_mass_src(0:m, 0:n, 0:p))
-        @:ALLOCATE(mono_mom_src(1:num_dims, 0:m, 0:n, 0:p))
-        @:ALLOCATE(mono_E_src(0:m, 0:n, 0:p))
+        @:ALLOCATE_GLOBAL(mono_mass_src(0:m, 0:n, 0:p))
+        @:ALLOCATE_GLOBAL(mono_mom_src(1:num_dims, 0:m, 0:n, 0:p))
+        @:ALLOCATE_GLOBAL(mono_E_src(0:m, 0:n, 0:p))
 
     end subroutine
 
@@ -321,6 +343,7 @@ contains
         !! @param mono_loc Nominal source term location
         !! @param mono_leng Length of source term in space
     function f_delta(j, k, l, mono_loc, mono_leng, nm, angle, angle_z)
+
         !$acc routine seq
         real(kind(0d0)), dimension(3), intent(IN) :: mono_loc
         integer, intent(IN) :: nm
diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp
index 969d59fc6..2ca830bad 100644
--- a/src/simulation/m_mpi_proxy.fpp
+++ b/src/simulation/m_mpi_proxy.fpp
@@ -28,6 +28,14 @@ module m_mpi_proxy
 
     implicit none
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), q_cons_buff_send)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), q_cons_buff_recv)
+    @:CRAY_DECLARE_GLOBAL(integer, dimension(:), ib_buff_send)
+    @:CRAY_DECLARE_GLOBAL(integer, dimension(:), ib_buff_recv)
+
+!$acc declare link(q_cons_buff_recv, q_cons_buff_send, ib_buff_send, ib_buff_recv)
+#else
     real(kind(0d0)), private, allocatable, dimension(:) :: q_cons_buff_send !<
     !! This variable is utilized to pack and send the buffer of the cell-average
     !! conservative variables, for a single computational domain boundary at the
@@ -48,13 +56,13 @@ module m_mpi_proxy
     !! immersed boundary markers, for a single computational domain boundary
     !! at the time, from the relevant neighboring processor.
 
+!$acc declare create(q_cons_buff_send, q_cons_buff_recv, ib_buff_send, ib_buff_recv)
+#endif
     !> @name Generic flags used to identify and report MPI errors
     !> @{
     integer, private :: err_code, ierr, v_size
+    !$acc declare create(v_size)
     !> @}
-
-    !$acc declare create(q_cons_buff_send, q_cons_buff_recv, v_size)
-
     !real :: s_time, e_time
     !real :: compress_time, mpi_time, decompress_time
     !integer :: nCalls_time = 0
@@ -75,43 +83,45 @@ contains
         if (qbmm .and. .not. polytropic) then
             if (n > 0) then
                 if (p > 0) then
-                    @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* &
+                    @:ALLOCATE_GLOBAL(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* &
                                              & (m + 2*buff_size + 1)* &
                                              & (n + 2*buff_size + 1)* &
                                              & (p + 2*buff_size + 1)/ &
                                              & (min(m, n, p) + 2*buff_size + 1)))
                 else
-                    @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* &
+                    @:ALLOCATE_GLOBAL(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* &
                                              & (max(m, n) + 2*buff_size + 1)))
                 end if
             else
-                @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)))
+                @:ALLOCATE_GLOBAL(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)))
             end if
 
-            @:ALLOCATE(q_cons_buff_recv(0:ubound(q_cons_buff_send, 1)))
+            @:ALLOCATE_GLOBAL(q_cons_buff_recv(0:ubound(q_cons_buff_send, 1)))
 
             v_size = sys_size + 2*nb*4
         else
             if (n > 0) then
                 if (p > 0) then
-                    @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size* &
+                    @:ALLOCATE_GLOBAL(q_cons_buff_send(0:-1 + buff_size*sys_size* &
                                              & (m + 2*buff_size + 1)* &
                                              & (n + 2*buff_size + 1)* &
                                              & (p + 2*buff_size + 1)/ &
                                              & (min(m, n, p) + 2*buff_size + 1)))
                 else
-                    @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size* &
+                    @:ALLOCATE_GLOBAL(q_cons_buff_send(0:-1 + buff_size*sys_size* &
                                              & (max(m, n) + 2*buff_size + 1)))
                 end if
             else
-                @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size))
+                @:ALLOCATE_GLOBAL(q_cons_buff_send(0:-1 + buff_size*sys_size))
             end if
 
-            @:ALLOCATE(q_cons_buff_recv(0:ubound(q_cons_buff_send, 1)))
+            @:ALLOCATE_GLOBAL(q_cons_buff_recv(0:ubound(q_cons_buff_send, 1)))
 
             v_size = sys_size
         end if
 
+!$acc update device(v_size)
+
 #endif
 
     end subroutine s_initialize_mpi_proxy_module ! -------------------------
@@ -840,7 +850,7 @@ contains
 
                     !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     if (cu_mpi) then
                         !$acc host_data use_device( q_cons_buff_recv, q_cons_buff_send, ib_buff_recv, ib_buff_send)
 
@@ -896,7 +906,7 @@ contains
                                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
                         end if
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     end if
 #endif
 
@@ -953,7 +963,7 @@ contains
 
                     !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     if (cu_mpi) then
                         !$acc host_data use_device( q_cons_buff_recv, q_cons_buff_send )
 
@@ -1005,13 +1015,12 @@ contains
                                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
                         end if
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     end if
 #endif
 
                 end if
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi .eqv. .false.) then
                     !$acc update device(q_cons_buff_recv)
                 end if
@@ -1127,7 +1136,7 @@ contains
 
                     !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     if (cu_mpi) then
                         !$acc host_data use_device( q_cons_buff_recv, q_cons_buff_send )
 
@@ -1156,7 +1165,6 @@ contains
                         !$acc wait
                     else
 #endif
-
                         !$acc update host(q_cons_buff_send)
                         if (qbmm .and. .not. polytropic) then
                             call MPI_SENDRECV( &
@@ -1178,7 +1186,7 @@ contains
                                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
                         end if
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     end if
 #endif
 
@@ -1234,8 +1242,7 @@ contains
                     end if
 
                     !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     if (cu_mpi) then
                         !$acc host_data use_device( q_cons_buff_recv, q_cons_buff_send )
 
@@ -1264,7 +1271,6 @@ contains
                         !$acc wait
                     else
 #endif
-
                         !$acc update host(q_cons_buff_send)
 
                         if (qbmm .and. .not. polytropic) then
@@ -1287,7 +1293,7 @@ contains
                                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
                         end if
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     end if
 #endif
 
@@ -1411,8 +1417,7 @@ contains
                     end if
 
                     !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     if (cu_mpi) then
                         !$acc host_data use_device( q_cons_buff_recv, q_cons_buff_send )
 
@@ -1464,8 +1469,7 @@ contains
                                 MPI_DOUBLE_PRECISION, bc_y%beg, 0, &
                                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
                         end if
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     end if
 #endif
 
@@ -1520,8 +1524,7 @@ contains
                     end if
 
                     !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     if (cu_mpi) then
                         !$acc host_data use_device( q_cons_buff_recv, q_cons_buff_send )
 
@@ -1573,14 +1576,12 @@ contains
                                 MPI_DOUBLE_PRECISION, bc_y%beg, 0, &
                                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
                         end if
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     end if
 #endif
 
                 end if
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi .eqv. .false.) then
                     !$acc update device(q_cons_buff_recv)
                 end if
@@ -1694,8 +1695,7 @@ contains
                     end if
 
                     !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     if (cu_mpi) then
                         !$acc host_data use_device( q_cons_buff_recv, q_cons_buff_send )
 
@@ -1747,8 +1747,7 @@ contains
                                 MPI_DOUBLE_PRECISION, bc_y%end, 1, &
                                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
                         end if
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     end if
 #endif
 
@@ -1804,7 +1803,7 @@ contains
 
                     !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     if (cu_mpi) then
                         !$acc host_data use_device( q_cons_buff_recv, q_cons_buff_send )
 
@@ -1856,14 +1855,12 @@ contains
                                 MPI_DOUBLE_PRECISION, bc_y%end, 1, &
                                 MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
                         end if
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     end if
 #endif
 
                 end if
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi .eqv. .false.) then
                     !$acc update device(q_cons_buff_recv)
                 end if
@@ -1986,8 +1983,7 @@ contains
                     end if
 
                     !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     if (cu_mpi) then
                         !$acc host_data use_device( q_cons_buff_recv, q_cons_buff_send )
 
@@ -2017,8 +2013,7 @@ contains
                             buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1), &
                             MPI_DOUBLE_PRECISION, bc_z%beg, 0, &
                             MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     end if
 #endif
 
@@ -2075,8 +2070,7 @@ contains
                     end if
 
                     !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     if (cu_mpi) then
                         !$acc host_data use_device( q_cons_buff_recv, q_cons_buff_send )
 
@@ -2107,13 +2101,12 @@ contains
                             MPI_DOUBLE_PRECISION, bc_z%beg, 0, &
                             MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     end if
 #endif
 
                 end if
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi .eqv. .false.) then
                     !$acc update device(q_cons_buff_recv)
                 end if
@@ -2232,7 +2225,7 @@ contains
 
                     !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     if (cu_mpi) then
                         !$acc host_data use_device( q_cons_buff_recv, q_cons_buff_send )
 
@@ -2262,7 +2255,7 @@ contains
                             MPI_DOUBLE_PRECISION, bc_z%end, 1, &
                             MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     end if
 #endif
 
@@ -2322,7 +2315,7 @@ contains
 
                     !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     if (cu_mpi) then
                         !$acc host_data use_device( q_cons_buff_recv, q_cons_buff_send )
 
@@ -2351,14 +2344,13 @@ contains
                             buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1), &
                             MPI_DOUBLE_PRECISION, bc_z%end, 1, &
                             MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                     end if
 #endif
 
                 end if
 
-#if defined(MFC_OpenACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi .eqv. .false.) then
                     !$acc update device(q_cons_buff_recv)
                 end if
@@ -2449,19 +2441,19 @@ contains
 
         if (n > 0) then
             if (p > 0) then
-                @:ALLOCATE(ib_buff_send(0:-1 + gp_layers * &
+                @:ALLOCATE_GLOBAL(ib_buff_send(0:-1 + gp_layers * &
                                         & (m + 2*gp_layers + 1)* &
                                         & (n + 2*gp_layers + 1)* &
                                         & (p + 2*gp_layers + 1)/ &
                                         & (min(m, n, p) + 2*gp_layers + 1)))
             else
-                @:ALLOCATE(ib_buff_send(0:-1 + gp_layers* &
+                @:ALLOCATE_GLOBAL(ib_buff_send(0:-1 + gp_layers* &
                                         & (max(m, n) + 2*gp_layers + 1)))
             end if
         else
-            @:ALLOCATE(ib_buff_send(0:-1 + gp_layers))
+            @:ALLOCATE_GLOBAL(ib_buff_send(0:-1 + gp_layers))
         end if
-        @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1)))
+        @:ALLOCATE_GLOBAL(ib_buff_recv(0:ubound(ib_buff_send, 1)))
 
         !nCalls_time = nCalls_time + 1
 
@@ -2483,7 +2475,7 @@ contains
 
                 !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi) then
                     !$acc host_data use_device( ib_buff_recv, ib_buff_send, ib_buff_recv, ib_buff_send)
 
@@ -2514,7 +2506,7 @@ contains
                         MPI_INTEGER, bc_x%beg, 0, &
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 end if
 #endif
 
@@ -2533,7 +2525,7 @@ contains
 
                 !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi) then
                     !$acc host_data use_device( ib_buff_recv, ib_buff_send )
 
@@ -2563,13 +2555,13 @@ contains
                         MPI_INTEGER, bc_x%beg, 0, &
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 end if
 #endif
 
             end if
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
             if (cu_mpi .eqv. .false.) then
                 !$acc update device(ib_buff_recv)
             end if
@@ -2605,7 +2597,7 @@ contains
 
                 !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi) then
                     !$acc host_data use_device( ib_buff_recv, ib_buff_send )
 
@@ -2634,7 +2626,7 @@ contains
                         MPI_INTEGER, bc_x%end, 1, &
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 end if
 #endif
 
@@ -2653,7 +2645,7 @@ contains
 
                 !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi) then
                     !$acc host_data use_device( ib_buff_recv, ib_buff_send )
 
@@ -2683,7 +2675,7 @@ contains
                         MPI_INTEGER, bc_x%end, 1, &
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 end if
 #endif
 
@@ -2727,7 +2719,7 @@ contains
 
                 !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi) then
                     !$acc host_data use_device( ib_buff_recv, ib_buff_send )
 
@@ -2758,7 +2750,7 @@ contains
                         MPI_INTEGER, bc_y%beg, 0, &
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 end if
 #endif
 
@@ -2778,7 +2770,7 @@ contains
 
                 !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi) then
                     !$acc host_data use_device( ib_buff_recv, ib_buff_send )
 
@@ -2809,13 +2801,13 @@ contains
                         MPI_INTEGER, bc_y%beg, 0, &
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 end if
 #endif
 
             end if
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
             if (cu_mpi .eqv. .false.) then
                 !$acc update device(ib_buff_recv)
             end if
@@ -2853,7 +2845,7 @@ contains
 
                 !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi) then
                     !$acc host_data use_device( ib_buff_recv, ib_buff_send )
 
@@ -2884,7 +2876,7 @@ contains
                         MPI_INTEGER, bc_y%end, 1, &
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 end if
 #endif
 
@@ -2904,7 +2896,7 @@ contains
 
                 !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi) then
                     !$acc host_data use_device( ib_buff_recv, ib_buff_send )
 
@@ -2935,13 +2927,13 @@ contains
                         MPI_INTEGER, bc_y%end, 1, &
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 end if
 #endif
 
             end if
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
             if (cu_mpi .eqv. .false.) then
                 !$acc update device(ib_buff_recv)
             end if
@@ -2982,7 +2974,7 @@ contains
 
                 !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi) then
                     !$acc host_data use_device( ib_buff_recv, ib_buff_send )
 
@@ -3013,7 +3005,7 @@ contains
                         MPI_INTEGER, bc_z%beg, 0, &
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 end if
 #endif
 
@@ -3033,7 +3025,7 @@ contains
 
                 !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi) then
                     !$acc host_data use_device( ib_buff_recv, ib_buff_send )
 
@@ -3064,13 +3056,13 @@ contains
                         MPI_INTEGER, bc_z%beg, 0, &
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 end if
 #endif
 
             end if
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
             if (cu_mpi .eqv. .false.) then
                 !$acc update device(ib_buff_recv)
             end if
@@ -3109,7 +3101,7 @@ contains
 
                 !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi) then
                     !$acc host_data use_device( ib_buff_recv, ib_buff_send )
 
@@ -3139,7 +3131,7 @@ contains
                         MPI_INTEGER, bc_z%end, 1, &
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 end if
 #endif
 
@@ -3160,7 +3152,7 @@ contains
 
                 !call MPI_Barrier(MPI_COMM_WORLD, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 if (cu_mpi) then
                     !$acc host_data use_device( ib_buff_recv, ib_buff_send )
 
@@ -3190,13 +3182,13 @@ contains
                         MPI_INTEGER, bc_z%end, 1, &
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
                 end if
 #endif
 
             end if
 
-#if defined(_OPENACC) && defined(__PGI)
+#if defined(MFC_OpenACC)
             if (cu_mpi .eqv. .false.) then
                 !$acc update device(ib_buff_recv)
             end if
@@ -3229,9 +3221,9 @@ contains
 #ifdef MFC_MPI
 
         ! Deallocating q_cons_buff_send and q_cons_buff_recv
-        @:DEALLOCATE(q_cons_buff_send, q_cons_buff_recv)
+        @:DEALLOCATE_GLOBAL(q_cons_buff_send, q_cons_buff_recv)
         if (ib) then
-            @:DEALLOCATE(ib_buff_send, ib_buff_recv)
+            @:DEALLOCATE_GLOBAL(ib_buff_send, ib_buff_recv)
         end if
 
 #endif
diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp
index b916688b6..02b9f2075 100644
--- a/src/simulation/m_qbmm.fpp
+++ b/src/simulation/m_qbmm.fpp
@@ -26,21 +26,32 @@ module m_qbmm
 
     private; public :: s_initialize_qbmm_module, s_mom_inv, s_coeff, s_compute_qbmm_rhs
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :, :), momrhs)
+!$acc declare link(momrhs)
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :, :, :) :: momrhs
-
+!$acc declare create(momrhs)
+#endif
     #:if MFC_CASE_OPTIMIZATION
         integer, parameter :: nterms = ${nterms}$
     #:else
         integer :: nterms
+        !$acc declare create(nterms)
     #:endif
 
-    type(int_bounds_info) :: is1, is2, is3
+    type(int_bounds_info) :: is1_qbmm, is2_qbmm, is3_qbmm
+!$acc declare create(is1_qbmm, is2_qbmm, is3_qbmm)
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(integer, dimension(:), bubrs)
+    @:CRAY_DECLARE_GLOBAL(integer, dimension(:, :), bubmoms)
+!$acc declare link(bubrs, bubmoms)
+#else
     integer, allocatable, dimension(:) :: bubrs
     integer, allocatable, dimension(:, :) :: bubmoms
-
-    !$acc declare create(momrhs, nterms, is1, is2, is3)
-    !$acc declare create(bubrs, bubmoms)
+!$acc declare create(bubrs, bubmoms)
+#endif
 
 contains
 
@@ -58,11 +69,12 @@ contains
                 nterms = 7
             end if
 
+            !$acc enter data copyin(nterms)
             !$acc update device(nterms)
 
         #:endif
 
-        @:ALLOCATE(momrhs(3, 0:2, 0:2, nterms, nb))
+        @:ALLOCATE_GLOBAL(momrhs(3, 0:2, 0:2, nterms, nb))
         momrhs = 0d0
 
         ! Assigns the required RHS moments for moment transport equations
@@ -394,8 +406,8 @@ contains
 
         !$acc update device(momrhs)
 
-        @:ALLOCATE(bubrs(1:nb))
-        @:ALLOCATE(bubmoms(1:nb, 1:nmom))
+        @:ALLOCATE_GLOBAL(bubrs(1:nb))
+        @:ALLOCATE_GLOBAL(bubmoms(1:nb, 1:nmom))
 
         do i = 1, nb
             bubrs(i) = bub_idx%rs(i)
@@ -651,9 +663,14 @@ contains
     end subroutine
 
 !Coefficient array for non-polytropic model (pb and mv values are accounted in wght_pb and wght_mv)
+
     subroutine s_coeff_nonpoly(pres, rho, c, coeffs)
-        !$acc routine seq
-        real(kind(0.d0)), intent(INOUT) :: pres, rho, c
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS s_coeff_nonpoly
+#else
+!$acc routine seq
+#endif
+        real(kind(0.d0)), intent(IN) :: pres, rho, c
         real(kind(0.d0)), dimension(nterms, 0:2, 0:2), intent(OUT) :: coeffs
         integer :: i1, i2, q
 
@@ -720,7 +737,12 @@ contains
 
 !Coefficient array for polytropic model (pb for each R0 bin accounted for in wght_pb)
     subroutine s_coeff(pres, rho, c, coeffs)
-        !$acc routine seq
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS s_coeff
+#else
+!$acc routine seq
+#endif
+
         real(kind(0.d0)), intent(INOUT) :: pres, rho, c
         real(kind(0.d0)), dimension(nterms, 0:2, 0:2), intent(OUT) :: coeffs
         integer :: i1, i2, q
@@ -797,10 +819,14 @@ contains
         integer :: id1, id2, id3
         integer :: i1, i2
 
+        is1_qbmm = ix; is2_qbmm = iy; is3_qbmm = iz
+
+        !$acc update device(is1_qbmm, is2_qbmm, is3_qbmm)
+
         !$acc parallel loop collapse(3) gang vector default(present) private(moms, msum, wght, abscX, abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, n_tait, B_tait, pres, rho, nbub, c, alf, R3, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T)
-        do id3 = iz%beg, iz%end
-            do id2 = iy%beg, iy%end
-                do id1 = ix%beg, ix%end
+        do id3 = is3_qbmm%beg, is3_qbmm%end
+            do id2 = is2_qbmm%beg, is2_qbmm%end
+                do id1 = is1_qbmm%beg, is1_qbmm%end
 
                     alf = q_prim_vf(alf_idx)%sf(id1, id2, id3)
                     pres = q_prim_vf(E_idx)%sf(id1, id2, id3)
@@ -989,7 +1015,11 @@ contains
     end subroutine s_mom_inv
 
     subroutine s_chyqmom(momin, wght, abscX, abscY)
-        !$acc routine seq
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS s_chyqmom
+#else
+!$acc routine seq
+#endif
         real(kind(0d0)), dimension(nnode), intent(INOUT) :: wght, abscX, abscY
         real(kind(0d0)), dimension(nmom), intent(IN) :: momin
 
@@ -1052,7 +1082,11 @@ contains
     end subroutine s_chyqmom
 
     subroutine s_hyqmom(frho, fup, fmom)
-        !$acc routine seq
+#ifdef CRAY_ACC_WAR
+        !DIR$ INLINEALWAYS s_hyqmom
+#else
+!$acc routine seq
+#endif
         real(kind(0d0)), dimension(2), intent(INOUT) :: frho, fup
         real(kind(0d0)), dimension(3), intent(IN) :: fmom
         real(kind(0d0)) :: bu, d2, c2
@@ -1086,6 +1120,7 @@ contains
     function f_quad2D(abscX, abscY, wght_in, pow)
         !$acc routine seq
         real(kind(0.d0)), dimension(nnode), intent(IN) :: abscX, abscY, wght_in
+
         real(kind(0.d0)), dimension(3), intent(IN) :: pow
         real(kind(0.d0)) :: f_quad2D
 
diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp
index e7cbeb1d7..d2f2c9032 100644
--- a/src/simulation/m_rhs.fpp
+++ b/src/simulation/m_rhs.fpp
@@ -59,15 +59,17 @@ module m_rhs
  s_pressure_relaxation_procedure, &
  s_finalize_rhs_module
 
-    type(vector_field) :: q_cons_qp !<
     !! This variable contains the WENO-reconstructed values of the cell-average
     !! conservative variables, which are located in q_cons_vf, at cell-interior
     !! Gaussian quadrature points (QP).
+    type(vector_field) :: q_cons_qp !<
+    !$acc declare create(q_cons_qp)
 
-    type(vector_field) :: q_prim_qp !<
     !! The primitive variables at cell-interior Gaussian quadrature points. These
     !! are calculated from the conservative variables and gradient magnitude (GM)
     !! of the volume fractions, q_cons_qp and gm_alpha_qp, respectively.
+    type(vector_field) :: q_prim_qp !<
+    !$acc declare create(q_prim_qp)
 
     !> @name The first-order spatial derivatives of the primitive variables at cell-
     !! interior Gaussian quadrature points. These are WENO-reconstructed from
@@ -75,22 +77,30 @@ module m_rhs
     !! of the divergence theorem on the integral-average cell-boundary values
     !! of the primitive variables, located in qK_prim_n, where K = L or R.
     !> @{
-    type(vector_field) :: dq_prim_dx_qp
-    type(vector_field) :: dq_prim_dy_qp
-    type(vector_field) :: dq_prim_dz_qp
-    !> @}
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(type(vector_field), dimension(:), dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp)
+!$acc declare link(dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp)
+#else
+    type(vector_field), allocatable, dimension(:) :: dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp
+!$acc declare create(dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp)
+#endif
 
     !> @name The left and right WENO-reconstructed cell-boundary values of the cell-
     !! average first-order spatial derivatives of the primitive variables. The
     !! cell-average of the first-order spatial derivatives may be found in the
     !! variables dq_prim_ds_qp, where s = x, y or z.
     !> @{
-    type(vector_field), allocatable, dimension(:) :: dqL_prim_dx_n
-    type(vector_field), allocatable, dimension(:) :: dqL_prim_dy_n
-    type(vector_field), allocatable, dimension(:) :: dqL_prim_dz_n
-    type(vector_field), allocatable, dimension(:) :: dqR_prim_dx_n
-    type(vector_field), allocatable, dimension(:) :: dqR_prim_dy_n
-    type(vector_field), allocatable, dimension(:) :: dqR_prim_dz_n
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(type(vector_field), dimension(:), dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n)
+    @:CRAY_DECLARE_GLOBAL(type(vector_field), dimension(:), dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n)
+!$acc declare link(dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n)
+!$acc declare link(dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n)
+#else
+    type(vector_field), allocatable, dimension(:) :: dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n
+    type(vector_field), allocatable, dimension(:) :: dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n
+!$acc declare create(dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n)
+!$acc declare create(dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n)
+#endif
     !> @}
 
     type(vector_field) :: gm_alpha_qp  !<
@@ -98,64 +108,108 @@ module m_rhs
     !! quadrature points. gm_alpha_qp is calculated from individual first-order
     !! spatial derivatives located in dq_prim_ds_qp.
 
+    !$acc declare create(gm_alpha_qp)
+
     !> @name The left and right WENO-reconstructed cell-boundary values of the cell-
     !! average gradient magnitude of volume fractions, located in gm_alpha_qp.
     !> @{
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(type(vector_field), dimension(:), gm_alphaL_n)
+    @:CRAY_DECLARE_GLOBAL(type(vector_field), dimension(:), gm_alphaR_n)
+!$acc declare link(gm_alphaL_n, gm_alphaR_n)
+#else
     type(vector_field), allocatable, dimension(:) :: gm_alphaL_n
     type(vector_field), allocatable, dimension(:) :: gm_alphaR_n
+!$acc declare create(gm_alphaL_n, gm_alphaR_n)
+#endif
     !> @}
 
     !> @name The cell-boundary values of the fluxes (src - source, gsrc - geometrical
     !! source). These are computed by applying the chosen Riemann problem solver
     !! .on the left and right cell-boundary values of the primitive variables
     !> @{
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(type(vector_field), dimension(:), flux_n)
+    @:CRAY_DECLARE_GLOBAL(type(vector_field), dimension(:), flux_src_n)
+    @:CRAY_DECLARE_GLOBAL(type(vector_field), dimension(:), flux_gsrc_n)
+!$acc declare link(flux_n, flux_src_n, flux_gsrc_n)
+#else
     type(vector_field), allocatable, dimension(:) :: flux_n
     type(vector_field), allocatable, dimension(:) :: flux_src_n
     type(vector_field), allocatable, dimension(:) :: flux_gsrc_n
+!$acc declare create(flux_n, flux_src_n, flux_gsrc_n)
+#endif
     !> @}
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(type(vector_field), dimension(:), qL_prim, qR_prim)
+!$acc declare link(qL_prim, qR_prim)
+#else
     type(vector_field), allocatable, dimension(:) :: qL_prim, qR_prim
+!$acc declare create(qL_prim, qR_prim)
+#endif
 
     type(int_bounds_info) :: iv !< Vector field indical bounds
+    !$acc declare create(iv)
 
     !> @name Indical bounds in the x-, y- and z-directions
     !> @{
     type(int_bounds_info) :: ix, iy, iz
-    !> @}
+    !$acc declare create(ix, iy, iz)
 
     type(int_bounds_info) :: is1, is2, is3
+    !$acc declare create(is1, is2, is3)
 
     type(int_bounds_info) :: ixt, iyt, izt
-
-    real(kind(0d0)), allocatable, dimension(:, :, :, :, :) :: bub_mom_src
-    !$acc declare create(bub_mom_src)
+    !$acc declare create(ixt, iyt, izt)
 
     !> @name Saved fluxes for testing
     !> @{
     type(scalar_field) :: alf_sum
     !> @}
+!$acc declare create(alf_sum)
+
+#ifdef CRAY_ACC_WAR
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), blkmod1, blkmod2, alpha1, alpha2, Kterm)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf)
+!$acc declare link(blkmod1, blkmod2, alpha1, alpha2, Kterm)
+!$acc declare link(qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf)
+!$acc declare link(dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf)
 
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :) :: blkmod1, blkmod2, alpha1, alpha2, Kterm
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf
-
+!$acc declare create(blkmod1, blkmod2, alpha1, alpha2, Kterm)
+!$acc declare create(qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf)
+!$acc declare create(dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf)
+#endif
+
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:), gamma_min, pres_inf)
+!$acc declare link(gamma_min, pres_inf)
+#else
     real(kind(0d0)), allocatable, dimension(:) :: gamma_min, pres_inf
-    !$acc declare create(gamma_min, pres_inf)
+!$acc declare create(gamma_min, pres_inf)
+#endif
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), Res)
+!$acc declare link(Res)
+#else
     real(kind(0d0)), allocatable, dimension(:, :) :: Res
-    !$acc declare create(Res)
-
-    !$acc declare create(q_cons_qp,q_prim_qp,  &
-    !$acc   dq_prim_dx_qp,dq_prim_dy_qp,dq_prim_dz_qp,dqL_prim_dx_n,dqL_prim_dy_n, &
-    !$acc   dqL_prim_dz_n,dqR_prim_dx_n,dqR_prim_dy_n,dqR_prim_dz_n,gm_alpha_qp,       &
-    !$acc   gm_alphaL_n,gm_alphaR_n,flux_n,flux_src_n,flux_gsrc_n,       &
-    !$acc   qL_prim, qR_prim, iv,ix, iy, iz,is1,is2,is3,alf_sum, &
-    !$acc   blkmod1, blkmod2, alpha1, alpha2, Kterm, qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, &
-    !$acc   dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf, &
-    !$acc   ixt, iyt, izt)
+!$acc declare create(Res)
+#endif
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), nbub)
+!$acc declare link(nbub)
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :) :: nbub !< Bubble number density
-    !$acc declare create(nbub)
+!$acc declare create(nbub)
+#endif
 
 contains
 
@@ -174,6 +228,7 @@ contains
         ix%end = m - ix%beg; iy%end = n - iy%beg; iz%end = p - iz%beg
         ! ==================================================================
 
+        !$acc enter data copyin(ix, iy, iz)
         !$acc update device(ix, iy, iz)
 
         ixt = ix; iyt = iy; izt = iz
@@ -193,22 +248,24 @@ contains
             @:ALLOCATE(q_prim_qp%vf(l)%sf(ix%beg:ix%end, iy%beg:iy%end, iz%beg:iz%end))
         end do
 
+        @:ACC_SETUP_VFs(q_cons_qp, q_prim_qp)
+
         do l = 1, cont_idx%end
-            q_prim_qp%vf(l)%sf => &
-                q_cons_qp%vf(l)%sf
+            q_prim_qp%vf(l)%sf => q_cons_qp%vf(l)%sf
+            !$acc enter data copyin(q_prim_qp%vf(l)%sf)
             !$acc enter data attach(q_prim_qp%vf(l)%sf)
         end do
 
         do l = adv_idx%beg, adv_idx%end
-            q_prim_qp%vf(l)%sf => &
-                q_cons_qp%vf(l)%sf
+            q_prim_qp%vf(l)%sf => q_cons_qp%vf(l)%sf
+            !$acc enter data copyin(q_prim_qp%vf(l)%sf)
             !$acc enter data attach(q_prim_qp%vf(l)%sf)
         end do
 
         ! ==================================================================
 
         if (qbmm) then
-            @:ALLOCATE(mom_sp(1:nmomsp), mom_3d(0:2, 0:2, nb))
+            @:ALLOCATE_GLOBAL(mom_sp(1:nmomsp), mom_3d(0:2, 0:2, nb))
 
             do i = 0, 2
                 do j = 0, 2
@@ -217,105 +274,111 @@ contains
                                       & ix%beg:ix%end, &
                                       & iy%beg:iy%end, &
                                       & iz%beg:iz%end))
+                        @:ACC_SETUP_SFs(mom_3d(i, j, k))
                     end do
                 end do
             end do
+
             do i = 1, nmomsp
                 @:ALLOCATE(mom_sp(i)%sf( &
                         & ix%beg:ix%end, &
                         & iy%beg:iy%end, &
                         & iz%beg:iz%end))
+                @:ACC_SETUP_SFs(mom_sp(i))
             end do
         end if
 
         ! Allocation/Association of qK_cons_n and qK_prim_n ==========
-        @:ALLOCATE(qL_prim(1:num_dims))
-        @:ALLOCATE(qR_prim(1:num_dims))
+        @:ALLOCATE_GLOBAL(qL_prim(1:num_dims))
+        @:ALLOCATE_GLOBAL(qR_prim(1:num_dims))
 
         do i = 1, num_dims
             @:ALLOCATE(qL_prim(i)%vf(1:sys_size))
             @:ALLOCATE(qR_prim(i)%vf(1:sys_size))
-        end do
-
-        if (weno_Re_flux) then
-
-            do i = 1, num_dims
-                do l = mom_idx%beg, mom_idx%end
-                    @:ALLOCATE(qL_prim(i)%vf(l)%sf(ix%beg:ix%end, iy%beg:iy%end, iz%beg:iz%end))
-                    @:ALLOCATE(qR_prim(i)%vf(l)%sf(ix%beg:ix%end, iy%beg:iy%end, iz%beg:iz%end))
-                end do
+            do l = mom_idx%beg, mom_idx%end
+                @:ALLOCATE(qL_prim(i)%vf(l)%sf(ix%beg:ix%end, iy%beg:iy%end, iz%beg:iz%end))
+                @:ALLOCATE(qR_prim(i)%vf(l)%sf(ix%beg:ix%end, iy%beg:iy%end, iz%beg:iz%end))
             end do
-        end if
+            @:ACC_SETUP_VFs(qL_prim(i), qR_prim(i))
+        end do
 
         if (mpp_lim .and. bubbles) then
             @:ALLOCATE(alf_sum%sf(ix%beg:ix%end, iy%beg:iy%end, iz%beg:iz%end))
         end if
         ! END: Allocation/Association of qK_cons_n and qK_prim_n ======
 
-        @:ALLOCATE(qL_rsx_vf(ix%beg:ix%end, &
+        @:ALLOCATE_GLOBAL(qL_rsx_vf(ix%beg:ix%end, &
             iy%beg:iy%end, iz%beg:iz%end, 1:sys_size))
-        @:ALLOCATE(qR_rsx_vf(ix%beg:ix%end, &
+        @:ALLOCATE_GLOBAL(qR_rsx_vf(ix%beg:ix%end, &
             iy%beg:iy%end, iz%beg:iz%end, 1:sys_size))
 
         if (n > 0) then
 
-            @:ALLOCATE(qL_rsy_vf(iy%beg:iy%end, &
+            @:ALLOCATE_GLOBAL(qL_rsy_vf(iy%beg:iy%end, &
                 ix%beg:ix%end, iz%beg:iz%end, 1:sys_size))
-            @:ALLOCATE(qR_rsy_vf(iy%beg:iy%end, &
+            @:ALLOCATE_GLOBAL(qR_rsy_vf(iy%beg:iy%end, &
                 ix%beg:ix%end, iz%beg:iz%end, 1:sys_size))
         else
-            @:ALLOCATE(qL_rsy_vf(ix%beg:ix%end, &
+            @:ALLOCATE_GLOBAL(qL_rsy_vf(ix%beg:ix%end, &
                 iy%beg:iy%end, iz%beg:iz%end, 1:sys_size))
-            @:ALLOCATE(qR_rsy_vf(ix%beg:ix%end, &
+            @:ALLOCATE_GLOBAL(qR_rsy_vf(ix%beg:ix%end, &
                 iy%beg:iy%end, iz%beg:iz%end, 1:sys_size))
         end if
 
         if (p > 0) then
-            @:ALLOCATE(qL_rsz_vf(iz%beg:iz%end, &
+            @:ALLOCATE_GLOBAL(qL_rsz_vf(iz%beg:iz%end, &
                 iy%beg:iy%end, ix%beg:ix%end, 1:sys_size))
-            @:ALLOCATE(qR_rsz_vf(iz%beg:iz%end, &
+            @:ALLOCATE_GLOBAL(qR_rsz_vf(iz%beg:iz%end, &
                 iy%beg:iy%end, ix%beg:ix%end, 1:sys_size))
         else
-            @:ALLOCATE(qL_rsz_vf(ix%beg:ix%end, &
+            @:ALLOCATE_GLOBAL(qL_rsz_vf(ix%beg:ix%end, &
                 iy%beg:iy%end, iz%beg:iz%end, 1:sys_size))
-            @:ALLOCATE(qR_rsz_vf(ix%beg:ix%end, &
+            @:ALLOCATE_GLOBAL(qR_rsz_vf(ix%beg:ix%end, &
                 iy%beg:iy%end, iz%beg:iz%end, 1:sys_size))
 
         end if
 
         ! Allocation of dq_prim_ds_qp ======================================
 
-        if (any(Re_size > 0)) then
-
-            @:ALLOCATE(dq_prim_dx_qp%vf(1:sys_size))
-            @:ALLOCATE(dq_prim_dy_qp%vf(1:sys_size))
-            @:ALLOCATE(dq_prim_dz_qp%vf(1:sys_size))
+        @:ALLOCATE_GLOBAL(dq_prim_dx_qp(1:1))
+        @:ALLOCATE_GLOBAL(dq_prim_dy_qp(1:1))
+        @:ALLOCATE_GLOBAL(dq_prim_dz_qp(1:1))
 
+        if (any(Re_size > 0)) then
+            @:ALLOCATE(dq_prim_dx_qp(1)%vf(1:sys_size))
+            @:ALLOCATE(dq_prim_dy_qp(1)%vf(1:sys_size))
+            @:ALLOCATE(dq_prim_dz_qp(1)%vf(1:sys_size))
             if (any(Re_size > 0)) then
 
                 do l = mom_idx%beg, mom_idx%end
-                    @:ALLOCATE(dq_prim_dx_qp%vf(l)%sf( &
+                    @:ALLOCATE(dq_prim_dx_qp(1)%vf(l)%sf( &
                               & ix%beg:ix%end, &
                               & iy%beg:iy%end, &
                               & iz%beg:iz%end))
                 end do
 
+                @:ACC_SETUP_VFs(dq_prim_dx_qp(1))
+
                 if (n > 0) then
 
                     do l = mom_idx%beg, mom_idx%end
-                        @:ALLOCATE(dq_prim_dy_qp%vf(l)%sf( &
+                        @:ALLOCATE(dq_prim_dy_qp(1)%vf(l)%sf( &
                                  & ix%beg:ix%end, &
                                  & iy%beg:iy%end, &
                                  & iz%beg:iz%end))
                     end do
 
+                    @:ACC_SETUP_VFs(dq_prim_dy_qp(1))
+
                     if (p > 0) then
+
                         do l = mom_idx%beg, mom_idx%end
-                            @:ALLOCATE(dq_prim_dz_qp%vf(l)%sf( &
+                            @:ALLOCATE(dq_prim_dz_qp(1)%vf(l)%sf( &
                                      & ix%beg:ix%end, &
                                      & iy%beg:iy%end, &
                                      & iz%beg:iz%end))
                         end do
+                        @:ACC_SETUP_VFs(dq_prim_dz_qp(1))
                     end if
 
                 end if
@@ -326,12 +389,12 @@ contains
         ! END: Allocation of dq_prim_ds_qp =================================
 
         ! Allocation/Association of dqK_prim_ds_n =======================
-        @:ALLOCATE(dqL_prim_dx_n(1:num_dims))
-        @:ALLOCATE(dqL_prim_dy_n(1:num_dims))
-        @:ALLOCATE(dqL_prim_dz_n(1:num_dims))
-        @:ALLOCATE(dqR_prim_dx_n(1:num_dims))
-        @:ALLOCATE(dqR_prim_dy_n(1:num_dims))
-        @:ALLOCATE(dqR_prim_dz_n(1:num_dims))
+        @:ALLOCATE_GLOBAL(dqL_prim_dx_n(1:num_dims))
+        @:ALLOCATE_GLOBAL(dqL_prim_dy_n(1:num_dims))
+        @:ALLOCATE_GLOBAL(dqL_prim_dz_n(1:num_dims))
+        @:ALLOCATE_GLOBAL(dqR_prim_dx_n(1:num_dims))
+        @:ALLOCATE_GLOBAL(dqR_prim_dy_n(1:num_dims))
+        @:ALLOCATE_GLOBAL(dqR_prim_dz_n(1:num_dims))
 
         if (any(Re_size > 0)) then
             do i = 1, num_dims
@@ -383,40 +446,42 @@ contains
 
                 end if
 
+                @:ACC_SETUP_VFs(dqL_prim_dx_n(i), dqL_prim_dy_n(i), dqL_prim_dz_n(i))
+                @:ACC_SETUP_VFs(dqR_prim_dx_n(i), dqR_prim_dy_n(i), dqR_prim_dz_n(i))
             end do
         end if
         ! END: Allocation/Association of d K_prim_ds_n ==================
 
         if (any(Re_size > 0)) then
             if (weno_Re_flux) then
-                @:ALLOCATE(dqL_rsx_vf(ix%beg:ix%end, &
+                @:ALLOCATE_GLOBAL(dqL_rsx_vf(ix%beg:ix%end, &
                     iy%beg:iy%end, iz%beg:iz%end, mom_idx%beg:mom_idx%end))
-                @:ALLOCATE(dqR_rsx_vf(ix%beg:ix%end, &
+                @:ALLOCATE_GLOBAL(dqR_rsx_vf(ix%beg:ix%end, &
                     iy%beg:iy%end, iz%beg:iz%end, mom_idx%beg:mom_idx%end))
 
                 if (n > 0) then
 
-                    @:ALLOCATE(dqL_rsy_vf(iy%beg:iy%end, &
+                    @:ALLOCATE_GLOBAL(dqL_rsy_vf(iy%beg:iy%end, &
                         ix%beg:ix%end, iz%beg:iz%end, mom_idx%beg:mom_idx%end))
-                    @:ALLOCATE(dqR_rsy_vf(iy%beg:iy%end, &
+                    @:ALLOCATE_GLOBAL(dqR_rsy_vf(iy%beg:iy%end, &
                         ix%beg:ix%end, iz%beg:iz%end, mom_idx%beg:mom_idx%end))
                 else
-                    @:ALLOCATE(dqL_rsy_vf(ix%beg:ix%end, &
+                    @:ALLOCATE_GLOBAL(dqL_rsy_vf(ix%beg:ix%end, &
                         iy%beg:iy%end, iz%beg:iz%end, mom_idx%beg:mom_idx%end))
-                    @:ALLOCATE(dqR_rsy_vf(ix%beg:ix%end, &
+                    @:ALLOCATE_GLOBAL(dqR_rsy_vf(ix%beg:ix%end, &
                         iy%beg:iy%end, iz%beg:iz%end, mom_idx%beg:mom_idx%end))
 
                 end if
 
                 if (p > 0) then
-                    @:ALLOCATE(dqL_rsz_vf(iz%beg:iz%end, &
+                    @:ALLOCATE_GLOBAL(dqL_rsz_vf(iz%beg:iz%end, &
                         iy%beg:iy%end, ix%beg:ix%end, mom_idx%beg:mom_idx%end))
-                    @:ALLOCATE(dqR_rsz_vf(iz%beg:iz%end, &
+                    @:ALLOCATE_GLOBAL(dqR_rsz_vf(iz%beg:iz%end, &
                         iy%beg:iy%end, ix%beg:ix%end, mom_idx%beg:mom_idx%end))
                 else
-                    @:ALLOCATE(dqL_rsz_vf(ix%beg:ix%end, &
+                    @:ALLOCATE_GLOBAL(dqL_rsz_vf(ix%beg:ix%end, &
                         iy%beg:iy%end, iz%beg:iz%end, mom_idx%beg:mom_idx%end))
-                    @:ALLOCATE(dqR_rsz_vf(ix%beg:ix%end, &
+                    @:ALLOCATE_GLOBAL(dqR_rsz_vf(ix%beg:ix%end, &
                         iy%beg:iy%end, iz%beg:iz%end, mom_idx%beg:mom_idx%end))
 
                 end if
@@ -426,20 +491,14 @@ contains
         ! ==================================================================
 
         ! Allocation of gm_alphaK_n =====================================
-        @:ALLOCATE(gm_alphaL_n(1:num_dims))
-        @:ALLOCATE(gm_alphaR_n(1:num_dims))
+        @:ALLOCATE_GLOBAL(gm_alphaL_n(1:num_dims))
+        @:ALLOCATE_GLOBAL(gm_alphaR_n(1:num_dims))
         ! ==================================================================
 
-        if (bubbles) then
-            if (qbmm) then
-                @:ALLOCATE(bub_mom_src(1:nmom, 0:m, 0:n, 0:p, 1:nb))
-            end if
-        end if
-
         ! Allocation/Association of flux_n, flux_src_n, and flux_gsrc_n ===
-        @:ALLOCATE(flux_n(1:num_dims))
-        @:ALLOCATE(flux_src_n(1:num_dims))
-        @:ALLOCATE(flux_gsrc_n(1:num_dims))
+        @:ALLOCATE_GLOBAL(flux_n(1:num_dims))
+        @:ALLOCATE_GLOBAL(flux_src_n(1:num_dims))
+        @:ALLOCATE_GLOBAL(flux_gsrc_n(1:num_dims))
 
         do i = 1, num_dims
 
@@ -448,7 +507,6 @@ contains
             @:ALLOCATE(flux_gsrc_n(i)%vf(1:sys_size))
 
             if (i == 1) then
-
                 do l = 1, sys_size
                     @:ALLOCATE(flux_n(i)%vf(l)%sf( &
                              & ix%beg:ix%end, &
@@ -481,14 +539,7 @@ contains
                                  & iy%beg:iy%end, &
                                  & iz%beg:iz%end))
                     end do
-                else
-                    do l = adv_idx%beg + 1, adv_idx%end
-                        flux_src_n(i)%vf(l)%sf => &
-                            flux_src_n(i)%vf(adv_idx%beg)%sf
-                        !$acc enter data attach(flux_src_n(i)%vf(l)%sf(ix%beg:ix%end,iy%beg:iy%end,iz%beg:iz%end))
-                    end do
                 end if
-
             else
                 do l = 1, sys_size
                     @:ALLOCATE(flux_gsrc_n(i)%vf(l)%sf( &
@@ -496,25 +547,35 @@ contains
                         iy%beg:iy%end, &
                         iz%beg:iz%end))
                 end do
+            end if
+
+            @:ACC_SETUP_VFs(flux_n(i), flux_src_n(i), flux_gsrc_n(i))
+
+            if (i == 1) then
+                if (riemann_solver /= 1) then
+                    do l = adv_idx%beg + 1, adv_idx%end
+                        flux_src_n(i)%vf(l)%sf => flux_src_n(i)%vf(adv_idx%beg)%sf
+
+                        !$acc enter data attach(flux_src_n(i)%vf(l)%sf)
+                    end do
+                end if
+            else
                 do l = 1, sys_size
-                    flux_n(i)%vf(l)%sf => &
-                        flux_n(1)%vf(l)%sf
-                    flux_src_n(i)%vf(l)%sf => &
-                        flux_src_n(1)%vf(l)%sf
+                    flux_n(i)%vf(l)%sf => flux_n(1)%vf(l)%sf
+                    flux_src_n(i)%vf(l)%sf => flux_src_n(1)%vf(l)%sf
 
                     !$acc enter data attach(flux_n(i)%vf(l)%sf,flux_src_n(i)%vf(l)%sf)
                 end do
-
             end if
         end do
 
         ! END: Allocation/Association of flux_n, flux_src_n, and flux_gsrc_n ===
 
         if (alt_soundspeed) then
-            @:ALLOCATE(blkmod1(0:m, 0:n, 0:p), blkmod2(0:m, 0:n, 0:p), alpha1(0:m, 0:n, 0:p), alpha2(0:m, 0:n, 0:p), Kterm(0:m, 0:n, 0:p))
+            @:ALLOCATE_GLOBAL(blkmod1(0:m, 0:n, 0:p), blkmod2(0:m, 0:n, 0:p), alpha1(0:m, 0:n, 0:p), alpha2(0:m, 0:n, 0:p), Kterm(0:m, 0:n, 0:p))
         end if
 
-        @:ALLOCATE(gamma_min(1:num_fluids), pres_inf(1:num_fluids))
+        @:ALLOCATE_GLOBAL(gamma_min(1:num_fluids), pres_inf(1:num_fluids))
 
         do i = 1, num_fluids
             gamma_min(i) = 1d0/fluid_pp(i)%gamma + 1d0
@@ -523,7 +584,7 @@ contains
         !$acc update device(gamma_min, pres_inf)
 
         if (any(Re_size > 0)) then
-            @:ALLOCATE(Res(1:2, 1:maxval(Re_size)))
+            @:ALLOCATE_GLOBAL(Res(1:2, 1:maxval(Re_size)))
         end if
 
         if (any(Re_size > 0)) then
@@ -570,7 +631,7 @@ contains
         end do
 
         if (bubbles) then
-            @:ALLOCATE(nbub(0:m, 0:n, 0:p))
+            @:ALLOCATE_GLOBAL(nbub(0:m, 0:n, 0:p))
         end if
 
     end subroutine s_initialize_rhs_module ! -------------------------------
@@ -738,21 +799,21 @@ contains
                 iv%beg = mom_idx%beg; iv%end = mom_idx%end
                 if (weno_Re_flux) then
                     call s_reconstruct_cell_boundary_values_visc_deriv( &
-                        dq_prim_dx_qp%vf(iv%beg:iv%end), &
+                        dq_prim_dx_qp(1)%vf(iv%beg:iv%end), &
                         dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, &
                         dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf, &
                         id, dqL_prim_dx_n(id)%vf(iv%beg:iv%end), dqR_prim_dx_n(id)%vf(iv%beg:iv%end), &
                         ix, iy, iz)
                     if (n > 0) then
                         call s_reconstruct_cell_boundary_values_visc_deriv( &
-                            dq_prim_dy_qp%vf(iv%beg:iv%end), &
+                            dq_prim_dy_qp(1)%vf(iv%beg:iv%end), &
                             dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, &
                             dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf, &
                             id, dqL_prim_dy_n(id)%vf(iv%beg:iv%end), dqR_prim_dy_n(id)%vf(iv%beg:iv%end), &
                             ix, iy, iz)
                         if (p > 0) then
                             call s_reconstruct_cell_boundary_values_visc_deriv( &
-                                dq_prim_dz_qp%vf(iv%beg:iv%end), &
+                                dq_prim_dz_qp(1)%vf(iv%beg:iv%end), &
                                 dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, &
                                 dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf, &
                                 id, dqL_prim_dz_n(id)%vf(iv%beg:iv%end), dqR_prim_dz_n(id)%vf(iv%beg:iv%end), &
@@ -773,7 +834,6 @@ contains
             end if
             ix%end = m; iy%end = n; iz%end = p
             ! ===============================================================
-            call nvtxStartRange("RHS-Riemann")
 
             ! Computing Riemann Solver Flux and Source Flux =================
 
@@ -1339,7 +1399,6 @@ contains
                         end if
                     end if
                 end if
-
             end if  ! id loop
             call nvtxEndRange
 
@@ -1367,9 +1426,9 @@ contains
                                                                q_prim_qp%vf, &
                                                                rhs_vf, &
                                                                flux_src_n(id)%vf, &
-                                                               dq_prim_dx_qp%vf, &
-                                                               dq_prim_dy_qp%vf, &
-                                                               dq_prim_dz_qp%vf, &
+                                                               dq_prim_dx_qp(1)%vf, &
+                                                               dq_prim_dy_qp(1)%vf, &
+                                                               dq_prim_dz_qp(1)%vf, &
                                                                ixt, iyt, izt)
             call nvtxEndRange
 
@@ -1392,7 +1451,6 @@ contains
                                               rhs_mv)
             call nvtxEndRange
             ! END: Additional physics and source terms =========================
-
         end do
 
         if (ib) then
@@ -1439,6 +1497,7 @@ contains
             if (n > 0) iy%beg = -buff_size; 
             if (p > 0) iz%beg = -buff_size; 
             ix%end = m - ix%beg; iy%end = n - iy%beg; iz%end = p - iz%beg
+
             !$acc update device(ix, iy, iz)
 
             !$acc parallel loop collapse(4) gang vector default(present)
@@ -1817,25 +1876,25 @@ contains
         end do
 
         @:DEALLOCATE(q_cons_qp%vf, q_prim_qp%vf)
-        @:DEALLOCATE(qL_rsx_vf, qR_rsx_vf)
+        @:DEALLOCATE_GLOBAL(qL_rsx_vf, qR_rsx_vf)
 
         if (n > 0) then
-            @:DEALLOCATE(qL_rsy_vf, qR_rsy_vf)
+            @:DEALLOCATE_GLOBAL(qL_rsy_vf, qR_rsy_vf)
         end if
 
         if (p > 0) then
-            @:DEALLOCATE(qL_rsz_vf, qR_rsz_vf)
+            @:DEALLOCATE_GLOBAL(qL_rsz_vf, qR_rsz_vf)
         end if
 
         if (any(Re_size > 0) .and. weno_Re_flux) then
-            @:DEALLOCATE(dqL_rsx_vf, dqR_rsx_vf)
+            @:DEALLOCATE_GLOBAL(dqL_rsx_vf, dqR_rsx_vf)
 
             if (n > 0) then
-                @:DEALLOCATE(dqL_rsy_vf, dqR_rsy_vf)
+                @:DEALLOCATE_GLOBAL(dqL_rsy_vf, dqR_rsy_vf)
             end if
 
             if (p > 0) then
-                @:DEALLOCATE(dqL_rsz_vf, dqR_rsz_vf)
+                @:DEALLOCATE_GLOBAL(dqL_rsz_vf, dqR_rsz_vf)
             end if
         end if
 
@@ -1846,26 +1905,26 @@ contains
 
         if (any(Re_size > 0)) then
             do l = mom_idx%beg, mom_idx%end
-                @:DEALLOCATE(dq_prim_dx_qp%vf(l)%sf)
+                @:DEALLOCATE(dq_prim_dx_qp(1)%vf(l)%sf)
             end do
 
             if (n > 0) then
 
                 do l = mom_idx%beg, mom_idx%end
-                    @:DEALLOCATE(dq_prim_dy_qp%vf(l)%sf)
+                    @:DEALLOCATE(dq_prim_dy_qp(1)%vf(l)%sf)
                 end do
 
                 if (p > 0) then
                     do l = mom_idx%beg, mom_idx%end
-                        @:DEALLOCATE(dq_prim_dz_qp%vf(l)%sf)
+                        @:DEALLOCATE(dq_prim_dz_qp(1)%vf(l)%sf)
                     end do
                 end if
 
             end if
 
-            @:DEALLOCATE(dq_prim_dx_qp%vf)
-            @:DEALLOCATE(dq_prim_dy_qp%vf)
-            @:DEALLOCATE(dq_prim_dz_qp%vf)
+            @:DEALLOCATE(dq_prim_dx_qp(1)%vf)
+            @:DEALLOCATE(dq_prim_dy_qp(1)%vf)
+            @:DEALLOCATE(dq_prim_dz_qp(1)%vf)
         end if
 
         if (any(Re_size > 0)) then
@@ -1902,8 +1961,8 @@ contains
             end do
         end if
 
-        @:DEALLOCATE(dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n)
-        @:DEALLOCATE(dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n)
+        @:DEALLOCATE_GLOBAL(dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n)
+        @:DEALLOCATE_GLOBAL(dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n)
 
         do i = num_dims, 1, -1
             if (i /= 1) then
@@ -1940,7 +1999,7 @@ contains
             @:DEALLOCATE(flux_n(i)%vf, flux_src_n(i)%vf, flux_gsrc_n(i)%vf)
         end do
 
-        @:DEALLOCATE(flux_n, flux_src_n, flux_gsrc_n)
+        @:DEALLOCATE_GLOBAL(flux_n, flux_src_n, flux_gsrc_n)
 
         s_riemann_solver => null()
         s_convert_to_mixture_variables => null()
diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp
index 1d3fc7c88..34268775b 100644
--- a/src/simulation/m_riemann_solvers.fpp
+++ b/src/simulation/m_riemann_solvers.fpp
@@ -17,6 +17,8 @@
 !!                  1) Harten-Lax-van Leer (HLL)
 !!                  2) Harten-Lax-van Leer-Contact (HLLC)
 !!                  3) Exact
+
+#:include 'macros.fpp'
 #:include 'inline_riemann.fpp'
 #:include 'inline_conversions.fpp'
 
@@ -154,39 +156,75 @@ module m_riemann_solvers
     !! source terms, by using the left and right states given in qK_prim_rs_vf,
     !! dqK_prim_ds_vf where ds = dx, dy or dz.
     !> @{
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), flux_rsx_vf, flux_src_rsx_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), flux_rsy_vf, flux_src_rsy_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), flux_rsz_vf, flux_src_rsz_vf)
+!$acc declare link( flux_rsx_vf, flux_src_rsx_vf, flux_rsy_vf,  &
+!$acc   flux_src_rsy_vf, flux_rsz_vf, flux_src_rsz_vf )
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: flux_rsx_vf, flux_src_rsx_vf
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: flux_rsy_vf, flux_src_rsy_vf
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: flux_rsz_vf, flux_src_rsz_vf
+!$acc declare create( flux_rsx_vf, flux_src_rsx_vf, flux_rsy_vf,  &
+!$acc   flux_src_rsy_vf, flux_rsz_vf, flux_src_rsz_vf )
+#endif
     !> @}
-    !$acc declare create( flux_rsx_vf, flux_src_rsx_vf, flux_rsy_vf,  &
-    !$acc   flux_src_rsy_vf, flux_rsz_vf, flux_src_rsz_vf )
 
     !> The cell-boundary values of the geometrical source flux that are computed
     !! through the chosen Riemann problem solver by using the left and right
     !! states given in qK_prim_rs_vf. Currently 2D axisymmetric for inviscid only.
     !> @{
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), flux_gsrc_rsx_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), flux_gsrc_rsy_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), flux_gsrc_rsz_vf)
+!$acc declare link( flux_gsrc_rsx_vf, flux_gsrc_rsy_vf, flux_gsrc_rsz_vf )
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsx_vf !<
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsy_vf !<
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: flux_gsrc_rsz_vf !<
+!$acc declare create( flux_gsrc_rsx_vf, flux_gsrc_rsy_vf, flux_gsrc_rsz_vf )
+#endif
     !> @}
-    !$acc declare create( flux_gsrc_rsx_vf, flux_gsrc_rsy_vf, flux_gsrc_rsz_vf )
 
     ! The cell-boundary values of the velocity. vel_src_rs_vf is determined as
     ! part of Riemann problem solution and is used to evaluate the source flux.
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), vel_src_rsx_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), vel_src_rsy_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), vel_src_rsz_vf)
+!$acc declare link(vel_src_rsx_vf, vel_src_rsy_vf, vel_src_rsz_vf)
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: vel_src_rsx_vf
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: vel_src_rsy_vf
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: vel_src_rsz_vf
-    !$acc declare create(vel_src_rsx_vf, vel_src_rsy_vf, vel_src_rsz_vf)
-
+!$acc declare create(vel_src_rsx_vf, vel_src_rsy_vf, vel_src_rsz_vf)
+#endif
+
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), mom_sp_rsx_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), mom_sp_rsy_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), mom_sp_rsz_vf)
+!$acc declare link(mom_sp_rsx_vf, mom_sp_rsy_vf, mom_sp_rsz_vf)
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: mom_sp_rsx_vf
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: mom_sp_rsy_vf
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: mom_sp_rsz_vf
-    !$acc declare create(mom_sp_rsx_vf, mom_sp_rsy_vf, mom_sp_rsz_vf)
-
+!$acc declare create(mom_sp_rsx_vf, mom_sp_rsy_vf, mom_sp_rsz_vf)
+#endif
+
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), Re_avg_rsx_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), Re_avg_rsy_vf)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), Re_avg_rsz_vf)
+!$acc declare link(Re_avg_rsx_vf, Re_avg_rsy_vf, Re_avg_rsz_vf)
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: Re_avg_rsx_vf
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: Re_avg_rsy_vf
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: Re_avg_rsz_vf
-    !$acc declare create(Re_avg_rsx_vf, Re_avg_rsy_vf, Re_avg_rsz_vf)
+!$acc declare link(Re_avg_rsx_vf, Re_avg_rsy_vf, Re_avg_rsz_vf)
+#endif
 
     procedure(s_abstract_riemann_solver), &
         pointer :: s_riemann_solver => null() !<
@@ -203,16 +241,29 @@ module m_riemann_solvers
     type(int_bounds_info) :: is1, is2, is3
     type(int_bounds_info) :: isx, isy, isz
     !> @}
-    !$acc declare create(is1, is2, is3, isx, isy, isz)
 
+!$acc declare create(is1, is2, is3, isx, isy, isz)
+
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:),  Gs)
+!$acc declare link(Gs)
+#else
     real(kind(0d0)), allocatable, dimension(:) :: Gs
-    !$acc declare create(Gs)
+!$acc declare create(Gs)
+#endif
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), Res)
+!$acc declare link(Res)
+#else
     real(kind(0d0)), allocatable, dimension(:, :) :: Res
-    !$acc declare create(Res)
+!$acc declare create(Res)
+#endif
 
 contains
 
+    @:s_compute_speed_of_sound()
+
     subroutine s_hll_riemann_solver(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, & ! -------
                                     dqL_prim_dy_vf, &
                                     dqL_prim_dz_vf, &
@@ -1881,7 +1932,7 @@ contains
                     !$acc end parallel loop
                 else
                     !$acc parallel loop collapse(3) gang vector default(present) private(vel_L, vel_R, Re_L, Re_R, &
-                    !$acc rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms)
+                    !$acc rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms) copyin(is1,is2,is3)
                     do l = is3%beg, is3%end
                         do k = is2%beg, is2%end
                             do j = is1%beg, is1%end
@@ -2185,6 +2236,7 @@ contains
 
                                     end if
                                 #:endif
+
                             end do
                         end do
                     end do
@@ -2235,7 +2287,7 @@ contains
         ! the Riemann problem solution
         integer :: i, j
 
-        allocate (Gs(1:num_fluids))
+        @:ALLOCATE_GLOBAL(Gs(1:num_fluids))
 
         do i = 1, num_fluids
             Gs(i) = fluid_pp(i)%G
@@ -2243,7 +2295,7 @@ contains
         !$acc update device(Gs)
 
         if (any(Re_size > 0)) then
-            allocate (Res(1:2, 1:maxval(Re_size)))
+            @:ALLOCATE_GLOBAL(Res(1:2, 1:maxval(Re_size)))
         end if
 
         if (any(Re_size > 0)) then
@@ -2255,6 +2307,8 @@ contains
             !$acc update device(Res, Re_idx, Re_size)
         end if
 
+        !$acc enter data copyin(is1, is2, is3, isx, isy, isz)
+
         ! Associating procedural pointer to the subroutine that will be
         ! utilized to calculate the solution of a given Riemann problem
         if (riemann_solver == 1) then
@@ -2274,26 +2328,26 @@ contains
         is1%beg = -1; is2%beg = 0; is3%beg = 0
         is1%end = m; is2%end = n; is3%end = p
 
-        allocate (flux_rsx_vf(is1%beg:is1%end, &
-                              is2%beg:is2%end, &
-                              is3%beg:is3%end, 1:sys_size))
-        allocate (flux_gsrc_rsx_vf(is1%beg:is1%end, &
-                                   is2%beg:is2%end, &
-                                   is3%beg:is3%end, 1:sys_size))
-        allocate (flux_src_rsx_vf(is1%beg:is1%end, &
-                                  is2%beg:is2%end, &
-                                  is3%beg:is3%end, advxb:sys_size))
-        allocate (vel_src_rsx_vf(is1%beg:is1%end, &
-                                 is2%beg:is2%end, &
-                                 is3%beg:is3%end, 1:num_dims))
+        @:ALLOCATE_GLOBAL(flux_rsx_vf(is1%beg:is1%end, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, 1:sys_size))
+        @:ALLOCATE_GLOBAL(flux_gsrc_rsx_vf(is1%beg:is1%end, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, 1:sys_size))
+        @:ALLOCATE_GLOBAL(flux_src_rsx_vf(is1%beg:is1%end, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, advxb:sys_size))
+        @:ALLOCATE_GLOBAL(vel_src_rsx_vf(is1%beg:is1%end, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, 1:num_dims))
         if (qbmm) then
-            allocate (mom_sp_rsx_vf(is1%beg:is1%end + 1, is2%beg:is2%end, is3%beg:is3%end, 1:4))
+            @:ALLOCATE_GLOBAL(mom_sp_rsx_vf(is1%beg:is1%end + 1, is2%beg:is2%end, is3%beg:is3%end, 1:4))
         end if
 
         if (any(Re_size > 0)) then
-            allocate (Re_avg_rsx_vf(is1%beg:is1%end, &
-                                    is2%beg:is2%end, &
-                                    is3%beg:is3%end, 1:2))
+            @:ALLOCATE_GLOBAL(Re_avg_rsx_vf(is1%beg:is1%end, &
+                is2%beg:is2%end, &
+                is3%beg:is3%end, 1:2))
         end if
 
         if (n == 0) return
@@ -2301,27 +2355,27 @@ contains
         is1%beg = -1; is2%beg = 0; is3%beg = 0
         is1%end = n; is2%end = m; is3%end = p
 
-        allocate (flux_rsy_vf(is1%beg:is1%end, &
-                              is2%beg:is2%end, &
-                              is3%beg:is3%end, 1:sys_size))
-        allocate (flux_gsrc_rsy_vf(is1%beg:is1%end, &
-                                   is2%beg:is2%end, &
-                                   is3%beg:is3%end, 1:sys_size))
-        allocate (flux_src_rsy_vf(is1%beg:is1%end, &
-                                  is2%beg:is2%end, &
-                                  is3%beg:is3%end, advxb:sys_size))
-        allocate (vel_src_rsy_vf(is1%beg:is1%end, &
-                                 is2%beg:is2%end, &
-                                 is3%beg:is3%end, 1:num_dims))
+        @:ALLOCATE_GLOBAL(flux_rsy_vf(is1%beg:is1%end, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, 1:sys_size))
+        @:ALLOCATE_GLOBAL(flux_gsrc_rsy_vf(is1%beg:is1%end, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, 1:sys_size))
+        @:ALLOCATE_GLOBAL(flux_src_rsy_vf(is1%beg:is1%end, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, advxb:sys_size))
+        @:ALLOCATE_GLOBAL(vel_src_rsy_vf(is1%beg:is1%end, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, 1:num_dims))
 
         if (qbmm) then
-            allocate (mom_sp_rsy_vf(is1%beg:is1%end + 1, is2%beg:is2%end, is3%beg:is3%end, 1:4))
+            @:ALLOCATE_GLOBAL(mom_sp_rsy_vf(is1%beg:is1%end + 1, is2%beg:is2%end, is3%beg:is3%end, 1:4))
         end if
 
         if (any(Re_size > 0)) then
-            allocate (Re_avg_rsy_vf(is1%beg:is1%end, &
-                                    is2%beg:is2%end, &
-                                    is3%beg:is3%end, 1:2))
+            @:ALLOCATE_GLOBAL(Re_avg_rsy_vf(is1%beg:is1%end, &
+                is2%beg:is2%end, &
+                is3%beg:is3%end, 1:2))
         end if
 
         if (p == 0) return
@@ -2329,27 +2383,27 @@ contains
         is1%beg = -1; is2%beg = 0; is3%beg = 0
         is1%end = p; is2%end = n; is3%end = m
 
-        allocate (flux_rsz_vf(is1%beg:is1%end, &
-                              is2%beg:is2%end, &
-                              is3%beg:is3%end, 1:sys_size))
-        allocate (flux_gsrc_rsz_vf(is1%beg:is1%end, &
-                                   is2%beg:is2%end, &
-                                   is3%beg:is3%end, 1:sys_size))
-        allocate (flux_src_rsz_vf(is1%beg:is1%end, &
-                                  is2%beg:is2%end, &
-                                  is3%beg:is3%end, advxb:sys_size))
-        allocate (vel_src_rsz_vf(is1%beg:is1%end, &
-                                 is2%beg:is2%end, &
-                                 is3%beg:is3%end, 1:num_dims))
+        @:ALLOCATE_GLOBAL(flux_rsz_vf(is1%beg:is1%end, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, 1:sys_size))
+        @:ALLOCATE_GLOBAL(flux_gsrc_rsz_vf(is1%beg:is1%end, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, 1:sys_size))
+        @:ALLOCATE_GLOBAL(flux_src_rsz_vf(is1%beg:is1%end, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, advxb:sys_size))
+        @:ALLOCATE_GLOBAL(vel_src_rsz_vf(is1%beg:is1%end, &
+            is2%beg:is2%end, &
+            is3%beg:is3%end, 1:num_dims))
 
         if (qbmm) then
-            allocate (mom_sp_rsz_vf(is1%beg:is1%end + 1, is2%beg:is2%end, is3%beg:is3%end, 1:4))
+            @:ALLOCATE_GLOBAL(mom_sp_rsz_vf(is1%beg:is1%end + 1, is2%beg:is2%end, is3%beg:is3%end, 1:4))
         end if
 
         if (any(Re_size > 0)) then
-            allocate (Re_avg_rsz_vf(is1%beg:is1%end, &
-                                    is2%beg:is2%end, &
-                                    is3%beg:is3%end, 1:2))
+            @:ALLOCATE_GLOBAL(Re_avg_rsz_vf(is1%beg:is1%end, &
+                is2%beg:is2%end, &
+                is3%beg:is3%end, 1:2))
         end if
 
     end subroutine s_initialize_riemann_solvers_module ! -------------------
@@ -2416,6 +2470,8 @@ contains
             dir_idx = (/3, 1, 2/); dir_flg = (/0d0, 0d0, 1d0/)
         end if
 
+        !$acc update device(is1, is2, is3)
+
         if (hypoelasticity) then
             if (norm_dir == 1) then
                 dir_idx_tau = (/1, 2, 4/)
@@ -2427,8 +2483,8 @@ contains
         end if
 
         isx = ix; isy = iy; isz = iz
-
-        !$acc update device(is1, is2, is3, dir_idx, dir_flg, isx, isy, isz, dir_idx_tau)
+        !$acc update device(isx, isy, isz) ! for stuff in the same module
+        !$acc update device(dir_idx, dir_flg,  dir_idx_tau) ! for stuff in different modules
 
         ! Population of Buffers in x-direction =============================
         if (norm_dir == 1) then
@@ -3898,8 +3954,6 @@ contains
 
     end subroutine s_compute_cartesian_viscous_source_flux ! -------------------------
 
-    @:s_compute_speed_of_sound()
-
     !>  Deallocation and/or disassociation procedures that are
         !!      needed to finalize the selected Riemann problem solver
         !!  @param flux_vf       Intercell fluxes
@@ -4089,40 +4143,40 @@ contains
         ! s_convert_to_mixture_variables => null()
 
         if (Re_size(1) > 0) then
-            deallocate (Re_avg_rsx_vf)
+            @:DEALLOCATE_GLOBAL(Re_avg_rsx_vf)
         end if
-        deallocate (vel_src_rsx_vf)
-        deallocate (flux_rsx_vf)
-        deallocate (flux_src_rsx_vf)
-        deallocate (flux_gsrc_rsx_vf)
+        @:DEALLOCATE_GLOBAL(vel_src_rsx_vf)
+        @:DEALLOCATE_GLOBAL(flux_rsx_vf)
+        @:DEALLOCATE_GLOBAL(flux_src_rsx_vf)
+        @:DEALLOCATE_GLOBAL(flux_gsrc_rsx_vf)
         if (qbmm) then
-            deallocate (mom_sp_rsx_vf)
+            @:DEALLOCATE_GLOBAL(mom_sp_rsx_vf)
         end if
 
         if (n == 0) return
 
         if (Re_size(1) > 0) then
-            deallocate (Re_avg_rsy_vf)
+            @:DEALLOCATE_GLOBAL(Re_avg_rsy_vf)
         end if
-        deallocate (vel_src_rsy_vf)
-        deallocate (flux_rsy_vf)
-        deallocate (flux_src_rsy_vf)
-        deallocate (flux_gsrc_rsy_vf)
+        @:DEALLOCATE_GLOBAL(vel_src_rsy_vf)
+        @:DEALLOCATE_GLOBAL(flux_rsy_vf)
+        @:DEALLOCATE_GLOBAL(flux_src_rsy_vf)
+        @:DEALLOCATE_GLOBAL(flux_gsrc_rsy_vf)
         if (qbmm) then
-            deallocate (mom_sp_rsy_vf)
+            @:DEALLOCATE_GLOBAL(mom_sp_rsy_vf)
         end if
 
         if (p == 0) return
 
         if (Re_size(1) > 0) then
-            deallocate (Re_avg_rsz_vf)
+            @:DEALLOCATE_GLOBAL(Re_avg_rsz_vf)
         end if
-        deallocate (vel_src_rsz_vf)
-        deallocate (flux_rsz_vf)
-        deallocate (flux_src_rsz_vf)
-        deallocate (flux_gsrc_rsz_vf)
+        @:DEALLOCATE_GLOBAL(vel_src_rsz_vf)
+        @:DEALLOCATE_GLOBAL(flux_rsz_vf)
+        @:DEALLOCATE_GLOBAL(flux_src_rsz_vf)
+        @:DEALLOCATE_GLOBAL(flux_gsrc_rsz_vf)
         if (qbmm) then
-            deallocate (mom_sp_rsz_vf)
+            @:DEALLOCATE_GLOBAL(mom_sp_rsz_vf)
         end if
 
     end subroutine s_finalize_riemann_solvers_module ! ---------------------
diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp
index 11ecaeee9..ecf633a83 100644
--- a/src/simulation/m_start_up.fpp
+++ b/src/simulation/m_start_up.fpp
@@ -44,7 +44,7 @@ module m_start_up
 
     use m_qbmm                 !< Quadrature MOM
 
-    use m_derived_variables     !< Procedures used to compute quantites derived
+    use m_derived_variables     !< Procedures used to compute quantities derived
                                 !! from the conservative and primitive variables
 
     use m_hypoelastic
@@ -121,6 +121,8 @@ contains
         integer :: iostatus
             !! Integer to check iostat of file read
 
+        CHARACTER(len=511) :: CRAY_ACC_MODULE
+
         character(len=1000) :: line
 
         ! Namelist of the global parameters which may be specified by user
@@ -148,8 +150,9 @@ contains
             polytropic, thermal, &
             integral, integral_wrt, num_integrals, &
             polydisperse, poly_sigma, qbmm, &
-            R0_type, file_per_process, relax, relax_model, &
-            palpha_eps, ptgalpha_eps
+             relax, relax_model, &
+            palpha_eps, ptgalpha_eps, &
+            R0_type, file_per_process
 
         ! Checking that an input file has been provided by the user. If it
         ! has, then the input file is read in, otherwise, simulation exits.
@@ -181,6 +184,16 @@ contains
             call s_mpi_abort(trim(file_path)//' is missing. Exiting ...')
         end if
 
+#ifdef _CRAYFTN
+#ifdef MFC_OpenACC
+        call get_environment_variable("CRAY_ACC_MODULE", CRAY_ACC_MODULE)
+
+        if (CRAY_ACC_MODULE == "") then
+            call s_mpi_abort("CRAY_ACC_MODULE is not set. Exiting...")
+        end if
+#endif        
+#endif        
+
     end subroutine s_read_input_file ! -------------------------------------
 
     !> The goal of this procedure is to verify that each of the
@@ -468,7 +481,7 @@ contains
         else
             call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting...')
         end if
-
+        
         ! Assigning local cell boundary locations
         x_cb(-1:m) = x_cb_glb((start_idx(1) - 1):(start_idx(1) + m))
         ! Computing the cell width distribution
@@ -1081,9 +1094,7 @@ contains
         elseif (time_stepper == 3) then
             call s_3rd_order_tvd_rk(t_step, time_avg)
         end if
-
-        if (relax) call s_relaxation_solver(q_cons_ts(1)%vf)
-
+        if (relax) call s_infinite_relaxation_k(q_cons_ts(1)%vf)
         ! Time-stepping loop controls
         if ((mytime + dt) >= finaltime) dt = finaltime - mytime
         t_step = t_step + 1
@@ -1234,7 +1245,7 @@ contains
         call acc_present_dump()
 #endif
 
-        if (hypoelasticity) call s_initialize_hypoelastic_module()
+        if (hypoelasticity) call s_initialize_hypoelastic_module()      
         if (relax) call s_initialize_phasechange_module()
         call s_initialize_data_output_module()
         call s_initialize_derived_variables_module()
@@ -1350,30 +1361,25 @@ contains
     subroutine s_initialize_gpu_vars()
         integer :: i
         !Update GPU DATA
-        !$acc update device(dt, dx, dy, dz, x_cc, y_cc, z_cc, x_cb, y_cb, z_cb)
-        !$acc update device(sys_size, buff_size)
-        !$acc update device(m, n, p)
-        !$acc update device(momxb, momxe, bubxb, bubxe, advxb, advxe, contxb, contxe, strxb, strxe)
         do i = 1, sys_size
             !$acc update device(q_cons_ts(1)%vf(i)%sf)
         end do
         if (qbmm .and. .not. polytropic) then
             !$acc update device(pb_ts(1)%sf, mv_ts(1)%sf)
         end if
-        !$acc update device(dt, sys_size, pref, rhoref, gamma_idx, pi_inf_idx, E_idx, alf_idx, stress_idx, mpp_lim, bubbles, hypoelasticity, alt_soundspeed, avg_state, num_fluids, model_eqns, num_dims, mixture_err, nb, weight, grid_geometry, cyl_coord, mapped_weno, mp_weno, weno_eps)
         !$acc update device(nb, R0ref, Ca, Web, Re_inv, weight, R0, V0, bubbles, polytropic, polydisperse, qbmm, R0_type, ptil, bubble_model, thermal, poly_sigma)
         !$acc update device(R_n, R_v, phi_vn, phi_nv, Pe_c, Tw, pv, M_n, M_v, k_n, k_v, pb0, mass_n0, mass_v0, Pe_T, Re_trans_T, Re_trans_c, Im_trans_T, Im_trans_c, omegaN , mul0, ss, gamma_v, mu_v, gamma_m, gamma_n, mu_n, gam)
-        !$acc update device(monopole, num_mono)
-    
+        !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc)    
         !$acc update device(bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3)
         !$acc update device(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3)
         !$acc update device(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3)
 
 
-        !$acc update device(relax)
+        !$acc update device(relax, relax_model)
         if (relax) then
             !$acc update device(palpha_eps, ptgalpha_eps)
         end if
+
     end subroutine s_initialize_gpu_vars
 
     subroutine s_finalize_modules()
@@ -1393,7 +1399,6 @@ contains
         call s_finalize_mpi_proxy_module()
         call s_finalize_global_parameters_module()
         if (relax) call s_finalize_relaxation_solver_module()      
-
         if (any(Re_size > 0)) then
             call s_finalize_viscous_module()
         end if
diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp
index d482d0ab5..7d820a026 100644
--- a/src/simulation/m_time_steppers.fpp
+++ b/src/simulation/m_time_steppers.fpp
@@ -34,6 +34,28 @@ module m_time_steppers
 
     implicit none
 
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(type(vector_field), dimension(:), q_cons_ts)
+    !! Cell-average conservative variables at each time-stage (TS)
+
+    @:CRAY_DECLARE_GLOBAL(type(scalar_field), dimension(:), q_prim_vf)
+    !! Cell-average primitive variables at the current time-stage
+
+    @:CRAY_DECLARE_GLOBAL(type(scalar_field), dimension(:), rhs_vf)
+    !! Cell-average RHS variables at the current time-stage
+
+    @:CRAY_DECLARE_GLOBAL(type(vector_field), dimension(:), q_prim_ts)
+    !! Cell-average primitive variables at consecutive TIMESTEPS
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :, :), rhs_pb)
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :, :), rhs_mv)
+
+    integer, private :: num_ts !<
+    !! Number of time stages in the time-stepping scheme
+
+!$acc declare link(q_cons_ts,q_prim_vf,rhs_vf,q_prim_ts, rhs_mv, rhs_pb)
+#else
     type(vector_field), allocatable, dimension(:) :: q_cons_ts !<
     !! Cell-average conservative variables at each time-stage (TS)
 
@@ -53,7 +75,8 @@ module m_time_steppers
     integer, private :: num_ts !<
     !! Number of time stages in the time-stepping scheme
 
-    !$acc declare create(q_cons_ts,q_prim_vf,rhs_vf,q_prim_ts, rhs_mv, rhs_pb)
+!$acc declare create(q_cons_ts,q_prim_vf,rhs_vf,q_prim_ts, rhs_mv, rhs_pb)
+#endif
 
 contains
 
@@ -91,7 +114,7 @@ contains
         end if
 
         ! Allocating the cell-average conservative variables
-        @:ALLOCATE(q_cons_ts(1:num_ts))
+        @:ALLOCATE_GLOBAL(q_cons_ts(1:num_ts))
 
         do i = 1, num_ts
             @:ALLOCATE(q_cons_ts(i)%vf(1:sys_size))
@@ -103,11 +126,12 @@ contains
                     iy_t%beg:iy_t%end, &
                     iz_t%beg:iz_t%end))
             end do
+            @:ACC_SETUP_VFs(q_cons_ts(i))
         end do
 
         ! Allocating the cell-average primitive ts variables
         if (probe_wrt) then
-            @:ALLOCATE(q_prim_ts(0:3))
+            @:ALLOCATE_GLOBAL(q_prim_ts(0:3))
 
             do i = 0, 3
                 @:ALLOCATE(q_prim_ts(i)%vf(1:sys_size))
@@ -120,15 +144,20 @@ contains
                         iz_t%beg:iz_t%end))
                 end do
             end do
+
+            do i = 0, 3
+                @:ACC_SETUP_VFs(q_prim_ts(i))
+            end do
         end if
 
         ! Allocating the cell-average primitive variables
-        @:ALLOCATE(q_prim_vf(1:sys_size))
+        @:ALLOCATE_GLOBAL(q_prim_vf(1:sys_size))
 
         do i = 1, adv_idx%end
             @:ALLOCATE(q_prim_vf(i)%sf(ix_t%beg:ix_t%end, &
                 iy_t%beg:iy_t%end, &
                 iz_t%beg:iz_t%end))
+            @:ACC_SETUP_SFs(q_prim_vf(i))
         end do
 
         if (bubbles) then
@@ -136,79 +165,100 @@ contains
                 @:ALLOCATE(q_prim_vf(i)%sf(ix_t%beg:ix_t%end, &
                     iy_t%beg:iy_t%end, &
                     iz_t%beg:iz_t%end))
+                @:ACC_SETUP_SFs(q_prim_vf(i))
+            end do
+        end if
+
+        if (hypoelasticity) then
+
+            do i = stress_idx%beg, stress_idx%end
+                @:ALLOCATE(q_prim_vf(i)%sf(ix_t%beg:ix_t%end, &
+                    iy_t%beg:iy_t%end, &
+                    iz_t%beg:iz_t%end))
+                @:ACC_SETUP_SFs(q_prim_vf(i))
+            end do
+        end if
+
+        if (model_eqns == 3) then
+            do i = internalEnergies_idx%beg, internalEnergies_idx%end
+                @:ALLOCATE(q_prim_vf(i)%sf(ix_t%beg:ix_t%end, &
+                    iy_t%beg:iy_t%end, &
+                    iz_t%beg:iz_t%end))
+                @:ACC_SETUP_SFs(q_prim_vf(i))
             end do
         end if
 
-        @:ALLOCATE(pb_ts(1:2))
+        @:ALLOCATE_GLOBAL(pb_ts(1:2))
         !Initialize bubble variables pb and mv at all quadrature nodes for all R0 bins
         if (qbmm .and. (.not. polytropic)) then
             @:ALLOCATE(pb_ts(1)%sf(ix_t%beg:ix_t%end, &
                 iy_t%beg:iy_t%end, &
                 iz_t%beg:iz_t%end, 1:nnode, 1:nb))
+            @:ACC_SETUP_SFs(pb_ts(1))
+
             @:ALLOCATE(pb_ts(2)%sf(ix_t%beg:ix_t%end, &
                 iy_t%beg:iy_t%end, &
                 iz_t%beg:iz_t%end, 1:nnode, 1:nb))
-            @:ALLOCATE(rhs_pb(ix_t%beg:ix_t%end, &
+            @:ACC_SETUP_SFs(pb_ts(2))
+
+            @:ALLOCATE_GLOBAL(rhs_pb(ix_t%beg:ix_t%end, &
                 iy_t%beg:iy_t%end, &
                 iz_t%beg:iz_t%end, 1:nnode, 1:nb))
         else if (qbmm .and. polytropic) then
             @:ALLOCATE(pb_ts(1)%sf(ix_t%beg:ix_t%beg + 1, &
                 iy_t%beg:iy_t%beg + 1, &
                 iz_t%beg:iz_t%beg + 1, 1:nnode, 1:nb))
+            @:ACC_SETUP_SFs(pb_ts(1))
+
             @:ALLOCATE(pb_ts(2)%sf(ix_t%beg:ix_t%beg + 1, &
                 iy_t%beg:iy_t%beg + 1, &
                 iz_t%beg:iz_t%beg + 1, 1:nnode, 1:nb))
-            @:ALLOCATE(rhs_pb(ix_t%beg:ix_t%beg + 1, &
+            @:ACC_SETUP_SFs(pb_ts(2))
+
+            @:ALLOCATE_GLOBAL(rhs_pb(ix_t%beg:ix_t%beg + 1, &
                 iy_t%beg:iy_t%beg + 1, &
                 iz_t%beg:iz_t%beg + 1, 1:nnode, 1:nb))
         end if
 
-        @:ALLOCATE(mv_ts(1:2))
+        @:ALLOCATE_GLOBAL(mv_ts(1:2))
 
         if (qbmm .and. (.not. polytropic)) then
             @:ALLOCATE(mv_ts(1)%sf(ix_t%beg:ix_t%end, &
                 iy_t%beg:iy_t%end, &
                 iz_t%beg:iz_t%end, 1:nnode, 1:nb))
+            @:ACC_SETUP_SFs(mv_ts(1))
+
             @:ALLOCATE(mv_ts(2)%sf(ix_t%beg:ix_t%end, &
                 iy_t%beg:iy_t%end, &
                 iz_t%beg:iz_t%end, 1:nnode, 1:nb))
-            @:ALLOCATE(rhs_mv(ix_t%beg:ix_t%end, &
+            @:ACC_SETUP_SFs(mv_ts(2))
+
+            @:ALLOCATE_GLOBAL(rhs_mv(ix_t%beg:ix_t%end, &
                 iy_t%beg:iy_t%end, &
                 iz_t%beg:iz_t%end, 1:nnode, 1:nb))
+
         else if (qbmm .and. polytropic) then
             @:ALLOCATE(mv_ts(1)%sf(ix_t%beg:ix_t%beg + 1, &
                 iy_t%beg:iy_t%beg + 1, &
                 iz_t%beg:iz_t%beg + 1, 1:nnode, 1:nb))
+            @:ACC_SETUP_SFs(mv_ts(1))
+
             @:ALLOCATE(mv_ts(2)%sf(ix_t%beg:ix_t%beg + 1, &
                 iy_t%beg:iy_t%beg + 1, &
                 iz_t%beg:iz_t%beg + 1, 1:nnode, 1:nb))
-            @:ALLOCATE(rhs_mv(ix_t%beg:ix_t%beg + 1, &
+            @:ACC_SETUP_SFs(mv_ts(2))
+
+            @:ALLOCATE_GLOBAL(rhs_mv(ix_t%beg:ix_t%beg + 1, &
                 iy_t%beg:iy_t%beg + 1, &
                 iz_t%beg:iz_t%beg + 1, 1:nnode, 1:nb))
         end if
 
-        if (hypoelasticity) then
-
-            do i = stress_idx%beg, stress_idx%end
-                @:ALLOCATE(q_prim_vf(i)%sf(ix_t%beg:ix_t%end, &
-                    iy_t%beg:iy_t%end, &
-                    iz_t%beg:iz_t%end))
-            end do
-        end if
-
-        if (model_eqns == 3) then
-            do i = internalEnergies_idx%beg, internalEnergies_idx%end
-                @:ALLOCATE(q_prim_vf(i)%sf(ix_t%beg:ix_t%end, &
-                    iy_t%beg:iy_t%end, &
-                    iz_t%beg:iz_t%end))
-            end do
-        end if
-
         ! Allocating the cell-average RHS variables
-        @:ALLOCATE(rhs_vf(1:sys_size))
+        @:ALLOCATE_GLOBAL(rhs_vf(1:sys_size))
 
         do i = 1, sys_size
             @:ALLOCATE(rhs_vf(i)%sf(0:m, 0:n, 0:p))
+            @:ACC_SETUP_SFs(rhs_vf(i))
         end do
 
         ! Opening and writing the header of the run-time information file
@@ -803,7 +853,7 @@ contains
 
         end do
 
-        @:DEALLOCATE(q_cons_ts)
+        @:DEALLOCATE_GLOBAL(q_cons_ts)
 
         ! Deallocating the cell-average primitive ts variables
         if (probe_wrt) then
@@ -813,7 +863,7 @@ contains
                 end do
                 @:DEALLOCATE(q_prim_ts(i)%vf)
             end do
-            @:DEALLOCATE(q_prim_ts)
+            @:DEALLOCATE_GLOBAL(q_prim_ts)
         end if
 
         ! Deallocating the cell-average primitive variables
@@ -839,14 +889,14 @@ contains
             end do
         end if
 
-        @:DEALLOCATE(q_prim_vf)
+        @:DEALLOCATE_GLOBAL(q_prim_vf)
 
         ! Deallocating the cell-average RHS variables
         do i = 1, sys_size
             @:DEALLOCATE(rhs_vf(i)%sf)
         end do
 
-        @:DEALLOCATE(rhs_vf)
+        @:DEALLOCATE_GLOBAL(rhs_vf)
 
         ! Writing the footer of and closing the run-time information file
         if (proc_rank == 0 .and. run_time_info) then
diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp
index cd573c2ac..bce3fa8df 100644
--- a/src/simulation/m_viscous.fpp
+++ b/src/simulation/m_viscous.fpp
@@ -24,16 +24,24 @@ module m_viscous
  s_finalize_viscous_module
 
     type(int_bounds_info) :: iv
-    type(int_bounds_info) :: is1, is2, is3
-    !$acc declare create(is1, is2, is3, iv)
-
-    real(kind(0d0)), allocatable, dimension(:, :) :: Res
-    !$acc declare create(Res)
-
-    !> @name Additional field for capillary source terms
-    !> @{
+    type(int_bounds_info) :: is1_viscous, is2_viscous, is3_viscous
+!$acc declare create(is1_viscous, is2_viscous, is3_viscous, iv)
+
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), Res_viscous)
+!$acc declare link(Res_viscous)
+#else
+    real(kind(0d0)), allocatable, dimension(:, :) :: Res_viscous
+!$acc declare create(Re_viscous)
+#endif
+
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(type(scalar_field), dimension(:), tau_Re_vf)
+!$acc declare link(tau_Re_vf)
+#else
     type(scalar_field), allocatable, dimension(:) :: tau_Re_vf
-    !> @}
+!$acc declare create(tau_Re_vf)
+#endif
 
 contains
 
@@ -50,26 +58,27 @@ contains
         ix%end = m - ix%beg; iy%end = n - iy%beg; iz%end = p - iz%beg
         ! ==================================================================
 
-        @:ALLOCATE(Res(1:2, 1:maxval(Re_size)))
+        @:ALLOCATE_GLOBAL(Res_viscous(1:2, 1:maxval(Re_size)))
 
         do i = 1, 2
             do j = 1, Re_size(i)
-                Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i)
+                Res_viscous(i, j) = fluid_pp(Re_idx(i, j))%Re(i)
             end do
         end do
-        !$acc update device(Res, Re_idx, Re_size)
+        !$acc update device(Res_viscous, Re_idx, Re_size)
+        !$acc enter data copyin(is1_viscous, is2_viscous, is3_viscous, iv)
 
-        if (cyl_coord) then
-            @:ALLOCATE(tau_Re_vf(1:sys_size))
-            do i = 1, num_dims
-                @:ALLOCATE(tau_Re_vf(cont_idx%end + i)%sf(ix%beg:ix%end, &
-                                                       &  iy%beg:iy%end, &
-                                                       &  iz%beg:iz%end))
-            end do
-            @:ALLOCATE(tau_Re_vf(E_idx)%sf(ix%beg:ix%end, &
-                                         & iy%beg:iy%end, &
-                                         & iz%beg:iz%end))
-        end if
+        @:ALLOCATE_GLOBAL(tau_Re_vf(1:sys_size))
+        do i = 1, num_dims
+            @:ALLOCATE(tau_Re_vf(cont_idx%end + i)%sf(ix%beg:ix%end, &
+                                                   &  iy%beg:iy%end, &
+                                                   &  iz%beg:iz%end))
+            @:ACC_SETUP_SFs(tau_Re_vf(cont_idx%end + i))
+        end do
+        @:ALLOCATE(tau_Re_vf(E_idx)%sf(ix%beg:ix%end, &
+                                     & iy%beg:iy%end, &
+                                     & iz%beg:iz%end))
+        @:ACC_SETUP_SFs(tau_Re_vf(E_idx))
 
     end subroutine s_initialize_viscous_module
 
@@ -101,12 +110,14 @@ contains
 
         type(int_bounds_info) :: ix, iy, iz
 
-        !$acc update device(ix, iy, iz)
+        is1_viscous = ix; is2_viscous = iy; is3_viscous = iz
+
+        !$acc update device(is1_viscous, is2_viscous, is3_viscous)
 
         !$acc parallel loop collapse(3) gang vector default(present)
-        do l = iz%beg, iz%end
-            do k = iy%beg, iy%end
-                do j = ix%beg, ix%end
+        do l = is3_viscous%beg, is3_viscous%end
+            do k = is2_viscous%beg, is2_viscous%end
+                do j = is1_viscous%beg, is1_viscous%end
                     !$acc loop seq
                     do i = momxb, E_idx
                         tau_Re_vf(i)%sf(j, k, l) = 0d0
@@ -116,9 +127,9 @@ contains
         end do
         if (Re_size(1) > 0) then    ! Shear stresses
             !$acc parallel loop collapse(3) gang vector default(present) private(alpha_visc, alpha_rho_visc, Re_visc, tau_Re )
-            do l = iz%beg, iz%end
+            do l = is3_viscous%beg, is3_viscous%end
                 do k = -1, 1
-                    do j = ix%beg, ix%end
+                    do j = is1_viscous%beg, is1_viscous%end
 
                         !$acc loop seq
                         do i = 1, num_fluids
@@ -188,7 +199,7 @@ contains
                                     if (Re_size(i) > 0) Re_visc(i) = 0d0
                                     !$acc loop seq
                                     do q = 1, Re_size(i)
-                                        Re_visc(i) = alpha_visc(Re_idx(i, q))/Res(i, q) &
+                                        Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) &
                                                      + Re_visc(i)
                                     end do
 
@@ -223,9 +234,9 @@ contains
 
         if (Re_size(2) > 0) then    ! Bulk stresses
             !$acc parallel loop collapse(3) gang vector default(present) private(alpha_visc, alpha_rho_visc, Re_visc, tau_Re )
-            do l = iz%beg, iz%end
+            do l = is3_viscous%beg, is3_viscous%end
                 do k = -1, 1
-                    do j = ix%beg, ix%end
+                    do j = is1_viscous%beg, is1_viscous%end
 
                         !$acc loop seq
                         do i = 1, num_fluids
@@ -295,7 +306,7 @@ contains
                                     if (Re_size(i) > 0) Re_visc(i) = 0d0
                                     !$acc loop seq
                                     do q = 1, Re_size(i)
-                                        Re_visc(i) = alpha_visc(Re_idx(i, q))/Res(i, q) &
+                                        Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) &
                                                      + Re_visc(i)
                                     end do
 
@@ -327,9 +338,9 @@ contains
 
         if (Re_size(1) > 0) then    ! Shear stresses
             !$acc parallel loop collapse(3) gang vector default(present) private(alpha_visc, alpha_rho_visc, Re_visc, tau_Re )
-            do l = iz%beg, iz%end
+            do l = is3_viscous%beg, is3_viscous%end
                 do k = -1, 1
-                    do j = ix%beg, ix%end
+                    do j = is1_viscous%beg, is1_viscous%end
 
                         !$acc loop seq
                         do i = 1, num_fluids
@@ -399,7 +410,7 @@ contains
                                     if (Re_size(i) > 0) Re_visc(i) = 0d0
                                     !$acc loop seq
                                     do q = 1, Re_size(i)
-                                        Re_visc(i) = alpha_visc(Re_idx(i, q))/Res(i, q) &
+                                        Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) &
                                                      + Re_visc(i)
                                     end do
 
@@ -435,9 +446,9 @@ contains
 
         if (Re_size(2) > 0) then    ! Bulk stresses
             !$acc parallel loop collapse(3) gang vector default(present) private(alpha_visc, alpha_rho_visc, Re_visc, tau_Re )
-            do l = iz%beg, iz%end
+            do l = is3_viscous%beg, is3_viscous%end
                 do k = -1, 1
-                    do j = ix%beg, ix%end
+                    do j = is1_viscous%beg, is1_viscous%end
 
                         !$acc loop seq
                         do i = 1, num_fluids
@@ -507,7 +518,7 @@ contains
                                     if (Re_size(i) > 0) Re_visc(i) = 0d0
                                     !$acc loop seq
                                     do q = 1, Re_size(i)
-                                        Re_visc(i) = alpha_visc(Re_idx(i, q))/Res(i, q) &
+                                        Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) &
                                                      + Re_visc(i)
                                     end do
 
@@ -553,7 +564,7 @@ contains
                              qL_prim_rsy_vf, qR_prim_rsy_vf, &
                              qL_prim_rsz_vf, qR_prim_rsz_vf
 
-        type(vector_field), dimension(1:num_dims) :: qL_prim, qR_prim
+        type(vector_field), dimension(num_dims), intent(INOUT) :: qL_prim, qR_prim
 
         type(vector_field) :: q_prim_qp
 
@@ -562,7 +573,7 @@ contains
                              dqL_prim_dy_n, dqR_prim_dy_n, &
                              dqL_prim_dz_n, dqR_prim_dz_n
 
-        type(vector_field) :: dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp
+        type(vector_field), dimension(1), intent(INOUT) :: dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp
 
         type(int_bounds_info), intent(IN) :: ix, iy, iz
 
@@ -590,19 +601,19 @@ contains
                     call s_apply_scalar_divergence_theorem( &
                         qL_prim(i)%vf(iv%beg:iv%end), &
                         qR_prim(i)%vf(iv%beg:iv%end), &
-                        dq_prim_dx_qp%vf(iv%beg:iv%end), i, &
+                        dq_prim_dx_qp(1)%vf(iv%beg:iv%end), i, &
                         ix, iy, iz, iv, dx, m, buff_size)
                 elseif (i == 2) then
                     call s_apply_scalar_divergence_theorem( &
                         qL_prim(i)%vf(iv%beg:iv%end), &
                         qR_prim(i)%vf(iv%beg:iv%end), &
-                        dq_prim_dy_qp%vf(iv%beg:iv%end), i, &
+                        dq_prim_dy_qp(1)%vf(iv%beg:iv%end), i, &
                         ix, iy, iz, iv, dy, n, buff_size)
                 else
                     call s_apply_scalar_divergence_theorem( &
                         qL_prim(i)%vf(iv%beg:iv%end), &
                         qR_prim(i)%vf(iv%beg:iv%end), &
-                        dq_prim_dz_qp%vf(iv%beg:iv%end), i, &
+                        dq_prim_dz_qp(1)%vf(iv%beg:iv%end), i, &
                         ix, iy, iz, iv, dz, p, buff_size)
                 end if
             end do
@@ -612,10 +623,14 @@ contains
             iv%beg = mom_idx%beg; iv%end = mom_idx%end
             !$acc update device(iv)
 
+            is1_viscous = ix; is2_viscous = iy; is3_viscous = iz
+
+            !$acc update device(is1_viscous, is2_viscous, is3_viscous)
+
             !$acc parallel loop collapse(3) gang vector default(present)
-            do l = iz%beg, iz%end
+            do l = is3_viscous%beg, is3_viscous%end
                 do k = iy%beg, iy%end
-                    do j = ix%beg + 1, ix%end
+                    do j = is1_viscous%beg + 1, is1_viscous%end
                         !$acc loop seq
                         do i = iv%beg, iv%end
                             dqL_prim_dx_n(1)%vf(i)%sf(j, k, l) = &
@@ -628,9 +643,9 @@ contains
             end do
 
             !$acc parallel loop collapse(3) gang vector default(present)
-            do l = iz%beg, iz%end
-                do k = iy%beg, iy%end
-                    do j = ix%beg, ix%end - 1
+            do l = is3_viscous%beg, is3_viscous%end
+                do k = is2_viscous%beg, is2_viscous%end
+                    do j = is1_viscous%beg, is1_viscous%end - 1
                         !$acc loop seq
                         do i = iv%beg, iv%end
                             dqR_prim_dx_n(1)%vf(i)%sf(j, k, l) = &
@@ -645,9 +660,9 @@ contains
             if (n > 0) then
 
                 !$acc parallel loop collapse(3) gang vector default(present)
-                do l = iz%beg, iz%end
-                    do j = iy%beg + 1, iy%end
-                        do k = ix%beg, ix%end
+                do l = is3_viscous%beg, is3_viscous%end
+                    do j = is2_viscous%beg + 1, is2_viscous%end
+                        do k = is1_viscous%beg, is1_viscous%end
                             !$acc loop seq
                             do i = iv%beg, iv%end
                                 dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = &
@@ -660,9 +675,9 @@ contains
                 end do
 
                 !$acc parallel loop collapse(3) gang vector default(present)
-                do l = iz%beg, iz%end
-                    do j = iy%beg, iy%end - 1
-                        do k = ix%beg, ix%end
+                do l = is3_viscous%beg, is3_viscous%end
+                    do j = is2_viscous%beg, is2_viscous%end - 1
+                        do k = is1_viscous%beg, is1_viscous%end
                             !$acc loop seq
                             do i = iv%beg, iv%end
                                 dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = &
@@ -675,9 +690,9 @@ contains
                 end do
 
                 !$acc parallel loop collapse(3) gang vector default(present)
-                do l = iz%beg, iz%end
-                    do j = iy%beg + 1, iy%end
-                        do k = ix%beg + 1, ix%end - 1
+                do l = is3_viscous%beg, is3_viscous%end
+                    do j = is2_viscous%beg + 1, is2_viscous%end
+                        do k = is1_viscous%beg + 1, is1_viscous%end - 1
                             !$acc loop seq
                             do i = iv%beg, iv%end
                                 dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = &
@@ -694,9 +709,9 @@ contains
                 end do
 
                 !$acc parallel loop collapse(3) gang vector default(present)
-                do l = iz%beg, iz%end
-                    do j = iy%beg, iy%end - 1
-                        do k = ix%beg + 1, ix%end - 1
+                do l = is3_viscous%beg, is3_viscous%end
+                    do j = is2_viscous%beg, is2_viscous%end - 1
+                        do k = is1_viscous%beg + 1, is1_viscous%end - 1
                             !$acc loop seq
                             do i = iv%beg, iv%end
                                 dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = &
@@ -714,9 +729,9 @@ contains
                 end do
 
                 !$acc parallel loop collapse(3) gang vector default(present)
-                do l = iz%beg, iz%end
-                    do k = iy%beg + 1, iy%end - 1
-                        do j = ix%beg + 1, ix%end
+                do l = is3_viscous%beg, is3_viscous%end
+                    do k = is2_viscous%beg + 1, is2_viscous%end - 1
+                        do j = is1_viscous%beg + 1, is1_viscous%end
                             !$acc loop seq
                             do i = iv%beg, iv%end
                                 dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = &
@@ -734,9 +749,9 @@ contains
                 end do
 
                 !$acc parallel loop collapse(3) gang vector default(present)
-                do l = iz%beg, iz%end
-                    do k = iy%beg + 1, iy%end - 1
-                        do j = ix%beg, ix%end - 1
+                do l = is3_viscous%beg, is3_viscous%end
+                    do k = is2_viscous%beg + 1, is2_viscous%end - 1
+                        do j = is1_viscous%beg, is1_viscous%end - 1
                             !$acc loop seq
                             do i = iv%beg, iv%end
                                 dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = &
@@ -756,9 +771,9 @@ contains
                 if (p > 0) then
 
                     !$acc parallel loop collapse(3) gang vector default(present)
-                    do j = iz%beg + 1, iz%end
-                        do l = iy%beg, iy%end
-                            do k = ix%beg, ix%end
+                    do j = is3_viscous%beg + 1, is3_viscous%end
+                        do l = is2_viscous%beg, is2_viscous%end
+                            do k = is1_viscous%beg, is1_viscous%end
                                 !$acc loop seq
                                 do i = iv%beg, iv%end
 
@@ -772,9 +787,9 @@ contains
                     end do
 
                     !$acc parallel loop collapse(3) gang vector default(present)
-                    do j = iz%beg, iz%end - 1
-                        do l = iy%beg, iy%end
-                            do k = ix%beg, ix%end
+                    do j = is3_viscous%beg, is3_viscous%end - 1
+                        do l = is2_viscous%beg, is2_viscous%end
+                            do k = is1_viscous%beg, is1_viscous%end
                                 !$acc loop seq
                                 do i = iv%beg, iv%end
 
@@ -788,9 +803,9 @@ contains
                     end do
 
                     !$acc parallel loop collapse(3) gang vector default(present)
-                    do l = iz%beg + 1, iz%end - 1
-                        do k = iy%beg, iy%end
-                            do j = ix%beg + 1, ix%end
+                    do l = is3_viscous%beg + 1, is3_viscous%end - 1
+                        do k = is2_viscous%beg, is2_viscous%end
+                            do j = is1_viscous%beg + 1, is1_viscous%end
                                 !$acc loop seq
                                 do i = iv%beg, iv%end
 
@@ -809,9 +824,9 @@ contains
                     end do
 
                     !$acc parallel loop collapse(3) gang vector default(present)
-                    do l = iz%beg + 1, iz%end - 1
-                        do k = iy%beg, iy%end
-                            do j = ix%beg, ix%end - 1
+                    do l = is3_viscous%beg + 1, is3_viscous%end - 1
+                        do k = is2_viscous%beg, is2_viscous%end
+                            do j = is1_viscous%beg, is1_viscous%end - 1
                                 !$acc loop seq
                                 do i = iv%beg, iv%end
 
@@ -830,9 +845,9 @@ contains
                     end do
 
                     !$acc parallel loop collapse(3) gang vector default(present)
-                    do l = iz%beg + 1, iz%end - 1
-                        do j = iy%beg + 1, iy%end
-                            do k = ix%beg, ix%end
+                    do l = is3_viscous%beg + 1, is3_viscous%end - 1
+                        do j = is2_viscous%beg + 1, is2_viscous%end
+                            do k = is1_viscous%beg, is1_viscous%end
                                 !$acc loop seq
                                 do i = iv%beg, iv%end
 
@@ -851,9 +866,9 @@ contains
                     end do
 
                     !$acc parallel loop collapse(3) gang vector default(present)
-                    do l = iz%beg + 1, iz%end - 1
-                        do j = iy%beg, iy%end - 1
-                            do k = ix%beg, ix%end
+                    do l = is3_viscous%beg + 1, is3_viscous%end - 1
+                        do j = is2_viscous%beg, is2_viscous%end - 1
+                            do k = is1_viscous%beg, is1_viscous%end
                                 !$acc loop seq
                                 do i = iv%beg, iv%end
 
@@ -872,9 +887,9 @@ contains
                     end do
 
                     !$acc parallel loop collapse(3) gang vector default(present)
-                    do j = iz%beg + 1, iz%end
-                        do l = iy%beg + 1, iy%end - 1
-                            do k = ix%beg, ix%end
+                    do j = is3_viscous%beg + 1, is3_viscous%end
+                        do l = is2_viscous%beg + 1, is2_viscous%end - 1
+                            do k = is1_viscous%beg, is1_viscous%end
                                 !$acc loop seq
                                 do i = iv%beg, iv%end
 
@@ -893,9 +908,9 @@ contains
                     end do
 
                     !$acc parallel loop collapse(3) gang vector default(present)
-                    do j = iz%beg, iz%end - 1
-                        do l = iy%beg + 1, iy%end - 1
-                            do k = ix%beg, ix%end
+                    do j = is3_viscous%beg, is3_viscous%end - 1
+                        do l = is2_viscous%beg + 1, is2_viscous%end - 1
+                            do k = is1_viscous%beg, is1_viscous%end
                                 !$acc loop seq
                                 do i = iv%beg, iv%end
 
@@ -912,11 +927,10 @@ contains
                             end do
                         end do
                     end do
-
                     !$acc parallel loop collapse(3) gang vector default(present)
-                    do j = iz%beg + 1, iz%end
-                        do l = iy%beg, iy%end
-                            do k = ix%beg + 1, ix%end - 1
+                    do j = is3_viscous%beg + 1, is3_viscous%end
+                        do l = is2_viscous%beg, is2_viscous%end
+                            do k = is1_viscous%beg + 1, is1_viscous%end - 1
                                 !$acc loop seq
                                 do i = iv%beg, iv%end
 
@@ -933,11 +947,10 @@ contains
                             end do
                         end do
                     end do
-
                     !$acc parallel loop collapse(3) gang vector default(present)
-                    do j = iz%beg, iz%end - 1
-                        do l = iy%beg, iy%end
-                            do k = ix%beg + 1, ix%end - 1
+                    do j = is3_viscous%beg, is3_viscous%end - 1
+                        do l = is2_viscous%beg, is2_viscous%end
+                            do k = is1_viscous%beg + 1, is1_viscous%end - 1
                                 !$acc loop seq
                                 do i = iv%beg, iv%end
                                     dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = &
@@ -956,9 +969,9 @@ contains
 
                     do i = iv%beg, iv%end
                         call s_compute_fd_gradient(q_prim_qp%vf(i), &
-                                                   dq_prim_dx_qp%vf(i), &
-                                                   dq_prim_dy_qp%vf(i), &
-                                                   dq_prim_dz_qp%vf(i), &
+                                                   dq_prim_dx_qp(1)%vf(i), &
+                                                   dq_prim_dy_qp(1)%vf(i), &
+                                                   dq_prim_dz_qp(1)%vf(i), &
                                                    ix, iy, iz, buff_size)
                     end do
 
@@ -966,9 +979,9 @@ contains
 
                     do i = iv%beg, iv%end
                         call s_compute_fd_gradient(q_prim_qp%vf(i), &
-                                                   dq_prim_dx_qp%vf(i), &
-                                                   dq_prim_dy_qp%vf(i), &
-                                                   dq_prim_dy_qp%vf(i), &
+                                                   dq_prim_dx_qp(1)%vf(i), &
+                                                   dq_prim_dy_qp(1)%vf(i), &
+                                                   dq_prim_dy_qp(1)%vf(i), &
                                                    ix, iy, iz, buff_size)
                     end do
 
@@ -977,9 +990,9 @@ contains
             else
                 do i = iv%beg, iv%end
                     call s_compute_fd_gradient(q_prim_qp%vf(i), &
-                                               dq_prim_dx_qp%vf(i), &
-                                               dq_prim_dx_qp%vf(i), &
-                                               dq_prim_dx_qp%vf(i), &
+                                               dq_prim_dx_qp(1)%vf(i), &
+                                               dq_prim_dx_qp(1)%vf(i), &
+                                               dq_prim_dx_qp(1)%vf(i), &
                                                ix, iy, iz, buff_size)
                 end do
 
@@ -1191,23 +1204,23 @@ contains
         ! Reconstruction in s1-direction ===================================
 
         if (norm_dir == 1) then
-            is1 = ix; is2 = iy; is3 = iz
-            weno_dir = 1; is1%beg = is1%beg + weno_polyn
-            is1%end = is1%end - weno_polyn
+            is1_viscous = ix; is2_viscous = iy; is3_viscous = iz
+            weno_dir = 1; is1_viscous%beg = is1_viscous%beg + weno_polyn
+            is1_viscous%end = is1_viscous%end - weno_polyn
 
         elseif (norm_dir == 2) then
-            is1 = iy; is2 = ix; is3 = iz
-            weno_dir = 2; is1%beg = is1%beg + weno_polyn
-            is1%end = is1%end - weno_polyn
+            is1_viscous = iy; is2_viscous = ix; is3_viscous = iz
+            weno_dir = 2; is1_viscous%beg = is1_viscous%beg + weno_polyn
+            is1_viscous%end = is1_viscous%end - weno_polyn
 
         else
-            is1 = iz; is2 = iy; is3 = ix
-            weno_dir = 3; is1%beg = is1%beg + weno_polyn
-            is1%end = is1%end - weno_polyn
+            is1_viscous = iz; is2_viscous = iy; is3_viscous = ix
+            weno_dir = 3; is1_viscous%beg = is1_viscous%beg + weno_polyn
+            is1_viscous%end = is1_viscous%end - weno_polyn
 
         end if
 
-        !$acc update device(is1, is2, is3, iv)
+        !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv)
 
         if (n > 0) then
             if (p > 0) then
@@ -1215,19 +1228,19 @@ contains
                 call s_weno(v_vf(iv%beg:iv%end), &
                             vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, iv%beg:iv%end), vL_z(:, :, :, iv%beg:iv%end), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, iv%beg:iv%end), vR_z(:, :, :, iv%beg:iv%end), &
                             norm_dir, weno_dir, &
-                            is1, is2, is3)
+                            is1_viscous, is2_viscous, is3_viscous)
             else
                 call s_weno(v_vf(iv%beg:iv%end), &
                             vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, iv%beg:iv%end), vL_z(:, :, :, :), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, iv%beg:iv%end), vR_z(:, :, :, :), &
                             norm_dir, weno_dir, &
-                            is1, is2, is3)
+                            is1_viscous, is2_viscous, is3_viscous)
             end if
         else
 
             call s_weno(v_vf(iv%beg:iv%end), &
                         vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, :), vL_z(:, :, :, :), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, :), vR_z(:, :, :, :), &
                         norm_dir, weno_dir, &
-                        is1, is2, is3)
+                        is1_viscous, is2_viscous, is3_viscous)
         end if
 
         if (any(Re_size > 0)) then
@@ -1235,9 +1248,9 @@ contains
                 if (norm_dir == 2) then
                     !$acc parallel loop collapse(4) gang vector default(present)
                     do i = iv%beg, iv%end
-                        do l = is3%beg, is3%end
-                            do j = is1%beg, is1%end
-                                do k = is2%beg, is2%end
+                        do l = is3_viscous%beg, is3_viscous%end
+                            do j = is1_viscous%beg, is1_viscous%end
+                                do k = is2_viscous%beg, is2_viscous%end
                                     vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i)
                                     vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i)
                                 end do
@@ -1247,9 +1260,9 @@ contains
                 elseif (norm_dir == 3) then
                     !$acc parallel loop collapse(4) gang vector default(present)
                     do i = iv%beg, iv%end
-                        do j = is1%beg, is1%end
-                            do k = is2%beg, is2%end
-                                do l = is3%beg, is3%end
+                        do j = is1_viscous%beg, is1_viscous%end
+                            do k = is2_viscous%beg, is2_viscous%end
+                                do l = is3_viscous%beg, is3_viscous%end
                                     vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i)
                                     vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i)
                                 end do
@@ -1259,9 +1272,9 @@ contains
                 elseif (norm_dir == 1) then
                     !$acc parallel loop collapse(4) gang vector default(present)
                     do i = iv%beg, iv%end
-                        do l = is3%beg, is3%end
-                            do k = is2%beg, is2%end
-                                do j = is1%beg, is1%end
+                        do l = is3_viscous%beg, is3_viscous%end
+                            do k = is2_viscous%beg, is2_viscous%end
+                                do j = is1_viscous%beg, is1_viscous%end
                                     vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i)
                                     vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i)
                                 end do
@@ -1294,23 +1307,23 @@ contains
         ! Reconstruction in s1-direction ===================================
 
         if (norm_dir == 1) then
-            is1 = ix; is2 = iy; is3 = iz
-            weno_dir = 1; is1%beg = is1%beg + weno_polyn
-            is1%end = is1%end - weno_polyn
+            is1_viscous = ix; is2_viscous = iy; is3_viscous = iz
+            weno_dir = 1; is1_viscous%beg = is1_viscous%beg + weno_polyn
+            is1_viscous%end = is1_viscous%end - weno_polyn
 
         elseif (norm_dir == 2) then
-            is1 = iy; is2 = ix; is3 = iz
-            weno_dir = 2; is1%beg = is1%beg + weno_polyn
-            is1%end = is1%end - weno_polyn
+            is1_viscous = iy; is2_viscous = ix; is3_viscous = iz
+            weno_dir = 2; is1_viscous%beg = is1_viscous%beg + weno_polyn
+            is1_viscous%end = is1_viscous%end - weno_polyn
 
         else
-            is1 = iz; is2 = iy; is3 = ix
-            weno_dir = 3; is1%beg = is1%beg + weno_polyn
-            is1%end = is1%end - weno_polyn
+            is1_viscous = iz; is2_viscous = iy; is3_viscous = ix
+            weno_dir = 3; is1_viscous%beg = is1_viscous%beg + weno_polyn
+            is1_viscous%end = is1_viscous%end - weno_polyn
 
         end if
 
-        !$acc update device(is1, is2, is3, iv)
+        !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv)
 
         if (n > 0) then
             if (p > 0) then
@@ -1318,19 +1331,19 @@ contains
                 call s_weno(v_vf(iv%beg:iv%end), &
                             vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, iv%beg:iv%end), vL_z(:, :, :, iv%beg:iv%end), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, iv%beg:iv%end), vR_z(:, :, :, iv%beg:iv%end), &
                             norm_dir, weno_dir, &
-                            is1, is2, is3)
+                            is1_viscous, is2_viscous, is3_viscous)
             else
                 call s_weno(v_vf(iv%beg:iv%end), &
                             vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, iv%beg:iv%end), vL_z(:, :, :, :), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, iv%beg:iv%end), vR_z(:, :, :, :), &
                             norm_dir, weno_dir, &
-                            is1, is2, is3)
+                            is1_viscous, is2_viscous, is3_viscous)
             end if
         else
 
             call s_weno(v_vf(iv%beg:iv%end), &
                         vL_x(:, :, :, iv%beg:iv%end), vL_y(:, :, :, :), vL_z(:, :, :, :), vR_x(:, :, :, iv%beg:iv%end), vR_y(:, :, :, :), vR_z(:, :, :, :), &
                         norm_dir, weno_dir, &
-                        is1, is2, is3)
+                        is1_viscous, is2_viscous, is3_viscous)
         end if
 
         if (any(Re_size > 0)) then
@@ -1338,9 +1351,9 @@ contains
                 if (norm_dir == 2) then
                     !$acc parallel loop collapse(4) gang vector default(present)
                     do i = iv%beg, iv%end
-                        do l = is3%beg, is3%end
-                            do j = is1%beg, is1%end
-                                do k = is2%beg, is2%end
+                        do l = is3_viscous%beg, is3_viscous%end
+                            do j = is1_viscous%beg, is1_viscous%end
+                                do k = is2_viscous%beg, is2_viscous%end
                                     vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i)
                                     vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i)
                                 end do
@@ -1350,9 +1363,9 @@ contains
                 elseif (norm_dir == 3) then
                     !$acc parallel loop collapse(4) gang vector default(present)
                     do i = iv%beg, iv%end
-                        do j = is1%beg, is1%end
-                            do k = is2%beg, is2%end
-                                do l = is3%beg, is3%end
+                        do j = is1_viscous%beg, is1_viscous%end
+                            do k = is2_viscous%beg, is2_viscous%end
+                                do l = is3_viscous%beg, is3_viscous%end
                                     vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i)
                                     vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i)
                                 end do
@@ -1362,9 +1375,9 @@ contains
                 elseif (norm_dir == 1) then
                     !$acc parallel loop collapse(4) gang vector default(present)
                     do i = iv%beg, iv%end
-                        do l = is3%beg, is3%end
-                            do k = is2%beg, is2%end
-                                do j = is1%beg, is1%end
+                        do l = is3_viscous%beg, is3_viscous%end
+                            do k = is2_viscous%beg, is2_viscous%end
+                                do j = is1_viscous%beg, is1_viscous%end
                                     vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i)
                                     vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i)
                                 end do
@@ -1390,11 +1403,10 @@ contains
     subroutine s_apply_scalar_divergence_theorem(vL_vf, vR_vf, & ! --------
                                                  dv_ds_vf, &
                                                  norm_dir, &
-                                                 ix, iy, iz, iv, &
+                                                 ix, iy, iz, iv_in, &
                                                  dL, dim, buff_size_in)
 
-        type(int_bounds_info) :: ix, iy, iz, iv
-
+        type(int_bounds_info), intent(IN) :: ix, iy, iz, iv_in
         integer :: buff_size_in, dim
 
         real(kind(0d0)), dimension(-buff_size_in:dim + buff_size_in) :: dL
@@ -1412,7 +1424,12 @@ contains
 
         integer :: i, j, k, l !< Generic loop iterators
 
-        !$acc update device(ix, iy, iz, iv)
+        is1_viscous = ix
+        is2_viscous = iy
+        is3_viscous = iz
+        iv = iv_in
+
+        !$acc update device(is1_viscous, is2_viscous, is3_viscous, iv)
 
         ! First-Order Spatial Derivatives in x-direction ===================
         if (norm_dir == 1) then
@@ -1424,9 +1441,9 @@ contains
             ! spatial derivatives inside the cell.
 
             !$acc parallel loop collapse(3) gang vector default(present)
-            do l = iz%beg, iz%end
-                do k = iy%beg, iy%end
-                    do j = ix%beg + 1, ix%end - 1
+            do l = is3_viscous%beg, is3_viscous%end
+                do k = is2_viscous%beg, is2_viscous%end
+                    do j = is1_viscous%beg + 1, is1_viscous%end - 1
                         !$acc loop seq
                         do i = iv%beg, iv%end
                             dv_ds_vf(i)%sf(j, k, l) = &
@@ -1452,9 +1469,9 @@ contains
             ! spatial derivatives inside the cell.
 
             !$acc parallel loop collapse(3) gang vector default(present)
-            do l = iz%beg, iz%end
-                do k = iy%beg + 1, iy%end - 1
-                    do j = ix%beg, ix%end
+            do l = is3_viscous%beg, is3_viscous%end
+                do k = is2_viscous%beg + 1, is2_viscous%end - 1
+                    do j = is1_viscous%beg, is1_viscous%end
                         !$acc loop seq
                         do i = iv%beg, iv%end
                             dv_ds_vf(i)%sf(j, k, l) = &
@@ -1480,9 +1497,9 @@ contains
             ! spatial derivatives inside the cell.
 
             !$acc parallel loop collapse(3) gang vector default(present)
-            do l = iz%beg + 1, iz%end - 1
-                do k = iy%beg, iy%end
-                    do j = ix%beg, ix%end
+            do l = is3_viscous%beg + 1, is3_viscous%end - 1
+                do k = is2_viscous%beg, is2_viscous%end
+                    do j = is1_viscous%beg, is1_viscous%end
                         !$acc loop seq
                         do i = iv%beg, iv%end
                             dv_ds_vf(i)%sf(j, k, l) = &
@@ -1534,12 +1551,14 @@ contains
             iz%beg = -1; iz%end = 1
         end if
 
-        !$acc update device(ix, iy, iz)
+        is1_viscous = ix; is2_viscous = iy; is3_viscous = iz
+
+        !$acc update device(is1_viscous, is2_viscous, is3_viscous)
 
         !$acc parallel loop collapse(3) gang vector default(present)
-        do l = iz%beg + 1, iz%end - 1
-            do k = iy%beg + 1, iy%end - 1
-                do j = ix%beg + 1, ix%end - 1
+        do l = is3_viscous%beg + 1, is3_viscous%end - 1
+            do k = is2_viscous%beg + 1, is2_viscous%end - 1
+                do j = is1_viscous%beg + 1, is1_viscous%end - 1
                     grad_x%sf(j, k, l) = &
                         (var%sf(j + 1, k, l) - var%sf(j - 1, k, l))/ &
                         (x_cc(j + 1) - x_cc(j - 1))
@@ -1549,9 +1568,9 @@ contains
 
         if (n > 0) then
             !$acc parallel loop collapse(3) gang vector
-            do l = iz%beg + 1, iz%end - 1
-                do k = iy%beg + 1, iy%end - 1
-                    do j = ix%beg + 1, ix%end - 1
+            do l = is3_viscous%beg + 1, is3_viscous%end - 1
+                do k = is2_viscous%beg + 1, is2_viscous%end - 1
+                    do j = is1_viscous%beg + 1, is1_viscous%end - 1
                         grad_y%sf(j, k, l) = &
                             (var%sf(j, k + 1, l) - var%sf(j, k - 1, l))/ &
                             (y_cc(k + 1) - y_cc(k - 1))
@@ -1562,9 +1581,9 @@ contains
 
         if (p > 0) then
             !$acc parallel loop collapse(3) gang vector
-            do l = iz%beg + 1, iz%end - 1
-                do k = iy%beg + 1, iy%end - 1
-                    do j = ix%beg + 1, ix%end - 1
+            do l = is3_viscous%beg + 1, is3_viscous%end - 1
+                do k = is2_viscous%beg + 1, is2_viscous%end - 1
+                    do j = is1_viscous%beg + 1, is1_viscous%end - 1
                         grad_z%sf(j, k, l) = &
                             (var%sf(j, k, l + 1) - var%sf(j, k, l - 1))/ &
                             (z_cc(l + 1) - z_cc(l - 1))
@@ -1573,54 +1592,54 @@ contains
             end do
         end if
 
-        ix%beg = -buff_size_in; ix%end = m + buff_size_in; 
+        is1_viscous%beg = -buff_size_in; is1_viscous%end = m + buff_size_in; 
         if (n > 0) then
-            iy%beg = -buff_size_in; iy%end = n + buff_size_in
+            is2_viscous%beg = -buff_size_in; is2_viscous%end = n + buff_size_in
         else
-            iy%beg = 0; iy%end = 0
+            is2_viscous%beg = 0; is2_viscous%end = 0
         end if
 
         if (p > 0) then
-            iz%beg = -buff_size_in; iz%end = p + buff_size_in
+            is3_viscous%beg = -buff_size_in; is3_viscous%end = p + buff_size_in
         else
-            iz%beg = 0; iz%end = 0
+            is3_viscous%beg = 0; is3_viscous%end = 0
         end if
 
-        !$acc update device(ix, iy, iz)
+        !$acc update device(is1_viscous, is2_viscous, is3_viscous)
 
         !$acc parallel loop collapse(2) gang vector default(present)
-        do l = iz%beg, iz%end
-            do k = iy%beg, iy%end
-                grad_x%sf(ix%beg, k, l) = &
-                    (-3d0*var%sf(ix%beg, k, l) + 4d0*var%sf(ix%beg + 1, k, l) - var%sf(ix%beg + 2, k, l))/ &
-                    (x_cc(ix%beg + 2) - x_cc(ix%beg))
-                grad_x%sf(ix%end, k, l) = &
-                    (3d0*var%sf(ix%end, k, l) - 4d0*var%sf(ix%end - 1, k, l) + var%sf(ix%end - 2, k, l))/ &
-                    (x_cc(ix%end) - x_cc(ix%end - 2))
+        do l = is3_viscous%beg, is3_viscous%end
+            do k = is2_viscous%beg, is2_viscous%end
+                grad_x%sf(is1_viscous%beg, k, l) = &
+                    (-3d0*var%sf(is1_viscous%beg, k, l) + 4d0*var%sf(is1_viscous%beg + 1, k, l) - var%sf(is1_viscous%beg + 2, k, l))/ &
+                    (x_cc(is1_viscous%beg + 2) - x_cc(is1_viscous%beg))
+                grad_x%sf(is1_viscous%end, k, l) = &
+                    (3d0*var%sf(is1_viscous%end, k, l) - 4d0*var%sf(is1_viscous%end - 1, k, l) + var%sf(is1_viscous%end - 2, k, l))/ &
+                    (x_cc(is1_viscous%end) - x_cc(is1_viscous%end - 2))
             end do
         end do
         if (n > 0) then
             !$acc parallel loop collapse(2) gang vector default(present)
-            do l = iz%beg, iz%end
-                do j = ix%beg, ix%end
-                    grad_y%sf(j, iy%beg, l) = &
-                        (-3d0*var%sf(j, iy%beg, l) + 4d0*var%sf(j, iy%beg + 1, l) - var%sf(j, iy%beg + 2, l))/ &
-                        (y_cc(iy%beg + 2) - y_cc(iy%beg))
-                    grad_y%sf(j, iy%end, l) = &
-                        (3d0*var%sf(j, iy%end, l) - 4d0*var%sf(j, iy%end - 1, l) + var%sf(j, iy%end - 2, l))/ &
-                        (y_cc(iy%end) - y_cc(iy%end - 2))
+            do l = is3_viscous%beg, is3_viscous%end
+                do j = is1_viscous%beg, is1_viscous%end
+                    grad_y%sf(j, is2_viscous%beg, l) = &
+                        (-3d0*var%sf(j, is2_viscous%beg, l) + 4d0*var%sf(j, is2_viscous%beg + 1, l) - var%sf(j, is2_viscous%beg + 2, l))/ &
+                        (y_cc(is2_viscous%beg + 2) - y_cc(is2_viscous%beg))
+                    grad_y%sf(j, is2_viscous%end, l) = &
+                        (3d0*var%sf(j, is2_viscous%end, l) - 4d0*var%sf(j, is2_viscous%end - 1, l) + var%sf(j, is2_viscous%end - 2, l))/ &
+                        (y_cc(is2_viscous%end) - y_cc(is2_viscous%end - 2))
                 end do
             end do
             if (p > 0) then
                 !$acc parallel loop collapse(2) gang vector default(present)
-                do k = iy%beg, iy%end
-                    do j = ix%beg, ix%end
-                        grad_z%sf(j, k, iz%beg) = &
-                            (-3d0*var%sf(j, k, iz%beg) + 4d0*var%sf(j, k, iz%beg + 1) - var%sf(j, k, iz%beg + 2))/ &
-                            (z_cc(iz%beg + 2) - z_cc(iz%beg))
-                        grad_z%sf(j, k, iz%end) = &
-                            (3d0*var%sf(j, k, iz%end) - 4d0*var%sf(j, k, iz%end - 1) + var%sf(j, k, iz%end - 2))/ &
-                            (z_cc(iz%end) - z_cc(iz%end - 2))
+                do k = is2_viscous%beg, is2_viscous%end
+                    do j = is1_viscous%beg, is1_viscous%end
+                        grad_z%sf(j, k, is3_viscous%beg) = &
+                            (-3d0*var%sf(j, k, is3_viscous%beg) + 4d0*var%sf(j, k, is3_viscous%beg + 1) - var%sf(j, k, is3_viscous%beg + 2))/ &
+                            (z_cc(is3_viscous%beg + 2) - z_cc(is3_viscous%beg))
+                        grad_z%sf(j, k, is3_viscous%end) = &
+                            (3d0*var%sf(j, k, is3_viscous%end) - 4d0*var%sf(j, k, is3_viscous%end - 1) + var%sf(j, k, is3_viscous%end - 2))/ &
+                            (z_cc(is3_viscous%end) - z_cc(is3_viscous%end - 2))
                     end do
                 end do
             end if
@@ -1628,8 +1647,8 @@ contains
 
         if (bc_x%beg <= -3) then
             !$acc parallel loop collapse(2) gang vector default(present)
-            do l = iz%beg, iz%end
-                do k = iy%beg, iy%end
+            do l = is3_viscous%beg, is3_viscous%end
+                do k = is2_viscous%beg, is2_viscous%end
                     grad_x%sf(0, k, l) = (-3d0*var%sf(0, k, l) + 4d0*var%sf(1, k, l) - var%sf(2, k, l))/ &
                                          (x_cc(2) - x_cc(0))
                 end do
@@ -1637,18 +1656,18 @@ contains
         end if
         if (bc_x%end <= -3) then
             !$acc parallel loop collapse(2) gang vector default(present)
-            do l = iz%beg, iz%end
-                do k = iy%beg, iy%end
+            do l = is3_viscous%beg, is3_viscous%end
+                do k = is2_viscous%beg, is2_viscous%end
                     grad_x%sf(m, k, l) = (3d0*var%sf(m, k, l) - 4d0*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ &
                                          (x_cc(m) - x_cc(m - 2))
                 end do
             end do
         end if
         if (n > 0) then
-            if (bc_y%beg <= -3 .and. bc_y%beg /= -14) then
+            if (bc_y%beg <= -3 .and. bc_y%beg /= -13) then
                 !$acc parallel loop collapse(2) gang vector default(present)
-                do l = iz%beg, iz%end
-                    do j = ix%beg, ix%end
+                do l = is3_viscous%beg, is3_viscous%end
+                    do j = is1_viscous%beg, is1_viscous%end
                         grad_y%sf(j, 0, l) = (-3d0*var%sf(j, 0, l) + 4d0*var%sf(j, 1, l) - var%sf(j, 2, l))/ &
                                              (y_cc(2) - y_cc(0))
                     end do
@@ -1656,8 +1675,8 @@ contains
             end if
             if (bc_y%end <= -3) then
                 !$acc parallel loop collapse(2) gang vector default(present)
-                do l = iz%beg, iz%end
-                    do j = ix%beg, ix%end
+                do l = is3_viscous%beg, is3_viscous%end
+                    do j = is1_viscous%beg, is1_viscous%end
                         grad_y%sf(j, n, l) = (3d0*var%sf(j, n, l) - 4d0*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ &
                                              (y_cc(n) - y_cc(n - 2))
                     end do
@@ -1666,8 +1685,8 @@ contains
             if (p > 0) then
                 if (bc_z%beg <= -3) then
                     !$acc parallel loop collapse(2) gang vector default(present)
-                    do k = iy%beg, iy%end
-                        do j = ix%beg, ix%end
+                    do k = is2_viscous%beg, is2_viscous%end
+                        do j = is1_viscous%beg, is1_viscous%end
                             grad_z%sf(j, k, 0) = &
                                 (-3d0*var%sf(j, k, 0) + 4d0*var%sf(j, k, 1) - var%sf(j, k, 2))/ &
                                 (z_cc(2) - z_cc(0))
@@ -1676,8 +1695,8 @@ contains
                 end if
                 if (bc_z%end <= -3) then
                     !$acc parallel loop collapse(2) gang vector default(present)
-                    do k = iy%beg, iy%end
-                        do j = ix%beg, ix%end
+                    do k = is2_viscous%beg, is2_viscous%end
+                        do j = is1_viscous%beg, is1_viscous%end
                             grad_z%sf(j, k, p) = &
                                 (3d0*var%sf(j, k, p) - 4d0*var%sf(j, k, p - 1) + var%sf(j, k, p - 2))/ &
                                 (z_cc(p) - z_cc(p - 2))
@@ -1693,14 +1712,14 @@ contains
 
         integer :: i
 
-        @:DEALLOCATE(Res)
+        @:DEALLOCATE_GLOBAL(Res_viscous)
 
         if (cyl_coord) then
             do i = 1, num_dims
                 @:DEALLOCATE(tau_Re_vf(cont_idx%end + i)%sf)
             end do
             @:DEALLOCATE(tau_Re_vf(E_idx)%sf)
-            @:DEALLOCATE(tau_Re_vf)
+            @:DEALLOCATE_GLOBAL(tau_Re_vf)
         end if
     end subroutine s_finalize_viscous_module
 
diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp
index f1facc15e..21b0ea1e1 100644
--- a/src/simulation/m_weno.fpp
+++ b/src/simulation/m_weno.fpp
@@ -20,7 +20,7 @@ module m_weno
 
     use m_variables_conversion !< State variables type conversion procedures
 
-#ifdef MFC_OpenACC
+#ifdef MFC_OPENACC
     use openacc
 #endif
 
@@ -39,7 +39,13 @@ module m_weno
     !! of the characteristic decomposition are stored in custom-constructed WENO-
     !! stencils (WS) that are annexed to each position of a given scalar field.
     !> @{
+
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :, :), v_rs_ws_x, v_rs_ws_y, v_rs_ws_z)
+!$acc declare link(v_rs_ws_x, v_rs_ws_y, v_rs_ws_z)
+#else
     real(kind(0d0)), allocatable, dimension(:, :, :, :) :: v_rs_ws_x, v_rs_ws_y, v_rs_ws_z
+#endif
     !> @}
 
     ! WENO Coefficients ========================================================
@@ -50,6 +56,17 @@ module m_weno
     !! second dimension identifies the position of its coefficients and the last
     !! dimension denotes the cell-location in the relevant coordinate direction.
     !> @{
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), poly_coef_cbL_x)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), poly_coef_cbL_y)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), poly_coef_cbL_z)
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), poly_coef_cbR_x)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), poly_coef_cbR_y)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), poly_coef_cbR_z)
+!$acc declare link(poly_coef_cbL_x, poly_coef_cbL_y, poly_coef_cbL_z)
+!$acc declare link(poly_coef_cbR_x, poly_coef_cbR_y, poly_coef_cbR_z)
+#else
     real(kind(0d0)), target, allocatable, dimension(:, :, :) :: poly_coef_cbL_x
     real(kind(0d0)), target, allocatable, dimension(:, :, :) :: poly_coef_cbL_y
     real(kind(0d0)), target, allocatable, dimension(:, :, :) :: poly_coef_cbL_z
@@ -57,10 +74,10 @@ module m_weno
     real(kind(0d0)), target, allocatable, dimension(:, :, :) :: poly_coef_cbR_x
     real(kind(0d0)), target, allocatable, dimension(:, :, :) :: poly_coef_cbR_y
     real(kind(0d0)), target, allocatable, dimension(:, :, :) :: poly_coef_cbR_z
-    real(kind(0d0)), pointer, dimension(:, :, :) :: poly_coef_L
-    real(kind(0d0)), pointer, dimension(:, :, :) :: poly_coef_R
-!    real(kind(0d0)), pointer, dimension(:, :, :) :: poly_coef_L => null()
-!    real(kind(0d0)), pointer, dimension(:, :, :) :: poly_coef_R => null()
+#endif
+
+    !    real(kind(0d0)), pointer, dimension(:, :, :) :: poly_coef_L => null()
+    !    real(kind(0d0)), pointer, dimension(:, :, :) :: poly_coef_R => null()
     !> @}
 
     !> @name The ideal weights at the left and the right cell-boundaries and at the
@@ -68,6 +85,16 @@ module m_weno
     !! that the first dimension of the array identifies the weight, while the
     !! last denotes the cell-location in the relevant coordinate direction.
     !> @{
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), d_cbL_y)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), d_cbL_x)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), d_cbL_z)
+
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), d_cbR_x)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), d_cbR_y)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :), d_cbR_z)
+!$acc declare link(d_cbL_x, d_cbL_y, d_cbL_z, d_cbR_x, d_cbR_y, d_cbR_z)
+#else
     real(kind(0d0)), target, allocatable, dimension(:, :) :: d_cbL_x
     real(kind(0d0)), target, allocatable, dimension(:, :) :: d_cbL_y
     real(kind(0d0)), target, allocatable, dimension(:, :) :: d_cbL_z
@@ -75,9 +102,7 @@ module m_weno
     real(kind(0d0)), target, allocatable, dimension(:, :) :: d_cbR_x
     real(kind(0d0)), target, allocatable, dimension(:, :) :: d_cbR_y
     real(kind(0d0)), target, allocatable, dimension(:, :) :: d_cbR_z
-
-    real(kind(0d0)), pointer, dimension(:, :) :: d_L
-    real(kind(0d0)), pointer, dimension(:, :) :: d_R
+#endif
 !    real(kind(0d0)), pointer, dimension(:, :) :: d_L => null()
 !    real(kind(0d0)), pointer, dimension(:, :) :: d_R => null()
     !> @}
@@ -87,11 +112,16 @@ module m_weno
     !! second identifies the position of its coefficients and the last denotes
     !! the cell-location in the relevant coordinate direction.
     !> @{
+#ifdef CRAY_ACC_WAR
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), beta_coef_x)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), beta_coef_y)
+    @:CRAY_DECLARE_GLOBAL(real(kind(0d0)), dimension(:, :, :), beta_coef_z)
+!$acc declare link(beta_coef_x, beta_coef_y, beta_coef_z)
+#else
     real(kind(0d0)), target, allocatable, dimension(:, :, :) :: beta_coef_x
     real(kind(0d0)), target, allocatable, dimension(:, :, :) :: beta_coef_y
     real(kind(0d0)), target, allocatable, dimension(:, :, :) :: beta_coef_z
-
-    real(kind(0d0)), pointer, dimension(:, :, :) :: beta_coef
+#endif
 !    real(kind(0d0)), pointer, dimension(:, :, :) :: beta_coef => null()
     !> @}
 
@@ -99,19 +129,25 @@ module m_weno
 
     integer :: v_size !< Number of WENO-reconstructed cell-average variables
 
+    !$acc declare create(v_size)
+
     !> @name Indical bounds in the s1-, s2- and s3-directions
     !> @{
-    type(int_bounds_info) :: is1, is2, is3
+    type(int_bounds_info) :: is1_weno, is2_weno, is3_weno
+    !$acc declare create(is1_weno, is2_weno, is3_weno)
+    !
     !> @}
 
     real(kind(0d0)) :: test
-
-    !$acc declare create( &
-    !$acc                v_rs_ws_x, v_rs_ws_y, v_rs_ws_z, &
-    !$acc                poly_coef_cbL_x,poly_coef_cbL_y,poly_coef_cbL_z, &
-    !$acc                poly_coef_cbR_x,poly_coef_cbR_y,poly_coef_cbR_z,d_cbL_x,       &
-    !$acc                d_cbL_y,d_cbL_z,d_cbR_x,d_cbR_y,d_cbR_z,beta_coef_x,beta_coef_y,beta_coef_z,   &
-    !$acc                v_size, is1, is2, is3, test)
+!$acc declare create(test)
+
+#ifndef CRAY_ACC_WAR
+!$acc declare create( &
+!$acc                v_rs_ws_x, v_rs_ws_y, v_rs_ws_z, &
+!$acc                poly_coef_cbL_x,poly_coef_cbL_y,poly_coef_cbL_z, &
+!$acc                poly_coef_cbR_x,poly_coef_cbR_y,poly_coef_cbR_z,d_cbL_x,       &
+!$acc                d_cbL_y,d_cbL_z,d_cbR_x,d_cbR_y,d_cbR_z,beta_coef_x,beta_coef_y,beta_coef_z)
+#endif
 
 contains
 
@@ -124,95 +160,95 @@ contains
         if (weno_order == 1) return
 
         ! Allocating/Computing WENO Coefficients in x-direction ============
-        is1%beg = -buff_size; is1%end = m - is1%beg
+        is1_weno%beg = -buff_size; is1_weno%end = m - is1_weno%beg
         if (n == 0) then
-            is2%beg = 0
+            is2_weno%beg = 0
         else
-            is2%beg = -buff_size; 
+            is2_weno%beg = -buff_size; 
         end if
 
-        is2%end = n - is2%beg
+        is2_weno%end = n - is2_weno%beg
 
         if (p == 0) then
-            is3%beg = 0
+            is3_weno%beg = 0
         else
-            is3%beg = -buff_size
+            is3_weno%beg = -buff_size
         end if
 
-        is3%end = p - is3%beg
+        is3_weno%end = p - is3_weno%beg
 
-        @:ALLOCATE(poly_coef_cbL_x(is1%beg + weno_polyn:is1%end - weno_polyn, 0:weno_polyn, &
+        @:ALLOCATE_GLOBAL(poly_coef_cbL_x(is1_weno%beg + weno_polyn:is1_weno%end - weno_polyn, 0:weno_polyn, &
             0:weno_polyn - 1))
-        @:ALLOCATE(poly_coef_cbR_x(is1%beg + weno_polyn:is1%end - weno_polyn, 0:weno_polyn, &
+        @:ALLOCATE_GLOBAL(poly_coef_cbR_x(is1_weno%beg + weno_polyn:is1_weno%end - weno_polyn, 0:weno_polyn, &
             0:weno_polyn - 1))
 
-        @:ALLOCATE(d_cbL_x(0:weno_polyn, is1%beg + weno_polyn:is1%end - weno_polyn))
-        @:ALLOCATE(d_cbR_x(0:weno_polyn, is1%beg + weno_polyn:is1%end - weno_polyn))
+        @:ALLOCATE_GLOBAL(d_cbL_x(0:weno_polyn, is1_weno%beg + weno_polyn:is1_weno%end - weno_polyn))
+        @:ALLOCATE_GLOBAL(d_cbR_x(0:weno_polyn, is1_weno%beg + weno_polyn:is1_weno%end - weno_polyn))
 
-        @:ALLOCATE(beta_coef_x(is1%beg + weno_polyn:is1%end - weno_polyn, 0:weno_polyn, &
+        @:ALLOCATE_GLOBAL(beta_coef_x(is1_weno%beg + weno_polyn:is1_weno%end - weno_polyn, 0:weno_polyn, &
             0:2*(weno_polyn - 1)))
 
-        call s_compute_weno_coefficients(1, is1)
+        call s_compute_weno_coefficients(1, is1_weno)
 
-        @:ALLOCATE(v_rs_ws_x(is1%beg:is1%end, &
-            is2%beg:is2%end, is3%beg:is3%end, 1:sys_size))
+        @:ALLOCATE_GLOBAL(v_rs_ws_x(is1_weno%beg:is1_weno%end, &
+            is2_weno%beg:is2_weno%end, is3_weno%beg:is3_weno%end, 1:sys_size))
 
         ! ==================================================================
 
         ! Allocating/Computing WENO Coefficients in y-direction ============
         if (n == 0) return
 
-        is2%beg = -buff_size; is2%end = n - is2%beg
-        is1%beg = -buff_size; is1%end = m - is1%beg
+        is2_weno%beg = -buff_size; is2_weno%end = n - is2_weno%beg
+        is1_weno%beg = -buff_size; is1_weno%end = m - is1_weno%beg
 
         if (p == 0) then
-            is3%beg = 0
+            is3_weno%beg = 0
         else
-            is3%beg = -buff_size
+            is3_weno%beg = -buff_size
         end if
 
-        is3%end = p - is3%beg
+        is3_weno%end = p - is3_weno%beg
 
-        @:ALLOCATE(poly_coef_cbL_y(is2%beg + weno_polyn:is2%end - weno_polyn, 0:weno_polyn, &
+        @:ALLOCATE_GLOBAL(poly_coef_cbL_y(is2_weno%beg + weno_polyn:is2_weno%end - weno_polyn, 0:weno_polyn, &
             0:weno_polyn - 1))
-        @:ALLOCATE(poly_coef_cbR_y(is2%beg + weno_polyn:is2%end - weno_polyn, 0:weno_polyn, &
+        @:ALLOCATE_GLOBAL(poly_coef_cbR_y(is2_weno%beg + weno_polyn:is2_weno%end - weno_polyn, 0:weno_polyn, &
             0:weno_polyn - 1))
 
-        @:ALLOCATE(d_cbL_y(0:weno_polyn, is2%beg + weno_polyn:is2%end - weno_polyn))
-        @:ALLOCATE(d_cbR_y(0:weno_polyn, is2%beg + weno_polyn:is2%end - weno_polyn))
+        @:ALLOCATE_GLOBAL(d_cbL_y(0:weno_polyn, is2_weno%beg + weno_polyn:is2_weno%end - weno_polyn))
+        @:ALLOCATE_GLOBAL(d_cbR_y(0:weno_polyn, is2_weno%beg + weno_polyn:is2_weno%end - weno_polyn))
 
-        @:ALLOCATE(beta_coef_y(is2%beg + weno_polyn:is2%end - weno_polyn, 0:weno_polyn, &
+        @:ALLOCATE_GLOBAL(beta_coef_y(is2_weno%beg + weno_polyn:is2_weno%end - weno_polyn, 0:weno_polyn, &
             0:2*(weno_polyn - 1)))
 
-        call s_compute_weno_coefficients(2, is2)
+        call s_compute_weno_coefficients(2, is2_weno)
 
-        @:ALLOCATE(v_rs_ws_y(is2%beg:is2%end, &
-            is1%beg:is1%end, is3%beg:is3%end, 1:sys_size))
+        @:ALLOCATE_GLOBAL(v_rs_ws_y(is2_weno%beg:is2_weno%end, &
+            is1_weno%beg:is1_weno%end, is3_weno%beg:is3_weno%end, 1:sys_size))
 
         ! ==================================================================
 
         ! Allocating/Computing WENO Coefficients in z-direction ============
         if (p == 0) return
 
-        is2%beg = -buff_size; is2%end = n - is2%beg
-        is1%beg = -buff_size; is1%end = m - is1%beg
-        is3%beg = -buff_size; is3%end = p - is3%beg
+        is2_weno%beg = -buff_size; is2_weno%end = n - is2_weno%beg
+        is1_weno%beg = -buff_size; is1_weno%end = m - is1_weno%beg
+        is3_weno%beg = -buff_size; is3_weno%end = p - is3_weno%beg
 
-        @:ALLOCATE(poly_coef_cbL_z(is3%beg + weno_polyn:is3%end - weno_polyn, 0:weno_polyn, &
+        @:ALLOCATE_GLOBAL(poly_coef_cbL_z(is3_weno%beg + weno_polyn:is3_weno%end - weno_polyn, 0:weno_polyn, &
             0:weno_polyn - 1))
-        @:ALLOCATE(poly_coef_cbR_z(is3%beg + weno_polyn:is3%end - weno_polyn, 0:weno_polyn, &
+        @:ALLOCATE_GLOBAL(poly_coef_cbR_z(is3_weno%beg + weno_polyn:is3_weno%end - weno_polyn, 0:weno_polyn, &
             0:weno_polyn - 1))
 
-        @:ALLOCATE(d_cbL_z(0:weno_polyn, is3%beg + weno_polyn:is3%end - weno_polyn))
-        @:ALLOCATE(d_cbR_z(0:weno_polyn, is3%beg + weno_polyn:is3%end - weno_polyn))
+        @:ALLOCATE_GLOBAL(d_cbL_z(0:weno_polyn, is3_weno%beg + weno_polyn:is3_weno%end - weno_polyn))
+        @:ALLOCATE_GLOBAL(d_cbR_z(0:weno_polyn, is3_weno%beg + weno_polyn:is3_weno%end - weno_polyn))
 
-        @:ALLOCATE(beta_coef_z(is3%beg + weno_polyn:is3%end - weno_polyn, 0:weno_polyn, &
+        @:ALLOCATE_GLOBAL(beta_coef_z(is3_weno%beg + weno_polyn:is3_weno%end - weno_polyn, 0:weno_polyn, &
             0:2*(weno_polyn - 1)))
 
-        call s_compute_weno_coefficients(3, is3)
+        call s_compute_weno_coefficients(3, is3_weno)
 
-        @:ALLOCATE(v_rs_ws_z(is3%beg:is3%end, &
-            is2%beg:is2%end, is1%beg:is1%end, 1:sys_size))
+        @:ALLOCATE_GLOBAL(v_rs_ws_z(is3_weno%beg:is3_weno%end, &
+            is2_weno%beg:is2_weno%end, is1_weno%beg:is1_weno%end, 1:sys_size))
 
         ! ==================================================================
 
@@ -464,13 +500,13 @@ contains
 
     subroutine s_weno(v_vf, vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z, vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z, & ! -------------------
                       norm_dir, weno_dir, &
-                      is1_d, is2_d, is3_d)
+                      is1_weno_d, is2_weno_d, is3_weno_d)
 
         type(scalar_field), dimension(1:), intent(IN) :: v_vf
         real(kind(0d0)), dimension(startx:, starty:, startz:, 1:), intent(INOUT) :: vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z, vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z
         integer, intent(IN) :: norm_dir
         integer, intent(IN) :: weno_dir
-        type(int_bounds_info), intent(IN) :: is1_d, is2_d, is3_d
+        type(int_bounds_info), intent(IN) :: is1_weno_d, is2_weno_d, is3_weno_d
 
         real(kind(0d0)), dimension(-weno_polyn:weno_polyn - 1) :: dvd
         real(kind(0d0)), dimension(0:weno_polyn) :: poly
@@ -479,15 +515,17 @@ contains
         real(kind(0d0)), dimension(0:weno_polyn) :: beta
         real(kind(0d0)), pointer :: beta_p(:)
 
+        real(kind(0d0)) :: v_rs1, v_rs2, v_rs3, v_rs4, v_rs5
+
         integer :: i, j, k, l, r, s, w
 
         integer :: t1, t2, c_rate, c_max
 
-        is1 = is1_d
-        is2 = is2_d
-        is3 = is3_d
+        is1_weno = is1_weno_d
+        is2_weno = is2_weno_d
+        is3_weno = is3_weno_d
 
-        !$acc update device(is1, is2, is3)
+        !$acc update device(is1_weno, is2_weno, is3_weno)
 
         if (weno_order /= 1) then
             call s_initialize_weno(v_vf, &
@@ -498,9 +536,9 @@ contains
             if (weno_dir == 1) then
                 !$acc parallel loop collapse(4) default(present)
                 do i = 1, ubound(v_vf, 1)
-                    do l = is3%beg, is3%end
-                        do k = is2%beg, is2%end
-                            do j = is1%beg, is1%end
+                    do l = is3_weno%beg, is3_weno%end
+                        do k = is2_weno%beg, is2_weno%end
+                            do j = is1_weno%beg, is1_weno%end
                                 vL_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l)
                                 vR_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l)
                             end do
@@ -511,9 +549,9 @@ contains
             else if (weno_dir == 2) then
                 !$acc parallel loop collapse(4) default(present)
                 do i = 1, ubound(v_vf, 1)
-                    do l = is3%beg, is3%end
-                        do k = is2%beg, is2%end
-                            do j = is1%beg, is1%end
+                    do l = is3_weno%beg, is3_weno%end
+                        do k = is2_weno%beg, is2_weno%end
+                            do j = is1_weno%beg, is1_weno%end
                                 vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l)
                                 vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l)
                             end do
@@ -524,9 +562,9 @@ contains
             else if (weno_dir == 3) then
                 !$acc parallel loop collapse(4) default(present)
                 do i = 1, ubound(v_vf, 1)
-                    do l = is3%beg, is3%end
-                        do k = is2%beg, is2%end
-                            do j = is1%beg, is1%end
+                    do l = is3_weno%beg, is3_weno%end
+                        do k = is2_weno%beg, is2_weno%end
+                            do j = is1_weno%beg, is1_weno%end
                                 vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j)
                                 vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j)
                             end do
@@ -539,9 +577,9 @@ contains
             #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')]
                 if (weno_dir == ${WENO_DIR}$) then
                     !$acc parallel loop collapse(4) gang vector default(present) private(beta,dvd,poly,omega,alpha)
-                    do l = is3%beg, is3%end
-                        do k = is2%beg, is2%end
-                            do j = is1%beg, is1%end
+                    do l = is3_weno%beg, is3_weno%end
+                        do k = is2_weno%beg, is2_weno%end
+                            do j = is1_weno%beg, is1_weno%end
                                 do i = 1, v_size
                                     ! reconstruct from left side
 
@@ -607,13 +645,12 @@ contains
         else
             #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')]
                 if (weno_dir == ${WENO_DIR}$) then
-                    !$acc parallel loop gang vector collapse (3)  default(present) private(dvd, poly, beta, alpha, omega)
-                    do l = is3%beg, is3%end
-                        do k = is2%beg, is2%end
-                            do j = is1%beg, is1%end
+                    !$acc parallel loop vector gang collapse(3) default(present) private(dvd, poly, beta, alpha, omega)
+                    do l = is3_weno%beg, is3_weno%end
+                        do k = is2_weno%beg, is2_weno%end
+                            do j = is1_weno%beg, is1_weno%end
                                 !$acc loop seq
                                 do i = 1, v_size
-
                                     dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) &
                                              - v_rs_ws_${XYZ}$ (j + 1, k, l, i)
                                     dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) &
@@ -696,7 +733,6 @@ contains
                         call s_preserve_monotonicity(v_rs_ws_${XYZ}$, vL_rs_vf_${XYZ}$, &
                                                      vR_rs_vf_${XYZ}$)
                     end if
-
                 end if
             #:endfor
         end if
@@ -712,9 +748,9 @@ contains
         !! @param vR_vf Right WENO reconstructed cell-boundary values
         !! @param norm_dir Characteristic decommposition coordinate direction
         !! @param weno_dir Coordinate direction of the WENO reconstruction
-        !! @param is1 Index bounds in first coordinate direction
-        !! @param is2 Index bounds in second coordinate direction
-        !! @param is3 Index bounds in third coordinate direction
+        !! @param is1_weno Index bounds in first coordinate direction
+        !! @param is2_weno Index bounds in second coordinate direction
+        !! @param is3_weno Index bounds in third coordinate direction
     subroutine s_initialize_weno(v_vf, & ! ---------
                                  norm_dir, weno_dir)
 
@@ -731,15 +767,14 @@ contains
         ! as to reshape the inputted data in the coordinate direction of
         ! the WENO reconstruction
         v_size = ubound(v_vf, 1)
-
         !$acc update device(v_size)
 
         if (weno_dir == 1) then
             !$acc parallel loop collapse(4) gang vector default(present)
             do j = 1, v_size
-                do q = is3%beg, is3%end
-                    do l = is2%beg, is2%end
-                        do k = is1%beg - weno_polyn, is1%end + weno_polyn
+                do q = is3_weno%beg, is3_weno%end
+                    do l = is2_weno%beg, is2_weno%end
+                        do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
                             v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q)
                         end do
                     end do
@@ -776,9 +811,9 @@ contains
 #endif
                 !$acc parallel loop collapse(4) gang vector default(present)
                 do j = 1, v_size
-                    do q = is3%beg, is3%end
-                        do l = is2%beg, is2%end
-                            do k = is1%beg - weno_polyn, is1%end + weno_polyn
+                    do q = is3_weno%beg, is3_weno%end
+                        do l = is2_weno%beg, is2_weno%end
+                            do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
                                 v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q)
                             end do
                         end do
@@ -808,9 +843,9 @@ contains
 #endif
                 !$acc parallel loop collapse(4) gang vector default(present)
                 do j = 1, v_size
-                    do q = is3%beg, is3%end
-                        do l = is2%beg, is2%end
-                            do k = is1%beg - weno_polyn, is1%end + weno_polyn
+                    do q = is3_weno%beg, is3_weno%end
+                        do l = is2_weno%beg, is2_weno%end
+                            do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn
                                 v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k)
                             end do
                         end do
@@ -872,9 +907,9 @@ contains
         real(kind(0d0)), parameter :: beta_mp = 4d0/3d0
 
         !$acc parallel loop gang vector collapse (4)  default(present) private(d)
-        do l = is3%beg, is3%end
-            do k = is2%beg, is2%end
-                do j = is1%beg, is1%end
+        do l = is3_weno%beg, is3_weno%end
+            do k = is2_weno%beg, is2_weno%end
+                do j = is1_weno%beg, is1_weno%end
                     do i = 1, v_size
                         d(-1) = v_rs_ws(j, k, l, i) &
                                 + v_rs_ws(j - 2, k, l, i) &
@@ -1010,34 +1045,34 @@ contains
         ! Deallocating the WENO-stencil of the WENO-reconstructed variables
 
         !deallocate(vL_rs_vf_x, vR_rs_vf_x)
-        @:DEALLOCATE(v_rs_ws_x)
+        @:DEALLOCATE_GLOBAL(v_rs_ws_x)
 
         ! Deallocating WENO coefficients in x-direction ====================
-        @:DEALLOCATE(poly_coef_cbL_x, poly_coef_cbR_x)
-        @:DEALLOCATE(d_cbL_x, d_cbR_x)
-        @:DEALLOCATE(beta_coef_x)
+        @:DEALLOCATE_GLOBAL(poly_coef_cbL_x, poly_coef_cbR_x)
+        @:DEALLOCATE_GLOBAL(d_cbL_x, d_cbR_x)
+        @:DEALLOCATE_GLOBAL(beta_coef_x)
         ! ==================================================================
 
         ! Deallocating WENO coefficients in y-direction ====================
         if (n == 0) return
 
         !deallocate(vL_rs_vf_y, vR_rs_vf_y)
-        @:DEALLOCATE(v_rs_ws_y)
+        @:DEALLOCATE_GLOBAL(v_rs_ws_y)
 
-        @:DEALLOCATE(poly_coef_cbL_y, poly_coef_cbR_y)
-        @:DEALLOCATE(d_cbL_y, d_cbR_y)
-        @:DEALLOCATE(beta_coef_y)
+        @:DEALLOCATE_GLOBAL(poly_coef_cbL_y, poly_coef_cbR_y)
+        @:DEALLOCATE_GLOBAL(d_cbL_y, d_cbR_y)
+        @:DEALLOCATE_GLOBAL(beta_coef_y)
         ! ==================================================================
 
         ! Deallocating WENO coefficients in z-direction ====================
         if (p == 0) return
 
         !deallocate(vL_rs_vf_z, vR_rs_vf_z)
-        @:DEALLOCATE(v_rs_ws_z)
+        @:DEALLOCATE_GLOBAL(v_rs_ws_z)
 
-        @:DEALLOCATE(poly_coef_cbL_z, poly_coef_cbR_z)
-        @:DEALLOCATE(d_cbL_z, d_cbR_z)
-        @:DEALLOCATE(beta_coef_z)
+        @:DEALLOCATE_GLOBAL(poly_coef_cbL_z, poly_coef_cbR_z)
+        @:DEALLOCATE_GLOBAL(d_cbL_z, d_cbR_z)
+        @:DEALLOCATE_GLOBAL(beta_coef_z)
         ! ==================================================================
 
     end subroutine s_finalize_weno_module ! --------------------------------
diff --git a/toolchain/bootstrap/format.sh b/toolchain/bootstrap/format.sh
index 0a2ec24a1..a2ca71248 100644
--- a/toolchain/bootstrap/format.sh
+++ b/toolchain/bootstrap/format.sh
@@ -17,14 +17,8 @@ done
 
 log "Formatting MFC:"
 
-if ! find ${@:-src} -type f | grep -Ev 'autogen' | grep -E '\.(f90|fpp)$' | \
-         parallel --jobs ${JOBS:-1} -- \
-             echo      "\> {}" \&\& \
-             python3   toolchain/indenter.py "{}" \&\& \
-             fprettify "{}" --silent --indent 4 --c-relations --enable-replacements \
-                       --enable-decl --whitespace-comma 1 --whitespace-multdiv 0    \
-                       --whitespace-plusminus 1 --case 1 1 1 1 --strict-indent      \
-                       --line-length 1000\;; then
+if ! find ${@:-src} -type f | grep -Ev 'autogen' | grep -E '\.(f90|fpp)$' \
+        | xargs -L 1 -P ${JOBS:-1} $SHELL toolchain/bootstrap/format_file.sh; then
     error "Formatting MFC failed."
     exit 1
 fi
diff --git a/toolchain/bootstrap/format_file.sh b/toolchain/bootstrap/format_file.sh
new file mode 100644
index 000000000..5f086e17b
--- /dev/null
+++ b/toolchain/bootstrap/format_file.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. toolchain/util.sh
+
+echo "> $1"
+
+niter=0
+old_file=""
+
+while :
+do
+    niter=$((niter+1))
+    new_file=`cat "$1"`
+    if [[ "$new_file" == "$old_file" ]]; then
+        break
+    fi
+    old_file="$new_file"
+
+    if [[ "$niter" -gt 4 ]]; then
+        error "Failed to format $1: No steady-state (after $niter iterations)."
+        exit 1
+    fi
+
+    if ! python3 toolchain/indenter.py "$1"; then
+        error "Failed to format $1: indenter.py."
+        exit 1
+    fi
+
+    if ! fprettify "$1" --silent --indent 4 --c-relations --enable-replacements \
+            --enable-decl --whitespace-comma 1 --whitespace-multdiv 0           \
+            --whitespace-plusminus 1 --case 1 1 1 1 --strict-indent             \
+            --line-length 1000; then
+        error "Failed to format $1: fprettify."
+        exit 1
+    fi
+done
+
diff --git a/toolchain/bootstrap/modules.sh b/toolchain/bootstrap/modules.sh
index 022529548..e864d1faf 100644
--- a/toolchain/bootstrap/modules.sh
+++ b/toolchain/bootstrap/modules.sh
@@ -19,11 +19,11 @@ done
 # Get computer (if not supplied in command-line)
 if [ -v $u_c ]; then
     log   "Select a system:"
-    log   "$G""ORNL$W:    Ascent     (a) | Crusher (c) | Summit (s) | Wombat (w)"
+    log   "$G""ORNL$W:    Ascent     (a) | Frontier (f) | Summit (s) | Wombat (w)"
     log   "$C""ACCESS$W:  Bridges2   (b) | Expanse (e) | Delta  (d)"
-    log   "$Y""GaTech$W:  Phoenix    (p)"
-    log   "$R""CALTECH$W: Richardson (r)"
-    log_n "($G""a$W/$G""c$W/$G""s$W/$G""w$W/$C""b$W/$C""e$CR/$C""d$CR/$Y""p$CR/$R""r$CR): "
+    log   "$Y""Gatech$W:  Phoenix    (p)"
+    log   "$R""Caltech$W: Richardson (r)"
+    log_n "($G""a$W/$G""f$W/$G""s$W/$G""w$W/$C""b$W/$C""e$CR/$C""d$CR/$Y""p$CR/$R""r$CR): "
     read u_c
     log
 fi
diff --git a/toolchain/cce_simulation_workgroup_256.sh b/toolchain/cce_simulation_workgroup_256.sh
new file mode 100755
index 000000000..9137871d5
--- /dev/null
+++ b/toolchain/cce_simulation_workgroup_256.sh
@@ -0,0 +1,48 @@
+#! /bin/bash
+
+# On Frontier, after building simulation run the script
+# ./cce_simulation_workgroup_256.sh to edit the workgroup
+# size in the generated code and recompile, then set the recommended
+# environment variable to use the new code.
+
+# Using the script requires `-hkeepfiles` at link!
+
+# This script needs to be run after every time simulation
+# is built to generate a correct offload binary.
+# This include case-optimization builds!
+
+CCE_LLVM_PATH=${CRAY_CCE_CLANGSHARE}/../
+
+## This was errantly 1024 by default in the generated binary
+WGSIZE=256
+
+## Turning bitcode into human-readable IR
+echo "Disassembling"
+${CCE_LLVM_PATH}/bin/llvm-dis "$1/simulation-cce-openmp-pre-llc.bc"
+
+## Find/replace the workgroup size to what it _should be_ in the (human readable) IR
+echo "Globally setting amdgpu-flat-work-group-size size to 1,$WGSIZE"
+sed "s/\"amdgpu-flat-work-group-size\"\=\"1,1024\"/\"amdgpu-flat-work-group-size\"\=\"1,${WGSIZE}\"/g" "$1/simulation-cce-openmp-pre-llc.ll" > "$1/simulation-cce-openmp-pre-llc-wg${WGSIZE}.ll"
+
+## This is building to an AMD GPU code object, using internal copy/pasted Cray Fortran   flags.
+## The flags may need to be adjusted for future compiler versions
+echo "Invoking LLC to compile"
+${CCE_LLVM_PATH}/bin/llc -mtriple=amdgcn-amd-amdhsa -disable-promote-alloca-to-lds -mcpu=gfx90a -amdgpu-dump-hsa-metadata "$1/simulation-cce-openmp-pre-llc-wg${WGSIZE}.ll" -filetype=obj -o "$1/simulation-cce-openmp__llc_wg${WGSIZE}.amdgpu"
+
+## Wrapping AMD GPU code object in an object that CCE OpenACC runtime understands
+echo "Linking to a CCE Offload module"
+${CCE_LLVM_PATH}/bin/lld  -flavor gnu --no-undefined -shared -o "$1/simulation-wg${WGSIZE}.lld.exe" "$1/simulation-cce-openmp__llc_wg${WGSIZE}.amdgpu"
+
+## Backend/hidden env var that tells the runtime where to find the offload object
+echo "Now "
+echo "export CRAY_ACC_MODULE=${PWD}/build/simulation/simulation-wg${WGSIZE}.lld.exe"
+echo "to use the new GPU offload code."
+echo "To use the original build"
+echo "unset CRAY_ACC_MODULE"
+
+# # This goes inside an sbatch script for multi-node submissions (> 1000 nodes important)
+# # Requet an nvme via
+# #SBATCH -C nvme
+# # Put this in the sbatch script before execution via srun
+# sbcast -pf ${PWD}/build/simulation/simulation-wg${WGSIZE}.lld.exe /mnt/bb/$USER/simulation-wg${WGSIZE}.lld.exe
+# export CRAY_ACC_MODULE=/mnt/bb/$USER/simulation-wg${WGSIZE}.lld.exe"
diff --git a/toolchain/dependencies/CMakeLists.txt b/toolchain/dependencies/CMakeLists.txt
index 200f69979..69454adb3 100644
--- a/toolchain/dependencies/CMakeLists.txt
+++ b/toolchain/dependencies/CMakeLists.txt
@@ -9,15 +9,18 @@ if (MFC_SILO OR NOT CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
     enable_language(C CXX)
 endif()
 
+set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON)
+
 # Imports
 include(GNUInstallDirs)
 include(ExternalProject)
 
 
 # Options
-option(MFC_FFTW "Build the FFTW3 dependency" OFF)
-option(MFC_HDF5 "Build the HDF5  dependency" OFF)
-option(MFC_SILO "Build the SILO  dependency" OFF)
+option(MFC_FFTW    "Build the FFTW3 dependency" OFF)
+option(MFC_HDF5    "Build the HDF5  dependency" OFF)
+option(MFC_SILO    "Build the SILO  dependency" OFF)
+option(MFC_HIPFORT "Build the HIPFORT dependency" OFF)
 
 
 # FFTW3
@@ -38,44 +41,62 @@ endif()
 
 # HDF5
 if (MFC_HDF5)
-    if (NOT CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
-        ExternalProject_Add(hdf5
-            GIT_REPOSITORY "https://github.com/HDFGroup/hdf5"
-            GIT_TAG        hdf5-1_12_2
-            GIT_SHALLOW    ON
-            GIT_PROGRESS   ON
-            CMAKE_ARGS     "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}"
-                           -DBUILD_SHARED_LIBS=OFF
-                           -DFORTRAN_LIBRARIES=ON
-                           -DBUILD_TESTING=OFF
-                           -DHDF5_BUILD_UTILS=OFF
-                           -DHDF5_BUILD_TOOLS=ON
-                           -DHDF5_BUILD_EXAMPLES=OFF
-        )
-    else()
-        message(WARNING "The Fortran compiler vendor is Cray so HDF5 will not be built. We will use cray-hdf5 instead.")
-        add_custom_target(hdf5)
-    endif()
+    ExternalProject_Add(hdf5
+        GIT_REPOSITORY "https://github.com/HDFGroup/hdf5"
+        GIT_TAG        hdf5-1_12_2
+        GIT_SHALLOW    ON
+        GIT_PROGRESS   ON
+        CMAKE_ARGS     "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}"
+                        -DBUILD_SHARED_LIBS=OFF
+                        -DFORTRAN_LIBRARIES=ON
+                        -DBUILD_TESTING=OFF
+                        -DHDF5_BUILD_UTILS=OFF
+                        -DHDF5_BUILD_TOOLS=ON
+                        -DHDF5_BUILD_EXAMPLES=OFF
+    )
 endif()
 
 
 # SILO
 if (MFC_SILO)
+    # If we are using the CCE, HDF5 is not built, and we wish to find
+    # the system's cray-hdf5. Otherwise, we point SILO to find HDF5 in
+    # our common install directory using SILO_HDF5_DIR.
     ExternalProject_Add(silo
         GIT_REPOSITORY "https://github.com/henryleberre/Silo"
-        GIT_TAG        964d993039b467e3d472d3a70afd03380fe2c320
+        GIT_TAG        af955eb5dd009caf00c41ca51611b37c052b042c
         GIT_SHALLOW    ON
         GIT_PROGRESS   ON
-        CMAKE_ARGS     -DSILO_ENABLE_SHARED=OFF
-                       -DSILO_ENABLE_SILOCK=OFF
-                       -DSILO_ENABLE_BROWSER=OFF
-                       -DHDF5_USE_STATIC_LIBRARIES=ON
+        CMAKE_ARGS      -DSILO_ENABLE_SHARED=OFF
+                        -DSILO_ENABLE_SILOCK=OFF
+                        -DSILO_ENABLE_BROWSER=OFF
+                        -DHDF5_USE_STATIC_LIBRARIES=ON
                        "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}"
-                       $<$<BOOL:${MFC_BUILD_HDF5}>:"-DSILO_HDF5_DIR=\"${CMAKE_INSTALL_PREFIX}\"">
+                       "-DSILO_HDF5_DIR=${CMAKE_INSTALL_PREFIX}"
+                       "$<$<STREQUAL:${CMAKE_Fortran_COMPILER_ID},Cray>:-DCMAKE_MODULE_PATH=${CMAKE_SOURCE_DIR}/../cmake>"
     )
 
-    if (MFC_HDF5 AND NOT CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
+    if (MFC_HDF5)
         add_dependencies(silo hdf5)
     endif()
 endif()
 
+# HIPFORT
+if (MFC_HIPFORT)
+    if (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
+        ExternalProject_Add(hipfort
+            GIT_REPOSITORY "https://github.com/ROCmSoftwarePlatform/hipfort"
+            GIT_TAG        develop
+            GIT_SHALLOW    ON
+            GIT_PROGRESS   ON
+            CMAKE_ARGS     "-DHIPFORT_COMPILER=${CMAKE_Fortran_COMPILER}"
+                           "-DHIPFORT_AR=${CMAKE_AR}"
+                           "-DHIPFORT_RANLIB=${CMAKE_RANLIB}"
+                           "-DHIPFORT_COMPILER_FLAGS=-f free -e F -O0 -h ipa0"
+                           "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}"
+        )
+    else()
+        message(WARNING "The Fortran compiler vendor is not Cray so HIPFORT will not be built.")
+        add_custom_target(hipfort)
+    endif()
+endif()
diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py
index 3eb109f0a..70c1af39f 100644
--- a/toolchain/mfc/args.py
+++ b/toolchain/mfc/args.py
@@ -63,7 +63,7 @@ def add_common_arguments(p, mask = None):
 
         if "n" not in mask:
             for target in DEPENDENCY_TARGETS:
-                p.add_argument(f"--no-{target.name}", action="store_true", help=f"Do not build the {target.name} dependency. Use the system's instead.")
+                p.add_argument(f"--sys-{target.name}", action="store_true", help=f"Do not build the {target.name} dependency. Use the system's instead.")
 
         if "g" not in mask:
             p.add_argument("-g", "--gpus", nargs="+", type=int, default=None, help="(Optional GPU override) List of GPU #s to use (environment default if unspecified).")
@@ -88,8 +88,8 @@ def add_common_arguments(p, mask = None):
     test.add_argument("-a", "--test-all",     action="store_true", default=False, help="Run the Post Process Tests too.")
     test.add_argument("-%", "--percent",      type=int, default=100, help="Percentage of tests to run.")
     test.add_argument("-m", "--max-attempts", type=int, default=3, help="Maximum number of attempts to run a test.")
-
-    test.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.")
+    test.add_argument(      "--no-build",     action="store_true",                    default=False,      help="(Testing) Do not rebuild MFC.")
+    test.add_argument("--case-optimization",  action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.")
 
     test_meg = test.add_mutually_exclusive_group()
     test_meg.add_argument("--generate",          action="store_true", default=False, help="(Test Generation) Generate golden files.")
@@ -115,6 +115,8 @@ def add_common_arguments(p, mask = None):
     run.add_argument("-b", "--binary",             choices=["mpirun", "jsrun", "srun", "mpiexec"],             type=str, default=None,       help="(Interactive) Override MPI execution binary")
     run.add_argument("--ncu",                      nargs=argparse.REMAINDER,     type=str,                     help="Profile with NVIDIA Nsight Compute.")
     run.add_argument("--nsys",                     nargs=argparse.REMAINDER,     type=str,                     help="Profile with NVIDIA Nsight Systems.")
+    run.add_argument("--omni",                 nargs=argparse.REMAINDER,        type=str,                     help="Profile with ROCM omniperf.")
+    run.add_argument("--roc",                  nargs=argparse.REMAINDER,        type=str,                     help="Profile with ROCM rocprof.")
     run.add_argument(      "--dry-run",            action="store_true",                    default=False,      help="(Batch) Run without submitting batch file.")
     run.add_argument("--case-optimization",        action="store_true",                    default=False,      help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.")
     run.add_argument(      "--no-build",           action="store_true",                    default=False,      help="(Testing) Do not rebuild MFC.")
diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py
index 34d3cf9f1..067204f29 100644
--- a/toolchain/mfc/build.py
+++ b/toolchain/mfc/build.py
@@ -95,7 +95,7 @@ def is_buildable(self) -> bool:
         if ARG("no_build"):
             return False
 
-        if self.isDependency and ARG(f"no_{self.name}"):
+        if self.isDependency and ARG(f"sys_{self.name}", False):
             return False
 
         return True
@@ -107,6 +107,8 @@ def configure(self):
 
         install_prefixes = ';'.join([install_dirpath, get_dependency_install_dirpath()])
 
+        mod_dirs = ';'.join(['build/install/dependencies/include/hipfort/amdgcn'])
+
         flags: list = self.flags.copy() + [
             # Disable CMake warnings intended for developers (us).
             # See: https://cmake.org/cmake/help/latest/manual/cmake.1.html.
@@ -133,8 +135,14 @@ def configure(self):
             # Location prefix to install bin/, lib/, include/, etc.
             # See: https://cmake.org/cmake/help/latest/command/install.html.
             f"-DCMAKE_INSTALL_PREFIX={install_dirpath}",
+            # Fortran .mod include directories. Currently used for the HIPFORT
+            # dependency that has this missing from its config files.
+            f"-DCMAKE_Fortran_MODULE_DIRECTORY={mod_dirs}",
         ]
 
+        if ARG("verbose"):
+            flags.append('--debug-find')
+
         if not self.isDependency:
             flags.append(f"-DMFC_MPI={    'ON' if ARG('mpi') else 'OFF'}")
             flags.append(f"-DMFC_OpenACC={'ON' if ARG('gpu') else 'OFF'}")
@@ -189,17 +197,17 @@ def clean(self):
         if system(command).returncode != 0:
             raise MFCException(f"Failed to clean the [bold magenta]{self.name}[/bold magenta] target.")
 
-
 FFTW          = MFCTarget('fftw',          ['-DMFC_FFTW=ON'],          True,  False, False, MFCTarget.Dependencies([], [], []), -1)
 HDF5          = MFCTarget('hdf5',          ['-DMFC_HDF5=ON'],          True,  False, False, MFCTarget.Dependencies([], [], []), -1)
 SILO          = MFCTarget('silo',          ['-DMFC_SILO=ON'],          True,  False, False, MFCTarget.Dependencies([HDF5], [], []), -1)
+HIPFORT       = MFCTarget('hipfort',       ['-DMFC_HIPFORT=ON'],       True,  False, False, MFCTarget.Dependencies([], [], []), -1)
 PRE_PROCESS   = MFCTarget('pre_process',   ['-DMFC_PRE_PROCESS=ON'],   False, True,  False, MFCTarget.Dependencies([], [], []), 0)
-SIMULATION    = MFCTarget('simulation',    ['-DMFC_SIMULATION=ON'],    False, True,  False, MFCTarget.Dependencies([], [FFTW], []), 1)
+SIMULATION    = MFCTarget('simulation',    ['-DMFC_SIMULATION=ON'],    False, True,  False, MFCTarget.Dependencies([], [FFTW], [HIPFORT]), 1)
 POST_PROCESS  = MFCTarget('post_process',  ['-DMFC_POST_PROCESS=ON'],  False, True,  False, MFCTarget.Dependencies([FFTW, SILO], [], []), 2)
-SYSCHECK      = MFCTarget('syscheck',      ['-DMFC_SYSCHECK=ON'],      False, False, True,  MFCTarget.Dependencies([], [], []), -1)
+SYSCHECK      = MFCTarget('syscheck',      ['-DMFC_SYSCHECK=ON'],      False, False, True,  MFCTarget.Dependencies([], [], [HIPFORT]), -1)
 DOCUMENTATION = MFCTarget('documentation', ['-DMFC_DOCUMENTATION=ON'], False, False, False, MFCTarget.Dependencies([], [], []), -1)
 
-TARGETS = { FFTW, HDF5, SILO, PRE_PROCESS, SIMULATION, POST_PROCESS, SYSCHECK, DOCUMENTATION }
+TARGETS = { FFTW, HDF5, SILO, HIPFORT, PRE_PROCESS, SIMULATION, POST_PROCESS, SYSCHECK, DOCUMENTATION }
 
 DEFAULT_TARGETS    = { target for target in TARGETS if target.isDefault }
 REQUIRED_TARGETS   = { target for target in TARGETS if target.isRequired }
@@ -242,7 +250,14 @@ def __build_target(target: typing.Union[MFCTarget, str], history: typing.Set[str
 
     history.add(target.name)
 
-    build(target.requires.compute(), history)
+    for dep in target.requires.compute():
+        # If we have already built and installed this target,
+        # do not do so again. This can be inferred by whether
+        # the target requesting this dependency is already configured.
+        if dep.isDependency and target.is_configured():
+            continue
+
+        build([dep], history)
 
     if not target.is_configured():
         target.configure()
diff --git a/toolchain/mfc/common.py b/toolchain/mfc/common.py
index 3d6af7f27..c0c518594 100644
--- a/toolchain/mfc/common.py
+++ b/toolchain/mfc/common.py
@@ -28,7 +28,6 @@
 class MFCException(Exception):
     pass
 
-
 def system(command: typing.List[str], print_cmd = None, **kwargs) -> subprocess.CompletedProcess:
     cmd = [ str(x) for x in command if not isspace(str(x)) ]
 
diff --git a/toolchain/mfc/run/input.py b/toolchain/mfc/run/input.py
index 3a00e4752..751beedcd 100644
--- a/toolchain/mfc/run/input.py
+++ b/toolchain/mfc/run/input.py
@@ -158,7 +158,7 @@ def __get_sim_fpp(self, print: bool) -> str:
             if print:
                 cons.print("Case optimization is enabled.")
 
-            nterms = -100
+            nterms = 1
 
             bubble_model = int(self.case_dict.get("bubble_model", "-100"))
 
diff --git a/toolchain/mfc/state.py b/toolchain/mfc/state.py
index aacf4fc0e..1b06ed4cb 100644
--- a/toolchain/mfc/state.py
+++ b/toolchain/mfc/state.py
@@ -49,10 +49,15 @@ def __str__(self) -> str:
 gARG: dict      = {}
 
 
-def ARG(arg: str) -> typing.Any:
+def ARG(arg: str, dflt = None) -> typing.Any:
     # pylint: disable=global-variable-not-assigned
     global gARG
-    return gARG[arg]
+    if arg in gARG:
+        return gARG[arg]
+    if dflt is not None:
+        return dflt
+
+    raise KeyError(f"{arg} is not an argument.")
 
 def ARGS() -> dict:
     # pylint: disable=global-variable-not-assigned
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index adca675da..1c5e6de74 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -189,9 +189,9 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
 
             h5dump = f"{HDF5.get_install_dirpath()}/bin/h5dump"
 
-            if ARG("no_hdf5"):
+            if ARG("sys_hdf5"):
                 if not does_command_exist("h5dump"):
-                    raise MFCException("--no-hdf5 was specified and h5dump couldn't be found.")
+                    raise MFCException("--sys-hdf5 was specified and h5dump couldn't be found.")
 
                 h5dump = shutil.which("h5dump")
 
diff --git a/toolchain/modules b/toolchain/modules
index ea62a0841..51b710162 100644
--- a/toolchain/modules
+++ b/toolchain/modules
@@ -47,11 +47,13 @@ p-cpu gcc/10.3.0-o57x6h openmpi/4.1.4
 p-gpu cuda/11.7.0-7sdye3 nvhpc/22.11
 p-gpu MFC_CUDA_CC=70,80 CC=nvc CXX=nvc++ FC=nvfortran
 
-c     OLCF Crusher
-c-all cmake/3.23.2 cray-fftw/3.3.10.2 hdf5/1.12.1 cray-python/3.9.13.1
-c-all ninja/1.10.2 cray-mpich/8.1.23
-c-cpu
-c-gpu rocm/5.1.0 craype-accel-amd-gfx90a
+f     OLCF Frontier
+f-gpu rocm/5.5.1 craype-accel-amd-gfx90a
+f-all cpe/23.09
+f-all cray-fftw cray-hdf5 cray-mpich cce/16.0.1 
+f-all rocm/5.5.1 cray-python  omniperf
+f-cpu
+
 
 d     NCSA Delta
 d-all python/3.11.6
diff --git a/toolchain/requirements.txt b/toolchain/requirements.txt
index fd2377a6d..9c6a914e4 100644
--- a/toolchain/requirements.txt
+++ b/toolchain/requirements.txt
@@ -1,3 +1,5 @@
+numpy
+pandas
 rich
 fypp
 mako
diff --git a/toolchain/templates/frontier.mako b/toolchain/templates/frontier.mako
new file mode 100644
index 000000000..2668ba82f
--- /dev/null
+++ b/toolchain/templates/frontier.mako
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+
+<%namespace name="helpers" file="helpers.mako"/>
+
+% if engine == 'batch':
+#SBATCH --nodes=${nodes}
+#SBATCH --ntasks-per-node=${tasks_per_node}
+#SBATCH --job-name="${name}"
+#SBATCH --output="${name}.out"
+#SBATCH --time=${walltime}
+% if account:
+#SBATCH --account=${account}
+% endif
+% if partition:
+#SBATCH --partition=${partition}
+% endif
+% if quality_of_service:
+#SBATCH --qos=${quality_of_service}
+% endif
+% if email:
+#SBATCH --mail-user=${email}
+#SBATCH --mail-type="BEGIN, END, FAIL"
+% endif
+% endif
+
+${helpers.template_prologue()}
+
+ok ":) Loading modules:\n"
+cd "${MFC_ROOTDIR}"
+% if engine == 'batch':
+. ./mfc.sh load -c f -m ${'g' if gpu else 'c'}
+% endif
+cd - > /dev/null
+echo
+
+% for target in targets:
+    ${helpers.run_prologue(target)}
+
+    % if not mpi:
+        (set -x; ${' '.join([f"'{x}'" for x in profiler ])} "${target.get_install_binpath()}")
+    % else:
+        (set -x; ${' '.join([f"'{x}'" for x in profiler ])}    \
+            srun -N ${nodes}                 \
+                   -n ${tasks_per_node}                              \
+                   ${' '.join([f"'{x}'" for x in ARG('--') ])} \
+                   "${target.get_install_binpath()}")
+    % endif
+
+    ${helpers.run_epilogue(target)}
+
+    echo
+% endfor
+
+${helpers.template_epilogue()}
diff --git a/toolchain/templates/include/helpers.mako b/toolchain/templates/include/helpers.mako
index 262fc5917..8f4d8fefd 100644
--- a/toolchain/templates/include/helpers.mako
+++ b/toolchain/templates/include/helpers.mako
@@ -58,6 +58,10 @@ exit $code
 <%def name="run_prologue(target)">
 ok ":) Running$MAGENTA ${target.name}$COLOR_RESET:\n"
 
+if [ '${target.name}' == 'simulation' ]; then
+    export CRAY_ACC_MODULE='${target.get_staging_dirpath()}/simulation-wg256.lld.exe'
+fi
+
 cd "${os.path.dirname(input)}"
 
 t_${target.name}_start=$(date +%s)
@@ -75,6 +79,8 @@ if [ $code -ne 0 ]; then
     exit 1
 fi
 
+unset CRAY_ACC_MODULE
+
 % if output_summary:
 
 cd "${MFC_ROOTDIR}"
@@ -86,4 +92,4 @@ EOL
 cd - > /dev/null
 
 % endif
-</%def>
\ No newline at end of file
+</%def>