Skip to content

Commit

Permalink
Merge branch 'master' into add-modular-precision-update
Browse files Browse the repository at this point in the history
  • Loading branch information
aricer123 authored Nov 8, 2024
2 parents 6986aac + 78a810f commit 68e62d4
Show file tree
Hide file tree
Showing 25 changed files with 474 additions and 939 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,11 @@ if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
endif()
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
add_compile_options(
"SHELL:-h nomessage=296:878:1391:1069"
"SHELL:-M 296,878,1391,1069,5025"
"SHELL:-h static" "SHELL:-h keepfiles"
"SHELL:-h acc_model=auto_async_none"
"SHELL: -h acc_model=no_fast_addr"
"SHELL: -h list=adm" "-DCRAY_ACC_SIMPLIFY" "-DCRAY_ACC_WAR"
"SHELL: -h list=adm"
)

add_link_options("SHELL:-hkeepfiles")
Expand Down
19 changes: 10 additions & 9 deletions docs/documentation/expectedPerformance.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,37 +30,38 @@ Note:
| NVIDIA V100 | | GPU | 1 GPU | 0.99 | NVHPC 22.11 | GT Phoenix |
| NVIDIA A30 | | GPU | 1 GPU | 1.1 | NVHPC 24.1 | GT Rogues Gallery |
| AMD MI250X | | GPU | 1 _GCD_* | 1.1 | CCE 16.0.1 | OLCF Frontier |
| AMD EPYC 9965 | Turin | CPU | 192 cores | 1.2 | AOCC 5.0.0 | AMD Volcano |
| AMD EPYC 9965 | Turin, Zen5c | CPU | 192 cores | 1.2 | AOCC 5.0.0 | AMD Volcano |
| AMD MI100 | | GPU | 1 GPU | 1.4 | CCE 16.0.1 | Cray internal system |
| AMD EPYC 9755 | Turin, Zen5 | CPU | 128 cores | 1.4 | AOCC 5.0.0 | AMD Volcano |
| Intel Xeon 6980P | Granite Rapids | CPU | 128 cores | 1.4 | Intel 2024.2 | Intel Endeavour |
| NVIDIA L40S | FP32-only GPU | GPU | 1 GPU | 1.7 | NVHPC 24.5 | GT ICE |
| AMD EPYC 9654 | Genoa | CPU | 96 cores | 1.7 | Intel 2021.9 | DOD Carpenter |
| AMD EPYC 9654 | Genoa, Zen4 | CPU | 96 cores | 1.7 | Intel 2021.9 | DOD Carpenter |
| Intel Xeon 6960P | Granite Rapids | CPU | 72 cores | 1.7 | Intel 2024.2 | Intel AI Cloud |
| NVIDIA P100 | | GPU | 1 GPU | 2.4 | NVHPC 23.5 | GT CSE Internal |
| Intel Xeon 8592+ | Emerald Rapids | CPU | 64 cores | 2.6 | Intel 2024.2 | Intel AI Cloud |
| Intel Xeon 6900E | Sierra Forest Advanced, 2.8GHz Boost, 384 MiB L3 | CPU | 192 cores | 2.6 | Intel 2024.2 | Intel AI Cloud |
| AMD EPYC 9534 | Genoa | CPU | 64 cores | 2.7 | GNU 12.3.0 | GT Phoenix |
| Intel Xeon 6900E | Sierra Forest Adv., 2.8GHz Boost, 384 MiB L3 | CPU | 192 cores | 2.6 | Intel 2024.2 | Intel AI Cloud |
| AMD EPYC 9534 | Genoa, Zen4 | CPU | 64 cores | 2.7 | GNU 12.3.0 | GT Phoenix |
| NVIDIA A40 | FP32-only GPU | GPU | 1 GPU | 3.3 | NVHPC 22.11 | NCSA Delta |
| Intel Xeon Max 9468 | Sapphire Rapids HBM | CPU | 48 cores | 3.5 | NVHPC 24.5 | GT Rogues Gallery |
| NVIDIA Grace CPU | Arm, Neoverse V2 | CPU | 72 cores | 3.7 | NVHPC 24.1 | GT Rogues Gallery |
| NVIDIA RTX6000 | FP32-only GPU | GPU | 1 GPU | 3.9 | NVHPC 22.11 | GT Phoenix |
| AMD EPYC 7763 | Milan | CPU | 64 cores | 4.1 | GNU 11.4.0 | NCSA Delta |
| AMD EPYC 7763 | Milan, Zen3 | CPU | 64 cores | 4.1 | GNU 11.4.0 | NCSA Delta |
| Intel Xeon 6740E | Sierra Forest | CPU | 92 cores | 4.2 | Intel 2024.2 | Intel AI Cloud |
| NVIDIA A10 | FP32-only GPU | GPU | 1 GPU | 4.3 | NVHPC 24.1 | TAMU Faster |
| AMD EPYC 7713 | Milan | CPU | 64 cores | 5.0 | GNU 12.3.0 | GT Phoenix |
| AMD EPYC 7713 | Milan, Zen3 | CPU | 64 cores | 5.0 | GNU 12.3.0 | GT Phoenix |
| Intel Xeon 8480CL | Sapphire Rapids | CPU | 56 cores | 5.0 | NVHPC 24.5 | GT Phoenix |
| Intel Xeon 6454S | Sapphire Rapids | CPU | 32 cores | 5.6 | NVHPC 24.5 | GT Rogues Gallery |
| Intel Xeon 8462Y+ | Sapphire Rapids | CPU | 32 cores | 6.2 | GNU 12.3.0 | GT ICE |
| Intel Xeon 6548Y+ | Emerald Rapids | CPU | 32 cores | 6.6 | Intel 2021.9 | GT ICE |
| Intel Xeon 8352Y | Ice Lake | CPU | 32 cores | 6.6 | NVHPC 24.5 | GT Rogues Gallery |
| Ampere Altra Q80-28 | Arm, Neoverse-N1 | CPU | 80 cores | 6.8 | GNU 12.2.0 | OLCF Wombat |
| AMD EPYC 7513 | Milan | CPU | 32 cores | 7.4 | GNU 12.3.0 | GT ICE |
| AMD EPYC 7513 | Milan, Zen3 | CPU | 32 cores | 7.4 | GNU 12.3.0 | GT ICE |
| Intel Xeon 8268 | Cascade Lake | CPU | 24 cores | 7.5 | Intel 2024.2 | TAMU ACES |
| AMD EPYC 7452 | Rome | CPU | 32 cores | 8.4 | GNU 12.3.0 | GT ICE |
| AMD EPYC 7452 | Rome, Zen2 | CPU | 32 cores | 8.4 | GNU 12.3.0 | GT ICE |
| NVIDIA T4 | FP32-only GPU | GPU | 1 GPU | 8.8 | NVHPC 24.1 | TAMU Faster |
| Intel Xeon 8160 | Skylake | CPU | 24 cores | 8.9 | Intel 2024.0 | TACC Stampede3 |
| IBM Power10 | | CPU | 24 cores | 10 | GNU 13.3.1 | GT Rogues Gallery |
| AMD EPYC 7401 | Naples | CPU | 24 cores | 10 | GNU 10.3.1 | LLNL Corona |
| AMD EPYC 7401 | Naples, Zen(1) | CPU | 24 cores | 10 | GNU 10.3.1 | LLNL Corona |
| Intel Xeon 6226 | Cascade Lake | CPU | 12 cores | 17 | GNU 12.3.0 | GT ICE |
| Apple M1 Max | | CPU | 10 cores | 20 | GNU 14.1.0 | N/A |
| IBM Power9 | | CPU | 20 cores | 21 | GNU 9.1.0 | OLCF Summit |
Expand Down
54 changes: 7 additions & 47 deletions src/common/include/macros.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,71 +13,31 @@
#:def ALLOCATE(*args)
@:LOG({'@:ALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'})
allocate (${', '.join(args)}$)
#ifndef CRAY_ACC_WAR
!$acc enter data create(${', '.join(args)}$)
#endif
!$acc enter data create(${', '.join(args)}$)
#:enddef ALLOCATE

#:def DEALLOCATE(*args)
@:LOG({'@:DEALLOCATE(${re.sub(' +', ' ', ', '.join(args))}$)'})
deallocate (${', '.join(args)}$)
#ifndef CRAY_ACC_WAR
!$acc exit data delete(${', '.join(args)}$)
#endif
!$acc exit data delete(${', '.join(args)}$)
#:enddef DEALLOCATE

#:def ALLOCATE_GLOBAL(*args)
@:LOG({'@:ALLOCATE_GLOBAL(${re.sub(' +', ' ', ', '.join(args))}$)'})
#ifdef CRAY_ACC_WAR
allocate (${', '.join(('p_' + arg.strip() for arg in args))}$)
#:for arg in args
${re.sub('\\(.*\\)','',arg)}$ => ${ 'p_' + re.sub('\\(.*\\)','',arg.strip()) }$
#:endfor
!$acc enter data create(${', '.join(('p_' + re.sub('\\(.*\\)','',arg.strip()) for arg in args))}$) &
!$acc& attach(${', '.join(map(lambda x: re.sub('\\(.*\\)','',x), args))}$)
#else

allocate (${', '.join(args)}$)
!$acc enter data create(${', '.join(args)}$)
#endif

#:enddef ALLOCATE_GLOBAL

#:def DEALLOCATE_GLOBAL(*args)
@:LOG({'@:DEALLOCATE_GLOBAL(${re.sub(' +', ' ', ', '.join(args))}$)'})
#ifdef CRAY_ACC_WAR
!$acc exit data delete(${', '.join(('p_' + arg.strip() for arg in args))}$) &
!$acc& detach(${', '.join(args)}$)
#:for arg in args
nullify (${arg}$)
#:endfor
deallocate (${', '.join(('p_' + arg.strip() for arg in args))}$)
#else

deallocate (${', '.join(args)}$)
!$acc exit data delete(${', '.join(args)}$)
#endif

#:enddef DEALLOCATE_GLOBAL

#:def CRAY_DECLARE_GLOBAL(intype, dim, *args)
#ifdef CRAY_ACC_WAR
${intype}$, ${dim}$, allocatable, target :: ${', '.join(('p_' + arg.strip() for arg in args))}$
${intype}$, ${dim}$, pointer :: ${', '.join(args)}$
#else
${intype}$, ${dim}$, allocatable :: ${', '.join(args)}$
#endif
#:enddef CRAY_DECLARE_GLOBAL

#:def CRAY_DECLARE_GLOBAL_SCALAR(intype, *args)
#ifdef CRAY_ACC_WAR
${intype}$, target :: ${', '.join(('p_' + arg.strip() for arg in args))}$
${intype}$, pointer :: ${', '.join(args)}$
#else
${intype}$::${', '.join(args)}$
#endif
#:enddef CRAY_DECLARE_GLOBAL_SCALAR

#:def ACC_SETUP_VFs(*args)
#ifdef CRAY_ACC_WAR
#ifdef _CRAYFTN
block
integer :: macros_setup_vfs_i

Expand All @@ -100,7 +60,7 @@
#:enddef

#:def ACC_SETUP_SFs(*args)
#ifdef CRAY_ACC_WAR
#ifdef _CRAYFTN
block

@:LOG({'@:ACC_SETUP_SFs(${', '.join(args)}$)'})
Expand All @@ -116,7 +76,7 @@
#:enddef

#:def ACC_SETUP_source_spatials(*args)
#ifdef CRAY_ACC_WAR
#ifdef _CRAYFTN
block

@:LOG({'@:ACC_SETUP_source_spatials(${', '.join(args)}$)'})
Expand Down
46 changes: 21 additions & 25 deletions src/common/m_phase_change.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,6 @@ module m_phase_change
s_infinite_relaxation_k, &
s_finalize_relaxation_solver_module

!> @name Abstract interface for creating function pointers
!> @{
abstract interface

!> @name Abstract subroutine for the infinite relaxation solver
!> @{
subroutine s_abstract_relaxation_solver(q_cons_vf)
import :: scalar_field, sys_size
type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
end subroutine
!> @}

end interface
!> @}

!> @name Parameters for the first order transition phase change
!> @{
integer, parameter :: max_iter = 1e8_wp !< max # of iterations
Expand All @@ -66,10 +51,18 @@ module m_phase_change

!$acc declare create(max_iter,pCr,TCr,mixM,lp,vp,A,B,C,D)

procedure(s_abstract_relaxation_solver), pointer :: s_relaxation_solver => null()

contains

!> This subroutine should dispatch to the correct relaxation solver based
!! on some parameter. It replaces the procedure pointer, which CCE
!! is breaking on. At present it performs no dispatch: its only
!! statement is an assertion whose condition is the literal .false.
!! @param q_cons_vf Array of sys_size scalar fields; not referenced by
!!      the current body
subroutine s_relaxation_solver(q_cons_vf)
type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
! This is empty because in current master the procedure pointer
! was never assigned
! The assertion below fails unconditionally, so reaching this routine
! is reported rather than silently doing nothing.
@:ASSERT(.false., "s_relaxation_solver called but it currently does nothing")
end subroutine s_relaxation_solver

!> The purpose of this subroutine is to initialize the phase change module
!! by setting the parameters needed for phase change and
!! selecting the phase change module that will be used
Expand Down Expand Up @@ -298,8 +291,9 @@ contains
!! @param rhoe mixture energy
!! @param TS equilibrium temperature at the interface
subroutine s_infinite_pt_relaxation_k(j, k, l, MFL, pS, p_infpT, rM, q_cons_vf, rhoe, TS)
#ifdef CRAY_ACC_WAR
!DIR$ INLINEALWAYS s_compute_speed_of_sound

#ifdef _CRAYFTN
!DIR$ INLINEALWAYS s_infinite_pt_relaxation_k
#else
!$acc routine seq
#endif
Expand Down Expand Up @@ -403,7 +397,7 @@ contains
!! @param TS equilibrium temperature at the interface
subroutine s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS)

#ifdef CRAY_ACC_WAR
#ifdef _CRAYFTN
!DIR$ INLINEALWAYS s_infinite_ptg_relaxation_k
#else
!$acc routine seq
Expand Down Expand Up @@ -527,7 +521,8 @@ contains
!! @param k generic loop iterator for y direction
!! @param l generic loop iterator for z direction
subroutine s_correct_partial_densities(MCT, q_cons_vf, rM, j, k, l)
#ifdef CRAY_ACC_WAR
#ifdef _CRAYFTN
!DIR$ INLINEALWAYS s_correct_partial_densities
#else
!$acc routine seq
Expand Down Expand Up @@ -590,7 +585,7 @@ contains
!! @param TJac Transpose of the Jacobian Matrix
subroutine s_compute_jacobian_matrix(InvJac, j, Jac, k, l, mCPD, mCVGP, mCVGP2, pS, q_cons_vf, TJac)
#ifdef CRAY_ACC_WAR
#ifdef _CRAYFTN
!DIR$ INLINEALWAYS s_compute_jacobian_matrix
#else
!$acc routine seq
Expand Down Expand Up @@ -697,7 +692,7 @@ contains
!! @param R2D (2D) residue array
subroutine s_compute_pTg_residue(j, k, l, mCPD, mCVGP, mQD, q_cons_vf, pS, rhoe, R2D)
#ifdef CRAY_ACC_WAR
#ifdef _CRAYFTN
!DIR$ INLINEALWAYS s_compute_pTg_residue
#else
!$acc routine seq
Expand Down Expand Up @@ -747,8 +742,9 @@ contains
!! @param TSat Saturation Temperature
!! @param TSIn equilibrium Temperature
subroutine s_TSat(pSat, TSat, TSIn)
#ifdef CRAY_ACC_WAR
!DIR$ INLINEALWAYS s_compute_speed_of_sound
#ifdef _CRAYFTN
!DIR$ INLINEALWAYS s_TSat
#else
!$acc routine seq
#endif
Expand Down
Loading

0 comments on commit 68e62d4

Please sign in to comment.