Skip to content

Commit

Permalink
thermodynamics, subgrid forces: more variables in field_r precision
Browse files Browse the repository at this point in the history
* explicit field_r on local variables
* change double precision constants (1.0) to integers or
  explicit precision 1.0_field_r

for single-precision runs on RTX3090, 512x512 botany case for 300s:
   2x faster thermo
  20% faster subgrid
   2x faster forces
   8% faster simulation in total
  • Loading branch information
fjansson committed Dec 2, 2024
1 parent add8a6e commit 490e252
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 74 deletions.
4 changes: 2 additions & 2 deletions src/modforces.f90
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ subroutine forces
!$acc kernels default(present) async(2)
do k=2,kmax
wp(:,:,k) = wp(:,:,k) + grav*(thv0h(:,:,k)-thvh(k))/thvh(k) - &
grav*(sv0(:,:,k,iqr)*dzf(k-1)+sv0(:,:,k-1,iqr)*dzf(k))/(2.0*dzh(k))
grav*(sv0(:,:,k,iqr)*dzf(k-1)+sv0(:,:,k-1,iqr)*dzf(k))/(2*dzh(k))
end do
!$acc end kernels
else
Expand All @@ -103,7 +103,7 @@ subroutine forces
! special treatment for lowest full level: k=1
! --------------------------------------------
!$acc kernels default(present) async(3)
wp(:,:,1) = 0.0
wp(:,:,1) = 0
!$acc end kernels

!$acc wait
Expand Down
16 changes: 8 additions & 8 deletions src/modsubgrid.f90
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ subroutine closure
use modopenboundary, only : openboundary_excjs
implicit none

real :: strain2,mlen,RiPrratio
real(field_r) :: strain2,mlen,RiPrratio
integer :: i,j,k

if(lsmagorinsky) then
Expand Down Expand Up @@ -274,9 +274,9 @@ subroutine closure
( 0.25_field_r*(w0(i,j+1,2)-w0(i,j-1,2))*dyi + &
dvdz(i,j) )**2 )

RiPrratio = min( grav/thvf(1) * dthvdz(i,j,1) / (2. * strain2 * Prandtl) , (1. - eps1) ) ! SvdL, 20241106: dthvdz(i,j,k) already contains MO gradient at k=kmin (see modthermodynamics.f90)
RiPrratio = min( grav/thvf(1) * dthvdz(i,j,1) / (2 * strain2 * Prandtl) , (1 - eps1) ) ! SvdL, 20241106: dthvdz(i,j,k) already contains MO gradient at k=kmin (see modthermodynamics.f90)

ekm(i,j,1) = mlen ** 2 * sqrt(2 * strain2) * sqrt(1.0 - RiPrratio)
ekm(i,j,1) = mlen ** 2 * sqrt(2 * strain2) * sqrt(1 - RiPrratio)
ekh(i,j,1) = ekm(i,j,1) / Prandtl

ekm(i,j,1) = max(ekm(i,j,1),ekmin)
Expand Down Expand Up @@ -333,9 +333,9 @@ subroutine closure
(w0(i ,j+1,k+1)-w0(i ,j ,k+1)) *dyi )**2 )

! (SvdL, 20241106:) take ratio of gradient Richardson number to critical Richardson number (equal to Prandtl in Smagorinsky-Lilly model)
RiPrratio = min( grav/thvf(k) * dthvdz(i,j,k) / (2. * strain2 * Prandtl) , (1. - eps1) ) ! (SvdL, 20241106:) dthvdz(i,j,k) already contains MO gradient at k=kmin (see modthermodynamics.f90)
RiPrratio = min( grav/thvf(k) * dthvdz(i,j,k) / (2 * strain2 * Prandtl) , (1 - eps1) ) ! (SvdL, 20241106:) dthvdz(i,j,k) already contains MO gradient at k=kmin (see modthermodynamics.f90)

ekm(i,j,k) = mlen ** 2 * sqrt(2 * strain2) * sqrt(1.0 - RiPrratio)
ekm(i,j,k) = mlen ** 2 * sqrt(2 * strain2) * sqrt(1 - RiPrratio)
ekh(i,j,k) = ekm(i,j,k) / Prandtl

ekm(i,j,k) = max(ekm(i,j,k),ekmin)
Expand Down Expand Up @@ -475,7 +475,7 @@ subroutine sources
use modsubgriddata, only : sgs_surface_fix
implicit none

real tdef2, uwflux, vwflux, local_dudz, local_dvdz, local_dthvdz, horv
real(field_r):: tdef2, uwflux, vwflux, local_dudz, local_dvdz, local_dthvdz, horv
integer i, j, k

!$acc parallel loop collapse(3) default(present) private(tdef2) async(1)
Expand Down Expand Up @@ -1034,9 +1034,9 @@ subroutine diffw(a_out)
+(v0(i,j+1,k)-v0(i,j+1,k-1)) *dzhi(k) ) &
-eomm * ( (w0(i,j ,k)-w0(i,j-1,k )) *dyi &
+(v0(i,j ,k)-v0(i,j ,k-1)) *dzhi(k) ))*dyi * anis_fac(k) &
+ (1./rhobh(k))*&
+ (1/rhobh(k))*&
( rhobf(k ) * ekm(i,j,k ) * (w0(i,j,k+1) -w0(i,j,k )) * dzfi(k ) &
- rhobf(k-1) * ekm(i,j,k-1) * (w0(i,j,k ) -w0(i,j,k-1)) * dzfi(k-1) ) * 2. &
- rhobf(k-1) * ekm(i,j,k-1) * (w0(i,j,k ) -w0(i,j,k-1)) * dzfi(k-1) ) * 2 &
* dzhi(k)
end do
end do
Expand Down
128 changes: 64 additions & 64 deletions src/modthermodynamics.f90
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,10 @@ subroutine calthv
implicit none

integer i, j, k
real qs
real a_surf,b_surf,dq,dth,dthv,temp
real a_dry, b_dry, a_moist, b_moist, c_liquid, epsilon, eps_I, chi_sat, chi
real del_thv_sat, del_thv_dry
real(field_r) qs
real(field_r) a_surf,b_surf,dq,dth,dthv,temp
real(field_r) a_dry, b_dry, a_moist, b_moist, c_liquid, epsilon, eps_I, chi_sat, chi
real(field_r) del_thv_sat, del_thv_dry

call timer_tic('modthermodynamics/calthv', 1)

Expand Down Expand Up @@ -207,9 +207,9 @@ subroutine calthv
! default thv jump computed unsaturated
!
epsilon = rd/rv
eps_I = 1/epsilon - 1. !cstep approx 0.608
eps_I = 1/epsilon - 1 !cstep approx 0.608

a_dry = 1. + eps_I * qt0(i,j,k)
a_dry = 1 + eps_I * qt0(i,j,k)
b_dry = eps_I * thl0(i,j,k)

dth = thl0(i,j,k+1)-thl0(i,j,k-1)
Expand All @@ -224,8 +224,8 @@ subroutine calthv
temp = thl0(i,j,k)*exnf(k)+(rlv/cp)*ql0(i,j,k)
qs = qt0(i,j,k) - ql0(i,j,k)

a_moist = (1.-qt0(i,j,k)+qs/epsilon*(1.+rlv/(rv*temp))) &
/(1.+rlv**2*qs/(cp*rv*temp**2))
a_moist = (1-qt0(i,j,k)+qs/epsilon*(1+rlv/(rv*temp))) &
/(1+rlv**2*qs/(cp*rv*temp**2))
b_moist = a_moist*rlv/cp-temp
c_liquid = a_dry * rlv / cp - thl0(i,j,k) / epsilon

Expand All @@ -250,12 +250,12 @@ subroutine calthv
if(ql0(i,j,1)>0) then
temp = thl0(i,j,1)*exnf(1)+(rlv/cp)*ql0(i,j,1)
qs = qt0(i,j,1) - ql0(i,j,1)
a_surf = (1.-qt0(i,j,1)+rv/rd*qs*(1.+rlv/(rv*temp))) &
/(1.+rlv**2*qs/(cp*rv*temp**2))
b_surf = a_surf*rlv/(temp*cp)-1.
a_surf = (1-qt0(i,j,1)+rv/rd*qs*(1+rlv/(rv*temp))) &
/(1+rlv**2*qs/(cp*rv*temp**2))
b_surf = a_surf*rlv/(temp*cp)-1

else
a_surf = 1.+(rv/rd-1)*qt0(i,j,1)
a_surf = 1+(rv/rd-1)*qt0(i,j,1)
b_surf = rv/rd-1

end if
Expand Down Expand Up @@ -410,7 +410,7 @@ subroutine diagfld
! 3.2 determine rho
!$acc parallel loop default(present) async wait(1, 2)
do k=1,k1
thvf(k) = th0av(k)*exnf(k)*(1.+(rv/rd-1)*qt0av(k)-rv/rd*ql0av(k))
thvf(k) = th0av(k)*exnf(k)*(1+(rv/rd-1)*qt0av(k)-rv/rd*ql0av(k))
rhof(k) = presf(k)/(rd*thvf(k))
end do
!$acc wait
Expand Down Expand Up @@ -439,7 +439,7 @@ subroutine fromztop
implicit none

integer k
real rdocp
real(field_r) rdocp

call timer_tic('modthermodynamics/fromztop', 1)

Expand Down Expand Up @@ -467,15 +467,15 @@ subroutine fromztop

thvh(1) = th0av(1)*(1+(rv/rd-1)*qt0av(1)-rv/rd*ql0av(1))
presf(1) = ps**rdocp - grav*(pref0**rdocp)*zf(1) /(cp*thvh(1))
presf(1) = presf(1)**(1./rdocp)
presf(1) = presf(1)**(1/rdocp)

! 2: higher levels

do k=2,k1
thvh(k) = thetah(k)*(1+(rv/rd-1)*qth(k)-rv/rd*qlh(k))
presf(k) = presf(k-1)**rdocp - &
grav*(pref0**rdocp)*dzh(k) /(cp*thvh(k))
presf(k) = presf(k)**(1./rdocp)
presf(k) = presf(k)**(1/rdocp)
end do

!**************************************************
Expand All @@ -490,7 +490,7 @@ subroutine fromztop
thvf(k) = th0av(k)*(1+(rv/rd-1)*qt0av(k)-rv/rd*ql0av(k))
presh(k) = presh(k-1)**rdocp - &
grav*(pref0**rdocp)*dzf(k-1) / (cp*thvf(k-1))
presh(k) = presh(k)**(1./rdocp)
presh(k) = presh(k)**(1/rdocp)
end do

!$acc update device(thvh, presf, thvf, presh)
Expand Down Expand Up @@ -560,7 +560,7 @@ subroutine thermo (thl,qt,ql,pressure,exner)
es = es0*exp(at*(tl-tmelt)/(tl-bt))
qsl = rd/rv*es/(pressure(k)-(1-rd/rv)*es)
b1 = rlv**2/(tl**2*cp*rv)
qs = qsl*(1.+b1*qt(i,j,k))/(1.+b1*qsl)
qs = qsl*(1+b1*qt(i,j,k))/(1.+b1*qsl)
ql(i,j,k) = dim(qt(i,j,k)-qs,0.)
end do
end do
Expand All @@ -577,20 +577,20 @@ end subroutine thermo
pure function qsat_magnus(T, p) result(qsat)
use modglobal, only : rd,rv,tup,tdn
implicit none
real, intent(in) :: T, p
real(field_r), intent(in) :: T, p
real :: qsat
real ilratio, TC, esl, esi, es
ilratio = max(0.,min(1.,(T-tdn)/(tup-tdn)))
ilratio = max(0._field_r,min(1._field_r,(T-tdn)/(tup-tdn)))

TC = T - 273.15 ! in Celcius
esl = 610.94 * exp( (17.625*TC) / (TC+243.04) ) ! Magnus
esi = 611.21 * exp( (22.587*TC) / (TC+273.86) ) ! Magnus
esl = 610.94_field_r * exp( (17.625_field_r*TC) / (TC+243.04_field_r) ) ! Magnus
esi = 611.21_field_r * exp( (22.587_field_r*TC) / (TC+273.86_field_r) ) ! Magnus

! interpolated saturation vapor pressure
es = ilratio*esl + (1-ilratio)*esi

! convert saturation vapor pressure to saturation humidity
qsat = (rd/rv) * es / (p - (1.-rd/rv)*es)
qsat = (rd/rv) * es / (p - (1-rd/rv)*es)
end function qsat_magnus

!> Huang's formulas for q_sat over liquid and ice
Expand All @@ -600,20 +600,20 @@ end function qsat_magnus
pure function qsat_huang(T, p) result(qsat)
use modglobal, only : rd,rv,tup,tdn
implicit none
real, intent(in) :: T, p
real(field_r), intent(in) :: T, p
real :: qsat
real ilratio, TC, esl, esi, es
ilratio = max(0.,min(1.,(T-tdn)/(tup-tdn)))
ilratio = max(0._field_r,min(1._field_r,(T-tdn)/(tup-tdn)))

TC = T - 273.15 ! in Celcius
esl = exp(34.494 - 4924.99 / (TC + 237.1)) / (TC+105)**1.57 ! Huang
esi = exp(43.494 - 6545.8/(TC+278)) / (TC+868)**2 ! Huang
TC = T - 273.15_field_r ! in Celcius
esl = exp(34.494_field_r - 4924.99_field_r / (TC + 237.1_field_r)) / (TC+105)**1.57_field_r ! Huang
esi = exp(43.494_field_r - 6545.8_field_r/(TC+278)) / (TC+868)**2 ! Huang

! interpolated saturation vapor pressure
es = ilratio*esl + (1-ilratio)*esi

! convert saturation vapor pressure to saturation humidity
qsat = (rd/rv) * es / (p - (1.-rd/rv)*es)
qsat = (rd/rv) * es / (p - (1-rd/rv)*es)
end function qsat_huang

! return esat for ice-liquid mix using table
Expand All @@ -628,10 +628,10 @@ pure function esat_tab(T) result(es)

! interpolated ice-liquid saturation vapor pressure from table
! note if imicto==imicro_bulk3, the table is for liquid only
tlonr=int((T-150.)*5.)
tlo = 150. + 0.2*tlonr
thi = tlo + 0.2
es = (thi-T)*5.*esatmtab(tlonr)+(T-tlo)*5.*esatmtab(tlonr+1)
tlonr=int((T-150)*5)
tlo = 150 + 0.2_field_r*tlonr
thi = tlo + 0.2_field_r
es = (thi-T)*5*esatmtab(tlonr)+(T-tlo)*5*esatmtab(tlonr+1)
end function esat_tab

!> q_sat over liquid and ice, using interpolation in a table created in modglobal.
Expand All @@ -648,13 +648,13 @@ pure function qsat_tab(T, p) result(qsat)
real(field_r) :: tlo, thi, es

! interpolated ice-liquid saturation vapor pressure from table
tlonr=int((T-150.)*5.)
tlo = 150. + 0.2*tlonr
thi = tlo + 0.2
es = (thi-T)*5.*esatmtab(tlonr)+(T-tlo)*5.*esatmtab(tlonr+1)
tlonr=int((T-150)*5)
tlo = 150 + 0.2_field_r*tlonr
thi = tlo + 0.2_field_r
es = (thi-T)*5*esatmtab(tlonr)+(T-tlo)*5*esatmtab(tlonr+1)

! convert saturation vapor pressure to saturation humidity
qsat = (rd/rv) * es / (p - (1.-rd/rv)*es)
qsat = (rd/rv) * es / (p - (1-rd/rv)*es)
end function qsat_tab

subroutine icethermo0_fast
Expand Down Expand Up @@ -753,13 +753,13 @@ subroutine icethermo0_fast
tmp0(i,j,k) = T

! use the separate e_sat tables for liquid and ice to calculate and store esl, qvsl, qvsi
tlonr=int((T-150.)*5.)
tlo = 150. + 0.2*tlonr
thi = tlo + 0.2
esl(i,j,k) = (thi-T)*5.*esatltab(tlonr)+(T-tlo)*5.*esatltab(tlonr+1) ! saturation vapor pressure liquid
esi1 = (thi-T)*5.*esatitab(tlonr)+(T-tlo)*5.*esatitab(tlonr+1) ! saturation vapor pressure ice
qvsl(i,j,k)=rd/rv*esl(i,j,k)/(presf(k)-(1.-rd/rv)*esl(i,j,k)) ! saturation humidity liquid
qvsi(i,j,k)=rd/rv*esi1 /(presf(k)-(1.-rd/rv)*esi1) ! saturation humidity ice
tlonr=int((T-150)*5)
tlo = 150 + 0.2_field_r*tlonr
thi = tlo + 0.2_field_r
esl(i,j,k) = (thi-T)*5*esatltab(tlonr)+(T-tlo)*5*esatltab(tlonr+1) ! saturation vapor pressure liquid
esi1 = (thi-T)*5*esatitab(tlonr)+(T-tlo)*5*esatitab(tlonr+1) ! saturation vapor pressure ice
qvsl(i,j,k)=rd/rv*esl(i,j,k)/(presf(k)-(1-rd/rv)*esl(i,j,k)) ! saturation humidity liquid
qvsi(i,j,k)=rd/rv*esi1 /(presf(k)-(1-rd/rv)*esi1) ! saturation humidity ice
!!!!!!!!!!!
end do
end do
Expand All @@ -774,13 +774,13 @@ subroutine icethermo0_fast
qsat(i,j,k) = qsat_tab(T, presf(k))

! use the separate e_sat tables for liquid and ice to calculate and store esl, qvsl, qvsi
tlonr=int((T-150.)*5.)
tlo = 150. + 0.2*tlonr
thi = tlo + 0.2
esl(i,j,k) = (thi-T)*5.*esatltab(tlonr)+(T-tlo)*5.*esatltab(tlonr+1) ! saturation vapor pressure liquid
esi1 = (thi-T)*5.*esatitab(tlonr)+(T-tlo)*5.*esatitab(tlonr+1) ! saturation vapor pressure ice
qvsl(i,j,k)=rd/rv*esl(i,j,k)/(presf(k)-(1.-rd/rv)*esl(i,j,k)) ! saturation humidity liquid
qvsi(i,j,k)=rd/rv*esi1 /(presf(k)-(1.-rd/rv)*esi1) ! saturation humidity ice
tlonr=int((T-150)*5)
tlo = 150 + 0.2_field_r*tlonr
thi = tlo + 0.2_field_r
esl(i,j,k) = (thi-T)*5*esatltab(tlonr)+(T-tlo)*5*esatltab(tlonr+1) ! saturation vapor pressure liquid
esi1 = (thi-T)*5*esatitab(tlonr)+(T-tlo)*5*esatitab(tlonr+1) ! saturation vapor pressure ice
qvsl(i,j,k)=rd/rv*esl(i,j,k)/(presf(k)-(1-rd/rv)*esl(i,j,k)) ! saturation humidity liquid
qvsi(i,j,k)=rd/rv*esi1 /(presf(k)-(1-rd/rv)*esi1) ! saturation humidity ice
end do
end do
end if
Expand Down Expand Up @@ -836,8 +836,8 @@ subroutine icethermo0_fast_gpu
call timer_tic('modthermodynamics/icethermo0_fast', 1)

! Sanity checks
Tl_min = 400.0
PrDiff_min = 100.0
Tl_min = 400
PrDiff_min = 100
!$acc parallel loop collapse(3) reduction(min:Tl_min, PrDiff_min)
do k = 1, k1
do j = 2, j1
Expand All @@ -848,7 +848,7 @@ subroutine icethermo0_fast_gpu
end do
end do
if (Tl_min < 150) stop 'icethermo0_fast: Tl_min below limit 150K'
if (PrDiff_min < 0.0) stop 'icethermo0_fast: Tl_max too close to boiling point'
if (PrDiff_min < 0) stop 'icethermo0_fast: Tl_max too close to boiling point'

!$acc parallel loop collapse(3) private(Tl, qsat_, qt, ql, b, T, esi1, tlo, thi, tlonr) default(present)
do k = 1, k1
Expand Down Expand Up @@ -887,13 +887,13 @@ subroutine icethermo0_fast_gpu
tmp0(i,j,k) = T

! use the separate e_sat tables for liquid and ice to calculate and store esl, qvsl, qvsi
tlonr=int((T-150.)*5.)
tlo = 150. + 0.2*tlonr
thi = tlo + 0.2
esl(i,j,k) = (thi-T)*5.*esatltab(tlonr)+(T-tlo)*5.*esatltab(tlonr+1) ! saturation vapor pressure liquid
esi1 = (thi-T)*5.*esatitab(tlonr)+(T-tlo)*5.*esatitab(tlonr+1) ! saturation vapor pressure ice
qvsl(i,j,k)=rd/rv*esl(i,j,k)/(presf(k)-(1.-rd/rv)*esl(i,j,k)) ! saturation humidity liquid
qvsi(i,j,k)=rd/rv*esi1 /(presf(k)-(1.-rd/rv)*esi1) ! saturation humidity ice
tlonr=int((T-150)*5)
tlo = 150 + 0.2_field_r*tlonr
thi = tlo + 0.2_field_r
esl(i,j,k) = (thi-T)*5*esatltab(tlonr)+(T-tlo)*5*esatltab(tlonr+1) ! saturation vapor pressure liquid
esi1 = (thi-T)*5*esatitab(tlonr)+(T-tlo)*5*esatitab(tlonr+1) ! saturation vapor pressure ice
qvsl(i,j,k)=rd/rv*esl(i,j,k)/(presf(k)-(1-rd/rv)*esl(i,j,k)) ! saturation humidity liquid
qvsi(i,j,k)=rd/rv*esi1 /(presf(k)-(1-rd/rv)*esi1) ! saturation humidity ice
!!!!!!!!!!!
end do
end do
Expand Down Expand Up @@ -994,8 +994,8 @@ subroutine icethermoh_fast_gpu

call timer_tic('modthermodynamics/icethermoh_fast', 1)

Tl_min = 400.0
PrDiff_min = 100.0
Tl_min = 400
PrDiff_min = 100
!$acc parallel loop collapse(3) reduction(min:Tl_min, PrDiff_min)
do k = 1, k1
do j = 2, j1
Expand Down

0 comments on commit 490e252

Please sign in to comment.