From 490e252ea51f955c1fd3eb29c57d8b3d0cbc5ea2 Mon Sep 17 00:00:00 2001 From: Fredrik Jansson Date: Thu, 28 Nov 2024 14:26:01 +0100 Subject: [PATCH] thermodynamics, subgrid forces: more variables in field_r precision * explicit field_r on local variables * change double precision constants (1.0) to integers or explicit precision 1.0_field_r for single-precision runs on RTX3090, 512x512 botany case for 300s: 2x faster thermo 20% faster subgrid 2x faster forces 8% faster simulation in total --- src/modforces.f90 | 4 +- src/modsubgrid.f90 | 16 ++--- src/modthermodynamics.f90 | 128 +++++++++++++++++++------------------- 3 files changed, 74 insertions(+), 74 deletions(-) diff --git a/src/modforces.f90 b/src/modforces.f90 index e4b0de73..e718aa41 100644 --- a/src/modforces.f90 +++ b/src/modforces.f90 @@ -88,7 +88,7 @@ subroutine forces !$acc kernels default(present) async(2) do k=2,kmax wp(:,:,k) = wp(:,:,k) + grav*(thv0h(:,:,k)-thvh(k))/thvh(k) - & - grav*(sv0(:,:,k,iqr)*dzf(k-1)+sv0(:,:,k-1,iqr)*dzf(k))/(2.0*dzh(k)) + grav*(sv0(:,:,k,iqr)*dzf(k-1)+sv0(:,:,k-1,iqr)*dzf(k))/(2*dzh(k)) end do !$acc end kernels else @@ -103,7 +103,7 @@ subroutine forces ! special treatment for lowest full level: k=1 ! -------------------------------------------- !$acc kernels default(present) async(3) - wp(:,:,1) = 0.0 + wp(:,:,1) = 0 !$acc end kernels !$acc wait diff --git a/src/modsubgrid.f90 b/src/modsubgrid.f90 index b8772e17..894a9e0f 100644 --- a/src/modsubgrid.f90 +++ b/src/modsubgrid.f90 @@ -236,7 +236,7 @@ subroutine closure use modopenboundary, only : openboundary_excjs implicit none - real :: strain2,mlen,RiPrratio + real(field_r) :: strain2,mlen,RiPrratio integer :: i,j,k if(lsmagorinsky) then @@ -274,9 +274,9 @@ subroutine closure ( 0.25_field_r*(w0(i,j+1,2)-w0(i,j-1,2))*dyi + & dvdz(i,j) )**2 ) - RiPrratio = min( grav/thvf(1) * dthvdz(i,j,1) / (2. * strain2 * Prandtl) , (1. - eps1) ) ! SvdL, 20241106: dthvdz(i,j,k) already contains MO gradient at k=kmin (see modthermodynamics.f90) + RiPrratio = min( grav/thvf(1) * dthvdz(i,j,1) / (2 * strain2 * Prandtl) , (1 - eps1) ) ! SvdL, 20241106: dthvdz(i,j,k) already contains MO gradient at k=kmin (see modthermodynamics.f90) - ekm(i,j,1) = mlen ** 2 * sqrt(2 * strain2) * sqrt(1.0 - RiPrratio) + ekm(i,j,1) = mlen ** 2 * sqrt(2 * strain2) * sqrt(1 - RiPrratio) ekh(i,j,1) = ekm(i,j,1) / Prandtl ekm(i,j,1) = max(ekm(i,j,1),ekmin) @@ -333,9 +333,9 @@ subroutine closure (w0(i ,j+1,k+1)-w0(i ,j ,k+1)) *dyi )**2 ) ! (SvdL, 20241106:) take ratio of gradient Richardson number to critical Richardson number (equal to Prandtl in Smagorinsky-Lilly model) - RiPrratio = min( grav/thvf(k) * dthvdz(i,j,k) / (2. * strain2 * Prandtl) , (1. - eps1) ) ! (SvdL, 20241106:) dthvdz(i,j,k) already contains MO gradient at k=kmin (see modthermodynamics.f90) + RiPrratio = min( grav/thvf(k) * dthvdz(i,j,k) / (2 * strain2 * Prandtl) , (1 - eps1) ) ! (SvdL, 20241106:) dthvdz(i,j,k) already contains MO gradient at k=kmin (see modthermodynamics.f90) - ekm(i,j,k) = mlen ** 2 * sqrt(2 * strain2) * sqrt(1.0 - RiPrratio) + ekm(i,j,k) = mlen ** 2 * sqrt(2 * strain2) * sqrt(1 - RiPrratio) ekh(i,j,k) = ekm(i,j,k) / Prandtl ekm(i,j,k) = max(ekm(i,j,k),ekmin) @@ -475,7 +475,7 @@ subroutine sources use modsubgriddata, only : sgs_surface_fix implicit none - real tdef2, uwflux, vwflux, local_dudz, local_dvdz, local_dthvdz, horv + real(field_r):: tdef2, uwflux, vwflux, local_dudz, local_dvdz, local_dthvdz, horv integer i, j, k !$acc parallel loop collapse(3) default(present) private(tdef2) async(1) @@ -1034,9 +1034,9 @@ subroutine diffw(a_out) +(v0(i,j+1,k)-v0(i,j+1,k-1)) *dzhi(k) ) & -eomm * ( (w0(i,j ,k)-w0(i,j-1,k )) *dyi & +(v0(i,j ,k)-v0(i,j ,k-1)) *dzhi(k) ))*dyi * anis_fac(k) & - + (1./rhobh(k))*& + + (1/rhobh(k))*& ( rhobf(k ) * ekm(i,j,k ) * (w0(i,j,k+1) -w0(i,j,k )) * dzfi(k ) & - - rhobf(k-1) * ekm(i,j,k-1) * (w0(i,j,k ) -w0(i,j,k-1)) * dzfi(k-1) ) * 2. & + - rhobf(k-1) * ekm(i,j,k-1) * (w0(i,j,k ) -w0(i,j,k-1)) * dzfi(k-1) ) * 2 & * dzhi(k) end do end do diff --git a/src/modthermodynamics.f90 b/src/modthermodynamics.f90 index 6630ead1..afe85c10 100644 --- a/src/modthermodynamics.f90 +++ b/src/modthermodynamics.f90 @@ -176,10 +176,10 @@ subroutine calthv implicit none integer i, j, k - real qs - real a_surf,b_surf,dq,dth,dthv,temp - real a_dry, b_dry, a_moist, b_moist, c_liquid, epsilon, eps_I, chi_sat, chi - real del_thv_sat, del_thv_dry + real(field_r) qs + real(field_r) a_surf,b_surf,dq,dth,dthv,temp + real(field_r) a_dry, b_dry, a_moist, b_moist, c_liquid, epsilon, eps_I, chi_sat, chi + real(field_r) del_thv_sat, del_thv_dry call timer_tic('modthermodynamics/calthv', 1) @@ -207,9 +207,9 @@ subroutine calthv ! default thv jump computed unsaturated ! epsilon = rd/rv - eps_I = 1/epsilon - 1. !cstep approx 0.608 + eps_I = 1/epsilon - 1 !cstep approx 0.608 - a_dry = 1. + eps_I * qt0(i,j,k) + a_dry = 1 + eps_I * qt0(i,j,k) b_dry = eps_I * thl0(i,j,k) dth = thl0(i,j,k+1)-thl0(i,j,k-1) @@ -224,8 +224,8 @@ subroutine calthv temp = thl0(i,j,k)*exnf(k)+(rlv/cp)*ql0(i,j,k) qs = qt0(i,j,k) - ql0(i,j,k) - a_moist = (1.-qt0(i,j,k)+qs/epsilon*(1.+rlv/(rv*temp))) & - /(1.+rlv**2*qs/(cp*rv*temp**2)) + a_moist = (1-qt0(i,j,k)+qs/epsilon*(1+rlv/(rv*temp))) & + /(1+rlv**2*qs/(cp*rv*temp**2)) b_moist = a_moist*rlv/cp-temp c_liquid = a_dry * rlv / cp - thl0(i,j,k) / epsilon @@ -250,12 +250,12 @@ subroutine calthv if(ql0(i,j,1)>0) then temp = thl0(i,j,1)*exnf(1)+(rlv/cp)*ql0(i,j,1) qs = qt0(i,j,1) - ql0(i,j,1) - a_surf = (1.-qt0(i,j,1)+rv/rd*qs*(1.+rlv/(rv*temp))) & - /(1.+rlv**2*qs/(cp*rv*temp**2)) - b_surf = a_surf*rlv/(temp*cp)-1. + a_surf = (1-qt0(i,j,1)+rv/rd*qs*(1+rlv/(rv*temp))) & + /(1+rlv**2*qs/(cp*rv*temp**2)) + b_surf = a_surf*rlv/(temp*cp)-1 else - a_surf = 1.+(rv/rd-1)*qt0(i,j,1) + a_surf = 1+(rv/rd-1)*qt0(i,j,1) b_surf = rv/rd-1 end if @@ -410,7 +410,7 @@ subroutine diagfld ! 3.2 determine rho !$acc parallel loop default(present) async wait(1, 2) do k=1,k1 - thvf(k) = th0av(k)*exnf(k)*(1.+(rv/rd-1)*qt0av(k)-rv/rd*ql0av(k)) + thvf(k) = th0av(k)*exnf(k)*(1+(rv/rd-1)*qt0av(k)-rv/rd*ql0av(k)) rhof(k) = presf(k)/(rd*thvf(k)) end do !$acc wait @@ -439,7 +439,7 @@ subroutine fromztop implicit none integer k - real rdocp + real(field_r) rdocp call timer_tic('modthermodynamics/fromztop', 1) @@ -467,7 +467,7 @@ subroutine fromztop thvh(1) = th0av(1)*(1+(rv/rd-1)*qt0av(1)-rv/rd*ql0av(1)) presf(1) = ps**rdocp - grav*(pref0**rdocp)*zf(1) /(cp*thvh(1)) - presf(1) = presf(1)**(1./rdocp) + presf(1) = presf(1)**(1/rdocp) ! 2: higher levels @@ -475,7 +475,7 @@ subroutine fromztop thvh(k) = thetah(k)*(1+(rv/rd-1)*qth(k)-rv/rd*qlh(k)) presf(k) = presf(k-1)**rdocp - & grav*(pref0**rdocp)*dzh(k) /(cp*thvh(k)) - presf(k) = presf(k)**(1./rdocp) + presf(k) = presf(k)**(1/rdocp) end do !************************************************** @@ -490,7 +490,7 @@ subroutine fromztop thvf(k) = th0av(k)*(1+(rv/rd-1)*qt0av(k)-rv/rd*ql0av(k)) presh(k) = presh(k-1)**rdocp - & grav*(pref0**rdocp)*dzf(k-1) / (cp*thvf(k-1)) - presh(k) = presh(k)**(1./rdocp) + presh(k) = presh(k)**(1/rdocp) end do !$acc update device(thvh, presf, thvf, presh) @@ -560,7 +560,7 @@ subroutine thermo (thl,qt,ql,pressure,exner) es = es0*exp(at*(tl-tmelt)/(tl-bt)) qsl = rd/rv*es/(pressure(k)-(1-rd/rv)*es) b1 = rlv**2/(tl**2*cp*rv) - qs = qsl*(1.+b1*qt(i,j,k))/(1.+b1*qsl) + qs = qsl*(1+b1*qt(i,j,k))/(1.+b1*qsl) ql(i,j,k) = dim(qt(i,j,k)-qs,0.) end do end do @@ -577,20 +577,20 @@ end subroutine thermo pure function qsat_magnus(T, p) result(qsat) use modglobal, only : rd,rv,tup,tdn implicit none - real, intent(in) :: T, p + real(field_r), intent(in) :: T, p real :: qsat real ilratio, TC, esl, esi, es - ilratio = max(0.,min(1.,(T-tdn)/(tup-tdn))) + ilratio = max(0._field_r,min(1._field_r,(T-tdn)/(tup-tdn))) TC = T - 273.15 ! in Celcius - esl = 610.94 * exp( (17.625*TC) / (TC+243.04) ) ! Magnus - esi = 611.21 * exp( (22.587*TC) / (TC+273.86) ) ! Magnus + esl = 610.94_field_r * exp( (17.625_field_r*TC) / (TC+243.04_field_r) ) ! Magnus + esi = 611.21_field_r * exp( (22.587_field_r*TC) / (TC+273.86_field_r) ) ! Magnus ! interpolated saturation vapor pressure es = ilratio*esl + (1-ilratio)*esi ! convert saturation vapor pressure to saturation humidity - qsat = (rd/rv) * es / (p - (1.-rd/rv)*es) + qsat = (rd/rv) * es / (p - (1-rd/rv)*es) end function qsat_magnus !> Huang's formulas for q_sat over liquid and ice @@ -600,20 +600,20 @@ end function qsat_magnus pure function qsat_huang(T, p) result(qsat) use modglobal, only : rd,rv,tup,tdn implicit none - real, intent(in) :: T, p + real(field_r), intent(in) :: T, p real :: qsat real ilratio, TC, esl, esi, es - ilratio = max(0.,min(1.,(T-tdn)/(tup-tdn))) + ilratio = max(0._field_r,min(1._field_r,(T-tdn)/(tup-tdn))) - TC = T - 273.15 ! in Celcius - esl = exp(34.494 - 4924.99 / (TC + 237.1)) / (TC+105)**1.57 ! Huang - esi = exp(43.494 - 6545.8/(TC+278)) / (TC+868)**2 ! Huang + TC = T - 273.15_field_r ! in Celcius + esl = exp(34.494_field_r - 4924.99_field_r / (TC + 237.1_field_r)) / (TC+105)**1.57_field_r ! Huang + esi = exp(43.494_field_r - 6545.8_field_r/(TC+278)) / (TC+868)**2 ! Huang ! interpolated saturation vapor pressure es = ilratio*esl + (1-ilratio)*esi ! convert saturation vapor pressure to saturation humidity - qsat = (rd/rv) * es / (p - (1.-rd/rv)*es) + qsat = (rd/rv) * es / (p - (1-rd/rv)*es) end function qsat_huang ! return esat for ice-liquid mix using table @@ -628,10 +628,10 @@ pure function esat_tab(T) result(es) ! interpolated ice-liquid saturation vapor pressure from table ! note if imicto==imicro_bulk3, the table is for liquid only - tlonr=int((T-150.)*5.) - tlo = 150. + 0.2*tlonr - thi = tlo + 0.2 - es = (thi-T)*5.*esatmtab(tlonr)+(T-tlo)*5.*esatmtab(tlonr+1) + tlonr=int((T-150)*5) + tlo = 150 + 0.2_field_r*tlonr + thi = tlo + 0.2_field_r + es = (thi-T)*5*esatmtab(tlonr)+(T-tlo)*5*esatmtab(tlonr+1) end function esat_tab !> q_sat over liquid and ice, using interpolation in a table created in modglobal. @@ -648,13 +648,13 @@ pure function qsat_tab(T, p) result(qsat) real(field_r) :: tlo, thi, es ! interpolated ice-liquid saturation vapor pressure from table - tlonr=int((T-150.)*5.) - tlo = 150. + 0.2*tlonr - thi = tlo + 0.2 - es = (thi-T)*5.*esatmtab(tlonr)+(T-tlo)*5.*esatmtab(tlonr+1) + tlonr=int((T-150)*5) + tlo = 150 + 0.2_field_r*tlonr + thi = tlo + 0.2_field_r + es = (thi-T)*5*esatmtab(tlonr)+(T-tlo)*5*esatmtab(tlonr+1) ! convert saturation vapor pressure to saturation humidity - qsat = (rd/rv) * es / (p - (1.-rd/rv)*es) + qsat = (rd/rv) * es / (p - (1-rd/rv)*es) end function qsat_tab subroutine icethermo0_fast @@ -753,13 +753,13 @@ subroutine icethermo0_fast tmp0(i,j,k) = T ! use the separate e_sat tables for liquid and ice to calculate and store esl, qvsl, qvsi - tlonr=int((T-150.)*5.) - tlo = 150. + 0.2*tlonr - thi = tlo + 0.2 - esl(i,j,k) = (thi-T)*5.*esatltab(tlonr)+(T-tlo)*5.*esatltab(tlonr+1) ! saturation vapor pressure liquid - esi1 = (thi-T)*5.*esatitab(tlonr)+(T-tlo)*5.*esatitab(tlonr+1) ! saturation vapor pressure ice - qvsl(i,j,k)=rd/rv*esl(i,j,k)/(presf(k)-(1.-rd/rv)*esl(i,j,k)) ! saturation humidity liquid - qvsi(i,j,k)=rd/rv*esi1 /(presf(k)-(1.-rd/rv)*esi1) ! saturation humidity ice + tlonr=int((T-150)*5) + tlo = 150 + 0.2_field_r*tlonr + thi = tlo + 0.2_field_r + esl(i,j,k) = (thi-T)*5*esatltab(tlonr)+(T-tlo)*5*esatltab(tlonr+1) ! saturation vapor pressure liquid + esi1 = (thi-T)*5*esatitab(tlonr)+(T-tlo)*5*esatitab(tlonr+1) ! saturation vapor pressure ice + qvsl(i,j,k)=rd/rv*esl(i,j,k)/(presf(k)-(1-rd/rv)*esl(i,j,k)) ! saturation humidity liquid + qvsi(i,j,k)=rd/rv*esi1 /(presf(k)-(1-rd/rv)*esi1) ! saturation humidity ice !!!!!!!!!!! end do end do @@ -774,13 +774,13 @@ subroutine icethermo0_fast qsat(i,j,k) = qsat_tab(T, presf(k)) ! use the separate e_sat tables for liquid and ice to calculate and store esl, qvsl, qvsi - tlonr=int((T-150.)*5.) - tlo = 150. + 0.2*tlonr - thi = tlo + 0.2 - esl(i,j,k) = (thi-T)*5.*esatltab(tlonr)+(T-tlo)*5.*esatltab(tlonr+1) ! saturation vapor pressure liquid - esi1 = (thi-T)*5.*esatitab(tlonr)+(T-tlo)*5.*esatitab(tlonr+1) ! saturation vapor pressure ice - qvsl(i,j,k)=rd/rv*esl(i,j,k)/(presf(k)-(1.-rd/rv)*esl(i,j,k)) ! saturation humidity liquid - qvsi(i,j,k)=rd/rv*esi1 /(presf(k)-(1.-rd/rv)*esi1) ! saturation humidity ice + tlonr=int((T-150)*5) + tlo = 150 + 0.2_field_r*tlonr + thi = tlo + 0.2_field_r + esl(i,j,k) = (thi-T)*5*esatltab(tlonr)+(T-tlo)*5*esatltab(tlonr+1) ! saturation vapor pressure liquid + esi1 = (thi-T)*5*esatitab(tlonr)+(T-tlo)*5*esatitab(tlonr+1) ! saturation vapor pressure ice + qvsl(i,j,k)=rd/rv*esl(i,j,k)/(presf(k)-(1-rd/rv)*esl(i,j,k)) ! saturation humidity liquid + qvsi(i,j,k)=rd/rv*esi1 /(presf(k)-(1-rd/rv)*esi1) ! saturation humidity ice end do end do end if @@ -836,8 +836,8 @@ subroutine icethermo0_fast_gpu call timer_tic('modthermodynamics/icethermo0_fast', 1) ! Sanity checks - Tl_min = 400.0 - PrDiff_min = 100.0 + Tl_min = 400 + PrDiff_min = 100 !$acc parallel loop collapse(3) reduction(min:Tl_min, PrDiff_min) do k = 1, k1 do j = 2, j1 @@ -848,7 +848,7 @@ subroutine icethermo0_fast_gpu end do end do if (Tl_min < 150) stop 'icethermo0_fast: Tl_min below limit 150K' - if (PrDiff_min < 0.0) stop 'icethermo0_fast: Tl_max too close to boiling point' + if (PrDiff_min < 0) stop 'icethermo0_fast: Tl_max too close to boiling point' !$acc parallel loop collapse(3) private(Tl, qsat_, qt, ql, b, T, esi1, tlo, thi, tlonr) default(present) do k = 1, k1 @@ -887,13 +887,13 @@ subroutine icethermo0_fast_gpu tmp0(i,j,k) = T ! use the separate e_sat tables for liquid and ice to calculate and store esl, qvsl, qvsi - tlonr=int((T-150.)*5.) - tlo = 150. + 0.2*tlonr - thi = tlo + 0.2 - esl(i,j,k) = (thi-T)*5.*esatltab(tlonr)+(T-tlo)*5.*esatltab(tlonr+1) ! saturation vapor pressure liquid - esi1 = (thi-T)*5.*esatitab(tlonr)+(T-tlo)*5.*esatitab(tlonr+1) ! saturation vapor pressure ice - qvsl(i,j,k)=rd/rv*esl(i,j,k)/(presf(k)-(1.-rd/rv)*esl(i,j,k)) ! saturation humidity liquid - qvsi(i,j,k)=rd/rv*esi1 /(presf(k)-(1.-rd/rv)*esi1) ! saturation humidity ice + tlonr=int((T-150)*5) + tlo = 150 + 0.2_field_r*tlonr + thi = tlo + 0.2_field_r + esl(i,j,k) = (thi-T)*5*esatltab(tlonr)+(T-tlo)*5*esatltab(tlonr+1) ! saturation vapor pressure liquid + esi1 = (thi-T)*5*esatitab(tlonr)+(T-tlo)*5*esatitab(tlonr+1) ! saturation vapor pressure ice + qvsl(i,j,k)=rd/rv*esl(i,j,k)/(presf(k)-(1-rd/rv)*esl(i,j,k)) ! saturation humidity liquid + qvsi(i,j,k)=rd/rv*esi1 /(presf(k)-(1-rd/rv)*esi1) ! saturation humidity ice !!!!!!!!!!! end do end do @@ -994,8 +994,8 @@ subroutine icethermoh_fast_gpu call timer_tic('modthermodynamics/icethermoh_fast', 1) - Tl_min = 400.0 - PrDiff_min = 100.0 + Tl_min = 400 + PrDiff_min = 100 !$acc parallel loop collapse(3) reduction(min:Tl_min, PrDiff_min) do k = 1, k1 do j = 2, j1