From c491b6125886d9a11207f4624187699f166ba8b1 Mon Sep 17 00:00:00 2001
From: OsKnoth <50015520+OsKnoth@users.noreply.github.com>
Date: Thu, 10 Oct 2024 14:46:30 +0200
Subject: [PATCH] New Kernels

---
 Jobs/NHSphere/HeldSuarezMoistSphere_32Elem |   2 +-
 src/GPU/FcnGPU.jl                          | 433 +++++++++++++++++++++
 src/GPU/OperatorKernel.jl                  |   4 +-
 3 files changed, 436 insertions(+), 3 deletions(-)

diff --git a/Jobs/NHSphere/HeldSuarezMoistSphere_32Elem b/Jobs/NHSphere/HeldSuarezMoistSphere_32Elem
index c28d7ec..54f38da 100755
--- a/Jobs/NHSphere/HeldSuarezMoistSphere_32Elem
+++ b/Jobs/NHSphere/HeldSuarezMoistSphere_32Elem
@@ -20,7 +20,7 @@ julia --project Examples/testNHSphere.jl \
   --Forcing=true \
   --Microphysics=true \
   --TypeMicrophysics="SimpleMicrophysics" \
-  --RelCloud=1.e-2 \
+  --RelCloud=1.e-3 \
   --Upwind=true \
   --HorLimit=false \
   --Decomp="EqualArea" \
diff --git a/src/GPU/FcnGPU.jl b/src/GPU/FcnGPU.jl
index b399972..f3be0c1 100644
--- a/src/GPU/FcnGPU.jl
+++ b/src/GPU/FcnGPU.jl
@@ -136,6 +136,439 @@ function FcnAdvectionGPU!(F,U,time,FE,Metric,Phys,Cache,Exchange,Global,Param,Pr
   @views Parallels.ExchangeData3DRecvGPU!(F[:,:,1:1+NumV],Exchange)
 end
 
+# FcnGPUOld!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,Equation)
+#
+# Variant of FcnGPU! with the same signature, kept under the Old name.
+# Overwrites the tendency F for the state U in two phases: first the
+# hyperviscosity caches in Cache.Temp1, then the tendency itself. In each phase
+# the kernels are launched on faces 1:NBF, the result is passed to
+# Parallels.ExchangeData3DSendGPU, the kernels are launched on faces NBF+1:NF,
+# and Parallels.ExchangeData3DRecvGPU! is called. Source terms enabled by the
+# Global.Model switches are added at the end. U[:,BoundaryDoF,vPos] is set to
+# zero on entry. Phys and Param are not used.
+NVTX.@annotate function FcnGPUOld!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,Equation::Models.CompressibleShallow)
+
+  backend = get_backend(F)
+  FT = eltype(F)
+  State = Global.Model.State
+  dtau = Global.TimeStepper.dtauStage
+  Glob = FE.Glob
+  DS = FE.DS
+  DW = FE.DW
+  M = FE.M
+  Stencil = FE.Stencil
+  BoundaryDoF = FE.BoundaryDoF
+  dXdxI = Metric.dXdxI
+  nS = Metric.nS
+  nSS = Metric.nSS
+  X = Metric.X
+  J = Metric.J
+  N = FE.OrdPoly+1
+  ww = FE.w
+  NF = Global.Grid.NumFaces
+  NBF = Global.Grid.NumFacesB
+# Views restricted to faces 1:NBF (suffix _B) and to faces NBF+1:NF (suffix _I)
+  @views dXdxI_B = dXdxI[:,:,:,:,:,1:NBF]
+  @views nS_B = nS[:,:,1:NBF]
+  @views J_B = J[:,:,:,1:NBF]
+  @views X_B = X[:,:,:,:,1:NBF]
+  @views Glob_B = Glob[:,1:NBF]
+
+  @views dXdxI_I = dXdxI[:,:,:,:,:,NBF+1:NF]
+  @views J_I = J[:,:,:,NBF+1:NF]
+  @views X_I = X[:,:,:,:,NBF+1:NF]
+  @views nS_I = nS[:,:,NBF+1:NF]
+  @views Glob_I = Glob[:,NBF+1:NF]
+  @views Stencil_I = Stencil[NBF+1:NF,:]
+  xS = Metric.xS
+  dz = Metric.dz
+  zP = Metric.zP
+  DoF = FE.DoF
+  N = size(FE.DS,1)
+  Nz = size(F,1)
+  NDoF = size(F,2)
+  NumV = Global.Model.NumV
+  NumTr = Global.Model.NumTr
+  Koeff = Global.Model.HyperDDiv
+  Temp1 = Cache.Temp1
+  NumberThreadGPU = Global.ParallelCom.NumberThreadGPU
+  Proc = Global.ParallelCom.Proc
+  Force = Global.Model.Force
+  Damp = Global.Model.Damp
+  EDMF = Global.Model.EDMF
+  ND = Global.Model.NDEDMF
+  MicrophysicsSource = Global.Model.MicrophysicsSource
+  CoriolisFun = Global.Model.CoriolisFun
+  GravitationFun = Global.Model.GravitationFun
+  HorLimit = Global.Model.HorLimit
+
+  KoeffCurl = Global.Model.HyperDCurl
+  KoeffGrad = Global.Model.HyperDGrad
+  KoeffDiv = Global.Model.HyperDDiv
+  KoeffDivW = Global.Model.HyperDDivW
+
+# Position
+  RhoPos = Global.Model.RhoPos
+  uPos = Global.Model.uPos
+  vPos = Global.Model.vPos
+  wPos = Global.Model.wPos
+  ThPos = Global.Model.ThPos
+  TkePos = Global.Model.TkePos
+# State vector
+  @views Rho = U[:,:,RhoPos]
+  @views u = U[:,:,uPos]
+  @views v = U[:,:,vPos]
+  @views w = U[:,:,wPos]
+  @views Th = U[:,:,ThPos]
+  TrPos = NumV
+  if TkePos > 0
+    @views Tke = U[:,:,TkePos]
+    TrPos += 1
+  end
+  @views UTr = U[:,:,TrPos+1:TrPos+NumTr]
+  if EDMF
+    aRhoEDMFPos = NumV + NumTr
+    if TkePos > 0
+      aRhoEDMFPos += 1
+    end
+    wEDMFPos = aRhoEDMFPos + ND
+    ThEDMFPos = wEDMFPos + ND
+    TrEDMFPos = ThEDMFPos + ND
+# NOTE(review): aRhoEDMF/FaRhoEDMF select the single slice aRhoEDMFPos+ND-1,
+# while the other draft fields use ranges Pos:Pos+ND-1 - confirm the layout
+    @views aRhoEDMF = U[:,:,aRhoEDMFPos+ND-1]
+    @views wEDMF = U[:,:,wEDMFPos:wEDMFPos+ND-1]
+    @views ThEDMF = U[:,:,ThEDMFPos:ThEDMFPos+ND-1]
+    @views TrEDMF = U[:,:,TrEDMFPos:end]
+    @views FaRhoEDMF = F[:,:,aRhoEDMFPos+ND-1]
+    @views FwEDMF = F[:,:,wEDMFPos:wEDMFPos+ND-1]
+    @views FThEDMF = F[:,:,ThEDMFPos:ThEDMFPos+ND-1]
+    @views FTrEDMF = F[:,:,TrEDMFPos:end]
+    RhoEDMF = Cache.RhoEDMF
+  end
+# Tendency
+  @views FRho = F[:,:,1]
+  if TkePos > 0
+    @views FTke = F[:,:,TkePos]
+  end
+  @views FTr = F[:,:,TrPos+1:TrPos+NumTr]
+# Cache
+# Need clearer cache distribution for different setups
+# 1...4 Horizontal momentum
+# 5 Thermodynamic variable
+# +1 Vertical momentum
+# +1 Turbulent kinetic energy
+# +NumTr Tracer
+# +ND*(
+# +1 Thermodynamic variable
+# +1 Vertical velocity
+# +NumTr Tracer)
+  LenTemp1 = 5
+  if KoeffDivW > 0
+    LenTemp1 += 1
+    @views Cachew = Temp1[:,:,LenTemp1]
+  end
+  if TkePos > 0
+    LenTemp1 += 1
+    @views CacheTke = Temp1[:,:,LenTemp1]
+  end
+  if ~HorLimit && NumTr > 0
+    @views CacheTr = Temp1[:,:,LenTemp1+1:LenTemp1+NumTr]
+    LenTemp1 += NumTr
+  end
+  if EDMF
+    @views CachewEDMF = Temp1[:,:,LenTemp1+1:LenTemp1+ND]
+    LenTemp1 += ND
+    @views CacheThEDMF = Temp1[:,:,LenTemp1+1:LenTemp1+ND]
+    LenTemp1 += ND
+    @views CacheTrEDMF = Temp1[:,:,LenTemp1+1:LenTemp1+ND*NumTr]
+    LenTemp1 += ND * NumTr
+  end
+
+  @views CacheF = Temp1[:,:,1:5]
+  @views p = Cache.AuxG[:,:,1]
+
+
+# Ranges
+  NzG = min(div(NumberThreadGPU,N*N),Nz)
+  group = (N, N, NzG, 1)
+  ndrange = (N, N, Nz, NF)
+  ndrangeB = (N, N, Nz, NBF)
+  ndrangeI = (N, N, Nz, NF-NBF)
+  groupTr = group
+  ndrangeTr = ndrange
+  NDoFG = min(div(NumberThreadGPU,Nz),NDoF)
+  groupG = (Nz, NDoFG)
+  ndrangeG = (Nz, NDoF)
+  groupS = max(div(NDoF,NumberThreadGPU),1)
+  ndrangeS = (NDoF)
+  NzG = min(div(NumberThreadGPU,N*N),Nz-1)
+  groupw = (N, N, NzG, 1)
+  ndrangewB = (Nz-1, NBF)
+  ndrangewI = (Nz-1, NF-NBF)
+  NFG = min(div(NumberThreadGPU,Nz),NF)
+  groupL = (Nz, NFG, 1)
+  ndrangeL = (Nz, NF, NumTr)
+
+  KRhoGradKinKernel! = RhoGradKinKernel!(backend,group)
+  KGradFullKernel! = GradFullKernel!(backend,group)
+  KGradKernel! = GradKernel!(backend,group)
+  KHyperViscKernel! = HyperViscKernel!(backend, group)
+  KHyperViscKoeffKernel! = HyperViscKoeffKernel!(backend, group)
+  if State == "Dry" || State == "ShallowWater" || State == "Moist"
+    KDivRhoThUpwind3Kernel! = DivRhoThUpwind3Kernel!(backend, group)
+  elseif State == "DryEnergy" || State == "MoistEnergy"
+    KDivRhoKEUpwind3Kernel! = DivRhoKEUpwind3Kernel!(backend, group)
+  end
+  KMomentumCoriolisKernel! = MomentumVectorInvariantCoriolisKernel!(backend, group)
+  KHyperViscTracerKernel! = HyperViscTracerKernel!(backend, groupTr)
+  KHyperViscTracerKoeffKernel! = HyperViscTracerKoeffKernel!(backend, groupTr)
+  KDivRhoTrUpwind3Kernel! = DivRhoTrUpwind3Kernel!(backend, groupTr)
+  KDivRhoTrUpwind3LimKernel! = DivRhoTrUpwind3LimKernel!(backend, groupTr)
+  KLimitKernel! = LimitKernel!(backend, groupL)
+
+# BoundaryValues
+  @. @views U[:,BoundaryDoF,vPos] = FT(0.0)
+
+  if HorLimit
+# NOTE(review): q had no assignment in this function (UndefVarError); assumes
+# the limiter buffer read as q[:,:,iT] and q[:,:,NumTr+iT] is Cache.q - confirm
+    q = Cache.q
+    @views KLimitKernel!(DoF,q,UTr,Rho,Glob,ndrange=ndrangeL)
+    KernelAbstractions.synchronize(backend)
+    Parallels.ExchangeDataFSendGPU(q,Exchange)
+  end
+
+
+####
+# First phase
+####
+  Temp1 .= FT(0)
+  KHyperViscKernel!(CacheF,U,DS,DW,dXdxI,J,M,Glob,ndrange=ndrangeB)
+  if ~HorLimit
+    for iT = 1 : NumTr
+      @views KHyperViscTracerKernel!(CacheTr[:,:,iT],UTr[:,:,iT],Rho,DS,DW,dXdxI,J,M,Glob,ndrange=ndrangeB)
+    end
+  end
+  if TkePos > 0
+    @views KHyperViscTracerKernel!(CacheTke,Tke,Rho,DS,DW,dXdxI,J,M,Glob,ndrange=ndrangeB)
+  end
+
+  if KoeffDivW > 0
+    KHyperViscWKernel! = HyperViscWKernel!(backend, groupTr)
+    @views KHyperViscWKernel!(Cachew,U[:,:,4],DS,DW,dXdxI,J,M,Glob,ndrange=ndrangeB)
+  end
+
+  if EDMF
+    ndrangeBEDMF = (N, N, Nz, NBF, ND)
+    KHyperViscWEDMFKernel! = HyperViscWEDMFKernel!(backend, groupTr)
+    @views KHyperViscWEDMFKernel!(CachewEDMF,wEDMF,aRhoEDMF,DS,DW,dXdxI,J,M,Glob,ndrange=ndrangeBEDMF)
+    KHyperViscTracerEDMFKernel! = HyperViscTracerEDMFKernel!(backend, groupTr)
+    @views KHyperViscTracerEDMFKernel!(CacheThEDMF,ThEDMF,DS,DW,dXdxI,J,M,Glob,ndrange=ndrangeBEDMF)
+    if NumTr > 0
+      ndrangeBEDMFTr = (N, N, Nz, NBF, ND*NumTr)
+      @views KHyperViscTracerEDMFKernel!(CacheTrEDMF,TrEDMF,DS,DW,dXdxI,J,M,Glob,ndrange=ndrangeBEDMFTr)
+    end
+  end
+
+  if HorLimit
+    Parallels.ExchangeDataFRecvGPU!(q,Exchange)
+  end
+  KernelAbstractions.synchronize(backend)
+  @views Parallels.ExchangeData3DSendGPU(Temp1[:,:,1:LenTemp1],Exchange)
+
+  KHyperViscKernel!(CacheF,U,DS,DW,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeI)
+  if ~HorLimit
+    for iT = 1 : NumTr
+      @views KHyperViscTracerKernel!(CacheTr[:,:,iT],UTr[:,:,iT],Rho,DS,DW,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeI)
+    end
+  end
+  if TkePos > 0
+    @views KHyperViscTracerKernel!(CacheTke,Tke,Rho,DS,DW,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeI)
+  end
+  if KoeffDivW > 0
+    @views KHyperViscWKernel!(Cachew,U[:,:,4],DS,DW,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeI)
+  end
+  if EDMF
+    ndrangeIEDMF = (N, N, Nz, NF-NBF, ND)
+    @views KHyperViscWEDMFKernel!(CachewEDMF,wEDMF,aRhoEDMF,DS,DW,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeIEDMF)
+    @views KHyperViscTracerEDMFKernel!(CacheThEDMF,ThEDMF,DS,DW,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeIEDMF)
+    KernelAbstractions.synchronize(backend)
+    if NumTr > 0
+      ndrangeIEDMFTr = (N, N, Nz, NF-NBF, ND*NumTr)
+      @views KHyperViscTracerEDMFKernel!(CacheTrEDMF,TrEDMF,DS,DW,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeIEDMFTr)
+    end
+  end
+
+  @views Parallels.ExchangeData3DRecvGPU!(Temp1[:,:,1:LenTemp1],Exchange)
+
+####
+# Second phase
+####
+  F .= FT(0)
+  KHyperViscKoeffKernel!(F,U,CacheF,DS,DW,dXdxI,J,M,Glob,KoeffCurl,KoeffGrad,KoeffDiv,ndrange=ndrangeB)
+  if ~HorLimit
+    for iT = 1 : NumTr
+      @views KHyperViscTracerKoeffKernel!(FTr[:,:,iT],CacheTr[:,:,iT],Rho,DS,DW,dXdxI,J,M,Glob,
+        KoeffDiv,ndrange=ndrangeB)
+      @views KDivRhoTrUpwind3Kernel!(FTr[:,:,iT],UTr[:,:,iT],U,DS,
+        dXdxI,J,M,Glob,ndrange=ndrangeB)
+    end
+  else
+    for iT = 1 : NumTr
+      @views KDivRhoTrUpwind3LimKernel!(FTr[:,:,iT],UTr[:,:,iT],U,DS,
+        dXdxI,J,M,Glob,dtau,ww,q[:,:,iT],q[:,:,NumTr+iT],Stencil,ndrange=ndrangeB)
+    end
+  end
+  if TkePos > 0
+    @views KHyperViscTracerKoeffKernel!(FTke,CacheTke,Rho,DS,DW,dXdxI,J,M,Glob,
+      KoeffDiv,ndrange=ndrangeB)
+    @views KDivRhoTrUpwind3Kernel!(FTke,Tke,U,DS, dXdxI,J,M,Glob,ndrange=ndrangeB)
+  end
+  if KoeffDivW > 0
+    KHyperViscWKoeffKernel! = HyperViscWKoeffKernel!(backend, groupTr)
+    @views KHyperViscWKoeffKernel!(F[:,:,4],Cachew,DS,DW,dXdxI,J,M,Glob,KoeffDivW,ndrange=ndrangeB)
+  end
+  if EDMF
+    KHyperViscWKoeffEDMFKernel! = HyperViscWKoeffEDMFKernel!(backend, groupTr)
+# NOTE(review): assumes HyperViscTracerKoeffEDMFKernel! is defined next to HyperViscWKoeffEDMFKernel! - confirm
+    KHyperViscTracerKoeffEDMFKernel! = HyperViscTracerKoeffEDMFKernel!(backend, groupTr)
+    @views KHyperViscWKoeffEDMFKernel!(FwEDMF,CachewEDMF,DS,DW,dXdxI,J,M,Glob,KoeffDivW,ndrange=ndrangeBEDMF)
+    @views KHyperViscTracerKoeffEDMFKernel!(FThEDMF,CacheThEDMF,DS,DW,dXdxI,J,M,Glob,KoeffDiv,ndrange=ndrangeBEDMF)
+    if NumTr > 0
+      @views KHyperViscTracerKoeffEDMFKernel!(FTrEDMF,CacheTrEDMF,DS,DW,dXdxI,J,M,Glob,KoeffDiv,ndrange=ndrangeBEDMFTr)
+    end
+  end
+  KMomentumCoriolisKernel!(F,U,DS,dXdxI,J,X,M,Glob,CoriolisFun,ndrange=ndrangeB)
+  KGradFullKernel!(F,U,p,DS,dXdxI,X,J,M,Glob,GravitationFun,ndrange=ndrangeB)
+  if State == "Dry" || State == "ShallowWater" || State == "Moist"
+    KDivRhoThUpwind3Kernel!(F,U,DS,dXdxI,J,M,Glob,ndrange=ndrangeB)
+  elseif State == "DryEnergy" || State == "MoistEnergy"
+    KDivRhoKEUpwind3Kernel!(F,U,p,DS,dXdxI,J,M,Glob,ndrange=ndrangeB)
+  end
+  if EDMF
+    KMomentumCoriolisDraftKernel! = MomentumVectorInvariantCoriolisDraftKernel!(backend,group)
+    KMomentumCoriolisDraftKernel!(F,U,wEDMF,aRhoEDMF,DS,dXdxI,J,X,M,Glob,CoriolisFun,ndrange=ndrangeBEDMF)
+    KRhoGradKinEDMFKernel! = RhoGradKinEDMFKernel!(backend,group)
+    KRhoGradKinEDMFKernel!(F,U,wEDMF,aRhoEDMF,DS,dXdxI,J,M,Glob,ndrange=ndrangeBEDMF)
+    KAdvectionTrUpwind3Kernel! = AdvectionTrUpwind3Kernel!(backend,groupTr)
+    KAdvectionTrUpwind3Kernel!(FThEDMF,ThEDMF,U,wEDMF,DS,dXdxI,J,M,Glob,ndrange=ndrangeBEDMF)
+    if NumTr > 0
+      KAdvectionTrUpwind3Kernel!(FTrEDMF,TrEDMF,U,wEDMF,DS,dXdxI,J,M,Glob,ndrange=ndrangeBEDMFTr)
+    end
+  end
+
+  KernelAbstractions.synchronize(backend)
+  Parallels.ExchangeData3DSendGPU(F,Exchange)
+
+  KHyperViscKoeffKernel!(F,U,CacheF,DS,DW,dXdxI_I,J_I,M,Glob_I,KoeffCurl,KoeffGrad,KoeffDiv,ndrange=ndrangeI)
+  if ~HorLimit
+    for iT = 1 : NumTr
+      @views KHyperViscTracerKoeffKernel!(FTr[:,:,iT],CacheTr[:,:,iT],Rho,DS,DW,dXdxI_I,J_I,M,Glob_I,
+        KoeffDiv,ndrange=ndrangeI)
+      @views KDivRhoTrUpwind3Kernel!(FTr[:,:,iT],UTr[:,:,iT],U,DS,
+        dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeI)
+    end
+  else
+    for iT = 1 : NumTr
+      @views KDivRhoTrUpwind3LimKernel!(FTr[:,:,iT],UTr[:,:,iT],U,DS,
+        dXdxI_I,J_I,M,Glob_I,dtau,ww,q[:,:,iT],q[:,:,NumTr+iT],Stencil_I,ndrange=ndrangeI)
+    end
+  end
+  if TkePos > 0
+    @views KHyperViscTracerKoeffKernel!(FTke,CacheTke,Rho,DS,DW,dXdxI_I,J_I,M,Glob_I,
+      KoeffDiv,ndrange=ndrangeI)
+    KDivRhoTrUpwind3Kernel!(FTke,Tke,U,DS,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeI)
+  end
+  if KoeffDivW > 0
+    @views KHyperViscWKoeffKernel!(F[:,:,4],Cachew,DS,DW,dXdxI_I,J_I,M,Glob_I,KoeffDivW,ndrange=ndrangeI)
+  end
+  if EDMF
+    @views KHyperViscWKoeffEDMFKernel!(FwEDMF,CachewEDMF,DS,DW,dXdxI_I,J_I,M,Glob_I,KoeffDivW,ndrange=ndrangeIEDMF)
+    @views KHyperViscTracerKoeffEDMFKernel!(FThEDMF,CacheThEDMF,DS,DW,dXdxI_I,J_I,M,Glob_I,
+      KoeffDiv,ndrange=ndrangeIEDMF)
+    if NumTr > 0
+      @views KHyperViscTracerKoeffEDMFKernel!(FTrEDMF,CacheTrEDMF,DS,DW,dXdxI_I,J_I,M,Glob_I,
+        KoeffDiv,ndrange=ndrangeIEDMFTr)
+    end
+  end
+
+  KMomentumCoriolisKernel!(F,U,DS,dXdxI_I,J_I,X_I,M,Glob_I,CoriolisFun,ndrange=ndrangeI)
+  KGradFullKernel!(F,U,p,DS,dXdxI_I,X_I,J_I,M,Glob_I,GravitationFun,ndrange=ndrangeI)
+
+# Same State branches as for faces 1:NBF in this phase
+  if State == "Dry" || State == "ShallowWater" || State == "Moist"
+    KDivRhoThUpwind3Kernel!(F,U,DS,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeI)
+  elseif State == "DryEnergy" || State == "MoistEnergy"
+    KDivRhoKEUpwind3Kernel!(F,U,p,DS,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeI)
+  end
+
+  if EDMF
+    KMomentumCoriolisDraftKernel!(F,U,wEDMF,aRhoEDMF,DS,dXdxI_I,J_I,X_I,M,Glob_I,CoriolisFun,ndrange=ndrangeIEDMF)
+    KRhoGradKinEDMFKernel!(F,U,wEDMF,aRhoEDMF,DS,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeIEDMF)
+    KAdvectionTrUpwind3Kernel!(FThEDMF,ThEDMF,U,wEDMF,DS,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeIEDMF)
+    if NumTr > 0
+      KAdvectionTrUpwind3Kernel!(FTrEDMF,TrEDMF,U,wEDMF,DS,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeIEDMFTr)
+    end
+  end
+
+  Parallels.ExchangeData3DRecvGPU!(F,Exchange)
+
+  if Global.Model.Turbulence
+    KV = Cache.KV
+    KTkeSourceKernel! = TkeSourceKernel!(backend, groupG)
+    KTkeSourceKernel!(Global.Model.TurbulenceSource,FTke,KV,U,dz,ndrange=ndrangeG)
+  end
+  if Global.Model.VerticalDiffusionMom
+    KVerticalDiffusionMomentumKernel! = VerticalDiffusionMomentumKernel!(backend,groupG)
+    KV = Cache.KV
+    KVerticalDiffusionMomentumKernel!(F,U,KV,dz,ndrange=ndrangeG)
+  end
+  if Global.Model.SurfaceFluxMom
+    CM = Global.SurfaceData.CM
+    KSurfaceFluxMomentumKernel! = SurfaceFluxMomentumKernel!(backend,groupS)
+    KSurfaceFluxMomentumKernel!(F,U,nSS,CM,dz,ndrange=ndrangeS)
+  end
+  if Global.Model.VerticalDiffusion
+    KVerticalDiffusionScalarKernel! = VerticalDiffusionScalarKernel!(backend,groupG)
+    KV = Cache.KV
+    @views FTh = F[:,:,5]
+    @views Th = U[:,:,5]
+    KVerticalDiffusionScalarKernel!(FTh,Th,Rho,KV,dz,ndrange=ndrangeG)
+    if TkePos > 0
+      KVerticalDiffusionScalarKernel!(FTke,Tke,Rho,KV,dz,ndrange=ndrangeG)
+    end
+    for iT = 1 : NumTr
+      @views KVerticalDiffusionScalarKernel!(FTr[:,:,iT],UTr[:,:,iT],Rho,KV,dz,ndrange=ndrangeG)
+    end
+  end
+  if Global.Model.SurfaceFlux
+    KSurfaceFluxScalarsKernel! = SurfaceFluxScalarsKernel!(backend,groupS)
+    CT = Global.SurfaceData.CT
+    CH = Global.SurfaceData.CH
+    uStar = Global.SurfaceData.uStar
+    TSurf = Global.SurfaceData.TS
+    RhoVSurf = Global.SurfaceData.RhoVS
+    SurfaceFluxRhs! = Global.Model.SurfaceFluxRhs
+    KSurfaceFluxScalarsKernel!(SurfaceFluxRhs!,F,U,p,TSurf,RhoVSurf,uStar,CT,CH,dz,ndrange=ndrangeS)
+  end
+  if Global.Model.Forcing
+    KForceKernel! = ForceKernel!(backend, groupG)
+    KForceKernel!(Force,F,U,p,xS,ndrange=ndrangeG)
+  end
+
+  if Global.Model.Microphysics
+    KMicrophysicsKernel! = MicrophysicsKernel!(backend, groupG)
+    KMicrophysicsKernel!(MicrophysicsSource,F,U,p,ndrange=ndrangeG)
+  end
+
+  if Global.Model.Damping
+    KDampKernel! = DampKernel!(backend, groupG)
+    KDampKernel!(Damp,F,U,zP,ndrange=ndrangeG)
+  end
+
+end
+
 NVTX.@annotate function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,Equation::Models.CompressibleShallow)
 
   backend = get_backend(F)
diff --git a/src/GPU/OperatorKernel.jl b/src/GPU/OperatorKernel.jl
index a8490cb..77c5c53 100644
--- a/src/GPU/OperatorKernel.jl
+++ b/src/GPU/OperatorKernel.jl
@@ -696,7 +696,7 @@ end
         (-abs(wCon) + wCon) * cFR)
       @atomic :monotonic F[Iz,ind,5] += -Flux / MCCol[I,J,iz]
       @atomic :monotonic F[Iz+1,ind,5] += Flux / MCCol[I,J,iz+1]
-      Flux = wCon
+      Flux = eltype(F)(0.5)*wCon
       @atomic :monotonic F[Iz,ind,1] += -Flux / MCCol[I,J,iz]
       @atomic :monotonic F[Iz+1,ind,1] += Flux / MCCol[I,J,iz+1]
     end
@@ -719,7 +719,7 @@ end
       @atomic :monotonic F[Iz,ind,1] += DivRho / MCCol[I,J,iz]
     end
 
-  for iT = NumV+1 : NumV + NumTr
+  for iT = NumV + 1 : NumV + NumTr
     # Second tracer
     ID = I + (J - 1) * N
     ind = Glob[ID,IF]