From 48effdee19d997e171cf1fbed170763ac604ddd5 Mon Sep 17 00:00:00 2001 From: OsKnoth <50015520+OsKnoth@users.noreply.github.com> Date: Thu, 2 Nov 2023 18:16:37 +0100 Subject: [PATCH] GPU vertical diffusion use of closure functions, to specify examples --- Examples/testExample.jl | 48 ++++++++- Examples/testNHSphere.jl | 16 ++- Jobs/JobNHBaroWaveDrySphere | 3 +- Jobs/JobNHHeldSuarezDrySphere | 6 +- Jobs/JobNHHeldSuarezMoistSphere | 6 +- Notes | 176 ------------------------------ ToDo | 3 + src/DyCore/GlobalVariables.jl | 12 +++ src/DyCore/parse_commandline.jl | 20 ++++ src/Examples/initial.jl | 10 +- src/GPU/DiagnosticKernel.jl | 25 +++-- src/GPU/FcnGPU.jl | 11 ++ src/GPU/GPU.jl | 1 + src/GPU/InitialKernel.jl | 179 ++++++++++++++++++++++++++++++ src/GPU/OperatorKernel.jl | 186 +------------------------------- src/Integration/cache.jl | 5 + 16 files changed, 328 insertions(+), 379 deletions(-) delete mode 100644 Notes create mode 100644 ToDo create mode 100644 src/GPU/InitialKernel.jl diff --git a/Examples/testExample.jl b/Examples/testExample.jl index f635bb7..299f0f5 100644 --- a/Examples/testExample.jl +++ b/Examples/testExample.jl @@ -1 +1,47 @@ -using Examples +Base.@kwdef struct ParamGalewskiSphere + H0G = 10000.0 + hH = 120.0 + alphaG = 1.0/3.0 + betaG = 1.0/15.0 + lat0G = pi/7.0 + lat1G = pi/2.0-lat0G + eN = exp(-4.0/(lat1G-lat0G)^2.0) + uM = 80.0 + Omega = 2*pi/24.0/3600.0 +end + +Param = ParamGalewskiSphere() + +abstract type Example end + +Base.@kwdef struct Example1 <: Example end + +mutable struct Model + Eddy::Any + Prof::Any +end + + + +function (profile::Example1)(Param) + function local_profile(x) + FT = eltype(x) + A = 2 * x + return A + end + function eddy(x) + FT = eltype(x) + A = 3 * x + return A + end + return local_profile,eddy +end + +Prof, Eddy = Example1()(Param) +@show eltype(Prof) +@show eltype(Eddy) +Model1 = Model(Prof,Eddy) + +Prof(5) +Eddy(7) + diff --git a/Examples/testNHSphere.jl b/Examples/testNHSphere.jl index 89f552f..e0a5c8f 100644 --- a/Examples/testNHSphere.jl +++ b/Examples/testNHSphere.jl @@ -18,6 +18,10 @@ ProfTheta = parsed_args["ProfTheta"] PertTh = parsed_args["PertTh"] ProfVel = parsed_args["ProfVel"] ProfVelGeo = parsed_args["ProfVelGeo"] +RhoVPos = parsed_args["RhoVPos"] +RhoCPos = parsed_args["RhoCPos"] +RhoIPos = parsed_args["RhoIPos"] +RhoRPos = parsed_args["RhoRPos"] HorLimit = parsed_args["HorLimit"] Upwind = parsed_args["Upwind"] Damping = parsed_args["Damping"] @@ -161,10 +165,10 @@ Model.uPos = 2 Model.vPos = 3 Model.wPos = 4 Model.ThPos = 5 -if Model.Equation == "CompressibleMoist" - Model.RhoVPos = 1 - Model.RhoCPos = 2 -end +Model.RhoVPos = RhoVPos +Model.RhoCPos = RhoCPos +Model.RhoIPos = RhoIPos +Model.RhoRPos = RhoRPos Model.HorLimit = HorLimit Model.Upwind = Upwind Model.Damping = Damping @@ -210,7 +214,9 @@ elseif Problem == "BaroWaveDrySphere" elseif Problem == "HeldSuarezDrySphere" Profile = Examples.HeldSuarezDryExample()(Param,Phys) elseif Problem == "HeldSuarezMoistSphere" - Profile = Examples.HeldSuarezMoistExample()(Param,Phys) + Profile, Eddy = Examples.HeldSuarezMoistExample()(Param,Phys) + Model.InitialProfile = Profile + Model.Eddy = Eddy end @show "InitialConditions" diff --git a/Jobs/JobNHBaroWaveDrySphere b/Jobs/JobNHBaroWaveDrySphere index 71a4db3..25cae83 100755 --- a/Jobs/JobNHBaroWaveDrySphere +++ b/Jobs/JobNHBaroWaveDrySphere @@ -1,6 +1,6 @@ mpirun -n 1 julia --project Examples/testNHSphere.jl \ --Problem="BaroWaveDrySphere" \ - --Device="CPU_P" \ + --Device="CPU" \ --GPUType="Metal" \ --FloatTypeBackend="Float32" \ --NumberThreadGPU=512 \ @@ -13,6 +13,7 @@ mpirun -n 1 julia --project Examples/testNHSphere.jl \ --Curl=false \ --ModelType="VectorInvariant" \ --Coriolis=true \ + --VerticalDiffusion=true \ --Upwind=true \ --HorLimit=false \ --Buoyancy=true \ diff --git a/Jobs/JobNHHeldSuarezDrySphere b/Jobs/JobNHHeldSuarezDrySphere index 5c454d8..ee2e88c 100755 --- a/Jobs/JobNHHeldSuarezDrySphere +++ b/Jobs/JobNHHeldSuarezDrySphere @@ -1,4 +1,4 @@ -mpirun -n 8 julia --project Examples/testNHSphere.jl \ +mpirun -n 1 julia --project Examples/testNHSphere.jl \ --Problem="HeldSuarezDrySphere" \ --Device="CPU" \ --GPUType="Metal" \ @@ -32,8 +32,8 @@ mpirun -n 8 julia --project Examples/testNHSphere.jl \ --Stretch=true \ --StretchType="Exp" \ --GridType="CubedSphere" \ - --nz=64 \ - --nPanel=30 \ + --nz=30 \ + --nPanel=16 \ --H=45000.0 \ --OrdPoly=3 \ --HyperVisc=true \ diff --git a/Jobs/JobNHHeldSuarezMoistSphere b/Jobs/JobNHHeldSuarezMoistSphere index 5912e96..2239b77 100755 --- a/Jobs/JobNHHeldSuarezMoistSphere +++ b/Jobs/JobNHHeldSuarezMoistSphere @@ -1,18 +1,20 @@ mpirun -n 1 julia --project Examples/testNHSphere.jl \ --Problem="HeldSuarezMoistSphere" \ - --Device="CPU_P" \ + --Device="CPU" \ --GPUType="Metal" \ --NumberThreadGPU=512 \ --FloatTypeBackend="Float32" \ - --Equation="CompressibleMoist" \ --NumV=5 \ --NumTr=2 \ + --RhoVPos=1 \ + --RhoCPos=2 \ --ProfpBGrd="" \ --ProfRhoBGrd="" \ --Source=true \ --Force=true \ --Curl=false \ --ModelType="VectorInvariant" \ + --VerticalDiffusion=true \ --Coriolis=true \ --Upwind=true \ --HorLimit=false \ diff --git a/Notes b/Notes deleted file mode 100644 index 4a7ed9f..0000000 --- a/Notes +++ /dev/null @@ -1,176 +0,0 @@ -julia> x = [1,2] -2-element Vector{Int64}: - 1 - 2 - -julia> typeof(x) -Vector{Int64} (alias for Array{Int64, 1}) - -julia> using StaticArrays - │ Package StaticArrays not found, but a package named StaticArrays is available - │ from a registry. - │ Install package? - │ (@v1.7) pkg> add StaticArrays - └ (y/n) [y]: - Updating registry at `~/.julia/registries/General.toml` - Resolving package versions... - Updating `~/.julia/environments/v1.7/Project.toml` - [90137ffa] + StaticArrays v1.4.3 - Updating `~/.julia/environments/v1.7/Manifest.toml` - [90137ffa] + StaticArrays v1.4.3 -Precompiling project... - 21 dependencies successfully precompiled in 46 seconds (28 already precompiled) - -julia> x = @SVector [1.0,2.0] -2-element SVector{2, Float64} with indices SOneTo(2): - 1.0 - 2.0 - -julia> @code_warntype length(x) -MethodInstance for length(::SVector{2, Float64}) - from length(a::Union{LinearAlgebra.Adjoint{T, <:Union{StaticVector{<:Any, T}, StaticMatrix{<:Any, <:Any, T}}}, LinearAlgebra.Diagonal{T, <:StaticVector{<:Any, T}}, LinearAlgebra.Hermitian{T, <:StaticMatrix{<:Any, <:Any, T}}, LinearAlgebra.LowerTriangular{T, <:StaticMatrix{<:Any, <:Any, T}}, LinearAlgebra.Symmetric{T, <:StaticMatrix{<:Any, <:Any, T}}, LinearAlgebra.Transpose{T, <:Union{StaticVector{<:Any, T}, StaticMatrix{<:Any, <:Any, T}}}, LinearAlgebra.UnitLowerTriangular{T, <:StaticMatrix{<:Any, <:Any, T}}, LinearAlgebra.UnitUpperTriangular{T, <:StaticMatrix{<:Any, <:Any, T}}, LinearAlgebra.UpperTriangular{T, <:StaticMatrix{<:Any, <:Any, T}}, StaticVector{<:Any, T}, StaticMatrix{<:Any, <:Any, T}, StaticArray{<:Tuple, T}} where T) in StaticArrays at /Users/simon/.julia/packages/StaticArrays/12k3X/src/abstractarray.jl:1 -Arguments - #self#::Core.Const(length) - a::SVector{2, Float64} -Body::Int64 -1 ─ %1 = StaticArrays.Size(a)::Core.Const(Size(2,)) -│ %2 = StaticArrays.prod(%1)::Core.Const(2) -│ %3 = Core.typeassert(%2, StaticArrays.Int)::Core.Const(2) -└── return %3 - - -julia> y = [1.0, 2.0] -2-element Vector{Float64}: - 1.0 - 2.0 - -julia> @code_warntype length(y) -MethodInstance for length(::Vector{Float64}) - from length(a::Array) in Base at array.jl:215 -Arguments - #self#::Core.Const(length) - a::Vector{Float64} -Body::Int64 -1 ─ %1 = Base.arraylen(a)::Int64 -└── return %1 - - -julia> const N = 2 -2 - -julia> f() = N -f (generic function with 1 method) - -julia> @code_warntype f() -MethodInstance for f() - from f() in Main at REPL[9]:1 -Arguments - #self#::Core.Const(f) -Body::Int64 -1 ─ return Main.N - - -julia> f(x) = N -f (generic function with 2 methods) - -julia> @code_warntype f(x) -MethodInstance for f(::SVector{2, Float64}) - from f(x) in Main at REPL[11]:1 -Arguments - #self#::Core.Const(f) - x::SVector{2, Float64} -Body::Int64 -1 ─ return Main.N - - -julia> @code_llvm f(x) -; @ REPL[11]:1 within `f` -define i64 @julia_f_736([1 x [2 x double]]* nocapture nonnull readonly align 8 dereferenceable(16) %0) #0 { -top: - ret i64 2 -} - -julia> - -julia> Val(2) -Val{2}() - -julia> f(::Val{N}) where {N} = N -f (generic function with 3 methods) - -julia> f(::Val{N}) where {N} = N + 3 -f (generic function with 3 methods) - -julia> f(Val(3)) -6 - -julia> @code_warntype f(Val(3)) -MethodInstance for f(::Val{3}) - from f(::Val{N}) where N in Main at REPL[16]:1 -Static Parameters - N = 3 -Arguments - #self#::Core.Const(f) - _::Core.Const(Val{3}()) -Body::Int64 -1 ─ %1 = ($(Expr(:static_parameter, 1)) + 3)::Core.Const(6) -└── return %1 - - -julia> v = Val(6) -Val{6}() - -julia> function divergence(v1,v2,v3,::Val{Nq}) - end -ERROR: UndefVarError: Nq not defined -Stacktrace: - [1] top-level scope - @ REPL[20]:1 - -julia> function divergence(v1,v2,v3,::Val{Nq}) where {Nq} - end -divergence (generic function with 1 method) - -julia> function divergence(v1,v2,v3,::Val{Nq}) where {Nq} - Nq*3 - end -divergence (generic function with 1 method) - -julia> divergence(x,x,x,Val(2)) -6 - -julia> @code_warntype divergence(x,x,x,Val(2)) -MethodInstance for divergence(::SVector{2, Float64}, ::SVector{2, Float64}, ::SVector{2, Float64}, ::Val{2}) - from divergence(v1, v2, v3, ::Val{Nq}) where Nq in Main at REPL[22]:1 -Static Parameters - Nq = 2 -Arguments - #self#::Core.Const(divergence) - v1::SVector{2, Float64} - v2::SVector{2, Float64} - v3::SVector{2, Float64} - _::Core.Const(Val{2}()) -Body::Int64 -1 ─ %1 = ($(Expr(:static_parameter, 1)) * 3)::Core.Const(6) -└── return %1 - - -julia> @threads for e in elements - divergence(V[e]) - end -ERROR: LoadError: UndefVarError: @threads not defined -in expression starting at REPL[25]:1 - -julia> A = zeros(Threads.nthreads()) -1-element Vector{Float64}: - 0.0 - -julia> @threads for e in elements - A[Threads.threadi -threadid threading_run -julia> @threads for e in elements - A[Threads.threadid()] += ... -ERROR: syntax: invalid identifier name "..." -Stacktrace: - [1] top-level scope - @ none:1 diff --git a/ToDo b/ToDo new file mode 100644 index 0000000..e9179aa --- /dev/null +++ b/ToDo @@ -0,0 +1,3 @@ +zS on equally spaced points, +Transformation to nodal points. + diff --git a/src/DyCore/GlobalVariables.jl b/src/DyCore/GlobalVariables.jl index c69cd2f..4a1c66a 100644 --- a/src/DyCore/GlobalVariables.jl +++ b/src/DyCore/GlobalVariables.jl @@ -275,6 +275,8 @@ Base.@kwdef mutable struct ModelStruct{FT} PertTh::Bool RhoVPos::Int RhoCPos::Int + RhoIPos::Int + RhoRPos::Int NumV::Int NumTr::Int Equation::String @@ -310,6 +312,8 @@ Base.@kwdef mutable struct ModelStruct{FT} Curl::Bool Stretch::Bool StretchType::String + InitialProfile::Any + Eddy::Any end function ModelStruct{FT}() where FT <:AbstractFloat @@ -332,6 +336,8 @@ function ModelStruct{FT}() where FT <:AbstractFloat PertTh = false RhoVPos = 0 RhoCPos = 0 + RhoIPos = 0 + RhoRPos = 0 NumV = 0 NumTr = 0 Equation = "Compressible" @@ -367,6 +373,8 @@ function ModelStruct{FT}() where FT <:AbstractFloat Curl = true Stretch = false StretchType = "" + InitialProfile = "" + Eddy = "" return ModelStruct{FT}( Problem, Profile, @@ -387,6 +395,8 @@ function ModelStruct{FT}() where FT <:AbstractFloat PertTh, RhoVPos, RhoCPos, + RhoIPos, + RhoRPos, NumV, NumTr, Equation, @@ -422,6 +432,8 @@ function ModelStruct{FT}() where FT <:AbstractFloat Curl, Stretch, StretchType, + InitialProfile, + Eddy, ) end diff --git a/src/DyCore/parse_commandline.jl b/src/DyCore/parse_commandline.jl index a5c0cdb..11e8bf2 100644 --- a/src/DyCore/parse_commandline.jl +++ b/src/DyCore/parse_commandline.jl @@ -72,6 +72,26 @@ function parse_commandline() arg_type = String default = "" + "--RhoVPos" + help = "Position of water vapor in the tracer list" + arg_type = Int + default = 0 + + "--RhoCPos" + help = "Position of cloud water in the tracer list" + arg_type = Int + default = 0 + + "--RhoIPos" + help = "Position of cloud ice in the tracer list" + arg_type = Int + default = 0 + + "--RhoRPos" + help = "Position of rain water in the tracer list" + arg_type = Int + default = 0 + "--HorLimit" help = "Horizontal limiter" arg_type = Bool diff --git a/src/Examples/initial.jl b/src/Examples/initial.jl index af2643c..895cad7 100644 --- a/src/Examples/initial.jl +++ b/src/Examples/initial.jl @@ -255,5 +255,13 @@ function (profile::HeldSuarezMoistExample)(Param,Phys) qc = FT(0) return (Rho,uS,vS,w,Th,qv,qc) end - return local_profile + function Eddy(uStar,p,dz) + K = Param.CE * uStar * dz / 2 + if p < Param.p_pbl + dpR = (Param.p_pbl - p) / Param.p_strato + K = K * exp(-dpR * dpR) + end + return K + end + return local_profile,Eddy end diff --git a/src/GPU/DiagnosticKernel.jl b/src/GPU/DiagnosticKernel.jl index 1531724..cfd64b8 100644 --- a/src/GPU/DiagnosticKernel.jl +++ b/src/GPU/DiagnosticKernel.jl @@ -38,14 +38,15 @@ end (w - nS[3] * nU) * (w - nS[3] * nU)) end -@kernel function EddyCoefficientKernel!(K,@Const(uStar),@Const(p),@Const(dz),@Const(Glob),Param) +@kernel function EddyCoefficientKernel!(Eddy,K,@Const(uStar),@Const(p),@Const(dz),@Const(Glob)) ID,Iz,IF = @index(Global, NTuple) - NumF = @uniform @ndrange()[2] + Nz = @uniform @ndrange()[2] + NumF = @uniform @ndrange()[3] if Iz <= Nz && IF <= NumF - ind = Glob[ID,IF] - @inbounds @views K[ID,iZ,IF] = EddyCoefficientGPU(uStar[ID,IF],p[Iz,ind],dz,Param) + @inbounds ind = Glob[ID,IF] + @inbounds @views K[ID,Iz,IF] = Eddy(uStar[ID,IF],p[Iz,ind],dz[1,ind]) end end @@ -67,26 +68,34 @@ function FcnPrepareGPU!(U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType) NumG = size(U,2) Glob = FE.Glob NumF = size(Glob,2) + NumberThreadGPU = Global.ParallelCom.NumberThreadGPU - NG = min(div(512,Nz),NumG) + NG = min(div(NumberThreadGPU,Nz),NumG) group = (Nz, NG) ndrange = (Nz, NumG) - NF = min(div(512,N*N),NumF) + NF = min(div(NumberThreadGPU,N*N),NumF) groupS = (N * N, NF) ndrangeS = (N * N, NumF) - groupK = (N * N, NF) - ndrangeK = (N * N, NumF) + NzG = min(div(NumberThreadGPU,N*N),Nz) + groupK = (N * N, NzG, 1) + ndrangeK = (N * N, Nz, NumF) @views p = Cache.AuxG[:,:,1] uStar = Cache.uStar + KV = Cache.KV + dz = Metric.dz + Eddy = Global.Model.Eddy KPressureKernel! = PressureKernel!(backend,group) KPressureKernel!(p,U,Phys,ndrange=ndrange) if Global.Model.VerticalDiffusion + @show "uStar Eddy" KuStarCoefficientKernel! = uStarCoefficientKernel!(backend,groupS) KEddyCoefficientKernel! = EddyCoefficientKernel!(backend,groupK) @views KuStarCoefficientKernel!(uStar,U[1,:,:],dXdxI[3,:,1,:,1,:],nS,Glob,ndrange=ndrangeS) + KEddyCoefficientKernel!(Eddy,KV,uStar,p,dz,Glob,ndrange=ndrangeK) end KernelAbstractions.synchronize(backend) end + diff --git a/src/GPU/FcnGPU.jl b/src/GPU/FcnGPU.jl index 24348bf..311fb53 100644 --- a/src/GPU/FcnGPU.jl +++ b/src/GPU/FcnGPU.jl @@ -84,6 +84,7 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,Force,DiscType) @views FRho = F[:,:,1] @views FRhoTr = F[:,:,5] @views p = Cache.AuxG[:,:,1] + KV = Cache.KV # Ranges NzG = min(div(NumberThreadGPU,N*N),Nz) group = (N, N, NzG, 1) @@ -101,6 +102,7 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,Force,DiscType) KHyperViscTracerKernel! = HyperViscTracerKernel!(backend, groupTr) KHyperViscTracerKoeffKernel! = HyperViscTracerKoeffKernel!(backend, groupTr) KDivRhoTrUpwind3Kernel! = DivRhoTrUpwind3Kernel!(backend, groupTr) + KVerticalDiffusionScalarKernel! = VerticalDiffusionScalarKernel!(backend, groupTr) # KMomentumKernel! = MomentumKernel!(backend, group) @. CacheF = 0 @@ -137,6 +139,15 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,Force,DiscType) end KernelAbstractions.synchronize(backend) + if Global.Model.VerticalDiffusion + for iT = 1 : NumTr + @views KVerticalDiffusionScalarKernel!(F[:,:,iT+NumV],U[:,:,iT+NumV],Rho,KV, + dXdxI[3,3,:,:,:,:],J,M,Glob,ndrange=ndrangeTr) + end + KernelAbstractions.synchronize(backend) + end + + if Global.Model.Force NDoFG = min(div(NumberThreadGPU,Nz),NDoF) groupG = (Nz, NDoFG) diff --git a/src/GPU/GPU.jl b/src/GPU/GPU.jl index c092068..bcc2f9d 100644 --- a/src/GPU/GPU.jl +++ b/src/GPU/GPU.jl @@ -9,5 +9,6 @@ include("OperatorKernel.jl") include("FcnGPU.jl") include("DiagnosticKernel.jl") include("InitialConditions.jl") +include("InitialKernel.jl") end diff --git a/src/GPU/InitialKernel.jl b/src/GPU/InitialKernel.jl new file mode 100644 index 0000000..663f4eb --- /dev/null +++ b/src/GPU/InitialKernel.jl @@ -0,0 +1,179 @@ +@kernel function uvwFunCKernel!(Profile,u,v,w,time,@Const(Glob),@Const(X),Param,Phys) + + I, iz = @index(Local, NTuple) + _,Iz,IF = @index(Global, NTuple) + + ColumnTilesDim = @uniform @groupsize()[2] + N = @uniform @groupsize()[1] + Nz = @uniform @ndrange()[2] + NF = @uniform @ndrange()[3] + + + if Iz <= Nz + @inbounds ind = Glob[I,IF] + @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) + @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) + @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) + xS = SVector{3}(x1, x2 ,x3) + _,uP,vP,_ = Profile(xS,time) + @inbounds u[Iz,ind] = uP + @inbounds v[Iz,ind] = vP + end + if Iz <= Nz - 1 + @inbounds ind = Glob[I,IF] + @inbounds x1 = eltype(X)(0.5) * (X[I,2,1,Iz,IF] + X[I,1,1,Iz+1,IF]) + @inbounds x2 = eltype(X)(0.5) * (X[I,2,2,Iz,IF] + X[I,1,2,Iz+1,IF]) + @inbounds x3 = eltype(X)(0.5) * (X[I,2,3,Iz,IF] + X[I,1,3,Iz+1,IF]) + xS = SVector{3}(x1, x2 ,x3) + @inbounds _,_,_,w[Iz,ind] = Profile(xS,time) + end +end + +@kernel function RhouvFunCKernel!(Profile,Rho,u,v,time,@Const(Glob),@Const(X),Param,Phys) + + I, iz = @index(Local, NTuple) + _,Iz,IF = @index(Global, NTuple) + + ColumnTilesDim = @uniform @groupsize()[2] + N = @uniform @groupsize()[1] + Nz = @uniform @ndrange()[2] + NF = @uniform @ndrange()[3] + + if Iz <= Nz + @inbounds ind = Glob[ID,IF] + @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) + @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) + @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) + xS = SVector{3}(x1, x2 ,x3) + RhoP,uP,vP,_ = Profile(xS,time) + @inbounds Rho[Iz,ind] = RhoP + @inbounds u[Iz,ind] = uP + @inbounds v[Iz,ind] = vP + end +end + +@kernel function RhoFunCKernel!(Profile,Rho,time,@Const(Glob),@Const(X),Param,Phys) + + I, iz = @index(Local, NTuple) + _,Iz,IF = @index(Global, NTuple) + + ColumnTilesDim = @uniform @groupsize()[2] + N = @uniform @groupsize()[1] + Nz = @uniform @ndrange()[2] + NF = @uniform @ndrange()[3] + + if Iz <= Nz + @inbounds ind = Glob[I,IF] + @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) + @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) + @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) + xS = SVector{3}(x1, x2 ,x3) + RhoP,_,_,_ = Profile(xS,time) + @inbounds Rho[Iz,ind] = RhoP + end +end + +@kernel function TrFunCKernel!(Profile,Tr,time,@Const(Glob),@Const(X),Param,Phys) + + I, iz = @index(Local, NTuple) + _,Iz,IF = @index(Global, NTuple) + + ColumnTilesDim = @uniform @groupsize()[2] + N = @uniform @groupsize()[1] + Nz = @uniform @ndrange()[2] + NF = @uniform @ndrange()[3] + + if Iz <= Nz + @inbounds ind = Glob[I,IF] + @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) + @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) + @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) + xS = SVector{3}(x1, x2 ,x3) + RhoP,_,_,_ ,TrP = Profile(xS,time) + @inbounds Tr[Iz,ind] = RhoP * TrP + end +end + +@kernel function RhoThFunCKernel!(Profile,RhoTh,time,@Const(Glob),@Const(X)) + + I, iz = @index(Local, NTuple) + _,Iz,IF = @index(Global, NTuple) + + ColumnTilesDim = @uniform @groupsize()[2] + N = @uniform @groupsize()[1] + Nz = @uniform @ndrange()[2] + NF = @uniform @ndrange()[3] + + if Iz <= Nz + @inbounds ind = Glob[I,IF] + @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) + @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) + @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) + xS = SVector{3}(x1, x2 ,x3) + RhoP,_,_,_ ,ThP = Profile(xS,time) + @inbounds RhoTh[Iz,ind] = RhoP * ThP + end +end + +@kernel function RhoVFunCKernel!(Profile,RhoV,time,@Const(Glob),@Const(X)) + + I, iz = @index(Local, NTuple) + _,Iz,IF = @index(Global, NTuple) + + ColumnTilesDim = @uniform @groupsize()[2] + N = @uniform @groupsize()[1] + Nz = @uniform @ndrange()[2] + NF = @uniform @ndrange()[3] + + if Iz <= Nz + @inbounds ind = Glob[I,IF] + @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) + @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) + @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) + xS = SVector{3}(x1, x2 ,x3) + RhoP,_,_,_,_,QvP = Profile(xS,time) + @inbounds RhoV[Iz,ind] = RhoP * QvP + end +end + +@kernel function RhoCFunCKernel!(Profile,RhoC,time,@Const(Glob),@Const(X)) + + I, iz = @index(Local, NTuple) + _,Iz,IF = @index(Global, NTuple) + + ColumnTilesDim = @uniform @groupsize()[2] + N = @uniform @groupsize()[1] + Nz = @uniform @ndrange()[2] + NF = @uniform @ndrange()[3] + + if Iz <= Nz + @inbounds ind = Glob[I,IF] + @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) + @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) + @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) + xS = SVector{3}(x1, x2 ,x3) + RhoP,_,_,_,_,_,QcP = Profile(xS,time) + @inbounds RhoC[Iz,ind] = RhoP * QcP + end +end + +@kernel function ComputeFunFKernel!(Profile,w,time,@Const(Glob),@Const(X),Param) + + I, iz = @index(Local, NTuple) + _,Iz,IF = @index(Global, NTuple) + + ColumnTilesDim = @uniform @groupsize()[3] + N = @uniform @groupsize()[1] + Nz = @uniform @ndrange()[3] + NF = @uniform @ndrange()[4] + + if Iz <= Nz - 1 + @inbounds ind = Glob[ID,IF] + @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) + @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) + @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) + xS = SVector{3}(x1, x2 ,x3) + _,_,_,wP = Profile(xS,time) + @inbounds w[Iz,ind] = wP + end +end diff --git a/src/GPU/OperatorKernel.jl b/src/GPU/OperatorKernel.jl index 7516db6..18ff2d7 100644 --- a/src/GPU/OperatorKernel.jl +++ b/src/GPU/OperatorKernel.jl @@ -1268,185 +1268,6 @@ end return (cFL,cFR) end -@kernel function uvwFunCKernel!(Profile,u,v,w,time,@Const(Glob),@Const(X),Param,Phys) - - I, iz = @index(Local, NTuple) - _,Iz,IF = @index(Global, NTuple) - - ColumnTilesDim = @uniform @groupsize()[2] - N = @uniform @groupsize()[1] - Nz = @uniform @ndrange()[2] - NF = @uniform @ndrange()[3] - - - if Iz <= Nz - @inbounds ind = Glob[I,IF] - @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) - @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) - @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) - xS = SVector{3}(x1, x2 ,x3) - _,uP,vP,_ = Profile(xS,time) - @inbounds u[Iz,ind] = uP - @inbounds v[Iz,ind] = vP - end - if Iz <= Nz - 1 - @inbounds ind = Glob[I,IF] - @inbounds x1 = eltype(X)(0.5) * (X[I,2,1,Iz,IF] + X[I,1,1,Iz+1,IF]) - @inbounds x2 = eltype(X)(0.5) * (X[I,2,2,Iz,IF] + X[I,1,2,Iz+1,IF]) - @inbounds x3 = eltype(X)(0.5) * (X[I,2,3,Iz,IF] + X[I,1,3,Iz+1,IF]) - xS = SVector{3}(x1, x2 ,x3) - @inbounds _,_,_,w[Iz,ind] = Profile(xS,time) - end -end - -@kernel function RhouvFunCKernel!(Profile,Rho,u,v,time,@Const(Glob),@Const(X),Param,Phys) - - I, iz = @index(Local, NTuple) - _,Iz,IF = @index(Global, NTuple) - - ColumnTilesDim = @uniform @groupsize()[2] - N = @uniform @groupsize()[1] - Nz = @uniform @ndrange()[2] - NF = @uniform @ndrange()[3] - - if Iz <= Nz - @inbounds ind = Glob[ID,IF] - @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) - @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) - @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) - xS = SVector{3}(x1, x2 ,x3) - RhoP,uP,vP,_ = Profile(xS,time) - @inbounds Rho[Iz,ind] = RhoP - @inbounds u[Iz,ind] = uP - @inbounds v[Iz,ind] = vP - end -end - -@kernel function RhoFunCKernel!(Profile,Rho,time,@Const(Glob),@Const(X),Param,Phys) - - I, iz = @index(Local, NTuple) - _,Iz,IF = @index(Global, NTuple) - - ColumnTilesDim = @uniform @groupsize()[2] - N = @uniform @groupsize()[1] - Nz = @uniform @ndrange()[2] - NF = @uniform @ndrange()[3] - - if Iz <= Nz - @inbounds ind = Glob[I,IF] - @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) - @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) - @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) - xS = SVector{3}(x1, x2 ,x3) - RhoP,_,_,_ = Profile(xS,time) - @inbounds Rho[Iz,ind] = RhoP - end -end - -@kernel function TrFunCKernel!(Profile,Tr,time,@Const(Glob),@Const(X),Param,Phys) - - I, iz = @index(Local, NTuple) - _,Iz,IF = @index(Global, NTuple) - - ColumnTilesDim = @uniform @groupsize()[2] - N = @uniform @groupsize()[1] - Nz = @uniform @ndrange()[2] - NF = @uniform @ndrange()[3] - - if Iz <= Nz - @inbounds ind = Glob[I,IF] - @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) - @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) - @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) - xS = SVector{3}(x1, x2 ,x3) - RhoP,_,_,_ ,TrP = Profile(xS,time) - @inbounds Tr[Iz,ind] = RhoP * TrP - end -end - -@kernel function RhoThFunCKernel!(Profile,RhoTh,time,@Const(Glob),@Const(X)) - - I, iz = @index(Local, NTuple) - _,Iz,IF = @index(Global, NTuple) - - ColumnTilesDim = @uniform @groupsize()[2] - N = @uniform @groupsize()[1] - Nz = @uniform @ndrange()[2] - NF = @uniform @ndrange()[3] - - if Iz <= Nz - @inbounds ind = Glob[I,IF] - @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) - @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) - @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) - xS = SVector{3}(x1, x2 ,x3) - RhoP,_,_,_ ,ThP = Profile(xS,time) - @inbounds RhoTh[Iz,ind] = RhoP * ThP - end -end - -@kernel function RhoVFunCKernel!(Profile,RhoV,time,@Const(Glob),@Const(X)) - - I, iz = @index(Local, NTuple) - _,Iz,IF = @index(Global, NTuple) - - ColumnTilesDim = @uniform @groupsize()[2] - N = @uniform @groupsize()[1] - Nz = @uniform @ndrange()[2] - NF = @uniform @ndrange()[3] - - if Iz <= Nz - @inbounds ind = Glob[I,IF] - @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) - @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) - @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) - xS = SVector{3}(x1, x2 ,x3) - RhoP,_,_,_,_,QvP = Profile(xS,time) - @inbounds RhoV[Iz,ind] = RhoP * QvP - end -end - -@kernel function RhoCFunCKernel!(Profile,RhoC,time,@Const(Glob),@Const(X)) - - I, iz = @index(Local, NTuple) - _,Iz,IF = @index(Global, NTuple) - - ColumnTilesDim = @uniform @groupsize()[2] - N = @uniform @groupsize()[1] - Nz = @uniform @ndrange()[2] - NF = @uniform @ndrange()[3] - - if Iz <= Nz - @inbounds ind = Glob[I,IF] - @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) - @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) - @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) - xS = SVector{3}(x1, x2 ,x3) - RhoP,_,_,_,_,_,QcP = Profile(xS,time) - @inbounds RhoC[Iz,ind] = RhoP * QcP - end -end - -@kernel function ComputeFunFKernel!(Profile,w,time,@Const(Glob),@Const(X),Param) - - I, iz = @index(Local, NTuple) - _,Iz,IF = @index(Global, NTuple) - - ColumnTilesDim = @uniform @groupsize()[3] - N = @uniform @groupsize()[1] - Nz = @uniform @ndrange()[3] - NF = @uniform @ndrange()[4] - - if Iz <= Nz - 1 - @inbounds ind = Glob[ID,IF] - @inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF]) - @inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF]) - @inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF]) - xS = SVector{3}(x1, x2 ,x3) - _,_,_,wP = Profile(xS,time) - @inbounds w[Iz,ind] = wP - end -end @kernel function ForceKernel!(F,U,p,lat,Force) Iz,IG = @index(Global, NTuple) @@ -1463,7 +1284,8 @@ end end -function VerticalDiffusionScalarKernel!(FTr,Tr,Rho,K,CG,dXdxI33,J) +@kernel function VerticalDiffusionScalarKernel!(FTr,@Const(Tr),@Const(Rho),@Const(K), + @Const(dXdxI33),@Const(JJ),@Const(M),@Const(Glob)) I, J, iz = @index(Local, NTuple) _,_,Iz,IF = @index(Global, NTuple) @@ -1488,8 +1310,8 @@ function VerticalDiffusionScalarKernel!(FTr,Tr,Rho,K,CG,dXdxI33,J) if Iz < Nz ID = I + (J - 1) * N @inbounds ind = Glob[ID,IF] - @inbounds grad = (K[I,J,Iz,IF] + K[I,J,Iz+1,IF]) * (cCol[iz+1] - cCol[iz]) * - (dXdxI33[2,ID,Iz,IF] + dXdxI33[1,ID,Iz+1,IF]) / ( J[ID,2,Iz,IF] + J[ID,1,Iz+1,IF]) + @inbounds grad = (K[ID,Iz,IF] + K[ID,Iz+1,IF]) * (cCol[I,J,iz+1] - cCol[I,J,iz]) * + (dXdxI33[2,ID,Iz,IF] + dXdxI33[1,ID,Iz+1,IF]) / ( JJ[ID,2,Iz,IF] + JJ[ID,1,Iz+1,IF]) @inbounds @atomic FTr[Iz,ind] += dXdxI33[2,ID,Iz] * grad / M[Iz,ind] @inbounds @atomic FTr[Iz+1,ind] += - dXdxI33[1,ID,Iz+1] * grad / M[Iz+1,ind] end diff --git a/src/Integration/cache.jl b/src/Integration/cache.jl index 368f2e9..5f976f5 100644 --- a/src/Integration/cache.jl +++ b/src/Integration/cache.jl @@ -26,6 +26,7 @@ Cache3::Array{FT, 2} Cache4::Array{FT, 2} PresCG::Array{FT, 2} AuxG::AT3 +KV::AT3 Aux2DG::Array{FT, 3} Temp::Array{FT, 3} KE::AT2 @@ -103,6 +104,7 @@ Cache3=zeros(FT,0,0) Cache4=zeros(FT,0,0) PresCG=zeros(FT,0,0) AuxG=KernelAbstractions.zeros(backend,FT,0,0,0) +KV=KernelAbstractions.zeros(backend,FT,0,0,0) Aux2DG=zeros(FT,0,0,0) Temp=zeros(FT,0,0,0) KE=KernelAbstractions.zeros(backend,FT,0,0) @@ -182,6 +184,7 @@ return CacheStruct{FT, Cache4, PresCG, AuxG, + KV, Aux2DG, Temp, KE, @@ -260,6 +263,7 @@ Cache3=zeros(FT,nz,NumG) Cache4=zeros(FT,nz,NumG) PresCG=zeros(FT,DoF,nz) AuxG=KernelAbstractions.zeros(backend,FT,nz,NumG,4) +KV=KernelAbstractions.zeros(backend,FT,DoF,nz,NF) Aux2DG=zeros(FT,1,NumG,NumTr+1) Temp=zeros(FT,DoF,nz,NF) KE=KernelAbstractions.zeros(backend,FT,DoF,nz) @@ -339,6 +343,7 @@ return CacheStruct{FT, Cache4, PresCG, AuxG, + KV, Aux2DG, Temp, KE,