diff --git a/Jobs/JobNHBaroWaveDrySphere b/Jobs/JobNHBaroWaveDrySphere index 0df56e8..86924d2 100755 --- a/Jobs/JobNHBaroWaveDrySphere +++ b/Jobs/JobNHBaroWaveDrySphere @@ -30,8 +30,8 @@ mpirun -n 1 julia --project Examples/testNHSphere.jl \ --Stretch=true \ --StretchType="Exp" \ --GridType="CubedSphere" \ - --nz=10 \ - --nPanel=10 \ + --nz=64 \ + --nPanel=30 \ --H=30000.0 \ --OrdPoly=3 \ --HyperVisc=true \ diff --git a/README.md b/README.md index 83a3e73..b134a0f 100644 --- a/README.md +++ b/README.md @@ -1 +1,7 @@ -# CGDycore.jl \ No newline at end of file +# CGDycore.jl + +GDycore is an expermental Julia code [Julia](https://julialang.org/) to study numerical methods and implementational details of numerical dycores for weather prediction. The code is parallelized by MPI and the time integration part of the code is already ported to accelerator by using the Julia package KernelAbstractions.jl. within this programming paradigm the code runs on CPUs and different GPU backends (CUDA, Metal). At the moment a spectral continuous Galerkin method following the HOMME philosphy is implemented as first numerical dycore. + +## Code structure and usage +* The code has a general data structure for unstructured grids on the sphere. There exist grid generators for cubed sphere grids, icosahedral triangular and hexagonal grids. In addition there are options to read in other types of unstructured quad grids n the sphere. +* Three dimensional grids (extension to the vertical) are constructed by extruding the two-dimensional horizontal grids. diff --git a/src/DyCore/DiscretizationCG.jl b/src/DyCore/DiscretizationCG.jl index 70a1809..35c6800 100644 --- a/src/DyCore/DiscretizationCG.jl +++ b/src/DyCore/DiscretizationCG.jl @@ -13,7 +13,26 @@ function DiscretizationCG(backend,FT,Jacobi,CG,Exchange,Global,zs) FS = zeros(nQuad,NF) J = zeros(nQuad,OPZ,nz,NF) X = zeros(nQuad,OPZ,3,nz,NF) + F = zeros(4,3,NF) + FGPU = KernelAbstractions.zeros(backend,FT,4,3,NF) + for iF = 1 : NF + F[1,1,iF] = Grid.Faces[iF].P[1].x + F[1,2,iF] = Grid.Faces[iF].P[1].y + F[1,3,iF] = Grid.Faces[iF].P[1].z + F[2,1,iF] = Grid.Faces[iF].P[2].x + F[2,2,iF] = Grid.Faces[iF].P[2].y + F[2,3,iF] = Grid.Faces[iF].P[2].z + F[3,1,iF] = Grid.Faces[iF].P[3].x + F[3,2,iF] = Grid.Faces[iF].P[3].y + F[3,3,iF] = Grid.Faces[iF].P[3].z + F[4,1,iF] = Grid.Faces[iF].P[4].x + F[4,2,iF] = Grid.Faces[iF].P[4].y + F[4,3,iF] = Grid.Faces[iF].P[4].z + end + copyto!(FGPU,F) + Grids.JacobiSphere3GPU!(Metric.X,Metric.dXdxI,Metric.J,CG,FGPU,Grid.z,zs,Grid.Rad) +#= for iF = 1 : NF for iz = 1 : nz zI = [Grid.z[iz],Grid.z[iz+1]] @@ -36,8 +55,9 @@ function DiscretizationCG(backend,FT,Jacobi,CG,Exchange,Global,zs) copyto!(Metric.dXdxI,dXdxI) copyto!(Metric.nS,nS) copyto!(Metric.FS,FS) +=# - MassCGGPU!(backend,FT,CG,Metric.J,CG.Glob,Exchange,Global) + MassCGGPU!(CG,Metric.J,CG.Glob,Exchange,Global) Metric.dz = KernelAbstractions.zeros(backend,FT,nz,CG.NumG) Metric.zP = KernelAbstractions.zeros(backend,FT,nz,CG.NumG) @@ -62,7 +82,7 @@ end function DiscretizationCG(backend,FT,Jacobi,CG,Exchange,Global) DiscretizationCG(backend,FT,Jacobi,CG,Exchange,Global, - zeros(CG.OrdPoly+1,CG.OrdPoly+1,Global.Grid.NumFaces)) + KernelAbstractions.zeros(backend,FT,CG.OrdPoly+1,CG.OrdPoly+1,Global.Grid.NumFaces)) end @kernel function GridSizeSphereKernel!(lat,zP,dz,@Const(X),@Const(Glob),Rad) diff --git a/src/DyCore/FiniteElement.jl b/src/DyCore/FiniteElement.jl index 1ba9a3d..c1542ea 100644 --- a/src/DyCore/FiniteElement.jl +++ b/src/DyCore/FiniteElement.jl @@ -25,10 +25,10 @@ mutable struct CGStruct{FT<:AbstractFloat, NumG::Int NumI::Int w::AT1 - xw::Array{FT, 1} + xw::AT1 xe::Array{FT, 1} IntXE2F::Array{FT, 2} - xwZ::Array{FT, 1} + xwZ::AT1 IntZE2F::Array{FT, 2} DW::AT2 DWT::Array{FT, 2} @@ -112,11 +112,15 @@ function CGStruct{FT}(backend,OrdPoly,OrdPolyZ,Grid) where FT<:AbstractFloat OrdPolyZ=OrdPolyZ DoF = OP * OP - (wCPU,xw)=DG.GaussLobattoQuad(OrdPoly) + (wCPU,xwCPU)=DG.GaussLobattoQuad(OrdPoly) w = KernelAbstractions.zeros(backend,FT,size(wCPU)) + xw = KernelAbstractions.zeros(backend,FT,size(xwCPU)) copyto!(w,wCPU) + copyto!(xw,xwCPU) - (wZ,xwZ)=DG.GaussLobattoQuad(OrdPolyZ) + (wZ,xwZCPU)=DG.GaussLobattoQuad(OrdPolyZ) + xwZ = KernelAbstractions.zeros(backend,FT,size(xwZCPU)) + copyto!(xwZ,xwZCPU) xe = zeros(OrdPoly+1) xe[1] = -1.0 for i = 2 : OrdPoly diff --git a/src/DyCore/InitDriver.jl b/src/DyCore/InitDriver.jl index 335b599..3be7bf5 100644 --- a/src/DyCore/InitDriver.jl +++ b/src/DyCore/InitDriver.jl @@ -8,7 +8,7 @@ function InitSphere(backend,FT,OrdPoly,OrdPolyZ,nz,nPanel,H,GridType,Topography, ParallelCom.ProcNumber = ProcNumber TimeStepper = TimeStepperStruct{FT}(backend) - Grid = Grids.GridStruct(nz,Topography) + Grid = Grids.GridStruct{FT}(backend,nz,Topography) if GridType == "HealPix" # Grid=CGDycore.InputGridH("Grid/mesh_H12_no_pp.nc", @@ -102,7 +102,7 @@ function InitCart(backend,FT,OrdPoly,OrdPolyZ,nx,ny,Lx,Ly,x0,y0,nz,H,Boundary,Gr ParallelCom.ProcNumber = ProcNumber TimeStepper = TimeStepperStruct{FT}(backend) - Grid = Grids.GridStruct(nz,Topography) + Grid = Grids.GridStruct{FT}(backend,nz,Topography) Grid = Grids.CartGrid(nx,ny,Lx,Ly,x0,y0,Grids.OrientFaceCart,Boundary,Grid) diff --git a/src/DyCore/MassCG.jl b/src/DyCore/MassCG.jl index 0707ff0..2c56f0a 100644 --- a/src/DyCore/MassCG.jl +++ b/src/DyCore/MassCG.jl @@ -29,7 +29,9 @@ function MassCG(CG,J,Glob,Exchange) return (M,MW,MMass) end -function MassCGGPU!(backend,FT,CG,J,Glob,Exchange,Global) +function MassCGGPU!(CG,J,Glob,Exchange,Global) + backend = get_backend(J) + FT = eltype(J) N = CG.OrdPoly + 1 DoF = CG.DoF w = CG.w diff --git a/src/Grids/AddVerticalGrid.jl b/src/Grids/AddVerticalGrid.jl index 0d62a70..74f9bad 100644 --- a/src/Grids/AddVerticalGrid.jl +++ b/src/Grids/AddVerticalGrid.jl @@ -1,17 +1,18 @@ function AddVerticalGrid!(Grid::GridStruct,nz::Int,H::Float64) -Grid.zP=zeros(nz); -Grid.z=zeros(nz+1); -Grid.dzeta = zeros(nz) -Grid.H = H -@. Grid.dzeta = H/nz; -Grid.H=H -for i=2:nz+1 - Grid.z[i] = Grid.z[i-1] + Grid.dzeta[i-1] -end -for i=1:nz - Grid.dzeta[i] = Grid.z[i+1] - Grid.z[i] - Grid.zP[i] = 0.5 * (Grid.z[i] + Grid.z[i+1]) -end + Grid.zP=zeros(nz); + z=zeros(nz+1); + Grid.dzeta = zeros(nz) + Grid.H = H + @. Grid.dzeta = H/nz; + Grid.H=H + for i=2:nz+1 + z[i] = z[i-1] + Grid.dzeta[i-1] + end + for i=1:nz + Grid.dzeta[i] = z[i+1] - z[i] + Grid.zP[i] = 0.5 * (z[i] + z[i+1]) + end + copyto!(Grid.z,z) end function AddStretchICONVerticalGrid!(Grid::GridStruct,nz::Int,H::Float64,sigma::Float64,lambda::Float64) @@ -55,7 +56,7 @@ function AddExpStretchVerticalGrid!(Grid::GridStruct,nz::Int,H::Float64, dz_bottom::Float64,dz_top::Float64) Grid.zP=zeros(nz); - Grid.z=zeros(nz+1); + z=zeros(nz+1); Grid.dzeta = zeros(nz) Grid.H = H @@ -118,10 +119,11 @@ function AddExpStretchVerticalGrid!(Grid::GridStruct,nz::Int,H::Float64, # add the bottom level faces = [z_bottom; faces...] for iz = 1 : nz + 1 - Grid.z[iz] = faces[iz] + z[iz] = faces[iz] end for i = 1 : nz - Grid.dzeta[i] = Grid.z[i+1] - Grid.z[i] - Grid.zP[i] = 0.5 * (Grid.z[i] + Grid.z[i+1]) + Grid.dzeta[i] = z[i+1] - z[i] + Grid.zP[i] = 0.5 * (z[i] + z[i+1]) end + copyto!(Grid.z,z) end diff --git a/src/Grids/GridStruct.jl b/src/Grids/GridStruct.jl index e1aeee2..cec4648 100644 --- a/src/Grids/GridStruct.jl +++ b/src/Grids/GridStruct.jl @@ -14,36 +14,37 @@ function Boundary() ) end -mutable struct GridStruct - nz::Int - zP::Array{Float64, 1} - z::Array{Float64, 1} - dzeta::Array{Float64, 1} - H::Float64 - NumFaces::Int - NumGhostFaces::Int - Faces::Array{Face, 1} - NumEdges::Int - Edges::Array{Edge, 1} - NumNodes::Int - Nodes::Array{Node, 1} - Form::String - Type ::String - Dim::Int - Rad::Float64 - NumEdgesI::Int - NumEdgesB::Int - nBar3::Array{Float64, 2} - nBar::Array{Float64, 2} - Topography::NamedTuple - colors::Array{Array{Int, 1}, 1} - Spline_2d::Dierckx.Spline2D - BoundaryFaces::Array{Int,1} - InteriorFaces::Array{Int,1} +mutable struct GridStruct{FT<:AbstractFloat, + AT1<:AbstractArray} + nz::Int + zP::Array{FT, 1} + z::AT1 + dzeta::Array{FT, 1} + H::FT + NumFaces::Int + NumGhostFaces::Int + Faces::Array{Face, 1} + NumEdges::Int + Edges::Array{Edge, 1} + NumNodes::Int + Nodes::Array{Node, 1} + Form::String + Type ::String + Dim::Int + Rad::FT + NumEdgesI::Int + NumEdgesB::Int + nBar3::Array{FT, 2} + nBar::Array{FT, 2} + Topography::NamedTuple + colors::Array{Array{Int, 1}, 1} + Spline_2d::Dierckx.Spline2D + BoundaryFaces::Array{Int,1} + InteriorFaces::Array{Int,1} end -function GridStruct(nz,Topography) +function GridStruct{FT}(backend,nz,Topography) where FT <: AbstractFloat zP=zeros(nz) - z=zeros(nz+1) + z=KernelAbstractions.zeros(backend,FT,nz+1) dzeta=zeros(nz) H=0.0 NumFaces=0 @@ -65,7 +66,8 @@ function GridStruct(nz,Topography) Spline_2d = Spline2D(zeros(0),zeros(0),zeros(0),0,0,0.0) BoundaryFaces = zeros(Int,0) InteriorFaces = zeros(Int,0) - return GridStruct( + return GridStruct{FT, + typeof(z)}( nz, zP, z, diff --git a/src/Grids/Grids.jl b/src/Grids/Grids.jl index 676ac51..eff603b 100644 --- a/src/Grids/Grids.jl +++ b/src/Grids/Grids.jl @@ -8,6 +8,9 @@ using RootSolvers using Dierckx using NCDatasets using PairedLinkedLists +using KernelAbstractions +using KernelAbstractions: @atomic, @atomicswap, @atomicreplace + include("Geometry.jl") include("Node.jl") @@ -20,6 +23,7 @@ include("CubedGrid.jl") include("FacesInNodes.jl") include("JacobiDG3.jl") include("JacobiSphere3.jl") +include("JacobiSphere3GPU.jl") include("OrientFaceCart.jl") include("OrientFaceSphere.jl") include("Orientation.jl") diff --git a/src/Grids/JacobiSphere3GPU.jl b/src/Grids/JacobiSphere3GPU.jl index 6a0483c..993909d 100644 --- a/src/Grids/JacobiSphere3GPU.jl +++ b/src/Grids/JacobiSphere3GPU.jl @@ -1,79 +1,97 @@ -function JacobiSphere3GPU!(X,dXdx,dXdxI,J,FE,F,z,zs) +function JacobiSphere3GPU!(X,dXdxI,J,FE,F,z,zs,Rad) - NF = size(X,4) - N = size(FE.xw,1)) - Nz = size(X,2) + backend = get_backend(X) + FT = eltype(X) + + NF = size(X,5) + N = size(FE.xw,1) + Nz = size(X,4) - NzG = min(div(512,N*N),Nz) - group = (N, N, NzG, 1) - ndrange = (N, N, Nz, NF) + NzG = min(div(512,N*N*2),Nz) + group = (N, N, 2, NzG, 1) + ndrange = (N, N, 2, Nz, NF) KJacobiSphere3Kernel! = JacobiSphere3Kernel!(backend,group) - KJacobiSphere3Kernel!(X,dXdx,dXdxI,J,FE.xw,FE.DS, + H = z[end] + KJacobiSphere3Kernel!(X,dXdxI,J,FE.xw,FE.xwZ,FE.DS,F,z,Rad,H,zs,ndrange=ndrange) end -@kernel function JacobiSphere3Kernel!(X,dXdx,dXdxI,J,ksi,zeta,DS,F,z,H,zs) +@kernel function JacobiSphere3Kernel!(X,dXdxI,JJ,@Const(ksi),@Const(zeta),@Const(D), + @Const(F),@Const(z),Rad,H,@Const(zs)) - gi, gj, gz, gF = @index(Group, NTuple) - I, J, iz = @index(Local, NTuple) - _,_,Iz,IF = @index(Global, NTuple) + gi, gj, gk, gz, gF = @index(Group, NTuple) + I, J, K, iz = @index(Local, NTuple) + _,_,_,Iz,IF = @index(Global, NTuple) - ColumnTilesDim = @uniform @groupsize()[3] + ColumnTilesDim = @uniform @groupsize()[4] N = @uniform @groupsize()[1] - Nz = @uniform @ndrange()[3] - NF = @uniform @ndrange()[4] + L = @uniform @groupsize()[3] + Nz = @uniform @ndrange()[4] + NF = @uniform @ndrange()[5] - eta = ksi - n3 = 2 - ID = I + (J - 1) * N - @inbounds for k = 1 : n3 - JacobiSphere3Loc!(X[ID,k,:],dXdx[ID,k,:,:],ksi[I],eta[J],zeta[k],F,z,Topography.Rad,Topography,zs[I,J]) - end + hR = @localmem eltype(X) (N,N,L,ColumnTilesDim) + dXdx = @localmem eltype(X) (N,N,L,3,3,ColumnTilesDim) - @inbounds for k=1:n3 - dXdx[I,:,k,3,1]=DS*hR[I,:,k] - dXdx[I,:,k,3,2]=reshape(hR[I,:,k],N,N)*DS' - end - @inbounds for k=1:n3 - J[i,j,k]=det(reshape(dXdx[i,j,k,:,:],3,3)) - dXdxI[:,:,k,i,j]=inv(reshape(dXdx[i,j,k,:,:],3,3))*J[i,j,k] + eta = ksi + if Iz <= Nz + ID = I + (J - 1) * N + @inbounds z1 = z[Iz] + @inbounds z2 = z[Iz+1] + hRLoc = eltype(X)(0) + @views @inbounds JacobiSphere3Loc!(X[ID,K,:,Iz,IF],dXdx[I,J,K,:,:,iz],hRLoc,ksi[I],eta[J],zeta[K],F[:,:,IF],z1,z2,Rad,H,zs[I,J,IF]) + @inbounds hR[I,J,K,iz] = hRLoc + end + + @synchronize + + if Iz <= Nz + ID = I + (J - 1) * N + @inbounds DxhR = D[I,1] * hR[1,J,K,iz] + @inbounds DyhR = D[J,1] * hR[I,1,K,iz] + for k = 2 : N + @inbounds DxhR += D[I,k] * hR[k,J,K,iz] + @inbounds DyhR += D[J,k] * hR[I,k,K,iz] + end + + @inbounds dXdx[I,J,K,3,1,iz] = DxhR + @inbounds dXdx[I,J,K,3,2,iz] = DyhR + @inbounds JJ[ID,K,Iz,IF] = det(reshape(dXdx[I,J,K,:,:,iz],3,3)) + @inbounds dXdxI[:,:,K,ID,Iz,IF] = inv(reshape(dXdx[I,J,K,:,:,iz],3,3))*JJ[ID,K,Iz,IF] end - - X = reshape(X,n*n,n3,3) - J = reshape(J,n*n,n3) - dXdx = reshape(dXdx,n*n,n3,3,3) - dXdxI = reshape(dXdxI,3,3,n3,n*n) end -function JacobiSphere3Loc(X,dXdx,ksi1,ksi2,ksi3,F,z,Rad,H,zs) - - X1 = 0.25 * (F.P[1].x .* (1-ksi1)*(1-ksi2)+ - F.P[2].x .* (1+ksi1)* (1-ksi2)+ - F.P[3].x .* (1+ksi1)* (1+ksi2)+ - F.P[4].x .* (1-ksi1)* (1+ksi2)) - X2 = 0.25 * (F.P[1].y .*(1-ksi1)*(1-ksi2)+ - F.P[2].y .*(1+ksi1)*(1-ksi2)+ - F.P[3].y .*(1+ksi1)*(1+ksi2)+ - F.P[4].y .*(1-ksi1)*(1+ksi2)) - X3 = 0.25 * (F.P[1].z .*(1-ksi1)*(1-ksi2)+ - F.P[2].z .*(1+ksi1)*(1-ksi2)+ - F.P[3].z .*(1+ksi1)*(1+ksi2)+ - F.P[4].z .*(1-ksi1)*(1+ksi2)) - zLoc = 0.5 * ((1-ksi3) * z[1] + (1+ksi3) * z[2]) +@inline function JacobiSphere3Loc!(X,dXdx,hR,ksi1,ksi2,ksi3,F,z1,z2,Rad,H,zs) + zero = eltype(X)(0) + one = eltype(X)(1) + half = eltype(X)(1/2) + quarter = eltype(X)(1/4) + X1 = quarter * (F[1,1] * (one-ksi1)*(one-ksi2) + + F[2,1] * (one+ksi1)*(one-ksi2) + + F[3,1] * (one+ksi1)*(one+ksi2) + + F[4,1] * (one-ksi1)*(one+ksi2)) + X2 = quarter * (F[1,2] * (one-ksi1)*(one-ksi2) + + F[2,2] * (one+ksi1)*(one-ksi2) + + F[3,2] * (one+ksi1)*(one+ksi2) + + F[4,2] * (one-ksi1)*(one+ksi2)) + X3 = quarter * (F[1,3] * (one-ksi1)*(one-ksi2) + + F[2,3] * (one+ksi1)*(one-ksi2) + + F[3,3] * (one+ksi1)*(one+ksi2) + + F[4,3] * (one-ksi1)*(one+ksi2)) + zLoc = half * ((one-ksi3) * z1 + (one+ksi3) * z2) hR = zLoc + (H - zLoc) * zs / H - D33 = 1 - zs / Topography.H; - D33 = 0.5 * D33*(z[2]-z[1]) + D33 = one - zs / H; + D33 = half * D33*(z2-z1) - r = sqrt(X1^2 + X2^2 + X3^2) + r = sqrt(X1 * X1 + X2 * X2 + X3 * X3) f = Rad / r X1 = X1 / r X2 = X2 / r X3 = X3 / r (lam,theta)=cart2sphere(X1,X2,X3) - DD=[-sin(lam) cos(lam) 0 - 0 0 1] + DD=[-sin(lam) cos(lam) zero + zero zero one] sinlam = sin(lam) coslam = cos(lam) @@ -92,18 +110,20 @@ function JacobiSphere3Loc(X,dXdx,ksi1,ksi2,ksi3,F,z,Rad,H,zs) a21 a22 a23 a31 a32 a33] - B = [F.P[1].x F.P[2].x F.P[3].x F.P[4].x - F.P[1].y F.P[2].y F.P[3].y F.P[4].y - F.P[1].z F.P[2].z F.P[3].z F.P[4].z] + B = [F[1,1] F[2,1] F[3,1] F[4,1] + F[1,2] F[2,2] F[3,2] F[4,2] + F[1,3] F[2,3] F[3,3] F[4,3]] - C =0.25 * [-1+ksi2 -1+ksi1 - 1-ksi2 -1-ksi1 - 1+ksi2 1+ksi1 - -1-ksi2 1-ksi1] + C = quarter * [-one+ksi2 -one+ksi1 + one-ksi2 -one-ksi1 + one+ksi2 one+ksi1 + -one-ksi2 one-ksi1] D = f * DD * A * B * C - dXdx .= [D [0; 0] - 0 0 D33] - X .= [X1 X2 X3]*(Rad+hR) + dXdx .= [D [zero; zero] + zero zero D33] + X[1] = X1 * (Rad + hR) + X[2] = X2 * (Rad + hR) + X[3] = X3 * (Rad + hR) end diff --git a/src/Grids/SubGrid.jl b/src/Grids/SubGrid.jl index c19d75c..397a58b 100644 --- a/src/Grids/SubGrid.jl +++ b/src/Grids/SubGrid.jl @@ -1,5 +1,7 @@ function ConstructSubGrid(GlobalGrid,Proc,ProcNumber) - SubGrid = GridStruct(GlobalGrid.nz,GlobalGrid.Topography) + backend = get_backend(GlobalGrid.z) + FT = eltype(GlobalGrid.z) + SubGrid = GridStruct{FT}(backend,GlobalGrid.nz,GlobalGrid.Topography) # Number of faces DictF = Dict() diff --git a/src/Integration/cache.jl b/src/Integration/cache.jl index 0cd314d..368f2e9 100644 --- a/src/Integration/cache.jl +++ b/src/Integration/cache.jl @@ -1,4 +1,3 @@ -using KernelAbstractions mutable struct CacheStruct{FT<:AbstractFloat, AT1<:AbstractArray, AT2<:AbstractArray,