diff --git a/Examples/testAdvectionCart.jl b/Examples/testAdvectionCart.jl index 9842984..75604d5 100644 --- a/Examples/testAdvectionCart.jl +++ b/Examples/testAdvectionCart.jl @@ -182,7 +182,7 @@ end Global.Output.PrintDays = PrintDays Global.Output.PrintSeconds = PrintSeconds Global.Output.PrintTime = PrintTime - Global.Output.PrintStartDays = 0 + Global.Output.PrintStartTime = 0 Global.Output.OrdPrint=CG.OrdPoly Global.vtkCache = CGDycore.vtkStruct(Global.Output.OrdPrint,CGDycore.TransCartX,CG,Global) @@ -198,4 +198,6 @@ end Global.TimeStepper.SimSeconds = SimSeconds Global.TimeStepper.SimTime = SimTime + nT = NumV + NumTr + CGDycore.InitExchangeData3D(nz,nT,Global.Exchange) CGDycore.TimeStepperAdvection!(U,CGDycore.TransCartX,CG,Global,Param) diff --git a/Jobs/JobAdvectionCubeCart b/Jobs/JobAdvectionCubeCart index 399cd33..aadfdb4 100755 --- a/Jobs/JobAdvectionCubeCart +++ b/Jobs/JobAdvectionCubeCart @@ -1,10 +1,10 @@ -mpirun -n 2 julia --project --check-bounds=yes Examples/testAdvectionCart.jl \ +mpirun -n 6 julia --project --check-bounds=yes Examples/testAdvectionCart.jl \ --Problem="AdvectionCubeCart" \ --NumV=1 \ --NumTr=1 \ --ProfTr="AdvectionCubeCart" \ --ProfVel="Const" \ - --HorLimit=true \ + --HorLimit=false \ --vtkFileName="AdvectionCubeCart" \ --SimTime=1000.0 \ --PrintTime=10.0 \ @@ -16,16 +16,16 @@ mpirun -n 2 julia --project --check-bounds=yes Examples/testAdvectionCart.jl \ --H=1.0 \ --x0=0.0 \ --y0=0.0 \ - --nx=2 \ - --ny=2 \ - --OrdPoly=1 \ + --nx=40 \ + --ny=40 \ + --OrdPoly=4 \ --BoundaryWE="Period" \ --BoundarySN="Period" \ --BoundaryBT="" \ --HyperVisc=true \ - --HyperDCurl=0.e5 \ - --HyperDGrad=0.e5 \ - --HyperDDiv=0.e5 + --HyperDCurl=1.e4 \ + --HyperDGrad=1.e4 \ + --HyperDDiv=1.e4 diff --git a/src/DyCore/FcnTracer.jl b/src/DyCore/FcnTracer.jl index 121b158..ce6cc7b 100644 --- a/src/DyCore/FcnTracer.jl +++ b/src/DyCore/FcnTracer.jl @@ -55,13 +55,13 @@ function FcnTracer!(F,U,time,CG,Global,Param) end end end - @views DivRhoGrad!(DivCG,ThCG,RhoCG,CG, + @views DivRhoGradE!(DivCG,ThCG,RhoCG,CG, Global.Metric.dXdxI[:,:,:,:,:,:,iF],Global.Metric.J[:,:,:,:,iF],Global.ThreadCache) @inbounds for jP=1:OP @inbounds for iP=1:OP ind = CG.Glob[iP,jP,iF] @inbounds for iz=1:nz - DivTr[iz,ind,iT] += 0.5 * DivCG[iP,jP,iz] / CG.M[iz,ind] + DivTr[iz,ind,iT] += DivCG[iP,jP,iz] / CG.M[iz,ind] end end end @@ -88,13 +88,13 @@ function FcnTracer!(F,U,time,CG,Global,Param) end end end - @views DivRhoGrad!(DivCG,ThCG,RhoCG,CG, + @views DivRhoGradE!(DivCG,ThCG,RhoCG,CG, Global.Metric.dXdxI[:,:,:,:,:,:,iF],Global.Metric.J[:,:,:,:,iF],Global.ThreadCache) @inbounds for jP=1:OP @inbounds for iP=1:OP ind = CG.Glob[iP,jP,iF] @inbounds for iz=1:nz - DivTr[iz,ind,iT] += 0.5 * DivCG[iP,jP,iz] / CG.M[iz,ind] + DivTr[iz,ind,iT] += DivCG[iP,jP,iz] / CG.M[iz,ind] end end end @@ -170,8 +170,8 @@ function FcnTracer!(F,U,time,CG,Global,Param) if HorLimit @inbounds for iz=1:nz @inbounds for iT = 1:NumTr - @views qMinS=minimum(qMin[iz,CG.Stencil[iF,:]]) - @views qMaxS=maximum(qMax[iz,CG.Stencil[iF,:]]) + @views qMinS=minimum(qMin[iz,CG.Stencil[iF,:],iT]) + @views qMaxS=maximum(qMax[iz,CG.Stencil[iF,:],iT]) @views HorLimiter!(FCG[:,:,iz,iT+NumV],TrCG[:,:,iz,iT],RhoCG,RhoCG,dtau, Global.Metric.J[:,:,:,iz,iF],CG.w,qMinS,qMaxS,Global.ThreadCache) end diff --git a/src/DyCore/Operator.jl b/src/DyCore/Operator.jl index 2c920fd..2d6b31b 100644 --- a/src/DyCore/Operator.jl +++ b/src/DyCore/Operator.jl @@ -1342,6 +1342,40 @@ function DivRhoGrad!(F,cC,RhoC,Fe,dXdxI,J,ThreadCache) end end +function DivRhoGradE!(F,cC,RhoC,Fe,dXdxI,J,ThreadCache) + @unpack TCacheC1, TCacheC2, TCacheC3, TCacheC4 = ThreadCache + Nz = size(F,3) + N = size(F,1) + D = Fe.DS + DW = Fe.DW + + @. F = 0 + @inbounds for iz = 1 : Nz + @inbounds for j = 1 : N + @inbounds for i = 1 : N + Dxc = 0 + Dyc = 0 + @inbounds for k = 1 : N + Dxc += D[i,k] * (cC[k,j,iz] / RhoC[k,j,iz]) + Dyc += D[j,k] * (cC[i,k,iz] / RhoC[i,k,iz]) + end + GradDx = ((dXdxI[i,j,1,iz,1,1] + dXdxI[i,j,2,iz,1,1]) * Dxc + + (dXdxI[i,j,1,iz,2,1] + dXdxI[i,j,2,iz,2,1]) * Dyc) / (J[i,j,1,iz] + J[i,j,2,iz]) + GradDy = ((dXdxI[i,j,1,iz,1,2] + dXdxI[i,j,2,iz,1,2]) * Dxc + + (dXdxI[i,j,1,iz,2,2] + dXdxI[i,j,2,iz,2,2]) * Dyc) / (J[i,j,1,iz] + J[i,j,2,iz]) + tempx = (dXdxI[i,j,1,iz,1,1] + dXdxI[i,j,2,iz,1,1]) * GradDx + + (dXdxI[i,j,1,iz,1,2] + dXdxI[i,j,2,iz,1,2]) * GradDy + tempy = (dXdxI[i,j,1,iz,2,1] + dXdxI[i,j,2,iz,2,1]) * GradDx + + (dXdxI[i,j,1,iz,2,2] + dXdxI[i,j,2,iz,2,2]) * GradDy + for k = 1 : N + F[k,j,iz] += DW[k,i] * tempx + F[i,k,iz] += DW[k,j] * tempy + end + end + end + end +end + function DivRhoGradHor!(F,cC,RhoC,Fe,dXdxI,J,ThreadCache) @unpack TCacheC1, TCacheC2, TCacheC3, TCacheC4 = ThreadCache Nz = size(F,3) diff --git a/src/GPU/OperatorCuda.jl b/src/GPU/OperatorCuda.jl index 9de9dbe..64884ee 100644 --- a/src/GPU/OperatorCuda.jl +++ b/src/GPU/OperatorCuda.jl @@ -1,4 +1,46 @@ +function DivRhoGradE!(F,cC,RhoC,Fe,dXdxI,J,ThreadCache) + @unpack TCacheC1, TCacheC2, TCacheC3, TCacheC4 = ThreadCache + Nz = size(F,3) + N = size(F,1) + D = Fe.DS + DW = Fe.DW + + tempx = TCacheC1[Threads.threadid()] + tempy = TCacheC2[Threads.threadid()] + + @. F = 0 + @inbounds for iz = 1 : Nz + @inbounds for j = 1 : N + @inbounds for i = 1 : N + Dxc = 0 + Dyc = 0 + @inbounds for k = 1 : N + Dxc += D[i,k] * (cC[k,j,iz] / RhoC[k,j,iz]) + Dyc += D[j,k] * (cC[i,k,iz] / RhoC[i,k,iz]) + end + GradDx = ((dXdxI[i,j,1,iz,1,1] + dXdxI[i,j,2,iz,1,1]) * Dxc + + (dXdxI[i,j,1,iz,2,1] + dXdxI[i,j,2,iz,2,1]) * Dyc) / (J[i,j,1,iz] + J[i,j,2,iz]) + GradDy = ((dXdxI[i,j,1,iz,1,2] + dXdxI[i,j,2,iz,1,2]) * Dxc + + (dXdxI[i,j,1,iz,2,2] + dXdxI[i,j,2,iz,2,2]) * Dyc) / (J[i,j,1,iz] + J[i,j,2,iz]) + tempx[i,j] = (dXdxI[i,j,1,iz,1,1] + dXdxI[i,j,2,iz,1,1]) * GradDx + + (dXdxI[i,j,1,iz,1,2] + dXdxI[i,j,2,iz,1,2]) * GradDy + tempy[i,j] = (dXdxI[i,j,1,iz,2,1] + dXdxI[i,j,2,iz,2,1]) * GradDx + + (dXdxI[i,j,1,iz,2,2] + dXdxI[i,j,2,iz,2,2]) * GradDy + end + end + @inbounds for j = 1 : N + @inbounds for i = 1 : N + @inbounds for k = 1 : N + F[i,j,iz] += DW[i,k] * tempx[k,j] + F[i,j,iz] += DW[j,k] * tempy[i,k] + end + end + end + end +end + + function DivRhoTrCellCUDA!(FRhoTrC,uC,vC,RhoTrC,Fe,dXdxI,U,V) OrdPoly = Fe.OrdPoly D = Fe.DS @@ -12,6 +54,6 @@ function DivRhoTrCellCUDA!(FRhoTrC,uC,vC,RhoTrC,Fe,dXdxI,U,V) sync_threads() ix = (blockIdx().x-1) * blockDim().x + threadIdx().x iy = (blockIdx().y-1) * blockDim().y + threadIdx().y - @tullio FRhoTrC[ix,iy] += D[i,iy] * U[i,iy] - @tullio FRhoTrC[ix,iy] += D[ix,j] * U[ix,j] + @tullio FRhoTrC[ix,iy] += D[ix,j] * U[j,iy] + @tullio FRhoTrC[ix,iy] += D[i,iy] * U[i,ix] end diff --git a/src/GPU/Test2.jl b/src/GPU/Test2.jl new file mode 100644 index 0000000..1d5161c --- /dev/null +++ b/src/GPU/Test2.jl @@ -0,0 +1,49 @@ +using CGDycore +using MPI +using Base + + +function DivRhoGradE1!(F,cC,RhoC,D,DW,dXdxI,J) + + Nz = size(F,3) + N = size(F,1) + + @. F = 0 + @inbounds for iz = 1 : Nz + @inbounds for j = 1 : N + @inbounds for i = 1 : N + Dxc = 0 + Dyc = 0 + @inbounds for k = 1 : N + Dxc += D[i,k] * (cC[k,j,iz] / RhoC[k,j,iz]) + Dyc += D[j,k] * (cC[i,k,iz] / RhoC[i,k,iz]) + end + GradDx = ((dXdxI[i,j,1,iz,1,1] + dXdxI[i,j,2,iz,1,1]) * Dxc + + (dXdxI[i,j,1,iz,2,1] + dXdxI[i,j,2,iz,2,1]) * Dyc) / (J[i,j,1,iz] + J[i,j,2,iz]) + GradDy = ((dXdxI[i,j,1,iz,1,2] + dXdxI[i,j,2,iz,1,2]) * Dxc + + (dXdxI[i,j,1,iz,2,2] + dXdxI[i,j,2,iz,2,2]) * Dyc) / (J[i,j,1,iz] + J[i,j,2,iz]) + tempx = (dXdxI[i,j,1,iz,1,1] + dXdxI[i,j,2,iz,1,1]) * GradDx + + (dXdxI[i,j,1,iz,1,2] + dXdxI[i,j,2,iz,1,2]) * GradDy + tempy = (dXdxI[i,j,1,iz,2,1] + dXdxI[i,j,2,iz,2,1]) * GradDx + + (dXdxI[i,j,1,iz,2,2] + dXdxI[i,j,2,iz,2,2]) * GradDy + for k = 1 : N + F[k,j,iz] += DW[k,i] * tempx + F[i,k,iz] += DW[k,j] * tempy + end + end + end + end +end + +Nz = 10 +OrdPoly = 4 + +(DW,DS,) = CGDycore.DerivativeMatrixSingle(OrdPoly) + +F = zeros(Float64,OrdPoly+1,OrdPoly+1,Nz) +cC = rand(Float64,OrdPoly+1,OrdPoly+1,Nz) +RhoC = ones(Float64,OrdPoly+1,OrdPoly+1,Nz) +dXdxI = rand(OrdPoly+1,OrdPoly+1,2,Nz,3,3) +J = ones(OrdPoly+1,OrdPoly+1,2,Nz) + +DivRhoGradE1!(F,cC,RhoC,DS,DW,dXdxI,J)