Skip to content

Commit

Permalink
DivGradRho without Cache Storage
Browse files Browse the repository at this point in the history
  • Loading branch information
OsKnoth committed Jul 21, 2023
1 parent e704276 commit dd6b6b5
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 17 deletions.
4 changes: 3 additions & 1 deletion Examples/testAdvectionCart.jl
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ end
Global.Output.PrintDays = PrintDays
Global.Output.PrintSeconds = PrintSeconds
Global.Output.PrintTime = PrintTime
Global.Output.PrintStartDays = 0
Global.Output.PrintStartTime = 0
Global.Output.OrdPrint=CG.OrdPoly
Global.vtkCache = CGDycore.vtkStruct(Global.Output.OrdPrint,CGDycore.TransCartX,CG,Global)

Expand All @@ -198,4 +198,6 @@ end
Global.TimeStepper.SimSeconds = SimSeconds
Global.TimeStepper.SimTime = SimTime

nT = NumV + NumTr
CGDycore.InitExchangeData3D(nz,nT,Global.Exchange)
CGDycore.TimeStepperAdvection!(U,CGDycore.TransCartX,CG,Global,Param)
16 changes: 8 additions & 8 deletions Jobs/JobAdvectionCubeCart
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
mpirun -n 2 julia --project --check-bounds=yes Examples/testAdvectionCart.jl \
mpirun -n 6 julia --project --check-bounds=yes Examples/testAdvectionCart.jl \
--Problem="AdvectionCubeCart" \
--NumV=1 \
--NumTr=1 \
--ProfTr="AdvectionCubeCart" \
--ProfVel="Const" \
--HorLimit=true \
--HorLimit=false \
--vtkFileName="AdvectionCubeCart" \
--SimTime=1000.0 \
--PrintTime=10.0 \
Expand All @@ -16,16 +16,16 @@ mpirun -n 2 julia --project --check-bounds=yes Examples/testAdvectionCart.jl \
--H=1.0 \
--x0=0.0 \
--y0=0.0 \
--nx=2 \
--ny=2 \
--OrdPoly=1 \
--nx=40 \
--ny=40 \
--OrdPoly=4 \
--BoundaryWE="Period" \
--BoundarySN="Period" \
--BoundaryBT="" \
--HyperVisc=true \
--HyperDCurl=0.e5 \
--HyperDGrad=0.e5 \
--HyperDDiv=0.e5
--HyperDCurl=1.e4 \
--HyperDGrad=1.e4 \
--HyperDDiv=1.e4



12 changes: 6 additions & 6 deletions src/DyCore/FcnTracer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,13 @@ function FcnTracer!(F,U,time,CG,Global,Param)
end
end
end
@views DivRhoGrad!(DivCG,ThCG,RhoCG,CG,
@views DivRhoGradE!(DivCG,ThCG,RhoCG,CG,
Global.Metric.dXdxI[:,:,:,:,:,:,iF],Global.Metric.J[:,:,:,:,iF],Global.ThreadCache)
@inbounds for jP=1:OP
@inbounds for iP=1:OP
ind = CG.Glob[iP,jP,iF]
@inbounds for iz=1:nz
DivTr[iz,ind,iT] += 0.5 * DivCG[iP,jP,iz] / CG.M[iz,ind]
DivTr[iz,ind,iT] += DivCG[iP,jP,iz] / CG.M[iz,ind]
end
end
end
Expand All @@ -88,13 +88,13 @@ function FcnTracer!(F,U,time,CG,Global,Param)
end
end
end
@views DivRhoGrad!(DivCG,ThCG,RhoCG,CG,
@views DivRhoGradE!(DivCG,ThCG,RhoCG,CG,
Global.Metric.dXdxI[:,:,:,:,:,:,iF],Global.Metric.J[:,:,:,:,iF],Global.ThreadCache)
@inbounds for jP=1:OP
@inbounds for iP=1:OP
ind = CG.Glob[iP,jP,iF]
@inbounds for iz=1:nz
DivTr[iz,ind,iT] += 0.5 * DivCG[iP,jP,iz] / CG.M[iz,ind]
DivTr[iz,ind,iT] += DivCG[iP,jP,iz] / CG.M[iz,ind]
end
end
end
Expand Down Expand Up @@ -170,8 +170,8 @@ function FcnTracer!(F,U,time,CG,Global,Param)
if HorLimit
@inbounds for iz=1:nz
@inbounds for iT = 1:NumTr
@views qMinS=minimum(qMin[iz,CG.Stencil[iF,:]])
@views qMaxS=maximum(qMax[iz,CG.Stencil[iF,:]])
@views qMinS=minimum(qMin[iz,CG.Stencil[iF,:],iT])
@views qMaxS=maximum(qMax[iz,CG.Stencil[iF,:],iT])
@views HorLimiter!(FCG[:,:,iz,iT+NumV],TrCG[:,:,iz,iT],RhoCG,RhoCG,dtau,
Global.Metric.J[:,:,:,iz,iF],CG.w,qMinS,qMaxS,Global.ThreadCache)
end
Expand Down
34 changes: 34 additions & 0 deletions src/DyCore/Operator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1342,6 +1342,40 @@ function DivRhoGrad!(F,cC,RhoC,Fe,dXdxI,J,ThreadCache)
end
end

function DivRhoGradE!(F,cC,RhoC,Fe,dXdxI,J,ThreadCache)
@unpack TCacheC1, TCacheC2, TCacheC3, TCacheC4 = ThreadCache
Nz = size(F,3)
N = size(F,1)
D = Fe.DS
DW = Fe.DW

@. F = 0
@inbounds for iz = 1 : Nz
@inbounds for j = 1 : N
@inbounds for i = 1 : N
Dxc = 0
Dyc = 0
@inbounds for k = 1 : N
Dxc += D[i,k] * (cC[k,j,iz] / RhoC[k,j,iz])
Dyc += D[j,k] * (cC[i,k,iz] / RhoC[i,k,iz])
end
GradDx = ((dXdxI[i,j,1,iz,1,1] + dXdxI[i,j,2,iz,1,1]) * Dxc +
(dXdxI[i,j,1,iz,2,1] + dXdxI[i,j,2,iz,2,1]) * Dyc) / (J[i,j,1,iz] + J[i,j,2,iz])
GradDy = ((dXdxI[i,j,1,iz,1,2] + dXdxI[i,j,2,iz,1,2]) * Dxc +
(dXdxI[i,j,1,iz,2,2] + dXdxI[i,j,2,iz,2,2]) * Dyc) / (J[i,j,1,iz] + J[i,j,2,iz])
tempx = (dXdxI[i,j,1,iz,1,1] + dXdxI[i,j,2,iz,1,1]) * GradDx +
(dXdxI[i,j,1,iz,1,2] + dXdxI[i,j,2,iz,1,2]) * GradDy
tempy = (dXdxI[i,j,1,iz,2,1] + dXdxI[i,j,2,iz,2,1]) * GradDx +
(dXdxI[i,j,1,iz,2,2] + dXdxI[i,j,2,iz,2,2]) * GradDy
for k = 1 : N
F[k,j,iz] += DW[k,i] * tempx
F[i,k,iz] += DW[k,j] * tempy
end
end
end
end
end

function DivRhoGradHor!(F,cC,RhoC,Fe,dXdxI,J,ThreadCache)
@unpack TCacheC1, TCacheC2, TCacheC3, TCacheC4 = ThreadCache
Nz = size(F,3)
Expand Down
46 changes: 44 additions & 2 deletions src/GPU/OperatorCuda.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,46 @@

function DivRhoGradE!(F,cC,RhoC,Fe,dXdxI,J,ThreadCache)
@unpack TCacheC1, TCacheC2, TCacheC3, TCacheC4 = ThreadCache
Nz = size(F,3)
N = size(F,1)
D = Fe.DS
DW = Fe.DW

tempx = TCacheC1[Threads.threadid()]
tempy = TCacheC2[Threads.threadid()]

@. F = 0
@inbounds for iz = 1 : Nz
@inbounds for j = 1 : N
@inbounds for i = 1 : N
Dxc = 0
Dyc = 0
@inbounds for k = 1 : N
Dxc += D[i,k] * (cC[k,j,iz] / RhoC[k,j,iz])
Dyc += D[j,k] * (cC[i,k,iz] / RhoC[i,k,iz])
end
GradDx = ((dXdxI[i,j,1,iz,1,1] + dXdxI[i,j,2,iz,1,1]) * Dxc +
(dXdxI[i,j,1,iz,2,1] + dXdxI[i,j,2,iz,2,1]) * Dyc) / (J[i,j,1,iz] + J[i,j,2,iz])
GradDy = ((dXdxI[i,j,1,iz,1,2] + dXdxI[i,j,2,iz,1,2]) * Dxc +
(dXdxI[i,j,1,iz,2,2] + dXdxI[i,j,2,iz,2,2]) * Dyc) / (J[i,j,1,iz] + J[i,j,2,iz])
tempx[i,j] = (dXdxI[i,j,1,iz,1,1] + dXdxI[i,j,2,iz,1,1]) * GradDx +
(dXdxI[i,j,1,iz,1,2] + dXdxI[i,j,2,iz,1,2]) * GradDy
tempy[i,j] = (dXdxI[i,j,1,iz,2,1] + dXdxI[i,j,2,iz,2,1]) * GradDx +
(dXdxI[i,j,1,iz,2,2] + dXdxI[i,j,2,iz,2,2]) * GradDy
end
end
@inbounds for j = 1 : N
@inbounds for i = 1 : N
@inbounds for k = 1 : N
F[i,j,iz] += DW[i,k] * tempx[k,j]
F[i,j,iz] += DW[j,k] * tempy[i,k]
end
end
end
end
end


function DivRhoTrCellCUDA!(FRhoTrC,uC,vC,RhoTrC,Fe,dXdxI,U,V)
OrdPoly = Fe.OrdPoly
D = Fe.DS
Expand All @@ -12,6 +54,6 @@ function DivRhoTrCellCUDA!(FRhoTrC,uC,vC,RhoTrC,Fe,dXdxI,U,V)
sync_threads()
ix = (blockIdx().x-1) * blockDim().x + threadIdx().x
iy = (blockIdx().y-1) * blockDim().y + threadIdx().y
@tullio FRhoTrC[ix,iy] += D[i,iy] * U[i,iy]
@tullio FRhoTrC[ix,iy] += D[ix,j] * U[ix,j]
@tullio FRhoTrC[ix,iy] += D[ix,j] * U[j,iy]
@tullio FRhoTrC[ix,iy] += D[i,iy] * U[i,ix]
end
49 changes: 49 additions & 0 deletions src/GPU/Test2.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
using CGDycore
using MPI
using Base


function DivRhoGradE1!(F,cC,RhoC,D,DW,dXdxI,J)

Nz = size(F,3)
N = size(F,1)

@. F = 0
@inbounds for iz = 1 : Nz
@inbounds for j = 1 : N
@inbounds for i = 1 : N
Dxc = 0
Dyc = 0
@inbounds for k = 1 : N
Dxc += D[i,k] * (cC[k,j,iz] / RhoC[k,j,iz])
Dyc += D[j,k] * (cC[i,k,iz] / RhoC[i,k,iz])
end
GradDx = ((dXdxI[i,j,1,iz,1,1] + dXdxI[i,j,2,iz,1,1]) * Dxc +
(dXdxI[i,j,1,iz,2,1] + dXdxI[i,j,2,iz,2,1]) * Dyc) / (J[i,j,1,iz] + J[i,j,2,iz])
GradDy = ((dXdxI[i,j,1,iz,1,2] + dXdxI[i,j,2,iz,1,2]) * Dxc +
(dXdxI[i,j,1,iz,2,2] + dXdxI[i,j,2,iz,2,2]) * Dyc) / (J[i,j,1,iz] + J[i,j,2,iz])
tempx = (dXdxI[i,j,1,iz,1,1] + dXdxI[i,j,2,iz,1,1]) * GradDx +
(dXdxI[i,j,1,iz,1,2] + dXdxI[i,j,2,iz,1,2]) * GradDy
tempy = (dXdxI[i,j,1,iz,2,1] + dXdxI[i,j,2,iz,2,1]) * GradDx +
(dXdxI[i,j,1,iz,2,2] + dXdxI[i,j,2,iz,2,2]) * GradDy
for k = 1 : N
F[k,j,iz] += DW[k,i] * tempx
F[i,k,iz] += DW[k,j] * tempy
end
end
end
end
end

Nz = 10
OrdPoly = 4

(DW,DS,) = CGDycore.DerivativeMatrixSingle(OrdPoly)

F = zeros(Float64,OrdPoly+1,OrdPoly+1,Nz)
cC = rand(Float64,OrdPoly+1,OrdPoly+1,Nz)
RhoC = ones(Float64,OrdPoly+1,OrdPoly+1,Nz)
dXdxI = rand(OrdPoly+1,OrdPoly+1,2,Nz,3,3)
J = ones(OrdPoly+1,OrdPoly+1,2,Nz)

DivRhoGradE1!(F,cC,RhoC,DS,DW,dXdxI,J)

0 comments on commit dd6b6b5

Please sign in to comment.