Skip to content

Commit

Permalink
GPU Exchange Stencil
Browse files Browse the repository at this point in the history
  • Loading branch information
OsKnoth committed Dec 9, 2023
1 parent 089b683 commit ab1660a
Show file tree
Hide file tree
Showing 6 changed files with 224 additions and 31 deletions.
2 changes: 1 addition & 1 deletion Jobs/JobAdvectionLimCart
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
mpirun -n 6 julia --project Examples/testAdvectionCart.jl \
mpirun -n 2 julia --project Examples/testAdvectionCart.jl \
--Problem="LimAdvectionCart" \
--Device="CPU" \
--FloatTypeBackend="Float64" \
Expand Down
31 changes: 31 additions & 0 deletions Jobs/JobAdvectionLimCart4
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Job script: runs Examples/testAdvectionCart.jl for Problem "LimAdvectionCart"
# with 4 tasks (srun -n 4) through gpu_wrapper.sh, selecting --Device="GPU" and
# --GPUType="CUDA".
# NOTE(review): UCX_ERROR_SIGNALS is set to the empty string, presumably so that
# UCX does not install its own error-signal handlers alongside Julia's -- confirm
# against the MPI.jl "known issues" notes for UCX.
# The whole julia command line is handed to the wrapper's -e option as a single
# word: each inner double quote closes and reopens the outer quoted string, so
# option values are concatenated into that word without quote characters.
export UCX_ERROR_SIGNALS=""
srun -n 4 gpu_wrapper.sh -n 4 -e "julia --project Examples/testAdvectionCart.jl \
--Problem="LimAdvectionCart" \
--Device="GPU" \
--GPUType="CUDA" \
--NumberThreadGPU=1024 \
--FloatTypeBackend="Float64" \
--NumV=5 \
--NumTr=1 \
--HorLimit=true \
--Upwind=true \
--vtkFileName="LimAdvectionCart" \
--SimTime=0.0 \
--PrintTime=0.1 \
--dtau=0.0025 \
--IntMethod="SSPRungeKutta" \
--Table="SSP32" \
--Lx=0 \
--Ly=0 \
--H=0 \
--x0=0 \
--y0=0 \
--nx=40 \
--ny=40 \
--nz=40 \
--OrdPoly=4 \
--BoundaryWE="Period" \
--BoundarySN="Period" \
--BoundaryBT="" \
--HyperVisc=true \
--HyperDDiv=1.e-4"
44 changes: 44 additions & 0 deletions Jobs/JobNHBaroWaveDrySphere4
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Job script: runs Examples/testNHSphere.jl for Problem "BaroWaveDrySphere"
# with 4 tasks (srun -n 4) through gpu_wrapper.sh, selecting --Device="GPU",
# --GPUType="CUDA" and --FloatTypeBackend="Float32".
# NOTE(review): UCX_ERROR_SIGNALS is set to the empty string, presumably so that
# UCX does not install its own error-signal handlers alongside Julia's -- confirm
# against the MPI.jl "known issues" notes for UCX.
# The whole julia command line is handed to the wrapper's -e option as a single
# word: each inner double quote closes and reopens the outer quoted string, so
# option values are concatenated into that word without quote characters.
export UCX_ERROR_SIGNALS=""
srun -n 4 gpu_wrapper.sh -n 4 -e "julia --project Examples/testNHSphere.jl \
--Problem="BaroWaveDrySphere" \
--Device="GPU" \
--GPUType="CUDA" \
--FloatTypeBackend="Float32" \
--NumberThreadGPU=1024 \
--NumV=5 \
--NumTr=0 \
--ProfpBGrd="" \
--ProfRhoBGrd="" \
--Source=false \
--Forcing=false \
--Curl=false \
--ModelType="VectorInvariant" \
--Coriolis=true \
--VerticalDiffusion=false \
--Upwind=true \
--HorLimit=false \
--Buoyancy=true \
--Decomp="EqualArea" \
--SimDays=10 \
--SimSeconds=0 \
--PrintSeconds=0 \
--PrintMinutes=0 \
--PrintHours=0 \
--PrintDays=0 \
--StartAverageDays=100 \
--Flat=true \
--dtau=150 \
--IntMethod="Rosenbrock" \
--Table="SSP-Knoth" \
--TopoS="" \
--Stretch=true \
--StretchType="Exp" \
--GridType="CubedSphere" \
--nz=64 \
--nPanel=30 \
--H=30000.0 \
--OrdPoly=3 \
--HyperVisc=true \
--HyperDCurl=5.e14 \
--HyperDGrad=5.e14 \
--HyperDDiv=5.e14"
40 changes: 19 additions & 21 deletions src/Examples/parameters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -244,28 +244,26 @@ Base.@kwdef struct LimAdvectionCart{FT}
u0::FT = π / 2 # angular velocity
r0::FT = (xmax - xmin) / 6 # bells radius
end_time::FT = 2π # simulation period in seconds
centers1xC = xmin + (xmax - xmin) / 4
centers1yC = ymin + (ymax - ymin) / 2
centers1zC = zmin + (zmax - zmin) / 2
centers2xC = xmin + 3 * (xmax - xmin) / 4
centers2yC = ymin + (ymax - ymin) / 2
centers2zC = zmin + (zmax - zmin) / 2
centers1xC::FT = xmin + (xmax - xmin) / 4
centers1yC::FT = ymin + (ymax - ymin) / 2
centers1zC::FT = zmin + (zmax - zmin) / 2
centers2xC::FT = xmin + 3 * (xmax - xmin) / 4
centers2yC::FT = ymin + (ymax - ymin) / 2
centers2zC::FT = zmin + (zmax - zmin) / 2
end

Base.@kwdef struct ParamAdvectionCubeRotCart{FT}
Base.@kwdef struct ParamAdvectionCubeRotCart
StreamFun::Bool = false
uMax::FT = 1.0
vMax::FT = 1.0
xC::FT = 500.0
zC::FT = 500.0
x1::FT = 299.0
x2::FT = 501.0
y1::FT = 299.0
y2::FT = 501.0
z1::FT = 299.0
z2::FT = 501.0
EndTime::FT = 1000.0
H::FT = 1000.0
uMax::Float64 = 1.0
vMax::Float64 = 0.0
xC::Float64 = 500.0
zC::Float64 = 500.0
x1::Float64 = 299.0
x2::Float64 = 501.0
z1::Float64 = 299.0
z2::Float64 = 501.0
EndTime::Float64 = 1000.0
H::Float64 = 1000.0
end

Base.@kwdef struct ParamAdvectionCart
Expand Down Expand Up @@ -315,10 +313,10 @@ function Parameters(FT,Problem::String)
Param = ParamAdvectionCubeCart()
elseif Problem == "AdvectionCubeRotCart"
@show Problem
Param = ParamAdvectionCubeRotCart{FT}()
Param = ParamAdvectionCubeRotCart()
elseif Problem == "LimAdvectionCart"
@show Problem
Param = LimAdvectionCart{FT}()
Param = LimAdvectionCart{FT}()
elseif Problem == "WarmBubble2DXCart"
@show Problem
Param = ParamWarmBubble2DXCart()
Expand Down
4 changes: 2 additions & 2 deletions src/GPU/FcnGPU.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ function FcnAdvectionGPU!(F,U,time,FE,Metric,Phys,Cache,Exchange,Global,Param,Pr
if Global.Model.HorLimit
@views KLimitKernel!(DoF,qMin,qMax,U[:,:,NumV+1:NumV+NumTr],Rho,Glob,ndrange=ndrangeL)
KernelAbstractions.synchronize(backend)
Parallels.ExchangeDataFSend(qMin,qMax,Exchange)
Parallels.ExchangeDataFRecv!(qMin,qMax,Exchange)
Parallels.ExchangeDataFSendGPU(qMin,qMax,Exchange)
Parallels.ExchangeDataFRecvGPU!(qMin,qMax,Exchange)
end


Expand Down
134 changes: 127 additions & 7 deletions src/Parallels/Exchange.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
mutable struct ExchangeStruct{FT<:AbstractFloat,
IT1<:AbstractArray,
AT3<:AbstractArray}
AT3<:AbstractArray,
AT4<:AbstractArray}
IndSendBuffer::Dict{Int,IT1}
IndSendBufferF::Dict{Int,Array{Int,1}}
IndSendBufferF::Dict{Int,IT1}
IndRecvBuffer::Dict{Int,IT1}
IndRecvBufferF::Dict{Int,Array{Int,1}}
IndRecvBufferF::Dict{Int,IT1}
NeiProc::Array{Int, 1}
Proc::Int
ProcNumber::Int
Expand All @@ -13,13 +14,13 @@ mutable struct ExchangeStruct{FT<:AbstractFloat,
SendBuffer::Dict
#SendBuffer3::Dict{Int,Array{FT, 3}}
SendBuffer3::Dict{Int,AT3}
SendBufferF::Dict{Int,Array{FT, 4}}
SendBufferF::Dict{Int,AT4}
InitRecvBuffer::Bool
InitRecvBufferF::Bool
RecvBuffer::Dict
# RecvBuffer3::Dict{Int,Array{FT, 3}}
RecvBuffer3::Dict{Int,AT3}
RecvBufferF::Dict{Int,Array{FT, 4}}
RecvBufferF::Dict{Int,AT4}
sreq::MPI.UnsafeMultiRequest
rreq::MPI.UnsafeMultiRequest
end
Expand All @@ -45,10 +46,12 @@ function ExchangeStruct{FT}(backend) where FT<:AbstractFloat
sreq = MPI.UnsafeMultiRequest(0)
rreq = MPI.UnsafeMultiRequest(0)
AT3 = KernelAbstractions.zeros(backend,FT,0,0,0)
AT4 = KernelAbstractions.zeros(backend,FT,0,0,0,0)
IT1 = KernelAbstractions.zeros(backend,Int,0)
return ExchangeStruct{FT,
typeof(IT1),
typeof(AT3)}(
typeof(AT3),
typeof(AT4)}(
IndSendBuffer,
IndSendBufferF,
IndRecvBuffer,
Expand Down Expand Up @@ -358,6 +361,7 @@ function ExchangeStruct{FT}(backend,SubGrid,OrdPoly,CellToProc,Proc,ProcNumber,H

# Copy from CPU to device
AT3 = KernelAbstractions.zeros(backend,FT,0,0,0)
AT4 = KernelAbstractions.zeros(backend,FT,0,0,0,0)
IT1 = KernelAbstractions.zeros(backend,Int,0)

SendBuffer = Dict()
Expand All @@ -374,7 +378,8 @@ function ExchangeStruct{FT}(backend,SubGrid,OrdPoly,CellToProc,Proc,ProcNumber,H

return ExchangeStruct{FT,
typeof(IT1),
typeof(AT3)}(
typeof(AT3),
typeof(AT4)}(
SendBuffer,
IndSendBufferF,
RecvBuffer,
Expand Down Expand Up @@ -675,6 +680,75 @@ function ExchangeDataFSend(cFMin,cFMax,Exchange)
end
end

"""
    ExchangeDataFSendGPU(cFMin, cFMax, Exchange)

Pack the entries of `cFMin` and `cFMax` selected by `Exchange.IndSendBufferF`
into per-neighbour send buffers with a KernelAbstractions kernel, then post
one non-blocking `MPI.Irecv!` and one non-blocking `MPI.Isend` per neighbour
in `Exchange.NeiProc`. The requests are stored in `Exchange.rreq` /
`Exchange.sreq`; completion is awaited in `ExchangeDataFRecvGPU!`.

On the first call (`Exchange.InitRecvBufferF == true`) the send and receive
buffers of shape `(nz, nInd, 2, nT)` are allocated, where `nz = size(cFMin,1)`
and `nT = size(cFMin,3)`. Slot `1` of the third dimension carries `cFMin`,
slot `2` carries `cFMax`.

# Arguments
- `cFMin`, `cFMax`: arrays indexed as `[iz, ind, iT]`; their backend and
  element type determine where and how the buffers are allocated.
- `Exchange`: exchange descriptor providing index lists, neighbour ranks,
  buffers and MPI request sets.
"""
function ExchangeDataFSendGPU(cFMin,cFMax,Exchange)
  backend = get_backend(cFMin)
  FT = eltype(cFMin)
  IndSendBufferF = Exchange.IndSendBufferF
  IndRecvBufferF = Exchange.IndRecvBufferF
  NeiProc = Exchange.NeiProc
  Proc = Exchange.Proc
  ProcNumber = Exchange.ProcNumber
  nz = size(cFMin,1)
  nT = size(cFMin,3)

  if Exchange.InitRecvBufferF
    # Allocate the buffers on the same backend and with the same element type
    # as the data. A plain `zeros(...)` would create a host Float64 array and
    # depend on an implicit conversion when stored in the typed buffer Dict.
    @inbounds for iP in NeiProc
      Exchange.RecvBufferF[iP] =
        KernelAbstractions.zeros(backend,FT,nz,length(IndRecvBufferF[iP]),2,nT)
      Exchange.SendBufferF[iP] =
        KernelAbstractions.zeros(backend,FT,nz,length(IndSendBufferF[iP]),2,nT)
    end
    Exchange.InitRecvBufferF = false
    Exchange.InitSendBufferF = false
  end
  RecvBufferF = Exchange.RecvBufferF
  SendBufferF = Exchange.SendBufferF
  rreq = Exchange.rreq
  sreq = Exchange.sreq

  # Pack: one kernel launch per neighbour.
  group = (nz,5,1)
  KExchangeDataFSendKernel! = ExchangeDataFSendKernel!(backend,group)
  @inbounds for iP in NeiProc
    ndrange = (nz,length(IndSendBufferF[iP]),nT)
    KExchangeDataFSendKernel!(cFMin,cFMax,SendBufferF[iP],IndSendBufferF[iP],ndrange=ndrange)
  end
  # Kernel launches may be asynchronous; make sure every send buffer is fully
  # packed before MPI is allowed to read from it.
  KernelAbstractions.synchronize(backend)

  # Post receives first, then sends. The receive tag mirrors the tag the
  # neighbour uses for its send to this rank.
  @inbounds for (i,iP) in enumerate(NeiProc)
    tag = Proc + ProcNumber*iP
    MPI.Irecv!(RecvBufferF[iP], iP - 1, tag, MPI.COMM_WORLD, rreq[i])
  end
  @inbounds for (i,iP) in enumerate(NeiProc)
    tag = iP + ProcNumber*Proc
    MPI.Isend(SendBufferF[iP], iP - 1, tag, MPI.COMM_WORLD, sreq[i])
  end
  return nothing
end

# Pack kernel: for every work item (iLev, iBuf, iTr) copy the entry of cFMin /
# cFMax addressed by IndSendBufferF[iBuf] into slots 1 / 2 of SendBufferF.
# The launch range is expected to be (size of dim 1, number of indices, size of dim 3).
@kernel function ExchangeDataFSendKernel!(cFMin,cFMax,SendBufferF,IndSendBufferF)

  iLev,iBuf,iTr = @index(Global, NTuple)
  nBuf = @uniform @ndrange()[2]
  nTr = @uniform @ndrange()[3]

  # Guard against work items outside the requested range.
  if iBuf <= nBuf && iTr <= nTr
    @inbounds iCol = IndSendBufferF[iBuf]
    @inbounds SendBufferF[iLev,iBuf,1,iTr] = cFMin[iLev,iCol,iTr]
    @inbounds SendBufferF[iLev,iBuf,2,iTr] = cFMax[iLev,iCol,iTr]
  end
end

function ExchangeDataFRecv!(cFMin,cFMax,Exchange)

IndRecvBufferF = Exchange.IndRecvBufferF
Expand All @@ -697,6 +771,52 @@ function ExchangeDataFRecv!(cFMin,cFMax,Exchange)
end
end

"""
    ExchangeDataFRecvGPU!(cFMin, cFMax, Exchange)

Wait for all outstanding requests in `Exchange.rreq` and `Exchange.sreq`,
synchronise all ranks with `MPI.Barrier`, and then scatter the contents of
every `Exchange.RecvBufferF[iP]` into `cFMin` (buffer slot 1) and `cFMax`
(buffer slot 2) at the positions listed in `Exchange.IndRecvBufferF[iP]`,
using one KernelAbstractions kernel launch per neighbour.

# Arguments
- `cFMin`, `cFMax`: arrays indexed as `[iz, ind, iT]`; overwritten in place at
  the received positions.
- `Exchange`: exchange descriptor holding the receive buffers, index lists,
  neighbour ranks and MPI request sets filled by `ExchangeDataFSendGPU`.
"""
function ExchangeDataFRecvGPU!(cFMin,cFMax,Exchange)
  backend = get_backend(cFMin)

  IndRecvBufferF = Exchange.IndRecvBufferF
  NeiProc = Exchange.NeiProc
  RecvBufferF = Exchange.RecvBufferF
  rreq = Exchange.rreq
  sreq = Exchange.sreq

  # All receives must have landed (and all sends completed, so the send
  # buffers may be reused) before unpacking.
  MPI.Waitall(rreq)
  MPI.Waitall(sreq)
  MPI.Barrier(MPI.COMM_WORLD)

  Nz = size(cFMin,1)
  # The data arrays are indexed with three indices in the kernel, so the
  # trailing extent is dimension 3. size(cFMin,4) is 1 for a 3-D array and
  # would leave every slab beyond the first one un-exchanged.
  nT = size(cFMin,3)
  group = (Nz,5,1)
  KExchangeDataFRecvKernel! = ExchangeDataFRecvKernel!(backend,group)

  # Unpack: one kernel launch per neighbour.
  @inbounds for iP in NeiProc
    ndrange = (Nz,length(IndRecvBufferF[iP]),nT)
    KExchangeDataFRecvKernel!(cFMin,cFMax,RecvBufferF[iP],IndRecvBufferF[iP],ndrange=ndrange)
  end
  return nothing
end

# Unpack kernel: for every work item (iLev, iBuf, iTr) write slots 1 / 2 of
# RecvBufferF into cFMin / cFMax at the position given by IndRecvBufferF[iBuf].
# The launch range is expected to be (size of dim 1, number of indices, size of dim 3).
@kernel function ExchangeDataFRecvKernel!(cFMin,cFMax,RecvBufferF,IndRecvBufferF)

  iLev,iBuf,iTr = @index(Global, NTuple)
  nBuf = @uniform @ndrange()[2]
  nTr = @uniform @ndrange()[3]

  # Guard against work items outside the requested range.
  if iBuf <= nBuf && iTr <= nTr
    @inbounds iCol = IndRecvBufferF[iBuf]
    cFMin[iLev,iCol,iTr] = RecvBufferF[iLev,iBuf,1,iTr]
    cFMax[iLev,iCol,iTr] = RecvBufferF[iLev,iBuf,2,iTr]
  end
end

function InitExchangeData3D(backend,FT,nz,nT,Exchange)
IndSendBuffer = Exchange.IndSendBuffer
IndRecvBuffer = Exchange.IndRecvBuffer
Expand Down

0 comments on commit ab1660a

Please sign in to comment.