Skip to content

Commit 8558ce3

Browse files
committed
HeldSuarez run on Metal, Advection parallel on GPU
1 parent 13bdb3d commit 8558ce3

11 files changed

+110
-35
lines changed

Jobs/JobAdvectionSlottedCylinder

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
mpirun -n 1 julia --project Examples/testAdvectionSphere.jl \
1+
mpirun -n 6 julia --project Examples/testAdvectionSphere.jl \
22
--Problem="AdvectionSphereSlottedCylinder" \
3-
--Device="" \
3+
--Device="CPU" \
44
--FloatTypeBackend="Float64" \
55
--NumV=5 \
66
--NumTr=1 \
@@ -13,7 +13,7 @@ mpirun -n 1 julia --project Examples/testAdvectionSphere.jl \
1313
--HorLimit=true \
1414
--Decomp="EqualArea" \
1515
--vtkFileName="SlottedCylinderSquad" \
16-
--SimTime=5 \
16+
--SimTime=5.0 \
1717
--PrintTime=0.25\
1818
--Flat=false \
1919
--dtau=0.0004 \
@@ -28,8 +28,8 @@ mpirun -n 1 julia --project Examples/testAdvectionSphere.jl \
2828
--H=1.0 \
2929
--OrdPoly=4 \
3030
--HyperVisc=true \
31-
--HyperDCurl=0.e13 \
32-
--HyperDGrad=0.e13 \
33-
--HyperDDiv=0.e13
31+
--HyperDCurl=1.e-5 \
32+
--HyperDGrad=1.e-5 \
33+
--HyperDDiv=1.e-5
3434

3535

Jobs/JobAdvectionSlottedCylinderC1

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
mpirun -n 1 julia --project Examples/testAdvectionSphere.jl \
2+
--Problem="AdvectionSphereSlottedCylinder" \
3+
--Device="CPU" \
4+
--FloatTypeBackend="Float64" \
5+
--NumV=5 \
6+
--NumTr=1 \
7+
--ProfRho="" \
8+
--ProfTheta="" \
9+
--ProfVel="Divergent" \
10+
--ProfVelW="" \
11+
--ProfTr="AdvectionSphereSlottedCylinder" \
12+
--Upwind=false \
13+
--HorLimit=false \
14+
--Decomp="EqualArea" \
15+
--vtkFileName="SlottedCylinderSquad" \
16+
--SimTime=0.05 \
17+
--PrintTime=0.01\
18+
--Flat=false \
19+
--dtau=0.01 \
20+
--IntMethod="SSPRungeKutta" \
21+
--Table="SSPRK1" \
22+
--TopoS="" \
23+
--GridType="CubedSphere" \
24+
--nz=1 \
25+
--RadEarth=1.0 \
26+
--nPanel=2 \
27+
--RadEarth=1 \
28+
--H=1.0 \
29+
--OrdPoly=1 \
30+
--HyperVisc=true \
31+
--HyperDCurl=0.e-5 \
32+
--HyperDGrad=0.e-5 \
33+
--HyperDDiv=0.e-5
34+
35+

Jobs/JobAdvectionSlottedCylinderC2

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
mpirun -n 2 julia --project Examples/testAdvectionSphere.jl \
2+
--Problem="AdvectionSphereSlottedCylinder" \
3+
--Device="CPU" \
4+
--FloatTypeBackend="Float64" \
5+
--NumV=5 \
6+
--NumTr=1 \
7+
--ProfRho="" \
8+
--ProfTheta="" \
9+
--ProfVel="Divergent" \
10+
--ProfVelW="" \
11+
--ProfTr="AdvectionSphereSlottedCylinder" \
12+
--Upwind=false \
13+
--HorLimit=false \
14+
--Decomp="EqualArea" \
15+
--vtkFileName="SlottedCylinderSquad" \
16+
--SimTime=0.05 \
17+
--PrintTime=0.01\
18+
--Flat=false \
19+
--dtau=0.01 \
20+
--IntMethod="SSPRungeKutta" \
21+
--Table="SSPRK1" \
22+
--TopoS="" \
23+
--GridType="CubedSphere" \
24+
--nz=1 \
25+
--RadEarth=1.0 \
26+
--nPanel=2 \
27+
--RadEarth=1 \
28+
--H=1.0 \
29+
--OrdPoly=1 \
30+
--HyperVisc=true \
31+
--HyperDCurl=0.e-5 \
32+
--HyperDGrad=0.e-5 \
33+
--HyperDDiv=0.e-5
34+
35+

Jobs/JobAdvectionSlottedCylinderCPU

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
mpirun -n 1 julia --project Examples/testAdvectionSphere.jl \
1+
mpirun -n 2 julia --project Examples/testAdvectionSphere.jl \
22
--Problem="AdvectionSphereSlottedCylinder" \
33
--Device="CPU" \
44
--FloatTypeBackend="Float64" \
@@ -10,11 +10,11 @@ mpirun -n 1 julia --project Examples/testAdvectionSphere.jl \
1010
--ProfVelW="" \
1111
--ProfTr="AdvectionSphereSlottedCylinder" \
1212
--Upwind=false \
13-
--HorLimit=true \
13+
--HorLimit=false \
1414
--Decomp="EqualArea" \
1515
--vtkFileName="SlottedCylinderSquad" \
16-
--SimTime=5 \
17-
--PrintTime=0.25\
16+
--SimTime=0.05 \
17+
--PrintTime=0.01\
1818
--Flat=false \
1919
--dtau=0.0004 \
2020
--IntMethod="SSPRungeKutta" \
@@ -26,10 +26,10 @@ mpirun -n 1 julia --project Examples/testAdvectionSphere.jl \
2626
--nPanel=32 \
2727
--RadEarth=1 \
2828
--H=1.0 \
29-
--OrdPoly=4 \
29+
--OrdPoly=1 \
3030
--HyperVisc=true \
31-
--HyperDCurl=0.e13 \
32-
--HyperDGrad=0.e13 \
33-
--HyperDDiv=0.e13
31+
--HyperDCurl=0.e-5 \
32+
--HyperDGrad=0.e-5 \
33+
--HyperDDiv=0.e-5
3434

3535

Jobs/JobNHHeldSuarezMoistSphere

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
mpirun -n 6 julia --project Examples/testNHSphere.jl \
22
--Problem="HeldSuarezMoistSphere" \
3-
--Device="" \
3+
--Device="CPU" \
44
--GPUType="Metal" \
55
--NumberThreadGPU=512 \
66
--FloatTypeBackend="Float32" \

ToDo

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,11 @@ NVTX.@range "KHyperViscKernel!" let
1515
KHyperViscKernel!(CacheF,MRho,U,DS,DW,dXdxI,J,M,Glob,ndrange=ndrange)
1616
KernelAbstractions.synchronize(backend)
1717
end
18+
19+
20+
4 A100 GPUs
21+
Simulation time 10 days
22+
110 km resolution, timestep 150 s: 163 s (presumably wall-clock run time; confirm)
23+
55 km resolution, timestep 75 s: 759 s (presumably wall-clock run time; confirm)
24+
25+
Test parallel Advection with horizontal limiter

src/DyCore/FcnTracer.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ function FcnTracer!(F,U,time,CG,Metric,Phys,Cache,Exchange,Global,Param,Profile)
3535

3636
if HorLimit
3737
@views Limit!(qMin,qMax,U[:,:,NumV+1:NumV+NumTr],U[:,:,RhoPos],CG,Global)
38-
ExchangeDataFSend(qMin,qMax,Exchange)
39-
ExchangeDataFRecv!(qMin,qMax,Exchange)
38+
Parallels.ExchangeDataFSend(qMin,qMax,Exchange)
39+
Parallels.ExchangeDataFRecv!(qMin,qMax,Exchange)
4040
end
4141

4242
# Hyperdiffusion

src/DyCore/MassCG.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@ function MassCGGPU!(CG,J,Glob,Exchange,Global)
4141
MMass = CG.MMass
4242
MW = CG.MW
4343

44-
@. M = FT(0)
45-
@. MMass = FT(0)
46-
@. MW = FT(0)
44+
M .= FT(0)
45+
MMass .= FT(0)
46+
MW .= FT(0)
4747

4848
NumberThreadGPU = Global.ParallelCom.NumberThreadGPU
4949

@@ -71,7 +71,7 @@ end
7171
ID = I + (J - 1) * N
7272
@inbounds ind = Glob[ID,IF]
7373
@inbounds @atomic M[Iz,ind] += (JJ[ID,1,Iz,IF] + JJ[ID,2,Iz,IF])
74-
@inbounds @atomic MMass[Iz,ind] += 0.5 * (JJ[ID,1,Iz,IF] + JJ[ID,2,Iz,IF]) * w[I] * w[J]
74+
@inbounds @atomic MMass[Iz,ind] += eltype(M)(0.5) * (JJ[ID,1,Iz,IF] + JJ[ID,2,Iz,IF]) * w[I] * w[J]
7575
end
7676
if Iz < Nz && IF <= NF
7777
ID = I + (J - 1) * N

src/GPU/FcnGPU.jl

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ function FcnAdvectionGPU!(F,U,time,FE,Metric,Phys,Cache,Exchange,Global,Param,Pr
5959
if Global.Model.HorLimit
6060
@views KLimitKernel!(DoF,qMin,qMax,U[:,:,NumV+1:NumV+NumTr],Rho,Glob,ndrange=ndrangeL)
6161
KernelAbstractions.synchronize(backend)
62+
Parallels.ExchangeDataFSend(qMin,qMax,Exchange)
63+
Parallels.ExchangeDataFRecv!(qMin,qMax,Exchange)
6264
end
6365

6466

@@ -74,7 +76,7 @@ function FcnAdvectionGPU!(F,U,time,FE,Metric,Phys,Cache,Exchange,Global,Param,Pr
7476
Parallels.ExchangeData3DSendGPU(CacheTr,Exchange)
7577
Parallels.ExchangeData3DRecvGPU!(CacheTr,Exchange)
7678

77-
@. F = 0
79+
F .= FT(0)
7880
# @views KHyperViscTracerKoeffKernel!(F[:,:,1+NumV],CacheTr,Rho,DS,DW,dXdxI,J,M,Glob,
7981
# KoeffDiv,ndrange=ndrange)
8082
# KernelAbstractions.synchronize(backend)
@@ -98,22 +100,17 @@ function FcnAdvectionGPU!(F,U,time,FE,Metric,Phys,Cache,Exchange,Global,Param,Pr
98100
@views KDivRhoTrUpwind3Kernel!(F[:,:,1+NumV],U[:,:,1+NumV],U,DS,
99101
dXdxI,J,M,Glob,ndrange=ndrange)
100102
KernelAbstractions.synchronize(backend)
101-
102-
KDivRhoKernel!(F,U,DS,dXdxI,J,M,Glob,ndrange=ndrange)
103-
KernelAbstractions.synchronize(backend)
104103
end
105104

106105
# Data exchange
107-
Parallels.ExchangeData3DSendGPU(F[:,:,1+NumV],Exchange)
108-
Parallels.ExchangeData3DRecvGPU!(F[:,:,1+NumV],Exchange)
109-
106+
@views Parallels.ExchangeData3DSendGPU(F[:,:,1:1+NumV],Exchange)
107+
@views Parallels.ExchangeData3DRecvGPU!(F[:,:,1:1+NumV],Exchange)
110108
end
111109

112110
function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType)
113111

114112
backend = get_backend(F)
115113
FT = eltype(F)
116-
@. F = 0
117114
Glob = FE.Glob
118115
DS = FE.DS
119116
DW = FE.DW
@@ -204,7 +201,7 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType)
204201
####
205202
# First phase
206203
####
207-
@. Temp1 = FT(0)
204+
Temp1 .= FT(0)
208205
@views MRho = CacheF[:,:,6]
209206
KHyperViscKernel!(CacheF,MRho,U,DS,DW,dXdxI,J,M,Glob,ndrange=ndrangeB)
210207
for iT = 1 : NumTr
@@ -227,7 +224,7 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType)
227224
# Second phase
228225
####
229226

230-
@. F = 0
227+
F .= FT(0)
231228
KHyperViscKoeffKernel!(F,U,CacheF,DS,DW,dXdxI,J,M,Glob,KoeffCurl,KoeffGrad,KoeffDiv,ndrange=ndrangeB)
232229
for iT = 1 : NumTr
233230
@views CacheTr = Temp1[:,:,iT + 6]

src/GPU/HorLimiterKernel.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ end
165165
end
166166
end
167167
@synchronize
168-
for iTer = 1 : 5
168+
for iTer = 1 : 8
169169
if Iz <= Nz && conv[iz]
170170
ID = I + (J - 1) * N
171171
@inbounds q[I,J,iz] = medianGPU(qMinS[iz], RhoTrColS[I,J,iz] / RhoColS[I,J,iz] +

src/GPU/OperatorKernel.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,14 +1346,14 @@ end
13461346
Rm = Phys.Rd * RhoD + Phys.Rv * RhoV
13471347
Cpml = Phys.Cpd * RhoD + Phys.Cpv * RhoV
13481348
@inbounds T = p[1,ind] / Rm
1349-
@inbounds LatFlux = - 2.0 * CT[ID,IF] * uStar[ID,IF] * dXdxI[3,3,1,ID,1,IF] *
1349+
@inbounds LatFlux = - eltype(F)(2) * CT[ID,IF] * uStar[ID,IF] * dXdxI[3,3,1,ID,1,IF] *
13501350
(RhoV[1,ind] - RhoVSurf[ID,IF]) / M[1,ind]
1351-
@inbounds SensFlux = - 2.0 * CH[ID,IF] * uStar[ID,IF] * dXdxI[3,3,1,ID,1,IF] *
1351+
@inbounds SensFlux = - eltype(F)(2) * CH[ID,IF] * uStar[ID,IF] * dXdxI[3,3,1,ID,1,IF] *
13521352
(T - TSurf[ID,IF]) / M[1,ind]
13531353
FRho = LatFlux
13541354
FRhoV = LatFlux
13551355
PrePi=(p[1,ind] / Phys.p0)^(Rm / Cpml)
1356-
FRhoTh = RhoTh * (SensFlux / T + ((Phys.Rv / Rm) - 1.0 / Rho -
1356+
FRhoTh = RhoTh * (SensFlux / T + ((Phys.Rv / Rm) - eltype(F)(1) / Rho -
13571357
log(PrePi)*(Phys.Rv / Rm - Phys.Cpv / Cpml)) * LatFlux)
13581358
@inbounds @atomic F[1,ind,1] += FRho
13591359
@inbounds @atomic F[1,ind,5] += FRhoTh

0 commit comments

Comments
 (0)