Skip to content

Commit

Permalink
Try different threadblock configuration for SE kernels
Browse files Browse the repository at this point in the history
  • Loading branch information
sriharshakandala committed Aug 16, 2023
1 parent 2d57b46 commit ba34ce0
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 18 deletions.
28 changes: 27 additions & 1 deletion examples/hybrid/tuning/mwe_tune_ke.jl
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,33 @@ function profile_compute_kinetic(::Type{FT}) where {FT}
CUDA.@elapsed κ_ref = compute_kinetic_ref!(κ_ref, uₕ_ref, uᵥ_ref)
println("t_ca = $t_ca (sec); t_ref = $t_ref (sec)")
end
return nothing
end

#profile_compute_kinetic(Float64)

"""
    profile_compute_divergence(::Type{FT}) where {FT}

Profile the broadcasted `Operators.Divergence()` operator applied to the
horizontal velocity field produced by `initialize_mwe` on the CUDA device.

Two passes of `nreps` repetitions are run:

1. each in-place evaluation is timed with `CUDA.@elapsed` and the elapsed
   time is printed;
2. each in-place evaluation is wrapped in an `NVTX.@range` annotation (with
   the repetition number as payload) and synchronized with `CUDA.@sync`.

Returns `nothing`.
"""
function profile_compute_divergence(::Type{FT}) where {FT}
    # Only the second value returned by `initialize_mwe` is used in this routine.
    _, uₕ, _ = initialize_mwe(ClimaComms.CUDADevice(), FT)
    _, uₕ_cpu, _ = initialize_mwe(ClimaComms.CPUSingleThreaded(), FT)

    hdiv = Operators.Divergence()
    # Evaluated on the CPU-initialized field; the result is not consulted below.
    horz_div_cpu = hdiv.(uₕ_cpu)
    # Allocating evaluation on the GPU field; reused as the in-place destination.
    horz_div = hdiv.(uₕ)

    nreps = 100

    # Pass 1: per-repetition timings printed to stdout.
    for _ in 1:nreps
        t_div = CUDA.@elapsed horz_div .= hdiv.(uₕ)
        println("t_div = $t_div (sec)")
    end

    # Pass 2: NVTX-annotated repetitions for inspection in an external profiler.
    for rep in 1:nreps
        NVTX.@range "compute_horizontal_divergence!" color = colorant"blue" payload =
            rep begin
            CUDA.@sync horz_div .= hdiv.(uₕ)
        end
    end

    return nothing
end

# Script entry point: run both profiling routines with `Float64` as the float type.
profile_compute_kinetic(Float64)
profile_compute_divergence(Float64)
40 changes: 23 additions & 17 deletions src/Operators/spectralelement.jl
Original file line number Diff line number Diff line change
Expand Up @@ -254,34 +254,40 @@ function Base.copyto!(
Nh = Topologies.nlocalelems(Spaces.topology(space))
Nv = Spaces.nlevels(space)
# executed
max_threads = 256
nitems = Nv * Nq * Nq * Nh # # of independent items
(nthreads, nblocks) = Spaces._configure_threadblock(max_threads, nitems)
@inbounds begin
@cuda threads = (Nq, Nq) blocks = (Nh, Nv) copyto_spectral_kernel!(
@cuda threads = (nthreads,) blocks = (nblocks,) copyto_spectral_kernel!(
strip_space(out, space),
strip_space(sbc, space),
space,
Nv,
Nq,
Nh,
)
end
return out
end

"""
    copyto_spectral_kernel!(out::Fields.Field, sbc, space, Nv, Nq, Nh)

GPU kernel that evaluates the spectral broadcasted object `sbc` node by node
and stores each result in `out`.

The kernel is launched on a 1-D grid. Each thread computes a linear index
`gid` from its thread and block indices; threads with
`gid > Nv * Nq * Nq * Nh` do nothing, so the launch may contain more threads
than there are nodes.

# Arguments
- `out`: destination field, written through `set_node!`.
- `sbc`: broadcasted object, read through `get_node`.
- `space`: space used to select how the slab index is built.
- `Nv`, `Nq`, `Nh`: extents of the `(v, i, j, h)` index space, passed to
  `Spaces._get_idx` as `(Nv, Nq, Nq, Nh)`.

Returns `nothing`.
"""
function copyto_spectral_kernel!(out::Fields.Field, sbc, space, Nv, Nq, Nh)
    @inbounds begin
        # Linear (1-based) index of this thread across the whole 1-D grid.
        gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
        # Guard: the launch configuration may round the thread count up.
        if gid <= Nv * Nq * Nq * Nh
            # NOTE(review): `_get_idx` presumably converts the linear index into
            # a (v, i, j, h) tuple for the given extents — confirm against Spaces.
            (v, i, j, h) = Spaces._get_idx((Nv, Nq, Nq, Nh), gid)
            if space isa Spaces.AbstractSpectralElementSpace
                # No vertical index is supplied for this kind of space.
                slabidx = Fields.SlabIndex(nothing, h)
            elseif space isa Spaces.FaceExtrudedFiniteDifferenceSpace
                # Face spaces shift the level index by `half`.
                slabidx = Fields.SlabIndex(v - half, h)
            elseif space isa Spaces.CenterExtrudedFiniteDifferenceSpace
                slabidx = Fields.SlabIndex(v, h)
            else
                error("Invalid space")
            end
            ij = CartesianIndex((i, j))
            result = get_node(space, sbc, ij, slabidx)
            set_node!(space, out, ij, slabidx, result)
        end
    end
    return nothing
end
Expand Down

0 comments on commit ba34ce0

Please sign in to comment.