Skip to content

Commit

Permalink
use atomci in KA
Browse files Browse the repository at this point in the history
  • Loading branch information
zhenwu0728 committed May 6, 2024
1 parent 1d14e45 commit 5c88f26
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 174 deletions.
20 changes: 6 additions & 14 deletions src/Diagnostics/diagnostics_kernels.jl
Original file line number Diff line number Diff line change
@@ -1,23 +1,15 @@
#####
##### record diagnostics of plankton processes at each time step
#####

function gpu_diags_proc_kernel!(diags_proc, proc, ac, x, y, z)
index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
stride = blockDim().x * gridDim().x
for i = index:stride:size(ac,1)
@inbounds CUDA.@atomic diags_proc[x[i], y[i], z[i]] += proc[i] * ac[i]
end
function diags_proc_kernel!(diags_proc, proc, ac, x, y, z)
i = @index(Global)
@inbounds KernelAbstractions.@atomic diags_proc[x[i], y[i], z[i]] += proc[i] * ac[i]
end
function diags_proc!(diags_proc, proc, ac, x, y, z, ::GPU)
@cuda threads=256 blocks=ceil(Int, size(ac,1)/256) gpu_diags_proc_kernel!(diags_proc, proc, ac, x, y, z)
function diags_proc!(diags_proc, proc, ac, x, y, z, arch)
kernel! = diags_proc_kernel!(device(arch), 256, (size(ac,1)))
kernel!(diags_proc, proc, ac, x, y, z)
return nothing
end
function diags_proc!(diags_proc, proc, ac, x, y, z, ::CPU)
for i in 1:size(ac,1)
@inbounds diags_proc[x[i], y[i], z[i]] += proc[i] * ac[i]
end
end

function diags_spcs!(diags_sp, plank, ac, x, y, z, mode::AbstractMode, arch::Architecture)
diags = (:PS, :resp, :Bm, :Chl, :Th)
Expand Down
50 changes: 15 additions & 35 deletions src/Plankton/CarbonMode/consume_loss.jl
Original file line number Diff line number Diff line change
@@ -1,42 +1,22 @@
##### deal with nutrients uptake
function gpu_calc_consume_kernel!(ctsdic, plank, ac, x, y, z, ΔT)
index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
stride = blockDim().x * gridDim().x
for i = index:stride:size(ac,1)
@inbounds CUDA.@atomic ctsdic[x[i], y[i], z[i]] += (plank.RS[i] - plank.PS[i]) * ΔT * ac[i]
end
return nothing
function calc_consume_kernel!(ctsdic, plank, ac, x, y, z, ΔT)
i = @index(Global)
@inbounds KernelAbstractions.@atomic ctsdic[x[i], y[i], z[i]] += (plank.RS[i] - plank.PS[i]) * ΔT * ac[i]
end
function calc_consume!(ctsdic, plank, ac, x, y, z, ΔT, ::GPU)
@cuda threads=256 blocks=ceil(Int, size(ac,1)/256) gpu_calc_consume_kernel!(ctsdic, plank, ac, x, y, z, ΔT)
function calc_consume!(ctsdic, plank, ac, x, y, z, ΔT, arch)
kernel! = calc_consume_kernel!(device(arch), 256, (size(ac,1)))
kernel!(ctsdic, plank, ac, x, y, z, ΔT)
return nothing
end
function calc_consume!(ctsdic, plank, ac, x, y, z, ΔT, ::CPU)
for i in 1:size(ac,1)
@inbounds ctsdic[x[i], y[i], z[i]] += (plank.RS[i] - plank.PS[i]) * ΔT * ac[i]
end
return nothing
end

##### deal with grazed or dead individuals
function gpu_calc_loss_kernel!(ctsdoc, ctspoc, plank, ac, x, y, z, loss, lossFracC)
index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
stride = blockDim().x * gridDim().x
for i = index:stride:size(ac,1)
@inbounds CUDA.@atomic ctsdoc[x[i], y[i], z[i]] += plank.Bm[i] * lossFracC * ac[i] * loss[i]
@inbounds CUDA.@atomic ctspoc[x[i], y[i], z[i]] += plank.Bm[i] * (1.0-lossFracC) * ac[i] * loss[i]
end
return nothing
end
function calc_loss!(ctsdoc, ctspoc, plank, ac, x, y, z,
loss, lossFracC, ::GPU)
@cuda threads=256 blocks=ceil(Int, size(ac,1)/256) gpu_calc_loss_kernel!(ctsdoc, ctspoc, plank, ac, x, y, z, loss, lossFracC)
return nothing
end
function calc_loss!(ctsdoc, ctspoc, plank, ac, x, y, z, loss, lossFracC, ::CPU)
for i in 1:size(ac,1)
@inbounds ctsdoc[x[i], y[i], z[i]] += plank.Bm[i] * lossFracC * ac[i] * loss[i]
@inbounds ctspoc[x[i], y[i], z[i]] += plank.Bm[i] * (1.0-lossFracC) * ac[i] * loss[i]
end
return nothing
function calc_loss_kernel!(ctsdoc, ctspoc, plank, ac, x, y, z, loss, lossFracC)
i = @index(Global)
@inbounds KernelAbstractions.@atomic ctsdoc[x[i], y[i], z[i]] += plank.Bm[i] * lossFracC * ac[i] * loss[i]
@inbounds KernelAbstractions.@atomic ctspoc[x[i], y[i], z[i]] += plank.Bm[i] * (1.0-lossFracC) * ac[i] * loss[i]
end
function calc_loss!(ctsdoc, ctspoc, plank, ac, x, y, z, loss, lossFracC, arch)
kernel! = calc_loss_kernel!(device(arch), 256, (size(ac,1)))
kernel!(ctsdoc, ctspoc, plank, ac, x, y, z, loss, lossFracC)
return nothing
end
78 changes: 24 additions & 54 deletions src/Plankton/MacroMolecularMode/consume_loss.jl
Original file line number Diff line number Diff line change
@@ -1,63 +1,33 @@
##### deal with nutrients uptake
function gpu_calc_consume_kernel!(ctsdic, ctsdoc, ctsnh4, ctsno3, ctspo4, plank, ac, x, y, z, ΔT)
index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
stride = blockDim().x * gridDim().x
for i = index:stride:size(ac,1)
@inbounds CUDA.@atomic ctsdic[x[i], y[i], z[i]] += (plank.resp[i] - plank.PS[i]) * ΔT * ac[i]
@inbounds CUDA.@atomic ctsdoc[x[i], y[i], z[i]] += (plank.exu[i] - plank.VDOC[i]) * ΔT * ac[i]
@inbounds CUDA.@atomic ctsnh4[x[i], y[i], z[i]] += -plank.VNH4[i] * ΔT * ac[i]
@inbounds CUDA.@atomic ctsno3[x[i], y[i], z[i]] += -plank.VNO3[i] * ΔT * ac[i]
@inbounds CUDA.@atomic ctspo4[x[i], y[i], z[i]] += -plank.VPO4[i] * ΔT * ac[i]
end
return nothing
function calc_consume_kernel!(ctsdic, ctsdoc, ctsnh4, ctsno3, ctspo4, plank, ac, x, y, z, ΔT)
i = @index(Global)
@inbounds KernelAbstractions.@atomic ctsdic[x[i], y[i], z[i]] += (plank.resp[i] - plank.PS[i]) * ΔT * ac[i]
@inbounds KernelAbstractions.@atomic ctsdoc[x[i], y[i], z[i]] += (plank.exu[i] - plank.VDOC[i]) * ΔT * ac[i]
@inbounds KernelAbstractions.@atomic ctsnh4[x[i], y[i], z[i]] += -plank.VNH4[i] * ΔT * ac[i]
@inbounds KernelAbstractions.@atomic ctsno3[x[i], y[i], z[i]] += -plank.VNO3[i] * ΔT * ac[i]
@inbounds KernelAbstractions.@atomic ctspo4[x[i], y[i], z[i]] += -plank.VPO4[i] * ΔT * ac[i]
end
function calc_consume!(ctsdic, ctsdoc, ctsnh4, ctsno3, ctspo4, plank, ac, x, y, z, ΔT, ::GPU)
@cuda threads=256 blocks=ceil(Int, size(ac,1)/256) gpu_calc_consume_kernel!(ctsdic, ctsdoc, ctsnh4,
ctsno3, ctspo4, plank, ac, x, y, z, ΔT)
function calc_consume!(ctsdic, ctsdoc, ctsnh4, ctsno3, ctspo4, plank, ac, x, y, z, ΔT, arch)
kernel! = calc_consume_kernel!(device(arch), 256, (size(ac,1)))
kernel!(ctsdic, ctsdoc, ctsnh4, ctsno3, ctspo4, plank, ac, x, y, z, ΔT)
return nothing
end
function calc_consume!(ctsdic, ctsdoc, ctsnh4, ctsno3, ctspo4, plank, ac, x, y, z, ΔT, ::CPU)
for i in 1:size(ac,1)
@inbounds ctsdic[x[i], y[i], z[i]] += (plank.resp[i] - plank.PS[i]) * ΔT * ac[i]
@inbounds ctsdoc[x[i], y[i], z[i]] += (plank.exu[i] - plank.VDOC[i]) * ΔT * ac[i]
@inbounds ctsnh4[x[i], y[i], z[i]] += -plank.VNH4[i] * ΔT * ac[i]
@inbounds ctsno3[x[i], y[i], z[i]] += -plank.VNO3[i] * ΔT * ac[i]
@inbounds ctspo4[x[i], y[i], z[i]] += -plank.VPO4[i] * ΔT * ac[i]
end
return nothing
end

##### deal with grazed or dead individuals
function gpu_calc_loss_kernel!(ctsdoc, ctspoc, ctsdon, ctspon, ctsdop, ctspop, plank, ac, x, y, z,
loss, lossFracC, lossFracN, lossFracP, p)
index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
stride = blockDim().x * gridDim().x
for i = index:stride:size(ac,1)
@inbounds CUDA.@atomic ctsdoc[x[i], y[i], z[i]] += total_C_biomass(plank.PRO[i], plank.DNA[i], plank.RNA[i], plank.CH[i], plank.Chl[i]) * lossFracC * ac[i] * loss[i]
@inbounds CUDA.@atomic ctsdon[x[i], y[i], z[i]] += total_N_biomass(plank.PRO[i], plank.DNA[i], plank.RNA[i], plank.NST[i], plank.Chl[i], p) * lossFracN * ac[i] * loss[i]
@inbounds CUDA.@atomic ctsdop[x[i], y[i], z[i]] += total_P_biomass(plank.DNA[i], plank.RNA[i], plank.PST[i], p) * lossFracP * ac[i] * loss[i]
@inbounds CUDA.@atomic ctspoc[x[i], y[i], z[i]] += total_C_biomass(plank.PRO[i], plank.DNA[i], plank.RNA[i], plank.CH[i], plank.Chl[i]) * (1.0-lossFracC) * ac[i] * loss[i]
@inbounds CUDA.@atomic ctspon[x[i], y[i], z[i]] += total_N_biomass(plank.PRO[i], plank.DNA[i], plank.RNA[i], plank.NST[i], plank.Chl[i], p) * (1.0-lossFracN) * ac[i] * loss[i]
@inbounds CUDA.@atomic ctspop[x[i], y[i], z[i]] += total_P_biomass(plank.DNA[i], plank.RNA[i], plank.PST[i], p) * (1.0-lossFracP) * ac[i] * loss[i]
end
return nothing
end
function calc_loss!(ctsdoc, ctspoc, ctsdon, ctspon, ctsdop, ctspop, plank, ac, x, y, z,
loss, lossFracC, lossFracN, lossFracP, p, ::GPU)
@cuda threads=256 blocks=ceil(Int, size(ac,1)/256) gpu_calc_loss_kernel!(ctsdoc, ctspoc, ctsdon, ctspon,
ctsdop, ctspop, plank, ac, x, y, z, loss,
lossFracC, lossFracN, lossFracP, p)
return nothing
function calc_loss_kernel!(ctsdoc, ctspoc, ctsdon, ctspon, ctsdop, ctspop, plank, ac, x, y, z,
loss, lossFracC, lossFracN, lossFracP, p)
i = @index(Global)
@inbounds KernelAbstractions.@atomic ctsdoc[x[i], y[i], z[i]] += total_C_biomass(plank.PRO[i], plank.DNA[i], plank.RNA[i], plank.CH[i], plank.Chl[i]) * lossFracC * ac[i] * loss[i]
@inbounds KernelAbstractions.@atomic ctsdon[x[i], y[i], z[i]] += total_N_biomass(plank.PRO[i], plank.DNA[i], plank.RNA[i], plank.NST[i], plank.Chl[i], p) * lossFracN * ac[i] * loss[i]
@inbounds KernelAbstractions.@atomic ctsdop[x[i], y[i], z[i]] += total_P_biomass(plank.DNA[i], plank.RNA[i], plank.PST[i], p) * lossFracP * ac[i] * loss[i]
@inbounds KernelAbstractions.@atomic ctspoc[x[i], y[i], z[i]] += total_C_biomass(plank.PRO[i], plank.DNA[i], plank.RNA[i], plank.CH[i], plank.Chl[i]) * (1.0-lossFracC) * ac[i] * loss[i]
@inbounds KernelAbstractions.@atomic ctspon[x[i], y[i], z[i]] += total_N_biomass(plank.PRO[i], plank.DNA[i], plank.RNA[i], plank.NST[i], plank.Chl[i], p) * (1.0-lossFracN) * ac[i] * loss[i]
@inbounds KernelAbstractions.@atomic ctspop[x[i], y[i], z[i]] += total_P_biomass(plank.DNA[i], plank.RNA[i], plank.PST[i], p) * (1.0-lossFracP) * ac[i] * loss[i]
end
function calc_loss!(ctsdoc, ctspoc, ctsdon, ctspon, ctsdop, ctspop, plank, ac, x, y, z,
loss, lossFracC, lossFracN, lossFracP, p, ::CPU)
for i in 1:size(ac,1)
@inbounds ctsdoc[x[i], y[i], z[i]] += total_C_biomass(plank.PRO[i], plank.DNA[i], plank.RNA[i], plank.CH[i], plank.Chl[i]) * lossFracC * ac[i] * loss[i]
@inbounds ctsdon[x[i], y[i], z[i]] += total_N_biomass(plank.PRO[i], plank.DNA[i], plank.RNA[i], plank.NST[i], plank.Chl[i], p) * lossFracN * ac[i] * loss[i]
@inbounds ctsdop[x[i], y[i], z[i]] += total_P_biomass(plank.DNA[i], plank.RNA[i], plank.PST[i], p) * lossFracP * ac[i] * loss[i]
@inbounds ctspoc[x[i], y[i], z[i]] += total_C_biomass(plank.PRO[i], plank.DNA[i], plank.RNA[i], plank.CH[i], plank.Chl[i]) * (1.0-lossFracC) * ac[i] * loss[i]
@inbounds ctspon[x[i], y[i], z[i]] += total_N_biomass(plank.PRO[i], plank.DNA[i], plank.RNA[i], plank.NST[i], plank.Chl[i], p) * (1.0-lossFracN) * ac[i] * loss[i]
@inbounds ctspop[x[i], y[i], z[i]] += total_P_biomass(plank.DNA[i], plank.RNA[i], plank.PST[i], p) * (1.0-lossFracP) * ac[i] * loss[i]
end
return nothing
loss, lossFracC, lossFracN, lossFracP, p, arch)
kernel! = calc_loss_kernel!(device(arch), 256, (size(ac,1)))
kernel!(ctsdoc, ctspoc, ctsdon, ctspon, ctsdop, ctspop, plank, ac, x, y, z,
loss, lossFracC, lossFracN, lossFracP, p)
return nothing
end
78 changes: 24 additions & 54 deletions src/Plankton/QuotaMode/consume_loss.jl
Original file line number Diff line number Diff line change
@@ -1,63 +1,33 @@
##### deal with nutrients uptake
function gpu_calc_consume_kernel!(ctsdic, ctsdoc, ctsnh4, ctsno3, ctspo4, plank, ac, x, y, z, ΔT)
index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
stride = blockDim().x * gridDim().x
for i = index:stride:size(ac,1)
@inbounds CUDA.@atomic ctsdic[x[i], y[i], z[i]] += (plank.resp[i] - plank.PS[i]) * ΔT * ac[i]
@inbounds CUDA.@atomic ctsdoc[x[i], y[i], z[i]] += (plank.exu[i] - plank.VDOC[i]) * ΔT * ac[i]
@inbounds CUDA.@atomic ctsnh4[x[i], y[i], z[i]] += -plank.VNH4[i] * ΔT * ac[i]
@inbounds CUDA.@atomic ctsno3[x[i], y[i], z[i]] += -plank.VNO3[i] * ΔT * ac[i]
@inbounds CUDA.@atomic ctspo4[x[i], y[i], z[i]] += -plank.VPO4[i] * ΔT * ac[i]
end
return nothing
function calc_consume_kernel!(ctsdic, ctsdoc, ctsnh4, ctsno3, ctspo4, plank, ac, x, y, z, ΔT)
i = @index(Global)
@inbounds KernelAbstractions.@atomic ctsdic[x[i], y[i], z[i]] += (plank.resp[i] - plank.PS[i]) * ΔT * ac[i]
@inbounds KernelAbstractions.@atomic ctsdoc[x[i], y[i], z[i]] += (plank.exu[i] - plank.VDOC[i]) * ΔT * ac[i]
@inbounds KernelAbstractions.@atomic ctsnh4[x[i], y[i], z[i]] += -plank.VNH4[i] * ΔT * ac[i]
@inbounds KernelAbstractions.@atomic ctsno3[x[i], y[i], z[i]] += -plank.VNO3[i] * ΔT * ac[i]
@inbounds KernelAbstractions.@atomic ctspo4[x[i], y[i], z[i]] += -plank.VPO4[i] * ΔT * ac[i]
end
function calc_consume!(ctsdic, ctsdoc, ctsnh4, ctsno3, ctspo4, plank, ac, x, y, z, ΔT, ::GPU)
@cuda threads=256 blocks=ceil(Int, size(ac,1)/256) gpu_calc_consume_kernel!(ctsdic, ctsdoc, ctsnh4,
ctsno3, ctspo4, plank, ac, x, y, z, ΔT)
function calc_consume!(ctsdic, ctsdoc, ctsnh4, ctsno3, ctspo4, plank, ac, x, y, z, ΔT, arch)
kernel! = calc_consume_kernel!(device(arch), 256, (size(ac,1)))
kernel!(ctsdic, ctsdoc, ctsnh4, ctsno3, ctspo4, plank, ac, x, y, z, ΔT)
return nothing
end
function calc_consume!(ctsdic, ctsdoc, ctsnh4, ctsno3, ctspo4, plank, ac, x, y, z, ΔT, ::CPU)
for i in 1:size(ac,1)
@inbounds ctsdic[x[i], y[i], z[i]] += (plank.resp[i] - plank.PS[i]) * ΔT * ac[i]
@inbounds ctsdoc[x[i], y[i], z[i]] += (plank.exu[i] - plank.VDOC[i]) * ΔT * ac[i]
@inbounds ctsnh4[x[i], y[i], z[i]] += -plank.VNH4[i] * ΔT * ac[i]
@inbounds ctsno3[x[i], y[i], z[i]] += -plank.VNO3[i] * ΔT * ac[i]
@inbounds ctspo4[x[i], y[i], z[i]] += -plank.VPO4[i] * ΔT * ac[i]
end
return nothing
end

##### deal with grazed or dead individuals
function gpu_calc_loss_kernel!(ctsdoc, ctspoc, ctsdon, ctspon, ctsdop, ctspop, plank, ac, x, y, z,
loss, lossFracC, lossFracN, lossFracP, R_NC, R_PC)
index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
stride = blockDim().x * gridDim().x
for i = index:stride:size(ac,1)
@inbounds CUDA.@atomic ctsdoc[x[i], y[i], z[i]] += (plank.Bm[i] + plank.Cq[i]) * lossFracC * ac[i] * loss[i]
@inbounds CUDA.@atomic ctsdon[x[i], y[i], z[i]] += (plank.Bm[i]*R_NC + plank.Nq[i]) * lossFracN * ac[i] * loss[i]
@inbounds CUDA.@atomic ctsdop[x[i], y[i], z[i]] += (plank.Bm[i]*R_PC + plank.Pq[i]) * lossFracP * ac[i] * loss[i]
@inbounds CUDA.@atomic ctspoc[x[i], y[i], z[i]] += (plank.Bm[i] + plank.Cq[i]) * (1.0-lossFracC) * ac[i] * loss[i]
@inbounds CUDA.@atomic ctspon[x[i], y[i], z[i]] += (plank.Bm[i]*R_NC + plank.Nq[i]) * (1.0-lossFracN) * ac[i] * loss[i]
@inbounds CUDA.@atomic ctspop[x[i], y[i], z[i]] += (plank.Bm[i]*R_PC + plank.Pq[i]) * (1.0-lossFracP) * ac[i] * loss[i]
end
return nothing
end
function calc_loss!(ctsdoc, ctspoc, ctsdon, ctspon, ctsdop, ctspop, plank, ac, x, y, z,
loss, lossFracC, lossFracN, lossFracP, R_NC, R_PC, ::GPU)
@cuda threads=256 blocks=ceil(Int, size(ac,1)/256) gpu_calc_loss_kernel!(ctsdoc, ctspoc, ctsdon, ctspon,
ctsdop, ctspop, plank, ac, x, y, z, loss,
lossFracC, lossFracN, lossFracP, R_NC, R_PC)
return nothing
function calc_loss_kernel!(ctsdoc, ctspoc, ctsdon, ctspon, ctsdop, ctspop, plank, ac, x, y, z,
loss, lossFracC, lossFracN, lossFracP, R_NC, R_PC)
i = @index(Global)
@inbounds KernelAbstractions.@atomic ctsdoc[x[i], y[i], z[i]] += (plank.Bm[i] + plank.Cq[i]) * lossFracC * ac[i] * loss[i]
@inbounds KernelAbstractions.@atomic ctsdon[x[i], y[i], z[i]] += (plank.Bm[i]*R_NC + plank.Nq[i]) * lossFracN * ac[i] * loss[i]
@inbounds KernelAbstractions.@atomic ctsdop[x[i], y[i], z[i]] += (plank.Bm[i]*R_PC + plank.Pq[i]) * lossFracP * ac[i] * loss[i]
@inbounds KernelAbstractions.@atomic ctspoc[x[i], y[i], z[i]] += (plank.Bm[i] + plank.Cq[i]) * (1.0-lossFracC) * ac[i] * loss[i]
@inbounds KernelAbstractions.@atomic ctspon[x[i], y[i], z[i]] += (plank.Bm[i]*R_NC + plank.Nq[i]) * (1.0-lossFracN) * ac[i] * loss[i]
@inbounds KernelAbstractions.@atomic ctspop[x[i], y[i], z[i]] += (plank.Bm[i]*R_PC + plank.Pq[i]) * (1.0-lossFracP) * ac[i] * loss[i]
end
function calc_loss!(ctsdoc, ctspoc, ctsdon, ctspon, ctsdop, ctspop, plank, ac, x, y, z,
loss, lossFracC, lossFracN, lossFracP, R_NC, R_PC, ::CPU)
for i in 1:size(ac,1)
@inbounds ctsdoc[x[i], y[i], z[i]] += (plank.Bm[i] + plank.Cq[i]) * lossFracC * ac[i] * loss[i]
@inbounds ctsdon[x[i], y[i], z[i]] += (plank.Bm[i]*R_NC + plank.Nq[i]) * lossFracN * ac[i] * loss[i]
@inbounds ctsdop[x[i], y[i], z[i]] += (plank.Bm[i]*R_PC + plank.Pq[i]) * lossFracP * ac[i] * loss[i]
@inbounds ctspoc[x[i], y[i], z[i]] += (plank.Bm[i] + plank.Cq[i]) * (1.0-lossFracC) * ac[i] * loss[i]
@inbounds ctspon[x[i], y[i], z[i]] += (plank.Bm[i]*R_NC + plank.Nq[i]) * (1.0-lossFracN) * ac[i] * loss[i]
@inbounds ctspop[x[i], y[i], z[i]] += (plank.Bm[i]*R_PC + plank.Pq[i]) * (1.0-lossFracP) * ac[i] * loss[i]
end
return nothing
loss, lossFracC, lossFracN, lossFracP, R_NC, R_PC, arch)
kernel! = calc_loss_kernel!(device(arch), 256, (size(ac,1)))
kernel!(ctsdoc, ctspoc, ctsdon, ctspon, ctsdop, ctspop, plank, ac, x, y, z,
loss, lossFracC, lossFracN, lossFracP, R_NC, R_PC)
return nothing
end
24 changes: 7 additions & 17 deletions src/Plankton/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,26 +39,16 @@ function find_NPT!(nuts, x, y, z, ac, NH4, NO3, PO4, DOC, par, temp, pop, arch::
end

##### calculate Chla and individual counts based on the status of plankton individuals
@inline function gpu_acc_counts_kernel!(ctsChl, ctspop, Chl, ac, x, y, z)
index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
stride = blockDim().x * gridDim().x
for i = index:stride:size(ac,1)
@inbounds CUDA.@atomic ctsChl[x[i], y[i], z[i]] += Chl[i] * ac[i]
@inbounds CUDA.@atomic ctspop[x[i], y[i], z[i]] += ac[i]
end
return nothing
function acc_counts_kernel!(ctsChl, ctspop, Chl, ac, x, y, z)
i = @index(Global)
@inbounds KernelAbstractions.@atomic ctsChl[x[i], y[i], z[i]] += Chl[i] * ac[i]
@inbounds KernelAbstractions.@atomic ctspop[x[i], y[i], z[i]] += ac[i]
end
function acc_counts!(ctsChl, ctspop, Chl, ac, x, y, z, ::GPU)
@cuda threads=256 blocks=ceil(Int, size(ac,1)/256) gpu_acc_counts_kernel!(ctsChl, ctspop, Chl, ac, x, y, z)
function acc_counts!(ctsChl, ctspop, Chl, ac, x, y, z, arch)
kernel! = acc_counts_kernel!(device(arch), 256, (size(ac,1)))
kernel!(ctsChl, ctspop, Chl, ac, x, y, z)
return nothing
end
function acc_counts!(ctsChl, ctspop, Chl, ac, x, y, z, ::CPU)
for i in 1:size(ac,1)
@inbounds ctsChl[x[i], y[i], z[i]] += Chl[i] * ac[i]
@inbounds ctspop[x[i], y[i], z[i]] += ac[i]
end
return nothing
end

##### calculate PAR field based on Chla field and depth
@kernel function calc_par_kernel!(par, Chl, PARF, g::AbstractGrid, kc, kw)
Expand Down

0 comments on commit 5c88f26

Please sign in to comment.