Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions benchmark/benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ using BenchmarkTools
using TestParticle
import TestParticle as TP
using OrdinaryDiffEq, Meshes, StaticArrays
using TestParticle: CPU

const SUITE = BenchmarkGroup()

Expand Down Expand Up @@ -163,6 +164,9 @@ alg_adaptive = AdaptiveBoris(dtmax = 1.0e-3)
SUITE["trace"]["numerical field"]["Adaptive Boris"] = @benchmarkable TP.solve(
$prob_boris, $alg_adaptive
)
SUITE["trace"]["numerical field"]["Boris kernel"] = @benchmarkable TP.solve(
$prob_boris, CPU(); dt = 1 / 7, savestepinterval = 10
)

# Time-Dependent Field
param_td = prepare(E_td, B_td, F_td)
Expand Down
58 changes: 47 additions & 11 deletions src/boris_kernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,32 @@ function gpu_field_evaluation!(
end
end

function _leapfrog_to_output(xv, Efunc, Bfunc, t, qdt_2m_half)
T = eltype(xv)
# Extract position and velocity (v^{n-1/2})
x_p = xv[1]
y_p = xv[2]
z_p = xv[3]
vx = xv[4]
vy = xv[5]
vz = xv[6]

# Evaluate fields at current position and time
r_vec = SVector(x_p, y_p, z_p)
E_val = Efunc(r_vec, t)
B_val = Bfunc(r_vec, t)

# Correct velocity to v^n using half-step push
vx_n, vy_n, vz_n = boris_velocity_update(
vx, vy, vz,
E_val[1], E_val[2], E_val[3],
B_val[1], B_val[2], B_val[3],
qdt_2m_half
)

return SVector{6, T}(x_p, y_p, z_p, vx_n, vy_n, vz_n)
end


function solve(
prob::TraceProblem, backend::KA.Backend;
Expand Down Expand Up @@ -319,13 +345,6 @@ function solve(

kernel! = boris_push_kernel!(backend, workgroup_size)

vback_kernel! = velocity_back_kernel!(backend, workgroup_size)
vback_kernel!(
xv_current, xv_current, q2m, -0.5 * dt,
Ex_arr, Ey_arr, Ez_arr, Bx_arr, By_arr, Bz_arr; ndrange = n_particles
)
KA.synchronize(backend)

sols = Vector{
typeof(build_solution(prob, :boris, [tspan[1]], [SVector{6, T}(u0)])),
}(undef, trajectories)
Expand All @@ -343,6 +362,13 @@ function solve(
end
end

vback_kernel! = velocity_back_kernel!(backend, workgroup_size)
vback_kernel!(
xv_current, xv_current, q2m, -0.5 * dt,
Ex_arr, Ey_arr, Ez_arr, Bx_arr, By_arr, Bz_arr; ndrange = n_particles
)
KA.synchronize(backend)

for it in 1:nt
t = tspan[1] + (it - 0.5) * dt

Expand All @@ -367,23 +393,33 @@ function solve(

if save_everystep && it % savestepinterval == 0
xv_cpu = Array(xv_current)
t_current = tspan[1] + it * dt
qdt_2m_half = q2m * 0.5 * (0.5 * dt)

for i in 1:n_particles
if iout_counters[i] < nout
iout_counters[i] += 1
saved_data[i][iout_counters[i]] = SVector{6, T}(xv_cpu[:, i])
saved_times[i][iout_counters[i]] = tspan[1] + it * dt
saved_data[i][iout_counters[i]] = _leapfrog_to_output(
@view(xv_cpu[:, i]), Efunc, Bfunc, t_current, qdt_2m_half
)
saved_times[i][iout_counters[i]] = t_current
end
end
end
end

if save_end
xv_cpu = Array(xv_current)
t_current = tspan[2]
qdt_2m_half = q2m * 0.5 * (0.5 * dt)

for i in 1:n_particles
if iout_counters[i] < nout
iout_counters[i] += 1
saved_data[i][iout_counters[i]] = SVector{6, T}(xv_cpu[:, i])
saved_times[i][iout_counters[i]] = tspan[2]
saved_data[i][iout_counters[i]] = _leapfrog_to_output(
@view(xv_cpu[:, i]), Efunc, Bfunc, t_current, qdt_2m_half
)
saved_times[i][iout_counters[i]] = t_current
end
end
end
Expand Down
4 changes: 2 additions & 2 deletions test/test_boris_kernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ const KA = KernelAbstractions

# Check only first and last steps to avoid excessive test printing
@test sol_gpu[1].t ≈ sol_cpu[1].t atol = 1.0e-6
# Relax tolerances for subtle numerical differences between standard and KA implementations
@test sol_gpu[1].u[end] ≈ sol_cpu[1].u[end] rtol = 0.05 atol = 1.0e-4
# Tight tolerances as the implementations should be algorithmically equivalent
@test sol_gpu[1].u[end] ≈ sol_cpu[1].u[end] rtol = 1.0e-10 atol = 1.0e-10
end

@testset "Energy Conservation" begin
Expand Down
Loading