From 8874cb256e09fa560d36ca671d7c6cd1cc828ebb Mon Sep 17 00:00:00 2001 From: Hongyang Zhou Date: Tue, 3 Feb 2026 11:16:54 -0500 Subject: [PATCH 1/3] Fix the output mismatch; refactor saving --- src/boris_kernel.jl | 58 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/src/boris_kernel.jl b/src/boris_kernel.jl index aaf3ab040..d19a0c5e0 100644 --- a/src/boris_kernel.jl +++ b/src/boris_kernel.jl @@ -219,6 +219,32 @@ function gpu_field_evaluation!( end end +function _leapfrog_to_output(xv, Efunc, Bfunc, t, qdt_2m_half) + T = eltype(xv) + # Extract position and velocity (v^{n-1/2}) + x_p = xv[1] + y_p = xv[2] + z_p = xv[3] + vx = xv[4] + vy = xv[5] + vz = xv[6] + + # Evaluate fields at current position and time + r_vec = SVector(x_p, y_p, z_p) + E_val = Efunc(r_vec, t) + B_val = Bfunc(r_vec, t) + + # Correct velocity to v^n using half-step push + vx_n, vy_n, vz_n = boris_velocity_update( + vx, vy, vz, + E_val[1], E_val[2], E_val[3], + B_val[1], B_val[2], B_val[3], + qdt_2m_half + ) + + return SVector{6, T}(x_p, y_p, z_p, vx_n, vy_n, vz_n) +end + function solve( prob::TraceProblem, backend::KA.Backend; @@ -319,13 +345,6 @@ function solve( kernel! = boris_push_kernel!(backend, workgroup_size) - vback_kernel! = velocity_back_kernel!(backend, workgroup_size) - vback_kernel!( - xv_current, xv_current, q2m, -0.5 * dt, - Ex_arr, Ey_arr, Ez_arr, Bx_arr, By_arr, Bz_arr; ndrange = n_particles - ) - KA.synchronize(backend) - sols = Vector{ typeof(build_solution(prob, :boris, [tspan[1]], [SVector{6, T}(u0)])), }(undef, trajectories) @@ -343,6 +362,13 @@ function solve( end end + vback_kernel! = velocity_back_kernel!(backend, workgroup_size) + vback_kernel!( + xv_current, xv_current, q2m, -0.5 * dt, + Ex_arr, Ey_arr, Ez_arr, Bx_arr, By_arr, Bz_arr; ndrange = n_particles + ) + KA.synchronize(backend) + for it in 1:nt t = tspan[1] + (it - 0.5) * dt @@ -367,11 +393,16 @@ function solve( if save_everystep && it % savestepinterval == 0 xv_cpu = Array(xv_current) + t_current = tspan[1] + it * dt + qdt_2m_half = q2m * 0.5 * (0.5 * dt) + for i in 1:n_particles if iout_counters[i] < nout iout_counters[i] += 1 - saved_data[i][iout_counters[i]] = SVector{6, T}(xv_cpu[:, i]) - saved_times[i][iout_counters[i]] = tspan[1] + it * dt + saved_data[i][iout_counters[i]] = _leapfrog_to_output( + @view(xv_cpu[:, i]), Efunc, Bfunc, t_current, qdt_2m_half + ) + saved_times[i][iout_counters[i]] = t_current end end end @@ -379,11 +410,16 @@ function solve( if save_end xv_cpu = Array(xv_current) + t_current = tspan[2] + qdt_2m_half = q2m * 0.5 * (0.5 * dt) + for i in 1:n_particles if iout_counters[i] < nout iout_counters[i] += 1 - saved_data[i][iout_counters[i]] = SVector{6, T}(xv_cpu[:, i]) - saved_times[i][iout_counters[i]] = tspan[2] + saved_data[i][iout_counters[i]] = _leapfrog_to_output( + @view(xv_cpu[:, i]), Efunc, Bfunc, t_current, qdt_2m_half + ) + saved_times[i][iout_counters[i]] = t_current end end end From 746577e77c8f8f64182d02da44c3b4fd17dca2aa Mon Sep 17 00:00:00 2001 From: Hongyang Zhou Date: Tue, 3 Feb 2026 11:23:40 -0500 Subject: [PATCH 2/3] test: more strict comparison --- test/test_boris_kernel.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_boris_kernel.jl b/test/test_boris_kernel.jl index cfba8e6fc..6d7e963d2 100644 --- a/test/test_boris_kernel.jl +++ b/test/test_boris_kernel.jl @@ -66,8 +66,8 @@ const KA = KernelAbstractions # Check only first and last steps to avoid excessive test printing @test sol_gpu[1].t ≈ sol_cpu[1].t atol = 1.0e-6 - # Relax tolerances for subtle numerical differences between standard and KA implementations - @test sol_gpu[1].u[end] ≈ sol_cpu[1].u[end] rtol = 0.05 atol = 1.0e-4 + # Tight tolerances as the implementations should be algorithmically equivalent + @test sol_gpu[1].u[end] ≈ sol_cpu[1].u[end] rtol = 1.0e-10 atol = 1.0e-10 end @testset "Energy Conservation" begin From c9ca25b05cc6f42b0bbc085ebe0c5a8ab1c0a302 Mon Sep 17 00:00:00 2001 From: Hongyang Zhou Date: Tue, 3 Feb 2026 11:51:27 -0500 Subject: [PATCH 3/3] Add Boris kernel benchmark --- benchmark/benchmarks.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 174b7ed6c..f15eef0ba 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -4,6 +4,7 @@ using BenchmarkTools using TestParticle import TestParticle as TP using OrdinaryDiffEq, Meshes, StaticArrays +using TestParticle: CPU const SUITE = BenchmarkGroup() @@ -163,6 +164,9 @@ alg_adaptive = AdaptiveBoris(dtmax = 1.0e-3) SUITE["trace"]["numerical field"]["Adaptive Boris"] = @benchmarkable TP.solve( $prob_boris, $alg_adaptive ) +SUITE["trace"]["numerical field"]["Boris kernel"] = @benchmarkable TP.solve( + $prob_boris, CPU(); dt = 1 / 7, savestepinterval = 10 +) # Time-Dependent Field param_td = prepare(E_td, B_td, F_td)