Skip to content

Commit 02f7a8a

Browse files
committed
Optimize ReSTIR pipeline performance
1 parent cffce10 commit 02f7a8a

File tree

1 file changed

+37
-117
lines changed

1 file changed

+37
-117
lines changed

src/integrators/restir_di.cpp

Lines changed: 37 additions & 117 deletions
Original file line number | Diff line number | Diff line change
@@ -211,8 +211,8 @@ class ReSTIRDirectLightingInstance final : public ProgressiveIntegrator::Instanc
211211
Buffer<float4x4> _prev_frame_view_matrix;
212212
void _temporal_pass(const Camera::Instance *camera, Expr<uint> frame_index,
213213
Expr<uint2> pixel_id, Expr<float> time,
214-
Expr<uint> num_initial_sample, Expr<bool> enable_visibility_reuse, Expr<bool> enable_temporal_reuse,
215-
Expr<bool> enable_decorrelation) const noexcept {
214+
Expr<uint> num_initial_sample, Expr<bool> enable_visibility_reuse, Expr<bool> enable_temporal_reuse
215+
) const noexcept {
216216
auto resolution = camera->film()->node()->resolution();
217217
sampler()->start(pixel_id, frame_index);
218218
auto u_filter = sampler()->generate_pixel_2d();
@@ -277,34 +277,6 @@ class ReSTIRDirectLightingInstance final : public ProgressiveIntegrator::Instanc
277277
};
278278
};
279279
};
280-
// perturb the light samples to reduce correlation, use Metropolis to determine whether to accept the perturbation
281-
$if(enable_decorrelation & reservoir.weight.total_weight > 0.f) {
282-
auto MARKOV_CHAIN_LENGTH = 4u;
283-
auto sample_box_muller = [](Expr<float2> u) noexcept {
284-
auto r = sqrt(clamp(-2.f * log(u.x), 0.f, 1.f));
285-
auto theta = 2.f * pi * u.y;
286-
return make_float2(r * cos(theta), r * sin(theta));
287-
};
288-
$for(markov_iter, MARKOV_CHAIN_LENGTH) {
289-
auto candidate = reservoir;
290-
auto perturbation = 0.025f * sample_box_muller(sampler()->generate_2d());
291-
candidate.sample.u_light_surface = reservoir.sample.u_light_surface + perturbation;
292-
$if(any(candidate.sample.u_light_surface < 0.f) | any(candidate.sample.u_light_surface > 1.f)) { $continue; };
293-
auto sample_target_pdf = def(0.f);
294-
$if(enable_visibility_reuse) {
295-
auto [L, _] = _evaluate_with_occlusion(candidate.sample, *it, wo, swl, time);
296-
sample_target_pdf = pipeline().spectrum()->cie_y(swl, L);
297-
} $else {
298-
auto [L, _] = _evaluate_without_occlusion(candidate.sample, *it, wo, swl, time);
299-
sample_target_pdf = pipeline().spectrum()->cie_y(swl, L);
300-
};
301-
auto accepting_prob = min(1.f, sample_target_pdf / candidate.weight.target_pdf);
302-
candidate.weight.target_pdf = sample_target_pdf;
303-
$if(sampler()->generate_1d() < accepting_prob) {
304-
reservoir = candidate;
305-
};
306-
};
307-
};
308280
$break;
309281
};
310282
$if(dsl::isnan(reservoir.weight.total_weight) | dsl::isnan(reservoir.weight.target_pdf)) {
@@ -334,8 +306,6 @@ class ReSTIRDirectLightingInstance final : public ProgressiveIntegrator::Instanc
334306
ArrayFloat<3u> valid_neighbor_m_array;
335307
auto num_valid_neighbor = def(0u);
336308
auto z = reservoir.weight.m;
337-
auto depth_projector = inverse(camera->camera_to_world())[2];
338-
auto current_pixel_depth = dot(depth_projector.xyz(), it->p()) + depth_projector.w;
339309
$for(_, num_neighbor_sample) {
340310
auto u_radius = sampler()->generate_1d(), u_theta = sampler()->generate_1d();
341311
auto radius = neighbor_radius * sqrt(u_radius);
@@ -347,9 +317,7 @@ class ReSTIRDirectLightingInstance final : public ProgressiveIntegrator::Instanc
347317
auto neighbor_hit = _visibility_buffer->hit(neighbor_id);
348318
auto neighbor_it = pipeline().geometry()->interaction(neighbor_ray, neighbor_hit);
349319
$if(neighbor_it->valid() & neighbor_it->shape().has_surface()) {
350-
auto neighbor_pixel_depth = dot(depth_projector.xyz(), neighbor_it->p()) + depth_projector.w;
351-
$if(abs(neighbor_pixel_depth - current_pixel_depth) < 0.1f * abs(current_pixel_depth) &
352-
dot(it->ng(), neighbor_it->ng()) > 0.91f) {
320+
$if(dot(it->ng(), neighbor_it->ng()) > 0.9f) {
353321
auto neighbor_reservoir = _spatial_reservoir_buffer->read(neighbor_id);
354322
$if(dsl::isnan(neighbor_reservoir.weight.total_weight) | dsl::isnan(neighbor_reservoir.weight.target_pdf) | neighbor_reservoir.weight.m == 0.f) { $continue; };
355323
auto neighbor_target_pdf = def(0.f);
@@ -397,7 +365,8 @@ class ReSTIRDirectLightingInstance final : public ProgressiveIntegrator::Instanc
397365
_temporal_reservoir_buffer->write(reservoir, pixel_id);
398366
}
399367
[[nodiscard]] Float3 Li(const Camera::Instance *camera, Expr<uint> frame_index, Expr<uint2> pixel_id, Expr<float> time,
400-
Expr<bool> enable_spatial_reuse, Expr<bool> unbiased, Expr<bool> enable_visibility_reuse) const noexcept {
368+
Expr<bool> enable_spatial_reuse, Expr<bool> unbiased, Expr<bool> enable_visibility_reuse,
369+
Expr<bool> enable_decorrelation) const noexcept {
401370
auto resolution = camera->film()->node()->resolution();
402371
sampler()->start(pixel_id, frame_index << 1u | 1u);
403372
auto spectrum = pipeline().spectrum();
@@ -428,83 +397,34 @@ class ReSTIRDirectLightingInstance final : public ProgressiveIntegrator::Instanc
428397
// compute direct lighting
429398
$if(!it->shape().has_surface()) { $break; };
430399
$if(enable_spatial_reuse) {
431-
$outline {
432-
reservoir = _temporal_reservoir_buffer->read(pixel_id);
433-
auto num_neighbor_sample = ite(unbiased, 3u, 5u);
434-
auto constexpr neighbor_radius = 30.f;
435-
ArrayVar<Ray, 3u> valid_neighbor_ray_array;
436-
ArrayVar<Hit, 3u> valid_neighbor_hit_array;
437-
ArrayFloat<3u> valid_neighbor_m_array;
438-
auto num_valid_neighbor = def(0u);
439-
auto z = reservoir.weight.m;
440-
auto camera_to_world = camera->camera_to_world();
441-
auto world_to_camera = inverse(camera_to_world);
442-
auto current_pixel_depth = (world_to_camera * make_float4(it->p(), 1.f)).z;
443-
$for(_, num_neighbor_sample) {
444-
auto u_radius = sampler()->generate_1d(), u_theta = sampler()->generate_1d();
445-
auto radius = neighbor_radius * sqrt(u_radius);
446-
auto theta = 2.f * pi * u_theta;
447-
auto offset = make_float2(radius * cos(theta), radius * sin(theta));
448-
auto neighbor_id = make_uint2(clamp(make_float2(pixel_id) + offset, make_float2(0.f), make_float2(resolution) - 1.f));
449-
$if(all(neighbor_id == pixel_id)) { $continue; };
450-
auto neighbor_ray = _visibility_buffer->ray(neighbor_id);
451-
auto neighbor_hit = _visibility_buffer->hit(neighbor_id);
452-
auto neighbor_it = pipeline().geometry()->interaction(neighbor_ray, neighbor_hit);
453-
$if(neighbor_it->valid() & neighbor_it->shape().has_surface()) {
454-
auto neighbor_pixel_depth = (world_to_camera * make_float4(neighbor_it->p(), 1.f)).z;
455-
$if(abs(neighbor_pixel_depth - current_pixel_depth) < 0.05f * abs(current_pixel_depth) &
456-
dot(it->ng(), neighbor_it->ng()) > 0.91f) {
457-
auto neighbor_reservoir = Reservoir::zero();
458-
neighbor_reservoir = _temporal_reservoir_buffer->read(neighbor_id);
459-
$if(dsl::isnan(neighbor_reservoir.weight.total_weight) | dsl::isnan(neighbor_reservoir.weight.target_pdf) | neighbor_reservoir.weight.m == 0.f) { $continue; };
460-
auto neighbor_target_pdf = def(0.f);
461-
$if(enable_visibility_reuse & unbiased) {
462-
auto [L, _] = _evaluate_with_occlusion(neighbor_reservoir.sample, *it, wo, swl, time);
463-
neighbor_target_pdf = pipeline().spectrum()->cie_y(swl, L);
464-
} $else {
465-
auto [L, _] = _evaluate_without_occlusion(neighbor_reservoir.sample, *it, wo, swl, time);
466-
neighbor_target_pdf = pipeline().spectrum()->cie_y(swl, L);
467-
};
468-
neighbor_reservoir.weight.total_weight *= ite(neighbor_reservoir.weight.target_pdf == 0.f, 0.f, neighbor_target_pdf / neighbor_reservoir.weight.target_pdf);
469-
neighbor_reservoir.weight.target_pdf = neighbor_target_pdf;
470-
reservoir.update(neighbor_reservoir, sampler()->generate_1d());
471-
$if(unbiased) {
472-
valid_neighbor_ray_array[num_valid_neighbor] = neighbor_ray;
473-
valid_neighbor_hit_array[num_valid_neighbor] = neighbor_hit;
474-
valid_neighbor_m_array[num_valid_neighbor] = neighbor_reservoir.weight.m;
475-
num_valid_neighbor += 1u;
476-
};
477-
};
478-
};
479-
};
480-
$if(unbiased) {
481-
$for(neighbor_index, num_valid_neighbor) {
482-
auto neighbor_ray = valid_neighbor_ray_array[neighbor_index];
483-
auto neighbor_hit = valid_neighbor_hit_array[neighbor_index];
484-
auto neighbor_it = pipeline().geometry()->interaction(neighbor_ray, neighbor_hit);
485-
auto out_of_domain = def(true);
486-
$if(enable_visibility_reuse) {
487-
auto [L, _] = _evaluate_with_occlusion(reservoir.sample, *neighbor_it, -neighbor_ray->direction(), swl, time);
488-
out_of_domain = L.is_zero();
489-
} $else {
490-
auto [L, _] = _evaluate_without_occlusion(reservoir.sample, *neighbor_it, -neighbor_ray->direction(), swl, time);
491-
out_of_domain = L.is_zero();
492-
};
493-
$if(!out_of_domain) {
494-
z += valid_neighbor_m_array[neighbor_index];
495-
};
496-
};
497-
reservoir.weight.total_weight *= reservoir.weight.m / z;
498-
reservoir.weight.m = z;
499-
};
500-
};
400+
reservoir = _temporal_reservoir_buffer->read(pixel_id);
501401
}
502402
$else {
503403
reservoir = _spatial_reservoir_buffer->read(pixel_id);
504404
};
405+
// perturb the light samples to reduce correlation, use Metropolis to determine whether to accept the perturbation
406+
$if(enable_decorrelation & reservoir.weight.total_weight > 0.f) {
407+
auto MARKOV_CHAIN_LENGTH = 2u;
408+
auto sample_box_muller = [](Expr<float2> u) noexcept {
409+
auto r = sqrt(clamp(-2.f * log(u.x), 0.f, 1.f));
410+
auto theta = 2.f * pi * u.y;
411+
return make_float2(r * cos(theta), r * sin(theta));
412+
};
413+
$for(markov_iter, MARKOV_CHAIN_LENGTH) {
414+
auto candidate = reservoir;
415+
auto perturbation = 0.025f * sample_box_muller(sampler()->generate_2d());
416+
candidate.sample.u_light_surface = reservoir.sample.u_light_surface + perturbation;
417+
$if(any(candidate.sample.u_light_surface < 0.f) | any(candidate.sample.u_light_surface > 1.f)) { $continue; };
418+
auto sample_target_pdf = def(0.f);
419+
auto [L, _] = _evaluate_with_occlusion(candidate.sample, *it, wo, swl, time);
420+
sample_target_pdf = pipeline().spectrum()->cie_y(swl, L);
421+
auto accepting_prob = min(1.f, sample_target_pdf / candidate.weight.target_pdf);
422+
candidate.weight.target_pdf = sample_target_pdf;
423+
$if(sampler()->generate_1d() < accepting_prob) { reservoir = candidate; };
424+
};
425+
};
505426
auto [L, _] = _evaluate_with_occlusion(reservoir.sample, *it, wo, swl, time);
506-
auto contribution_weight = reservoir.contribution_weight();
507-
Li += weight * contribution_weight * L;
427+
Li += weight * reservoir.contribution_weight() * L;
508428
$break;
509429
};
510430
_spatial_reservoir_buffer->write(reservoir, pixel_id);
@@ -546,22 +466,23 @@ class ReSTIRDirectLightingInstance final : public ProgressiveIntegrator::Instanc
546466
}
547467
using namespace luisa::compute;
548468
Kernel2D temporal_pass_kernel = [&](UInt frame_index, Float time, UInt num_initial_sample, Bool enable_visibility_reuse,
549-
Bool enable_temporal_reuse, Bool enable_decorrelation) noexcept {
469+
Bool enable_temporal_reuse) noexcept {
550470
set_block_size(16u, 16u, 1u);
551471
auto pixel_id = dispatch_id().xy();
552472
_temporal_pass(camera, frame_index, pixel_id, time, num_initial_sample, enable_visibility_reuse,
553-
enable_temporal_reuse, enable_decorrelation);
473+
enable_temporal_reuse);
554474
};
555475
Kernel2D spatial_pass_kernel = [&](UInt frame_index, Float time, Bool unbiased, Bool enable_visibility_reuse) noexcept {
556476
set_block_size(16u, 16u, 1u);
557477
auto pixel_id = dispatch_id().xy();
558478
_spatial_pass(camera, frame_index, pixel_id, time, unbiased, enable_visibility_reuse);
559479
};
560480
Kernel2D render_kernel = [&](UInt frame_index, Float time, Float shutter_weight,
561-
Bool enable_spatial_reuse, Bool unbiased, Bool enable_visibility_reuse) noexcept {
481+
Bool enable_spatial_reuse, Bool unbiased, Bool enable_visibility_reuse,
482+
Bool enable_decorrelation) noexcept {
562483
set_block_size(16u, 16u, 1u);
563484
auto pixel_id = dispatch_id().xy();
564-
auto L = Li(camera, frame_index, pixel_id, time, enable_spatial_reuse, unbiased, enable_visibility_reuse);
485+
auto L = Li(camera, frame_index, pixel_id, time, enable_spatial_reuse, unbiased, enable_visibility_reuse, enable_decorrelation);
565486
camera->film()->accumulate(pixel_id, shutter_weight * L);
566487
$if(all(pixel_id == 0u)) {
567488
auto view_matrix = inverse(camera->camera_to_world());
@@ -595,20 +516,19 @@ class ReSTIRDirectLightingInstance final : public ProgressiveIntegrator::Instanc
595516
for (auto s : shutter_samples) {
596517
pipeline().update(command_buffer, s.point.time);
597518
for (auto i = 0u; i < s.spp; i++) {
598-
// camera->film()->clear(command_buffer);
599-
auto constexpr num_spatial_reuse_pass = 2u;
519+
camera->film()->clear(command_buffer);
600520
command_buffer << temporal_pass(sample_id, s.point.time,
601521
node<ReSTIRDirectLighting>()->num_initial_sample(),
602522
node<ReSTIRDirectLighting>()->enable_visibility_reuse(),
603-
node<ReSTIRDirectLighting>()->enable_temporal_reuse(),
604-
node<ReSTIRDirectLighting>()->enable_decorrelation())
523+
node<ReSTIRDirectLighting>()->enable_temporal_reuse())
605524
.dispatch(resolution);
606525
if (node<ReSTIRDirectLighting>()->enable_spatial_reuse()) {
607526
command_buffer << spatial_pass(sample_id, s.point.time, node<ReSTIRDirectLighting>()->unbiased_spatial_reuse(),
608527
node<ReSTIRDirectLighting>()->enable_visibility_reuse()).dispatch(resolution);
609528
}
610529
command_buffer << render(sample_id++, s.point.time, s.point.weight, node<ReSTIRDirectLighting>()->enable_spatial_reuse(),
611-
node<ReSTIRDirectLighting>()->unbiased_spatial_reuse(), node<ReSTIRDirectLighting>()->enable_visibility_reuse())
530+
node<ReSTIRDirectLighting>()->unbiased_spatial_reuse(), node<ReSTIRDirectLighting>()->enable_visibility_reuse(),
531+
node<ReSTIRDirectLighting>()->enable_decorrelation())
612532
.dispatch(resolution);
613533
_temporal_reservoir_buffer->copy_from(command_buffer, *_spatial_reservoir_buffer);
614534
if (auto &&p = pipeline().printer(); !p.empty()) {

0 commit comments

Comments
 (0)