diff --git a/.github/workflows/npm-grunt.yml b/.github/workflows/npm-grunt.yml new file mode 100644 index 00000000..5ef7a9ae --- /dev/null +++ b/.github/workflows/npm-grunt.yml @@ -0,0 +1,28 @@ +name: NodeJS with Grunt + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + build: + runs-on: ubuntu-latest + + strategy: + matrix: + node-version: [18.x, 20.x, 22.x] + + steps: + - uses: actions/checkout@v4 + + - name: Use Node.js ${{ matrix.node-version }} + uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + + - name: Build + run: | + npm install + grunt diff --git a/README.md b/README.md index 4103e3b5..35ff6952 100644 --- a/README.md +++ b/README.md @@ -3,25 +3,95 @@ WebGL Forward+ and Clustered Deferred Shading **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 4** -* (TODO) YOUR NAME HERE -* Tested on: (TODO) **Google Chrome 222.2** on - Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Ruichi Zhang +* Tested on: **Google Chrome 141.0.7390.123** on + Windows 10, AMD Ryzen 9 7950X3D @ 4201 MHz, 16 Core(s), NVIDIA GeForce RTX 4080 SUPER ### Live Demo -[![](img/thumb.png)](http://TODO.github.io/Project4-WebGPU-Forward-Plus-and-Clustered-Deferred) +[![](img/proj4teaser.png)](https://pabloo0610.github.io/Project4-WebGPU-Forward-Plus-and-Clustered-Deferred/) ### Demo Video/GIF -[![](img/video.mp4)](TODO) +![](img/sponzawebgpu.gif) -### (TODO: Your README) +## Overview -*DO NOT* leave the README to the last minute! It is a crucial part of the -project, and we will not be able to grade you without a good README. +We aim to analyze how different rendering pipelines perform under increasing light counts and varying cluster capacities. +This helps identify the practical tradeoffs between **per-fragment lighting**, **Forward+ clustering**, and **deferred lighting**. -This assignment has a considerable amount of performance analysis compared -to implementation work. 
Complete the implementation early to leave time! +--- + +## Pipeline Overview + +### Forward+ +- Light clustering is performed once per frame in a compute pass. +- Each fragment shades only with the lights affecting its cluster. + +### Clustered Deferred +- G-buffer pass stores position, normal, and albedo. +- Lighting pass accumulates contributions from clustered lights. + +--- + +## Implementation + +- Light clustering in view space with uniform `(X, Y, Z)` grid. +- Linear Z-slicing for cluster depth partition. +- Each cluster stores the number of lights and their indices. +- Sphere-cluster intersection determines light assignment. +- Cluster indices are computed in the fragment stage from screen position and view-space depth. +- This minimizes per-fragment light loops and allows thousands of lights to be processed efficiently. + +--- + +## Performance Analysis + +### Effect of Light Count + +![](img/performance_vs_light_count.png) + +*Figure 1: Frame time (ms) vs number of lights for Naive Forward, Forward+, and Clustered Deferred rendering.* + +- Naive Forward grows linearly with light count. +- Forward+ and Clustered Deferred scale sublinearly due to clustering. +- Clustered Deferred matches Forward+ up to 500 lights (5.9 ms and 6.9 ms for both) and pulls ahead as the count grows, reaching 9.52 ms vs 20.83 ms at 5000 lights. + +--- + +### Effect of Lights per Cluster + +![](img/performance_vs_lights_per_cluster.png) + +*Figure 2: Frame time (ms) vs lights per cluster for Forward+ and Clustered Deferred rendering.* + +Increasing the maximum number of lights per cluster directly impacts both performance and rendering quality. +At lower capacities (e.g., 64 or 128), rendering is fast because fewer light contributions are accumulated per fragment. However, many lights are effectively ignored due to cluster overflow, which leads to incomplete lighting and visual artifacts (e.g., dark areas where lights should contribute). +At higher capacities (e.g., 512), more lights are correctly processed per cluster, resulting in visually correct lighting but also higher shading cost. 
In our experiments, 512 provides a good balance between performance and image quality. + +--- + +## Feature Analysis + +### Light Clustering +- Implemented in a compute shader. +- Complexity grows with `#lights × #clusters`. +- Linear slicing keeps indexing simple. + +### Deferred G-buffer +- Increases bandwidth usage but reduces fragment shading cost. +- More stable performance at high light counts. + +--- + +## Conclusion + +- Naive Forward is simple but scales poorly with many lights. +- Forward+ reduces per-fragment cost through clustering. +- Clustered Deferred provides better scalability at high light counts. +- The number of lights per cluster significantly affects performance and should be tuned per scene. + +--- ### Credits diff --git a/figure.py b/figure.py new file mode 100644 index 00000000..93caad1a --- /dev/null +++ b/figure.py @@ -0,0 +1,38 @@ +import matplotlib.pyplot as plt +import numpy as np + +light_counts = np.array([250, 500, 1000, 2500, 5000]) + +naive_times = np.array([27.7, 52.63, 111, 250, 500]) +forward_times = np.array([5.9, 6.9, 8.69, 12.5, 20.83]) +deferred_times = np.array([5.9, 6.9, 8.3, 8.47, 9.52]) + +lights_per_cluster = np.array([64, 128, 256, 512]) +forward_cluster_times = np.array([8.84, 12.98, 20, 14.28]) +deferred_cluster_times = np.array([3.8, 6.9, 9.25, 9.61]) + +plt.figure(figsize=(6,4)) +plt.plot(light_counts, naive_times, marker='o', label='Naive Forward') +plt.plot(light_counts, forward_times, marker='o', label='Forward+') +plt.plot(light_counts, deferred_times, marker='o', label='Clustered Deferred') + +plt.xlabel('Number of Lights') +plt.ylabel('Frame Time (ms)') +plt.title('Frame Time vs Light Count') +plt.legend() +plt.grid(True, linestyle='--', alpha=0.5) +plt.tight_layout() +plt.savefig('performance_vs_light_count.png', dpi=200) + +plt.figure(figsize=(6,4)) +plt.plot(lights_per_cluster, forward_cluster_times, marker='o', label='Forward+') +plt.plot(lights_per_cluster, deferred_cluster_times, marker='o', 
label='Clustered Deferred') + +plt.xlabel('Lights per Cluster') +plt.ylabel('Frame Time (ms)') +plt.xticks(lights_per_cluster) +plt.title('Frame Time vs Lights per Cluster') +plt.legend() +plt.grid(True, linestyle='--', alpha=0.5) +plt.tight_layout() +plt.savefig('performance_vs_lights_per_cluster.png', dpi=200) diff --git a/img/performance_vs_light_count.png b/img/performance_vs_light_count.png new file mode 100644 index 00000000..fc98c034 Binary files /dev/null and b/img/performance_vs_light_count.png differ diff --git a/img/performance_vs_lights_per_cluster.png b/img/performance_vs_lights_per_cluster.png new file mode 100644 index 00000000..a00d8f31 Binary files /dev/null and b/img/performance_vs_lights_per_cluster.png differ diff --git a/img/proj4teaser.png b/img/proj4teaser.png new file mode 100644 index 00000000..b1ddbfe7 Binary files /dev/null and b/img/proj4teaser.png differ diff --git a/img/sponzawebgpu.gif b/img/sponzawebgpu.gif new file mode 100644 index 00000000..83d3ff49 Binary files /dev/null and b/img/sponzawebgpu.gif differ diff --git a/src/renderers/clustered_deferred.ts b/src/renderers/clustered_deferred.ts index 00a326ca..c0bb5502 100644 --- a/src/renderers/clustered_deferred.ts +++ b/src/renderers/clustered_deferred.ts @@ -6,11 +6,182 @@ export class ClusteredDeferredRenderer extends renderer.Renderer { // TODO-3: add layouts, pipelines, textures, etc. 
needed for Forward+ here // you may need extra uniforms such as the camera view matrix and the canvas resolution + sceneUniformsBindGroupLayout: GPUBindGroupLayout; + sceneUniformsBindGroup: GPUBindGroup; + + posTexture: GPUTexture; + posTextureView: GPUTextureView; + normTexture: GPUTexture; + normTextureView: GPUTextureView; + albedoTexture: GPUTexture; + albedoTextureView: GPUTextureView; + depthTexture: GPUTexture; + depthTextureView: GPUTextureView; + + gbuffersBindGroupLayout: GPUBindGroupLayout; + gbuffersBindGroup: GPUBindGroup; + + gbufferPipeline: GPURenderPipeline; + fullscreenPipeline: GPURenderPipeline; + constructor(stage: Stage) { super(stage); // TODO-3: initialize layouts, pipelines, textures, etc. needed for Forward+ here // you'll need two pipelines: one for the G-buffer pass and one for the fullscreen pass + this.sceneUniformsBindGroupLayout = renderer.device.createBindGroupLayout({ + label: "scene uniforms bind group layout", + entries: [ + { + binding: 0, + visibility: GPUShaderStage.VERTEX | GPUShaderStage.FRAGMENT, + buffer: { type: "uniform" } + }, + { // lightSet + binding: 1, + visibility: GPUShaderStage.FRAGMENT, + buffer: { type: "read-only-storage" } + }, + { // clusterSet + binding: 2, + visibility: GPUShaderStage.FRAGMENT, + buffer: { type: "read-only-storage" } + } + ] + }); + + this.sceneUniformsBindGroup = renderer.device.createBindGroup({ + label: "scene uniforms bind group", + layout: this.sceneUniformsBindGroupLayout, + entries: [ + { + binding: 0, + resource: { buffer: this.camera.uniformsBuffer} + }, + { + binding: 1, + resource: { buffer: this.lights.lightSetStorageBuffer } + }, + { + binding: 2, + resource: { buffer: this.lights.clusterSetStorageBuffer } + } + ] + }); + + const size: [number, number] = [renderer.canvas.width, renderer.canvas.height]; + + this.posTexture = renderer.device.createTexture({ + size: size, + format: "rgba32float", + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.TEXTURE_BINDING + }); 
+ this.posTextureView = this.posTexture.createView(); + + this.normTexture = renderer.device.createTexture({ + size: size, + format: "rgba16float", + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.TEXTURE_BINDING + }); + this.normTextureView = this.normTexture.createView(); + + this.albedoTexture = renderer.device.createTexture({ + size: size, + format: "rgba8unorm", + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.TEXTURE_BINDING + }); + this.albedoTextureView = this.albedoTexture.createView(); + + this.depthTexture = renderer.device.createTexture({ + size: size, + format: "depth24plus", + usage: GPUTextureUsage.RENDER_ATTACHMENT + }); + this.depthTextureView = this.depthTexture.createView(); + + // Bind group for sampling G-buffer + this.gbuffersBindGroupLayout = renderer.device.createBindGroupLayout({ + label: "gbuffers bind group layout", + entries: [ + { binding: 0, visibility: GPUShaderStage.FRAGMENT, texture: { sampleType: 'unfilterable-float' } }, // pos + { binding: 1, visibility: GPUShaderStage.FRAGMENT, texture: { sampleType: 'unfilterable-float' } }, // norm + { binding: 2, visibility: GPUShaderStage.FRAGMENT, texture: { sampleType: 'float' } }, // albedo (unorm) + { binding: 3, visibility: GPUShaderStage.FRAGMENT, sampler: { type: 'non-filtering' } } + ] + }); + + // Use a non-filtering sampler (nearest) because some G-buffer textures are unfilterable float formats + const gbSampler = renderer.device.createSampler({ + magFilter: 'nearest', + minFilter: 'nearest', + mipmapFilter: 'nearest' + }); + + this.gbuffersBindGroup = renderer.device.createBindGroup({ + label: "gbuffers bind group", + layout: this.gbuffersBindGroupLayout, + entries: [ + { binding: 0, resource: this.posTextureView }, + { binding: 1, resource: this.normTextureView }, + { binding: 2, resource: this.albedoTextureView }, + { binding: 3, resource: gbSampler } + ] + }); + + // G-buffer pipeline + this.gbufferPipeline = renderer.device.createRenderPipeline({ + 
layout: renderer.device.createPipelineLayout({ + label: "gbuffer pipeline layout", + bindGroupLayouts: [ this.sceneUniformsBindGroupLayout, renderer.modelBindGroupLayout, renderer.materialBindGroupLayout ] + }), + depthStencil: { + depthWriteEnabled: true, + depthCompare: "less", + format: "depth24plus" + }, + vertex: { + module: renderer.device.createShaderModule({ + label: "gbuffer vert shader", + code: shaders.naiveVertSrc + }), + buffers: [ renderer.vertexBufferLayout ] + }, + fragment: { + module: renderer.device.createShaderModule({ + label: "gbuffer frag shader", + code: shaders.clusteredDeferredFragSrc + }), + targets: [ + { format: "rgba32float" }, + { format: "rgba16float" }, + { format: "rgba8unorm" } + ] + } + }); + + // Fullscreen pipeline + this.fullscreenPipeline = renderer.device.createRenderPipeline({ + layout: renderer.device.createPipelineLayout({ + label: "fullscreen pipeline layout", + // fullscreen pass doesn't use model bind groups, so only include scene and gbuffers + bindGroupLayouts: [ this.sceneUniformsBindGroupLayout, this.gbuffersBindGroupLayout ] + }), + vertex: { + module: renderer.device.createShaderModule({ + label: "clustered deferred fullscreen vert", + code: shaders.clusteredDeferredFullscreenVertSrc + }), + entryPoint: "main", + buffers: [] + }, + fragment: { + module: renderer.device.createShaderModule({ + label: "clustered deferred fullscreen frag", + code: shaders.clusteredDeferredFullscreenFragSrc + }), + targets: [ { format: renderer.canvasFormat } ] + } + }); } override draw() { @@ -18,5 +189,80 @@ export class ClusteredDeferredRenderer extends renderer.Renderer { // - run the clustering compute shader // - run the G-buffer pass, outputting position, albedo, and normals // - run the fullscreen pass, which reads from the G-buffer and performs lighting calculations + const encoder = renderer.device.createCommandEncoder(); + const canvasTextureView = renderer.context.getCurrentTexture().createView(); + + // 1) Run clustering 
compute shader + this.lights.doLightClustering(encoder); + + // 2) G-buffer pass + const gbufferPass = encoder.beginRenderPass({ + label: "gbuffer pass", + colorAttachments: [ + { + view: this.posTextureView, + loadOp: "clear", + clearValue: [0,0,0,0], + storeOp: "store" + }, + { + view: this.normTextureView, + loadOp: "clear", + clearValue: [0,0,0,0], + storeOp: "store" + }, + { + view: this.albedoTextureView, + loadOp: "clear", + clearValue: [0,0,0,0], + storeOp: "store" + } + ], + depthStencilAttachment: { + view: this.depthTextureView, + depthClearValue: 1.0, + depthLoadOp: "clear", + depthStoreOp: "store" + } + }); + + gbufferPass.setPipeline(this.gbufferPipeline); + gbufferPass.setBindGroup(shaders.constants.bindGroup_scene, this.sceneUniformsBindGroup); + + this.scene.iterate(node => { + gbufferPass.setBindGroup(shaders.constants.bindGroup_model, node.modelBindGroup); + }, material => { + gbufferPass.setBindGroup(shaders.constants.bindGroup_material, material.materialBindGroup); + }, primitive => { + gbufferPass.setVertexBuffer(0, primitive.vertexBuffer); + gbufferPass.setIndexBuffer(primitive.indexBuffer, 'uint32'); + gbufferPass.drawIndexed(primitive.numIndices); + }); + + gbufferPass.end(); + + // 3) Fullscreen pass: read G-buffer, apply lighting using clusters + const fsPass = encoder.beginRenderPass({ + label: "clustered deferred fullscreen pass", + colorAttachments: [ + { + view: canvasTextureView, + loadOp: "clear", + clearValue: [0,0,0,0], + storeOp: "store" + } + ] + }); + + fsPass.setPipeline(this.fullscreenPipeline); + fsPass.setBindGroup(shaders.constants.bindGroup_scene, this.sceneUniformsBindGroup); + // bind G-buffer textures at group index 1 (fullscreen pipeline expects scene at 0, gbuffers at 1) + fsPass.setBindGroup(1, this.gbuffersBindGroup); + + // Draw a fullscreen triangle — no vertex buffers needed if the VS creates positions. 
+ fsPass.draw(3); + fsPass.end(); + + renderer.device.queue.submit([encoder.finish()]); } } diff --git a/src/renderers/forward_plus.ts b/src/renderers/forward_plus.ts index 471796fd..beddf423 100644 --- a/src/renderers/forward_plus.ts +++ b/src/renderers/forward_plus.ts @@ -5,16 +5,144 @@ import { Stage } from '../stage/stage'; export class ForwardPlusRenderer extends renderer.Renderer { // TODO-2: add layouts, pipelines, textures, etc. needed for Forward+ here // you may need extra uniforms such as the camera view matrix and the canvas resolution + sceneUniformsBindGroupLayout: GPUBindGroupLayout; + sceneUniformsBindGroup: GPUBindGroup; + + depthTexture: GPUTexture; + depthTextureView: GPUTextureView; + + pipeline: GPURenderPipeline; constructor(stage: Stage) { super(stage); // TODO-2: initialize layouts, pipelines, textures, etc. needed for Forward+ here + this.sceneUniformsBindGroupLayout = renderer.device.createBindGroupLayout({ + label: "scene uniforms bind group layout", + entries: [ + { + binding: 0, + visibility: GPUShaderStage.VERTEX | GPUShaderStage.FRAGMENT, + buffer: { type: "uniform" } + }, + { // lightSet + binding: 1, + visibility: GPUShaderStage.FRAGMENT, + buffer: { type: "read-only-storage" } + }, + { // clusterSet + binding: 2, + visibility: GPUShaderStage.FRAGMENT, + buffer: { type: "read-only-storage" } + } + ] + }); + + this.sceneUniformsBindGroup = renderer.device.createBindGroup({ + label: "scene uniforms bind group", + layout: this.sceneUniformsBindGroupLayout, + entries: [ + // TODO-1.2: add an entry for camera uniforms at binding 0 + // you can access the camera using `this.camera` + // if you run into TypeScript errors, you're probably trying to upload the host buffer instead + { + binding: 0, + resource: { buffer: this.camera.uniformsBuffer} + }, + { + binding: 1, + resource: { buffer: this.lights.lightSetStorageBuffer } + }, + { + binding: 2, + resource: {buffer: this.lights.clusterSetStorageBuffer} + } + ] + }); + + this.depthTexture 
= renderer.device.createTexture({ + size: [renderer.canvas.width, renderer.canvas.height], + format: "depth24plus", + usage: GPUTextureUsage.RENDER_ATTACHMENT + }); + this.depthTextureView = this.depthTexture.createView(); + + this.pipeline = renderer.device.createRenderPipeline({ + layout: renderer.device.createPipelineLayout({ + label: "forward plus pipeline layout", + bindGroupLayouts: [ + this.sceneUniformsBindGroupLayout, + renderer.modelBindGroupLayout, + renderer.materialBindGroupLayout + ] + }), + depthStencil: { + depthWriteEnabled: true, + depthCompare: "less", + format: "depth24plus" + }, + vertex: { + module: renderer.device.createShaderModule({ + label: "naive vert shader", + code: shaders.naiveVertSrc + }), + buffers: [ renderer.vertexBufferLayout ] + }, + fragment: { + module: renderer.device.createShaderModule({ + label: "forward plus frag shader", + code: shaders.forwardPlusFragSrc, + }), + targets: [ + { + format: renderer.canvasFormat, + } + ] + } + }); } override draw() { // TODO-2: run the Forward+ rendering pass: // - run the clustering compute shader // - run the main rendering pass, using the computed clusters for efficient lighting + const encoder = renderer.device.createCommandEncoder(); + const canvasTextureView = renderer.context.getCurrentTexture().createView(); + + this.lights.doLightClustering(encoder); + const renderPass = encoder.beginRenderPass({ + label: "forward plus render pass", + colorAttachments: [ + { + view: canvasTextureView, + clearValue: [0, 0, 0, 0], + loadOp: "clear", + storeOp: "store" + } + ], + depthStencilAttachment: { + view: this.depthTextureView, + depthClearValue: 1.0, + depthLoadOp: "clear", + depthStoreOp: "store" + } + }); + renderPass.setPipeline(this.pipeline); + + renderPass.setBindGroup(shaders.constants.bindGroup_scene, this.sceneUniformsBindGroup); + + this.scene.iterate(node => { + renderPass.setBindGroup(shaders.constants.bindGroup_model, node.modelBindGroup); + }, material => { + 
renderPass.setBindGroup(shaders.constants.bindGroup_material, material.materialBindGroup); + }, primitive => { + renderPass.setVertexBuffer(0, primitive.vertexBuffer); + renderPass.setIndexBuffer(primitive.indexBuffer, 'uint32'); + renderPass.drawIndexed(primitive.numIndices); + }); + + renderPass.end(); + + renderer.device.queue.submit([encoder.finish()]); } } diff --git a/src/renderers/naive.ts b/src/renderers/naive.ts index 0bf82417..02b093e3 100644 --- a/src/renderers/naive.ts +++ b/src/renderers/naive.ts @@ -18,6 +18,11 @@ export class NaiveRenderer extends renderer.Renderer { label: "scene uniforms bind group layout", entries: [ // TODO-1.2: add an entry for camera uniforms at binding 0, visible to only the vertex shader, and of type "uniform" + { + binding: 0, + visibility: GPUShaderStage.VERTEX, + buffer: { type: "uniform" } + }, { // lightSet binding: 1, visibility: GPUShaderStage.FRAGMENT, @@ -33,6 +38,10 @@ export class NaiveRenderer extends renderer.Renderer { // TODO-1.2: add an entry for camera uniforms at binding 0 // you can access the camera using `this.camera` // if you run into TypeScript errors, you're probably trying to upload the host buffer instead + { + binding: 0, + resource: { buffer: this.camera.uniformsBuffer} + }, { binding: 1, resource: { buffer: this.lights.lightSetStorageBuffer } @@ -85,7 +94,8 @@ export class NaiveRenderer extends renderer.Renderer { override draw() { const encoder = renderer.device.createCommandEncoder(); const canvasTextureView = renderer.context.getCurrentTexture().createView(); - + + this.lights.doLightClustering(encoder); const renderPass = encoder.beginRenderPass({ label: "naive render pass", colorAttachments: [ @@ -106,6 +116,7 @@ export class NaiveRenderer extends renderer.Renderer { renderPass.setPipeline(this.pipeline); // TODO-1.2: bind `this.sceneUniformsBindGroup` to index `shaders.constants.bindGroup_scene` + renderPass.setBindGroup(shaders.constants.bindGroup_scene, this.sceneUniformsBindGroup); 
this.scene.iterate(node => { renderPass.setBindGroup(shaders.constants.bindGroup_model, node.modelBindGroup); diff --git a/src/shaders/clustered_deferred.fs.wgsl b/src/shaders/clustered_deferred.fs.wgsl index 4e86f573..a8963f2c 100644 --- a/src/shaders/clustered_deferred.fs.wgsl +++ b/src/shaders/clustered_deferred.fs.wgsl @@ -1,3 +1,31 @@ // TODO-3: implement the Clustered Deferred G-buffer fragment shader // This shader should only store G-buffer information and should not do any shading. +@group(${bindGroup_material}) @binding(0) var diffuseTex: texture_2d; +@group(${bindGroup_material}) @binding(1) var diffuseTexSampler: sampler; + +struct FragmentInput +{ + @location(0) pos: vec3f, + @location(1) nor: vec3f, + @location(2) uv: vec2f +} + +struct GBufferOut { + @location(0) pos: vec4f, + @location(1) nor: vec4f, + @location(2) albedo: vec4f +} + +@fragment +fn main(in: FragmentInput) -> GBufferOut { + let albedo = textureSample(diffuseTex, diffuseTexSampler, in.uv); + if (albedo.a < 0.5f) { + discard; + } + var out: GBufferOut; + out.pos = vec4f(in.pos, 1.0); + out.nor = vec4f(normalize(in.nor), 1.0); + out.albedo = albedo; + return out; +} diff --git a/src/shaders/clustered_deferred_fullscreen.fs.wgsl b/src/shaders/clustered_deferred_fullscreen.fs.wgsl index 68235c41..2d6cef39 100644 --- a/src/shaders/clustered_deferred_fullscreen.fs.wgsl +++ b/src/shaders/clustered_deferred_fullscreen.fs.wgsl @@ -1,3 +1,51 @@ // TODO-3: implement the Clustered Deferred fullscreen fragment shader // Similar to the Forward+ fragment shader, but with vertex information coming from the G-buffer instead. 
+@group(${bindGroup_scene}) @binding(0) var camera: CameraUniforms; +@group(${bindGroup_scene}) @binding(1) var lightSet: LightSet; +@group(${bindGroup_scene}) @binding(2) var clusterSet: ClusterSet; + +@group(1) @binding(0) var posTex: texture_2d; +@group(1) @binding(1) var normTex: texture_2d; +@group(1) @binding(2) var albedoTex: texture_2d; +@group(1) @binding(3) var sampler0: sampler; + +struct FSIn { + @builtin(position) fragPos: vec4f +} + +const cDim = vec3u(${numClusterX}, ${numClusterY}, ${numClusterZ}); +const zNear = ${sceneNear}; +const zFar = ${sceneFar}; + +@fragment +fn main(in: FSIn) -> @location(0) vec4f { + //let pos = textureSample(posTex, sampler0, in.uv).xyz; + let pos = textureLoad(posTex, vec2(in.fragPos.xy), 0); + let norm = textureLoad(normTex, vec2(in.fragPos.xy), 0); + let albedo = textureLoad(albedoTex, vec2(in.fragPos.xy), 0); + + // compute cluster indices similar to Forward+ + let ndcPos = camera.viewProjMat * vec4f(pos.xyz, 1.0); + let viewPos = camera.viewMat * vec4f(pos.xyz, 1.0); + let ndc = ndcPos.xyz / ndcPos.w; + + let cx = u32(clamp(floor((ndc.x * 0.5 + 0.5) * f32(cDim.x)), 0.0, f32(cDim.x-1u))); + let cy = u32(clamp(floor((ndc.y * 0.5 + 0.5) * f32(cDim.y)), 0.0, f32(cDim.y-1u))); + + let depthz = clamp(-viewPos.z, zNear, zFar); + let czF = (depthz - zNear) / (zFar - zNear) * f32(cDim.z); + let cz = u32(clamp(floor(czF), 0.0, f32(cDim.z - 1u))); + + let clusterIdx = clamp(cz * cDim.x * cDim.y + cy * cDim.x + cx, 0u, cDim.x*cDim.y*cDim.z - 1u); + let cluster = &clusterSet.clusters[clusterIdx]; + + var totalLightContrib = vec3f(0.0, 0.0, 0.0); + for (var i = 0u; i < (*cluster).numLights; i++) { + let light = lightSet.lights[(*cluster).lightIndices[i]]; + totalLightContrib += calculateLightContrib(light, pos.xyz, normalize(norm.xyz)); + } + + let finalColor = albedo.rgb * totalLightContrib; + return vec4f(finalColor, 1.0); +} diff --git a/src/shaders/clustered_deferred_fullscreen.vs.wgsl 
b/src/shaders/clustered_deferred_fullscreen.vs.wgsl index 1e43a884..144e0903 100644 --- a/src/shaders/clustered_deferred_fullscreen.vs.wgsl +++ b/src/shaders/clustered_deferred_fullscreen.vs.wgsl @@ -1,3 +1,19 @@ // TODO-3: implement the Clustered Deferred fullscreen vertex shader // This shader should be very simple as it does not need all of the information passed by the the naive vertex shader. +struct VertOut { + @builtin(position) pos: vec4f, + @location(0) uv: vec2f +} + +@vertex +fn main(@builtin(vertex_index) vi: u32) -> VertOut { + var out: VertOut; + // Standard full-screen triangle positions + let positions = array(vec2f(-1.0, -1.0), vec2f(3.0, -1.0), vec2f(-1.0, 3.0)); + let p = positions[vi]; + out.pos = vec4f(p.x, p.y, 0.0, 1.0); + // UVs mapped from clip space to [0,1] + // out.uv = vec2f((p.x * 0.5) + 0.5, (p.y * 0.5) + 0.5); + return out; +} diff --git a/src/shaders/clustering.cs.wgsl b/src/shaders/clustering.cs.wgsl index 575d6e5a..e65ebde7 100644 --- a/src/shaders/clustering.cs.wgsl +++ b/src/shaders/clustering.cs.wgsl @@ -21,3 +21,119 @@ // - Stop adding lights if the maximum number of lights is reached. // - Store the number of lights assigned to this cluster. 
+ +@group(${bindGroup_scene}) @binding(0) var lightSet: LightSet; +@group(${bindGroup_scene}) @binding(1) var clusterSet: ClusterSet; + +@group(${bindGroup_scene}) @binding(2) var camera: CameraUniforms; + +const cDim = vec3u(${numClusterX}, ${numClusterY}, ${numClusterZ}); +const numClusters = cDim.x * cDim.y * cDim.z; + +fn zlinearSliceNdc(near: f32, far: f32, totalz: f32, curz: f32) ->vec2f { + let dn: f32 = -mix(near, far, curz / totalz); + let df: f32 = -mix(near, far, (curz+ 1.0) / totalz); + // convert view to ndc + let viewn = vec4f(0.0,0.0,dn,1.0); + let viewf = vec4f(0.0,0.0,df,1.0); + + let clipn = camera.projMat * viewn; + let ndcn = clipn.z / clipn.w; + let clipf = camera.projMat * viewf; + let ndcf = clipf.z / clipf.w; + + return vec2f(ndcn, ndcf); +} + +fn zlogSliceNdc(near: f32, far: f32, totalz: f32, curz: f32) -> vec2f { + let logNear = log(near); + let logFar = log(far); + + let t0 = curz / totalz; + let t1 = (curz + 1.0) / totalz; + + let zNearSlice = exp(mix(logNear, logFar, t0)); + let zFarSlice = exp(mix(logNear, logFar, t1)); + + let dn = -zNearSlice; + let df = -zFarSlice; + + let viewn = vec4f(0.0, 0.0, dn, 1.0); + let viewf = vec4f(0.0, 0.0, df, 1.0); + + let clipn = camera.projMat * viewn; + let ndcn = clipn.z / clipn.w; + let clipf = camera.projMat * viewf; + let ndcf = clipf.z / clipf.w; + + return vec2f(ndcn, ndcf); +} + +fn sphereAABBIntersectionTest(c:vec3f, r:f32, bmin:vec3f, bmax:vec3f) -> bool { + let nearest = clamp(c, bmin, bmax); + + let dist2 = dot(c-nearest, c-nearest); + if (dist2 < r*r) { + return true; + } else { + return false; + } +} + +@compute +@workgroup_size(${clusterLightsWorkgroupSize}) +fn main(@builtin(global_invocation_id) globalIdx: vec3u) { + let clusterIdx = globalIdx.x; // idx = x + y*dimX + z*dimX*dimY; + if (clusterIdx >= numClusters) { + return; + } + let cz = clusterIdx / (cDim.x*cDim.y); + let cy = (clusterIdx-cz*cDim.x*cDim.y) / cDim.x; + let cx = clusterIdx-cz*cDim.x*cDim.y-cy*cDim.x; + + // let 
ndcx0 = (f32(cx) / f32(cDim.x)) * 2.0 - 1.0; + // let ndcx1 = (f32(cx+1u) / f32(cDim.x)) * 2.0 - 1.0; + // let ndcy0 = (f32(cy) / f32(cDim.y)) * 2.0 - 1.0; + // let ndcy1 = (f32(cy+1u) / f32(cDim.y)) * 2.0 - 1.0; + let ndcx = vec2f((f32(cx) / f32(cDim.x)) * 2.0 - 1.0, (f32(cx+1u) / f32(cDim.x)) * 2.0 - 1.0); + let ndcy = vec2f((f32(cy) / f32(cDim.y)) * 2.0 - 1.0, (f32(cy+1u) / f32(cDim.y)) * 2.0 - 1.0); + + let ndcz = zlinearSliceNdc(${sceneNear}, ${sceneFar}, f32(cDim.z), f32(cz)); + + var bmin = vec3f( 1e30, 1e30, 1e30); + var bmax = vec3f(-1e30, -1e30, -1e30); + + for(var i=0u; i<2u;i++) { + for(var j=0u; j<2u;j++) { + for(var k=0u; k<2u;k++) { + let curNdcCorner = vec4f(ndcx[i],ndcy[j],ndcz[k],1.0); + let curViewCornerRaw = camera.invProjMat * curNdcCorner; + let curViewCorner = curViewCornerRaw.xyz / curViewCornerRaw.w; + + bmin = min(bmin, curViewCorner); + bmax = max(bmax, curViewCorner); + } + } + } + + let bbox = AABB(bmin, bmax); + var curNLights = 0u; + var lightIndices = array(); + + let lightRadius = f32(${lightRadius}); + for (var lightIdx = 0u; lightIdx < lightSet.numLights; lightIdx++) { + if (curNLights >= ${numLightsPerCluster}) { + break; + } + let lightPosWorld = lightSet.lights[lightIdx].pos; + let lightPosView = (camera.viewMat * vec4f(lightPosWorld, 1.0)).xyz; + + if(sphereAABBIntersectionTest(lightPosView, lightRadius, bbox.min, bbox.max)) { + lightIndices[curNLights] = lightIdx; + curNLights += 1u; + } + } + + let cluster = Cluster(bbox, curNLights, lightIndices); + clusterSet.clusters[clusterIdx] = cluster; +} \ No newline at end of file diff --git a/src/shaders/common.wgsl b/src/shaders/common.wgsl index 738e9c4e..77726558 100644 --- a/src/shaders/common.wgsl +++ b/src/shaders/common.wgsl @@ -11,9 +11,29 @@ struct LightSet { } // TODO-2: you may want to create a ClusterSet struct similar to LightSet +struct AABB { + min:vec3f, + max:vec3f +}; + +struct Cluster { + bbox: AABB, + numLights: u32, + lightIndices: array +}; + +struct 
ClusterSet { + clusters: array +}; struct CameraUniforms { // TODO-1.3: add an entry for the view proj mat (of type mat4x4f) + viewProjMat: mat4x4f, + projMat: mat4x4f, + invProjMat: mat4x4f, + viewMat: mat4x4f, + screenSize: vec2f, + zNearFar: vec2f } // CHECKITOUT: this special attenuation function ensures lights don't affect geometry outside the maximum light radius diff --git a/src/shaders/forward_plus.fs.wgsl b/src/shaders/forward_plus.fs.wgsl index 0500e3df..0470adad 100644 --- a/src/shaders/forward_plus.fs.wgsl +++ b/src/shaders/forward_plus.fs.wgsl @@ -14,3 +14,65 @@ // Add the calculated contribution to the total light accumulation. // Multiply the fragment’s diffuse color by the accumulated light contribution. // Return the final color, ensuring that the alpha component is set appropriately (typically to 1). +@group(${bindGroup_scene}) @binding(0) var camera: CameraUniforms; +@group(${bindGroup_scene}) @binding(1) var lightSet: LightSet; +@group(${bindGroup_scene}) @binding(2) var clusterSet: ClusterSet; + + +@group(${bindGroup_material}) @binding(0) var diffuseTex: texture_2d; +@group(${bindGroup_material}) @binding(1) var diffuseTexSampler: sampler; + +struct FragmentInput +{ + @builtin(position) fragPos: vec4f, + @location(0) pos: vec3f, + @location(1) nor: vec3f, + @location(2) uv: vec2f, + @location(3) pos_view: vec3f +} + +const cDim = vec3u(${numClusterX}, ${numClusterY}, ${numClusterZ}); +const numClusters = cDim.x * cDim.y * cDim.z; + +const zNear = ${sceneNear}; +const zFar = ${sceneFar}; + +@fragment +fn main(in: FragmentInput) -> @location(0) vec4f +{ + let diffuseColor = textureSample(diffuseTex, diffuseTexSampler, in.uv); + if (diffuseColor.a < 0.5f) { + discard; + } + + var totalLightContrib = vec3f(0, 0, 0); + + let cx = u32(floor(in.fragPos.x / camera.screenSize.x * f32(cDim.x))); + let cy = u32(floor((1.0-(in.fragPos.y / camera.screenSize.y)) * f32(cDim.y))); + + let depthz = clamp(-in.pos_view.z, zNear, zFar); + let czF = (depthz - 
zNear) / (zFar - zNear) * f32(cDim.z); + let cz = u32(clamp(floor(czF), 0.0, f32(cDim.z - 1u))); + // if(depthz<25.0) { + // return vec4(0.0,0.0,0.0,1.0); + // } + + let clusterIdx = clamp(cz * cDim.x * cDim.y + cy * cDim.x + cx, 0u, numClusters-1u); + //let clusterIdx = 0u; + let cluster = &clusterSet.clusters[clusterIdx]; + // if (cluster.numLights >= 50u) { + // return vec4(1.0,0.0,0.0, 1.0); + // } + + for (var lightIdx = 0u; lightIdx < (*cluster).numLights; lightIdx++) { + let light = lightSet.lights[(*cluster).lightIndices[lightIdx]]; + totalLightContrib += calculateLightContrib(light, in.pos, normalize(in.nor)); + } + + var finalColor = diffuseColor.rgb * totalLightContrib; + // let r = f32(clusterIdx % 256u) / 255.0; + // let g = f32((clusterIdx / 256u) % 256u) / 255.0; + // let b = f32(clusterIdx / (256u * 256u)) / 255.0; + // return vec4f(r, g, b, 1.0); + return vec4(finalColor, 1); +} \ No newline at end of file diff --git a/src/shaders/naive.vs.wgsl b/src/shaders/naive.vs.wgsl index 5a7ddd4b..925ad716 100644 --- a/src/shaders/naive.vs.wgsl +++ b/src/shaders/naive.vs.wgsl @@ -4,6 +4,7 @@ // make sure to use ${bindGroup_scene} for the group @group(${bindGroup_model}) @binding(0) var modelMat: mat4x4f; +@group(${bindGroup_scene}) @binding(0) var camera: CameraUniforms; struct VertexInput { @@ -17,7 +18,8 @@ struct VertexOutput @builtin(position) fragPos: vec4f, @location(0) pos: vec3f, @location(1) nor: vec3f, - @location(2) uv: vec2f + @location(2) uv: vec2f, + @location(3) pos_view: vec3f } @vertex @@ -26,9 +28,10 @@ fn main(in: VertexInput) -> VertexOutput let modelPos = modelMat * vec4(in.pos, 1); var out: VertexOutput; - out.fragPos = ??? * modelPos; // TODO-1.3: replace ??? with the view proj mat from your CameraUniforms uniform variable + out.fragPos = camera.viewProjMat * modelPos; // TODO-1.3: replace ??? 
with the view proj mat from your CameraUniforms uniform variable out.pos = modelPos.xyz / modelPos.w; out.nor = in.nor; out.uv = in.uv; + out.pos_view = (camera.viewMat * modelPos).xyz; return out; } diff --git a/src/shaders/shaders.ts b/src/shaders/shaders.ts index 584c008f..8ac5a1ee 100644 --- a/src/shaders/shaders.ts +++ b/src/shaders/shaders.ts @@ -30,7 +30,17 @@ export const constants = { moveLightsWorkgroupSize: 128, - lightRadius: 2 + lightRadius: 2.0, + + numLightsPerCluster: 512, + numClusterX : 32, + numClusterY : 32, + numClusterZ : 32, + + sceneNear : 0.1, + sceneFar: 25.0, + clusterLightsWorkgroupSize: 128 + }; // ================================= diff --git a/src/stage/camera.ts b/src/stage/camera.ts index 7d2a4a1e..1dc33a7b 100644 --- a/src/stage/camera.ts +++ b/src/stage/camera.ts @@ -3,14 +3,47 @@ import { toRadians } from "../math_util"; import { device, canvas, fovYDegrees, aspectRatio } from "../renderer"; class CameraUniforms { - readonly buffer = new ArrayBuffer(16 * 4); - private readonly floatView = new Float32Array(this.buffer); + + // const CameraUniformsValues = new ArrayBuffer(272); + // const CameraUniformsViews = { + // viewProjMat: new Float32Array(CameraUniformsValues, 0, 16), + // projMat: new Float32Array(CameraUniformsValues, 64, 16), + // invProjMat: new Float32Array(CameraUniformsValues, 128, 16), + // viewMat: new Float32Array(CameraUniformsValues, 192, 16), + // screenSize: new Float32Array(CameraUniformsValues, 256, 2), + // zNearFar: new Float32Array(CameraUniformsValues, 264, 2), + // }; + + readonly buffer = new ArrayBuffer(272); + //private readonly floatView = new Float32Array(this.buffer); + private readonly viewProjMatView = new Float32Array(this.buffer, 0, 16); + private readonly projMatView = new Float32Array(this.buffer, 64, 16); + private readonly invProjMatView = new Float32Array(this.buffer, 128, 16); + private readonly viewMatView = new Float32Array(this.buffer, 192, 16); + private readonly screenSizeView = new 
Float32Array(this.buffer, 256, 2); + private readonly zNearFarView = new Float32Array(this.buffer, 264, 2); set viewProjMat(mat: Float32Array) { // TODO-1.1: set the first 16 elements of `this.floatView` to the input `mat` + this.viewProjMatView.set(mat); } - // TODO-2: add extra functions to set values needed for light clustering here + set projMat(mat: Float32Array) { + this.projMatView.set(mat); + } + set invProjMat(mat: Float32Array){ + this.invProjMatView.set(mat); + } + set viewMat(mat: Float32Array) { + this.viewMatView.set(mat); + } + set screenSize(size: Float32Array) { + this.screenSizeView.set(size); + } + set zNearFar(z: Float32Array) { + this.zNearFarView.set(z); + } + } export class Camera { @@ -38,6 +71,11 @@ export class Camera { // check `lights.ts` for examples of using `device.createBuffer()` // // note that you can add more variables (e.g. inverse proj matrix) to this buffer in later parts of the assignment + this.uniformsBuffer = device.createBuffer({ + label: "uniform buffer", + size: this.uniforms.buffer.byteLength, + usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST + }); this.projMat = mat4.perspective(toRadians(fovYDegrees), aspectRatio, Camera.nearPlane, Camera.farPlane); @@ -128,11 +166,17 @@ export class Camera { const lookPos = vec3.add(this.cameraPos, vec3.scale(this.cameraFront, 1)); const viewMat = mat4.lookAt(this.cameraPos, lookPos, [0, 1, 0]); const viewProjMat = mat4.mul(this.projMat, viewMat); + const invProjMat = mat4.inverse(this.projMat); // TODO-1.1: set `this.uniforms.viewProjMat` to the newly calculated view proj mat - + this.uniforms.viewProjMat = viewProjMat; // TODO-2: write to extra buffers needed for light clustering here - + this.uniforms.projMat = this.projMat; + this.uniforms.invProjMat = invProjMat; + this.uniforms.viewMat = viewMat; + this.uniforms.screenSize = new Float32Array([canvas.width, canvas.height]); + this.uniforms.zNearFar = new Float32Array([Camera.nearPlane, Camera.farPlane]); // TODO-1.1: 
upload `this.uniforms.buffer` (host side) to `this.uniformsBuffer` (device side) // check `lights.ts` for examples of using `device.queue.writeBuffer()` + device.queue.writeBuffer(this.uniformsBuffer, 0, this.uniforms.buffer); } } diff --git a/src/stage/lights.ts b/src/stage/lights.ts index a6eed919..4e44a9e4 100644 --- a/src/stage/lights.ts +++ b/src/stage/lights.ts @@ -29,6 +29,13 @@ export class Lights { moveLightsComputePipeline: GPUComputePipeline; // TODO-2: add layouts, pipelines, textures, etc. needed for light clustering here + static readonly numClusters = shaders.constants.numClusterX * shaders.constants.numClusterY * shaders.constants.numClusterZ; + static readonly bytesPerCluster = 32 + 4 + 4 * shaders.constants.numLightsPerCluster + 12; // assume numlightspercluster is pot so pad 12 + clusterArray = new ArrayBuffer(Lights.numClusters * Lights.bytesPerCluster); + clusterSetStorageBuffer: GPUBuffer; + clusterLightsComputeBindGroupLayout: GPUBindGroupLayout; + clusterLightsComputeBindGroup: GPUBindGroup; + clusterLightsComputePipeline: GPUComputePipeline; constructor(camera: Camera) { this.camera = camera; @@ -94,6 +101,66 @@ export class Lights { }); // TODO-2: initialize layouts, pipelines, textures, etc. 
needed for light clustering here + this.clusterSetStorageBuffer = device.createBuffer({ + label: "clusters", + size: this.clusterArray.byteLength, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST + }); + + this.clusterLightsComputeBindGroupLayout = device.createBindGroupLayout({ + label: "cluster lights compute bind group layout", + entries: [ + { // lightSet + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: "read-only-storage" } + }, + { // clusterSet + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: "storage" } + }, + { // camera uniform + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: "uniform" } + } + ] + }); + + this.clusterLightsComputeBindGroup = device.createBindGroup({ + label: "cluster lights compute bind group", + layout: this.clusterLightsComputeBindGroupLayout, + entries: [ + { + binding: 0, + resource: { buffer: this.lightSetStorageBuffer } + }, + { + binding: 1, + resource: { buffer: this.clusterSetStorageBuffer} + }, + { + binding: 2, + resource: { buffer: this.camera.uniformsBuffer } + } + ] + }); + + this.clusterLightsComputePipeline = device.createComputePipeline({ + label: "cluster lights compute pipeline", + layout: device.createPipelineLayout({ + label: "cluster lights compute pipeline layout", + bindGroupLayouts: [ this.clusterLightsComputeBindGroupLayout ] + }), + compute: { + module: device.createShaderModule({ + label: "cluster lights compute shader", + code: shaders.clusteringComputeSrc + }), + entryPoint: "main" + } + }); } private populateLightsBuffer() { @@ -113,6 +180,14 @@ export class Lights { doLightClustering(encoder: GPUCommandEncoder) { // TODO-2: run the light clustering compute pass(es) here // implementing clustering here allows for reusing the code in both Forward+ and Clustered Deferred + const computePass = encoder.beginComputePass(); + computePass.setPipeline(this.clusterLightsComputePipeline); + computePass.setBindGroup(0, 
this.clusterLightsComputeBindGroup); + + const workgroupCount = Math.ceil(Lights.numClusters / shaders.constants.clusterLightsWorkgroupSize); + computePass.dispatchWorkgroups(workgroupCount); + + computePass.end(); } // CHECKITOUT: this is where the light movement compute shader is dispatched from the host