diff --git a/README.md b/README.md index edffdaf..0b4ee42 100644 --- a/README.md +++ b/README.md @@ -2,25 +2,47 @@ **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 5** -* (TODO) YOUR NAME HERE -* Tested on: (TODO) **Google Chrome 222.2** on - Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Daniel Chen +* Tested on: Chromium 144 - Windows 11, AMD Ryzen 7 8845HS w/ Radeon 780M Graphics (3.80 GHz), RTX 4070 notebook + +This project is a WebGPU implementation of point cloud and Gaussian splat rendering. + +Gaussian splat rendering is a rendering technique that depicts a set of oriented, scaled, colored points as small, volumetric Gaussian distributions in 3D space. This model is often used in photogrammetry, recreating 3D scenes from photo data, by gradually making the set of points converge onto a progressively more accurate depiction of a scene. The resulting data can be stored in a `.ply` file ([samples](https://drive.google.com/drive/folders/1KOoKk4plvl720-nQEiqLcuTCMFizt0cc?usp=sharing)), which, alongside a JSON file with camera data, can be read by this web app and rendered in real time as either a point cloud (only centers are drawn, which is faster since there is less to render, letting you visualize the scene through the density of points) or as the full set of Gaussian splats. ### Live Demo -[![](img/thumb.png)](http://TODO.github.io/Project4-WebGPU-Forward-Plus-and-Clustered-Deferred) +https://enamchae.github.io/Project5-WebGPU-Gaussian-Splat-Viewer/ + +[![](./images/cover.png)](https://enamchae.github.io/Project5-WebGPU-Gaussian-Splat-Viewer/) ### Demo Video/GIF -[![](img/video.mp4)](TODO) +https://github.com/user-attachments/assets/e26cffa1-f91a-46f7-9415-3bf389344195 + +### Performance + +For many scenes, there is a load time of several seconds while splat data is read from the `.ply` file. The analysis below only deals with the render time after all this data has been loaded. 
+ +#### Preprocessing workgroup size +For the bonsai scene above at the default angle, using the Gaussian splat renderer at the default splat scale, it is difficult to compare different workgroup sizes for the preprocessing compute shader, as adjusting the workgroup size too low causes my GPU to hang. Workgroup sizes of 64 or below hang immediately on the bonsai scene, 128 remains steady at around 7 to 28 ms/frame but hangs as more splats are moved onto the screen, and 256 has no issues while achieving a similar frame time. The workgroup size can be lowered further when sorting is disabled, which suggests that the number of workgroup dispatches needed to perform the radix sort may be what causes issues at smaller workgroup sizes. When sorting is disabled, the frame time hovers in the 6 to 28 ms/frame range at all workgroup sizes between 16 and 256. + +#### Half-precision packing +To save on some additional memory per splat, some `f32` fields on the `Splat` struct are compressed into paired-up `f16` fields instead. With this, we can drop down from 48 bytes to 32 bytes per `Splat` (but we have too many fields to reach 16 bytes), which can be beneficial seeing as many splats will make up a scene. The render time remains roughly the same. + +#### View frustum culling +In the preprocess step, we flag splats as being culled if they lie outside the camera's view frustum plus a margin of 0.2 NDC units (10% of the viewport size) on each side. On the bicycle scene above, the benefits of view frustum culling are noticeable especially when a significant portion of the model is off-screen. At roughly the angle pictured below, the render time is about 28 ms/frame with view frustum culling and about 50 ms/frame without, but both remain at about 70 ms/frame with the full scene. + +![](./images/occluded.png) + +#### Effects of scene complexity +There is a noticeable performance difference between the bonsai and bicycle scenes above, so the number of splats likely makes a difference. 
In particular, rendering the entire bonsai scene takes around 6 to 21 ms/frame whereas the bicycle scene can take around 70 ms/frame, even though they cover a similar proportion of the frame. -### (TODO: Your README) +|Bonsai|Bicycle| +|-|-| +|![](./images/bonzaiframe.png)|![](./images/bicycleframe.png)| -*DO NOT* leave the README to the last minute! It is a crucial part of the -project, and we will not be able to grade you without a good README. +The cleaned bicycle scene above has about 4 times as many splats (1 063 091) as the bonsai scene (272 956), which could mean more threads have to be run in sequence in the preprocessing and sorting steps. An additional bottleneck could be the handling of the atomic sort count, which would require each thread encountering the `atomicAdd` to be executed in sequence. One way to avoid this could be to use a prefix sum instead of a linear addition, achieving an `O(log n)` ideally parallel time complexity rather than `O(n)`. -This assignment has a considerable amount of performance analysis compared -to implementation work. Complete the implementation early to leave time! 
### Credits diff --git a/deno.lock b/deno.lock new file mode 100644 index 0000000..b8dac0b --- /dev/null +++ b/deno.lock @@ -0,0 +1,299 @@ +{ + "version": "5", + "specifiers": { + "npm:@loaders.gl/core@^4.2.2": "4.3.4", + "npm:@loaders.gl/ply@^4.2.2": "4.3.4_@loaders.gl+core@4.3.4", + "npm:@petamoriken/float16@^3.8.7": "3.9.3", + "npm:@tweakpane/core@^1.1.7": "1.1.9", + "npm:@webgpu/types@~0.1.31": "0.1.66", + "npm:tweakpane-plugin-file-import@0.2": "0.2.1_tweakpane@3.1.10", + "npm:tweakpane@^3.1.8": "3.1.10", + "npm:typescript@^5.0.4": "5.9.3", + "npm:vite-raw-plugin@^1.0.1": "1.0.2", + "npm:vite@^4.3.1": "4.5.14", + "npm:wgpu-matrix@^3.2.0": "3.4.0" + }, + "npm": { + "@esbuild/android-arm64@0.18.20": { + "integrity": "sha512-Nz4rJcchGDtENV0eMKUNa6L12zz2zBDXuhj/Vjh18zGqB44Bi7MBMSXjgunJgjRhCmKOjnPuZp4Mb6OKqtMHLQ==", + "os": ["android"], + "cpu": ["arm64"] + }, + "@esbuild/android-arm@0.18.20": { + "integrity": "sha512-fyi7TDI/ijKKNZTUJAQqiG5T7YjJXgnzkURqmGj13C6dCqckZBLdl4h7bkhHt/t0WP+zO9/zwroDvANaOqO5Sw==", + "os": ["android"], + "cpu": ["arm"] + }, + "@esbuild/android-x64@0.18.20": { + "integrity": "sha512-8GDdlePJA8D6zlZYJV/jnrRAi6rOiNaCC/JclcXpB+KIuvfBN4owLtgzY2bsxnx666XjJx2kDPUmnTtR8qKQUg==", + "os": ["android"], + "cpu": ["x64"] + }, + "@esbuild/darwin-arm64@0.18.20": { + "integrity": "sha512-bxRHW5kHU38zS2lPTPOyuyTm+S+eobPUnTNkdJEfAddYgEcll4xkT8DB9d2008DtTbl7uJag2HuE5NZAZgnNEA==", + "os": ["darwin"], + "cpu": ["arm64"] + }, + "@esbuild/darwin-x64@0.18.20": { + "integrity": "sha512-pc5gxlMDxzm513qPGbCbDukOdsGtKhfxD1zJKXjCCcU7ju50O7MeAZ8c4krSJcOIJGFR+qx21yMMVYwiQvyTyQ==", + "os": ["darwin"], + "cpu": ["x64"] + }, + "@esbuild/freebsd-arm64@0.18.20": { + "integrity": "sha512-yqDQHy4QHevpMAaxhhIwYPMv1NECwOvIpGCZkECn8w2WFHXjEwrBn3CeNIYsibZ/iZEUemj++M26W3cNR5h+Tw==", + "os": ["freebsd"], + "cpu": ["arm64"] + }, + "@esbuild/freebsd-x64@0.18.20": { + "integrity": "sha512-tgWRPPuQsd3RmBZwarGVHZQvtzfEBOreNuxEMKFcd5DaDn2PbBxfwLcj4+aenoh7ctXcbXmOQIn8HI6mCSw5MQ==", + "os": 
["freebsd"], + "cpu": ["x64"] + }, + "@esbuild/linux-arm64@0.18.20": { + "integrity": "sha512-2YbscF+UL7SQAVIpnWvYwM+3LskyDmPhe31pE7/aoTMFKKzIc9lLbyGUpmmb8a8AixOL61sQ/mFh3jEjHYFvdA==", + "os": ["linux"], + "cpu": ["arm64"] + }, + "@esbuild/linux-arm@0.18.20": { + "integrity": "sha512-/5bHkMWnq1EgKr1V+Ybz3s1hWXok7mDFUMQ4cG10AfW3wL02PSZi5kFpYKrptDsgb2WAJIvRcDm+qIvXf/apvg==", + "os": ["linux"], + "cpu": ["arm"] + }, + "@esbuild/linux-ia32@0.18.20": { + "integrity": "sha512-P4etWwq6IsReT0E1KHU40bOnzMHoH73aXp96Fs8TIT6z9Hu8G6+0SHSw9i2isWrD2nbx2qo5yUqACgdfVGx7TA==", + "os": ["linux"], + "cpu": ["ia32"] + }, + "@esbuild/linux-loong64@0.18.20": { + "integrity": "sha512-nXW8nqBTrOpDLPgPY9uV+/1DjxoQ7DoB2N8eocyq8I9XuqJ7BiAMDMf9n1xZM9TgW0J8zrquIb/A7s3BJv7rjg==", + "os": ["linux"], + "cpu": ["loong64"] + }, + "@esbuild/linux-mips64el@0.18.20": { + "integrity": "sha512-d5NeaXZcHp8PzYy5VnXV3VSd2D328Zb+9dEq5HE6bw6+N86JVPExrA6O68OPwobntbNJ0pzCpUFZTo3w0GyetQ==", + "os": ["linux"], + "cpu": ["mips64el"] + }, + "@esbuild/linux-ppc64@0.18.20": { + "integrity": "sha512-WHPyeScRNcmANnLQkq6AfyXRFr5D6N2sKgkFo2FqguP44Nw2eyDlbTdZwd9GYk98DZG9QItIiTlFLHJHjxP3FA==", + "os": ["linux"], + "cpu": ["ppc64"] + }, + "@esbuild/linux-riscv64@0.18.20": { + "integrity": "sha512-WSxo6h5ecI5XH34KC7w5veNnKkju3zBRLEQNY7mv5mtBmrP/MjNBCAlsM2u5hDBlS3NGcTQpoBvRzqBcRtpq1A==", + "os": ["linux"], + "cpu": ["riscv64"] + }, + "@esbuild/linux-s390x@0.18.20": { + "integrity": "sha512-+8231GMs3mAEth6Ja1iK0a1sQ3ohfcpzpRLH8uuc5/KVDFneH6jtAJLFGafpzpMRO6DzJ6AvXKze9LfFMrIHVQ==", + "os": ["linux"], + "cpu": ["s390x"] + }, + "@esbuild/linux-x64@0.18.20": { + "integrity": "sha512-UYqiqemphJcNsFEskc73jQ7B9jgwjWrSayxawS6UVFZGWrAAtkzjxSqnoclCXxWtfwLdzU+vTpcNYhpn43uP1w==", + "os": ["linux"], + "cpu": ["x64"] + }, + "@esbuild/netbsd-x64@0.18.20": { + "integrity": "sha512-iO1c++VP6xUBUmltHZoMtCUdPlnPGdBom6IrO4gyKPFFVBKioIImVooR5I83nTew5UOYrk3gIJhbZh8X44y06A==", + "os": ["netbsd"], + "cpu": ["x64"] + }, + 
"@esbuild/openbsd-x64@0.18.20": { + "integrity": "sha512-e5e4YSsuQfX4cxcygw/UCPIEP6wbIL+se3sxPdCiMbFLBWu0eiZOJ7WoD+ptCLrmjZBK1Wk7I6D/I3NglUGOxg==", + "os": ["openbsd"], + "cpu": ["x64"] + }, + "@esbuild/sunos-x64@0.18.20": { + "integrity": "sha512-kDbFRFp0YpTQVVrqUd5FTYmWo45zGaXe0X8E1G/LKFC0v8x0vWrhOWSLITcCn63lmZIxfOMXtCfti/RxN/0wnQ==", + "os": ["sunos"], + "cpu": ["x64"] + }, + "@esbuild/win32-arm64@0.18.20": { + "integrity": "sha512-ddYFR6ItYgoaq4v4JmQQaAI5s7npztfV4Ag6NrhiaW0RrnOXqBkgwZLofVTlq1daVTQNhtI5oieTvkRPfZrePg==", + "os": ["win32"], + "cpu": ["arm64"] + }, + "@esbuild/win32-ia32@0.18.20": { + "integrity": "sha512-Wv7QBi3ID/rROT08SABTS7eV4hX26sVduqDOTe1MvGMjNd3EjOz4b7zeexIR62GTIEKrfJXKL9LFxTYgkyeu7g==", + "os": ["win32"], + "cpu": ["ia32"] + }, + "@esbuild/win32-x64@0.18.20": { + "integrity": "sha512-kTdfRcSiDfQca/y9QIkng02avJ+NCaQvrMejlsB3RRv5sE9rRoeBPISaZpKxHELzRxZyLvNts1P27W3wV+8geQ==", + "os": ["win32"], + "cpu": ["x64"] + }, + "@loaders.gl/core@4.3.4": { + "integrity": "sha512-cG0C5fMZ1jyW6WCsf4LoHGvaIAJCEVA/ioqKoYRwoSfXkOf+17KupK1OUQyUCw5XoRn+oWA1FulJQOYlXnb9Gw==", + "dependencies": [ + "@loaders.gl/loader-utils", + "@loaders.gl/schema", + "@loaders.gl/worker-utils", + "@probe.gl/log" + ] + }, + "@loaders.gl/loader-utils@4.3.4_@loaders.gl+core@4.3.4": { + "integrity": "sha512-tjMZvlKQSaMl2qmYTAxg+ySR6zd6hQn5n3XaU8+Ehp90TD3WzxvDKOMNDqOa72fFmIV+KgPhcmIJTpq4lAdC4Q==", + "dependencies": [ + "@loaders.gl/core", + "@loaders.gl/schema", + "@loaders.gl/worker-utils", + "@probe.gl/log", + "@probe.gl/stats" + ] + }, + "@loaders.gl/ply@4.3.4_@loaders.gl+core@4.3.4": { + "integrity": "sha512-9/ijcIK2xP0cgSM3BmoS5JXfRXe6PKuIGHNteqJHhrQ5nwx1UrupmsYPXj58FykYVZx6PTBshMs9OmIXvPHduw==", + "dependencies": [ + "@loaders.gl/core", + "@loaders.gl/loader-utils", + "@loaders.gl/schema" + ] + }, + "@loaders.gl/schema@4.3.4_@loaders.gl+core@4.3.4": { + "integrity": "sha512-1YTYoatgzr/6JTxqBLwDiD3AVGwQZheYiQwAimWdRBVB0JAzych7s1yBuE0CVEzj4JDPKOzVAz8KnU1TiBvJGw==", + 
"dependencies": [ + "@loaders.gl/core", + "@types/geojson" + ] + }, + "@loaders.gl/worker-utils@4.3.4_@loaders.gl+core@4.3.4": { + "integrity": "sha512-EbsszrASgT85GH3B7jkx7YXfQyIYo/rlobwMx6V3ewETapPUwdSAInv+89flnk5n2eu2Lpdeh+2zS6PvqbL2RA==", + "dependencies": [ + "@loaders.gl/core" + ] + }, + "@petamoriken/float16@3.9.3": { + "integrity": "sha512-8awtpHXCx/bNpFt4mt2xdkgtgVvKqty8VbjHI/WWWQuEw+KLzFot3f4+LkQY9YmOtq7A5GdOnqoIC8Pdygjk2g==" + }, + "@probe.gl/env@4.1.0": { + "integrity": "sha512-5ac2Jm2K72VCs4eSMsM7ykVRrV47w32xOGMvcgqn8vQdEMF9PRXyBGYEV9YbqRKWNKpNKmQJVi4AHM/fkCxs9w==" + }, + "@probe.gl/log@4.1.0": { + "integrity": "sha512-r4gRReNY6f+OZEMgfWEXrAE2qJEt8rX0HsDJQXUBMoc+5H47bdB7f/5HBHAmapK8UydwPKL9wCDoS22rJ0yq7Q==", + "dependencies": [ + "@probe.gl/env" + ] + }, + "@probe.gl/stats@4.1.0": { + "integrity": "sha512-EI413MkWKBDVNIfLdqbeNSJTs7ToBz/KVGkwi3D+dQrSIkRI2IYbWGAU3xX+D6+CI4ls8ehxMhNpUVMaZggDvQ==" + }, + "@tweakpane/core@1.1.9": { + "integrity": "sha512-9tq+KAhaqPiOgsFyLPAz1IMXkVfhRqxGzAgy1ps3As6o3W7XjnU7sev6OlD/Z+Pzw8uZVMukkSHf2e0uCU6u0A==" + }, + "@types/geojson@7946.0.16": { + "integrity": "sha512-6C8nqWur3j98U6+lXDfTUWIfgvZU+EumvpHKcYjujKH7woYyLj2sUmff0tRhrqM7BohUw7Pz3ZB1jj2gW9Fvmg==" + }, + "@webgpu/types@0.1.66": { + "integrity": "sha512-YA2hLrwLpDsRueNDXIMqN9NTzD6bCDkuXbOSe0heS+f8YE8usA6Gbv1prj81pzVHrbaAma7zObnIC+I6/sXJgA==" + }, + "esbuild@0.18.20": { + "integrity": "sha512-ceqxoedUrcayh7Y7ZX6NdbbDzGROiyVBgC4PriJThBKSVPWnnFHZAkfI1lJT8QFkOwH4qOS2SJkS4wvpGl8BpA==", + "optionalDependencies": [ + "@esbuild/android-arm", + "@esbuild/android-arm64", + "@esbuild/android-x64", + "@esbuild/darwin-arm64", + "@esbuild/darwin-x64", + "@esbuild/freebsd-arm64", + "@esbuild/freebsd-x64", + "@esbuild/linux-arm", + "@esbuild/linux-arm64", + "@esbuild/linux-ia32", + "@esbuild/linux-loong64", + "@esbuild/linux-mips64el", + "@esbuild/linux-ppc64", + "@esbuild/linux-riscv64", + "@esbuild/linux-s390x", + "@esbuild/linux-x64", + "@esbuild/netbsd-x64", + 
"@esbuild/openbsd-x64", + "@esbuild/sunos-x64", + "@esbuild/win32-arm64", + "@esbuild/win32-ia32", + "@esbuild/win32-x64" + ], + "scripts": true, + "bin": true + }, + "fsevents@2.3.3": { + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "os": ["darwin"], + "scripts": true + }, + "nanoid@3.3.11": { + "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", + "bin": true + }, + "picocolors@1.1.1": { + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==" + }, + "postcss@8.5.6": { + "integrity": "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==", + "dependencies": [ + "nanoid", + "picocolors", + "source-map-js" + ] + }, + "rollup@3.29.5": { + "integrity": "sha512-GVsDdsbJzzy4S/v3dqWPJ7EfvZJfCHiDqe80IyrF59LYuP+e6U1LJoUqeuqRbwAWoMNoXivMNeNAOf5E22VA1w==", + "optionalDependencies": [ + "fsevents" + ], + "bin": true + }, + "source-map-js@1.2.1": { + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==" + }, + "tweakpane-plugin-file-import@0.2.1_tweakpane@3.1.10": { + "integrity": "sha512-8v9EFMKkyVXf5pu5xMdR8TAEBZ4qizMyd+1NN5fbdPN51KD28PFhDCbZcIvlDPPz05Y75MVhv0nJ7Cbk4Cbb5Q==", + "dependencies": [ + "tweakpane" + ] + }, + "tweakpane@3.1.10": { + "integrity": "sha512-rqwnl/pUa7+inhI2E9ayGTqqP0EPOOn/wVvSWjZsRbZUItzNShny7pzwL3hVlaN4m9t/aZhsP0aFQ9U5VVR2VQ==" + }, + "typescript@5.9.3": { + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "bin": true + }, + "vite-raw-plugin@1.0.2": { + "integrity": "sha512-gdp/OFVXBiVq1UwPujVb7+4mmgYHTGrzslMbQvxmgzTN4/HC+3j4GNrumsIKSWfA/y3hktII7XqY38muRaGjhw==" + }, + "vite@4.5.14": { + "integrity": "sha512-+v57oAaoYNnO3hIu5Z/tJRZjq5aHM2zDve9YZ8HngVHbhk66RStobhb1sqPMIPEleV6cNKYK4eGrAbE9Ulbl2g==", + 
"dependencies": [ + "esbuild", + "postcss", + "rollup" + ], + "optionalDependencies": [ + "fsevents" + ], + "bin": true + }, + "wgpu-matrix@3.4.0": { + "integrity": "sha512-kXHrbAPKEn9A32Wf4wVldyx9MmnzwhuB5p8GCqoJP3ItU5+iDT4J3aTQwPZWkfb153hwGtqZtUwR2M+ipJKadg==" + } + }, + "workspace": { + "packageJson": { + "dependencies": [ + "npm:@loaders.gl/core@^4.2.2", + "npm:@loaders.gl/ply@^4.2.2", + "npm:@petamoriken/float16@^3.8.7", + "npm:@tweakpane/core@^1.1.7", + "npm:@webgpu/types@~0.1.31", + "npm:tweakpane-plugin-file-import@0.2", + "npm:tweakpane@^3.1.8", + "npm:typescript@^5.0.4", + "npm:vite-raw-plugin@^1.0.1", + "npm:vite@^4.3.1", + "npm:wgpu-matrix@^3.2.0" + ] + } + } +} diff --git a/images/bicycleframe.png b/images/bicycleframe.png new file mode 100644 index 0000000..519177d Binary files /dev/null and b/images/bicycleframe.png differ diff --git a/images/bonzaiframe.png b/images/bonzaiframe.png new file mode 100644 index 0000000..b815798 Binary files /dev/null and b/images/bonzaiframe.png differ diff --git a/images/cover.png b/images/cover.png new file mode 100644 index 0000000..68d0d66 Binary files /dev/null and b/images/cover.png differ diff --git a/images/occluded.png b/images/occluded.png new file mode 100644 index 0000000..5ee0306 Binary files /dev/null and b/images/occluded.png differ diff --git a/src/renderers/gaussian-renderer.ts b/src/renderers/gaussian-renderer.ts index 1684523..39e7a4f 100644 --- a/src/renderers/gaussian-renderer.ts +++ b/src/renderers/gaussian-renderer.ts @@ -5,7 +5,7 @@ import { get_sorter,c_histogram_block_rows,C } from '../sort/sort'; import { Renderer } from './renderer'; export interface GaussianRenderer extends Renderer { - + setGaussianMultiplier: (value: number) => void, } // Utility to create GPU buffers @@ -14,7 +14,7 @@ const createBuffer = ( label: string, size: number, usage: GPUBufferUsageFlags, - data?: ArrayBuffer | ArrayBufferView + data?: BufferSource ) => { const buffer = device.createBuffer({ label, size, usage 
}); if (data) device.queue.writeBuffer(buffer, 0, data); @@ -29,19 +29,140 @@ export default function get_renderer( ): GaussianRenderer { const sorter = get_sorter(pc.num_points, device); - + // =============================================== - // Initialize GPU Buffers + // Create Compute Pipeline and Bind Groups // =============================================== const nulling_data = new Uint32Array([0]); + + // Create explicit bind group layout for sort data + const sortLayout = device.createBindGroupLayout({ + label: "sort layout", + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: "storage", + } + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: "storage", + } + }, + { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: "storage", + } + }, + { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: "storage", + } + }, + ], + }); + + /* Compute-stage view of the gaussian data: bindings 0 and 2 are read-only storage, binding 1 is writable storage. Labelled distinctly from the render layout so validation messages identify which layout is involved. */ const gaussiansLayoutPreprocess = device.createBindGroupLayout({ + label: "gaussians preprocess layout", + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: "read-only-storage", + }, + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: "storage", + }, + }, + { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: "read-only-storage", + }, + }, + ], + }); + + /* Vertex-stage view used by the render pipeline: all three bindings are read-only storage. */ const gaussiansLayoutRender = device.createBindGroupLayout({ + label: "gaussians render layout", + entries: [ + { + binding: 0, + visibility: GPUShaderStage.VERTEX, + buffer: { + type: "read-only-storage", + }, + }, + { + binding: 1, + visibility: GPUShaderStage.VERTEX, + buffer: { + type: "read-only-storage", + }, + }, + { + binding: 2, + visibility: GPUShaderStage.VERTEX, + buffer: { + type: "read-only-storage", + }, + }, + ], + }); + /* Single compute-visible uniform; uniformsBuffer is bound to it further down. */ const uniformsLayout = device.createBindGroupLayout({ + label: "gaussian uniforms layout", + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: 
"uniform", + }, + }, + ], + }); + + /* Camera uniform, visible to the compute, vertex and fragment stages so one bind group serves both pipelines. */ const cameraLayout = device.createBindGroupLayout({ + label: "gaussian camera layout", + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE | GPUShaderStage.VERTEX | GPUShaderStage.FRAGMENT, + buffer: { + type: "uniform", + }, + }, + ], + }); + + /* The array order fixes the group indices of the preprocess pipeline: 0 = sort, 1 = gaussians, 2 = uniforms, 3 = camera. */ const preprocessLayout = device.createPipelineLayout({ + label: "preprocess layout", + bindGroupLayouts: [sortLayout, gaussiansLayoutPreprocess, uniformsLayout, cameraLayout], + }); + + /* Render pipeline groups: 0 = camera, 1 = gaussians. */ const renderLayout = device.createPipelineLayout({ + label: "gaussian render layout", + bindGroupLayouts: [cameraLayout, gaussiansLayoutRender], + }); - // =============================================== - // Create Compute Pipeline and Bind Groups - // =============================================== const preprocess_pipeline = device.createComputePipeline({ label: 'preprocess', - layout: 'auto', + layout: preprocessLayout, compute: { module: device.createShaderModule({ code: preprocessWGSL }), entryPoint: 'preprocess', @@ -54,7 +175,7 @@ export default function get_renderer( const sort_bind_group = device.createBindGroup({ label: 'sort', - layout: preprocess_pipeline.getBindGroupLayout(2), + layout: sortLayout, entries: [ { binding: 0, resource: { buffer: sorter.sort_info_buffer } }, { binding: 1, resource: { buffer: sorter.ping_pong[0].sort_depths_buffer } }, @@ -67,7 +188,85 @@ export default function get_renderer( // =============================================== // Create Render Pipeline and Bind Groups // =============================================== - + /* 32 bytes per point; bound as writable storage at binding 1 of the preprocess group and as read-only storage at binding 1 of the render group. */ const splatBuffer = device.createBuffer({ + label: "splat buffer", + size: pc.num_points * 32, + usage: GPUBufferUsage.STORAGE, + }); + + /* 4 bytes: a single f32, written below with the initial value 1 and later by setGaussianMultiplier. */ const uniformsBuffer = device.createBuffer({ + label: "uniforms buffer", + size: 4, + usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, + }); + + + const render_shader = device.createShaderModule({code: renderWGSL}); + const render_pipeline = device.createRenderPipeline({ + label: 
'render', + layout: renderLayout, + vertex: { + module: render_shader, + entryPoint: 'vs_main', + }, + fragment: { + module: render_shader, + entryPoint: 'fs_main', + targets: [{ + format: presentation_format, + /* result = src + dst * (1 - src.alpha) for both colour and alpha; assumes the fragment shader emits alpha-premultiplied colour - confirm against fs_main */ blend: { + color: { + srcFactor: "one", + dstFactor: "one-minus-src-alpha", + operation: "add", + }, + alpha: { + srcFactor: "one", + dstFactor: "one-minus-src-alpha", + operation: "add", + }, + }, + }], + }, + primitive: { + topology: 'triangle-list', + }, + }); + + /* Shared by the preprocess pass (group 3) and the render pass (group 0). */ const camera_bind_group = device.createBindGroup({ + label: 'gaussian camera', + layout: cameraLayout, + entries: [{binding: 0, resource: { buffer: camera_buffer }}], + }); + + const gaussianGroupPreprocess = device.createBindGroup({ + label: 'gaussian preprocess gaussians', + layout: gaussiansLayoutPreprocess, + entries: [ + {binding: 0, resource: { buffer: pc.gaussian_3d_buffer }}, + {binding: 1, resource: { buffer: splatBuffer }}, + {binding: 2, resource: { buffer: pc.sh_buffer }}, + ], + }); + + + /* Same first two buffers as the preprocess group, but binding 2 is the sorter's index buffer instead of the SH buffer. */ const gaussianGroupRender = device.createBindGroup({ + label: 'gaussian render gaussians', + layout: gaussiansLayoutRender, + entries: [ + {binding: 0, resource: { buffer: pc.gaussian_3d_buffer }}, + {binding: 1, resource: { buffer: splatBuffer }}, + {binding: 2, resource: { buffer: sorter.ping_pong[0].sort_indices_buffer }}, + ], + }); + + const uniformsBindGroup = device.createBindGroup({ + label: "preprocess settings bind group", + layout: uniformsLayout, + entries: [ + {binding: 0, resource: {buffer: uniformsBuffer}}, + ], + }); // =============================================== // Command Encoder Functions @@ -77,10 +276,47 @@ export default function get_renderer( // =============================================== // Return Render Object // =============================================== + /* Initial gaussian multiplier of 1. */ device.queue.writeBuffer(uniformsBuffer, 0, new Float32Array([1])); + return { frame: (encoder: GPUCommandEncoder, texture_view: GPUTextureView) => { + /* Zero the first u32 of the sorter's info buffer and of its indirect-dispatch buffer before the preprocess pass runs. */ device.queue.writeBuffer(sorter.sort_info_buffer, 0, 
nulling_data); + device.queue.writeBuffer(sorter.sort_dispatch_indirect_buffer, 0, nulling_data); + + const computePass = encoder.beginComputePass({ + label: "Gaussian preprocess compute pass", + }); + computePass.setPipeline(preprocess_pipeline); + computePass.setBindGroup(0, sort_bind_group); + computePass.setBindGroup(1, gaussianGroupPreprocess); + computePass.setBindGroup(2, uniformsBindGroup); + computePass.setBindGroup(3, camera_bind_group); + /* Enough workgroups to cover every point; assumes the shader's workgroup size equals C.histogram_wg_size - TODO confirm where that constant is injected. */ computePass.dispatchWorkgroups(Math.ceil(pc.num_points / C.histogram_wg_size)); + computePass.end(); + sorter.sort(encoder); + + const renderPass = encoder.beginRenderPass({ + label: 'Gaussian render pass', + colorAttachments: [ + { + view: texture_view, + loadOp: 'clear', + storeOp: 'store', + } + ], + }); + renderPass.setPipeline(render_pipeline); + renderPass.setBindGroup(0, camera_bind_group); + renderPass.setBindGroup(1, gaussianGroupRender); + + /* 6 vertices per instance, one instance per point. NOTE(review): this always draws pc.num_points instances; if the preprocess pass appends fewer sort entries than that, the remaining instances read whatever indices are left in the sort buffer - consider an indirect draw sized by the sorter's key count; confirm. */ renderPass.draw(6, pc.num_points); + renderPass.end(); + }, camera_buffer, + setGaussianMultiplier: (value: number) => { + device.queue.writeBuffer(uniformsBuffer, 0, new Float32Array([value])); + }, }; } diff --git a/src/renderers/renderer.ts b/src/renderers/renderer.ts index ffdf9ba..d793f1a 100644 --- a/src/renderers/renderer.ts +++ b/src/renderers/renderer.ts @@ -122,6 +122,8 @@ export default async function init( {min: 0, max: 1.5} ).on('change', (e) => { //TODO: Bind constants to the gaussian renderer. 
+ if (gaussian_renderer === undefined) return; + gaussian_renderer.setGaussianMultiplier(e.value); }); } diff --git a/src/shaders/gaussian.wgsl b/src/shaders/gaussian.wgsl index 759226d..2f0d0c9 100644 --- a/src/shaders/gaussian.wgsl +++ b/src/shaders/gaussian.wgsl @@ -1,22 +1,101 @@ -struct VertexOutput { - @builtin(position) position: vec4, - //TODO: information passed from vertex shader to fragment shader +/* Camera uniforms, bound below at group 0, binding 0. */ struct CameraUniforms { + view: mat4x4, + view_inv: mat4x4, + proj: mat4x4, + proj_inv: mat4x4, + viewport: vec2, + focal: vec2 }; +/* Raw per-point input record; declared and bound in this shader but not read by vs_main or fs_main. */ struct Gaussian { + pos_opacity: array, + rot: array, + scale: array +} + struct Splat { //TODO: information defined in preprocess compute shader + /* (radius, opacity) as two packed f16 values; unpacked with unpack2x16float in vs_main */ radiusOpacity: u32, + /* splat centre, two packed f16 values; vs_main uses it directly as clip-space xy with w = 1 */ uvNormalized: u32, + /* first two conic terms as two packed f16 values */ conicXy: u32, + /* third conic term, kept as f32 */ conicZ: f32, + color: vec3f, + /* compared against 1 in vs_main to skip the splat */ culled: u32, +} + +@group(0) @binding(0) +var camera: CameraUniforms; + +@group(1) @binding(0) +var gaussians : array; + +@group(1) @binding(1) +var splats: array; + +/* Maps instance_index to an index into splats (see vs_main). */ @group(1) @binding(2) +var sortIndices: array; + +struct VertexOutput { + @builtin(position) position: vec4, + //TODO: information passed from vertex shader to fragment shader + @location(0) color: vec3f, + @location(1) radius: f32, + @location(2) conicUpperTriangle: vec3f, + @location(3) opacity: f32, + @location(4) splatCenterScreenPos: vec2f, }; +/* Six corner offsets (two triangles) of one splat quad, indexed by vertex_index. */ const quadOffsets = array( + vec2f(-1, -1), + vec2f(-1, 1), + vec2f(1, -1), + vec2f(1, -1), + vec2f(-1, 1), + vec2f(1, 1), +); + @vertex fn vs_main( + @builtin(vertex_index) in_vertex_index: u32, + @builtin(instance_index) in_instance_index: u32, ) -> VertexOutput { //TODO: reconstruct 2D quad based on information from splat, pass var out: VertexOutput; - out.position = vec4(1. ,1. 
, 0., 1.); + + /* Instances are drawn in the order given by sortIndices, not in buffer order. */ let sortIndex = sortIndices[in_instance_index]; + + let splat = splats[sortIndex]; + /* Culled splats get position (0, 0, -1, 0) and return early; presumably this places every vertex outside the clip volume so nothing is rasterized - confirm. */ if splat.culled == 1 { + out.position = vec4(0, 0, -1, 0); + return out; + } + + let radiusOpacity = unpack2x16float(splat.radiusOpacity); + let uvNormalized = unpack2x16float(splat.uvNormalized); + let conicXy = unpack2x16float(splat.conicXy); + + /* Centre remapped from [-1, 1] to [0, viewport]. */ let screenPos = (uvNormalized * 0.5 + 0.5) * camera.viewport; + /* Corner = centre + unit offset * radius, with the radius divided by half the viewport; presumably this converts a pixel radius into clip-space units - confirm. */ let offsetUv = uvNormalized + quadOffsets[in_vertex_index] * radiusOpacity.x / (camera.viewport * 0.5); + + out.position = vec4(offsetUv, 0, 1); + out.radius = radiusOpacity.x; + out.color = splat.color; + out.conicUpperTriangle = vec3f(conicXy.x, conicXy.y, splat.conicZ); + out.opacity = radiusOpacity.y; + out.splatCenterScreenPos = screenPos; + return out; } @fragment fn fs_main(in: VertexOutput) -> @location(0) vec4 { - return vec4(1.); + /* in.position is the fragment's framebuffer position, whose y grows downward; y is flipped here before subtracting the splat centre, which vs_main derived from clip-space coordinates. */ let posDiff = vec2f(in.position.x, camera.viewport.y - in.position.y) - in.splatCenterScreenPos; + // return vec4(in.color, 1); + // return vec4(posDiff, 0, 1); + /* Exponent of the falloff, a quadratic form in posDiff built from the three conic terms. NOTE(review): the cross term is added rather than subtracted; presumably this pairs with the y flip above - confirm against the preprocess conventions. */ let power = -0.5 * (in.conicUpperTriangle.x * posDiff.x * posDiff.x + in.conicUpperTriangle.z * posDiff.y * posDiff.y) + in.conicUpperTriangle.y * posDiff.x * posDiff.y; + if power > 0 { discard; } + + /* Alpha is capped at 0.99; fragments below 1/255 are discarded. */ let alpha = min(0.99, in.opacity * exp(power)); + if alpha < 1. / 255. 
{ discard; } + /* Colour is multiplied by alpha before output. */ return vec4f(in.color * alpha, alpha); } \ No newline at end of file diff --git a/src/shaders/point_cloud.wgsl b/src/shaders/point_cloud.wgsl index 01dded1..617171e 100644 --- a/src/shaders/point_cloud.wgsl +++ b/src/shaders/point_cloud.wgsl @@ -35,7 +35,7 @@ fn vs_main( let pos = vec4(a.x, a.y, b.x, 1.); // TODO: MVP calculations - out.position = pos; + /* view transform first, then projection */ out.position = camera.proj * camera.view * pos; return out; } diff --git a/src/shaders/preprocess.wgsl b/src/shaders/preprocess.wgsl index bbc63f5..32afabd 100644 --- a/src/shaders/preprocess.wgsl +++ b/src/shaders/preprocess.wgsl @@ -57,22 +57,49 @@ struct Gaussian { struct Splat { //TODO: store information for 2D splat rendering + /* (radius, opacity) packed with pack2x16float */ radiusOpacity: u32, + /* projected centre xy packed with pack2x16float */ uvNormalized: u32, + /* first two conic terms packed with pack2x16float */ conicXy: u32, + conicZ: f32, + color: vec3f, + /* set to 1 when the point is rejected in preprocess, 0 otherwise */ culled: u32, }; //TODO: bind your data here -@group(2) @binding(0) +@group(0) @binding(0) var sort_infos: SortInfos; -@group(2) @binding(1) +@group(0) @binding(1) var sort_depths : array; -@group(2) @binding(2) +@group(0) @binding(2) var sort_indices : array; -@group(2) @binding(3) +@group(0) @binding(3) var sort_dispatch: DispatchIndirect; +@group(1) @binding(0) +var gaussians: array; +@group(1) @binding(1) +var splats: array; +@group(1) @binding(2) +var sphericalHarmonicCoeffs: array; + +/* Scalar applied to the scale matrix in preprocess. */ @group(2) @binding(0) +var gaussianMultiplier: f32; + +@group(3) @binding(0) +var cameraUniforms: CameraUniforms; + /// reads the ith sh coef from the storage buffer fn sh_coef(splat_idx: u32, c_idx: u32) -> vec3 { //TODO: access your binded sh_coeff, see load.ts for how it is stored - return vec3(0.0); + /* Index of the coefficient's first half-float: 16 coefficients per splat, 3 values per coefficient. */ let coeffsIndex = ((splat_idx * 16 + c_idx) * 3); + /* Each buffer word unpacks to two values, so the three values span two consecutive words. */ let vals0 = unpack2x16float(sphericalHarmonicCoeffs[coeffsIndex / 2]); + let vals1 = unpack2x16float(sphericalHarmonicCoeffs[coeffsIndex / 2 + 1]); + + /* Even start index: the triple begins at the first half of word 0; odd: at its second half. */ if (coeffsIndex & 1u) == 0u { + return vec3f(vals0.x, vals0.y, vals1.x); + } else { + return vec3f(vals0.y, vals1.x, vals1.y); + } } // spherical harmonics evaluation with Condon–Shortley phase @@ 
-111,8 +138,105 @@ fn computeColorFromSH(dir: vec3, v_idx: u32, sh_deg: u32) -> vec3 { @compute @workgroup_size(workgroupSize,1,1) fn preprocess(@builtin(global_invocation_id) gid: vec3, @builtin(num_workgroups) wgs: vec3) { let idx = gid.x; + /* One invocation per gaussian; invocations past the end of the buffer do nothing. */ if idx >= arrayLength(&gaussians) { return; } + //TODO: set up pipeline as described in instruction + let gaussian = gaussians[idx]; + + /* pos_opacity unpacks to (x, y) and (z, raw opacity). */ let a = unpack2x16float(gaussian.pos_opacity[0]); + let b = unpack2x16float(gaussian.pos_opacity[1]); + let pos = vec3f(a.x, a.y, b.x); + + let viewPos = cameraUniforms.view * vec4(pos, 1); + let projViewPosHom = cameraUniforms.proj * viewPos; + /* Reject points with non-positive clip-space w before the perspective divide. */ if projViewPosHom.w <= 0 { + splats[idx].culled = 1; + return; + } + + let projViewPos = projViewPosHom.xyz / projViewPosHom.w; + + /* Reject points whose projected |x| or |y| exceeds 1.2, i.e. the [-1, 1] range widened by 0.2 on each side. */ const MARGIN = 1.2; + if any(abs(projViewPos.xy) > vec2f(MARGIN, MARGIN)) { + splats[idx].culled = 1; + return; + } + + /* Logistic sigmoid of the stored opacity value. */ let opacity = 1 / (1 + exp(-b.y)); + + let rot0 = unpack2x16float(gaussian.rot[0]); + let rot1 = unpack2x16float(gaussian.rot[1]); + let quat = vec4(rot0.x, rot0.y, rot1.x, rot1.y); + /* Rotation matrix from the quaternion; the formula treats quat.x as the scalar part and (y, z, w) as the vector part. Presumably the quaternion is already normalized - confirm against the loader. */ let rotMat = mat3x3f( + 1 - 2 * (quat.z * quat.z + quat.w * quat.w), 2 * (quat.y * quat.z - quat.x * quat.w), 2 * (quat.y * quat.w + quat.x * quat.z), + 2 * (quat.y * quat.z + quat.x * quat.w), 1 - 2 * (quat.y * quat.y + quat.w * quat.w), 2 * (quat.z * quat.w - quat.x * quat.y), + 2 * (quat.y * quat.w - quat.x * quat.z), 2 * (quat.z * quat.w + quat.x * quat.y), 1 - 2 * (quat.y * quat.y + quat.z * quat.z), + ); + + let scale0 = unpack2x16float(gaussian.scale[0]); + let scale1 = unpack2x16float(gaussian.scale[1]); + /* Stored scales are exponentiated before use. */ let scale = vec3f(exp(scale0.x), exp(scale0.y), exp(scale1.x)); + let scaleMat = mat3x3f( + scale.x, 0, 0, + 0, scale.y, 0, + 0, 0, scale.z, + ) * gaussianMultiplier; + + /* 3D covariance as transpose(M) * M with M = scale * rotation. */ let transformMat = scaleMat * rotMat; + let cov3 = transpose(transformMat) * transformMat; + + /* Transpose of the upper-left 3x3 of the view matrix. */ let w = transpose(mat3x3f( + cameraUniforms.view[0].xyz, + cameraUniforms.view[1].xyz, + cameraUniforms.view[2].xyz, + )); + + /* Built from the focal lengths and the view-space position; this looks like the Jacobian of the perspective projection used in EWA splatting - confirm. */ let j = mat3x3f( + 
cameraUniforms.focal.x, 0, -cameraUniforms.focal.x * viewPos.x / viewPos.z, + 0, cameraUniforms.focal.y, -cameraUniforms.focal.y * viewPos.y / viewPos.z, + 0, 0, 0, + ) * (1 / viewPos.z); + + let t = w * j; + let cov2_3 = transpose(t) * cov3 * t; + + /* Keep the upper-left 2x2 block, symmetrized by reusing [0][1], with 0.3 added to both diagonal entries. */ let cov2 = mat2x2( + cov2_3[0][0] + 0.3, cov2_3[0][1], + cov2_3[0][1], cov2_3[1][1] + 0.3, + ); + + let det = determinant(cov2); + /* Inverse of cov2 via the 2x2 adjugate divided by the determinant. */ let conic = mat2x2( + cov2[1][1], -cov2[0][1], + -cov2[1][0], cov2[0][0], + ) * (1 / det); + + /* Eigenvalues of cov2 are mid +/- quadDiff (the square-root argument is floored at 0.1); the radius is 3 * sqrt of the larger one, rounded up. */ let mid = 0.5 * (cov2[0][0] + cov2[1][1]); + let quadDiff = sqrt(max(0.1, mid * mid - det)); + let l1 = mid + quadDiff; + let l2 = mid - quadDiff; + let radius = ceil(3 * sqrt(max(l1, l2))); + + /* Direction from the translation column of view_inv to the point; the SH degree is fixed at 3. */ let cameraDir = normalize(pos - cameraUniforms.view_inv[3].xyz); + let color = computeColorFromSH(cameraDir, idx, 3); + + + splats[idx].radiusOpacity = pack2x16float(vec2f(radius, opacity)); + splats[idx].uvNormalized = pack2x16float(projViewPos.xy); + splats[idx].conicXy = pack2x16float(vec2f(conic[0][0], conic[0][1])); + splats[idx].conicZ = conic[1][1]; + splats[idx].color = color; + splats[idx].culled = 0; + + /* Reserve the next free slot in the sort buffers; only points that were not rejected above reach this. */ let sortIndex = atomicAdd(&sort_infos.keys_size, 1u); + /* NOTE(review): the key is the bit pattern of (64 - view depth); 64 looks like an assumed upper bound on depth - for larger depths the value goes negative and its bit pattern would no longer order the same way; confirm. */ sort_depths[sortIndex] = bitcast(64 - viewPos.z); + sort_indices[sortIndex] = idx; + + let keys_per_dispatch = workgroupSize * sortKeyPerThread; // increment DispatchIndirect.dispatchx each time you reach limit for one dispatch of keys + /* The first key of every keys_per_dispatch-sized group adds one to dispatch_x. */ if (sortIndex % keys_per_dispatch) == 0u { + atomicAdd(&sort_dispatch.dispatch_x, 1u); + } } \ No newline at end of file