Skip to content

Commit

Permalink
Merge pull request #76 from andrewkchan/achan/mps-backend
Browse files Browse the repository at this point in the history
Add MPS support with fused kernels
  • Loading branch information
pierotofy authored Apr 15, 2024
2 parents 6beab30 + 8c376c8 commit 373b337
Show file tree
Hide file tree
Showing 16 changed files with 2,549 additions and 16 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# MacOS
.DS_Store

# build
build/
.idea/
.vscode/
.vscode/
42 changes: 41 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ cmake_minimum_required(VERSION 3.21)
project(opensplat)

set(OPENSPLAT_BUILD_SIMPLE_TRAINER OFF CACHE BOOL "Build simple trainer applications")
set(GPU_RUNTIME "CUDA" CACHE STRING "HIP or CUDA")
set(GPU_RUNTIME "CUDA" CACHE STRING "HIP or CUDA or MPS")
set(OPENCV_DIR "OPENCV_DIR-NOTFOUND" CACHE PATH "Path to the OPENCV installation directory")
set(OPENSPLAT_MAX_CUDA_COMPATIBILITY OFF CACHE BOOL "Build for maximum CUDA device compatibility")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE)
Expand Down Expand Up @@ -81,6 +82,16 @@ elseif(GPU_RUNTIME STREQUAL "HIP")
set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation")
endif()
list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}")
elseif(GPU_RUNTIME STREQUAL "MPS")
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
find_library(METAL_FRAMEWORK Metal REQUIRED)
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
message(STATUS "Metal framework found")

set(XC_FLAGS -O3)
set(USE_MPS ON CACHE BOOL "Use MPS for GPU acceleration")
else()
set(GPU_RUNTIME "CPU")
endif()

set(CMAKE_CXX_STANDARD 17)
Expand Down Expand Up @@ -119,6 +130,31 @@ if((GPU_RUNTIME STREQUAL "CUDA") OR (GPU_RUNTIME STREQUAL "HIP"))
${TORCH_INCLUDE_DIRS}
)
set_target_properties(gsplat PROPERTIES LINKER_LANGUAGE CXX)
elseif(GPU_RUNTIME STREQUAL "MPS")
add_library(gsplat vendor/gsplat-metal/gsplat_metal.mm)
list(APPEND GSPLAT_LIBS gsplat)
target_link_libraries(gsplat PRIVATE
${FOUNDATION_LIBRARY}
${METAL_FRAMEWORK}
${METALKIT_FRAMEWORK}
)
target_include_directories(gsplat PRIVATE ${TORCH_INCLUDE_DIRS})
# copy shader files to bin directory
configure_file(vendor/gsplat-metal/gsplat_metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/gsplat_metal.metal COPYONLY)
# Compile the Metal shader into default.metallib in the runtime output directory.
# The shader is read directly from the source tree rather than from the copy
# placed in the output directory: that copy is removed at the end of this rule,
# so a rebuild without a reconfigure (e.g. after a clean) would otherwise have
# no input file. The intermediate .air file is also removed once the library
# has been linked.
add_custom_command(
  OUTPUT "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib"
  COMMAND xcrun -sdk macosx metal ${XC_FLAGS}
          -c "${CMAKE_CURRENT_SOURCE_DIR}/vendor/gsplat-metal/gsplat_metal.metal"
          -o "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/gsplat_metal.air"
  COMMAND xcrun -sdk macosx metallib
          "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/gsplat_metal.air"
          -o "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib"
  # Use CMake's own file removal instead of a shell-specific `rm`.
  COMMAND "${CMAKE_COMMAND}" -E rm -f "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/gsplat_metal.air"
  COMMAND "${CMAKE_COMMAND}" -E rm -f "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/gsplat_metal.metal"
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/vendor/gsplat-metal/gsplat_metal.metal"
  COMMENT "Compiling Metal kernels"
  VERBATIM
)

# Target that is part of the default build (ALL) and depends on default.metallib,
# so the custom command producing that file runs whenever the project is built.
add_custom_target(
gsplat_metal ALL
DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
)
endif()

add_library(gsplat_cpu vendor/gsplat-cpu/gsplat_cpu.cpp)
Expand All @@ -135,6 +171,8 @@ if(GPU_RUNTIME STREQUAL "HIP")
target_compile_definitions(opensplat PRIVATE USE_HIP __HIP_PLATFORM_AMD__)
elseif(GPU_RUNTIME STREQUAL "CUDA")
target_compile_definitions(opensplat PRIVATE USE_CUDA)
elseif(GPU_RUNTIME STREQUAL "MPS")
target_compile_definitions(opensplat PRIVATE USE_MPS)
endif()

if(OPENSPLAT_BUILD_SIMPLE_TRAINER)
Expand All @@ -149,6 +187,8 @@ if(OPENSPLAT_BUILD_SIMPLE_TRAINER)
target_compile_definitions(simple_trainer PRIVATE USE_HIP __HIP_PLATFORM_AMD__)
elseif(GPU_RUNTIME STREQUAL "CUDA")
target_compile_definitions(simple_trainer PRIVATE USE_CUDA)
elseif(GPU_RUNTIME STREQUAL "MPS")
target_compile_definitions(simple_trainer PRIVATE USE_MPS)
endif()
endif()

Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,16 +121,23 @@ brew install opencv
brew install pytorch
```

You will also need to install Xcode and the Xcode command line tools to compile with Metal support (otherwise, OpenSplat will build with CPU support only):
1. Install Xcode from the Apple App Store.
2. Install the command line tools with `xcode-select --install`. This might do nothing on your machine.
3. If `xcode-select --print-path` prints `/Library/Developer/CommandLineTools`, then run `sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer`.

Then run:

```
git clone https://github.com/pierotofy/OpenSplat OpenSplat
cd OpenSplat
mkdir build && cd build
cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch/ .. && make -j$(nproc)
cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch/ -DGPU_RUNTIME=MPS .. && make -j$(sysctl -n hw.logicalcpu)
./opensplat
```

If building CPU-only, remove `-DGPU_RUNTIME=MPS`.

:warning: You will probably get a *libc10.dylib can’t be opened because Apple cannot check it for malicious software* error on first run. Open **System Settings** and go to **Privacy & Security** and find the **Allow** button. You might need to repeat this several times until all torch libraries are loaded.

## Docker Build
Expand Down Expand Up @@ -234,7 +241,6 @@ We recently released OpenSplat, so there's lots of work to do.

* Support for running on AMD cards (more testing needed)
* Improve speed / reduce memory usage
* Add Metal support on macOS
* Distributed computation using multiple machines
* Real-time training viewer output
* Compressed scene outputs
Expand Down
4 changes: 4 additions & 0 deletions gsplat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
#include "vendor/gsplat/bindings.h"
#endif

#if defined(USE_MPS)
#include "vendor/gsplat-metal/bindings.h"
#endif

#include "vendor/gsplat-cpu/bindings.h"

#endif
6 changes: 3 additions & 3 deletions model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ torch::Tensor Model::forward(Camera& cam, int step){
cov2d = p[3];
camDepths = p[4];
}else{
#if defined(USE_HIP) || defined(USE_CUDA)
#if defined(USE_HIP) || defined(USE_CUDA) || defined(USE_MPS)

TileBounds tileBounds = std::make_tuple((width + BLOCK_X - 1) / BLOCK_X,
(height + BLOCK_Y - 1) / BLOCK_Y,
Expand Down Expand Up @@ -152,7 +152,7 @@ torch::Tensor Model::forward(Camera& cam, int step){
if (device == torch::kCPU){
rgbs = SphericalHarmonicsCPU::apply(degreesToUse, viewDirs, colors);
}else{
#if defined(USE_HIP) || defined(USE_CUDA)
#if defined(USE_HIP) || defined(USE_CUDA) || defined(USE_MPS)
rgbs = SphericalHarmonics::apply(degreesToUse, viewDirs, colors);
#endif
}
Expand All @@ -172,7 +172,7 @@ torch::Tensor Model::forward(Camera& cam, int step){
width,
backgroundColor);
}else{
#if defined(USE_HIP) || defined(USE_CUDA)
#if defined(USE_HIP) || defined(USE_CUDA) || defined(USE_MPS)
rgb = RasterizeGaussians::apply(
xys,
depths,
Expand Down
5 changes: 4 additions & 1 deletion opensplat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,13 @@ int main(int argc, char *argv[]){
torch::Device device = torch::kCPU;
int displayStep = 1;

if (torch::cuda::is_available() && result.count("cpu") == 0) {
if (torch::hasCUDA() && result.count("cpu") == 0) {
std::cout << "Using CUDA" << std::endl;
device = torch::kCUDA;
displayStep = 10;
} else if (torch::hasMPS() && result.count("cpu") == 0) {
std::cout << "Using MPS" << std::endl;
device = torch::kMPS;
}else{
std::cout << "Using CPU" << std::endl;
}
Expand Down
2 changes: 1 addition & 1 deletion project_gaussians.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "project_gaussians.hpp"

#if defined(USE_HIP) || defined(USE_CUDA)
#if defined(USE_HIP) || defined(USE_CUDA) || defined(USE_MPS)

variable_list ProjectGaussians::forward(AutogradContext *ctx,
torch::Tensor means,
Expand Down
2 changes: 1 addition & 1 deletion project_gaussians.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

using namespace torch::autograd;

#if defined(USE_HIP) || defined(USE_CUDA)
#if defined(USE_HIP) || defined(USE_CUDA) || defined(USE_MPS)

class ProjectGaussians : public Function<ProjectGaussians>{
public:
Expand Down
4 changes: 2 additions & 2 deletions rasterize_gaussians.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "rasterize_gaussians.hpp"
#include "gsplat.hpp"

#if defined(USE_HIP) || defined(USE_CUDA)
#if defined(USE_HIP) || defined(USE_CUDA) || defined(USE_MPS)

std::tuple<torch::Tensor,
torch::Tensor,
Expand Down Expand Up @@ -171,9 +171,9 @@ torch::Tensor RasterizeGaussiansCPU::forward(AutogradContext *ctx,
torch::Tensor finalTs = std::get<1>(t);
std::vector<int32_t> *px2gid = std::get<2>(t);

ctx->saved_data["px2gid"] = reinterpret_cast<int64_t>(px2gid);
ctx->saved_data["imgWidth"] = imgWidth;
ctx->saved_data["imgHeight"] = imgHeight;
ctx->saved_data["px2gid"] = reinterpret_cast<int64_t>(px2gid);
ctx->save_for_backward({ xys, conics, colors, opacity, background, cov2d, camDepths, finalTs });

return outImg;
Expand Down
2 changes: 1 addition & 1 deletion rasterize_gaussians.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

using namespace torch::autograd;

#if defined(USE_HIP) || defined(USE_CUDA)
#if defined(USE_HIP) || defined(USE_CUDA) || defined(USE_MPS)

std::tuple<torch::Tensor,
torch::Tensor,
Expand Down
5 changes: 4 additions & 1 deletion simple_trainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ int main(int argc, char **argv){
if (torch::cuda::is_available() && result.count("cpu") == 0){
std::cout << "Using CUDA" << std::endl;
device = torch::kCUDA;
}else if(torch::mps::is_available() && result.count("cpu") == 0){
std::cout << "Using MPS" << std::endl;
device = torch::kMPS;
}else{
std::cout << "Using CPU" << std::endl;
}
Expand Down Expand Up @@ -160,7 +163,7 @@ int main(int argc, char **argv){
width,
background);
}else{
#if defined(USE_HIP) || defined(USE_CUDA)
#if defined(USE_HIP) || defined(USE_CUDA) || defined(USE_MPS)
auto p = ProjectGaussians::apply(means, scales, 1,
quats, viewMat, viewMat,
focal, focal,
Expand Down
2 changes: 1 addition & 1 deletion spherical_harmonics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ torch::Tensor sh2rgb(const torch::Tensor &sh){
return (sh * C0) + 0.5;
}

#if defined(USE_HIP) || defined(USE_CUDA)
#if defined(USE_HIP) || defined(USE_CUDA) || defined(USE_MPS)

torch::Tensor SphericalHarmonics::forward(AutogradContext *ctx,
int degreesToUse,
Expand Down
2 changes: 1 addition & 1 deletion spherical_harmonics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ int degFromSh(int numBases);
torch::Tensor rgb2sh(const torch::Tensor &rgb);
torch::Tensor sh2rgb(const torch::Tensor &sh);

#if defined(USE_HIP) || defined(USE_CUDA)
#if defined(USE_HIP) || defined(USE_CUDA) || defined(USE_MPS)

class SphericalHarmonics : public Function<SphericalHarmonics>{
public:
Expand Down
Loading

0 comments on commit 373b337

Please sign in to comment.