From d84f03c81a5e336c0146ec9c2ef1c96cb4fe7f09 Mon Sep 17 00:00:00 2001
From: 65a <65a@63bit.net>
Date: Sat, 23 Sep 2023 15:10:31 -0700
Subject: [PATCH] Enable build for ROCm/HIPBLAS (#235)

---
 Makefile  | 20 ++++++++++++++++++--
 README.md | 17 +++++++++++++----
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/Makefile b/Makefile
index 9a91fda..c4285df 100644
--- a/Makefile
+++ b/Makefile
@@ -70,6 +70,10 @@ ifeq ($(UNAME_S),Haiku)
 	CXXFLAGS += -pthread
 endif
 
+# GPGPU specific
+GGML_CUDA_OBJ_PATH=CMakeFiles/ggml.dir/ggml-cuda.cu.o
+
+
 # Architecture specific
 # TODO: probably these flags need to be tweaked on some architectures
 # feel free to update the Makefile for your architecture and send a pull request or issue
@@ -137,6 +141,18 @@ ifeq ($(BUILD_TYPE),cublas)
 	EXTRA_TARGETS+=llama.cpp/ggml-cuda.o
 endif
 
+ifeq ($(BUILD_TYPE),hipblas)
+	ROCM_HOME ?= "/opt/rocm"
+	CXX="$(ROCM_HOME)"/llvm/bin/clang++
+	CC="$(ROCM_HOME)"/llvm/bin/clang
+	EXTRA_LIBS=
+	GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
+	AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
+	CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
+	EXTRA_TARGETS+=llama.cpp/ggml-cuda.o
+	GGML_CUDA_OBJ_PATH=CMakeFiles/ggml-rocm.dir/ggml-cuda.cu.o
+endif
+
 ifeq ($(BUILD_TYPE),clblas)
 	EXTRA_LIBS=
 	CMAKE_ARGS+=-DLLAMA_CLBLAST=ON
@@ -183,10 +199,10 @@ llama.cpp/ggml-alloc.o:
 
 llama.cpp/ggml.o: prepare
 	mkdir -p build
-	cd build && cmake ../llama.cpp $(CMAKE_ARGS) && VERBOSE=1 cmake --build . --config Release && cp -rf CMakeFiles/ggml.dir/ggml.c.o ../llama.cpp/ggml.o
+	cd build && CC="$(CC)" CXX="$(CXX)" cmake ../llama.cpp $(CMAKE_ARGS) && VERBOSE=1 cmake --build . --config Release && cp -rf CMakeFiles/ggml.dir/ggml.c.o ../llama.cpp/ggml.o
 
 llama.cpp/ggml-cuda.o: llama.cpp/ggml.o
-	cd build && cp -rf CMakeFiles/ggml.dir/ggml-cuda.cu.o ../llama.cpp/ggml-cuda.o
+	cd build && cp -rf "$(GGML_CUDA_OBJ_PATH)" ../llama.cpp/ggml-cuda.o
 
 llama.cpp/ggml-opencl.o: llama.cpp/ggml.o
 	cd build && cp -rf CMakeFiles/ggml.dir/ggml-opencl.cpp.o ../llama.cpp/ggml-opencl.o
diff --git a/README.md b/README.md
index 4ed1e28..1d3eb34 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 [LLama.cpp](https://github.com/ggerganov/llama.cpp) golang bindings.
 
-The go-llama.cpp bindings are high level, as such most of the work is kept into the C/C++ code to avoid any extra computational cost, be more performant and lastly ease out maintenance, while keeping the usage as simple as possible.
+The go-llama.cpp bindings are high level: most of the work is kept in the C/C++ code to avoid extra computational cost, stay performant, and ease maintenance, while keeping usage as simple as possible. Check out [this](https://about.sourcegraph.com/blog/go/gophercon-2018-adventures-in-cgo-performance) and [this](https://www.cockroachlabs.com/blog/the-cost-and-complexity-of-cgo/) write-up, which summarize the impact of a low-level interface that calls C functions from Go.
@@ -57,6 +57,15 @@ BUILD_TYPE=cublas make libbinding.a
 CGO_LDFLAGS="-lcublas -lcudart -L/usr/local/cuda/lib64/" LIBRARY_PATH=$PWD C_INCLUDE_PATH=$PWD go run ./examples -m "/model/path/here" -t 14
 ```
 
+### ROCm
+
+To build with ROCm (hipBLAS):
+
+```
+BUILD_TYPE=hipblas make libbinding.a
+CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ CGO_LDFLAGS="-O3 --hip-link --rtlib=compiler-rt -unwindlib=libgcc -lrocblas -lhipblas" LIBRARY_PATH=$PWD C_INCLUDE_PATH=$PWD go run ./examples -m "/model/path/here" -ngl 64 -t 32
+```
+
 ### OpenCL
 
 ```
@@ -68,9 +77,9 @@ CGO_LDFLAGS="-lOpenCL -lclblast -L/usr/local/lib64/" LIBRARY_PATH=$PWD C_INCLUDE
 You should see something like this from the output when using the GPU:
 
 ```
-ggml_opencl: selecting platform: 'Intel(R) OpenCL HD Graphics' 
-ggml_opencl: selecting device: 'Intel(R) Graphics [0x46a6]' 
-ggml_opencl: device FP16 support: true 
+ggml_opencl: selecting platform: 'Intel(R) OpenCL HD Graphics'
+ggml_opencl: selecting device: 'Intel(R) Graphics [0x46a6]'
+ggml_opencl: device FP16 support: true
 ```
 
 ## GPU offloading
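
Note on the Makefile change: `GPU_TARGETS` (and `AMDGPU_TARGETS`, which defaults to it) are declared with `?=`, so they can be overridden from the environment to narrow the build to a single GPU architecture instead of the default list. A minimal sketch, assuming a gfx1030 (RDNA2) card; the correct identifier for your hardware is reported by `rocminfo`:

```
# Build the ROCm variant for one target only; AMDGPU_TARGETS follows GPU_TARGETS.
BUILD_TYPE=hipblas GPU_TARGETS=gfx1030 make libbinding.a
```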
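On the GPU offloading side, the `-ngl 64` flag in the ROCm example asks for 64 model layers to be offloaded to the GPU. Below is a minimal Go sketch of doing the same through the bindings; it assumes the `llama.New` constructor and the `llama.SetGPULayers`, `llama.SetThreads`, and `llama.SetTokens` options used by the repository's `examples` package, so verify the names against the bindings version you build.

```
package main

import (
	"fmt"
	"os"

	llama "github.com/go-skynet/go-llama.cpp"
)

func main() {
	// SetGPULayers mirrors the -ngl flag: how many layers to offload to the GPU.
	// llama.New and llama.SetGPULayers are assumed from the repo's examples.
	l, err := llama.New("/model/path/here", llama.SetContext(512), llama.SetGPULayers(64))
	if err != nil {
		fmt.Fprintln(os.Stderr, "loading model:", err)
		os.Exit(1)
	}
	defer l.Free()

	// Generate with the same thread count as the README's example invocation.
	out, err := l.Predict("Hello, world", llama.SetThreads(32), llama.SetTokens(128))
	if err != nil {
		fmt.Fprintln(os.Stderr, "predict:", err)
		os.Exit(1)
	}
	fmt.Println(out)
}
```

Build and run it with the same `CGO_LDFLAGS`, `LIBRARY_PATH`, and `C_INCLUDE_PATH` settings shown in the ROCm section above.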