Skip to content

Commit

Permalink
Enable build for ROCm/HIPBLAS (#235)
Browse files Browse the repository at this point in the history
  • Loading branch information
65a authored Sep 23, 2023
1 parent 1171213 commit d84f03c
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
20 changes: 18 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ ifeq ($(UNAME_S),Haiku)
CXXFLAGS += -pthread
endif

# GPGPU specific
# Default path, relative to the build/ directory, of the GPU object file that
# the llama.cpp/ggml-cuda.o rule copies to llama.cpp/ggml-cuda.o.
# The hipblas build type replaces this value with its own object path.
GGML_CUDA_OBJ_PATH=CMakeFiles/ggml.dir/ggml-cuda.cu.o


# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull request or issue
Expand Down Expand Up @@ -137,6 +141,18 @@ ifeq ($(BUILD_TYPE),cublas)
EXTRA_TARGETS+=llama.cpp/ggml-cuda.o
endif

# ROCm/hipBLAS build, selected with BUILD_TYPE=hipblas: use the clang/clang++
# found under ROCM_HOME and turn on llama.cpp's LLAMA_HIPBLAS CMake option.
ifeq ($(BUILD_TYPE),hipblas)
# ROCm installation prefix; `?=` keeps any value that is already set.
ROCM_HOME ?= "/opt/rocm"
# Compilers are taken from $(ROCM_HOME)/llvm/bin; the llama.cpp/ggml.o rule
# hands CC and CXX to cmake through the environment.
# NOTE(review): make does not strip quotes, so the double quotes written in
# the ROCM_HOME, CXX, CC and AMDGPU_TARGETS assignments become part of the
# values and are only removed later by the shell - confirm this is intended.
CXX="$(ROCM_HOME)"/llvm/bin/clang++
CC="$(ROCM_HOME)"/llvm/bin/clang
# Clear EXTRA_LIBS for this build type (plain assignment, not an append).
EXTRA_LIBS=
# Comma-separated list of GPU targets to build for; both variables use `?=`,
# so either can be preset, and AMDGPU_TARGETS otherwise follows GPU_TARGETS.
GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
# Forward the hipBLAS switch and both target lists to the cmake configure step.
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
# Also build llama.cpp/ggml-cuda.o, the same extra target the cublas build adds.
EXTRA_TARGETS+=llama.cpp/ggml-cuda.o
# For this build type the object is read from CMakeFiles/ggml-rocm.dir rather
# than the default CMakeFiles/ggml.dir, so point the copy step at that path.
GGML_CUDA_OBJ_PATH=CMakeFiles/ggml-rocm.dir/ggml-cuda.cu.o
endif

ifeq ($(BUILD_TYPE),clblas)
EXTRA_LIBS=
CMAKE_ARGS+=-DLLAMA_CLBLAST=ON
Expand Down Expand Up @@ -183,10 +199,10 @@ llama.cpp/ggml-alloc.o:

llama.cpp/ggml.o: prepare
mkdir -p build
cd build && cmake ../llama.cpp $(CMAKE_ARGS) && VERBOSE=1 cmake --build . --config Release && cp -rf CMakeFiles/ggml.dir/ggml.c.o ../llama.cpp/ggml.o
cd build && CC="$(CC)" CXX="$(CXX)" cmake ../llama.cpp $(CMAKE_ARGS) && VERBOSE=1 cmake --build . --config Release && cp -rf CMakeFiles/ggml.dir/ggml.c.o ../llama.cpp/ggml.o

llama.cpp/ggml-cuda.o: llama.cpp/ggml.o
cd build && cp -rf CMakeFiles/ggml.dir/ggml-cuda.cu.o ../llama.cpp/ggml-cuda.o
cd build && cp -rf "$(GGML_CUDA_OBJ_PATH)" ../llama.cpp/ggml-cuda.o

llama.cpp/ggml-opencl.o: llama.cpp/ggml.o
cd build && cp -rf CMakeFiles/ggml.dir/ggml-opencl.cpp.o ../llama.cpp/ggml-opencl.o
Expand Down
17 changes: 13 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[LLama.cpp](https://github.com/ggerganov/llama.cpp) golang bindings.

The go-llama.cpp bindings are high level, as such most of the work is kept into the C/C++ code to avoid any extra computational cost, be more performant and lastly ease out maintenance, while keeping the usage as simple as possible.
The go-llama.cpp bindings are high level: most of the work is kept in the C/C++ code to avoid any extra computational cost, improve performance, and ease maintenance, while keeping usage as simple as possible.

Check out [this](https://about.sourcegraph.com/blog/go/gophercon-2018-adventures-in-cgo-performance) and [this](https://www.cockroachlabs.com/blog/the-cost-and-complexity-of-cgo/) write-ups which summarize the impact of a low-level interface which calls C functions from Go.

Expand Down Expand Up @@ -57,6 +57,15 @@ BUILD_TYPE=cublas make libbinding.a
CGO_LDFLAGS="-lcublas -lcudart -L/usr/local/cuda/lib64/" LIBRARY_PATH=$PWD C_INCLUDE_PATH=$PWD go run ./examples -m "/model/path/here" -t 14
```

### ROCm

To build with ROCm (hipBLAS):

```
BUILD_TYPE=hipblas make libbinding.a
CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ CGO_LDFLAGS="-O3 --hip-link --rtlib=compiler-rt -unwindlib=libgcc -lrocblas -lhipblas" LIBRARY_PATH=$PWD C_INCLUDE_PATH=$PWD go run ./examples -m "/model/path/here" -ngl 64 -t 32
```

### OpenCL

```
Expand All @@ -68,9 +77,9 @@ CGO_LDFLAGS="-lOpenCL -lclblast -L/usr/local/lib64/" LIBRARY_PATH=$PWD C_INCLUDE
You should see something like this in the output when using the GPU:

```
ggml_opencl: selecting platform: 'Intel(R) OpenCL HD Graphics'
ggml_opencl: selecting device: 'Intel(R) Graphics [0x46a6]'
ggml_opencl: device FP16 support: true
ggml_opencl: selecting platform: 'Intel(R) OpenCL HD Graphics'
ggml_opencl: selecting device: 'Intel(R) Graphics [0x46a6]'
ggml_opencl: device FP16 support: true
```

## GPU offloading
Expand Down

0 comments on commit d84f03c

Please sign in to comment.