From 8173a5baeab976939f99b0e8c797803071136abc Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 29 Sep 2023 19:00:35 +0200
Subject: [PATCH] ci: add GPU tests (#245)

Signed-off-by: mudler
---
 .github/workflows/test-gpu.yaml | 63 +++++++++++++++++++++++++++++++++
 Makefile                        | 16 +++++++--
 llama_test.go                   | 25 +++++++++++++
 3 files changed, 101 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/test-gpu.yaml

diff --git a/.github/workflows/test-gpu.yaml b/.github/workflows/test-gpu.yaml
new file mode 100644
index 0000000..0d1d922
--- /dev/null
+++ b/.github/workflows/test-gpu.yaml
@@ -0,0 +1,63 @@
+---
+name: 'GPU tests'
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+    tags:
+      - '*'
+
+concurrency:
+  group: ci-gpu-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  cancel-in-progress: true
+
+jobs:
+  ubuntu-latest:
+    runs-on: self-hosted
+    strategy:
+      matrix:
+        go-version: ['1.21.x']
+    steps:
+      - name: Clone
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+      - name: Setup Go ${{ matrix.go-version }}
+        uses: actions/setup-go@v4
+        with:
+          go-version: ${{ matrix.go-version }}
+      # You can test your matrix by printing the current Go version
+      - name: Display Go version
+        run: go version
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y make wget
+      - name: CUDA dependencies
+        run: |
+          # This fixes libc6-dev installation errors on containers...
+          sudo rm -rfv /run/systemd/system
+          sudo apt-get update
+          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential ffmpeg nvidia-cuda-toolkit cmake
+          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates cmake curl patch
+          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y pip wget
+      - name: Build and test
+        run: |
+          GPU_TESTS=true BUILD_TYPE=cublas CMAKE_ARGS="-DLLAMA_METAL=OFF -DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" \
+          make test 2>&1 | tee test_log.log
+          if grep -q "using CUDA for GPU acceleration" test_log.log; then
+            echo "All good";
+          else
+            echo "No CUDA found";
+            exit 1;
+          fi
+      - name: Release space from worker ♻
+        if: always()
+        run: |
+          sudo rm -rf build || true
+          sudo rm -rf bin || true
+          sudo rm -rf dist || true
+          sudo rm -rf *.log || true
+          make clean || true
\ No newline at end of file
diff --git a/Makefile b/Makefile
index c4285df..7dc8ffa 100644
--- a/Makefile
+++ b/Makefile
@@ -170,6 +170,14 @@ ifdef CLBLAST_DIR
 	CMAKE_ARGS+=-DCLBlast_dir=$(CLBLAST_DIR)
 endif
 
+# TODO: support Windows
+ifeq ($(GPU_TESTS),true)
+	CGO_LDFLAGS="-lcublas -lcudart -L/usr/local/cuda/lib64/"
+	TEST_LABEL=gpu
+else
+	TEST_LABEL=!gpu
+endif
+
 #
 # Print build information
 #
@@ -236,6 +244,8 @@ clean:
 	$(MAKE) -C llama.cpp clean
 	rm -rf build
 
-test: libbinding.a
-	test -f ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O ggllm-test-model.bin
-	C_INCLUDE_PATH=${INCLUDE_PATH} CGO_LDFLAGS=${CGO_LDFLAGS} LIBRARY_PATH=${LIBRARY_PATH} TEST_MODEL=ggllm-test-model.bin go test -v ./...
+ggllm-test-model.bin:
+	wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O ggllm-test-model.bin
+
+test: ggllm-test-model.bin libbinding.a
+	C_INCLUDE_PATH=${INCLUDE_PATH} CGO_LDFLAGS=${CGO_LDFLAGS} LIBRARY_PATH=${LIBRARY_PATH} TEST_MODEL=ggllm-test-model.bin go run github.com/onsi/ginkgo/v2/ginkgo --label-filter="$(TEST_LABEL)" --flake-attempts 5 -v -r ./...
\ No newline at end of file
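The Makefile above switches Ginkgo's `--label-filter` between `gpu` and `!gpu`, so the GPU specs run only when `GPU_TESTS=true`. A minimal sketch of how that label filtering behaves (a hypothetical standalone suite for illustration, not part of this patch):

```go
package example_test

import (
	"testing"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

// Ginkgo hooks into the standard `go test` runner via a suite entry point.
func TestExample(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "Example Suite")
}

var _ = Describe("label filtering", func() {
	// Selected by the default filter (--label-filter="!gpu"), excluded
	// when the Makefile sets TEST_LABEL=gpu.
	It("runs on CPU-only workers", func() {
		Expect(2 + 2).To(Equal(4))
	})

	// Selected only by --label-filter="gpu", i.e. when make is invoked
	// with GPU_TESTS=true as the workflow above does.
	It("runs on the GPU runner", Label("gpu"), func() {
		Expect(true).To(BeTrue())
	})
})
```

Note that with `TEST_LABEL=gpu` only the labeled specs are selected, so the GPU job exercises just the new Context added below.
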
diff --git a/llama_test.go b/llama_test.go
index 3bc8d0c..8c266ef 100644
--- a/llama_test.go
+++ b/llama_test.go
@@ -92,4 +92,29 @@ how much is 2+2?
 			Expect(int(l)).To(Equal(len(tokens)))
 		})
 	})
+
+	Context("Inference tests with GPU (using "+testModelPath+")", Label("gpu"), func() {
+		getModel := func() (*LLama, error) {
+			model, err := New(
+				testModelPath,
+				llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(10),
+			)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(model).ToNot(BeNil())
+			return model, err
+		}
+
+		It("predicts successfully", func() {
+			if testModelPath == "" {
+				Skip("skipping GPU test: the TEST_MODEL environment variable is not set")
+			}
+
+			model, err := getModel()
+			text, err := model.Predict(`[INST] Answer to the following question:
+how much is 2+2?
+[/INST]`)
+			Expect(err).ToNot(HaveOccurred(), text)
+			Expect(text).To(ContainSubstring("4"), text)
+		})
+	})
 })
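For context, a minimal sketch of what the new spec exercises outside the Ginkgo harness: loading a model with layers offloaded to the GPU and running one prediction. The import path (assumed here to be github.com/go-skynet/go-llama.cpp) and the local model filename are assumptions to adjust for your checkout:

```go
package main

import (
	"fmt"
	"log"

	llama "github.com/go-skynet/go-llama.cpp" // assumed import path
)

func main() {
	// Load the GGUF model the Makefile downloads, offloading ten layers
	// to the GPU (mirrors llama.SetGPULayers(10) in the spec above).
	model, err := llama.New(
		"ggllm-test-model.bin",
		llama.EnableF16Memory,
		llama.SetContext(128),
		llama.SetGPULayers(10),
	)
	if err != nil {
		log.Fatalf("loading model: %v", err)
	}
	defer model.Free()

	text, err := model.Predict(`[INST] Answer to the following question:
how much is 2+2?
[/INST]`)
	if err != nil {
		log.Fatalf("predict: %v", err)
	}
	fmt.Println(text)
}
```

Building this requires the same native setup as the test target (BUILD_TYPE=cublas and the compiled libbinding.a), which is why CI gates it behind the self-hosted GPU runner.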