diff --git a/.github/workflows/build-wheels-batch.yml b/.github/workflows/build-wheels-batch.yml
new file mode 100644
index 00000000..5208a082
--- /dev/null
+++ b/.github/workflows/build-wheels-batch.yml
@@ -0,0 +1,55 @@
+# Dispatches the "Build Wheels" workflow once per requested llama-cpp-python
+# version tag, one version at a time.
+name: Batch Build Wheels
+
+on:
+  workflow_dispatch:
+    inputs:
+      versions:
+        description: 'Comma-separated version tags of llama-cpp-python to build'
+        default: 'v0.1.77,v0.1.76'
+        required: true
+        type: string
+
+permissions:
+  contents: write
+
+jobs:
+  # Turns the comma-separated `versions` input into a JSON array of strings
+  # that the run_workflows job expands with fromJSON().
+  define_matrix:
+    name: Define Workflow Matrix
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    defaults:
+      run:
+        shell: pwsh
+    env:
+      PCKGVERS: ${{ inputs.versions }}
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Define Job Output
+        id: set-matrix
+        run: |
+          # Trim each tag and drop empty entries (e.g. from a trailing comma).
+          # @() keeps a single tag as an array so the output is always a list.
+          $versions = @($env:PCKGVERS.Split(',').Trim() | Where-Object { $_ })
+          # matrix.version receives this value directly, so it must be a plain
+          # JSON array, and every tag must be quoted to be valid JSON.
+          $json = ConvertTo-Json -InputObject $versions -Compress
+          Write-Output "matrix=$json" >> $env:GITHUB_OUTPUT
+
+  # Calls build-wheels.yml once per version; max-parallel: 1 runs them serially.
+  run_workflows:
+    name: Build ${{ matrix.version }} Wheels
+    needs: define_matrix
+    strategy:
+      max-parallel: 1
+      matrix:
+        version: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
+    uses: ./.github/workflows/build-wheels.yml
+    with:
+      version: ${{ matrix.version }}
diff --git a/.github/workflows/build-wheels-cpu.yml b/.github/workflows/build-wheels-cpu.yml
new file mode 100644
index 00000000..fbdad596
--- /dev/null
+++ b/.github/workflows/build-wheels-cpu.yml
@@ -0,0 +1,84 @@
+# Builds CPU-only llama-cpp-python wheels for each OS, Python version and
+# instruction-set variant, then publishes them to the 'cpu' release tag.
+name: Build CPU-only Wheels
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: 'Version tag of llama-cpp-python to build: v0.1.77'
+        default: 'v0.1.77'
+        required: false
+        type: string
+  workflow_call:
+    inputs:
+      version:
+        description: 'Version tag of llama-cpp-python to build: v0.1.77'
+        default: 'v0.1.77'
+        required: false
+        type: string
+
+permissions:
+  contents: write
+
+jobs:
+  build_wheels:
+    name: ${{ matrix.os }} ${{ matrix.pyver }} CPU ${{ matrix.avx }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-20.04, windows-latest]
+        pyver: ["3.7", "3.8", "3.9", "3.10", "3.11"]
+        avx: ["AVX", "AVX2", "AVX512", "basic"]
+    defaults:
+      run:
+        shell: pwsh
+    env:
+      AVXVER: ${{ matrix.avx }}
+      PCKGVER: ${{ inputs.version }}
+
+    steps:
+      # Check out the upstream project at the requested tag, not this repository.
+      - uses: actions/checkout@v3
+        with:
+          repository: 'abetlen/llama-cpp-python'
+          ref: ${{ inputs.version }}
+          submodules: 'recursive'
+
+      - uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.pyver }}
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install build wheel
+
+      - name: Build Wheel
+        run: |
+          $env:VERBOSE = '1'
+          $env:FORCE_CMAKE = '1'
+          # 'AVX2' matches no branch below, so CMAKE_ARGS is not set for it here.
+          if ($env:AVXVER -eq 'AVX') {$env:CMAKE_ARGS = '-DLLAMA_AVX2=off'}
+          if ($env:AVXVER -eq 'AVX512') {$env:CMAKE_ARGS = '-DLLAMA_AVX512=on'}
+          if ($env:AVXVER -eq 'basic') {$env:CMAKE_ARGS = '-DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'}
+          # egg_info --tag-build appends +cpu<variant> to the wheel's version.
+          python -m build --wheel -C--build-option=egg_info "-C--build-option=--tag-build=+cpu$env:AVXVER"
+
+      # Best-effort upload: on failure the wheel is kept as a workflow artifact
+      # by the following step instead of failing the job.
+      - name: Upload files to a GitHub release
+        id: upload-release
+        uses: svenstaro/upload-release-action@2.6.1
+        continue-on-error: true
+        with:
+          file: ./dist/*.whl
+          tag: 'cpu'
+          file_glob: true
+          make_latest: false
+          overwrite: true
+
+      - uses: actions/upload-artifact@v3
+        if: steps.upload-release.outcome == 'failure'
+        with:
+          name: cpu
+          path: ./dist/*.whl
diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml
new file mode 100644
index 00000000..0c7febe6
--- /dev/null
+++ b/.github/workflows/build-wheels.yml
@@ -0,0 +1,128 @@
+# Builds CUDA (cuBLAS) llama-cpp-python wheels for each OS, Python version,
+# CUDA version and instruction-set variant, then builds the CPU-only wheels.
+name: Build Wheels
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: 'Version tag of llama-cpp-python to build: v0.1.77'
+        default: 'v0.1.77'
+        required: false
+        type: string
+  workflow_call:
+    inputs:
+      version:
+        description: 'Version tag of llama-cpp-python to build: v0.1.77'
+        default: 'v0.1.77'
+        required: false
+        type: string
+
+permissions:
+  contents: write
+
+jobs:
+  build_wheels:
+    name: ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-20.04, windows-latest]
+        pyver: ["3.7", "3.8", "3.9", "3.10", "3.11"]
+        cuda: ["11.6.2", "11.7.1", "11.8.0", "12.0.1", "12.1.1", "12.2.0"]
+        # 'wheels' is the release tag of the default AVX2 variant (see job name).
+        releasetag: ["AVX", "wheels", "AVX512", "basic"]
+    defaults:
+      run:
+        shell: pwsh
+    env:
+      CUDAVER: ${{ matrix.cuda }}
+      AVXVER: ${{ matrix.releasetag }}
+      PCKGVER: ${{ inputs.version }}
+
+    steps:
+      # Check out the upstream project at the requested tag, not this repository.
+      - uses: actions/checkout@v3
+        with:
+          repository: 'abetlen/llama-cpp-python'
+          ref: ${{ inputs.version }}
+          submodules: 'recursive'
+
+      - uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.pyver }}
+
+      - name: Setup Mamba
+        uses: conda-incubator/setup-miniconda@v2.2.0
+        with:
+          activate-environment: "build"
+          python-version: ${{ matrix.pyver }}
+          miniforge-variant: Mambaforge
+          miniforge-version: latest
+          use-mamba: true
+          add-pip-as-python-dependency: true
+          auto-activate-base: false
+
+      - name: Install Dependencies
+        run: |
+          $cudaVersion = $env:CUDAVER
+          $cudaChannels = ''
+          # Patch number of the CUDA version, e.g. 2 for '11.6.2'.
+          $cudaNum = [int]$cudaVersion.substring($cudaVersion.LastIndexOf('.')+1)
+          # Add a '-c nvidia/label/cuda-X.Y.Z' channel for each patch release from
+          # the requested one down to X.Y.0.
+          while ($cudaNum -ge 0) { $cudaChannels += '-c nvidia/label/cuda-' + $cudaVersion.Remove($cudaVersion.LastIndexOf('.')+1) + $cudaNum + ' '; $cudaNum-- }
+          mamba install -y 'cuda' $cudaChannels.TrimEnd().Split()
+          python -m pip install build wheel
+
+      - name: Build Wheel
+        run: |
+          # 'v0.1.77' -> 0.1.77
+          $packageVersion = [version]$env:PCKGVER.TrimStart('v')
+          # '11.7.1' -> '117', used in the wheel's +cu<version> build tag.
+          $cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
+          # Point the build at the conda environment where CUDA was installed.
+          $env:CUDA_PATH = $env:CONDA_PREFIX
+          $env:CUDA_HOME = $env:CONDA_PREFIX
+          if ($IsLinux) {$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH}
+          $env:VERBOSE = '1'
+          $env:FORCE_CMAKE = '1'
+          $env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=all'
+          # Versions strictly between 0.1.68 and 0.1.70 get explicit architecture
+          # lists; later matching conditions override earlier ones.
+          if ($packageVersion -gt [version]'0.1.68' -and $packageVersion -lt [version]'0.1.70') {$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=35-real;37-real;52;61-real;70-real;72-real;75-real;80-real;86-real;89-real;90'}
+          if ($packageVersion -gt [version]'0.1.68' -and $packageVersion -lt [version]'0.1.70' -and [version]$env:CUDAVER -ge [version]'12.0') {$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=52;61-real;70-real;72-real;75-real;80-real;86-real;89-real;90'}
+          if ($packageVersion -gt [version]'0.1.68' -and $packageVersion -lt [version]'0.1.70' -and [version]$env:CUDAVER -lt [version]'11.8') {$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=35-real;37-real;52;61-real;70-real;72-real;75-real;80-real;86'}
+          if ($packageVersion -lt [version]'0.1.66') {$env:CUDAFLAGS = '-arch=all'}
+          # Variant flags are appended to CMAKE_ARGS; 'wheels' (AVX2) adds nothing.
+          if ($env:AVXVER -eq 'AVX') {$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX2=off'}
+          if ($env:AVXVER -eq 'AVX512') {$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX512=on'}
+          if ($env:AVXVER -eq 'basic') {$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'}
+          python -m build --wheel -C--build-option=egg_info "-C--build-option=--tag-build=+cu$cudaVersion"
+
+      # Best-effort upload: on failure the wheel is kept as a workflow artifact
+      # by the following step instead of failing the job.
+      - name: Upload files to a GitHub release
+        id: upload-release
+        uses: svenstaro/upload-release-action@2.6.1
+        continue-on-error: true
+        with:
+          file: ./dist/*.whl
+          tag: ${{ matrix.releasetag }}
+          file_glob: true
+          make_latest: false
+          overwrite: true
+
+      - uses: actions/upload-artifact@v3
+        if: steps.upload-release.outcome == 'failure'
+        with:
+          name: ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
+          path: ./dist/*.whl
+
+  # Runs the CPU-only workflow for the same version after build_wheels succeeds.
+  build_cpu:
+    name: Build CPU-only Wheels
+    needs: build_wheels
+    uses: ./.github/workflows/build-wheels-cpu.yml
+    with:
+      version: ${{ inputs.version }}