Skip to content

Commit 1929d6c

Browse files
committed
feat: add SGLang build support in Dockerfile
1 parent 0ff7a5b commit 1929d6c

File tree

8 files changed

+385
-3
lines changed

8 files changed

+385
-3
lines changed

.github/workflows/pack.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ on:
4949
- voxbox
5050
- mindie
5151
- vllm
52+
- sglang
5253
# Since specific Backend and Target still result in many tags,
5354
# we can leverage this to control packing one specific tag, even os/arch.
5455
tag:

gpustack_runner/runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from dataclasses_json import dataclass_json
1212

1313
_RE_DOCKER_IMAGE = re.compile(
14-
r"(?:(?P<prefix>[\w\\.\-]+(?:/[\w\\.\-]+)*)/)?gpustack/runner:(?P<backend>(Host|cann|corex|cuda|dtk|maca|rocm))(?P<backend_version>[XY\d\\.]+)(?:-(?P<backend_variant>\w+))?-(?P<service>(vllm|voxbox|mindie))(?P<service_version>[\w\\.]+)(?:-(?P<suffix>\w+))?",
14+
r"(?:(?P<prefix>[\w\\.\-]+(?:/[\w\\.\-]+)*)/)?gpustack/runner:(?P<backend>(Host|cann|corex|cuda|dtk|maca|rocm))(?P<backend_version>[XY\d\\.]+)(?:-(?P<backend_variant>\w+))?-(?P<service>(vllm|voxbox|sglang|mindie))(?P<service_version>[\w\\.]+)(?:-(?P<suffix>\w+))?",
1515
)
1616
"""
1717
Regex for Docker image parsing,

pack/cann/Dockerfile

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717
# - Install vLLM-Ascend from source.
1818
# - Install dependencies.
1919
# - Postprocess, review installation.
20+
# 5. sglang target.
21+
# - Build SGLang from source (Ascend/NPU), including sgl-kernel-npu and deep-ep.
22+
# - Install sglang with NPU extras.
23+
# - Ecosystem install: MemFabric and Triton Ascend.
24+
# - Optional: Install BiSheng toolkit.
25+
# - Postprocess, review installation.
2026

2127
# Argument usage:
2228
# - PYTHON_VERSION: Version of Python to use.
@@ -33,6 +39,8 @@
3339
# - VLLM_ASCEND_VERSION: Version of vLLM Ascend to use,
3440
# if not specified, it will fetch from the vLLM Ascend PyPi RSS.
3541
# - VLLM_TORCH_VERSION: Version of Torch for vLLM to use.
42+
# - SGLANG_VERSION: Version of SGLang to use.
43+
3644
ARG PYTHON_VERSION=3.11
3745
ARG CMAKE_MAX_JOBS
3846
ARG CANN_VERSION=8.2.rc2
@@ -737,3 +745,125 @@ ENV RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES=1
737745

738746
WORKDIR /
739747
ENTRYPOINT [ "tini", "--" ]
748+
749+
# Stage SGLang (inherits vLLM)
750+
#
751+
# Example build command:
752+
# docker build --progress=plain --platform=linux/arm64 \
753+
# --file=test/testDockerfile.cann \
754+
# --tag=gpustack/runner:cann${CANN_VERSION%.*}-sglang-linux-arm64 \
755+
# --target=sglang test
756+
#
757+
FROM vllm AS sglang
758+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
759+
760+
ARG TARGETPLATFORM
761+
ARG TARGETOS
762+
ARG TARGETARCH
763+
764+
ENV UV_SYSTEM_PYTHON=1 \
765+
UV_PRERELEASE=allow
766+
767+
## Build args for SGLang
768+
ARG SGL_REPO="https://github.com/sgl-project/sglang.git"
769+
ARG SGL_DEFAULT="main"
770+
ARG SGL_BRANCH=${SGL_DEFAULT}
771+
ARG BUILD_TYPE=srt
772+
ARG NO_DEPS_FLAG=""
773+
ARG SGLANG_VERSION=0.5.3.post3
774+
ENV SGLANG_VERSION=${SGLANG_VERSION}
775+
776+
## Build args for sgl-kernel-npu
777+
ARG SGL_KERNEL_NPU_REPO="https://github.com/sgl-project/sgl-kernel-npu.git"
778+
ARG SGL_KERNEL_NPU_BRANCH=${SGL_DEFAULT}
779+
## NPU ecosystem components
780+
ARG MEMFABRIC_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/mf_adapter-1.0.0-cp311-cp311-linux_aarch64.whl"
781+
ARG TRITON_ASCEND_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/triton_ascend-3.2.0%2Bgitb0ea0850-cp311-cp311-linux_aarch64.whl"
782+
ARG BISHENG_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/Ascend-BiSheng-toolkit_aarch64.run"
783+
784+
## Ascend toolkit path
785+
ENV ASCEND_CANN_PATH="${CANN_HOME}/ascend-toolkit"
786+
787+
## Install SGLang and NPU components
788+
RUN <<EOF
789+
# Prepare Python build deps and utilities
790+
uv pip install --verbose wheel build IPython orjson python-multipart pybind11
791+
792+
# Clean any previous installs
793+
pip uninstall -y sgl_kernel_npu deep-ep sglang || true
794+
795+
# Ecosystem: MemFabric and Triton Ascend
796+
uv pip install --no-cache-dir wheel==0.45.1
797+
uv pip install --no-cache-dir ${MEMFABRIC_URL}
798+
uv pip install --no-cache-dir ${TRITON_ASCEND_URL}
799+
800+
# Clone SGLang and install Python package (NPU extras)
801+
mkdir -p /sgl-workspace && pushd /sgl-workspace
802+
git clone ${SGL_REPO}
803+
cd sglang
804+
# Prefer version tag if provided, otherwise fall back to branch selection
805+
if [[ -n "${SGLANG_VERSION}" ]]; then
806+
git fetch --tags --depth=1
807+
if git rev-parse -q --verify "refs/tags/v${SGLANG_VERSION}" >/dev/null; then
808+
echo "Checking out tag v${SGLANG_VERSION}"; git checkout -q "tags/v${SGLANG_VERSION}"
809+
elif git rev-parse -q --verify "refs/tags/${SGLANG_VERSION}" >/dev/null; then
810+
echo "Checking out tag ${SGLANG_VERSION}"; git checkout -q "tags/${SGLANG_VERSION}"
811+
elif git rev-parse -q --verify "${SGLANG_VERSION}" >/dev/null; then
812+
echo "Checking out commit/branch ${SGLANG_VERSION}"; git checkout -q "${SGLANG_VERSION}"
813+
elif [[ "${SGL_BRANCH}" != "${SGL_DEFAULT}" ]]; then
814+
echo "Checking out branch ${SGL_BRANCH}"; git checkout -q "${SGL_BRANCH}"
815+
else
816+
echo "Using ${SGL_DEFAULT} default branch"
817+
fi
818+
else
819+
if [[ "${SGL_BRANCH}" != "${SGL_DEFAULT}" ]]; then
820+
echo "Checking out branch ${SGL_BRANCH}"; git checkout -q "${SGL_BRANCH}"
821+
fi
822+
fi
823+
rm -f python/pyproject.toml
824+
mv python/pyproject_other.toml python/pyproject.toml
825+
if [[ "${BUILD_TYPE}" == "srt" ]]; then
826+
python -m pip --no-cache-dir install -e "python[srt_npu]" ${NO_DEPS_FLAG}
827+
else
828+
python -m pip --no-cache-dir install -e "python[all_npu]" ${NO_DEPS_FLAG}
829+
fi
830+
popd
831+
832+
# Build sgl-kernel-npu and deep-ep wheels
833+
git -C /sgl-workspace clone --depth 1 ${SGL_KERNEL_NPU_REPO} ${SGL_KERNEL_NPU_BRANCH:+--branch ${SGL_KERNEL_NPU_BRANCH}}
834+
export LD_LIBRARY_PATH=${ASCEND_CANN_PATH}/latest/runtime/lib64/stub:$LD_LIBRARY_PATH
835+
source ${ASCEND_CANN_PATH}/set_env.sh
836+
pushd /sgl-workspace/sgl-kernel-npu
837+
bash build.sh
838+
pip install output/deep_ep*.whl output/sgl_kernel_npu*.whl --no-cache-dir
839+
popd
840+
841+
# Link deep_ep cpp .so to package root for runtime discovery
842+
cd "$(pip show deep-ep | awk '/^Location:/ {print $2}')" && ln -sf deep_ep/deep_ep_cpp*.so .
843+
844+
# Install BiSheng toolkit (Ascend)
845+
wget ${BISHENG_URL} && chmod a+x Ascend-BiSheng-toolkit_aarch64.run && ./Ascend-BiSheng-toolkit_aarch64.run --install && rm Ascend-BiSheng-toolkit_aarch64.run
846+
847+
# Cleanup
848+
rm -rf /var/tmp/* \
849+
&& rm -rf /tmp/*
850+
EOF
851+
852+
## Postprocess review
853+
RUN <<EOF
854+
uv pip tree \
855+
--package sglang \
856+
--package torch \
857+
--package torch-npu \
858+
--package deep-ep
859+
EOF
860+
861+
## Performance environment variables
862+
ENV PYTORCH_NPU_ALLOC_CONF=expandable_segments:True \
863+
SGLANG_SET_CPU_AFFINITY=1 \
864+
SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 \
865+
HCCL_BUFFSIZE=200 \
866+
SGLANG_NPU_USE_MLAPO=1
867+
868+
WORKDIR /
869+
ENTRYPOINT [ "tini", "--" ]

pack/cuda/Dockerfile

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
# - Install FlashInfer if existed.
2020
# - Install dependencies.
2121
# - Postprocess, review installation.
22+
# 4. sglang target.
23+
# - Install SGLang from PyPI (version controlled via SGLANG_VERSION).
24+
# - Postprocess, review installation.
2225

2326
# Argument usage:
2427
# - PYTHON_VERSION: Version of Python to use.
@@ -54,6 +57,8 @@
5457
# - VLLM_FLASHINFER_VERSION: Version of FlashInfer to use,
5558
# which is used to build the FlashInfer wheel.
5659
# - VLLM_LMCACHE_VERSION: Version of lmcache to use.
60+
# - SGLANG_VERSION: Version of SGLang to install (PyPI). Defaults to 0.5.3.post3.
61+
5762
ARG PYTHON_VERSION=3.12
5863
ARG CMAKE_MAX_JOBS
5964
ARG CUDA_VERSION=12.8.1
@@ -74,6 +79,10 @@ ARG VLLM_FLASHINFER_REPOSITORY=https://github.com/flashinfer-ai/flashinfer.git
7479
ARG VLLM_FLASHINFER_VERSION=0.3.1
7580
ARG VLLM_LMCACHE_VERSION=0.3.8
7681

82+
# SGLang build args (mirroring vLLM)
83+
ARG SGLANG_VERSION=0.5.3.post3
84+
85+
7786
#
7887
# Stage Bake Runtime
7988
#
@@ -954,3 +963,85 @@ ENV RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES=1
954963

955964
WORKDIR /
956965
ENTRYPOINT [ "tini", "--" ]
966+
967+
# Stage SGLang (inherits vLLM)
968+
#
969+
# Example build command:
970+
# docker build --progress=plain --platform=linux/amd64 --file=pack/cuda/Dockerfile --tag=gpustack/runner:cuda${CUDA_VERSION%.*}-sglang${SGLANG_VERSION}-linux-amd64 --target=sglang pack/cuda
971+
#
972+
FROM vllm AS sglang
973+
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]
974+
975+
ARG TARGETPLATFORM
976+
ARG TARGETOS
977+
ARG TARGETARCH
978+
979+
ENV UV_SYSTEM_PYTHON=1 \
980+
UV_PRERELEASE=allow
981+
982+
## Install SGLang
983+
984+
ARG SGLANG_VERSION
985+
986+
ENV SGLANG_VERSION=${SGLANG_VERSION}
987+
988+
RUN <<EOF
989+
# SGLang
990+
991+
# Install
992+
uv pip install --verbose \
993+
sglang==${SGLANG_VERSION}
994+
995+
# Cleanup
996+
rm -rf /var/tmp/* \
997+
&& rm -rf /tmp/*
998+
EOF
999+
1000+
## Install Dependencies
1001+
1002+
RUN <<EOF
1003+
# Dependencies
1004+
1005+
cat <<EOT >/tmp/requirements.txt
1006+
requests
1007+
pyyaml
1008+
httpx<1.0
1009+
fastapi
1010+
uvicorn
1011+
EOT
1012+
uv pip install \
1013+
-r /tmp/requirements.txt
1014+
1015+
# Review
1016+
uv pip tree \
1017+
--package sglang \
1018+
--package vllm \
1019+
--package torch
1020+
EOF
1021+
1022+
## Runtime Enhancements
1023+
1024+
# Build-time switches
1025+
ARG NCCL_ENABLE=1
1026+
ARG NCCL_PACKAGE=nvidia-nccl-cu12
1027+
ARG NCCL_VERSION=2.27.6
1028+
ARG FLASHINFER_PREFETCH_CUBIN=1
1029+
1030+
RUN <<EOF
1031+
# Runtime accelerators
1032+
1033+
# NCCL: configurable install via build args
1034+
if [[ "${NCCL_ENABLE}" == "1" ]]; then
1035+
uv pip install --no-cache-dir ${NCCL_PACKAGE}==${NCCL_VERSION} --force-reinstall --no-deps
1036+
fi
1037+
1038+
# FlashInfer cubin prefetch: only if package is present
1039+
if [[ "${FLASHINFER_PREFETCH_CUBIN}" == "1" ]]; then
1040+
python -c "import importlib.util,sys; sys.exit(0 if importlib.util.find_spec('flashinfer') else 1)" \
1041+
&& python -m flashinfer --download-cubin || true
1042+
fi
1043+
1044+
# Cleanup
1045+
rm -rf /var/tmp/* \
1046+
&& rm -rf /tmp/*
1047+
EOF

pack/expand_matrix.sh

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,18 @@ EOT
238238
--arg tag "${TAG}${TAG_SUFFIX}" \
239239
--arg platform_tag "${PLATFORM_TAG}" \
240240
'.[$tag] += [$platform_tag]')"
241+
PLATFORM_TAG_CACHE="[\"${PLATFORM_TAG}\",\"${PLATFORM_TAG_XY}\",\"${PLATFORM_TAG_X}\"]"
242+
if [[ "${SERVICE}" == "sglang" ]]; then
243+
IFS="." read -r V_MAJOR V_MINOR V_PATCH V_POST <<<"${VLLM_VERSION}"
244+
if [[ -z "${V_PATCH}" ]]; then V_PATCH=0; fi
245+
VLLM_TAG="${TAG_PREFIX}vllm${V_MAJOR}.${V_MINOR}.${V_PATCH}"
246+
VLLM_TAG_X="${TAG_PREFIX}vllm${V_MAJOR}"
247+
VLLM_TAG_XY="${TAG_PREFIX}vllm${V_MAJOR}.${V_MINOR}"
248+
VLLM_PLATFORM_TAG="${VLLM_TAG}-${OS}-${ARCH}"
249+
VLLM_PLATFORM_TAG_X="${VLLM_TAG_X}-${OS}-${ARCH}"
250+
VLLM_PLATFORM_TAG_XY="${VLLM_TAG_XY}-${OS}-${ARCH}"
251+
PLATFORM_TAG_CACHE="[\"${PLATFORM_TAG}\",\"${PLATFORM_TAG_XY}\",\"${PLATFORM_TAG_X}\",\"${VLLM_PLATFORM_TAG}\",\"${VLLM_PLATFORM_TAG_XY}\",\"${VLLM_PLATFORM_TAG_X}\"]"
252+
fi
241253
BUILD_JOBS="$(echo "${BUILD_JOBS}" | jq -cr \
242254
--arg backend "${BACKEND}" \
243255
--arg backend_version "${BACKEND_VERSION}" \
@@ -249,7 +261,7 @@ EOT
249261
--arg tag "${TAG}${TAG_SUFFIX}" \
250262
--argjson args "${ARGS}" \
251263
--arg runner "${RUNNER}" \
252-
--argjson platform_tag_cache "[\"${PLATFORM_TAG}\",\"${PLATFORM_TAG_XY}\",\"${PLATFORM_TAG_X}\"]" \
264+
--argjson platform_tag_cache "${PLATFORM_TAG_CACHE}" \
253265
--arg original_backend_version "${ORIGINAL_BACKEND_VERSION}" \
254266
'[{
255267
backend: $backend,

pack/matrix.yaml

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# used to select the packing rules and Dockerfile below `pack` directory.
55
# - services: (Optional) The inference service to pack for,
66
# used to select the Docker build phase described in `pack/${backend}/Dockerfile`.
7-
# Default to `voxbox` and `vllm`.
7+
# Default to `voxbox`, `vllm`, and `sglang`.
88
# - platforms: (Optional) The platforms to build for,
99
# used to select the Docker Linux build platforms.
1010
# Default to `linux/amd64` and `linux/arm64`.
@@ -20,9 +20,12 @@ rules:
2020
## Ascend CANN 8.2.rc2, using CANN Kernel for A3.
2121
##
2222
- backend: "cann"
23+
platforms:
24+
- "linux/arm64"
2325
services:
2426
- "mindie"
2527
- "vllm"
28+
- "sglang"
2629
args:
2730
- "CANN_VERSION=8.2.rc2"
2831
- "CANN_ARCHS=a3"
@@ -38,9 +41,12 @@ rules:
3841
## Ascend CANN 8.2.rc2, using CANN Kernel for 310P.
3942
##
4043
- backend: "cann"
44+
platforms:
45+
- "linux/arm64"
4146
services:
4247
- "mindie"
4348
- "vllm"
49+
- "sglang"
4450
args:
4551
- "CANN_VERSION=8.2.rc2"
4652
- "CANN_ARCHS=310p"
@@ -66,6 +72,10 @@ rules:
6672
## NVIDIA CUDA 12.4.1, using PyTorch +cu126 in linux/amd64.
6773
##
6874
- backend: "cuda"
75+
services:
76+
- "voxbox"
77+
- "vllm"
78+
- "sglang"
6979
args:
7080
- "CUDA_VERSION=12.4.1"
7181
- "VOXBOX_TORCH_CUDA_VERSION=12.6.3"
@@ -76,6 +86,10 @@ rules:
7686
## NVIDIA CUDA 12.6.3, using PyTorch +cu126 in linux/amd64.
7787
##
7888
- backend: "cuda"
89+
services:
90+
- "voxbox"
91+
- "vllm"
92+
- "sglang"
7993
args:
8094
- "CUDA_VERSION=12.6.3"
8195
- "VLLM_NVIDIA_GDRCOPY_VERSION=2.4.1"
@@ -84,6 +98,10 @@ rules:
8498
## NVIDIA CUDA 12.8.1, using PyTorch +cu128 in both linux/amd64 and linux/arm64.
8599
##
86100
- backend: "cuda"
101+
services:
102+
- "voxbox"
103+
- "vllm"
104+
- "sglang"
87105
args:
88106
- "CUDA_VERSION=12.8.1"
89107
- "VLLM_NVIDIA_GDRCOPY_VERSION=2.4.1"
@@ -129,5 +147,9 @@ rules:
129147
- "vllm"
130148
platforms:
131149
- "linux/amd64"
150+
services:
151+
- "voxbox"
152+
- "vllm"
153+
- "sglang"
132154
args:
133155
- "ROCM_VERSION=7.0.2"

0 commit comments

Comments (0)