|
17 | 17 | # - Install vLLM-Ascend from source. |
18 | 18 | # - Install dependencies. |
19 | 19 | # - Postprocess, review installation. |
| 20 | +# 5. sglang target. |
| 21 | +# - Build SGLang from source (Ascend/NPU), including sgl-kernel-npu and deep-ep. |
| 22 | +# - Install sglang with NPU extras. |
| 23 | +# - Ecosystem install: MemFabric and Triton Ascend. |
| 24 | +# - Optional: Install BiSheng toolkit. |
| 25 | +# - Postprocess, review installation. |
20 | 26 |
|
21 | 27 | # Argument usage: |
22 | 28 | # - PYTHON_VERSION: Version of Python to use. |
|
33 | 39 | # - VLLM_ASCEND_VERSION: Version of vLLM Ascend to use, |
34 | 40 | # if not specified, it will fetch from the vLLM Ascend PyPI RSS. |
35 | 41 | # - VLLM_TORCH_VERSION: Version of Torch for vLLM to use. |
| 42 | +# - SGLANG_VERSION: Version of SGLang to use. |
| 43 | + |
36 | 44 | ARG PYTHON_VERSION=3.11 |
37 | 45 | ARG CMAKE_MAX_JOBS |
38 | 46 | ARG CANN_VERSION=8.2.rc2 |
@@ -737,3 +745,125 @@ ENV RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES=1 |
737 | 745 |
|
738 | 746 | WORKDIR / |
739 | 747 | ENTRYPOINT [ "tini", "--" ] |
| 748 | + |
# Stage SGLang (built on top of the vllm stage)
#
# Example build command:
#   docker build --progress=plain --platform=linux/arm64 \
#     --file=test/testDockerfile.cann \
#     --tag=gpustack/runner:cann${CANN_VERSION%.*}-sglang-linux-arm64 \
#     --target=sglang test
#
# NOTE(review): the --file path and the "test" build context above look like a
# local test layout; confirm they match the repository layout.
FROM vllm AS sglang
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]

# Platform build args, redeclared so they are in scope inside this stage.
ARG TARGETOS
ARG TARGETARCH
ARG TARGETPLATFORM

# uv configuration used by the install steps of this stage.
ENV UV_PRERELEASE=allow \
    UV_SYSTEM_PYTHON=1
| 766 | + |
## SGLang source selection
# SGL_DEFAULT names the default branch. SGL_BRANCH and SGL_KERNEL_NPU_BRANCH
# default to it, so it has to be declared before them.
ARG SGL_DEFAULT="main"
ARG SGL_REPO="https://github.com/sgl-project/sglang.git"
ARG SGL_BRANCH=${SGL_DEFAULT}
# Version (tag, branch or commit) to check out; also exported into the image.
ARG SGLANG_VERSION=0.5.3.post3
ENV SGLANG_VERSION=${SGLANG_VERSION}
# "srt" selects the srt_npu extra in the install step; anything else selects all_npu.
ARG BUILD_TYPE=srt
# Extra flag(s) appended to the pip install of sglang; empty by default.
ARG NO_DEPS_FLAG=""

## sgl-kernel-npu source selection
ARG SGL_KERNEL_NPU_REPO="https://github.com/sgl-project/sgl-kernel-npu.git"
ARG SGL_KERNEL_NPU_BRANCH=${SGL_DEFAULT}

## NPU ecosystem components
# NOTE(review): the wheel names pin cp311 / linux_aarch64; presumably they must
# match PYTHON_VERSION and the target architecture. Confirm before overriding either.
ARG MEMFABRIC_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/mf_adapter-1.0.0-cp311-cp311-linux_aarch64.whl"
ARG TRITON_ASCEND_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/triton_ascend-3.2.0%2Bgitb0ea0850-cp311-cp311-linux_aarch64.whl"
ARG BISHENG_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/Ascend-BiSheng-toolkit_aarch64.run"

## Ascend toolkit path (CANN_HOME is not set in this stage; it is expected to be inherited)
ENV ASCEND_CANN_PATH="${CANN_HOME}/ascend-toolkit"
| 786 | + |
## Install SGLang and NPU components
RUN <<EOF
    # Installs, in one layer:
    #   1. Python build helpers, MemFabric and Triton Ascend wheels.
    #   2. SGLang from source (editable) with the NPU extras.
    #   3. sgl-kernel-npu and deep-ep wheels built from source.
    #   4. The BiSheng toolkit (skipped when BISHENG_URL is empty).
    #
    # The stage SHELL runs bash with -e and pipefail. Commands are written as
    # separate statements rather than "a && b" chains: errexit ignores a failure
    # of any command in an && list except the last one, which would let a failed
    # download or installer go unnoticed.

    # Prepare Python build deps and utilities
    uv pip install --verbose wheel build IPython orjson python-multipart pybind11

    # Clean any previous installs (best effort: the packages may not be present)
    pip uninstall -y sgl_kernel_npu deep-ep sglang || true

    # Ecosystem: MemFabric and Triton Ascend
    uv pip install --no-cache-dir wheel==0.45.1
    uv pip install --no-cache-dir "${MEMFABRIC_URL}"
    uv pip install --no-cache-dir "${TRITON_ASCEND_URL}"

    # Clone SGLang and install Python package (NPU extras).
    # The target directory is explicit so a fork with another repository name
    # still ends up in /sgl-workspace/sglang.
    mkdir -p /sgl-workspace
    pushd /sgl-workspace
    git clone "${SGL_REPO}" sglang
    cd sglang
    # Prefer version tag if provided, otherwise fall back to branch selection
    if [[ -n "${SGLANG_VERSION}" ]]; then
        git fetch --tags --depth=1
        if git rev-parse -q --verify "refs/tags/v${SGLANG_VERSION}" >/dev/null; then
            echo "Checking out tag v${SGLANG_VERSION}"
            git checkout -q "tags/v${SGLANG_VERSION}"
        elif git rev-parse -q --verify "refs/tags/${SGLANG_VERSION}" >/dev/null; then
            echo "Checking out tag ${SGLANG_VERSION}"
            git checkout -q "tags/${SGLANG_VERSION}"
        elif git rev-parse -q --verify "${SGLANG_VERSION}" >/dev/null; then
            echo "Checking out commit/branch ${SGLANG_VERSION}"
            git checkout -q "${SGLANG_VERSION}"
        elif [[ "${SGL_BRANCH}" != "${SGL_DEFAULT}" ]]; then
            # The requested version was not found: keep the fallback, but say so.
            echo "WARNING: SGLANG_VERSION=${SGLANG_VERSION} matches no tag, branch or commit" >&2
            echo "Checking out branch ${SGL_BRANCH}"
            git checkout -q "${SGL_BRANCH}"
        else
            echo "WARNING: SGLANG_VERSION=${SGLANG_VERSION} matches no tag, branch or commit" >&2
            echo "Using ${SGL_DEFAULT} default branch"
        fi
    elif [[ "${SGL_BRANCH}" != "${SGL_DEFAULT}" ]]; then
        echo "Checking out branch ${SGL_BRANCH}"
        git checkout -q "${SGL_BRANCH}"
    fi
    # Replace the default pyproject.toml with pyproject_other.toml before installing.
    rm -f python/pyproject.toml
    mv python/pyproject_other.toml python/pyproject.toml
    # NO_DEPS_FLAG is intentionally unquoted: an empty value must expand to no
    # argument at all, and a non-empty value may carry several flags.
    if [[ "${BUILD_TYPE}" == "srt" ]]; then
        python -m pip --no-cache-dir install -e "python[srt_npu]" ${NO_DEPS_FLAG}
    else
        python -m pip --no-cache-dir install -e "python[all_npu]" ${NO_DEPS_FLAG}
    fi
    popd

    # Build sgl-kernel-npu and deep-ep wheels.
    # The --branch option is only passed when SGL_KERNEL_NPU_BRANCH is non-empty.
    git -C /sgl-workspace clone --depth 1 \
        ${SGL_KERNEL_NPU_BRANCH:+--branch "${SGL_KERNEL_NPU_BRANCH}"} \
        "${SGL_KERNEL_NPU_REPO}" sgl-kernel-npu
    export LD_LIBRARY_PATH="${ASCEND_CANN_PATH}/latest/runtime/lib64/stub:${LD_LIBRARY_PATH}"
    source "${ASCEND_CANN_PATH}/set_env.sh"
    pushd /sgl-workspace/sgl-kernel-npu
    bash build.sh
    pip install output/deep_ep*.whl output/sgl_kernel_npu*.whl --no-cache-dir
    popd

    # Link deep_ep cpp .so to package root for runtime discovery.
    # The location is resolved first so a failing "pip show" aborts the build,
    # and pushd/popd keep the directory change from leaking into later steps.
    deep_ep_site="$(pip show deep-ep | awk '/^Location:/ {print $2}')"
    if [[ -z "${deep_ep_site}" ]]; then
        echo "ERROR: cannot determine the install location of deep-ep" >&2
        exit 1
    fi
    pushd "${deep_ep_site}"
    # Without this check an unmatched glob would create a dangling symlink
    # literally named "deep_ep_cpp*.so".
    if ! compgen -G "deep_ep/deep_ep_cpp*.so" >/dev/null; then
        echo "ERROR: no deep_ep/deep_ep_cpp*.so found under ${deep_ep_site}" >&2
        exit 1
    fi
    ln -sf deep_ep/deep_ep_cpp*.so .
    popd

    # Install BiSheng toolkit (Ascend); optional, skipped when BISHENG_URL is empty.
    if [[ -n "${BISHENG_URL}" ]]; then
        # Download to a scratch directory under a fixed name, so the file name
        # embedded in BISHENG_URL does not matter.
        bisheng_dir="$(mktemp -d)"
        bisheng_installer="${bisheng_dir}/Ascend-BiSheng-toolkit.run"
        wget -O "${bisheng_installer}" "${BISHENG_URL}"
        chmod a+x "${bisheng_installer}"
        "${bisheng_installer}" --install
        rm -rf -- "${bisheng_dir}"
    else
        echo "BISHENG_URL is empty, skipping BiSheng toolkit installation"
    fi

    # Cleanup
    rm -rf /var/tmp/*
    rm -rf /tmp/*
EOF
| 851 | + |
## Postprocess review
RUN <<EOF
    # Print the dependency trees of the key packages into the build log.
    uv pip tree --package sglang --package torch --package torch-npu --package deep-ep
EOF
| 860 | + |
## Performance environment variables
# Defaults baked into the image environment (keys listed alphabetically).
ENV HCCL_BUFFSIZE=200 \
    PYTORCH_NPU_ALLOC_CONF=expandable_segments:True \
    SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 \
    SGLANG_NPU_USE_MLAPO=1 \
    SGLANG_SET_CPU_AFFINITY=1

# Same working directory and entrypoint as set at the end of the vllm stage.
WORKDIR /
ENTRYPOINT [ "tini", "--" ]
0 commit comments