Skip to content

Commit 251a6df

Browse files
committed
Add Dockerfile and build_image.sh modification
1 parent 9bcbd22 commit 251a6df

File tree

6 files changed

+164
-37
lines changed

6 files changed

+164
-37
lines changed

docker/Dockerfile

Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,12 @@ ARG BRANCH_NAME
3737
ARG REPO_URL=https://github.com/pytorch/serve.git
3838
ENV PYTHONUNBUFFERED TRUE
3939

40-
RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
40+
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
4141
apt-get update && \
4242
apt-get upgrade -y && \
4343
apt-get install software-properties-common -y && \
4444
add-apt-repository -y ppa:deadsnakes/ppa && \
45-
apt remove python-pip python3-pip && \
45+
apt remove -y python-pip python3-pip && \
4646
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
4747
ca-certificates \
4848
g++ \
@@ -55,6 +55,13 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
5555
git \
5656
&& rm -rf /var/lib/apt/lists/*
5757

58+
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
59+
if [ "$USE_ROCM_VERSION" ]; then \
60+
apt-get update && \
61+
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y rocm-dev amd-smi-lib \
62+
&& rm -rf /var/lib/apt/lists/* ; \
63+
fi
64+
5865
# Make the virtual environment and "activating" it by adding it first to the path.
5966
# From here on the python$PYTHON_VERSION interpreter is used and the packages
6067
# are installed in /home/venv which is what we need for the "runtime-image"
@@ -67,6 +74,7 @@ RUN python -m pip install -U pip setuptools
6774
RUN export USE_CUDA=1
6875

6976
ARG USE_CUDA_VERSION=""
77+
ARG USE_ROCM_VERSION=""
7078

7179
COPY ./ serve
7280

@@ -76,7 +84,6 @@ RUN \
7684
git clone --recursive $REPO_URL -b $BRANCH_NAME serve; \
7785
fi
7886

79-
8087
WORKDIR "serve"
8188

8289
RUN cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
@@ -90,6 +97,14 @@ RUN \
9097
else \
9198
python ./ts_scripts/install_dependencies.py;\
9299
fi; \
100+
elif echo "${BASE_IMAGE}" | grep -q "rocm/"; then \
101+
# Install ROCm version specific binary when ROCm version is specified as a build arg
102+
if [ "$USE_ROCM_VERSION" ]; then \
103+
python ./ts_scripts/install_dependencies.py --rocm $USE_ROCM_VERSION;\
104+
# Install the binary with the latest CPU image on a ROCm base image
105+
else \
106+
python ./ts_scripts/install_dependencies.py; \
107+
fi; \
93108
# Install the CPU binary
94109
else \
95110
python ./ts_scripts/install_dependencies.py; \
@@ -111,13 +126,14 @@ FROM ${BASE_IMAGE} AS production-image
111126
# Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses default define at the top)
112127
ARG PYTHON_VERSION
113128
ENV PYTHONUNBUFFERED TRUE
129+
ARG USE_ROCM_VERSION
114130

115-
RUN --mount=type=cache,target=/var/cache/apt \
131+
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
116132
apt-get update && \
117133
apt-get upgrade -y && \
118134
apt-get install software-properties-common -y && \
119135
add-apt-repository ppa:deadsnakes/ppa -y && \
120-
apt remove python-pip python3-pip && \
136+
apt remove -y python-pip python3-pip && \
121137
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
122138
python$PYTHON_VERSION \
123139
python3-distutils \
@@ -130,13 +146,25 @@ RUN --mount=type=cache,target=/var/cache/apt \
130146
&& rm -rf /var/lib/apt/lists/* \
131147
&& cd /tmp
132148

149+
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
150+
if [ "$USE_ROCM_VERSION" ]; then \
151+
apt-get update && \
152+
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y rocm-dev amd-smi-lib \
153+
&& rm -rf /var/lib/apt/lists/* ; \
154+
fi
155+
133156
RUN useradd -m model-server \
134157
&& mkdir -p /home/model-server/tmp
135158

136159
COPY --chown=model-server --from=compile-image /home/venv /home/venv
137160
COPY --from=compile-image /usr/local/bin/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
138161
ENV PATH="/home/venv/bin:$PATH"
139162

163+
RUN \
164+
if [ "$USE_ROCM_VERSION" ]; then \
165+
python -m pip install /opt/rocm/share/amd_smi; \
166+
fi
167+
140168
RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh \
141169
&& chown -R model-server /home/model-server
142170

@@ -157,13 +185,14 @@ FROM ${BASE_IMAGE} AS ci-image
157185
ARG PYTHON_VERSION
158186
ARG BRANCH_NAME
159187
ENV PYTHONUNBUFFERED TRUE
188+
ARG USE_ROCM_VERSION
160189

161-
RUN --mount=type=cache,target=/var/cache/apt \
190+
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
162191
apt-get update && \
163192
apt-get upgrade -y && \
164193
apt-get install software-properties-common -y && \
165194
add-apt-repository -y ppa:deadsnakes/ppa && \
166-
apt remove python-pip python3-pip && \
195+
apt remove -y python-pip python3-pip && \
167196
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
168197
python$PYTHON_VERSION \
169198
python3-distutils \
@@ -183,13 +212,24 @@ RUN --mount=type=cache,target=/var/cache/apt \
183212
&& rm -rf /var/lib/apt/lists/* \
184213
&& cd /tmp
185214

215+
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
216+
if [ "$USE_ROCM_VERSION" ]; then \
217+
apt-get update && \
218+
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y rocm-dev amd-smi-lib \
219+
&& rm -rf /var/lib/apt/lists/* ; \
220+
fi
186221

187222
COPY --from=compile-image /home/venv /home/venv
188223

189224
ENV PATH="/home/venv/bin:$PATH"
190225

191226
RUN python -m pip install --no-cache-dir -r https://raw.githubusercontent.com/pytorch/serve/$BRANCH_NAME/requirements/developer.txt
192227

228+
RUN \
229+
if [ "$USE_ROCM_VERSION" ]; then \
230+
python -m pip install /opt/rocm/share/amd_smi; \
231+
fi
232+
193233
RUN mkdir /home/serve
194234
ENV TS_RUN_IN_DOCKER True
195235

@@ -203,11 +243,13 @@ ARG PYTHON_VERSION
203243
ARG BRANCH_NAME
204244
ARG BUILD_FROM_SRC
205245
ARG LOCAL_CHANGES
246+
ARG USE_ROCM_VERSION
206247
ARG BUILD_WITH_IPEX
207248
ARG IPEX_VERSION=1.11.0
208249
ARG IPEX_URL=https://software.intel.com/ipex-whl-stable
209250
ENV PYTHONUNBUFFERED TRUE
210-
RUN --mount=type=cache,target=/var/cache/apt \
251+
252+
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
211253
apt-get update && \
212254
apt-get upgrade -y && \
213255
apt-get install software-properties-common -y && \
@@ -227,9 +269,15 @@ RUN --mount=type=cache,target=/var/cache/apt \
227269
# https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905
228270
openjdk-17-jdk \
229271
build-essential \
272+
wget \
230273
curl \
231274
vim \
232275
numactl \
276+
nodejs \
277+
npm \
278+
zip \
279+
unzip \
280+
&& npm install -g newman@5.3.2 newman-reporter-htmlextra markdown-link-check \
233281
&& if [ "$BUILD_WITH_IPEX" = "true" ]; then apt-get update && apt-get install -y libjemalloc-dev libgoogle-perftools-dev libomp-dev && ln -s /usr/lib/x86_64-linux-gnu/libjemalloc.so /usr/lib/libjemalloc.so && ln -s /usr/lib/x86_64-linux-gnu/libtcmalloc.so /usr/lib/libtcmalloc.so && ln -s /usr/lib/x86_64-linux-gnu/libiomp5.so /usr/lib/libiomp5.so; fi \
234282
&& rm -rf /var/lib/apt/lists/*
235283

@@ -243,10 +291,17 @@ RUN \
243291

244292
COPY --from=compile-image /home/venv /home/venv
245293
ENV PATH="/home/venv/bin:$PATH"
294+
295+
RUN \
296+
if [ "$USE_ROCM_VERSION" ]; then \
297+
python -m pip install /opt/rocm/share/amd_smi; \
298+
fi
299+
246300
WORKDIR "serve"
301+
247302
RUN python -m pip install -U pip setuptools \
248303
&& python -m pip install --no-cache-dir -r requirements/developer.txt \
249-
&& python ts_scripts/install_from_src.py \
304+
&& python ts_scripts/install_from_src.py --environment=dev\
250305
&& useradd -m model-server \
251306
&& mkdir -p /home/model-server/tmp \
252307
&& cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh \

docker/README.md

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ Use `build_image.sh` script to build the docker images. The script builds the `p
4444
|-bt, --buildtype|Which type of docker image to build. Can be one of : production, dev, ci|
4545
|-t, --tag|Tag name for image. If not specified, script uses torchserve default tag names.|
4646
|-cv, --cudaversion| Specify the CUDA version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118`, `cu121`. Default `cu121`|
47+
|-rv, --rocmversion| Specify the ROCm version to use. Supported values `rocm60`, `rocm61`, `rocm62` |
4748
|-ipex, --build-with-ipex| Specify to build with intel_extension_for_pytorch. If not specified, script builds without intel_extension_for_pytorch.|
4849
|-cpp, --build-cpp| Specify to build TorchServe CPP|
4950
|-n, --nightly| Specify to build with TorchServe nightly.|
@@ -62,9 +63,9 @@ Creates a docker image with publicly available `torchserve` and `torch-model-arc
6263
./build_image.sh
6364
```
6465

65-
- To create a GPU based image with cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118`
66+
- To create a GPU based image with cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118` for CUDA and `rocm60`, `rocm61`, `rocm62` for ROCm.
6667

67-
- GPU images are built with NVIDIA CUDA base image. If you want to use ONNX, please specify the base image as shown in the next section.
68+
- GPU images are built with either an NVIDIA CUDA base image or an AMD ROCm base image. If you want to use ONNX, please specify the base image as shown in the next section.
6869

6970
```bash
7071
./build_image.sh -g -cv cu117
@@ -132,6 +133,24 @@ Creates a docker image with `torchserve` and `torch-model-archiver` installed fr
132133
./build_image.sh -bt dev -g -cv cu92
133134
```
134135

136+
- For creating GPU based image with rocm version 6.0:
137+
138+
```bash
139+
./build_image.sh -bt dev -g -rv rocm60
140+
```
141+
142+
- For creating GPU based image with rocm version 6.1:
143+
144+
```bash
145+
./build_image.sh -bt dev -g -rv rocm61
146+
```
147+
148+
- For creating GPU based image with rocm version 6.2:
149+
150+
```bash
151+
./build_image.sh -bt dev -g -rv rocm62
152+
```
153+
135154
- For creating GPU based image with a different branch:
136155

137156
```bash
@@ -164,7 +183,6 @@ Creates a docker image with `torchserve` and `torch-model-archiver` installed fr
164183
./build_image.sh -bt dev -g [-cv cu121|cu118] -cpp
165184
```
166185

167-
- For ROCm support (*experimental*), refer to [this documentation](../docs/hardware_support/amd_support.md).
168186

169187
## Start a container with a TorchServe image
170188

@@ -204,6 +222,12 @@ For GPU latest image with gpu devices 1 and 2:
204222
docker run --rm -it --gpus '"device=1,2"' -p 127.0.0.1:8080:8080 -p 127.0.0.1:8081:8081 -p 127.0.0.1:8082:8082 -p 127.0.0.1:7070:7070 -p 127.0.0.1:7071:7071 pytorch/torchserve:latest-gpu
205223
```
206224

225+
For a GPU image with ROCm support, using GPU devices 1 and 2:
226+
227+
```bash
228+
docker run --rm -it --device=/dev/kfd --device=/dev/dri -e HIP_VISIBLE_DEVICES=1,2 -p 127.0.0.1:8080:8080 -p 127.0.0.1:8081:8081 -p 127.0.0.1:8082:8082 -p 127.0.0.1:7070:7070 -p 127.0.0.1:7071:7071 pytorch/torchserve:latest-gpu
229+
```
230+
207231
For specific versions you can pass in the specific tag to use (ex: `0.1.1-cuda10.1-cudnn7-runtime`):
208232

209233
```bash

docker/build_image.sh

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ BASE_IMAGE="ubuntu:20.04"
1111
UPDATE_BASE_IMAGE=false
1212
USE_CUSTOM_TAG=false
1313
CUDA_VERSION=""
14+
ROCM_VERSION=""
1415
USE_LOCAL_SERVE_FOLDER=false
1516
BUILD_WITH_IPEX=false
1617
BUILD_CPP=false
@@ -33,6 +34,7 @@ do
3334
echo "-bi, --baseimage specify base docker image. Example: nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04 "
3435
echo "-bt, --buildtype specify for type of created image. Possible values: production, dev, ci."
3536
echo "-cv, --cudaversion specify to cuda version to use"
37+
echo "-rv, --rocmversion spesify to rocm version to use"
3638
echo "-t, --tag specify tag name for docker image"
3739
echo "-lf, --use-local-serve-folder specify this option for the benchmark image if the current 'serve' folder should be used during automated benchmarks"
3840
echo "-ipex, --build-with-ipex specify to build with intel_extension_for_pytorch"
@@ -167,6 +169,24 @@ do
167169
shift
168170
shift
169171
;;
172+
-rv|--rocmversion)
173+
ROCM_VERSION="$2"
174+
if [ "${ROCM_VERSION}" == "rocm60" ];
175+
then
176+
BASE_IMAGE="rocm/dev-ubuntu-22.04:6.0.2"
177+
elif [ "${ROCM_VERSION}" == "rocm61" ];
178+
then
179+
BASE_IMAGE="rocm/dev-ubuntu-22.04:6.1.2"
180+
elif [ "${ROCM_VERSION}" == "rocm62" ];
181+
then
182+
BASE_IMAGE="rocm/dev-ubuntu-22.04:6.2.4"
183+
else
184+
echo "ROCM version not supported"
185+
exit 1
186+
fi
187+
shift
188+
shift
189+
;;
170190
esac
171191
done
172192

@@ -218,6 +238,23 @@ then
218238
exit 1
219239
fi
220240
fi
241+
242+
if [[ "${MACHINE}" == "gpu" || "${ROCM_VERSION}" != "" ]];
243+
then
244+
if [ "${ROCM_VERSION}" == "rocm60" ];
245+
then
246+
BASE_IMAGE="rocm/dev-ubuntu-22.04:6.0.2-complete"
247+
elif [ "${ROCM_VERSION}" == "rocm61" ];
248+
then
249+
BASE_IMAGE="rocm/dev-ubuntu-22.04:6.1.2-complete"
250+
elif [ "${ROCM_VERSION}" == "rocm62" ];
251+
then
252+
BASE_IMAGE="rocm/dev-ubuntu-22.04:6.2.4-complete"
253+
else
254+
echo "ROCm version $ROCM_VERSION is not supported for CPP"
255+
exit 1
256+
fi
257+
fi
221258
fi
222259

223260
if [ "${BUILD_TYPE}" == "production" ]; then
@@ -232,16 +269,16 @@ if [ "${BUILD_TYPE}" == "production" ]; then
232269
fi
233270
elif [ "${BUILD_TYPE}" == "ci" ];
234271
then
235-
DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
272+
DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg USE_ROCM_VERSION="${ROCM_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
236273
--build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}"\
237274
--build-arg LOCAL_CHANGES="${LOCAL_CHANGES}" -t "${DOCKER_TAG}" --target ci-image ../
238275
else
239276
if [ "${BUILD_CPP}" == "true" ]
240277
then
241-
DOCKER_BUILDKIT=1 docker build --file Dockerfile.cpp --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
278+
DOCKER_BUILDKIT=1 docker build --file Dockerfile.cpp --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg USE_ROCM_VERSION="${ROCM_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
242279
--build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" -t "${DOCKER_TAG}" --target cpp-dev-image .
243280
else
244-
DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
281+
DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg USE_ROCM_VERSION="${ROCM_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
245282
--build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}" --build-arg LOCAL_CHANGES="${LOCAL_CHANGES}"\
246283
--build-arg BUILD_WITH_IPEX="${BUILD_WITH_IPEX}" -t "${DOCKER_TAG}" --target dev-image ../
247284
fi

docs/hardware_support/amd_support.md

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ TorchServe can be run on any combination of operating system and device that is
55

66
## Supported Versions of ROCm
77

8-
The current stable `major.patch` version of ROCm and the previous path version will be supported. For example version `N.2` and `N.1` where `N` is the current major version.
8+
The current stable `major.patch` version of ROCm and the previous patch version will be supported. For example version `N.2` and `N.1` where `N` is the current major version.
99

1010
## Installation
1111

@@ -35,7 +35,7 @@ The current stable `major.patch` version of ROCm and the previous path version w
3535
- install the dependencies needed for ROCm support.
3636

3737
```bash
38-
python ./ts_scripts/install_dependencies.py --rocm=rocm61
38+
python ./ts_scripts/install_dependencies.py --rocm=rocm62
3939
python ./ts_scripts/install_from_src.py
4040
```
4141
- enable amd-smi in the python virtual environment
@@ -60,20 +60,6 @@ If you have 8 accelerators but only want TorchServe to see the last four of them
6060
> ⚠️ Setting both `CUDA_VISIBLE_DEVICES` and `HIP_VISIBLE_DEVICES` may cause unintended behaviour and should be avoided.
6161
> Doing so may cause an exception in the future.
6262

63-
## Docker
64-
65-
**In Development**
66-
67-
`Dockerfile.rocm` provides preliminary ROCm support for TorchServe.
68-
69-
Building and running `dev-image`:
70-
71-
```bash
72-
docker build --file docker/Dockerfile.rocm --target dev-image -t torch-serve-dev-image-rocm --build-arg USE_ROCM_VERSION=rocm62 --build-arg BUILD_FROM_SRC=true .
73-
74-
docker run -it --rm --device=/dev/kfd --device=/dev/dri torch-serve-dev-image-rocm bash
75-
```
76-
7763
## Example Usage
7864

7965
After installing TorchServe with the required dependencies for ROCm you should be ready to serve your model.

docs/sphinx/Makefile

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
1+
22
# Minimal makefile for Sphinx documentation
33
#
44

@@ -26,6 +26,5 @@ docset: html
2626
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
2727
%: Makefile
2828
cp ../../SECURITY.md ../security.md
29-
cp ../../examples/usecases/RAG_based_LLM_serving/README.md ../enhancing_llm_serving_compile_rag.md
30-
cp ../../examples/usecases/llm_diffusion_serving_app/README.md ../llm_diffusion_serving_app.md
29+
cp ../../examples//usecases/RAG_based_LLM_serving/README.md ../enhancing_llm_serving_compile_rag.md
3130
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

0 commit comments

Comments
 (0)