forked from hyperonym/basaran
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Dockerfile
48 lines (40 loc) · 1.17 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Base image: NVIDIA's PyTorch container, release tag 22.12-py3
FROM nvcr.io/nvidia/pytorch:22.12-py3

# Create app directory; the relative paths used below resolve against /app
WORKDIR /app

# Install app dependencies. The manifest is copied on its own before the rest
# of the source so this layer is rebuilt only when requirements.txt changes.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Bundle app source
COPY . .
# Document the container port; it matches the PORT default set below
EXPOSE 80

# Python stdout/stderr are left unbuffered, and the bitsandbytes
# welcome message is suppressed
ENV PYTHONUNBUFFERED="1" \
    BITSANDBYTES_NOWELCOME="1"

# Default listen address and port
ENV HOST="0.0.0.0" \
    PORT="80"

# Default MODEL* settings
ENV MODEL="bigscience/bloomz-560m" \
    MODEL_REVISION="" \
    MODEL_CACHE_DIR="/models" \
    MODEL_LOAD_IN_8BIT="false" \
    MODEL_LOAD_IN_4BIT="false" \
    MODEL_LOCAL_FILES_ONLY="false" \
    MODEL_TRUST_REMOTE_CODE="false" \
    MODEL_HALF_PRECISION="false"

# Default SERVER_* settings
ENV SERVER_THREADS="32" \
    SERVER_IDENTITY="basaran" \
    SERVER_CONNECTION_LIMIT="1024" \
    SERVER_CHANNEL_TIMEOUT="300" \
    SERVER_MODEL_NAME="" \
    SERVER_NO_PLAYGROUND="false" \
    SERVER_CORS_ORIGINS="*"

# Default COMPLETION_* limits
ENV COMPLETION_MAX_PROMPT="32768" \
    COMPLETION_MAX_TOKENS="8192" \
    COMPLETION_MAX_N="5" \
    COMPLETION_MAX_LOGPROBS="5" \
    COMPLETION_MAX_INTERVAL="50"

# Default CUDA memory fraction
ENV CUDA_MEMORY_FRACTION="1.0"
# Start the basaran module with the Python interpreter. The JSON-array (exec)
# form is used, so no shell wraps the process; arguments given to `docker run`
# are appended after "basaran".
ENTRYPOINT ["python", "-m", "basaran"]