-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from c0sogi/dev
Dev update (23.8.22.)
- Loading branch information
Showing
31 changed files
with
2,280 additions
and
1,305 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,48 +1,34 @@ | ||
### Dockerfile for Python 3.11.4 & CUDA 12.1.1 & Ubuntu 22.04 | ||
### Approximately 5 ~ 10 minutes to build | ||
|
||
# Select the required CUDA version. | ||
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 | ||
ENV PYTHON_VERSION="3.11.4" | ||
ENV PYTHON_VERSION_SHORT="3.11" | ||
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as builder | ||
|
||
# Copy the necessary files. | ||
COPY llama_api /app/llama_api | ||
COPY pyproject.toml /app/pyproject.toml | ||
COPY requirements.txt /app/requirements.txt | ||
COPY main.py /app/main.py | ||
COPY model_definitions.py /app/model_definitions.py | ||
ENV PYTHON_VERSION="3.11.4" \ | ||
PYTHON_VERSION_SHORT="3.11" \ | ||
DEBIAN_FRONTEND=noninteractive \ | ||
CUDA_DOCKER_ARCH=all | ||
|
||
# Install the necessary applications, and then install Python. | ||
# Then, install the necessary Python packages(Dependencies). | ||
RUN apt-get update && apt-get install -y --no-install-recommends \ | ||
build-essential \ | ||
zlib1g-dev \ | ||
libncurses5-dev \ | ||
libgdbm-dev \ | ||
libnss3-dev \ | ||
libssl-dev \ | ||
libreadline-dev \ | ||
libffi-dev \ | ||
wget \ | ||
git \ | ||
libsqlite3-dev\ | ||
git build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev wget libsqlite3-dev gcc ocl-icd-opencl-dev opencl-headers clinfo libclblast-dev libopenblas-dev \ | ||
&& wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz -O /tmp/Python-${PYTHON_VERSION}.tgz \ | ||
&& tar -xvf /tmp/Python-${PYTHON_VERSION}.tgz -C /tmp \ | ||
&& cd /tmp/Python-${PYTHON_VERSION} \ | ||
&& ./configure \ | ||
&& make \ | ||
&& make install \ | ||
&& ./configure && make && make install \ | ||
&& python3 -m pip install --upgrade pip --no-cache-dir \ | ||
&& rm -rf /var/lib/apt/lists/* && rm -rf /tmp/* \ | ||
&& update-alternatives --install /usr/bin/python python /usr/local/bin/python${PYTHON_VERSION_SHORT} 1 \ | ||
&& update-alternatives --install /usr/bin/python3 python3 /usr/local/bin/python${PYTHON_VERSION_SHORT} 1 \ | ||
&& python3 -m pip install --upgrade pip \ | ||
&& rm -rf /var/lib/apt/lists/* \ | ||
&& apt-get clean \ | ||
&& rm -rf /tmp/* \ | ||
&& cd /app \ | ||
&& python3 -m llama_api.server.app_settings --skip-compile --install-pkgs --force-cuda | ||
# Need to skip compile because GPU access to the host is not supported while building the image. | ||
&& mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd \ | ||
&& nvcc --version | ||
|
||
# Copy the necessary files. | ||
COPY llama_api /app/llama_api | ||
COPY pyproject.toml requirements.txt main.py model_definitions.py /app/ | ||
|
||
# Install the necessary Python packages(Dependencies). | ||
RUN cd /app && python3 -m llama_api.server.app_settings --install-pkgs --force-cuda --no-cache-dir | ||
|
||
# Set the working directory and start the server. | ||
ENV PORT=${PORT:-8000} | ||
STOPSIGNAL SIGINT | ||
WORKDIR /app | ||
ENTRYPOINT [ "python3", "-m", "main", "--port", "${PORT}" ] | ||
ENTRYPOINT [ "python3", "-m", "main", "--port", "${PORT}" ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
### Compressed version of the Dockerfile.
### Everything is installed in a single RUN instruction to minimize the number
### of image layers. The trade-off is slower rebuilds compared to the original
### Dockerfile: a change to any input invalidates the one large cached layer.

# Select the required CUDA version (devel image: nvcc is needed to build CUDA wheels).
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS builder

# CUDA_DOCKER_ARCH=all builds kernels for every supported GPU architecture.
ENV PYTHON_VERSION="3.11.4" \
    PYTHON_VERSION_SHORT="3.11" \
    CUDA_DOCKER_ARCH=all

WORKDIR /app

# Copy the necessary files.
COPY llama_api /app/llama_api
COPY pyproject.toml requirements.txt main.py model_definitions.py /app/

# One layer: install build prerequisites, compile CPython from source, register
# it as the default python/python3, expose the NVIDIA OpenCL ICD (for CLBlast),
# sanity-check nvcc, and install the project's Python dependencies.
# DEBIAN_FRONTEND is scoped to this RUN only so it does not leak into the
# runtime environment of the final image.
RUN DEBIAN_FRONTEND=noninteractive apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        clinfo \
        gcc \
        git \
        libclblast-dev \
        libffi-dev \
        libgdbm-dev \
        libncurses5-dev \
        libnss3-dev \
        libopenblas-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        ocl-icd-opencl-dev \
        opencl-headers \
        wget \
        zlib1g-dev \
    && wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz -O /tmp/Python-${PYTHON_VERSION}.tgz \
    && tar -xf /tmp/Python-${PYTHON_VERSION}.tgz -C /tmp \
    && cd /tmp/Python-${PYTHON_VERSION} \
    && ./configure && make && make install \
    && python3 -m pip install --upgrade pip --no-cache-dir \
    && rm -rf /var/lib/apt/lists/* \
    && update-alternatives --install /usr/bin/python python /usr/local/bin/python${PYTHON_VERSION_SHORT} 1 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/local/bin/python${PYTHON_VERSION_SHORT} 1 \
    && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd \
    && nvcc --version \
    && cd /app && python3 -m llama_api.server.app_settings --install-pkgs --force-cuda --no-cache-dir \
    && rm -rf /tmp/*

# Default server port; override at build time (--build-arg PORT=...) or at
# run time (-e PORT=...).
ARG PORT=8000
ENV PORT=${PORT}
STOPSIGNAL SIGINT
WORKDIR /app
# Exec-form ENTRYPOINT performs no variable expansion, so "${PORT}" would be
# passed literally. Run through a shell and `exec` so the server replaces the
# shell as PID 1 and receives SIGINT (see STOPSIGNAL above).
ENTRYPOINT ["/bin/sh", "-c", "exec python3 -m main --port \"$PORT\""]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
from collections import defaultdict | ||
from dataclasses import dataclass, field | ||
from time import time | ||
from typing import Dict, Literal, Optional | ||
|
||
from ..schemas.api import CompletionLogprobs, TextGenerationSettings | ||
|
||
|
||
@dataclass
class CompletionStatus:
    """Mutable bookkeeping record for a single completion request.

    Instances are created lazily, one per completion id, via
    ``defaultdict(CompletionStatus)`` in ``CompletionMixin.completion_status``,
    so every field is excluded from ``__init__`` and starts from a neutral
    default.
    """

    # These fields are automatically set.
    # Creation time of this record, in seconds since the epoch (time.time()).
    started_at: float = field(default_factory=time, init=False)

    # These fields are set by `accept_settings` method.
    input_text: str = field(default="", init=False)
    input_tokens: int = field(default=0, init=False)

    # These fields are set by `generate_text` method.
    generated_text: str = field(default="", init=False)
    generated_tokens: int = field(default=0, init=False)
    # Log-probabilities for the generated tokens, when the caller requested
    # them; otherwise stays None.
    logprobs: Optional[CompletionLogprobs] = field(default=None, init=False)
|
||
|
||
class CompletionMixin:
    """A mixin for modules that support completion generation."""

    _completion_status: Optional["defaultdict[str, CompletionStatus]"] = None

    @property
    def completion_status(self) -> Dict[str, CompletionStatus]:
        """Per-completion status records, keyed by completion_id.

        Lazily creates the backing ``defaultdict`` on first access so that
        subclasses need no extra __init__ work.
        """
        status_map = self._completion_status
        if status_map is None:
            status_map = defaultdict(CompletionStatus)
            self._completion_status = status_map
        return status_map

    def get_finish_reason(
        self,
        settings: TextGenerationSettings,
    ) -> Literal["length", "stop"]:
        """Return why the completion ended: "length" when the generated token
        count reached the configured maximum, otherwise "stop"."""
        generated = self.completion_status[
            settings.completion_id
        ].generated_tokens
        if generated >= settings.max_tokens:
            return "length"
        return "stop"
Oops, something went wrong.