Skip to content

Commit

Permalink
Reworked dockerfile to not create a virtual environment and to install all necessary external tools
Browse files Browse the repository at this point in the history
  • Loading branch information
isaac091 committed Oct 19, 2023
1 parent 52a4986 commit 2da76d9
Showing 1 changed file with 94 additions and 22 deletions.
116 changes: 94 additions & 22 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,51 +1,123 @@
# Build arguments declared ahead of the first FROM so the FROM lines can use
# them; a stage that needs one of them re-declares it with a bare ARG.
ARG CUDA_VERSION=11.2.2-cudnn8-runtime-ubuntu20.04
ARG PYTHON_VERSION=3.8
ARG POETRY_VERSION=1.2.2
FROM nvidia/cuda:${CUDA_VERSION}
ARG PYTHON_VERSION

# Stage named `builder`; COPY --from=builder instructions pull files out of it.
FROM python:${PYTHON_VERSION}-slim AS builder
ARG POETRY_VERSION
WORKDIR /app

# Poetry locations (home, dedicated venv, cache) and the .NET roll-forward
# setting, declared in a single ENV instruction. None of the values refers to
# another one, so grouping them does not change the resulting environment.
ENV POETRY_HOME=/opt/poetry \
    POETRY_VENV=/opt/poetry-venv \
    POETRY_CACHE_DIR=/opt/.cache \
    DOTNET_ROLL_FORWARD=LatestMajor

# Give Poetry a virtual environment of its own so it stays isolated from the
# system interpreter, then install the requested Poetry release into it.
RUN python3 -m venv "${POETRY_VENV}" && \
    "${POETRY_VENV}/bin/pip" install -U pip setuptools && \
    "${POETRY_VENV}/bin/pip" install "poetry==${POETRY_VERSION}"

# Append that environment's bin directory to PATH so `poetry` can be called directly.
ENV PATH="$PATH:$POETRY_VENV/bin"

# Work inside /src: export a requirements.txt (including the eflomal extra)
# from the lock file, then build the project's distributable packages.
WORKDIR /src
# Only the dependency manifests are copied before the export, so that layer is
# reused from cache when just the source files change.
COPY poetry.lock pyproject.toml ./
RUN poetry export -E eflomal --without-hashes -f requirements.txt > requirements.txt
# The full source tree is needed for the build itself.
COPY . .
RUN poetry build

# Stage based on the CUDA image selected by CUDA_VERSION.
FROM nvidia/cuda:${CUDA_VERSION}

ARG PYTHON_VERSION=3.8

# Turn off pip's version check and set the container time zone: TZ is exported
# and also written to /etc/localtime and /etc/timezone.
ENV PIP_DISABLE_PIP_VERSION_CHECK=on \
    TZ=America/New_York
RUN ln -snf "/usr/share/zoneinfo/${TZ}" /etc/localtime && \
    echo "${TZ}" > /etc/timezone
# Install .NET SDK: register Microsoft's Ubuntu 20.04 package feed.
# `apt-get update` runs in the same RUN as the install so a cached, stale
# package index layer is never reused, and the downloaded .deb is deleted in
# the layer that fetched it.
RUN apt-get update && \
    apt-get install --no-install-recommends -y wget && \
    wget https://packages.microsoft.com/config/ubuntu/20.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb && \
    dpkg -i packages-microsoft-prod.deb && \
    rm packages-microsoft-prod.deb

WORKDIR /root

# Install apt packages
# update, upgrade and install share one RUN so the install never runs against
# a stale cached package index. Packages are listed one per line in
# alphabetical order; every line except the last ends with a backslash so the
# whole list belongs to the same command.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install --no-install-recommends -y \
        build-essential \
        cmake \
        curl \
        dotnet-sdk-7.0 \
        gdb \
        git \
        nano \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-venv \
        python3-pip \
        tar \
        unzip \
        vim \
        wget

# Make some useful symlinks that are expected to exist: point both `python3`
# and `python` at the interpreter selected by PYTHON_VERSION.
# The commands are chained with `&&`; a single `&` would run the first `ln` in
# the background and hide its exit status from the build.
RUN ln -sfn /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 && \
    ln -sfn /usr/bin/python${PYTHON_VERSION} /usr/bin/python
# Install poetry separated from system interpreter
# Creates a virtual environment at $POETRY_VENV, upgrades pip and setuptools
# inside it, then installs the Poetry release named by POETRY_VERSION.
# NOTE(review): this RUN reads POETRY_VENV and POETRY_VERSION; confirm both are
# defined (ENV/ARG) in the build stage this instruction belongs to.
RUN python3 -m venv $POETRY_VENV \
&& $POETRY_VENV/bin/pip install -U pip setuptools \
&& $POETRY_VENV/bin/pip install poetry==${POETRY_VERSION}
# Add `poetry` to PATH
ENV PATH="${PATH}:${POETRY_VENV}/bin"

# Install .NET SDK
# Download Microsoft's package-feed .deb for Ubuntu 20.04, install it with
# dpkg, and delete the file in the same layer.
RUN wget https://packages.microsoft.com/config/ubuntu/20.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb && \
dpkg -i packages-microsoft-prod.deb && \
rm packages-microsoft-prod.deb
# Refresh the package index in the same RUN as the install so apt sees the
# feed registered above.
RUN apt-get update && \
apt-get install --no-install-recommends -y dotnet-sdk-7.0
# NOTE(review): presumably lets .NET tools built for an older major version run
# on the installed runtime — confirm against the .NET roll-forward documentation.
ENV DOTNET_ROLL_FORWARD=LatestMajor

# Install AWS CLI
# Download the v2 installer bundle, unpack it and run the installer. Both the
# archive and the unpacked ./aws directory are removed in the same RUN, since
# neither is needed once the installer has finished and files deleted in a
# later layer would still be stored in the image.
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip awscliv2.zip && \
    ./aws/install && \
    rm -rf awscliv2.zip aws
# Delete the apt package index files.
# NOTE(review): as a separate layer this does not shrink the layers that
# created those files, and any `apt-get install` after this point needs its
# own `apt-get update` first.
RUN rm -rf /var/lib/apt/lists/*
# Configure Poetry to create virtual environments and to place them inside the
# project directory.
RUN poetry config virtualenvs.create true && \
poetry config virtualenvs.in-project true
# Default command: start bash.
# NOTE(review): if an ENTRYPOINT is also set for this stage, this value is
# passed to it as arguments — confirm that combination is intended.
CMD ["bash"]

# Install the Python dependencies listed in the requirements.txt exported by
# the builder stage; the file is deleted once pip has consumed it.
COPY --from=builder /src/requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt \
    && rm requirements.txt

# Install silnlp from the wheel built in the builder stage. --no-deps is used
# because the dependencies were already installed from requirements.txt.
COPY --from=builder /src/dist/*.whl ./
RUN pip install --no-deps *.whl \
    && rm *.whl

# Install eflomal
# Clone, build, install and delete the source tree within a single RUN:
# removing the checkout in a later layer would still leave it stored in the
# image.
# NOTE(review): the clone tracks the repository's default branch with no pinned
# commit, so rebuilds may pick up different sources.
RUN git clone https://github.com/robertostling/eflomal.git && \
    make -C eflomal/src && \
    make -C eflomal/src install && \
    rm -rf eflomal
ENV EFLOMAL_PATH=/usr/local/bin

# Install fast_align
# The package index is refreshed in the same RUN as the install, so this step
# does not depend on index files left behind (or removed) by earlier layers.
# The checkout is cloned, built and deleted in that same RUN so the source and
# build tree never persist in an image layer; only the `atools` and
# `fast_align` binaries are kept, in /usr/local/bin.
RUN apt-get update && \
    apt-get install --no-install-recommends -y libgoogle-perftools-dev libsparsehash-dev && \
    git clone https://github.com/clab/fast_align.git && \
    mkdir fast_align/build && \
    cmake -S fast_align -B fast_align/build && \
    make -C fast_align/build && \
    mv fast_align/build/atools fast_align/build/fast_align /usr/local/bin && \
    rm -rf fast_align
ENV FAST_ALIGN_PATH=/usr/local/bin

# Install mgiza
# The package index is refreshed in the same RUN as the install, so this step
# does not depend on index files left behind (or removed) by earlier layers.
# Clone, configure, build and install happen in one RUN together with the
# cleanup, so the source tree is not stored in an image layer. The four tools
# taken from the install output (mgiza, mkcls, plain2snt, snt2cooc) are moved
# to /usr/local/bin.
RUN apt-get update && \
    apt-get install --no-install-recommends -y libboost-all-dev && \
    git clone https://github.com/moses-smt/mgiza.git && \
    cmake -S mgiza/mgizapp -B mgiza/mgizapp && \
    make -C mgiza/mgizapp && \
    make -C mgiza/mgizapp install && \
    mv mgiza/mgizapp/inst/mgiza mgiza/mgizapp/inst/mkcls mgiza/mgizapp/inst/plain2snt mgiza/mgizapp/inst/snt2cooc /usr/local/bin && \
    rm -rf mgiza
ENV MGIZA_PATH=/usr/local/bin

# Install meteor
# Each archive is downloaded, unpacked and deleted within one RUN so the
# tarball is not kept in an image layer.
# NOTE(review): the JDK is unpacked into the working directory, but nothing
# here adds its bin/ directory to PATH or sets JAVA_HOME — confirm how `java`
# is located at runtime.
RUN wget "https://download.oracle.com/java/21/latest/jdk-21_linux-x64_bin.tar.gz" && \
    tar -xf jdk-21_linux-x64_bin.tar.gz && \
    rm jdk-21_linux-x64_bin.tar.gz
# Only the METEOR jar is kept (moved to /usr/local/bin); the rest of the
# unpacked distribution is removed together with the archive.
# NOTE(review): this download uses plain http and is not checksum-verified.
RUN wget "http://www.cs.cmu.edu/~alavie/METEOR/download/meteor-1.5.tar.gz" && \
    tar -xf meteor-1.5.tar.gz && \
    mv meteor-1.5/meteor-1.5.jar /usr/local/bin && \
    rm -rf meteor-1.5.tar.gz meteor-1.5
ENV METEOR_PATH=/usr/local/bin

# Other environment variables
ENV SIL_NLP_DATA_PATH=/aqua-ml-data
# Create the cache directory by absolute path so it always matches
# SIL_NLP_CACHE_EXPERIMENT_DIR, whatever the current WORKDIR is.
RUN mkdir -p /root/.cache/silnlp
ENV SIL_NLP_CACHE_EXPERIMENT_DIR=/root/.cache/silnlp

# Default docker run behavior
# Start bash with no extra flags. `-it` is a `docker run` option; when passed
# to bash itself, `-t` makes the shell exit after reading and executing one
# command. Use `docker run -it <image>` to get an interactive shell.
ENTRYPOINT [ "/bin/bash" ]

0 comments on commit 2da76d9

Please sign in to comment.