Skip to content

Commit 2da76d9

Browse files
committed
Reworked dockerfile to not create a virtual environment and to install all necessary external tools
1 parent 52a4986 commit 2da76d9

File tree

1 file changed

+94
-22
lines changed

1 file changed

+94
-22
lines changed

Dockerfile

Lines changed: 94 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,123 @@
11
ARG CUDA_VERSION=11.2.2-cudnn8-runtime-ubuntu20.04
22
ARG PYTHON_VERSION=3.8
33
ARG POETRY_VERSION=1.2.2
4-
FROM nvidia/cuda:$CUDA_VERSION
5-
ARG PYTHON_VERSION
4+
5+
FROM python:$PYTHON_VERSION-slim as builder
66
ARG POETRY_VERSION
7-
WORKDIR /app
7+
88
ENV POETRY_HOME=/opt/poetry
99
ENV POETRY_VENV=/opt/poetry-venv
1010
ENV POETRY_CACHE_DIR=/opt/.cache
11-
ENV DOTNET_ROLL_FORWARD=LatestMajor
11+
12+
# Install poetry separated from system interpreter
13+
RUN python3 -m venv $POETRY_VENV \
14+
&& $POETRY_VENV/bin/pip install -U pip setuptools \
15+
&& $POETRY_VENV/bin/pip install poetry==${POETRY_VERSION}
16+
17+
# Add `poetry` to PATH
18+
ENV PATH="${PATH}:${POETRY_VENV}/bin"
19+
20+
WORKDIR /src
21+
COPY poetry.lock pyproject.toml /src/
22+
RUN poetry export -E eflomal --without-hashes -f requirements.txt > requirements.txt
23+
COPY . /src
24+
RUN poetry build
25+
26+
FROM nvidia/cuda:$CUDA_VERSION
27+
28+
ARG PYTHON_VERSION=3.8
29+
1230
ENV PIP_DISABLE_PIP_VERSION_CHECK=on
1331
ENV TZ=America/New_York
1432
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
15-
# Install .NET SDK
16-
RUN apt-get update
17-
RUN apt-get install --no-install-recommends -y wget
18-
RUN wget https://packages.microsoft.com/config/ubuntu/20.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb && \
19-
dpkg -i packages-microsoft-prod.deb && \
20-
rm packages-microsoft-prod.deb
33+
34+
WORKDIR /root
35+
2136
# Install apt packages (update, upgrade and install share one layer so a cached package index is never reused stale)
2237
RUN apt-get update && \
2338
apt-get upgrade -y && \
2439
apt-get install --no-install-recommends -y \
2540
git \
2641
python$PYTHON_VERSION \
2742
python3-pip \
28-
python$PYTHON_VERSION-venv \
43+
wget \
2944
build-essential \
3045
gdb \
3146
curl \
3247
unzip \
33-
dotnet-sdk-7.0
48+
nano \
49+
cmake \
50+
tar \
51+
vim
52+
3453
# Make some useful symlinks that are expected to exist (chained with && so a failed link aborts the build)
3554
RUN ln -sfn /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 && \
3655
ln -sfn /usr/bin/python${PYTHON_VERSION} /usr/bin/python
37-
# Install poetry separated from system interpreter
38-
RUN python3 -m venv $POETRY_VENV \
39-
&& $POETRY_VENV/bin/pip install -U pip setuptools \
40-
&& $POETRY_VENV/bin/pip install poetry==${POETRY_VERSION}
41-
# Add `poetry` to PATH
42-
ENV PATH="${PATH}:${POETRY_VENV}/bin"
56+
57+
# Install .NET SDK
58+
RUN wget https://packages.microsoft.com/config/ubuntu/20.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb && \
59+
dpkg -i packages-microsoft-prod.deb && \
60+
rm packages-microsoft-prod.deb
61+
RUN apt-get update && \
62+
apt-get install --no-install-recommends -y dotnet-sdk-7.0
63+
ENV DOTNET_ROLL_FORWARD=LatestMajor
64+
4365
# Install AWS CLI (the archive and the extracted installer directory are removed in the same layer)
4466
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
4567
unzip awscliv2.zip && \
4668
./aws/install && \
4769
rm -rf awscliv2.zip aws
48-
RUN rm -rf /var/lib/apt/lists/*
49-
RUN poetry config virtualenvs.create true && \
50-
poetry config virtualenvs.in-project true
51-
CMD ["bash"]
70+
71+
# Install dependencies from poetry
72+
COPY --from=builder /src/requirements.txt .
73+
RUN pip install --no-cache-dir -r requirements.txt && rm requirements.txt
74+
75+
# Install silnlp
76+
COPY --from=builder /src/dist/*.whl .
77+
RUN pip install --no-deps *.whl && rm *.whl
78+
79+
# Install eflomal
80+
RUN git clone https://github.com/robertostling/eflomal.git
81+
RUN make -C eflomal/src
82+
RUN make -C eflomal/src install
83+
RUN rm -rf eflomal
84+
ENV EFLOMAL_PATH=/usr/local/bin
85+
86+
# Install fast_align (the apt index is refreshed in the same layer as the install so it cannot be stale)
87+
RUN apt-get update && apt-get install --no-install-recommends -y libgoogle-perftools-dev libsparsehash-dev
88+
RUN git clone https://github.com/clab/fast_align.git
89+
RUN mkdir fast_align/build
90+
RUN cmake -S fast_align -B fast_align/build
91+
RUN make -C fast_align/build
92+
RUN mv fast_align/build/atools fast_align/build/fast_align /usr/local/bin
93+
RUN rm -rf fast_align
94+
ENV FAST_ALIGN_PATH=/usr/local/bin
95+
96+
# Install mgiza (the apt index is refreshed in the same layer as the install so it cannot be stale)
97+
RUN apt-get update && apt-get install --no-install-recommends -y libboost-all-dev
98+
RUN git clone https://github.com/moses-smt/mgiza.git
99+
RUN cmake -S mgiza/mgizapp -B mgiza/mgizapp
100+
RUN make -C mgiza/mgizapp
101+
RUN make -C mgiza/mgizapp install
102+
RUN mv mgiza/mgizapp/inst/mgiza mgiza/mgizapp/inst/mkcls mgiza/mgizapp/inst/plain2snt mgiza/mgizapp/inst/snt2cooc /usr/local/bin
103+
RUN rm -rf mgiza
104+
ENV MGIZA_PATH=/usr/local/bin
105+
106+
# Install meteor
107+
RUN wget "https://download.oracle.com/java/21/latest/jdk-21_linux-x64_bin.tar.gz"
108+
RUN tar -xf jdk-21_linux-x64_bin.tar.gz
109+
RUN rm jdk-21_linux-x64_bin.tar.gz
110+
RUN wget "http://www.cs.cmu.edu/~alavie/METEOR/download/meteor-1.5.tar.gz"
111+
RUN tar -xf meteor-1.5.tar.gz
112+
RUN rm meteor-1.5.tar.gz
113+
RUN mv meteor-1.5/meteor-1.5.jar /usr/local/bin
114+
RUN rm -rf meteor-1.5
115+
ENV METEOR_PATH=/usr/local/bin
116+
117+
# Other environment variables
118+
ENV SIL_NLP_DATA_PATH=/aqua-ml-data
119+
RUN mkdir -p .cache/silnlp
120+
ENV SIL_NLP_CACHE_EXPERIMENT_DIR=/root/.cache/silnlp
121+
122+
# Default docker run behavior: an interactive bash shell (bash's own -t option means "exit after one command", so it is not passed)
123+
ENTRYPOINT [ "/bin/bash", "-i" ]

0 commit comments

Comments
 (0)