-
Notifications
You must be signed in to change notification settings - Fork 0
/
linux-Dockerfile
124 lines (104 loc) · 6.63 KB
/
linux-Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Base image from https://nightlies.apache.org/flink/flink-docs-release-1.19/docs/deployment/resource-providers/standalone/docker/
# NOTE(review): the link above is the 1.19 docs but the image tag is 1.20.0 — confirm intended version.
FROM flink:1.20.0-scala_2.12-java17
# Build argument(s)
# SECURITY(review): AWS credentials passed as build args are recorded in the
# image metadata (`docker history`) and, via the ENV mappings below, persist
# into every container started from this image. Prefer BuildKit secret mounts
# (`RUN --mount=type=secret,...`) at build time and an IAM role or injected
# environment at run time. Left in place here to avoid breaking the existing
# build/run workflow — flagged for follow-up.
ARG FLINK_LANGUAGE
ARG AWS_ACCESS_KEY_ID
ARG AWS_SECRET_ACCESS_KEY
ARG AWS_REGION
ARG AWS_S3_BUCKET
ARG AWS_SESSION_TOKEN
# Map build argument(s) to container environment variable(s)
# SECURITY(review): see note above — these bake credentials into the image.
ENV FLINK_LANGUAGE=${FLINK_LANGUAGE}
ENV AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
ENV AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
ENV AWS_REGION=${AWS_REGION}
ENV AWS_S3_BUCKET=${AWS_S3_BUCKET}
ENV AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN}
# Create/set version number container environment variable(s)
# Versions are split into <major.minor> and <patch> parts because some
# artifact coordinates below use the short form (e.g. iceberg-flink-runtime-1.20)
# and others the full form (e.g. flink-s3-fs-hadoop-1.20.0).
ENV APACHE_FLINK_VERSION=1.20
ENV APACHE_FLINK_PATCH=.0
ENV APACHE_FLINK_VERSION_WITH_PATCH=${APACHE_FLINK_VERSION}${APACHE_FLINK_PATCH}
ENV PYTHON_VERSION=3.11
ENV PYTHON_PATCH=.9
ENV PYTHON_VERSION_WITH_PATCH=${PYTHON_VERSION}${PYTHON_PATCH}
ENV HADOOP_VERSION=3.4.1
ENV ICEBERG_VERSION=1.7.0
# Create/set folder container environment variable(s)
ENV FLINK_CONF_DIR=/opt/flink/conf
# Trailing slash is intentional: FLINK_LIB_DIR is used as a raw path prefix
# (e.g. "${FLINK_LIB_DIR}some.jar") in the download step below.
ENV FLINK_LIB_DIR=/opt/flink/lib/
# Create/set URL container environment variable(s)
ENV MAVEN_ROOT_URL=https://repo1.maven.org/maven2/org/apache/
# Container metadata
LABEL maintainer="j3@thej3.com" \
description="Apache Flink ${APACHE_FLINK_VERSION_WITH_PATCH} container with Python ${PYTHON_VERSION_WITH_PATCH} installed."
# Install Python build dependencies and general utilities.
# - The lib*-dev packages provide the headers CPython's ./configure probes for
#   (bz2, ffi, gdbm, lzma, ncurses, nss, postgres, readline, sqlite3, ssl, zlib),
#   so the from-source build below enables the matching stdlib modules.
# - curl and ca-certificates are needed by the JAR download step that follows
#   (the base image may already ship them — installing is harmless and makes
#   this Dockerfile self-sufficient).
# - openjdk-17-jdk-headless: JDK without GUI libraries; provides the
#   /usr/lib/jvm/java-17-openjdk-<arch> tree that JAVA_HOME points at below.
# - nano/zip are debugging conveniences; consider dropping them for production.
# Fixes vs. original: --no-install-recommends keeps the image lean, the apt
# list cache is removed in the SAME layer (a later `rm` would not shrink the
# image), and packages are sorted alphabetically for diffability.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    git \
    libbz2-dev \
    libffi-dev \
    libgdbm-dev \
    liblzma-dev \
    libncurses5-dev \
    libnss3-dev \
    libpq-dev \
    libreadline-dev \
    libsqlite3-dev \
    libssl-dev \
    nano \
    openjdk-17-jdk-headless \
    python3-pip \
    python3-venv \
    wget \
    zip \
    zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*
# Download the connector/format JARs Flink loads from lib/ at startup
# (S3 filesystem, Hive + Hadoop, Iceberg runtime + AWS bundle, Kafka
# connectors, JSON/Avro formats, Kafka client).
# FIX: use `curl -f` so an HTTP error (404 on a bad version coordinate, etc.)
# fails the build instead of silently saving an HTML error page as a ".jar"
# that only blows up at Flink startup. Also dropped the stray trailing `;`.
RUN curl -fSL "${MAVEN_ROOT_URL}flink/flink-s3-fs-hadoop/${APACHE_FLINK_VERSION_WITH_PATCH}/flink-s3-fs-hadoop-${APACHE_FLINK_VERSION_WITH_PATCH}.jar" -o "${FLINK_LIB_DIR}flink-s3-fs-hadoop-${APACHE_FLINK_VERSION_WITH_PATCH}.jar" && \
    curl -fSL "${MAVEN_ROOT_URL}flink/flink-sql-connector-hive-3.1.3_2.12/${APACHE_FLINK_VERSION_WITH_PATCH}/flink-sql-connector-hive-3.1.3_2.12-${APACHE_FLINK_VERSION_WITH_PATCH}.jar" -o "${FLINK_LIB_DIR}flink-sql-connector-hive-3.1.3_2.12-${APACHE_FLINK_VERSION_WITH_PATCH}.jar" && \
    curl -fSL "${MAVEN_ROOT_URL}hadoop/hadoop-common/${HADOOP_VERSION}/hadoop-common-${HADOOP_VERSION}.jar" -o "${FLINK_LIB_DIR}hadoop-common-${HADOOP_VERSION}.jar" && \
    curl -fSL "${MAVEN_ROOT_URL}flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar" -o "${FLINK_LIB_DIR}flink-shaded-hadoop-2-uber-2.8.3-10.0.jar" && \
    curl -fSL "${MAVEN_ROOT_URL}iceberg/iceberg-flink-runtime-${APACHE_FLINK_VERSION}/${ICEBERG_VERSION}/iceberg-flink-runtime-${APACHE_FLINK_VERSION}-${ICEBERG_VERSION}.jar" -o "${FLINK_LIB_DIR}iceberg-flink-runtime-${APACHE_FLINK_VERSION}-${ICEBERG_VERSION}.jar" && \
    curl -fSL "${MAVEN_ROOT_URL}iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar" -o "${FLINK_LIB_DIR}iceberg-aws-bundle-${ICEBERG_VERSION}.jar" && \
    curl -fSL "${MAVEN_ROOT_URL}hadoop/hadoop-hdfs-client/${HADOOP_VERSION}/hadoop-hdfs-client-${HADOOP_VERSION}.jar" -o "${FLINK_LIB_DIR}hadoop-hdfs-client-${HADOOP_VERSION}.jar" && \
    curl -fSL "${MAVEN_ROOT_URL}flink/flink-sql-connector-kafka/3.3.0-${APACHE_FLINK_VERSION}/flink-sql-connector-kafka-3.3.0-${APACHE_FLINK_VERSION}.jar" -o "${FLINK_LIB_DIR}flink-sql-connector-kafka-3.3.0-${APACHE_FLINK_VERSION}.jar" && \
    curl -fSL "${MAVEN_ROOT_URL}flink/flink-connector-kafka/3.3.0-${APACHE_FLINK_VERSION}/flink-connector-kafka-3.3.0-${APACHE_FLINK_VERSION}.jar" -o "${FLINK_LIB_DIR}flink-connector-kafka-3.3.0-${APACHE_FLINK_VERSION}.jar" && \
    curl -fSL "${MAVEN_ROOT_URL}flink/flink-json/${APACHE_FLINK_VERSION_WITH_PATCH}/flink-json-${APACHE_FLINK_VERSION_WITH_PATCH}.jar" -o "${FLINK_LIB_DIR}flink-json-${APACHE_FLINK_VERSION_WITH_PATCH}.jar" && \
    curl -fSL "${MAVEN_ROOT_URL}flink/flink-avro/${APACHE_FLINK_VERSION_WITH_PATCH}/flink-avro-${APACHE_FLINK_VERSION_WITH_PATCH}.jar" -o "${FLINK_LIB_DIR}flink-avro-${APACHE_FLINK_VERSION_WITH_PATCH}.jar" && \
    curl -fSL "${MAVEN_ROOT_URL}avro/avro/1.12.0/avro-1.12.0.jar" -o "${FLINK_LIB_DIR}avro-1.12.0.jar" && \
    curl -fSL "${MAVEN_ROOT_URL}kafka/kafka-clients/3.8.1/kafka-clients-3.8.1.jar" -o "${FLINK_LIB_DIR}kafka-clients-3.8.1.jar"
# Build and install Python ${PYTHON_VERSION_WITH_PATCH} from source.
# `make altinstall` installs a versioned binary (/usr/local/bin/python3.11)
# without overwriting the distro's `python3`. The downloaded tarball is
# removed in the SAME layer so it never bloats the image.
RUN cd /usr/local && \
    wget https://www.python.org/ftp/python/${PYTHON_VERSION_WITH_PATCH}/Python-${PYTHON_VERSION_WITH_PATCH}.tgz && \
    tar -xf Python-${PYTHON_VERSION_WITH_PATCH}.tgz && \
    cd Python-${PYTHON_VERSION_WITH_PATCH} && \
    ./configure --enable-optimizations && \
    make -j$(nproc) && \
    make altinstall && \
    rm /usr/local/Python-${PYTHON_VERSION_WITH_PATCH}.tgz
# FIX: the original symlinked /usr/bin/python to the *source tree directory*
# (/usr/local/Python-3.11.9), which is not an executable and cannot serve as
# an interpreter. Link to the binary `make altinstall` actually installed.
RUN ln -s /usr/local/bin/python${PYTHON_VERSION} /usr/bin/python
# Keep the source build tree on PATH as the original did (the build directory
# contains a `python` binary after `make`), so existing callers that relied on
# `python` resolving there keep working.
ENV PATH="/usr/local/Python-${PYTHON_VERSION_WITH_PATCH}:${PATH}"
# Set JAVA_HOME for the apt-installed OpenJDK 17.
# FIX: the original hardcoded .../java-17-openjdk-arm64, which breaks amd64
# builds. TARGETARCH (amd64/arm64) is supplied automatically by BuildKit and
# matches Debian/Ubuntu's /usr/lib/jvm/java-17-openjdk-<arch> layout.
# Also removed the byte-identical duplicate JAVA_HOME declaration that
# appeared a few lines below.
ARG TARGETARCH
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-${TARGETARCH}
# Upgrade pip for the source-built interpreter; --no-cache-dir keeps the
# wheel cache out of the layer.
RUN python${PYTHON_VERSION} -m pip install --no-cache-dir --upgrade pip
# Install the AWS CLI to interact with AWS services for debugging.
# FIX: the original used bare `pip`, which resolves to the distro Python's pip
# (from the apt python3-pip package) — the freshly upgraded 3.11 pip above was
# never the one used. Run pip through the same interpreter explicitly.
RUN python${PYTHON_VERSION} -m pip install --no-cache-dir --upgrade awscli
# Put every JAR in FLINK_LIB_DIR on the Java classpath (trailing * is Java's
# directory-wildcard syntax, not a shell glob).
ENV CLASSPATH="${FLINK_LIB_DIR}*"
# Install uv (Python package manager) from its distroless image.
# FIX: pin a concrete release instead of the mutable :latest tag so builds are
# reproducible and don't silently change when upstream publishes a new uv.
COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /usr/local/bin/uv
# Copy the application into the image.
# NOTE(review): `COPY .` pulls in the entire build context — confirm a
# .dockerignore excludes .git, caches, local venvs, and any .env/credential
# files so they don't end up baked into a layer.
COPY . /opt/flink/python_apps/
# Install Python dependencies exactly as pinned in `uv.lock` (--frozen fails
# the build if the lockfile is out of date rather than re-resolving).
WORKDIR /opt/flink/python_apps
RUN uv sync --frozen
# Launch the Flink standalone cluster by default (overridable at `docker run`).
# FIX: WORKDIR was changed to /opt/flink/python_apps above, so the original
# relative path ./bin/start-cluster.sh no longer pointed at Flink's launcher
# in /opt/flink/bin — use the absolute path so CWD doesn't matter.
CMD ["/opt/flink/bin/start-cluster.sh"]