diff --git a/docker/glibc-package/Dockerfile b/docker/glibc-package/Dockerfile deleted file mode 100644 index 016e5c622365f..0000000000000 --- a/docker/glibc-package/Dockerfile +++ /dev/null @@ -1,80 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - - -ARG GLIBC_VERSION=2.38 -ARG ALPINE_VERSION=3.20 - -FROM ubuntu:22.04 as build -ARG GLIBC_VERSION - -RUN apt-get -q update \ - && apt-get -qy install \ - bison \ - build-essential \ - gawk \ - gettext \ - openssl \ - python3 \ - texinfo \ - wget - -# Build GLibc -RUN wget -qO- https://ftpmirror.gnu.org/libc/glibc-${GLIBC_VERSION}.tar.gz | tar zxf - -RUN mkdir /glibc-build -WORKDIR /glibc-build -RUN /glibc-${GLIBC_VERSION}/configure \ - --prefix=/usr/glibc-compat \ - --libdir=/usr/glibc-compat/lib \ - --libexecdir=/usr/glibc-compat/lib \ - --enable-multi-arch \ - --enable-stack-protector=strong -RUN make -j$(nproc) -RUN make install -RUN tar --dereference --hard-dereference -zcf /glibc-bin.tar.gz /usr/glibc-compat - - -################################################ -## Build the APK package -FROM alpine:$ALPINE_VERSION as apk -ARG GLIBC_VERSION - -RUN apk add abuild sudo build-base - -RUN mkdir /build -WORKDIR build - -COPY --from=build /glibc-bin.tar.gz /build - -COPY ./scripts /build - -RUN echo "pkgver=\"${GLIBC_VERSION}\"" >> /build/APKBUILD -RUN echo "sha512sums=\"$(sha512sum glibc-bin.tar.gz ld.so.conf)\"" >> /build/APKBUILD - -RUN abuild-keygen -a -i -n -RUN abuild -F -c -r - -################################################ -## Last stage - Only leaves the packages -FROM busybox -ARG GLIBC_VERSION - -RUN mkdir -p /root/packages -COPY --from=apk /root/packages/*/glibc-${GLIBC_VERSION}-r0.apk /root/packages -COPY --from=apk /root/packages/*/glibc-bin-${GLIBC_VERSION}-r0.apk /root/packages diff --git a/docker/glibc-package/scripts/APKBUILD b/docker/glibc-package/scripts/APKBUILD deleted file mode 100644 index 0545508f0a7d4..0000000000000 --- a/docker/glibc-package/scripts/APKBUILD +++ /dev/null @@ -1,53 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -pkgname="glibc" -pkgrel="0" -pkgdesc="GNU C Library compatibility layer" -arch="all" -url="https:/pulsar.apache.org" -license="LGPL" -options="!check" -source="glibc-bin.tar.gz -ld.so.conf" -subpackages="${pkgname}-bin ${pkgname}-dev" -triggers="glibc-bin.trigger=/lib:/usr/lib:/usr/glibc-compat/lib" -depends="libuuid libgcc" - -package() { - mkdir -p $pkgdir/lib $pkgdir/usr/glibc-compat/lib/locale $pkgdir/usr/glibc-compat/lib64 $pkgdir/etc $pkgdir/usr/glibc-compat/etc/ - cp -a $srcdir/usr $pkgdir - cp $srcdir/ld.so.conf $pkgdir/usr/glibc-compat/etc/ld.so.conf - cd $pkgdir/usr/glibc-compat - rm -rf etc/rpc bin sbin lib/gconv lib/getconf lib/audit share var include - - FILENAME=$(ls $pkgdir/usr/glibc-compat/lib/ld-linux-*.so.*) - LIBNAME=$(basename $FILENAME) - ln -s /usr/glibc-compat/lib/$LIBNAME $pkgdir/lib/$LIBNAME - ln -s /usr/glibc-compat/lib/$LIBNAME $pkgdir/usr/glibc-compat/lib64/$LIBNAME - ln -s /usr/glibc-compat/etc/ld.so.cache $pkgdir/etc/ld.so.cache -} - -bin() { - depends="$pkgname libc6-compat" - mkdir -p $subpkgdir/usr/glibc-compat - cp -a $srcdir/usr/glibc-compat/bin $subpkgdir/usr/glibc-compat - cp -a $srcdir/usr/glibc-compat/sbin $subpkgdir/usr/glibc-compat -} - diff --git a/docker/glibc-package/scripts/glibc-bin.trigger b/docker/glibc-package/scripts/glibc-bin.trigger deleted file mode 100755 index 5bae5d7ca2bda..0000000000000 --- a/docker/glibc-package/scripts/glibc-bin.trigger +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/sh -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -/usr/glibc-compat/sbin/ldconfig \ No newline at end of file diff --git a/docker/glibc-package/scripts/ld.so.conf b/docker/glibc-package/scripts/ld.so.conf deleted file mode 100644 index 6548b9300bb9c..0000000000000 --- a/docker/glibc-package/scripts/ld.so.conf +++ /dev/null @@ -1,23 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -/usr/local/lib -/usr/glibc-compat/lib -/usr/lib -/lib diff --git a/docker/kinesis-producer-alpine/Dockerfile b/docker/kinesis-producer-alpine/Dockerfile new file mode 100644 index 0000000000000..ffdf44f55d083 --- /dev/null +++ b/docker/kinesis-producer-alpine/Dockerfile @@ -0,0 +1,90 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +ARG ALPINE_VERSION=3.20 + +# Builds an Alpine image with kinesis_producer compiled for Alpine Linux / musl + +# Build stage +FROM alpine:$ALPINE_VERSION AS kinesis-producer-build +ENV KINESIS_PRODUCER_LIB_VERSION=0.15.12 + +# Install build dependencies +RUN apk update && apk add --no-cache \ + git \ + binutils \ + coreutils \ + alpine-sdk \ + util-linux \ + cmake \ + autoconf \ + automake \ + libtool \ + curl \ + bash \ + tar \ + libuuid \ + linux-headers \ + zlib \ + zlib-dev \ + perl \ + wget \ + boost-dev \ + openssl-dev \ + curl-dev \ + build-base \ + util-linux-dev \ + g++ \ + make \ + upx + +ENV LANG=C.UTF-8 + +RUN mkdir /build +COPY kinesis_producer_alpine.patch /build/ + +# Clone KPL and copy build script +RUN cd /build && \ + git clone --depth 1 --single-branch --branch v${KINESIS_PRODUCER_LIB_VERSION} https://github.com/awslabs/amazon-kinesis-producer && \ + cd amazon-kinesis-producer && \ + git apply ../kinesis_producer_alpine.patch + +# Copy and execute build script +COPY build-alpine.sh /build/ +RUN chmod +x /build/build-alpine.sh +RUN /build/build-alpine.sh + +# Final stage +FROM alpine:$ALPINE_VERSION +COPY --from=kinesis-producer-build /opt/amazon-kinesis-producer /opt/amazon-kinesis-producer +RUN apk update && apk add --no-cache \ + brotli-libs \ + c-ares \ + libcrypto3 \ + libcurl \ + libgcc \ + libidn2 \ + libpsl \ + libssl3 \ + libunistring \ + nghttp2-libs \ + zlib \ + zstd-libs \ + libuuid +WORKDIR /opt/amazon-kinesis-producer/bin diff --git a/docker/glibc-package/README.md b/docker/kinesis-producer-alpine/README.md similarity index 54% rename from docker/glibc-package/README.md rename to docker/kinesis-producer-alpine/README.md index ee1f643705ad2..4526f08c65ec1 100644 --- a/docker/glibc-package/README.md +++ b/docker/kinesis-producer-alpine/README.md @@ -19,21 +19,19 @@ --> -# GLibc compatibility package +# Alpine image with kinesis_producer compiled for Alpine Linux / musl -This directory includes the Docker scripts to build an image with GLibc compiled for Alpine Linux. +This directory includes the Docker scripts to build an image with `kinesis_producer` for Alpine Linux. +`kinesis_producer` is a native executable that is required by [Amazon Kinesis Producer library (KPL)](https://github.com/awslabs/amazon-kinesis-producer) which is used by the Pulsar IO Kinesis Sink connector. The default `kinesis_producer` binary is compiled for glibc, and it does not work on Alpine Linux which uses musl. -This is used to ensure plugins that are going to be used in the Pulsar image and that are depeding on GLibc, will -still be working correctly in the Alpine Image. (eg: Netty Tc-Native and Kinesis Producer Library). - -This image only needs to be re-created when we want to upgrade to a newer version of GLibc. +This image only needs to be re-created when we want to upgrade to a newer version of `kinesis_producer`. # Steps 1. Change the version in the Dockerfile for this directory. 2. Rebuild the image and push it to Docker Hub: ``` -docker buildx build --platform=linux/amd64,linux/arm64 -t apachepulsar/glibc-base:2.38 . --push +docker buildx build --platform=linux/amd64,linux/arm64 -t apachepulsar/pulsar-io-kinesis-sink-kinesis_producer:0.15.12 . --push ``` -The image tag is then used in `docker/pulsar/Dockerfile`. +The image tag is then used in `docker/pulsar-all/Dockerfile`. The `kinesis_producer` binary is copied from the image to the `pulsar-all` image that is used by Pulsar Functions to run the Pulsar IO Kinesis Sink connector. The environment variable `PULSAR_IO_KINESIS_KPL_PATH` is set to `/opt/amazon-kinesis-producer/bin/kinesis_producer` and this is how the Kinesis Sink connector knows where to find the `kinesis_producer` binary. \ No newline at end of file diff --git a/docker/kinesis-producer-alpine/build-alpine.sh b/docker/kinesis-producer-alpine/build-alpine.sh new file mode 100644 index 0000000000000..23718450bbc83 --- /dev/null +++ b/docker/kinesis-producer-alpine/build-alpine.sh @@ -0,0 +1,139 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +set -e +set -x + +INSTALL_DIR=/build/third_party +AWS_SDK_CPP_VERSION="1.11.420" +PROTOBUF_VERSION="3.11.4" +BOOST_VERSION="1.76.0" +BOOST_VERSION_UNDERSCORED="${BOOST_VERSION//\./_}" + +# Create install directory +mkdir -p $INSTALL_DIR + +# Setup environment variables +export CC="gcc" +export CXX="g++" +export CXXFLAGS="-I$INSTALL_DIR/include -O3 -Wno-implicit-fallthrough -Wno-int-in-bool-context" +export LDFLAGS="-L$INSTALL_DIR/lib" +export LD_LIBRARY_PATH="$INSTALL_DIR/lib:$LD_LIBRARY_PATH" + +cd $INSTALL_DIR + +# Build protobuf +if [ ! -d "protobuf-${PROTOBUF_VERSION}" ]; then + curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOBUF_VERSION}/protobuf-all-${PROTOBUF_VERSION}.tar.gz + tar xf protobuf-all-${PROTOBUF_VERSION}.tar.gz + rm protobuf-all-${PROTOBUF_VERSION}.tar.gz + + cd protobuf-${PROTOBUF_VERSION} + ./configure --prefix=${INSTALL_DIR} \ + --disable-shared \ + CFLAGS="-fPIC" \ + CXXFLAGS="-fPIC ${CXXFLAGS}" \ + --with-pic + make -j4 + make install + cd .. +fi + +# Build Boost +if [ ! -d "boost_${BOOST_VERSION_UNDERSCORED}" ]; then + curl -LO https://boostorg.jfrog.io/artifactory/main/release/${BOOST_VERSION}/source/boost_${BOOST_VERSION_UNDERSCORED}.tar.gz + tar xf boost_${BOOST_VERSION_UNDERSCORED}.tar.gz + rm boost_${BOOST_VERSION_UNDERSCORED}.tar.gz + + cd boost_${BOOST_VERSION_UNDERSCORED} + + BOOST_LIBS="regex,thread,log,system,random,filesystem,chrono,atomic,date_time,program_options,test" + + ./bootstrap.sh --with-libraries=$BOOST_LIBS --with-toolset=gcc + + ./b2 \ + -j4 \ + variant=release \ + link=static \ + threading=multi \ + runtime-link=static \ + --prefix=${INSTALL_DIR} \ + cxxflags="-fPIC ${CXXFLAGS}" \ + install + + cd .. +fi + +# Download and build AWS SDK +if [ ! -d "aws-sdk-cpp" ]; then + git clone --depth 1 --branch ${AWS_SDK_CPP_VERSION} https://github.com/awslabs/aws-sdk-cpp.git aws-sdk-cpp + pushd aws-sdk-cpp + git config submodule.fetchJobs 8 + git submodule update --init --depth 1 --recursive + popd + + rm -rf aws-sdk-cpp-build + mkdir aws-sdk-cpp-build + cd aws-sdk-cpp-build + + cmake \ + -DBUILD_ONLY="kinesis;monitoring;sts" \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DSTATIC_LINKING=1 \ + -DCMAKE_PREFIX_PATH="$INSTALL_DIR" \ + -DCMAKE_C_COMPILER="$CC" \ + -DCMAKE_CXX_COMPILER="$CXX" \ + -DCMAKE_CXX_FLAGS="$CXXFLAGS" \ + -DCMAKE_INSTALL_PREFIX="$INSTALL_DIR" \ + -DCMAKE_FIND_FRAMEWORK=LAST \ + -DENABLE_TESTING="OFF" \ + ../aws-sdk-cpp + make -j4 + make install + cd .. +fi + +# Build the native kinesis producer +cd /build/amazon-kinesis-producer +ln -fs ../third_party +cmake -DCMAKE_PREFIX_PATH="$INSTALL_DIR" -DCMAKE_BUILD_TYPE=RelWithDebInfo . +make -j4 + +FINAL_DIR=/opt/amazon-kinesis-producer +# copy the binary +mkdir -p $FINAL_DIR/bin +cp kinesis_producer $FINAL_DIR/bin/kinesis_producer.original + +# capture version information +git describe --long --tags > $FINAL_DIR/bin/.version +git rev-parse HEAD > $FINAL_DIR/bin/.revision +uname -a > $FINAL_DIR/bin/.system_info +cat /etc/os-release > $FINAL_DIR/bin/.os_info +date > $FINAL_DIR/bin/.build_time + +# copy tests +mkdir -p $FINAL_DIR/tests +cp tests $FINAL_DIR/tests/ +cp test_driver $FINAL_DIR/tests/ + +# Strip and compress the binary +cd $FINAL_DIR/bin +strip -o kinesis_producer.stripped kinesis_producer.original +upx --best -o kinesis_producer kinesis_producer.stripped \ No newline at end of file diff --git a/docker/kinesis-producer-alpine/kinesis_producer_alpine.patch b/docker/kinesis-producer-alpine/kinesis_producer_alpine.patch new file mode 100644 index 0000000000000..d1ddd6a85501c --- /dev/null +++ b/docker/kinesis-producer-alpine/kinesis_producer_alpine.patch @@ -0,0 +1,127 @@ +From 96ba2eb7145363586529e6c770dcc0920bf04ac2 Mon Sep 17 00:00:00 2001 +From: Lari Hotari +Date: Wed, 18 Dec 2024 17:16:02 +0200 +Subject: [PATCH] Adapt build for Alpine, fix issue with NULL_BACKTRACE support + +- also use dynamic linking to some libraries (zlib, openssl, libz, libcurl, libcrypto) + to reduce binary size +--- + CMakeLists.txt | 20 +++++++++++--------- + aws/utils/backtrace/bsd_backtrace.cc | 2 +- + aws/utils/backtrace/gcc_backtrace.cc | 3 +-- + aws/utils/backtrace/null_backtrace.cc | 5 ++--- + aws/utils/signal_handler.cc | 1 - + 5 files changed, 15 insertions(+), 16 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 2dd7084..2ba47e6 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -213,22 +213,24 @@ set(STATIC_LIBS + boost_chrono) + + find_package(Threads) +-find_package(ZLIB) ++find_package(ZLIB REQUIRED) + find_package(AWSSDK REQUIRED COMPONENTS kinesis monitoring sts) ++find_package(OpenSSL REQUIRED) ++find_package(CURL REQUIRED) + +-add_library(LibCrypto STATIC IMPORTED) +-set_property(TARGET LibCrypto PROPERTY IMPORTED_LOCATION ${THIRD_PARTY_LIB_DIR}/libcrypto.a) ++add_library(LibCrypto SHARED IMPORTED) ++set_property(TARGET LibCrypto PROPERTY IMPORTED_LOCATION ${OPENSSL_CRYPTO_LIBRARY}) + set_property(TARGET LibCrypto PROPERTY IMPORTED_LINK_INTERFACE_LIBRARIES ${LIBDL_LIBRARIES}) + +-add_library(LibSsl STATIC IMPORTED) +-set_property(TARGET LibSsl PROPERTY IMPORTED_LOCATION ${THIRD_PARTY_LIB_DIR}/libssl.a) ++add_library(LibSsl SHARED IMPORTED) ++set_property(TARGET LibSsl PROPERTY IMPORTED_LOCATION ${OPENSSL_SSL_LIBRARY}) + set_property(TARGET LibSsl PROPERTY IMPORTED_LINK_INTERFACE_LIBRARIES LibCrypto) + +-add_library(LibZ STATIC IMPORTED) +-set_property(TARGET LibZ PROPERTY IMPORTED_LOCATION ${THIRD_PARTY_LIB_DIR}/libz.a) ++add_library(LibZ SHARED IMPORTED) ++set_property(TARGET LibZ PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES}) + +-add_library(LibCurl STATIC IMPORTED) +-set_property(TARGET LibCurl PROPERTY IMPORTED_LOCATION ${THIRD_PARTY_LIB_DIR}/libcurl.a) ++add_library(LibCurl SHARED IMPORTED) ++set_property(TARGET LibCurl PROPERTY IMPORTED_LOCATION ${CURL_LIBRARIES}) + set_property(TARGET LibCurl PROPERTY IMPORTED_LINK_INTERFACE_LIBRARIES ${CMAKE_THREAD_LIBS_INIT} ${LIBRT_LIBRARIES} ${LIBDL_LIBRARIES} LibSsl LibZ) + + add_library(LibProto STATIC IMPORTED) +diff --git a/aws/utils/backtrace/bsd_backtrace.cc b/aws/utils/backtrace/bsd_backtrace.cc +index fb5dbe3..cd6c5fe 100644 +--- a/aws/utils/backtrace/bsd_backtrace.cc ++++ b/aws/utils/backtrace/bsd_backtrace.cc +@@ -15,10 +15,10 @@ + + #include "backtrace.h" + #include +-#include + #include + + #ifdef BSD_BACKTRACE ++#include + namespace aws { + namespace utils { + namespace backtrace { +diff --git a/aws/utils/backtrace/gcc_backtrace.cc b/aws/utils/backtrace/gcc_backtrace.cc +index 446ede9..32a866d 100644 +--- a/aws/utils/backtrace/gcc_backtrace.cc ++++ b/aws/utils/backtrace/gcc_backtrace.cc +@@ -15,7 +15,6 @@ + + #include "backtrace.h" + #include +-#include + #include + #include + #include +@@ -23,7 +22,7 @@ + #include + + #ifdef LIB_BACKTRACE +- ++#include + #include + + namespace { +diff --git a/aws/utils/backtrace/null_backtrace.cc b/aws/utils/backtrace/null_backtrace.cc +index 69d57f9..d443eae 100644 +--- a/aws/utils/backtrace/null_backtrace.cc ++++ b/aws/utils/backtrace/null_backtrace.cc +@@ -15,10 +15,9 @@ + + #include "backtrace.h" + #include +-#include + #include + +-#ifdef NULL_STACKTRACE ++#ifdef NULL_BACKTRACE + + namespace aws { + namespace utils { +@@ -36,4 +35,4 @@ void stack_trace_for_signal(int skip, bool /*signaled*/) { + } + } + +-#endif // NULL_STACKTRACE ++#endif // NULL_BACKTRACE +diff --git a/aws/utils/signal_handler.cc b/aws/utils/signal_handler.cc +index b58ab0e..f483c77 100644 +--- a/aws/utils/signal_handler.cc ++++ b/aws/utils/signal_handler.cc +@@ -19,7 +19,6 @@ + #include "backtrace/backtrace.h" + + #include +-#include + #include + #include + #include +-- +2.47.1 + diff --git a/docker/pulsar-all/Dockerfile b/docker/pulsar-all/Dockerfile index 81ad74b65000f..59ab86e45569e 100644 --- a/docker/pulsar-all/Dockerfile +++ b/docker/pulsar-all/Dockerfile @@ -17,16 +17,42 @@ # under the License. # +# global arguments (only to be used in FROM clauses) ARG PULSAR_IMAGE -FROM busybox as pulsar-all +ARG PULSAR_IO_KINESIS_KPL_IMAGE -ARG PULSAR_IO_DIR -ARG PULSAR_OFFLOADER_TARBALL +FROM busybox AS pulsar-extensions +ARG PULSAR_IO_DIR ADD ${PULSAR_IO_DIR} /connectors +ARG PULSAR_OFFLOADER_TARBALL ADD ${PULSAR_OFFLOADER_TARBALL} / RUN mv /apache-pulsar-offloaders-*/offloaders /offloaders -FROM $PULSAR_IMAGE -COPY --from=pulsar-all /connectors /pulsar/connectors -COPY --from=pulsar-all /offloaders /pulsar/offloaders +FROM ${PULSAR_IO_KINESIS_KPL_IMAGE} AS pulsar-io-kinesis-sink-kinesis_producer + +FROM ${PULSAR_IMAGE} +COPY --from=pulsar-extensions /connectors /pulsar/connectors +COPY --from=pulsar-extensions /offloaders /pulsar/offloaders +# Copy the kinesis_producer native executable compiled for Alpine musl to the pulsar-all image +# This is required to support the Pulsar IO Kinesis sink connector +COPY --from=pulsar-io-kinesis-sink-kinesis_producer /opt/amazon-kinesis-producer/bin/kinesis_producer /opt/amazon-kinesis-producer/bin/.os_info /opt/amazon-kinesis-producer/bin/.build_time /opt/amazon-kinesis-producer/bin/.revision /opt/amazon-kinesis-producer/bin/.system_info /opt/amazon-kinesis-producer/bin/.version /opt/amazon-kinesis-producer/bin/ +# Set the environment variable to point to the kinesis_producer native executable +ENV PULSAR_IO_KINESIS_KPL_PATH=/opt/amazon-kinesis-producer/bin/kinesis_producer +# Install the required dependencies for the kinesis_producer native executable +USER 0 +RUN apk update && apk add --no-cache \ + brotli-libs \ + c-ares \ + libcrypto3 \ + libcurl \ + libgcc \ + libidn2 \ + libpsl \ + libssl3 \ + libunistring \ + nghttp2-libs \ + zlib \ + zstd-libs \ + libuuid +USER 10000 \ No newline at end of file diff --git a/docker/pulsar-all/pom.xml b/docker/pulsar-all/pom.xml index 1f365109c7983..5281e2ab38db2 100644 --- a/docker/pulsar-all/pom.xml +++ b/docker/pulsar-all/pom.xml @@ -80,6 +80,10 @@ docker + + + apachepulsar/pulsar-io-kinesis-sink-kinesis_producer:0.15.12 + ${project.groupId} @@ -161,6 +165,7 @@ target/apache-pulsar-io-connectors-${project.version}-bin target/pulsar-offloader-distribution-${project.version}-bin.tar.gz ${docker.organization}/${docker.image}:${project.version}-${git.commit.id.abbrev} + ${PULSAR_IO_KINESIS_KPL_IMAGE} diff --git a/docker/pulsar/Dockerfile b/docker/pulsar/Dockerfile index c8fc3a4d13c8f..6f1f41755c919 100644 --- a/docker/pulsar/Dockerfile +++ b/docker/pulsar/Dockerfile @@ -75,7 +75,7 @@ ARG SNAPPY_VERSION RUN apk add git alpine-sdk util-linux cmake autoconf automake libtool openjdk17 maven curl bash tar ENV JAVA_HOME=/usr RUN curl -Ls https://github.com/xerial/snappy-java/archive/refs/tags/v$SNAPPY_VERSION.tar.gz | tar zxf - && cd snappy-java-$SNAPPY_VERSION && make clean-native native -FROM apachepulsar/glibc-base:2.38 as glibc + ## Create final stage from Alpine image ## and add OpenJDK and Python dependencies (for Pulsar functions) @@ -90,6 +90,9 @@ RUN apk add --no-cache \ py3-grpcio \ py3-yaml \ gcompat \ + libgcc \ + libstdc++ \ + libuuid \ ca-certificates \ procps \ curl \ @@ -124,10 +127,6 @@ fastavro>=1.9.2\n\ RUN pip3 install --break-system-packages --no-cache-dir --only-binary grpcio -r /requirements.txt RUN rm /requirements.txt -# Install GLibc compatibility library -COPY --from=glibc /root/packages /root/packages -RUN apk add --allow-untrusted --force-overwrite /root/packages/glibc-*.apk - COPY --from=jvm /opt/jvm /opt/jvm ENV JAVA_HOME=/opt/jvm @@ -144,6 +143,8 @@ WORKDIR /pulsar ENV PATH=$PATH:$JAVA_HOME/bin:/pulsar/bin # Use musl libc library for RocksDB ENV ROCKSDB_MUSL_LIBC=true +# Preload gcompat library for glibc compatibility with Netty native libraries +ENV LD_PRELOAD=/lib/libgcompat.so.0 # The UID must be non-zero. Otherwise, it is arbitrary. No logic should rely on its specific value. ARG DEFAULT_USERNAME=pulsar diff --git a/pom.xml b/pom.xml index bd7320ef1c32d..442a67969fa2a 100644 --- a/pom.xml +++ b/pom.xml @@ -166,7 +166,7 @@ flexible messaging model and an intuitive client API. 0.10.2 1.6.2 10.14.2 - 0.45.0 + 0.45.1 true 0.5.0 1.14.12 @@ -2098,6 +2098,7 @@ flexible messaging model and an intuitive client API. **/*.so **/*.so.* **/*.dylib + **/*.patch src/test/resources/*.txt diff --git a/pulsar-io/kinesis/pom.xml b/pulsar-io/kinesis/pom.xml index b472289a472d3..64edc9670a774 100644 --- a/pulsar-io/kinesis/pom.xml +++ b/pulsar-io/kinesis/pom.xml @@ -32,7 +32,7 @@ 2.2.8 - 0.14.13 + 0.15.12 0.13.0 1.9.0 2.3.0 diff --git a/pulsar-io/kinesis/src/main/java/org/apache/pulsar/io/kinesis/KinesisSink.java b/pulsar-io/kinesis/src/main/java/org/apache/pulsar/io/kinesis/KinesisSink.java index d8e4e4bab85e5..1db63c90b2776 100644 --- a/pulsar-io/kinesis/src/main/java/org/apache/pulsar/io/kinesis/KinesisSink.java +++ b/pulsar-io/kinesis/src/main/java/org/apache/pulsar/io/kinesis/KinesisSink.java @@ -88,7 +88,6 @@ configClass = KinesisSinkConfig.class ) public class KinesisSink extends AbstractAwsConnector implements Sink { - private static final Logger LOG = LoggerFactory.getLogger(KinesisSink.class); private KinesisProducer kinesisProducer; @@ -166,6 +165,12 @@ public void open(Map config, SinkContext sinkContext) { if (kinesisSinkConfig.getAwsEndpointPort() != null) { kinesisConfig.setKinesisPort(kinesisSinkConfig.getAwsEndpointPort()); } + if (kinesisSinkConfig.getAwsStsEndpoint() != null) { + kinesisConfig.setStsEndpoint(kinesisSinkConfig.getAwsStsEndpoint()); + } + if (kinesisSinkConfig.getAwsStsPort() != null) { + kinesisConfig.setStsPort(kinesisSinkConfig.getAwsStsPort()); + } kinesisConfig.setRegion(kinesisSinkConfig.getAwsRegion()); kinesisConfig.setThreadingModel(ThreadingModel.POOLED); kinesisConfig.setThreadPoolSize(4); @@ -179,6 +184,7 @@ public void open(Map config, SinkContext sinkContext) { kinesisSinkConfig.getAwsCredentialPluginParam()) .getCredentialProvider(); kinesisConfig.setCredentialsProvider(credentialsProvider); + kinesisConfig.setNativeExecutable(StringUtils.trimToEmpty(kinesisSinkConfig.getNativeExecutable())); this.streamName = kinesisSinkConfig.getAwsKinesisStreamName(); this.kinesisProducer = new KinesisProducer(kinesisConfig); diff --git a/pulsar-io/kinesis/src/main/java/org/apache/pulsar/io/kinesis/KinesisSinkConfig.java b/pulsar-io/kinesis/src/main/java/org/apache/pulsar/io/kinesis/KinesisSinkConfig.java index f81fd32134be2..a3b87d4886a6a 100644 --- a/pulsar-io/kinesis/src/main/java/org/apache/pulsar/io/kinesis/KinesisSinkConfig.java +++ b/pulsar-io/kinesis/src/main/java/org/apache/pulsar/io/kinesis/KinesisSinkConfig.java @@ -104,6 +104,16 @@ public class KinesisSinkConfig extends BaseKinesisConfig implements Serializable help = "The maximum delay(in milliseconds) between retries.") private long retryMaxDelayInMillis = 60000; + @FieldDoc( + required = false, + defaultValue = "", + help = "Path to the native Amazon Kinesis Producer Library (KPL) binary.\n" + + "Only use this setting if you want to use a custom build of the native code.\n" + + "This setting can also be set with the environment variable `PULSAR_IO_KINESIS_KPL_PATH`.\n" + + "If not set, the Kinesis sink will use the built-in native executable." + ) + private String nativeExecutable = System.getenv("PULSAR_IO_KINESIS_KPL_PATH"); + public static KinesisSinkConfig load(Map config, SinkContext sinkContext) { KinesisSinkConfig kinesisSinkConfig = IOConfigUtils.loadWithSecrets(config, KinesisSinkConfig.class, sinkContext); checkArgument(isNotBlank(kinesisSinkConfig.getAwsRegion()) @@ -149,4 +159,17 @@ public enum MessageFormat { FULL_MESSAGE_IN_JSON_EXPAND_VALUE } + @FieldDoc( + required = false, + defaultValue = "", + help = "Custom AWS STS endpoint" + ) + private String awsStsEndpoint = ""; + + @FieldDoc( + required = false, + defaultValue = "", + help = "Custom AWS STS port to connect to" + ) + private Integer awsStsPort; } diff --git a/pulsar-io/kinesis/src/test/java/org/apache/pulsar/io/kinesis/KinesisSinkTest.java b/pulsar-io/kinesis/src/test/java/org/apache/pulsar/io/kinesis/KinesisSinkTest.java index 8a5d5be0ca40c..c682776a14d2b 100644 --- a/pulsar-io/kinesis/src/test/java/org/apache/pulsar/io/kinesis/KinesisSinkTest.java +++ b/pulsar-io/kinesis/src/test/java/org/apache/pulsar/io/kinesis/KinesisSinkTest.java @@ -18,6 +18,16 @@ */ package org.apache.pulsar.io.kinesis; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; +import java.net.URI; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import lombok.SneakyThrows; import org.apache.pulsar.client.api.Message; @@ -42,24 +52,14 @@ import software.amazon.awssdk.services.kinesis.model.ListShardsRequest; import software.amazon.awssdk.services.kinesis.model.ShardIteratorType; -import java.net.URI; -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; -import java.util.concurrent.atomic.AtomicBoolean; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; - public class KinesisSinkTest { public static final String STREAM_NAME = "my-stream-1"; - public static LocalStackContainer LOCALSTACK_CONTAINER = new LocalStackContainer(DockerImageName.parse("localstack/localstack:1.0.4")) - .withServices(LocalStackContainer.Service.KINESIS); + public static LocalStackContainer LOCALSTACK_CONTAINER = + new LocalStackContainer(DockerImageName.parse("localstack/localstack:4.0.3")) + .withServices(LocalStackContainer.Service.KINESIS, LocalStackContainer.Service.STS) + .withEnv("KINESIS_PROVIDER", "kinesalite"); @BeforeClass(alwaysRun = true) public void beforeClass() throws Exception { @@ -123,10 +123,13 @@ public Optional> getMessage() { } private Map createConfig() { - final URI endpointOverride = LOCALSTACK_CONTAINER.getEndpointOverride(LocalStackContainer.Service.KINESIS); + final URI kinesisEndpointOverride = LOCALSTACK_CONTAINER.getEndpointOverride(LocalStackContainer.Service.KINESIS); Map map = new HashMap<>(); - map.put("awsEndpoint", endpointOverride.getHost()); - map.put("awsEndpointPort", endpointOverride.getPort()); + map.put("awsEndpoint", kinesisEndpointOverride.getHost()); + map.put("awsEndpointPort", kinesisEndpointOverride.getPort()); + final URI stsEndpointOverride = LOCALSTACK_CONTAINER.getEndpointOverride(LocalStackContainer.Service.STS); + map.put("awsStsEndpoint", stsEndpointOverride.getHost()); + map.put("awsStsPort", stsEndpointOverride.getPort()); map.put("skipCertificateValidation", true); map.put("awsKinesisStreamName", STREAM_NAME); map.put("awsRegion", "us-east-1"); diff --git a/tests/docker-images/latest-version-image/Dockerfile b/tests/docker-images/latest-version-image/Dockerfile index 0645dd2e78aab..3efc481721464 100644 --- a/tests/docker-images/latest-version-image/Dockerfile +++ b/tests/docker-images/latest-version-image/Dockerfile @@ -17,10 +17,9 @@ # under the License. # -# build go lang examples first in a separate layer ARG PULSAR_ALL_IMAGE -ARG PULSAR_IMAGE +# build go lang examples first in a separate layer FROM golang:1.21-alpine as pulsar-function-go COPY target/pulsar-function-go/ /go/src/github.com/apache/pulsar/pulsar-function-go @@ -28,13 +27,10 @@ RUN cd /go/src/github.com/apache/pulsar/pulsar-function-go && go install ./... RUN cd /go/src/github.com/apache/pulsar/pulsar-function-go/pf && go install RUN cd /go/src/github.com/apache/pulsar/pulsar-function-go/examples && go install ./... -# Reference pulsar-all to copy connectors from there -FROM $PULSAR_ALL_IMAGE as pulsar-all - ######################################## ###### Main image build ######################################## -FROM $PULSAR_IMAGE +FROM $PULSAR_ALL_IMAGE # Switch to run as the root user to simplify building container and then running # supervisord. Each of the pulsar components are spawned by supervisord and their @@ -78,20 +74,6 @@ COPY target/certificate-authority /pulsar/certificate-authority/ # copy broker plugins COPY target/plugins/ /pulsar/examples/ -# Include all offloaders -COPY --from=pulsar-all /pulsar/offloaders /pulsar/offloaders - -# Include only the connectors needed by integration tests -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-cassandra-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-debezium-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-elastic-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-hdfs*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-jdbc-postgres-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-kafka-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-rabbitmq-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-batch-data-generator-*.nar /pulsar/connectors/ -COPY --from=pulsar-all /pulsar/connectors/pulsar-io-kinesis-*.nar /pulsar/connectors/ - # download Oracle JDBC driver for Oracle Debezium Connector tests RUN mkdir -p META-INF/bundled-dependencies RUN cd META-INF/bundled-dependencies && curl -sSL https://search.maven.org/remotecontent?filepath=com/oracle/ojdbc/ojdbc8/19.3.0.0/ojdbc8-19.3.0.0.jar -o ojdbc8-19.3.0.0.jar @@ -110,4 +92,4 @@ RUN jar uf connectors/pulsar-io-debezium-oracle-*.nar META-INF/bundled-dependenc RUN mkdir -p pulsar RUN chmod g+rwx pulsar -CMD bash +CMD bash \ No newline at end of file diff --git a/tests/docker-images/latest-version-image/pom.xml b/tests/docker-images/latest-version-image/pom.xml index 14984d809ac8d..e767d2a2f37e9 100644 --- a/tests/docker-images/latest-version-image/pom.xml +++ b/tests/docker-images/latest-version-image/pom.xml @@ -156,7 +156,6 @@ ${project.basedir} - ${docker.organization}/${docker.image}:${project.version}-${git.commit.id.abbrev} ${docker.organization}/${docker.image}-all:${project.version}-${git.commit.id.abbrev} diff --git a/tests/integration/src/test/java/org/apache/pulsar/tests/integration/io/sinks/KinesisSinkTester.java b/tests/integration/src/test/java/org/apache/pulsar/tests/integration/io/sinks/KinesisSinkTester.java index 6ca9c7b108373..83cb0088cd76b 100644 --- a/tests/integration/src/test/java/org/apache/pulsar/tests/integration/io/sinks/KinesisSinkTester.java +++ b/tests/integration/src/test/java/org/apache/pulsar/tests/integration/io/sinks/KinesisSinkTester.java @@ -79,6 +79,11 @@ public KinesisSinkTester(boolean withSchema) { sinkConfig.put("awsKinesisStreamName", STREAM_NAME); sinkConfig.put("awsRegion", "us-east-1"); sinkConfig.put("awsCredentialPluginParam", "{\"accessKey\":\"access\",\"secretKey\":\"secret\"}"); + sinkConfig.put("awsEndpoint", NAME); + sinkConfig.put("awsEndpointPort", LOCALSTACK_SERVICE_PORT); + sinkConfig.put("awsStsEndpoint", NAME); + sinkConfig.put("awsStsPort", LOCALSTACK_SERVICE_PORT); + sinkConfig.put("skipCertificateValidation", true); if (withSchema) { sinkConfig.put("messageFormat", "FULL_MESSAGE_IN_JSON_EXPAND_VALUE"); } @@ -100,9 +105,6 @@ public Schema getInputTopicSchema() { public void prepareSink() throws Exception { final LocalStackContainer localStackContainer = getServiceContainer(); final URI endpointOverride = localStackContainer.getEndpointOverride(LocalStackContainer.Service.KINESIS); - sinkConfig.put("awsEndpoint", NAME); - sinkConfig.put("awsEndpointPort", LOCALSTACK_SERVICE_PORT); - sinkConfig.put("skipCertificateValidation", true); client = KinesisAsyncClient.builder().credentialsProvider(() -> AwsBasicCredentials.create( "access", "secret")) @@ -128,8 +130,9 @@ public void stopServiceContainer() { @Override protected LocalStackContainer createSinkService(PulsarCluster cluster) { - return new LocalStackContainer(DockerImageName.parse("localstack/localstack:1.0.4")) - .withServices(LocalStackContainer.Service.KINESIS); + return new LocalStackContainer(DockerImageName.parse("localstack/localstack:4.0.3")) + .withServices(LocalStackContainer.Service.KINESIS, LocalStackContainer.Service.STS) + .withEnv("KINESIS_PROVIDER", "kinesalite"); } @Override