Skip to content

Commit

Permalink
feat(ci): add package generation
Browse files Browse the repository at this point in the history
stack-info: PR: #592, branch: aws-nslick/stack/33
  • Loading branch information
aws-nslick committed Sep 13, 2024
1 parent 3413c46 commit 83b3b05
Show file tree
Hide file tree
Showing 14 changed files with 612 additions and 1 deletion.
44 changes: 44 additions & 0 deletions .docker/Dockerfile.dnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#
# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#

# Global build arguments. An ARG declared before the first FROM is only
# visible to FROM lines; stages that need the value re-declare it below.
ARG FAMILY=fedora
ARG VERSION=rawhide
ARG VARIANT=cuda
ARG CUDA_DISTRO
ARG AWS_BUILD
ARG ENABLE_POWERTOOLS

# builder: base distro image plus the EFA installer payload, the RPM build
# tooling and the NVIDIA CUDA package repository.
FROM ${FAMILY}:${VERSION} AS builder
ARG CUDA_DISTRO
ARG ENABLE_POWERTOOLS
ENV CUDA_DISTRO=${CUDA_DISTRO}
ENV ENABLE_POWERTOOLS=${ENABLE_POWERTOOLS}
# `efainstaller` is not a stage of this file; it has to be supplied to the
# build as a named context/image (presumably the output of Dockerfile.efa,
# which ships /aws-efa-installer -- confirm against the bake definition).
COPY --from=efainstaller / /
# 1. Install the RPM tooling and run the EFA installer, then drop its payload.
# 2. Add the NVIDIA repo for CUDA builds.
# 3. Optionally flip the Rocky PowerTools repo to enabled. This is best
#    effort: the `|| /bin/true` keeps the build going when the flag is unset
#    or the repo file cannot be edited.
# 4. Bring the image up to date. `dnf update` is an alias of `dnf upgrade`,
#    so a single invocation is enough.
RUN --mount=type=cache,target=/var/cache/yum,sharing=locked \
    --mount=type=cache,target=/var/cache/dnf,sharing=locked \
    bash -c "cd /aws-efa-installer && dnf install -y gcc rpmdevtools rpmlint dnf-plugins-core util-linux && ./efa_installer.sh -n -l -k -d -y && rm -rf /aws-efa-installer" && \
    dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/${CUDA_DISTRO}/$(uname -m)/cuda-${CUDA_DISTRO}.repo && \
    ( test "${ENABLE_POWERTOOLS}" = "1" && sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/Rocky-PowerTools.repo || /bin/true ) && \
    dnf -y upgrade
# Create the ~/rpmbuild directory tree used by rpmbuild in the next stage.
RUN rpmdev-setuptree

# environment: rebuild the source RPM into binary RPMs.
FROM builder AS environment
ARG VARIANT
ARG AWS_BUILD
# CUDA toolkit version suffix of the cuda-cudart-devel package to install.
# The default keeps the previously hard-coded 12-6; the name matches the
# argument exposed by Dockerfile.yum.
ARG TOOLKIT_VERSION=12-6
ENV VARIANT=${VARIANT}
ENV AWS_BUILD=${AWS_BUILD}
# `srpm` must be supplied as a named build context/image that carries the
# *.src.rpm consumed below.
COPY --from=srpm . .
# Select the accelerator variant and AWS platform support through rpm macros.
RUN echo "%with_${VARIANT} 1" >> ~/.rpmmacros
RUN echo "%with_platform_aws ${AWS_BUILD}" >> ~/.rpmmacros
# Install the CUDA runtime headers and the SRPM's build dependencies, then
# rebuild it. Results land under /root/rpmbuild/RPMS.
RUN --mount=type=cache,target=/var/cache/yum,sharing=locked \
    --mount=type=cache,target=/var/cache/dnf,sharing=locked \
    dnf -y install cuda-cudart-devel-${TOOLKIT_VERSION} && dnf -y builddep *.src.rpm && rpmbuild --rebuild *.src.rpm

# Export stage: an empty image holding only the built RPMs.
FROM scratch
COPY --from=environment /root/rpmbuild/RPMS/**/* /
65 changes: 65 additions & 0 deletions .docker/Dockerfile.dpkg
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#
# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#

# Global build arguments. An ARG declared before the first FROM is only
# visible to FROM lines; stages that need the value re-declare it below.
ARG FAMILY=ubuntu
ARG VERSION=latest
ARG CUDA_DISTRO
ARG DEBIAN_FRONTEND=noninteractive
ARG AWS_BUILD

# build: configure, compile and install the plugin under /opt/amazon.
FROM ${FAMILY}:${VERSION} AS build
ARG CUDA_DISTRO
ENV CUDA_DISTRO=${CUDA_DISTRO}
ARG AWS_BUILD=0
ENV AWS_BUILD=${AWS_BUILD}
# Re-declared without a value so the global default (noninteractive) reaches
# the apt-get calls of this stage; kept as ARG so it is not baked into the
# image environment.
ARG DEBIAN_FRONTEND
# Accelerator selector read by ./configure below. Without this declaration
# the variable is always empty inside RUN and the neuron branch is taken
# unconditionally. Left without a default so behaviour is unchanged unless
# the caller passes ACCELERATOR.
ARG ACCELERATOR

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y && apt-get install wget -y

# Fetch and install the NVIDIA keyring package for the CUDA apt repository.
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/${CUDA_DISTRO}/$(uname -m)/cuda-keyring_1.1-1_all.deb

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    dpkg -i cuda-keyring_1.1-1_all.deb

# `efainstaller` must be supplied as a named build context/image. No WORKDIR
# is set in this stage, so `.` is presumably the image root.
COPY --from=efainstaller / .
# Run the EFA installer with the same flags as the RPM Dockerfiles (the stray
# positional "/efa_installer.sh" argument is dropped), then install the
# autotools toolchain.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    bash -c "apt-get update -y && cd /aws-efa-installer && ./efa_installer.sh -n -l -k -d -y && apt-get install -y autoconf automake libtool gcc g++ git libhwloc-dev make && rm -rf /aws-efa-installer"

# `makedist` must be supplied as a named build context/image carrying the
# aws-ofi-nccl dist tarball.
COPY --from=makedist / .
RUN tar xvf ./aws-ofi-nccl*.tar.gz -C .
# The trailing slash restricts the glob to the extracted directory; without
# it the pattern also matches the tarball sitting next to it.
# AWS_BUILD=0 -> prefix .../libnccl-net-ofi     and --disable-platform-aws
# otherwise   -> prefix .../libnccl-net-ofi-aws and --enable-platform-aws
# NOTE(review): this stage only installs the CUDA keyring; /usr/local/cuda
# has to exist for ACCELERATOR=cuda to configure -- confirm where it comes from.
RUN cd aws-ofi-nccl*/ && \
    ./configure --$(test "$ACCELERATOR" = "cuda" && echo "with-cuda=/usr/local/cuda" || echo "enable-neuron=yes") \
        --prefix=/opt/amazon/libnccl-net-ofi$(test "$AWS_BUILD" -eq 0 || echo -n "-aws") \
        --with-libfabric=/opt/amazon/efa \
        --disable-tests \
        --$(test "$AWS_BUILD" -eq 0 && echo -n "disable" || echo -n "enable")-platform-aws \
        --with-mpi=no && make -j "$(nproc)" && make install

# packager: wrap the installed tree from the build stage into a .deb with fpm.
FROM ubuntu:latest AS packager
ARG FAMILY
ARG VERSION
ARG AWS_BUILD=0
# Re-declared without a value so the global default (noninteractive) reaches
# apt-get in this stage.
ARG DEBIAN_FRONTEND
ENV AWS_BUILD=${AWS_BUILD}
ENV FAMILY=${FAMILY}
ENV VERSION=${VERSION}
COPY --from=build /opt/amazon/ /opt/amazon/
# Drop libtool archives. find does the matching and deletion itself, so the
# step succeeds when there is nothing to delete, and the pattern matches a
# literal ".la" suffix (the previous unquoted `grep -E \.la$` reached grep as
# `.la$`, where the dot matches any character).
RUN find /opt/amazon/ -name '*.la' ! -type d -delete
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update -y && apt-get install -y ruby tar squashfs-tools binutils && gem install fpm
# Package name, output file name and packaged directory all get an "-aws"
# suffix when AWS_BUILD is not 0.
RUN fpm \
    -s dir -t deb \
    --license Apache2.0 \
    -p /libnccl-net-ofi$(test "$AWS_BUILD" -eq 0 || echo -n "-aws")-${FAMILY}-${VERSION}.deb \
    --name nccl-net-ofi$(test "$AWS_BUILD" -eq 0 || echo -n "-aws") \
    /opt/amazon/libnccl-net-ofi$(test "$AWS_BUILD" -eq 0 || echo -n "-aws")/=/opt/amazon/libnccl-net-ofi$(test "$AWS_BUILD" -eq 0 || echo -n "-aws")

# Export stage: an empty image holding only the generated package(s).
FROM scratch
COPY --from=packager /libnccl-net-ofi* /

15 changes: 15 additions & 0 deletions .docker/Dockerfile.efa
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#
# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#

# Download and unpack the EFA installer tarball.
FROM alpine:latest AS efa_installer_extracted
ARG EFA_INSTALLER_VERSION=latest
ENV EFA_INSTALLER_VERSION=${EFA_INSTALLER_VERSION}
# Make a failed download fail the pipeline instead of being masked by tar's
# exit status.
SHELL ["/bin/ash", "-o", "pipefail", "-c"]
RUN apk add --no-cache tar curl
# NOTE(review): /libfabric is not referenced anywhere else in this file and is
# not copied to the final stage -- confirm whether it is still needed.
RUN mkdir /libfabric
# -f turns HTTP errors into a non-zero exit, -sS stays quiet but still prints
# errors, -L follows redirects.
RUN curl -fsSL https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz | tar -xvzf - -C /

# Export stage: an empty image holding only the unpacked installer.
FROM scratch
COPY --from=efa_installer_extracted /aws-efa-installer /aws-efa-installer
26 changes: 26 additions & 0 deletions .docker/Dockerfile.makedist
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#
# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#

ARG ACCELERATOR
ARG BASE_IMAGE=ubuntu:22.04

# distbuilder: bootstrap the autotools build and produce the dist tarball.
FROM ${BASE_IMAGE} AS distbuilder
ARG ACCELERATOR
ENV ACCELERATOR=${ACCELERATOR}
# Build-time only: keeps apt-get from prompting without leaking into the
# image environment.
ARG DEBIAN_FRONTEND=noninteractive
# `efainstaller` must be supplied as a named build context/image. COPY creates
# the destination directory, so no separate mkdir is needed.
COPY --from=efainstaller /aws-efa-installer /aws-efa-installer
# Run the EFA installer with the same flags as the RPM Dockerfiles (the stray
# positional "/efa_installer.sh" argument is dropped), then install the
# autotools toolchain.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    bash -c "apt-get update -y && cd /aws-efa-installer && ./efa_installer.sh -n -l -k -d -y && apt-get install -y autoconf automake libtool gcc git libhwloc-dev make && rm -rf /aws-efa-installer"
# Copy the build context root. COPY sources cannot reach outside the
# context, so the previous `../` could only ever mean the context itself.
COPY . /proj
WORKDIR /proj
RUN autoreconf -ivf
# ACCELERATOR=cuda selects the CUDA build; any other value (including unset)
# selects the neuron build.
RUN ./configure --with-libfabric=/opt/amazon/efa --$(test "$ACCELERATOR" = "cuda" && echo "with-cuda=/usr/local/cuda" || echo "enable-neuron=yes")
RUN make dist

# Export stage: an empty image holding only the dist tarball.
FROM scratch
COPY --from=distbuilder /proj/aws-ofi-nccl*.tar.gz /
18 changes: 18 additions & 0 deletions .docker/Dockerfile.srpm
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#
# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#

# packitimg: Fedora image carrying the SRPM tooling (packit and mock).
FROM fedora:rawhide AS packitimg
RUN dnf -y install packit mock

# srpm: gather the project sources and the dist tarball in /proj, then let
# packit build the source RPM there. `src` and `makedist` are not stages of
# this file and must be supplied as named build contexts/images.
FROM packitimg AS srpm
# WORKDIR creates /proj when it does not exist yet.
WORKDIR /proj
COPY --from=src . .
COPY --from=makedist . .
RUN packit srpm

# Export stage: an empty image holding only the generated *.src.rpm.
FROM scratch
COPY --from=srpm /proj/*.src.rpm /
42 changes: 42 additions & 0 deletions .docker/Dockerfile.yum
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#
# Copyright (c) 2024, Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#

# Global build arguments. An ARG declared before the first FROM is only
# visible to FROM lines; stages that need the value re-declare it below.
ARG FAMILY=amazonlinux
ARG VERSION=2
ARG VARIANT=cuda
ARG CUDA_DISTRO
ARG AWS_BUILD

# builder: base distro image plus the EFA installer payload, the RPM build
# tooling and the NVIDIA CUDA package repository.
FROM ${FAMILY}:${VERSION} AS builder
ARG CUDA_DISTRO
ENV CUDA_DISTRO=${CUDA_DISTRO}
# `efainstaller` is not a stage of this file; it must be supplied as a named
# build context/image.
COPY --from=efainstaller / /
# The subshell keeps the `cd` local to the installer step: install the RPM
# tooling, run the EFA installer and remove its payload. Afterwards add the
# NVIDIA repo for CUDA builds and update the installed packages.
RUN --mount=type=cache,target=/var/cache/yum,sharing=locked \
    --mount=type=cache,target=/var/cache/dnf,sharing=locked \
    ( cd /aws-efa-installer \
      && yum install -y gcc rpmdevtools rpmlint yum-utils util-linux \
      && ./efa_installer.sh -n -l -k -d -y \
      && rm -rf /aws-efa-installer ) \
    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/${CUDA_DISTRO}/$(uname -m)/cuda-${CUDA_DISTRO}.repo \
    && yum update -y
# Create the ~/rpmbuild directory tree used by rpmbuild in the next stage.
RUN rpmdev-setuptree

# environment: rebuild the source RPM into binary RPMs.
FROM builder AS environment
ARG VARIANT
ARG AWS_BUILD
ARG TOOLKIT_VERSION=12-6
ENV VARIANT=${VARIANT} \
    AWS_BUILD=${AWS_BUILD} \
    TOOLKIT_VERSION=${TOOLKIT_VERSION}
# `srpm` must be supplied as a named build context/image that carries the
# *.src.rpm consumed below.
COPY --from=srpm . .
# Append the rpm macros that select the accelerator variant, AWS platform
# support and the CUDA toolkit version.
RUN { echo "%with_${VARIANT} 1"; \
      echo "%with_platform_aws ${AWS_BUILD}"; \
      echo "%_cuda_toolkit_version ${TOOLKIT_VERSION}"; } >> ~/.rpmmacros
# Install the CUDA runtime headers and the SRPM's build dependencies, then
# rebuild it. Results land under /root/rpmbuild/RPMS.
RUN --mount=type=cache,target=/var/cache/yum,sharing=locked \
    --mount=type=cache,target=/var/cache/dnf,sharing=locked \
    yum install -y cuda-cudart-devel-${TOOLKIT_VERSION} \
    && yum-builddep -y *.src.rpm \
    && rpmbuild --rebuild *.src.rpm

# Export stage: an empty image holding only the built RPMs.
FROM scratch
COPY --from=environment /root/rpmbuild/RPMS/**/* /
93 changes: 93 additions & 0 deletions .github/workflows/packages.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Builds the dist tarball, the SRPM and the Debian/RPM packages by running
# `docker buildx bake` targets. Job order: dist -> (srpm, debs), srpm -> rpms.
name: Package Generation

on:
  pull_request:
  push:
    branches: [master, main, "v*"]

jobs:
  # Bake target `makedist`.
  # NOTE(review): dist and srpm always push, while debs and rpms skip the
  # push on pull requests -- confirm this difference is intended.
  dist:
    name: Call make dist
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          platforms: linux/amd64,linux/arm64
      - name: docker buildx bake makedist
        uses: docker/bake-action@v5
        with:
          targets: makedist
          push: true
          # Use the GitHub Actions cache backend for every target.
          set: |
            *.cache-from=type=gha
            *.cache-to=type=gha,mode=max

  # Bake target `srpm`; runs after the dist job.
  srpm:
    name: Generate a universal SRPM
    needs: [dist]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          platforms: linux/amd64,linux/arm64
      - name: docker buildx bake srpm
        uses: docker/bake-action@v5
        with:
          targets: srpm
          push: true
          set: |
            *.cache-from=type=gha
            *.cache-to=type=gha,mode=max

  # Bake target `debs`; runs after the dist job.
  debs:
    name: Generate Debian-like Packages
    needs: [dist]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          platforms: linux/amd64,linux/arm64
      - name: docker buildx bake debs
        uses: docker/bake-action@v5
        with:
          targets: debs
          push: ${{ github.event_name != 'pull_request' }}
          set: |
            *.cache-from=type=gha
            *.cache-to=type=gha,mode=max

  # Bake target `rpms`; runs after the srpm job.
  rpms:
    name: Generate RPM-like Packages
    needs: [srpm]
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          platforms: linux/amd64,linux/arm64
      - name: docker buildx bake rpms
        uses: docker/bake-action@v5
        with:
          targets: rpms
          push: ${{ github.event_name != 'pull_request' }}
          set: |
            *.cache-from=type=gha
            *.cache-to=type=gha,mode=max
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,5 @@ m4/lt~obsolete.m4
.idea/
.devenv/
.direnv
# Source RPMs (any file ending in .src.rpm).
*.src.rpm
# NOTE(review): presumably a local output directory of the Docker-based
# package builds -- confirm.
dockerbld
20 changes: 20 additions & 0 deletions .packit.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
# vi:ts=2 sw=2 et:
#
# Packit configuration for building the libnccl-net-ofi source RPM.
# Docs: https://packit.dev/docs/

# Package identity and spec file location.
upstream_package_name: libnccl-net-ofi
downstream_package_name: libnccl-net-ofi
specfile_path: .packit/libnccl-net-ofi.spec

# Release handling.
update_release: false
release_suffix: "{PACKIT_PROJECT_BRANCH}"

srpm_build_deps:
  - git

actions:
  # The version is whatever the .version file contains.
  get-current-version:
    - bash -c "cat .version"
  # Only prints the path of an aws-ofi-nccl-<version>.tar.gz tarball; the
  # command itself does not create an archive.
  create-archive:
    - bash -c "echo ./aws-ofi-nccl-${PACKIT_PROJECT_VERSION}.tar.gz"
Loading

0 comments on commit 83b3b05

Please sign in to comment.