Skip to content

Fix device sorting on aws platforms #1870

Fix device sorting on aws platforms

Fix device sorting on aws platforms #1870

Workflow file for this run

name: PR CI

# Triggers: manual dispatch, pushes to the release/default branches, and
# pull requests. Push and pull_request runs are limited to changes that
# touch the build system, sources, tests, topology files, or the
# workflows themselves.
on:
  workflow_dispatch:
  push:
    branches:
      - master
      - main
      - 'v*'
    paths:
      - 'configure.ac'
      - 'Makefile.am'
      - 'autogen.sh'
      - 'include/**'
      - 'm4/**'
      - 'src/**'
      - 'tests/**'
      - 'topology/**'
      - '.github/workflows/**'
  pull_request:
    # Same path filter as for pushes.
    paths:
      - 'configure.ac'
      - 'Makefile.am'
      - 'autogen.sh'
      - 'include/**'
      - 'm4/**'
      - 'src/**'
      - 'tests/**'
      - 'topology/**'
      - '.github/workflows/**'
# Workflow-wide environment shared by every job.
env:
  # Space-separated package list; the Ubuntu jobs pass it to
  # `apt-get install`.
  APT_PACKAGES: >-
    build-essential
    git
    libhwloc-dev
    make
  # Needed as of July 2024 because of an issue around actions/checkout@v4:
  # https://github.com/actions/runner/issues/2906#issuecomment-2208546951
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: 'true'
# Runs that share a group cancel any run still in progress. The group is
# github.head_ref, falling back to the unique run id when head_ref is empty.
concurrency:
  group: ${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Build and test inside Amazon Linux containers, once per combination of
  # SDK, container image and EFA installer version.
  amazonlinux:
    strategy:
      matrix:
        sdk:
          - cuda
          - neuron
        container:
          - public.ecr.aws/amazonlinux/amazonlinux:2023
          - public.ecr.aws/amazonlinux/amazonlinux:2
        efainstaller:
          - latest
          - '1.25.0'
        # Per-container settings consumed by the steps below.
        include:
          - container: public.ecr.aws/amazonlinux/amazonlinux:2023
            displayname: al2023
            efainstallerdir: ALINUX2023
            nvidiadistro: amzn2023
            configmanager: dnf config-manager
            cudapackages: cuda-cudart-devel-12-6 cuda-crt-12-6
          - container: public.ecr.aws/amazonlinux/amazonlinux:2
            displayname: al2
            efainstallerdir: ALINUX2
            nvidiadistro: rhel7
            configmanager: yum-config-manager
            cudapackages: cuda-cudart-devel-12-4 cuda-crt-12-4
    runs-on: ubuntu-latest
    container: ${{ matrix.container }}
    name: ${{ matrix.displayname }}/${{ matrix.sdk }}/efa@${{ matrix.efainstaller }}/build+test
    steps:
      - run: |
          yum -y update && yum -y install git tar util-linux findutils yum-utils
      # note, do not bump to v4: https://github.com/actions/checkout/issues/1590
      - uses: actions/checkout@v3
      - name: Fetch and Install EFA Installer Dependencies
        run: |
          curl -O https://efa-installer.amazonaws.com/aws-efa-installer-${{ matrix.efainstaller }}.tar.gz
          tar -xf aws-efa-installer-*.tar.gz
          cd aws-efa-installer/RPMS/${{ matrix.efainstallerdir }}/x86_64
          find . | grep rpm$ | xargs yum -y localinstall
      - name: Install hwloc, utilities.
        run: |
          yum -y install hwloc-devel autoconf automake libtool gcc g++ git make
      - name: Install CUDA
        if: matrix.sdk == 'cuda'
        # The repo definition is fetched over HTTPS, matching the CUDA
        # keyring download in the Ubuntu jobs.
        run: |
          ${{ matrix.configmanager }} --add-repo \
            https://developer.download.nvidia.com/compute/cuda/repos/${{ matrix.nvidiadistro }}/x86_64/cuda-${{ matrix.nvidiadistro }}.repo \
            --save
          yum -y clean expire-cache
          yum -y install ${{ matrix.cudapackages }}
      - name: Call `autoreconf -ivf`
        run: |
          ./autogen.sh
      - name: Run Configure
        run: |
          if [ "${{ matrix.sdk }}" == "neuron" ]; then
            ./configure --with-mpi=/opt/amazon/openmpi \
                        --with-libfabric=/opt/amazon/efa \
                        --enable-tests=yes \
                        --enable-werror=yes \
                        --enable-picky-compiler=yes \
                        --enable-platform-aws \
                        --enable-neuron
          else
            ./configure --with-mpi=/opt/amazon/openmpi \
                        --with-libfabric=/opt/amazon/efa \
                        --enable-tests=yes \
                        --enable-werror=yes \
                        --enable-picky-compiler=yes \
                        --enable-platform-aws \
                        --with-cuda=/usr/local/cuda/
          fi
      - name: Call `make`
        run: make V=1
      - name: Call `make install`
        run: make install V=1
      - name: Call `make distcheck`
        run: make distcheck V=1
      - name: Call `make check`
        run: make check V=1

  # Build and test on Ubuntu 22.04 across compiler, compiler variant,
  # tracing and SDK combinations.
  distcheck:
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        cc-variant:
          - latest
          - legacy
        cc:
          - gcc
          - clang
        tracing:
          - lttng
          - none
        sdk:
          - cuda
          - neuron
        # Only the "latest" variants carry an explicit compiler version;
        # "legacy" installs the distribution's default package.
        include:
          - cc-variant: latest
            cc: clang
            cc-version: 18
          - cc-variant: latest
            cc: gcc
            cc-version: 13
    name: u2204/${{ matrix.sdk }}/${{matrix.cc}}-${{matrix.cc-variant}}/build+test
    steps:
      - uses: actions/checkout@v4
      - name: Configure Compilers
        run: |
          # Refresh the package index first: this step installs packages
          # and is the first apt operation in the job.
          sudo apt-get update -y
          if [ "${{ matrix.cc }}" == "clang" ]; then
            if [ "${{ matrix.cc-variant }}" == "latest" ]; then
              wget https://apt.llvm.org/llvm.sh
              chmod +x llvm.sh
              sudo ./llvm.sh ${{ matrix.cc-version }}
              sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-${{ matrix.cc-version }} 10
              sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-${{ matrix.cc-version }} 10
            else
              sudo apt-get install -y clang
            fi
            sudo update-alternatives --install /usr/bin/cc cc /usr/bin/clang 10
            sudo update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 10
          fi
          if [ "${{ matrix.cc }}" == "gcc" ]; then
            if [ "${{ matrix.cc-variant }}" == "latest" ]; then
              sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
              sudo apt-get install -y gcc-${{ matrix.cc-version }} g++-${{ matrix.cc-version }}
              sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${{ matrix.cc-version }} 10
              sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${{ matrix.cc-version }} 10
            else
              sudo apt-get install -y gcc g++
            fi
            sudo update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 10
            sudo update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 10
          fi
      - name: Install Base Dependencies
        run: |
          sudo apt-get update -y
          sudo apt-get install -y ${{ env.APT_PACKAGES }}
      - name: Install CUDA SDK
        if: matrix.sdk == 'cuda'
        run: |
          sudo apt-get update -y && sudo apt-get install -y wget lsb-release
          # Build the repo directory name from the release number with
          # the dot removed (e.g. "ubuntu2204").
          repo="ubuntu$(lsb_release -r | cut -d':' -f2 | xargs | sed 's/[.]//g')"
          wget https://developer.download.nvidia.com/compute/cuda/repos/${repo}/$(uname -m)/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update -y
          sudo apt-get install -y cuda-cudart-dev-12-6 cuda-crt-12-6
      - name: Install lttng
        if: matrix.tracing == 'lttng'
        run: |
          sudo apt-get install -y liblttng-ust-dev
      - name: Fetch and Install EFA Installer Dependencies
        run: |
          curl -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz
          tar -xf aws-efa-installer-*.tar.gz
          pushd aws-efa-installer/
          sudo ./efa_installer.sh -y --skip-kmod
          popd
      - name: Build Plugin
        run: |
          set -x
          export CC="cc"
          export CXX="c++"
          # actions/checkout@v4 would drop the plugin source in $PWD,
          # so go ahead and build it.
          ./autogen.sh
          if [ "${{ matrix.sdk }}" == "neuron" ]
          then
            ./configure --with-mpi=/opt/amazon/openmpi \
                        --with-libfabric=/opt/amazon/efa \
                        --enable-tests=yes \
                        --enable-werror=yes \
                        --enable-picky-compiler=yes \
                        --enable-platform-aws \
                        --enable-neuron
          else
            ./configure --with-mpi=/opt/amazon/openmpi \
                        --with-libfabric=/opt/amazon/efa \
                        --enable-tests=yes \
                        --enable-werror=yes \
                        --enable-picky-compiler=yes \
                        --enable-platform-aws \
                        --with-cuda=/usr/local/cuda/
          fi
      - name: Call `make`
        run: make V=1
      - name: Call `make install`
        run: sudo make install V=1
      - name: Call `make distcheck`
        run: make distcheck V=1
      - name: Call `make check`
        run: make check V=1
      - name: Upload config.log
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          # The name includes every matrix dimension (tracing too), because
          # upload-artifact@v4 rejects a second upload under an existing
          # name within the same workflow run.
          name: ${{ matrix.cc }}-${{ matrix.cc-variant }}-${{ matrix.tracing }}-${{ matrix.sdk }}-config.log
          path: config.log
          if-no-files-found: ignore

  # Static analysis with CodeChecker, run per SDK once distcheck succeeds.
  codechecker:
    runs-on: ubuntu-22.04
    needs: [distcheck]
    strategy:
      matrix:
        sdk:
          - cuda
          - neuron
    name: CodeChecker - ${{ matrix.sdk }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.9'
      - name: Install Base Dependencies
        run: |
          sudo apt-get update -y
          sudo apt-get install -y ${{ env.APT_PACKAGES }}
      - name: Install CUDA SDK
        if: matrix.sdk == 'cuda'
        run: |
          sudo apt-get update -y && sudo apt-get install -y wget lsb-release
          repo="ubuntu$(lsb_release -r | cut -d':' -f2 | xargs | sed 's/[.]//g')"
          wget https://developer.download.nvidia.com/compute/cuda/repos/${repo}/$(uname -m)/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update -y
          sudo apt-get install -y cuda-cudart-dev-12-6 cuda-crt-12-6
      - name: Install cppcheck
        run: |
          sudo apt-get install -y cppcheck
      - name: Fetch and Install EFA Installer Dependencies
        run: |
          curl -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz
          tar -xf aws-efa-installer-*.tar.gz
          pushd aws-efa-installer/
          sudo ./efa_installer.sh -y --skip-kmod
          popd
      - name: Run Configure
        run: |
          ./autogen.sh
          if [ "${{ matrix.sdk }}" == "neuron" ]; then
            ./configure --with-mpi=/opt/amazon/openmpi \
                        --with-libfabric=/opt/amazon/efa \
                        --enable-tests=yes \
                        --enable-werror=yes \
                        --enable-picky-compiler=yes \
                        --enable-platform-aws \
                        --enable-neuron
          else
            ./configure --with-mpi=/opt/amazon/openmpi \
                        --with-libfabric=/opt/amazon/efa \
                        --enable-tests=yes \
                        --enable-werror=yes \
                        --enable-picky-compiler=yes \
                        --enable-platform-aws \
                        --with-cuda=/usr/local/cuda/
          fi
      - name: Run CodeChecker
        uses: whisperity/codechecker-analysis-action@v1
        id: codechecker
        with:
          build-command: make
          ctu: true
          config: .github/codechecker.yaml
          install-custom: true
          version: v6.23.1
          llvm-version: '18'
      - name: Save CodeChecker HTML output.
        uses: actions/upload-artifact@v4
        with:
          name: CodeChecker Bug Reports for ${{ matrix.sdk }}
          path: ${{ steps.codechecker.outputs.result-html-dir }}/*.html
      # NOTE(review): no diff options are passed to the action above;
      # confirm that `warnings-in-diff` is actually populated in this
      # configuration, otherwise this step never runs.
      # NOTE(review): the step emits an error annotation but ends with
      # `exit 0`, so the job still succeeds. Left as-is on the assumption
      # that a non-blocking report is intended; confirm.
      - name: CodeChecker Pass Or Fail?
        if: steps.codechecker.outputs.warnings-in-diff == 'true'
        shell: bash
        run: |
          echo "::error title=Static Analyzers Failed::Analysed commit(s) caused static analysis warnings"
          exit 0