From 1949a8aa9897cc4367dad8df90bc59f92e5d80d9 Mon Sep 17 00:00:00 2001
From: Sergey Kosukhin
Date: Fri, 26 Jan 2024 18:41:43 +0100
Subject: [PATCH] Extend Levante CI with NAG (#260)

This adds CI tests with the NAG compiler on Levante with compiler version
and flags currently used to build ICON by default. Both DP and SP floating
models are tested, as are default and accelerator kernels, but not all
possible combinations. Accelerator kernels fail with a run-time error and
are marked as experimental.
---
 .github/workflows/gitlab-ci.yml |  19 +++++-
 .gitlab/levante.yml             | 113 +++++++++++++++++++++++++++-----
 2 files changed, 113 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/gitlab-ci.yml b/.github/workflows/gitlab-ci.yml
index 0567d97b5..941e85796 100644
--- a/.github/workflows/gitlab-ci.yml
+++ b/.github/workflows/gitlab-ci.yml
@@ -29,7 +29,7 @@ jobs:
     # Check out GitHub repository
     #
     - name: Check out GitHub repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
       with:
         fetch-depth: 0
     #
@@ -59,10 +59,24 @@ jobs:
   levante:
     runs-on: ubuntu-latest
     needs: levante-init
+    continue-on-error: ${{ matrix.experimental }}
     strategy:
       fail-fast: false
       matrix:
-        config-name: [nvhpc-gpu-openacc-DP, nvhpc-gpu-openacc-SP]
+        config-name:
+        - nvhpc-gpu-openacc-DP
+        - nvhpc-gpu-openacc-SP
+        #- nag-cpu-default-DP
+        - nag-cpu-default-SP
+        - nag-cpu-accel-DP
+        #- nag-cpu-accel-SP
+        include:
+        # The tests are not experimental by default:
+        - experimental: false
+        - config-name: nag-cpu-accel-DP
+          experimental: true
+        #- config-name: nag-cpu-accel-SP
+        #  experimental: true
     steps:
     #
     # Build, run and check (fetch the log)
@@ -87,3 +101,4 @@ jobs:
         password: ${{ secrets.DKRZ_GITLAB_TOKEN }}
         ref-type: tag
         ref-name: ${{ needs.levante-init.outputs.ref-name }}
+        force: true
diff --git a/.gitlab/levante.yml b/.gitlab/levante.yml
index 30efc7680..701303ceb 100644
--- a/.gitlab/levante.yml
+++ b/.gitlab/levante.yml
@@ -9,42 +9,74 @@ include:
 variables:
   SCHEDULER_PARAMETERS: >-
     --account=mh0287
-    --partition=gpu
-    --gpus=1
     --time=05:00
+    ${EXTRA_SCHEDULER_PARAMETERS}
+  EXTRA_SCHEDULER_PARAMETERS:
 
-.build-common:
+.gpu:
   extends: .default
+  variables:
+    EXTRA_SCHEDULER_PARAMETERS: >-
+      --partition=gpu
+      --gpus=1
+
+.cpu:
+  extends: .default
+  variables:
+    EXTRA_SCHEDULER_PARAMETERS: >-
+      --partition=shared
+
+.nvhpc:
   variables:
     # Core variables:
     FC: /sw/spack-levante/nvhpc-22.5-v4oky3/Linux_x86_64/22.5/compilers/bin/nvfortran
-    # Production flags for ICON model:
-    FCFLAGS: -g -O2 -Mrecursive -Mallocatable=03 -Mstack_arrays -Minfo=accel,inline -acc=gpu,verystrict -gpu=cc80,cuda11.7 -DRTE_USE_${FPMODEL}
     # Convenience variables:
+    VERSION_FCFLAGS: --version
     NFHOME: /sw/spack-levante/netcdf-fortran-4.5.4-syv4qr
     NCHOME: /sw/spack-levante/netcdf-c-4.9.0-gc7kgj
+
+.nag:
+  variables:
+    # Core variables:
+    FC: /sw/spack-levante/nag-7.1-lqjbej/bin/nagfor
+    # Convenience variables:
+    VERSION_FCFLAGS: -V
+    NFHOME: /sw/spack-levante/netcdf-fortran-4.5.3-5di6qe
+    NCHOME: /sw/spack-levante/netcdf-c-4.8.1-vbnli5
+
+.dp:
+  variables:
+    FPMODEL: DP
+    FAILURE_THRESHOLD: "7.e-4"
+
+.sp:
+  variables:
+    FPMODEL: SP
+    FAILURE_THRESHOLD: "3.5e-1"
+
+.common:
+  variables:
     PYHOME: /sw/spack-levante/mambaforge-22.9.0-2-Linux-x86_64-kptncg
-      # Suppress an irrelevant but annoying error message:
+    # Suppress an irrelevant but annoying error message:
     PROJ_LIB: ${PYHOME}/share/proj
     # Make variables:
     FCINCLUDE: -I${NFHOME}/include
     LDFLAGS: -L${NFHOME}/lib -L${NCHOME}/lib
     RRTMGP_ROOT: ${CI_PROJECT_DIR}
     RRTMGP_DATA: ${CI_PROJECT_DIR}/rrtmgp-data
-    RTE_KERNELS: accel
   before_script:
   - module purge
   - module load git
   # Extend the existing environment variables:
   - export PATH="${PYHOME}/bin:${PATH}"
   - export LD_LIBRARY_PATH="${NFHOME}/lib:${NCHOME}/lib:${LD_LIBRARY_PATH-}"
-  # The -Mstack_arrays compiler flag requires a large stack:
+  # Some tests require a large stack:
   - ulimit -s unlimited
   script:
   #
   # Build libraries, examples and tests
   #
-  - ${FC} --version
+  - ${FC} ${VERSION_FCFLAGS}
   - make libs
   - make -C build separate-libs
   #
@@ -60,14 +92,61 @@ variables:
   #
   - make check
 
-nvhpc-gpu-openacc-DP:
-  extends: .build-common
+.nvhpc-gpu-openacc:
+  extends:
+  - .gpu
+  - .nvhpc
+  - .common
   variables:
-    FPMODEL: DP
-    FAILURE_THRESHOLD: "7.e-4"
+    # Compiler flags used for ICON model:
+    FCFLAGS: -g -O2 -Mrecursive -Mallocatable=03 -Mstack_arrays -Minfo=accel,inline -acc=gpu,verystrict -gpu=cc80,cuda11.7 -DRTE_USE_${FPMODEL}
+    RTE_KERNELS: accel
 
-nvhpc-gpu-openacc-SP:
-  extends: .build-common
+.nag-cpu:
+  extends:
+  - .cpu
+  - .nag
+  - .common
   variables:
-    FPMODEL: SP
-    FAILURE_THRESHOLD: "3.5e-1"
+    # Compiler flags used for ICON model:
+    FCFLAGS: -Wc=/sw/spack-levante/gcc-11.2.0-bcn7mb/bin/gcc -f2008 -colour -w=uep -g -gline -O0 -float-store -nan -Wc,-g -Wc,-pipe -Wc,--param,max-vartrack-size=200000000 -Wc,-mno-fma -C=all -DRTE_USE_CBOOL -DRTE_USE_${FPMODEL}
+
+.nag-cpu-default:
+  extends: .nag-cpu
+  variables:
+    RTE_KERNELS: default
+
+.nag-cpu-accel:
+  extends: .nag-cpu
+  variables:
+    RTE_KERNELS: accel
+
+nvhpc-gpu-openacc-DP:
+  extends:
+  - .dp
+  - .nvhpc-gpu-openacc
+
+nvhpc-gpu-openacc-SP:
+  extends:
+  - .sp
+  - .nvhpc-gpu-openacc
+
+#nag-cpu-default-DP:
+#  extends:
+#  - .dp
+#  - .nag-cpu-default
+
+nag-cpu-default-SP:
+  extends:
+  - .sp
+  - .nag-cpu-default
+
+nag-cpu-accel-DP:
+  extends:
+  - .dp
+  - .nag-cpu-accel
+
+#nag-cpu-accel-SP:
+#  extends:
+#  - .sp
+#  - .nag-cpu-accel