forked from aws/aws-ofi-nccl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(build): add nix build definitions
Add a nix build matrix and github ci actions that use them. On a powerful machine, this can build and run unit tests for ~700 build configuration combinations in about 5 minutes.
- Loading branch information
1 parent
e43640f
commit a473699
Showing
12 changed files
with
705 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
{ | ||
"customizations": { | ||
"vscode": { | ||
"extensions": [ | ||
"mkhl.direnv" | ||
] | ||
} | ||
}, | ||
"image": "ghcr.io/cachix/devenv:latest", | ||
"overrideCommand": false, | ||
"updateContentCommand": "devenv test" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
use flake . --impure --show-trace |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Builds the `deps` check of the flake so that its build closure can be picked
# up by the Nix cache action configured below. Runs on demand, and whenever
# the Nix build definitions change in a pull request or on a release branch.
name: Cache Nix CI Dependencies

on:
  workflow_dispatch:
  # The event name is `pull_request` (underscore); `pull-request` is not a
  # recognised trigger.
  pull_request:
    # `**` also matches files in sub-directories of .nix/, consistent with the
    # path filter used by the quick-build workflow.
    paths: ['.nix/**', 'flake.nix', 'flake.lock']
  push:
    branches: ['master', 'v*']
    paths: ['.nix/**', 'flake.nix', 'flake.lock']

jobs:
  build-and-cache-dependencies:
    name: Build and Cache Dependencies
    # Every job needs a runner; use the same one as the quick-build workflow.
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: DeterminateSystems/nix-installer-action@main
        with:
          determinate: true
          extra-conf: |
            experimental-features = nix-command flakes auto-allocate-uids
            extra-substituters = https://nix-community.cachix.org https://cuda-maintainers.cachix.org
            extra-trusted-public-keys = nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
      - uses: DeterminateSystems/magic-nix-cache-action@main
      - name: Build and Cache Dependencies
        run: |
          export NIXPKGS_ALLOW_UNFREE=1
          nix build --impure -L '.#checks.x86_64-linux.deps'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Smoke-test workflow: builds the flake's default check for pull requests that
# touch the sources, the autotools build files, or the Nix build definitions.
name: Quick CI Builds

on:
  workflow_dispatch:
  pull_request:
    paths:
      - "configure.ac"
      - "Makefile.am"
      - "autogen.sh"
      - "include/**"
      - "m4/**"
      - "src/**"
      - "tests/**"
      - ".github/workflows/nix*.yaml"
      - "flake.nix"
      - "flake.lock"
      - ".nix/**"

jobs:
  nix-build:
    name: nix build smoke test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: DeterminateSystems/nix-installer-action@main
        with:
          determinate: true
          extra-conf: |
            experimental-features = nix-command flakes auto-allocate-uids
            extra-substituters = https://nix-community.cachix.org https://cuda-maintainers.cachix.org
            extra-trusted-public-keys = nix-community.cachix.org-1:mB9FSh9qf2dCimDSUo8Zy7bkq5CX+/rkCWyvRCYg3Fs= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
      - uses: DeterminateSystems/magic-nix-cache-action@main
      - name: Build and Cache Dependencies
        run: |
          export NIXPKGS_ALLOW_UNFREE=1
          # The installable must be wrapped in one matching pair of quotes; a
          # mismatched pair leaves the shell quote open and breaks the script.
          nix build --impure -L '.#checks.x86_64-linux.default'
          # We want to use the cache here, but we don't want to cache this
          # specific build, so purge the cache now to prevent it from being
          # pushed on cleanup.
          nix-collect-garbage -d
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Build-matrix axes: each attribute lists the alternatives for one axis.
# String entries are name suffixes; the empty string stands for "feature off".
# NOTE(review): presumably the consumer expands these axes into a cartesian
# product of build configurations -- confirm against the flake.
{
  accelerator = [
    "-cuda"
    "-neuron"
  ];

  platform = [
    "-aws"
    ""
  ];

  # Each tracing alternative is itself a list, so several tracers can be
  # enabled together.
  tracing = [
    [ "" ]
    [ "-nvtx" ]
    [ "-lttng" ]
    [ "-nvtx" "-lttng" ]
  ];

  debug = [
    "-debug"
    ""
  ];

  memory = [
    "-valgrind"
    ""
  ];

  traceprints = [
    "-trace"
    ""
  ];

  cpp = [
    "-cpp"
    ""
  ];

  # Each entry is a function that selects a stdenv from a package set.
  stdenv = [
    (p: p.gcc7Stdenv)
    (p: p.clangStdenv)
    (p: p.gcc14Stdenv)
  ];
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
# Derivation for the aws-ofi-nccl network plugin.
#
# Feature arguments:
#   enableTests         build the test programs (against `mpi`) and run the
#                       unit tests in checkPhase
#   enableTracePrints   pass --enable-trace (defaults to enableTests)
#   neuronSupport       build the Neuron flavour (libnccom-net-ofi)
#   cudaSupport         build the CUDA flavour (libnccl-net-ofi)
#   enableLTTNGTracing  configure with lttng-ust
#   enableNVTXTracing   configure with NVTX (only takes effect with cudaSupport)
#   enableValgrind      configure with valgrind
#   enableAwsTuning     pass --enable-platform-aws; adds "-aws" to pname
#   enableCPPMode       pass --enable-cpp=yes
{ lib
, fetchFromGitHub
, symlinkJoin
, gitUpdater
, stdenv
, config
, libfabric
, hwloc
, autoreconfHook
, lttng-ust
, valgrind
, mpi
, cudaPackages ? { }
, enableTests ? true
, enableTracePrints ? (enableTests)
, neuronSupport ? (!config.cudaSupport)
, cudaSupport ? (config.cudaSupport && !neuronSupport)
, enableLTTNGTracing ? false
, enableNVTXTracing ? false
, enableValgrind ? false
, enableAwsTuning ? false
, enableCPPMode ? false
}:

# Exactly one of the two accelerator backends must be selected.
assert neuronSupport != cudaSupport;
#assert !enableNVTXTracing || (enableNVTXTracing && !neuronSupport);

let
  basename = "lib${if neuronSupport then "nccom" else "nccl"}-net-ofi";
  pname = "${basename}${lib.optionalString enableAwsTuning "-aws"}";
  version = "1.11.0";
  src = fetchFromGitHub {
    owner = "aws";
    repo = "aws-ofi-nccl";
    rev = "v${version}-aws";
    sha256 = "sha256-y3yVPqak+36UXI6L/ddQIfBBwpeiciW571noc8LNefU=";
  };
  # --with-cuda takes a single prefix, so the CUDA outputs needed at build
  # time are merged into one tree. The list is empty without cudaSupport.
  cuda_build_deps_joined = symlinkJoin {
    name = "cuda-build-deps-joined";
    paths = lib.optionals (cudaSupport) [
      (lib.getOutput "static" cudaPackages.cuda_cudart)
      (lib.getDev cudaPackages.cuda_cudart)
      (lib.getDev cudaPackages.cuda_nvcc)
    ];
  };
in
stdenv.mkDerivation {
  inherit pname version src;

  enableParallelBuilding = true;
  separateDebugInfo = true;
  strictDeps = true;

  nativeBuildInputs = [ autoreconfHook ];
  configureFlags =
    [
      "--enable-picky-compiler"
      "--enable-werror"
      "--with-hwloc=${lib.getDev hwloc}"
      "--with-libfabric=${lib.getDev libfabric}"
    ]
    ++ lib.optionals enableCPPMode [
      "--enable-cpp=yes"
    ]
    ++ lib.optionals (!enableTests) [
      "--disable-tests"
    ]
    ++ lib.optionals enableTests [
      "--enable-tests"
      "--with-mpi=${lib.getDev mpi}"
    ]
    ++ lib.optionals enableTracePrints [
      "--enable-trace"
    ]
    ++ lib.optionals cudaSupport [
      "--with-cuda=${cuda_build_deps_joined}"
    ]
    ++ lib.optionals enableLTTNGTracing [
      "--with-lttng=${lib.getDev lttng-ust}"
    ]
    ++ lib.optionals enableValgrind [
      "--with-valgrind=${lib.getDev valgrind}"
    ]
    ++ lib.optionals (enableNVTXTracing && cudaSupport) [
      "--with-nvtx=${lib.getDev cudaPackages.cuda_nvtx}"
    ]
    ++ lib.optionals enableAwsTuning [
      "--enable-platform-aws"
    ]
    ++ lib.optionals neuronSupport [
      "--enable-neuron"
    ];

  buildInputs =
    [
      libfabric
      hwloc
    ]
    ++ lib.optionals cudaSupport [
      cuda_build_deps_joined
    ]
    ++ lib.optionals enableValgrind [
      valgrind
    ]
    ++ lib.optionals enableTests [
      mpi
    ]
    ++ lib.optionals enableLTTNGTracing [
      lttng-ust
    ];

  # Remove the libtool archives from the installed tree. `find -delete`
  # matches on the literal ".la" suffix and is a no-op when nothing matches,
  # where `xargs rm` would fail on empty input.
  postInstall = ''
    find "$out/lib" -type f -name '*.la' -delete
  '';

  doCheck = enableTests;
  checkPhase = ''
    runHook preCheck
    set -euo pipefail
    for test in $(find tests/unit/ -type f -executable -print | xargs) ; do
      echo "======================================================================"
      echo "Running $test"
      # Run the test as the `if` condition so that `set -e` does not abort
      # the phase before the failure has been reported.
      if ./$test ; then
        echo "✅ Passed"
      else
        echo "❌ Failed!"
        exit 1
      fi
    done
    echo "All unit tests passed successfully."
    set +u
    runHook postCheck
  '';

  passthru = {
    inherit cudaSupport;
    updateScript = gitUpdater {
      inherit pname version;
      rev-prefix = "v";
    };
  };
  meta = with lib; {
    homepage = "https://github.com/aws/aws-ofi-nccl";
    license = licenses.asl20;
    # A CUDA build needs a package set that was itself configured for CUDA.
    broken = (cudaSupport && !config.cudaSupport);
    maintainers = with maintainers; [ sielicki ];
    platforms = [
      "x86_64-linux"
      "aarch64-linux"
    ];
  };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Helpers that turn one build-matrix combination into a named package.
{ inputs, lib }:
rec {
  # Version string for a flake input: "git" + the first 7 characters of `rev`
  # when the input has one, otherwise "gitdirty" + the first 7 characters of
  # `dirtyRev`.
  mkGitVersion = i:
    let
      shortHash = hash: builtins.substring 0 7 "${hash}";
    in
    if i ? rev
    then "git${shortHash i.rev}"
    else "gitdirty${shortHash i.dirtyRev}";

  # Name suffix for a stdenv, e.g. "-gcc<version>" or "-clang<version>".
  compilerName = s:
    let
      family = if s.cc.isGNU then "gcc" else "clang";
    in
    "-${family}${s.cc.version}";

  # Appends the suffixes of a combination to `prevname`.
  # NOTE(review): combo.debug contributes to the name only; genPkgFromCombo
  # below does not read it.
  genComboName = pkgs: combo: prevname:
    let
      tracingSuffix = lib.strings.concatStrings combo.tracing;
      compilerSuffix = compilerName (combo.stdenv pkgs);
    in
    "${prevname}${tracingSuffix}${combo.debug}${combo.memory}${combo.traceprints}${combo.cpp}${compilerSuffix}";

  # Instantiates ./default.nix for one combination, then points the result at
  # this flake's own source tree and a version derived from its revision.
  genPkgFromCombo = pkgs: combo:
    let
      onCuda = combo.accelerator == "-cuda";
      hasTracer = flag: builtins.elem flag combo.tracing;
      basePkg = pkgs.callPackage ./default.nix {
        stdenv = combo.stdenv pkgs;
        cudaSupport = onCuda;
        neuronSupport = combo.accelerator == "-neuron";
        enableAwsTuning = combo.platform == "-aws";
        enableNVTXTracing = onCuda && hasTracer "-nvtx";
        enableLTTNGTracing = hasTracer "-lttng";
        enableValgrind = combo.memory == "-valgrind";
        enableTracePrints = combo.traceprints == "-trace";
        enableCPPMode = combo.cpp == "-cpp";
      };
    in
    basePkg.overrideAttrs (_: {
      src = inputs.self;
      version = mkGitVersion inputs.self;
    });

  # Produces a { name, value } pair for one combination; the package's own
  # `name` attribute is overridden to the generated combination name.
  genAttrsFromCombo = { pkgs }: combo:
    let
      pkg = genPkgFromCombo pkgs combo;
      name = genComboName pkgs combo pkg.pname;
    in
    {
      inherit name;
      value = pkg.overrideAttrs { inherit name; };
    };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
# Nixpkgs overlay that rebuilds the plugin's dependency stack from the flake
# inputs. Each overridden package takes its `src` from the matching input and
# its `version` from mkGitVersion (imported from ./lib.nix).
#
# Naming: `final`/`prev` are the overlay's package sets; `pprev` inside an
# overrideAttrs callback is the previous attribute set of that one derivation.
{ inputs }:
(final: prev:
let
  lib = prev.lib // (import ./lib.nix { inherit inputs; lib = prev.lib; });
in
{

  libgdrcopy = final.cudaPackages.backendStdenv.mkDerivation rec {
    pname = "libgdrcopy";
    src = inputs.gdrcopy;
    version = lib.mkGitVersion inputs.gdrcopy;
    # NOTE(review): GDRAPI_ARCH is hard-coded to X86 -- confirm whether other
    # architectures are meant to be supported.
    makeFlags = [
      "LIB_MAJOR_VER=2"
      "LIB_MINOR_VER=5"
      "DESTLIB=$out/lib"
      "DESTINC=$out/include"
      "GDRAPI_ARCH=X86"
    ];
    patchPhase = "chmod +x config_arch";
    buildPhase = "make -C src all";
    depsTargetTarget = with final.cudaPackages; [ cuda_cudart ];
    # installPhase is a custom command, so the make flags are spliced in by hand.
    installPhase = "mkdir -p $out/lib && make ${lib.strings.concatStringsSep " " makeFlags} lib_install";
  };

  rdma-core = prev.rdma-core.overrideAttrs (pprev: {
    src = inputs.rdma-core;
    version = lib.mkGitVersion inputs.rdma-core;
  });

  hwloc = prev.hwloc.overrideAttrs (pprev: {
    src = inputs.hwloc;
    version = lib.mkGitVersion inputs.hwloc;
    nativeBuildInputs = (pprev.nativeBuildInputs or [ ]) ++ [ prev.autoreconfHook ];
  });

  # pmix/prrte/openmpi cannot support new hwloc
  # NOTE(review): all three are nevertheless built against final.hwloc here --
  # confirm that this is the intended reading of the comment above.
  pmix = prev.pmix.override { hwloc = final.hwloc; };
  prrte = prev.prrte.override { hwloc = final.hwloc; };
  openmpi = (prev.openmpi.override {
    cudaSupport = true;
    libfabric = final.libfabric;
    rdma-core = final.rdma-core;
    hwloc = final.hwloc;
  }).overrideAttrs (pprev: {
    src = inputs.openmpi;
    version = lib.mkGitVersion inputs.openmpi;
    nativeBuildInputs = (pprev.nativeBuildInputs or [ ]) ++ [
      prev.autoconf
      prev.automake
      prev.libtool
      prev.perl
      prev.git
      prev.flex
    ];
    # The build system is generated by autogen.pl before patching starts.
    prePatch = ''
      patchShebangs .
      ./autogen.pl
    '';
    outputs = final.lib.lists.remove "man" pprev.outputs;
    NIX_CFLAGS_COMPILE = "-Wno-deprecated-declarations";
  });

  libfabric = (prev.libfabric.override {
    enableOpx = false;
    enablePsm2 = false;
  }).overrideAttrs (pprev: {
    src = inputs.libfabric;
    version = lib.mkGitVersion inputs.libfabric;
    # Extend the derivation's own flags (pprev). Reading them from the
    # package set (prev) always hit the `or [ ]` fallback and silently
    # discarded the upstream configure flags.
    configureFlags = (pprev.configureFlags or [ ]) ++ [
      "--enable-efa=yes"
      "--with-cuda=${prev.lib.getDev final.cudaPackages.cudatoolkit}"
      "--enable-cuda-dlopen"
      "--with-gdrcopy=${prev.lib.getDev final.libgdrcopy}"
      "--enable-gdrcopy-dlopen"
    ];
    buildInputs = (pprev.buildInputs or [ ]) ++ [
      final.rdma-core
    ];
  });

  cudaPackages = prev.cudaPackages.overrideScope (ffinal: pprev: rec {
    nccl = pprev.nccl.overrideAttrs {
      src = inputs.nccl;
      version = lib.mkGitVersion inputs.nccl;
    };
    # `rec` lets nccl-tests pick up the nccl override defined just above.
    nccl-tests = (pprev.nccl-tests.overrideAttrs {
      src = inputs.nccl-tests;
      version = lib.mkGitVersion inputs.nccl-tests;
    }).override {
      mpiSupport = true;
      mpi = final.openmpi;
      cudaPackages = pprev.cudaPackages // { inherit nccl; };
      config.cudaSupport = true;
    };
  });
})
Oops, something went wrong.