Skip to content
This repository has been archived by the owner on Sep 15, 2021. It is now read-only.

Commit

Permalink
fix: merge bagua_install_library and setup.py, remove nccl<=2.6 support
Browse files Browse the repository at this point in the history
  • Loading branch information
NOBLES5E committed Jun 17, 2021
1 parent 28cfb63 commit bda550c
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 172 deletions.
1 change: 0 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,4 @@ recursive-include bagua-core-py/src *
include bagua-core-c/Cargo.toml
include bagua-core-c/build.rs
recursive-include bagua-core-c/src *
recursive-include bagua_install_library *
recursive-include python *
Empty file removed bagua_install_library/__init__.py
Empty file.
161 changes: 0 additions & 161 deletions bagua_install_library/install_library.py

This file was deleted.

138 changes: 128 additions & 10 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,133 @@
from setuptools import setup, find_packages
from setuptools_rust import Binding, RustExtension
import sys
import platform
import shutil
import sys
import tempfile
import urllib.request
from tqdm import tqdm


# Download records for the NCCL library; filled in further down with one
# entry per supported CUDA version.
_nccl_records = []
# Maps a library name (e.g. "nccl") to its list of download records.
library_records = {}


class DownloadProgressBar(tqdm):
    """tqdm progress bar that can be driven by a download progress callback."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar forward so that it shows ``b * bsize`` units done.

        Args:
            b: Number of blocks transferred so far.
            bsize: Size of each block.
            tsize: Total size; when not ``None`` it replaces the bar's total.
        """
        if tsize is not None:
            self.total = tsize
        transferred = b * bsize
        # ``update`` expects an increment, so subtract what is already shown.
        increment = transferred - self.n
        self.update(increment)


def download_url(url, output_path):
    """Download ``url`` to ``output_path`` while showing a progress bar.

    The bar is labelled with the last path component of ``url``.
    """
    label = url.split('/')[-1]
    progress = DownloadProgressBar(
        unit='B',
        unit_scale=True,
        miniters=1,
        desc=label,
    )
    with progress as bar:
        urllib.request.urlretrieve(
            url,
            filename=output_path,
            reporthook=bar.update_to,
        )


def _make_nccl_url(public_version, filename):
    """Build the NVIDIA redist download URL for an NCCL archive.

    Example result:
    https://developer.download.nvidia.com/compute/redist/nccl/v2.8/nccl_2.8.4-1+cuda11.2_x86_64.txz
    """
    base = "https://developer.download.nvidia.com/compute/redist/nccl/"
    relative = "v{}/{}".format(public_version, filename)
    return base + relative


def _make_nccl_record(cuda_version, full_version, public_version, filename_linux):
    """Create the download record for one CUDA/NCCL version pair.

    The record holds the CUDA version under ``"cuda"``, the full NCCL
    version under ``"nccl"``, and per-platform download information under
    ``"assets"`` (only a ``"Linux"`` entry is produced).
    """
    linux_asset = {
        "url": _make_nccl_url(public_version, filename_linux),
        "filename": "libnccl.so.{}".format(full_version),
    }
    record = {"cuda": cuda_version, "nccl": full_version}
    record["assets"] = {"Linux": linux_asset}
    return record


# One row per supported CUDA version:
# (CUDA version, full NCCL version, public NCCL version, Linux archive name)
_nccl_records.extend(
    _make_nccl_record(*spec)
    for spec in (
        ("11.3", "2.9.8", "2.9", "nccl_2.9.8-1+cuda11.3_x86_64.txz"),
        ("11.2", "2.8.4", "2.8", "nccl_2.8.4-1+cuda11.2_x86_64.txz"),
        ("11.1", "2.8.4", "2.8", "nccl_2.8.4-1+cuda11.1_x86_64.txz"),
        ("11.0", "2.9.8", "2.9", "nccl_2.9.8-1+cuda11.0_x86_64.txz"),
        ("10.2", "2.9.8", "2.9", "nccl_2.9.8-1+cuda10.2_x86_64.txz"),
        ("10.1", "2.8.3", "2.8", "nccl_2.8.3-1+cuda10.1_x86_64.txz"),
    )
)
library_records["nccl"] = _nccl_records


def install_lib(cuda, prefix, library):
    """Download a prebuilt library for a CUDA version and install it.

    The record matching ``cuda`` is looked up in ``library_records[library]``,
    the archive for the current platform is downloaded and unpacked in a
    temporary directory, and the single extracted top-level directory is
    moved to the path returned by ``calculate_destination``. A directory
    already present at that path is replaced.

    Args:
        cuda: CUDA version string (e.g. ``"11.2"``); it must equal the
            ``"cuda"`` field of one of the library's records.
        prefix: Installation prefix. ``None`` selects
            ``~/.bagua_core/cuda_lib``.
        library: Key into ``library_records``. Only ``"nccl"`` can be
            installed.

    Raises:
        KeyError: If ``library`` has no entry in ``library_records``.
        RuntimeError: If the CUDA version or the current platform is not
            supported, if installing ``library`` is not implemented, or if
            the archive does not contain exactly one top-level entry.
    """
    lib_records = library_records
    for record in lib_records[library]:
        if record["cuda"] == cuda:
            break
    else:
        raise RuntimeError(
            """
The CUDA version({}) specified is not supported.
Should be one of {}.""".format(
                cuda, str([x["cuda"] for x in lib_records[library]])
            )
        )

    # Run every check that needs no network access first, so an unsupported
    # configuration fails before anything is downloaded or deleted.
    target_platform = platform.system()
    asset = record["assets"].get(target_platform, None)
    if asset is None:
        raise RuntimeError(
            """
The current platform ({}) is not supported.""".format(
                target_platform
            )
        )
    if library != "nccl":
        # Explicit raise instead of ``assert False``: assertions are removed
        # when Python runs with -O.
        raise RuntimeError(
            "Installing library {!r} is not implemented.".format(library)
        )

    if prefix is None:
        prefix = os.path.expanduser("~/.bagua_core/cuda_lib")
    destination = calculate_destination(prefix, cuda, library, record[library])

    print(
        "Installing {} {} for CUDA {} to: {}".format(
            library, record[library], record["cuda"], destination
        )
    )

    url = asset["url"]
    print("Downloading {}...".format(url))
    with tempfile.TemporaryDirectory() as tmpdir:
        filename = os.path.join(tmpdir, os.path.basename(url))
        download_url(url, filename)
        print("Extracting...")
        outdir = os.path.join(tmpdir, "extract")
        # NOTE(review): the archive is unpacked without integrity
        # verification; consider checking a checksum before extraction.
        shutil.unpack_archive(filename, outdir)
        print("Installing...")
        subdir = os.listdir(outdir)
        if len(subdir) != 1:
            raise RuntimeError(
                "Expected exactly one top-level entry in the archive, "
                "found: {}".format(subdir)
            )
        # Remove a previous installation only now that its replacement is
        # ready, so a failed download or extraction leaves it untouched.
        if os.path.exists(destination):
            print("The destination directory {} already exists.".format(destination))
            shutil.rmtree(destination)
        shutil.move(os.path.join(outdir, subdir[0]), destination)
        print("Cleaning up...")
    print("Done!")


def calculate_destination(prefix, cuda, lib, lib_ver):
    """Return the installation directory, ``<prefix>/.data``.

    ``cuda``, ``lib`` and ``lib_ver`` are accepted but do not influence the
    result.
    """
    data_dirname = ".data"
    return os.path.join(prefix, data_dirname)


def check_torch_version():
Expand All @@ -23,7 +150,6 @@ def check_torch_version():


def install_dependency_library():
from bagua_install_library import install_library
nvcc_version = (
os.popen(
"nvcc --version | grep release | sed 's/.*release //' | sed 's/,.*//'"
Expand All @@ -32,15 +158,7 @@ def install_dependency_library():
.strip()
)
print("nvcc_version: ", nvcc_version)
args = [
"--library",
"nccl",
"--cuda",
nvcc_version,
"--prefix",
os.path.join(cwd, "python/bagua_core"),
]
install_library.main(args)
install_lib(nvcc_version, os.path.join(cwd, "python/bagua_core"), "nccl")


if __name__ == "__main__":
Expand Down

0 comments on commit bda550c

Please sign in to comment.