From f063220ed1380cb8f99c8793251f935511d807f2 Mon Sep 17 00:00:00 2001 From: "Edgar A. Leon" <800736+eleon@users.noreply.github.com> Date: Mon, 9 Nov 2020 14:58:14 -0800 Subject: [PATCH] Adding programs to determine the affinity of threads and processes on a given system. They include CPU and GPU mappings. --- {hello => affinity}/README.md | 0 affinity/affinity.h | 35 ++++++++ affinity/cpu.c | 143 ++++++++++++++++++++++++++++++++ affinity/gpu.cu | 111 +++++++++++++++++++++++++ affinity/mpi+omp.c | 107 ++++++++++++++++++++++++ affinity/mpi.c | 54 ++++++++++++ affinity/omp.c | 67 +++++++++++++++ affinity/orig.mk | 150 ++++++++++++++++++++++++++++++++++ affinity/simple.cpp | 56 +++++++++++++ 9 files changed, 723 insertions(+) rename {hello => affinity}/README.md (100%) create mode 100644 affinity/affinity.h create mode 100644 affinity/cpu.c create mode 100644 affinity/gpu.cu create mode 100644 affinity/mpi+omp.c create mode 100644 affinity/mpi.c create mode 100644 affinity/omp.c create mode 100644 affinity/orig.mk create mode 100644 affinity/simple.cpp diff --git a/hello/README.md b/affinity/README.md similarity index 100% rename from hello/README.md rename to affinity/README.md diff --git a/affinity/affinity.h b/affinity/affinity.h new file mode 100644 index 0000000..f20966e --- /dev/null +++ b/affinity/affinity.h @@ -0,0 +1,35 @@ +/*********************************************************** + * Edgar A. Leon + * Lawrence Livermore National Laboratory + ***********************************************************/ + +#ifndef AFFINITY_H_INCLUDED +#define AFFINITY_H_INCLUDED + +#define SHORT_STR_SIZE 32 +#define LONG_STR_SIZE 4096 + +#ifdef __cplusplus +extern "C" { +#endif + + int get_gpu_count(); + + int get_gpu_pci_id(int dev); + + int get_gpu_affinity(char *buf); + + int get_gpu_info(int dev, char *buf); + + int get_gpu_info_all(char *buf); + + int get_num_cpus(); + + int get_cpu_affinity(char *buf); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif diff --git a/affinity/cpu.c b/affinity/cpu.c new file mode 100644 index 0000000..dad4f54 --- /dev/null +++ b/affinity/cpu.c @@ -0,0 +1,143 @@ +/*********************************************************** + * Edgar A. Leon + * Lawrence Livermore National Laboratory + ***********************************************************/ + +#include +#include +#include + +/* __USE_GNU is needed for CPU_ISSET definition */ +#ifndef __USE_GNU +#define __USE_GNU 1 +#endif +#include // sched_getaffinity + + +/* + * Convert a non-negative array of ints to a range + */ +int int2range(int *intarr, int size, char *range) +{ + int i, curr; + int nc = 0; + int start = -1; + int prev = -2; + + for (i=0; i= 0) + nc += sprintf(range+nc, "-%d", prev); + + /* Record start of range */ + if (prev >= 0) + nc += sprintf(range+nc, ","); + nc += sprintf(range+nc, "%d", curr); + start = curr; + } else + /* The last int is end of range */ + if (i == size-1) + nc += sprintf(range+nc, "-%d", curr); + + prev = curr; + } + + return nc; +} + + +/* + * Get number of processing units (cores or hwthreads) + */ +static +int get_total_num_pus() +{ + int pus = sysconf(_SC_NPROCESSORS_ONLN); + + if ( pus < 0 ) + perror("sysconf"); + + return pus; +} + + + + +/* + * Get the affinity. + */ +static +int get_affinity(int *cpus, int *count) +{ + int i; + cpu_set_t resmask; + + CPU_ZERO(&resmask); + + int rc = sched_getaffinity(0, sizeof(resmask), &resmask); + if ( rc < 0 ) { + perror("sched_getaffinity"); + return rc; + } + + *count = 0; + int pus = get_total_num_pus(); + for (i=0; i +#include /* Documentation in hip_runtime_api.h */ +#include "affinity.h" /* Do not perform name mangling */ + + +int get_gpu_count() +{ + int count = 0; + + cudaGetDeviceCount(&count); + + return count; +} + + +int get_gpu_pci_id(int dev) +{ + int value = -1; + cudaError_t err = cudaDeviceGetAttribute(&value, cudaDevAttrPciBusId, dev); + + if ( err ) + fprintf(stderr, "Could not get PCI ID for GPU %d\n", dev); + + return value; +} + + +int get_gpu_affinity(char *buf) +{ + int i, value, count; + int nc = 0; + + cudaGetDeviceCount(&count); + + for (i=0; i> 30); + nc += sprintf(buf+nc, "\tMultiprocessor count: %d\n", prop.multiProcessorCount); + nc += sprintf(buf+nc, "\tClock rate: %.3f Ghz\n", ghz); + nc += sprintf(buf+nc, "\tCompute capability: %d.%d\n", + prop.major, prop.minor); + nc += sprintf(buf+nc, "\tECC enabled: %d\n", prop.ECCEnabled); +#else + nc += sprintf(buf+nc, "\t0x%.2x: %s, %lu GB Mem, " + "%d Multiprocessors, %.3f GHZ, %d.%d CC\n", + prop.pciBusID, prop.name, prop.totalGlobalMem >> 30, + prop.multiProcessorCount, ghz, prop.major, prop.minor); +#endif + + return nc; +} + + +int get_gpu_info_all(char *buf) +{ + cudaError_t err; + int i, myid, count, value; + int nc = 0; + + cudaGetDeviceCount(&count); + err = cudaGetDevice(&myid); + if ( err ) { + fprintf(stderr, "Could not get default device\n"); + return -1; + } + cudaDeviceGetAttribute(&value, cudaDevAttrPciBusId, myid); + + nc += sprintf(buf+nc, "\tDefault device: 0x%x\n", value); + + for (i=0; i +#include +#include +#include +#include "affinity.h" + + +static +void usage(char *name) +{ + printf("Usage: %s [options]\n", name); + printf("\t -mpi: Show MPI info only (no OpenMP)\n"); + printf("\t-verbose: Show detailed GPU info when -mpi enabled\n"); + printf("\t -help: Show this page\n"); +} + + +int main(int argc, char *argv[]) +{ + char buf[LONG_STR_SIZE]; + char hostname[MPI_MAX_PROCESSOR_NAME]; + int rank, np, size, i, ngpus, ncpus; + int verbose = 0; + int help = 0; + int mpi = 0; + int nc = 0; + + /* Command-line options */ + if (argc > 1) + for (i=1; i= 0 ) + verbose = 1; + else if ( strcmp(argv[i], "-m") >= 0 ) + mpi = 1; + else if ( strcmp(argv[i], "-h") >= 0 ) + help = 1; + } + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &np); + MPI_Get_processor_name(hostname, &size); + + if (help) { + if (rank == 0) + usage(argv[0]); + + MPI_Finalize(); + return 0; + } + + if ( mpi ) { + + /* MPI */ + ncpus = get_num_cpus(); + nc += sprintf(buf+nc, "%s Task %2d/%2d with %d cpus: ", + hostname, rank, np, ncpus); + nc += get_cpu_affinity(buf+nc); +#ifdef HAVE_GPUS + ngpus = get_gpu_count(); + nc += sprintf(buf+nc, "%s Task %2d/%2d with %d gpus: ", + hostname, rank, np, ngpus); + nc += get_gpu_affinity(buf+nc); + if (verbose) + nc += get_gpu_info_all(buf+nc); +#endif + + /* Print per-task information */ + printf("%s", buf); + + } else { + + /* MPI+OpenMP */ +#ifdef HAVE_GPUS + ngpus = get_gpu_count(); +#endif + +#pragma omp parallel firstprivate(buf, nc) private(ncpus) shared(rank, np, ngpus, verbose) + { + int tid = omp_get_thread_num(); + int nthreads = omp_get_num_threads(); + ncpus = get_num_cpus(); + + nc += sprintf(buf+nc, "%s Task %3d/%3d Thread %3d/%3d with %2d cpus: ", + hostname, rank, np, tid, nthreads, ncpus); + nc += get_cpu_affinity(buf+nc); +#ifdef HAVE_GPUS + nc += sprintf(buf+nc, "%s Task %3d/%3d Thread %3d/%3d with %2d gpus: ", + hostname, rank, np, tid, nthreads, ngpus); + nc += get_gpu_affinity(buf+nc); +#endif + + /* Print per-worker information */ + printf("%s", buf); + } + + } + + MPI_Finalize(); + return 0; +} diff --git a/affinity/mpi.c b/affinity/mpi.c new file mode 100644 index 0000000..beeb2ab --- /dev/null +++ b/affinity/mpi.c @@ -0,0 +1,54 @@ +/*********************************************************** + * Edgar A. Leon + * Lawrence Livermore National Laboratory + ***********************************************************/ + +#include +#include +#include +#include "affinity.h" + + +int main(int argc, char *argv[]) +{ + char buf[LONG_STR_SIZE]; + char hostname[MPI_MAX_PROCESSOR_NAME]; + int rank, np, size, i; + int verbose = 0; + int ncpus = get_num_cpus(); + int nc = 0; + + /* Get rid of compiler warning. Ay. */ + (void) verbose; + + /* Command-line options */ + if (argc > 1) + for (i=1; i +#include +#include +#include "affinity.h" + + +int main(int argc, char *argv[]) +{ + char buf[LONG_STR_SIZE]; + int i; + int ncpus = get_num_cpus(); + int verbose = 0; + int nc = 0; + + /* Get rid of compiler warning. Ay. */ + (void) verbose; + + /* Command-line options */ + if (argc > 1) + for (i=1; i/dev/null | grep GPU) +#HAVE_NVIDIA_GPUS = 1 +#HAVE_AMD_GPUS = 1 + +CFLAGS = -Wall -Werror +HIP_LDFLAGS = -L$(shell hipconfig --path)/lib -lamdhip64 + +OBJS = cpu.o +ifneq ($(strip $(or $(HAVE_AMD_GPUS),$(HAVE_NVIDIA_GPUS))),) +GPU_FLAGS = -DHAVE_GPUS +OBJS += gpu.o +endif + + +# Get system configuration with 'hipconfig' +# hipconfig --platform +# hipconfig --version +# hipconfig --compiler +# hipconfig --runtime + +############################################################## +# Build a HIP program with nvcc (for NVIDIA hardware) +############################################################## +# nvcc -I$(HIP_ROOT)/include $(MPI_CFLAGS) -Xcompiler -DCUDA_ENABLE_DEPRECATED -x cu $< -Xlinker -lcuda -Xlinker "$(MPI_LIBS)" +# nvcc -I$(HIP_ROOT)/include -Xcompiler -DCUDA_ENABLE_DEPRECATED -x cu -ccbin mpicc $< -Xlinker -lcuda + +############################################################## +# Build a HIP program with hipcc (for NVIDIA hardware) +# To start with a CUDA program, hipify first, e.g., +# hipify-perl square.cu > square.cpp +# Note: hipcc takes .cpp programs (not .c for example) +############################################################## +# Export the following environment variables +# HIP_PLATFORM=nvcc +# HIP_COMPILER=nvcc +# HIPCC_VERBOSE=1 +# hipcc -Xcompiler -DCUDA_ENABLE_DEPRECATED $(MPI_CFLAGS) $< $(MPI_LIBS) -o $@ +# Could use HIP_PLATFORM to determine the flags to use +#ifeq (${HIP_PLATFORM}, nvcc) +# HIPCC_FLAGS = -Xcompiler -DCUDA_ENABLE_DEPRECATED +# HIPCC_FLAGS = -gencode=arch=compute_20,code=sm_20 +#endif + +############################################################## +# Build an MPI program with hipcc +############################################################## +# MPI_ROOT = /usr/tce/packages/mvapich2/mvapich2-2.3-intel-19.0.4 +# MPI_CFLAGS = -I$(MPI_ROOT)/include +# MPI_LIBS = -L$(MPI_ROOT)/lib -lmpi +# ifneq ($(strip $(HAVE_AMD_GPUS)),) +# simple: simple.cpp +# hipcc $(MPI_CFLAGS) $^ $(MPI_LIBS) -o $@ +# endif + + +############################################################## +# Link an OpenMP program with hipcc +############################################################## +# Find the OpenMP lib +# HIP_CLANG_LIB = $(shell hipconfig --hipclangpath)/../lib +# omp: omp.o gpu.o +# hipcc -fopenmp -Xlinker -rpath=$(HIP_CLANG_LIB) $^ -o $@ + + +## I could have chosen to build GPU programs with hipcc +## for both AMD and NVIDIA devices, but the hipcc options +## for NVIDIA are almost like calling nvcc directly... +## I might as well call nvcc directly and no need +## for HIP on NVIDIA architectures! + + +PROGS = mpi omp mpi+omp + + +all: $(PROGS) + + +mpi: mpi.o $(OBJS) +ifneq ($(strip $(HAVE_AMD_GPUS)),) + mpicc $^ -o $@ $(HIP_LDFLAGS) +else ifneq ($(strip $(HAVE_NVIDIA_GPUS)),) + nvcc -ccbin mpicc -Xlinker -lcuda $^ -o $@ +else + mpicc $^ -o $@ +endif + +omp: omp.o $(OBJS) +ifneq ($(strip $(HAVE_AMD_GPUS)),) + $(CC) -fopenmp $^ -o $@ $(HIP_LDFLAGS) +else ifneq ($(strip $(HAVE_NVIDIA_GPUS)),) + nvcc $^ -Xcompiler -fopenmp -o $@ +else + $(CC) -fopenmp $^ -o $@ +endif + +mpi+omp: mpi+omp.o $(OBJS) +ifneq ($(strip $(HAVE_AMD_GPUS)),) + mpicc -fopenmp $^ -o $@ $(HIP_LDFLAGS) +else ifneq ($(strip $(HAVE_NVIDIA_GPUS)),) + nvcc -ccbin mpicc -Xcompiler -fopenmp -Xlinker -lcuda $^ -o $@ +else + mpicc -fopenmp $^ -o $@ +endif + + +ifneq ($(strip $(HAVE_AMD_GPUS)),) +gpu.o: gpu.cpp affinity.h + hipcc -c $< +else +gpu.o: gpu.cu affinity.h + nvcc --Werror all-warnings -x cu -c $< +endif + +omp.o: omp.c affinity.h + $(CC) $(CFLAGS) $(GPU_FLAGS) -fopenmp -c $< + +mpi.o: mpi.c affinity.h + mpicc $(CFLAGS) $(GPU_FLAGS) -c $< + +mpi+omp.o: mpi+omp.c affinity.h + mpicc $(CFLAGS) $(GPU_FLAGS) -fopenmp -c $< + +cpu.o: cpu.c + $(CC) $(CFLAGS) -c $< + +gpu.cpp: gpu.cu + hipify-perl $< > $@ + + +clean: + rm -f *.o *~ $(PROGS) + + + +# gpu-hip.o: gpu-hip.cpp affinity.h +# ifneq ($(strip $(HAVE_AMD_GPUS)),) +# hipcc -g -c -o $@ $< +# else +# nvcc -I$(HIP_ROOT)/include -Xcompiler -DCUDA_ENABLE_DEPRECATED -c -o $@ $< +# endif + +#/usr/tce/packages/cuda/cuda-10.1.243/nvidia/bin/nvcc -I/usr/tce/packages/hip/hip-3.0.0/include -Xcompiler -DCUDA_ENABLE_DEPRECATED -Xcompiler -DHIP_VERSION_MAJOR=3 -Xcompiler -DHIP_VERSION_MINOR=0 -Xcompiler -DHIP_VERSION_PATCH=0 -x cu square.hipref.cpp -Xlinker '"-rpath=/usr/tce/packages/cuda/cuda-10.1.243/nvidia/lib64:/usr/tce/packages/cuda/cuda-10.1.243"' + diff --git a/affinity/simple.cpp b/affinity/simple.cpp new file mode 100644 index 0000000..aa57147 --- /dev/null +++ b/affinity/simple.cpp @@ -0,0 +1,56 @@ +/*********************************************************** + * Edgar A. Leon + * Lawrence Livermore National Laboratory + ***********************************************************/ + +#include +#include +#include + +#define STR_SIZE 100 + +void check_devices(char *buf) +{ + hipDevice_t mydev; + hipDeviceProp_t devProp; + int i, ndevs, myid; + char pciBusId[STR_SIZE] = ""; + int nc = 0; + + hipGetDeviceCount(&ndevs); + nc += sprintf(buf+nc, "Num devices: %d\n", ndevs); + + hipGetDevice(&myid); + hipDeviceGet(&mydev, myid); + hipDeviceGetPCIBusId(pciBusId, STR_SIZE, mydev); + nc += sprintf(buf+nc, "Default device: %s\n", pciBusId); + + for (i=0; i