From f063220ed1380cb8f99c8793251f935511d807f2 Mon Sep 17 00:00:00 2001
From: "Edgar A. Leon" <800736+eleon@users.noreply.github.com>
Date: Mon, 9 Nov 2020 14:58:14 -0800
Subject: [PATCH] Adding programs to determine the affinity of threads and
 processes on a given system. They include CPU and GPU mappings.

---
 {hello => affinity}/README.md |   0
 affinity/affinity.h           |  35 ++++++++
 affinity/cpu.c                | 143 ++++++++++++++++++++++++++++++++
 affinity/gpu.cu               | 111 +++++++++++++++++++++++++
 affinity/mpi+omp.c            | 107 ++++++++++++++++++++++++
 affinity/mpi.c                |  54 ++++++++++++
 affinity/omp.c                |  67 +++++++++++++++
 affinity/orig.mk              | 150 ++++++++++++++++++++++++++++++++++
 affinity/simple.cpp           |  56 +++++++++++++
 9 files changed, 723 insertions(+)
 rename {hello => affinity}/README.md (100%)
 create mode 100644 affinity/affinity.h
 create mode 100644 affinity/cpu.c
 create mode 100644 affinity/gpu.cu
 create mode 100644 affinity/mpi+omp.c
 create mode 100644 affinity/mpi.c
 create mode 100644 affinity/omp.c
 create mode 100644 affinity/orig.mk
 create mode 100644 affinity/simple.cpp

diff --git a/hello/README.md b/affinity/README.md
similarity index 100%
rename from hello/README.md
rename to affinity/README.md
diff --git a/affinity/affinity.h b/affinity/affinity.h
new file mode 100644
index 0000000..f20966e
--- /dev/null
+++ b/affinity/affinity.h
@@ -0,0 +1,35 @@
+/***********************************************************
+ * Edgar A. Leon
+ * Lawrence Livermore National Laboratory 
+ ***********************************************************/
+
+#ifndef AFFINITY_H_INCLUDED
+#define AFFINITY_H_INCLUDED
+
+#define SHORT_STR_SIZE 32
+#define LONG_STR_SIZE 4096
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+  int get_gpu_count(); 
+
+  int get_gpu_pci_id(int dev); 
+  
+  int get_gpu_affinity(char *buf);
+
+  int get_gpu_info(int dev, char *buf);
+  
+  int get_gpu_info_all(char *buf);
+
+  int get_num_cpus();
+  
+  int get_cpu_affinity(char *buf); 
+  
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif 
diff --git a/affinity/cpu.c b/affinity/cpu.c
new file mode 100644
index 0000000..dad4f54
--- /dev/null
+++ b/affinity/cpu.c
@@ -0,0 +1,143 @@
+/***********************************************************
+ * Edgar A. Leon
+ * Lawrence Livermore National Laboratory 
+ ***********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/* __USE_GNU is needed for CPU_ISSET definition */ 
+#ifndef __USE_GNU
+#define __USE_GNU 1                
+#endif
+#include <sched.h>            // sched_getaffinity
+
+
+/* 
+ * Convert a non-negative array of ints to a range
+ */ 
+int int2range(int *intarr, int size, char *range)
+{
+  int i, curr;
+  int nc = 0; 
+  int start = -1; 
+  int prev = -2;
+  
+  for (i=0; i<size; i++) {
+    curr = intarr[i]; 
+    if (curr != prev+1) {
+      /* Record end of range */ 
+      if (start != prev && prev >= 0)
+	nc += sprintf(range+nc, "-%d", prev);
+
+      /* Record start of range */ 
+      if (prev >= 0)
+	nc += sprintf(range+nc, ",");
+      nc += sprintf(range+nc, "%d", curr);
+      start = curr; 
+    } else
+      /* The last int is end of range */ 
+      if (i == size-1)
+	nc += sprintf(range+nc, "-%d", curr);
+    
+    prev = curr; 
+  }
+  
+  return nc; 
+}
+
+
+/* 
+ * Get number of processing units (cores or hwthreads) 
+ */ 
+static
+int get_total_num_pus()
+{
+  int pus = sysconf(_SC_NPROCESSORS_ONLN);
+  
+  if ( pus < 0 )
+    perror("sysconf");
+  
+  return pus; 
+}
+
+
+
+
+/*
+ * Get the affinity.
+ */
+static
+int get_affinity(int *cpus, int *count)
+{
+  int i; 
+  cpu_set_t resmask;
+  
+  CPU_ZERO(&resmask);
+  
+  int rc = sched_getaffinity(0, sizeof(resmask), &resmask); 
+  if ( rc < 0 ) {
+    perror("sched_getaffinity");
+    return rc; 
+  }
+  
+  *count = 0; 
+  int pus = get_total_num_pus(); 
+  for (i=0; i<pus; i++) 
+    if ( CPU_ISSET(i, &resmask) ) {
+      cpus[*count] = i; 
+      (*count)++; 
+    }
+  
+  return 0; 
+}
+
+
+/*
+ * Get the number of CPUs where this worker can run. 
+ */
+int get_num_cpus()
+{
+  cpu_set_t mask; 
+
+  CPU_ZERO(&mask);
+  
+  int rc = sched_getaffinity(0, sizeof(mask), &mask); 
+  if ( rc < 0 ) {
+    perror("sched_getaffinity");
+    return rc; 
+  }
+
+  return CPU_COUNT(&mask);
+}
+
+
+/* 
+ * Print my affinity into a buffer.
+ */ 
+int get_cpu_affinity(char *outbuf)
+{
+  int count; 
+  int nc = 0; 
+  
+  int *cpus = malloc(sizeof(int) * get_total_num_pus()); 
+  get_affinity(cpus, &count);
+  
+#if 1
+  nc += int2range(cpus, count, outbuf+nc);
+  //printf("nc=%d count=%d\n", nc, count); 
+#else
+  int i; 
+  for (i=0; i<count; i++) {
+    nc += sprintf(outbuf+nc, "%d ", cpus[i]); 
+  }
+#endif
+  nc += sprintf(outbuf+nc, "\n"); 
+
+  free(cpus); 
+  
+  return nc; 
+}
+
+
diff --git a/affinity/gpu.cu b/affinity/gpu.cu
new file mode 100644
index 0000000..1d8a9c4
--- /dev/null
+++ b/affinity/gpu.cu
@@ -0,0 +1,111 @@
+/***********************************************************
+ * Edgar A. Leon
+ * Lawrence Livermore National Laboratory 
+ ***********************************************************/
+
+#include <stdio.h>
+#include <cuda_runtime.h>      /* Documentation in hip_runtime_api.h */ 
+#include "affinity.h"          /* Do not perform name mangling */ 
+
+
+int get_gpu_count()
+{
+  int count = 0;
+
+  cudaGetDeviceCount(&count);
+
+  return count;
+}
+
+
+int get_gpu_pci_id(int dev)
+{
+  int value = -1; 
+  cudaError_t err = cudaDeviceGetAttribute(&value, cudaDevAttrPciBusId, dev);
+  
+  if ( err )
+    fprintf(stderr, "Could not get PCI ID for GPU %d\n", dev);
+
+  return value; 
+}
+
+
+int get_gpu_affinity(char *buf)
+{
+  int i, value, count;
+  int nc = 0; 
+
+  cudaGetDeviceCount(&count); 
+
+  for (i=0; i<count; i++) {
+    cudaDeviceGetAttribute(&value, cudaDevAttrPciBusId, i);
+    nc += sprintf(buf+nc, "0x%x ", value); 
+  }
+  nc += sprintf(buf+nc, "\n"); 
+
+  return nc; 
+}
+
+
+int get_gpu_info(int devid, char *buf)
+{
+  cudaDeviceProp prop;
+  cudaError_t err; 
+  int nc = 0;
+  
+  err = cudaGetDeviceProperties(&prop, devid);
+  if ( err ) {
+    fprintf(stderr, "Could not get info for GPU %d\n", devid);
+    return -1;
+  }
+
+  float ghz = prop.clockRate / 1000.0 / 1000.0; 
+#if 0
+  nc += sprintf(buf+nc, "\tName: %s\n", prop.name); 
+  nc += sprintf(buf+nc, "\tPCI bus ID: 0x%x\n", prop.pciBusID);
+  //nc += sprintf(buf+nc, "\tPCI device ID 0x%x\n", prop.pciDeviceID);
+  //nc += sprintf(buf+nc, "\tPCI domain ID 0x%x\n", prop.pciDomainID); 
+  nc += sprintf(buf+nc, "\tMemory: %lu GB\n", prop.totalGlobalMem >> 30);
+  nc += sprintf(buf+nc, "\tMultiprocessor count: %d\n", prop.multiProcessorCount);
+  nc += sprintf(buf+nc, "\tClock rate: %.3f Ghz\n", ghz); 
+  nc += sprintf(buf+nc, "\tCompute capability: %d.%d\n",
+		prop.major, prop.minor);
+  nc += sprintf(buf+nc, "\tECC enabled: %d\n", prop.ECCEnabled);
+#else
+  nc += sprintf(buf+nc, "\t0x%.2x: %s, %lu GB Mem, "
+		"%d Multiprocessors, %.3f GHZ, %d.%d CC\n",
+		prop.pciBusID, prop.name, prop.totalGlobalMem >> 30,
+		prop.multiProcessorCount, ghz, prop.major, prop.minor); 
+#endif
+  
+  return nc; 
+}
+
+
+int get_gpu_info_all(char *buf)
+{
+  cudaError_t err; 
+  int i, myid, count, value;
+  int nc = 0; 
+  
+  cudaGetDeviceCount(&count);
+  err = cudaGetDevice(&myid);
+  if ( err ) {
+    fprintf(stderr, "Could not get default device\n");
+    return -1; 
+  }
+  cudaDeviceGetAttribute(&value, cudaDevAttrPciBusId, myid);
+
+  nc += sprintf(buf+nc, "\tDefault device: 0x%x\n", value); 
+  
+  for (i=0; i<count; i++) {
+    //nc += sprintf(buf+nc, "\t--\n"); 
+    nc += get_gpu_info(i, buf+nc);
+  }
+  
+  return nc; 
+}
+
+
+
+
diff --git a/affinity/mpi+omp.c b/affinity/mpi+omp.c
new file mode 100644
index 0000000..226a218
--- /dev/null
+++ b/affinity/mpi+omp.c
@@ -0,0 +1,107 @@
+/***********************************************************
+ * Edgar A. Leon
+ * Lawrence Livermore National Laboratory 
+ ***********************************************************/
+
+#include <stdio.h>
+#include <string.h>
+#include <mpi.h>
+#include <omp.h>
+#include "affinity.h"
+
+
+static
+void usage(char *name)
+{
+  printf("Usage: %s [options]\n", name);
+  printf("\t    -mpi: Show MPI info only (no OpenMP)\n"); 
+  printf("\t-verbose: Show detailed GPU info when -mpi enabled\n");
+  printf("\t   -help: Show this page\n");
+}
+
+
+int main(int argc, char *argv[])
+{
+  char buf[LONG_STR_SIZE];
+  char hostname[MPI_MAX_PROCESSOR_NAME]; 
+  int rank, np, size, i, ngpus, ncpus;
+  int verbose = 0;
+  int help = 0; 
+  int mpi = 0; 
+  int nc = 0; 
+  
+  /* Command-line options */
+  if (argc > 1) 
+    for (i=1; i<argc; i++) {
+      /* Todo: Eat heading dashes here */ 
+      if ( strcmp(argv[i], "-v") >= 0 )
+	verbose = 1;
+      else if ( strcmp(argv[i], "-m") >= 0 )
+	mpi = 1;
+      else if ( strcmp(argv[i], "-h") >= 0 )
+	help = 1; 
+    }
+  
+  MPI_Init(&argc, &argv); 
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank); 
+  MPI_Comm_size(MPI_COMM_WORLD, &np); 
+  MPI_Get_processor_name(hostname, &size); 
+  
+  if (help) {
+    if (rank == 0)
+      usage(argv[0]);
+    
+    MPI_Finalize(); 
+    return 0; 
+  }
+  
+  if ( mpi ) {
+    
+    /* MPI */  
+    ncpus = get_num_cpus();
+    nc += sprintf(buf+nc, "%s Task %2d/%2d with %d cpus: ",
+		  hostname, rank, np, ncpus);
+    nc += get_cpu_affinity(buf+nc);
+#ifdef HAVE_GPUS
+    ngpus = get_gpu_count(); 
+    nc += sprintf(buf+nc, "%s Task %2d/%2d with %d gpus: ",
+		  hostname, rank, np, ngpus); 
+    nc += get_gpu_affinity(buf+nc);
+    if (verbose)
+      nc += get_gpu_info_all(buf+nc);
+#endif
+    
+    /* Print per-task information */ 
+    printf("%s", buf);
+    
+  } else {
+
+    /* MPI+OpenMP */ 
+#ifdef HAVE_GPUS    
+    ngpus = get_gpu_count();
+#endif 
+    
+#pragma omp parallel firstprivate(buf, nc) private(ncpus) shared(rank, np, ngpus, verbose)
+    {
+      int tid = omp_get_thread_num();
+      int nthreads = omp_get_num_threads();
+      ncpus = get_num_cpus();
+      
+      nc += sprintf(buf+nc, "%s Task %3d/%3d Thread %3d/%3d with %2d cpus: ",
+		    hostname, rank, np, tid, nthreads, ncpus);
+      nc += get_cpu_affinity(buf+nc);
+#ifdef HAVE_GPUS
+      nc += sprintf(buf+nc, "%s Task %3d/%3d Thread %3d/%3d with %2d gpus: ",
+		    hostname, rank, np, tid, nthreads, ngpus);
+      nc += get_gpu_affinity(buf+nc);
+#endif
+      
+      /* Print per-worker information */ 
+      printf("%s", buf);
+    }
+    
+  }
+  
+  MPI_Finalize(); 
+  return 0; 
+}
diff --git a/affinity/mpi.c b/affinity/mpi.c
new file mode 100644
index 0000000..beeb2ab
--- /dev/null
+++ b/affinity/mpi.c
@@ -0,0 +1,54 @@
+/***********************************************************
+ * Edgar A. Leon
+ * Lawrence Livermore National Laboratory 
+ ***********************************************************/
+
+#include <stdio.h>
+#include <string.h>
+#include <mpi.h>
+#include "affinity.h"
+
+
+int main(int argc, char *argv[])
+{
+  char buf[LONG_STR_SIZE];
+  char hostname[MPI_MAX_PROCESSOR_NAME]; 
+  int rank, np, size, i;
+  int verbose = 0; 
+  int ncpus = get_num_cpus(); 
+  int nc = 0; 
+
+  /* Get rid of compiler warning. Ay. */
+  (void) verbose; 
+  
+  /* Command-line options */
+  if (argc > 1) 
+    for (i=1; i<argc; i++) {
+      if ( strcmp(argv[i], "-v") == 0 )
+	verbose = 1; 
+    }
+  
+  MPI_Init(&argc, &argv); 
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank); 
+  MPI_Comm_size(MPI_COMM_WORLD, &np); 
+  MPI_Get_processor_name(hostname, &size); 
+
+  nc += sprintf(buf+nc, "%-10s Task %3d/%3d runing on %d CPUs: ",
+		hostname, rank, np, ncpus);
+  nc += get_cpu_affinity(buf+nc);
+#ifdef HAVE_GPUS
+  int ndevs = get_gpu_count();
+  nc += sprintf(buf+nc, "%10s Task %3d/%3d has %d GPUs: ",
+		"", rank, np, ndevs); 
+  nc += get_gpu_affinity(buf+nc);
+  if (verbose)
+    nc += get_gpu_info_all(buf+nc);
+#endif
+  
+  /* Print per-task information */ 
+  printf("%s", buf);
+  
+  MPI_Finalize(); 
+
+  return 0; 
+}
diff --git a/affinity/omp.c b/affinity/omp.c
new file mode 100644
index 0000000..7806a0b
--- /dev/null
+++ b/affinity/omp.c
@@ -0,0 +1,67 @@
+/***********************************************************
+ * Edgar A. Leon
+ * Lawrence Livermore National Laboratory 
+ ***********************************************************/
+
+#include <stdio.h>
+#include <string.h>
+#include <omp.h>
+#include "affinity.h"
+
+
+int main(int argc, char *argv[])
+{
+  char buf[LONG_STR_SIZE];
+  int i;
+  int ncpus = get_num_cpus();
+  int verbose = 0; 
+  int nc = 0;
+
+  /* Get rid of compiler warning. Ay. */
+  (void) verbose; 
+  
+  /* Command-line options */
+  if (argc > 1) 
+    for (i=1; i<argc; i++) {
+      if ( strcmp(argv[i], "-v") == 0 )
+	verbose = 1; 
+    }
+
+  nc += sprintf(buf+nc, "Process runing on %d CPUs: ", ncpus);
+  nc += get_cpu_affinity(buf+nc);
+#ifdef HAVE_GPUS
+  int ndevs = get_gpu_count();
+  nc += sprintf(buf+nc, "Process has %d GPUs: ", ndevs); 
+  nc += get_gpu_affinity(buf+nc);
+  nc += get_gpu_info_all(buf+nc);
+#endif
+  
+  /* Print the process information */ 
+  printf("\n%s", buf);
+  
+  /* Clear buffer for reuse */
+  nc = 0; 
+  buf[0] = '\0';
+
+#pragma omp parallel firstprivate(buf, nc) private(ncpus) shared(verbose)
+  {
+    int tid = omp_get_thread_num();
+    int nthreads = omp_get_num_threads();
+    ncpus = get_num_cpus();
+ 
+    nc += sprintf(buf+nc, "Thread %3d/%3d running on %d CPUs: ",
+		  tid, nthreads, ncpus);
+    nc += get_cpu_affinity(buf+nc);
+#ifdef HAVE_GPUS
+    int dev = tid % ndevs; 
+    nc += sprintf(buf+nc, "Thread %3d/%3d assigned to GPU: 0x%x\n",
+		  tid, nthreads, get_gpu_pci_id(dev));
+    if (verbose)
+      nc += get_gpu_info(dev, buf+nc);
+#endif
+
+    printf("%s", buf); 
+  }
+  
+  return 0;
+}
diff --git a/affinity/orig.mk b/affinity/orig.mk
new file mode 100644
index 0000000..ae46dc3
--- /dev/null
+++ b/affinity/orig.mk
@@ -0,0 +1,150 @@
+##############################################################
+# Edgar A. Leon
+# Lawrence Livermore National Laboratory
+##############################################################
+
+
+# Check if we have AMD GPUs
+#HAVE_AMD_GPUS = $(shell rocm-smi --showbus 2>/dev/null | grep GPU)
+#HAVE_NVIDIA_GPUS = 1
+#HAVE_AMD_GPUS    = 1
+
+CFLAGS      = -Wall -Werror
+HIP_LDFLAGS = -L$(shell hipconfig --path)/lib -lamdhip64
+
+OBJS        = cpu.o
+ifneq ($(strip $(or $(HAVE_AMD_GPUS),$(HAVE_NVIDIA_GPUS))),)
+GPU_FLAGS   = -DHAVE_GPUS
+OBJS       += gpu.o
+endif
+
+
+# Get system configuration with 'hipconfig'
+# hipconfig --platform
+# hipconfig --version
+# hipconfig --compiler
+# hipconfig --runtime
+
+##############################################################
+# Build a HIP program with nvcc (for NVIDIA hardware)
+##############################################################
+#	nvcc -I$(HIP_ROOT)/include $(MPI_CFLAGS) -Xcompiler -DCUDA_ENABLE_DEPRECATED -x cu $< -Xlinker -lcuda -Xlinker "$(MPI_LIBS)"
+#	nvcc -I$(HIP_ROOT)/include -Xcompiler -DCUDA_ENABLE_DEPRECATED -x cu -ccbin mpicc $< -Xlinker -lcuda
+
+##############################################################
+# Build a HIP program with hipcc (for NVIDIA hardware)
+# To start with a CUDA program, hipify first, e.g., 
+# hipify-perl square.cu > square.cpp
+# Note: hipcc takes .cpp programs (not .c for example)
+##############################################################
+# Export the following environment variables 
+# HIP_PLATFORM=nvcc
+# HIP_COMPILER=nvcc
+# HIPCC_VERBOSE=1
+#	hipcc -Xcompiler -DCUDA_ENABLE_DEPRECATED $(MPI_CFLAGS) $< $(MPI_LIBS) -o $@
+# Could use HIP_PLATFORM to determine the flags to use
+#ifeq (${HIP_PLATFORM}, nvcc)
+#	HIPCC_FLAGS = -Xcompiler -DCUDA_ENABLE_DEPRECATED
+#	HIPCC_FLAGS = -gencode=arch=compute_20,code=sm_20 
+#endif
+
+##############################################################
+# Build an MPI program with hipcc 
+##############################################################
+# MPI_ROOT   = /usr/tce/packages/mvapich2/mvapich2-2.3-intel-19.0.4
+# MPI_CFLAGS = -I$(MPI_ROOT)/include
+# MPI_LIBS   = -L$(MPI_ROOT)/lib -lmpi
+# ifneq ($(strip $(HAVE_AMD_GPUS)),)
+# simple: simple.cpp
+# 	hipcc $(MPI_CFLAGS) $^ $(MPI_LIBS) -o $@
+# endif
+
+
+##############################################################
+# Link an OpenMP program with hipcc
+##############################################################
+# Find the OpenMP lib
+# HIP_CLANG_LIB = $(shell hipconfig --hipclangpath)/../lib
+# omp: omp.o gpu.o
+#	hipcc -fopenmp -Xlinker -rpath=$(HIP_CLANG_LIB) $^ -o $@
+
+
+## I could have chosen to build GPU programs with hipcc
+## for both AMD and NVIDIA devices, but the hipcc options 
+## for NVIDIA are almost like calling nvcc directly...
+## I might as well call nvcc directly and no need
+## for HIP on NVIDIA architectures! 
+
+
+PROGS = mpi omp mpi+omp
+
+
+all: $(PROGS)
+
+
+mpi: mpi.o $(OBJS)
+ifneq ($(strip $(HAVE_AMD_GPUS)),)
+	mpicc $^ -o $@ $(HIP_LDFLAGS)
+else ifneq ($(strip $(HAVE_NVIDIA_GPUS)),)
+	nvcc -ccbin mpicc -Xlinker -lcuda $^ -o $@
+else
+	mpicc $^ -o $@ 
+endif 
+
+omp: omp.o $(OBJS)
+ifneq ($(strip $(HAVE_AMD_GPUS)),)
+	$(CC) -fopenmp $^ -o $@ $(HIP_LDFLAGS)
+else ifneq ($(strip $(HAVE_NVIDIA_GPUS)),)
+	nvcc $^ -Xcompiler -fopenmp -o $@
+else
+	$(CC) -fopenmp $^ -o $@
+endif
+
+mpi+omp: mpi+omp.o $(OBJS)
+ifneq ($(strip $(HAVE_AMD_GPUS)),)
+	mpicc -fopenmp $^ -o $@ $(HIP_LDFLAGS)
+else ifneq ($(strip $(HAVE_NVIDIA_GPUS)),)
+	nvcc -ccbin mpicc -Xcompiler -fopenmp -Xlinker -lcuda $^ -o $@
+else
+	mpicc -fopenmp $^ -o $@ 
+endif 
+
+
+ifneq ($(strip $(HAVE_AMD_GPUS)),)
+gpu.o: gpu.cpp affinity.h
+	hipcc -c $<
+else
+gpu.o: gpu.cu affinity.h
+	nvcc --Werror all-warnings -x cu -c $<
+endif 
+
+omp.o: omp.c affinity.h
+	$(CC) $(CFLAGS) $(GPU_FLAGS) -fopenmp -c $<
+
+mpi.o: mpi.c affinity.h
+	mpicc $(CFLAGS) $(GPU_FLAGS) -c $<
+
+mpi+omp.o: mpi+omp.c affinity.h
+	mpicc $(CFLAGS) $(GPU_FLAGS) -fopenmp -c $<
+
+cpu.o: cpu.c
+	$(CC) $(CFLAGS) -c $< 
+
+gpu.cpp: gpu.cu
+	hipify-perl $< > $@
+
+
+clean:
+	rm -f *.o *~ $(PROGS)
+
+
+
+# gpu-hip.o: gpu-hip.cpp affinity.h
+# ifneq ($(strip $(HAVE_AMD_GPUS)),)
+# 	hipcc -g -c -o $@ $<
+# else
+# 	nvcc -I$(HIP_ROOT)/include -Xcompiler -DCUDA_ENABLE_DEPRECATED -c -o $@ $<
+# endif
+
+#/usr/tce/packages/cuda/cuda-10.1.243/nvidia/bin/nvcc -I/usr/tce/packages/hip/hip-3.0.0/include -Xcompiler -DCUDA_ENABLE_DEPRECATED -Xcompiler -DHIP_VERSION_MAJOR=3 -Xcompiler -DHIP_VERSION_MINOR=0 -Xcompiler -DHIP_VERSION_PATCH=0 -x cu square.hipref.cpp -Xlinker '"-rpath=/usr/tce/packages/cuda/cuda-10.1.243/nvidia/lib64:/usr/tce/packages/cuda/cuda-10.1.243"'
+
diff --git a/affinity/simple.cpp b/affinity/simple.cpp
new file mode 100644
index 0000000..aa57147
--- /dev/null
+++ b/affinity/simple.cpp
@@ -0,0 +1,56 @@
+/***********************************************************
+ * Edgar A. Leon
+ * Lawrence Livermore National Laboratory 
+ ***********************************************************/
+
+#include <stdio.h>
+#include <hip/hip_runtime.h>
+#include <mpi.h>
+
+#define STR_SIZE 100
+
+void check_devices(char *buf)
+{
+  hipDevice_t mydev;
+  hipDeviceProp_t devProp;
+  int i, ndevs, myid;
+  char pciBusId[STR_SIZE] = "";
+  int nc = 0; 
+  
+  hipGetDeviceCount(&ndevs);
+  nc += sprintf(buf+nc, "Num devices: %d\n", ndevs); 
+
+  hipGetDevice(&myid); 
+  hipDeviceGet(&mydev, myid);
+  hipDeviceGetPCIBusId(pciBusId, STR_SIZE, mydev);
+  nc += sprintf(buf+nc, "Default device: %s\n", pciBusId); 
+		
+  for (i=0; i<ndevs; i++) { 
+    hipGetDeviceProperties(&devProp, i);
+    nc += sprintf(buf+nc, "\t--\n"); 
+    nc += sprintf(buf+nc, "\tSystem major and minor: %d %d\n",
+		  devProp.major, devProp.minor); 
+    nc += sprintf(buf+nc, "\tName: %s\n", devProp.name); 
+    nc += sprintf(buf+nc, "\tPCI bus ID: 0x%x\n", devProp.pciBusID);
+    nc += sprintf(buf+nc, "\tPCI device ID 0x%x\n", devProp.pciDeviceID);
+  }
+}
+
+
+int main(int argc, char *argv[])
+{
+  int rank, np;
+  char buf[1024]; 
+  
+  MPI_Init(&argc, &argv); 
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank); 
+  MPI_Comm_size(MPI_COMM_WORLD, &np); 
+  
+  check_devices(buf);
+
+  printf("Buf: %s\n", buf);
+  
+  MPI_Finalize(); 
+
+  return 0; 
+}