cbuchner1 · mwhite73 · Oct 6, 2014
diff --git a/Makefile.am b/Makefile.am
@@ -18,6 +18,7 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  compat/sys/time.h compat/getopt/getopt.h \
 			  cpu-miner.c util.c sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c hefty1.c scrypt.c sha2.c \
 			  sph/bmw.h sph/sph_blake.h sph/sph_groestl.h sph/sph_jh.h sph/sph_keccak.h sph/sph_skein.h sph/sph_types.h \
+                          wrapnvml.h \
 			  heavy/heavy.cu \
 			  heavy/cuda_blake512.cu heavy/cuda_blake512.h \
 			  heavy/cuda_combine.cu heavy/cuda_combine.h \
@@ -38,7 +39,8 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
 			  x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
 			  x11/x11.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
-			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu
+			  x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
+                          wrapnvml.cu
 
 ccminer_LDFLAGS		= $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
 ccminer_LDADD		= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@

diff --git a/cpu-miner.c b/cpu-miner.c
@@ -44,6 +44,11 @@
 #pragma comment(lib, "winmm.lib")
 #endif
 
+#if 1 || defined(USE_WRAPNVML)
+#define USE_WRAPNVML 1
+#include "wrapnvml.h"
+#endif
+
 #define PROGRAM_NAME		"minerd"
 #define LP_SCANTIME		60
 #define HEAVYCOIN_BLKHDR_SZ		84
@@ -57,6 +62,7 @@ extern "C"
 int cuda_num_devices();
 void cuda_devicenames();
 int cuda_finddevice(char *name);
+
 #ifdef __cplusplus
 }
 #endif
@@ -196,6 +202,10 @@ static unsigned long accepted_count = 0L;
 static unsigned long rejected_count = 0L;
 static double *thr_hashrates;
 
+#if defined(USE_WRAPNVML)
+wrap_nvml_handle *nvmlh = NULL;
+#endif
+
 #ifdef HAVE_GETOPT_LONG
 #include <getopt.h>
 #else
@@ -384,7 +394,7 @@ static void share_result(int result, const char *reason)
 		hashrate += thr_hashrates[i];
 	result ? accepted_count++ : rejected_count++;
 	pthread_mutex_unlock(&stats_lock);
-	
+
 	sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", 1e-3 * hashrate);
 	applog(LOG_INFO, "accepted: %lu/%lu (%.2f%%), %s khash/s %s",
 		   accepted_count,
@@ -942,13 +952,42 @@ static void *miner_thread(void *userdata)
 			pthread_mutex_unlock(&stats_lock);
 		}
 		if (!opt_quiet) {
+
+#if defined(USE_WRAPNVML)
+                if (nvmlh != NULL) {
+                        unsigned int tempC=0, fanpcnt=0, mwatts=0;
+                        char gputempbuf[64], gpufanbuf[64], gpupowbuf[64];
+                        strcpy(gputempbuf, " N/A");
+                        strcpy(gpufanbuf, " N/A");
+                        strcpy(gpupowbuf, " N/A");
+
+#if 1
+                        if (wrap_nvml_get_tempC(nvmlh, device_map[thr_id], &tempC) == 0)
+                                sprintf(gputempbuf, "%3dC", tempC);
+
+                        if (wrap_nvml_get_fanpcnt(nvmlh, device_map[thr_id], &fanpcnt) == 0)
+                                sprintf(gpufanbuf, "%3d%%", fanpcnt);
+
+                        if (wrap_nvml_get_power_usage(nvmlh, device_map[thr_id], &mwatts) == 0)
+                                sprintf(gpupowbuf, "%dW", (mwatts / 1000));
+#endif
 			sprintf(s, thr_hashrates[thr_id] >= 1e6 ? "%.0f" : "%.2f",
 				1e-3 * thr_hashrates[thr_id]);
-			applog(LOG_INFO, "GPU #%d: %s, %s khash/s",
-				device_map[thr_id], device_name[thr_id], s);
+                        applog(LOG_INFO, "GPU #%d: %s, Temp: %s Fan: %s Power: %s  %s khash/s",
+                                device_map[thr_id], device_name[thr_id], gputempbuf, gpufanbuf, gpupowbuf, s);
 //			applog(LOG_INFO, "thread %d: %lu hashes, %s khash/s",
 //				thr_id, hashes_done, s);
+		} else {
+#endif
+
+			sprintf(s, thr_hashrates[thr_id] >= 1e6 ? "%.0f" : "%.2f",
+                                1e-3 * thr_hashrates[thr_id]);
+			applog(LOG_INFO, "GPU #%d: %s  %s khash/s",
+                                device_map[thr_id], device_name[thr_id], s);
+		}
+
 		}
+
 		if (opt_benchmark && thr_id == opt_n_threads - 1) {
 			double hashrate = 0.;
 			for (i = 0; i < opt_n_threads && thr_hashrates[i]; i++)
@@ -1609,6 +1648,14 @@ int main(int argc, char *argv[])
 			return 1;
 		}
 	}
+#if defined(USE_WRAPNVML)
+        nvmlh = wrap_nvml_create();
+        if (nvmlh == NULL) {
+                applog(LOG_INFO, "NVML GPU monitoring is not available.");
+        } else {
+                applog(LOG_INFO, "NVML GPU temperature, fan, power monitoring enabled.");
+        }
+#endif
 	if (want_stratum) {
 		/* init stratum thread info */
 		stratum_thr_id = opt_n_threads + 2;

diff --git a/wrapnvml.cu b/wrapnvml.cu
@@ -0,0 +1,281 @@
+/*
+ * A trivial little dlopen()-based wrapper library for the
+ * NVIDIA NVML library, to allow runtime discovery of NVML on an
+ * arbitrary system.  This is all very hackish and simple-minded, but
+ * it serves my immediate needs in the short term until NVIDIA provides
+ * a static NVML wrapper library themselves, hopefully in
+ * CUDA 6.5 or maybe sometime shortly after.
+ *
+ * This trivial code is made available under the "new" 3-clause BSD license,
+ * and/or any of the GPL licenses you prefer.
+ * Feel free to use the code and modify as you see fit.
+ *
+ * John E. Stone - john.stone@gmail.com
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "wrapnvml.h"
+#include "cuda_runtime.h"
+
+/*
+ * Wrappers to emulate dlopen() on other systems like Windows
+ */
+#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
+/* Load a DLL by path; returns the module handle as an opaque pointer, */
+/* or NULL if the library could not be loaded.                         */
+static void *wrap_dlopen(const char *filename) {
+  /* The path is a narrow (char) string, so call the ANSI entry point   */
+  /* explicitly: the LoadLibrary macro becomes LoadLibraryW when UNICODE */
+  /* is defined, which does not accept a const char *.                   */
+  return (void *)LoadLibraryA(filename);
+}
+/* Look up an exported symbol in a module returned by wrap_dlopen(). */
+static void *wrap_dlsym(void *h, const char *sym) {
+  return (void *)GetProcAddress((HINSTANCE)h, sym);
+}
+/* Unload a module; the result is inverted so that 0 means success. */
+static int wrap_dlclose(void *h) {
+  /* FreeLibrary returns nonzero on success */
+  return (!FreeLibrary((HINSTANCE)h));
+}
+#else
+/* assume we can use dlopen itself... */
+#include <dlfcn.h>
+/* Load a shared object by path, resolving all symbols immediately. */
+static void *wrap_dlopen(const char *filename) {
+  return dlopen(filename, RTLD_NOW);
+}
+/* Look up an exported symbol in a library returned by wrap_dlopen(). */
+static void *wrap_dlsym(void *h, const char *sym) {
+  return dlsym(h, sym);
+}
+/* Unload a library; passes through the dlclose() result. */
+static int wrap_dlclose(void *h) {
+  return dlclose(h);
+}
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Undo a partially completed wrap_nvml_create(): optionally shut NVML
+ * down, then release the lookup tables, the library and the handle.
+ * The handle is calloc()ed, so tables not yet allocated are still NULL
+ * and free() ignores them.
+ */
+static void wrap_nvml_create_abort(wrap_nvml_handle *nvmlh, int nvml_started) {
+  if (nvml_started)
+    nvmlh->nvmlShutdown();
+
+  free(nvmlh->devs);
+  free(nvmlh->nvml_pci_domain_id);
+  free(nvmlh->nvml_pci_bus_id);
+  free(nvmlh->nvml_pci_device_id);
+  free(nvmlh->nvml_cuda_device_id);
+  free(nvmlh->cuda_nvml_device_id);
+  wrap_dlclose(nvmlh->nvml_dll);
+  free(nvmlh);
+}
+
+/*
+ * Load the NVML library at run time, resolve the entry points this
+ * wrapper uses, initialize NVML and build the tables that translate
+ * between CUDA device indices and NVML device indices (matched by PCI
+ * domain/bus/device).
+ *
+ * Returns a heap-allocated handle on success, or NULL if the library
+ * cannot be found, a required symbol is missing, NVML or CUDA fail to
+ * report their devices, or memory runs out.  Nothing is leaked and NVML
+ * is shut down again on every failure path.
+ */
+wrap_nvml_handle * wrap_nvml_create() {
+  int i=0;
+  wrap_nvml_handle *nvmlh = NULL;
+
+  /* 
+   * We use hard-coded library installation locations for the time being...
+   * No idea where or if libnvidia-ml.so is installed on MacOS X, a 
+   * deep scouring of the filesystem on one of the Mac CUDA build boxes
+   * I used turned up nothing, so for now it's not going to work on OSX.
+   */
+#if defined(_WIN64)
+  /* 64-bit Windows */
+#define  libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll"
+#elif defined(_WIN32) || defined(_MSC_VER)
+  /* 32-bit Windows */
+#define  libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll"
+#elif defined(__linux) && (defined(__i386__) || defined(__ARM_ARCH_7A__))
+  /* 32-bit linux assumed */
+#define  libnvidia_ml "/usr/lib32/libnvidia-ml.so"
+#elif defined(__linux)
+  /* 64-bit linux assumed */
+#define  libnvidia_ml "/usr/lib/libnvidia-ml.so"
+#else
+#error "Unrecognized platform: need NVML DLL path for this platform..."
+#endif
+
+  /* Use the same platform test as the dlopen() emulation above; a bare */
+  /* "WIN32" is only defined when the build system happens to set it.   */
+#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
+  char tmp[512];
+  DWORD tmplen = ExpandEnvironmentStringsA(libnvidia_ml, tmp, sizeof(tmp));
+  /* 0 means failure, a value above the buffer size means truncation */
+  if (tmplen == 0 || tmplen > sizeof(tmp))
+    return NULL;
+#else
+  char tmp[512] = libnvidia_ml;
+#endif
+
+  void *nvml_dll = wrap_dlopen(tmp);
+  if (nvml_dll == NULL)
+    return NULL;
+
+  nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle));
+  if (nvmlh == NULL) {
+    wrap_dlclose(nvml_dll);
+    return NULL;
+  }
+
+  nvmlh->nvml_dll = nvml_dll;  
+
+  nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void)) 
+    wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");
+  nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *)) 
+    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");
+  nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *)) 
+    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2");
+  nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *)) 
+    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo");
+  nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int))
+    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName");
+  nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *))
+    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature");
+  nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
+    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed");
+  nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
+    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");
+  nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)()) 
+    wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown");
+
+  /* Every entry point is required; give up if any one is missing. */
+  if (nvmlh->nvmlInit == NULL || 
+      nvmlh->nvmlShutdown == NULL ||
+      nvmlh->nvmlDeviceGetCount == NULL ||
+      nvmlh->nvmlDeviceGetHandleByIndex == NULL || 
+      nvmlh->nvmlDeviceGetPciInfo == NULL ||
+      nvmlh->nvmlDeviceGetName == NULL ||
+      nvmlh->nvmlDeviceGetTemperature == NULL ||
+      nvmlh->nvmlDeviceGetFanSpeed == NULL ||
+      nvmlh->nvmlDeviceGetPowerUsage == NULL
+      ) {
+    wrap_nvml_create_abort(nvmlh, 0);
+    return NULL;
+  }
+
+  if (nvmlh->nvmlInit() != WRAPNVML_SUCCESS) {
+    wrap_nvml_create_abort(nvmlh, 0);
+    return NULL;
+  }
+
+  /* A negative count would turn into a huge size in the calloc() calls */
+  if (nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount) != WRAPNVML_SUCCESS ||
+      nvmlh->nvml_gpucount < 0) {
+    wrap_nvml_create_abort(nvmlh, 1);
+    return NULL;
+  }
+
+  /* Query CUDA device count, in case it doesn't agree with NVML, since  */
+  /* CUDA will only report GPUs with compute capability greater than 1.0 */ 
+  if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
+    wrap_nvml_create_abort(nvmlh, 1);
+    return NULL;
+  }
+
+  nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t));
+  nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
+  nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
+  nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
+  nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int));
+  nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int));
+
+  /* calloc(0, ...) may legitimately return NULL, so a NULL table is */
+  /* only an error when its element count is non-zero.               */
+  if ((nvmlh->nvml_gpucount > 0 &&
+       (nvmlh->devs == NULL ||
+        nvmlh->nvml_pci_domain_id == NULL ||
+        nvmlh->nvml_pci_bus_id == NULL ||
+        nvmlh->nvml_pci_device_id == NULL ||
+        nvmlh->nvml_cuda_device_id == NULL)) ||
+      (nvmlh->cuda_gpucount > 0 && nvmlh->cuda_nvml_device_id == NULL)) {
+    wrap_nvml_create_abort(nvmlh, 1);
+    return NULL;
+  }
+
+  /* Obtain the device handle and PCI location of every NVML device.   */
+  /* A device whose queries fail gets all-ones PCI IDs, which cannot   */
+  /* equal the non-negative IDs CUDA reports, so it is never matched.  */
+  for (i=0; i<nvmlh->nvml_gpucount; i++) {
+    wrap_nvmlPciInfo_t pciinfo;
+
+    nvmlh->nvml_cuda_device_id[i] = -1;
+
+    if (nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]) == WRAPNVML_SUCCESS &&
+        nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo) == WRAPNVML_SUCCESS) {
+      nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
+      nvmlh->nvml_pci_bus_id[i]    = pciinfo.bus;
+      nvmlh->nvml_pci_device_id[i] = pciinfo.device;
+    } else {
+      nvmlh->nvml_pci_domain_id[i] = ~0u;
+      nvmlh->nvml_pci_bus_id[i]    = ~0u;
+      nvmlh->nvml_pci_device_id[i] = ~0u;
+    }
+  }
+
+  /* Build the CUDA <-> NVML index mapping by comparing PCI locations; */
+  /* -1 marks a device with no counterpart on the other side.          */
+  for (i=0; i<nvmlh->cuda_gpucount; i++) {
+    cudaDeviceProp props;
+    nvmlh->cuda_nvml_device_id[i] = -1;
+
+    if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
+      int j;
+      for (j=0; j<nvmlh->nvml_gpucount; j++) {
+        if ((nvmlh->nvml_pci_domain_id[j] == (unsigned int) props.pciDomainID) &&
+            (nvmlh->nvml_pci_bus_id[j]    == (unsigned int) props.pciBusID) &&
+            (nvmlh->nvml_pci_device_id[j] == (unsigned int) props.pciDeviceID)) {
+          nvmlh->nvml_cuda_device_id[j] = i;
+          nvmlh->cuda_nvml_device_id[i] = j;
+        }
+      }
+    }
+  }
+
+  return nvmlh;
+}
+
+
+/*
+ * Shut NVML down, unload the library and release the handle together
+ * with the device and PCI/CUDA mapping tables that wrap_nvml_create()
+ * allocated.  A NULL handle is accepted and ignored.  Always returns 0.
+ */
+int wrap_nvml_destroy(wrap_nvml_handle *nvmlh) {
+  if (nvmlh == NULL)
+    return 0;
+
+  nvmlh->nvmlShutdown();
+  wrap_dlclose(nvmlh->nvml_dll);
+
+  /* These tables are owned by the handle; free(NULL) is a no-op. */
+  free(nvmlh->devs);
+  free(nvmlh->nvml_pci_domain_id);
+  free(nvmlh->nvml_pci_bus_id);
+  free(nvmlh->nvml_pci_device_id);
+  free(nvmlh->nvml_cuda_device_id);
+  free(nvmlh->cuda_nvml_device_id);
+
+  free(nvmlh);
+  return 0;
+}
+
+
+/* Store the number of GPUs that NVML reported in *gpucount; returns 0. */
+int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) {
+  const int nvml_total = nvmlh->nvml_gpucount;
+
+  *gpucount = nvml_total;
+  return 0;
+}
+
+/* Store the number of GPUs that CUDA reported in *gpucount; returns 0. */
+int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) {
+  const int cuda_total = nvmlh->cuda_gpucount;
+
+  *gpucount = cuda_total;
+  return 0;
+}
+
+/*
+ * Copy the NVML name of the GPU with CUDA index 'cudaindex' into
+ * 'namebuf' (capacity 'bufsize').
+ * Returns 0 on success, -1 if the index is out of range, the device has
+ * no NVML counterpart, or the NVML query fails.
+ */
+int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh,
+                           int cudaindex, 
+                           char *namebuf,
+                           int bufsize) {
+  int gpuindex;
+
+  /* cudaindex comes from the caller; never index the table blindly */
+  if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
+    return -1;
+
+  gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
+  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
+    return -1;
+
+  if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS)
+    return -1; 
+
+  return 0;
+}
+
+
+/*
+ * Query the temperature of the GPU with CUDA index 'cudaindex' and
+ * store it in *tempC.
+ * Returns 0 on success, -1 if the index is out of range, the device has
+ * no NVML counterpart, or the NVML query fails.
+ */
+int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh,
+                        int cudaindex, unsigned int *tempC) {
+  wrap_nvmlReturn_t rc;
+  int gpuindex;
+
+  /* cudaindex comes from the caller; never index the table blindly */
+  if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
+    return -1;
+
+  gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
+  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
+    return -1;
+
+  rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC);
+  if (rc != WRAPNVML_SUCCESS) {
+    return -1; 
+  }
+
+  return 0;
+}
+
+
+/*
+ * Query the fan speed of the GPU with CUDA index 'cudaindex' and store
+ * it in *fanpcnt (the caller prints it as a percentage).
+ * Returns 0 on success, -1 if the index is out of range, the device has
+ * no NVML counterpart, or the NVML query fails.
+ */
+int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh,
+                          int cudaindex, unsigned int *fanpcnt) {
+  wrap_nvmlReturn_t rc;
+  int gpuindex;
+
+  /* cudaindex comes from the caller; never index the table blindly */
+  if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
+    return -1;
+
+  gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
+  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
+    return -1;
+
+  rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt);
+  if (rc != WRAPNVML_SUCCESS) {
+    return -1; 
+  }
+
+  return 0;
+}
+
+
+/*
+ * Query the power draw of the GPU with CUDA index 'cudaindex' and store
+ * it in *milliwatts.
+ * Returns 0 on success, -1 if the index is out of range, the device has
+ * no NVML counterpart, or the NVML query fails.
+ */
+int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh,
+                              int cudaindex,
+                              unsigned int *milliwatts) {
+  int gpuindex;
+
+  /* cudaindex comes from the caller; never index the table blindly */
+  if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
+    return -1;
+
+  gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
+  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
+    return -1;
+
+  if (nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts) != WRAPNVML_SUCCESS)
+    return -1; 
+
+  return 0;
+}
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+