Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
compat/sys/time.h compat/getopt/getopt.h \
cpu-miner.c util.c sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c hefty1.c scrypt.c sha2.c \
sph/bmw.h sph/sph_blake.h sph/sph_groestl.h sph/sph_jh.h sph/sph_keccak.h sph/sph_skein.h sph/sph_types.h \
wrapnvml.h \
heavy/heavy.cu \
heavy/cuda_blake512.cu heavy/cuda_blake512.h \
heavy/cuda_combine.cu heavy/cuda_combine.h \
Expand All @@ -38,7 +39,8 @@ ccminer_SOURCES = elist.h miner.h compat.h \
sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \
x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
x11/x11.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
wrapnvml.cu

ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
Expand Down
53 changes: 50 additions & 3 deletions cpu-miner.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@
#pragma comment(lib, "winmm.lib")
#endif

#if 1 || defined(USE_WRAPNVML)
#define USE_WRAPNVML 1
#include "wrapnvml.h"
#endif

#define PROGRAM_NAME "minerd"
#define LP_SCANTIME 60
#define HEAVYCOIN_BLKHDR_SZ 84
Expand All @@ -57,6 +62,7 @@ extern "C"
int cuda_num_devices();
void cuda_devicenames();
int cuda_finddevice(char *name);

#ifdef __cplusplus
}
#endif
Expand Down Expand Up @@ -196,6 +202,10 @@ static unsigned long accepted_count = 0L;
static unsigned long rejected_count = 0L;
static double *thr_hashrates;

#if defined(USE_WRAPNVML)
wrap_nvml_handle *nvmlh = NULL;
#endif

#ifdef HAVE_GETOPT_LONG
#include <getopt.h>
#else
Expand Down Expand Up @@ -384,7 +394,7 @@ static void share_result(int result, const char *reason)
hashrate += thr_hashrates[i];
result ? accepted_count++ : rejected_count++;
pthread_mutex_unlock(&stats_lock);

sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", 1e-3 * hashrate);
applog(LOG_INFO, "accepted: %lu/%lu (%.2f%%), %s khash/s %s",
accepted_count,
Expand Down Expand Up @@ -942,13 +952,42 @@ static void *miner_thread(void *userdata)
pthread_mutex_unlock(&stats_lock);
}
if (!opt_quiet) {

#if defined(USE_WRAPNVML)
if (nvmlh != NULL) {
unsigned int tempC=0, fanpcnt=0, mwatts=0;
char gputempbuf[64], gpufanbuf[64], gpupowbuf[64];
strcpy(gputempbuf, " N/A");
strcpy(gpufanbuf, " N/A");
strcpy(gpupowbuf, " N/A");

#if 1
if (wrap_nvml_get_tempC(nvmlh, device_map[thr_id], &tempC) == 0)
sprintf(gputempbuf, "%3dC", tempC);

if (wrap_nvml_get_fanpcnt(nvmlh, device_map[thr_id], &fanpcnt) == 0)
sprintf(gpufanbuf, "%3d%%", fanpcnt);

if (wrap_nvml_get_power_usage(nvmlh, device_map[thr_id], &mwatts) == 0)
sprintf(gpupowbuf, "%dW", (mwatts / 1000));
#endif
sprintf(s, thr_hashrates[thr_id] >= 1e6 ? "%.0f" : "%.2f",
1e-3 * thr_hashrates[thr_id]);
applog(LOG_INFO, "GPU #%d: %s, %s khash/s",
device_map[thr_id], device_name[thr_id], s);
applog(LOG_INFO, "GPU #%d: %s, Temp: %s Fan: %s Power: %s %s khash/s",
device_map[thr_id], device_name[thr_id], gputempbuf, gpufanbuf, gpupowbuf, s);
// applog(LOG_INFO, "thread %d: %lu hashes, %s khash/s",
// thr_id, hashes_done, s);
} else {
#endif

sprintf(s, thr_hashrates[thr_id] >= 1e6 ? "%.0f" : "%.2f",
1e-3 * thr_hashrates[thr_id]);
applog(LOG_INFO, "GPU #%d: %s %s khash/s",
device_map[thr_id], device_name[thr_id], s);
}

}

if (opt_benchmark && thr_id == opt_n_threads - 1) {
double hashrate = 0.;
for (i = 0; i < opt_n_threads && thr_hashrates[i]; i++)
Expand Down Expand Up @@ -1609,6 +1648,14 @@ int main(int argc, char *argv[])
return 1;
}
}
#if defined(USE_WRAPNVML)
nvmlh = wrap_nvml_create();
if (nvmlh == NULL) {
applog(LOG_INFO, "NVML GPU monitoring is not available.");
} else {
applog(LOG_INFO, "NVML GPU temperature, fan, power monitoring enabled.");
}
#endif
if (want_stratum) {
/* init stratum thread info */
stratum_thr_id = opt_n_threads + 2;
Expand Down
281 changes: 281 additions & 0 deletions wrapnvml.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,281 @@
/*
* A trivial little dlopen()-based wrapper library for the
* NVIDIA NVML library, to allow runtime discovery of NVML on an
* arbitrary system. This is all very hackish and simple-minded, but
* it serves my immediate needs in the short term until NVIDIA provides
* a static NVML wrapper library themselves, hopefully in
* CUDA 6.5 or maybe sometime shortly after.
*
* This trivial code is made available under the "new" 3-clause BSD license,
* and/or any of the GPL licenses you prefer.
* Feel free to use the code and modify as you see fit.
*
* John E. Stone - john.stone@gmail.com
*
*/

#include <stdio.h>
#include <stdlib.h>
#include "wrapnvml.h"
#include "cuda_runtime.h"

/*
* Wrappers to emulate dlopen() on other systems like Windows
*/
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
#include <windows.h>
static void *wrap_dlopen(const char *filename) {
return (void *)LoadLibrary(filename);
}
static void *wrap_dlsym(void *h, const char *sym) {
return (void *)GetProcAddress((HINSTANCE)h, sym);
}
static int wrap_dlclose(void *h) {
/* FreeLibrary returns nonzero on success */
return (!FreeLibrary((HINSTANCE)h));
}
#else
/* assume we can use dlopen itself... */
#include <dlfcn.h>
static void *wrap_dlopen(const char *filename) {
return dlopen(filename, RTLD_NOW);
}
static void *wrap_dlsym(void *h, const char *sym) {
return dlsym(h, sym);
}
static int wrap_dlclose(void *h) {
return dlclose(h);
}
#endif

#if defined(__cplusplus)
extern "C" {
#endif

wrap_nvml_handle * wrap_nvml_create() {
int i=0;
wrap_nvml_handle *nvmlh = NULL;

/*
* We use hard-coded library installation locations for the time being...
* No idea where or if libnvidia-ml.so is installed on MacOS X, a
* deep scouring of the filesystem on one of the Mac CUDA build boxes
* I used turned up nothing, so for now it's not going to work on OSX.
*/
#if defined(_WIN64)
/* 64-bit Windows */
#define libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll"
#elif defined(_WIN32) || defined(_MSC_VER)
/* 32-bit Windows */
#define libnvidia_ml "%PROGRAMFILES%/NVIDIA Corporation/NVSMI/nvml.dll"
#elif defined(__linux) && (defined(__i386__) || defined(__ARM_ARCH_7A__))
/* 32-bit linux assumed */
#define libnvidia_ml "/usr/lib32/libnvidia-ml.so"
#elif defined(__linux)
/* 64-bit linux assumed */
#define libnvidia_ml "/usr/lib/libnvidia-ml.so"
#else
#error "Unrecognized platform: need NVML DLL path for this platform..."
#endif

#if WIN32
char tmp[512];
ExpandEnvironmentStringsA(libnvidia_ml, tmp, sizeof(tmp));
#else
char tmp[512] = libnvidia_ml;
#endif

void *nvml_dll = wrap_dlopen(tmp);
if (nvml_dll == NULL)
return NULL;

nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle));

nvmlh->nvml_dll = nvml_dll;

nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");
nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");
nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2");
nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo");
nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName");
nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature");
nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed");
nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");
nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)())
wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown");

if (nvmlh->nvmlInit == NULL ||
nvmlh->nvmlShutdown == NULL ||
nvmlh->nvmlDeviceGetCount == NULL ||
nvmlh->nvmlDeviceGetHandleByIndex == NULL ||
nvmlh->nvmlDeviceGetPciInfo == NULL ||
nvmlh->nvmlDeviceGetName == NULL ||
nvmlh->nvmlDeviceGetTemperature == NULL ||
nvmlh->nvmlDeviceGetFanSpeed == NULL ||
nvmlh->nvmlDeviceGetPowerUsage == NULL
) {
#if 0
printf("Failed to obtain all required NVML function pointers\n");
#endif
wrap_dlclose(nvmlh->nvml_dll);
free(nvmlh);
return NULL;
}

nvmlh->nvmlInit();
nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);

/* Query CUDA device count, in case it doesn't agree with NVML, since */
/* CUDA will only report GPUs with compute capability greater than 1.0 */
if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
#if 0
printf("Failed to query CUDA device count!\n");
#endif
wrap_dlclose(nvmlh->nvml_dll);
free(nvmlh);
return NULL;
}

nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t));
nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int));
nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int));

/* Obtain GPU device handles we're going to need repeatedly... */
for (i=0; i<nvmlh->nvml_gpucount; i++) {
nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
}

/* Query PCI info for each NVML device, and build table for mapping of */
/* CUDA device IDs to NVML device IDs and vice versa */
for (i=0; i<nvmlh->nvml_gpucount; i++) {
wrap_nvmlPciInfo_t pciinfo;
nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo);
nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
nvmlh->nvml_pci_bus_id[i] = pciinfo.bus;
nvmlh->nvml_pci_device_id[i] = pciinfo.device;
}

/* build mapping of NVML device IDs to CUDA IDs */
for (i=0; i<nvmlh->nvml_gpucount; i++) {
nvmlh->nvml_cuda_device_id[i] = -1;
}
for (i=0; i<nvmlh->cuda_gpucount; i++) {
cudaDeviceProp props;
nvmlh->cuda_nvml_device_id[i] = -1;

if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
int j;
for (j=0; j<nvmlh->nvml_gpucount; j++) {
if ((nvmlh->nvml_pci_domain_id[j] == props.pciDomainID) &&
(nvmlh->nvml_pci_bus_id[j] == props.pciBusID) &&
(nvmlh->nvml_pci_device_id[j] == props.pciDeviceID)) {
#if 0
printf("CUDA GPU[%d] matches NVML GPU[%d]\n", i, j);
#endif
nvmlh->nvml_cuda_device_id[j] = i;
nvmlh->cuda_nvml_device_id[i] = j;
}
}
}
}

return nvmlh;
}


int wrap_nvml_destroy(wrap_nvml_handle *nvmlh) {
nvmlh->nvmlShutdown();

wrap_dlclose(nvmlh->nvml_dll);
free(nvmlh);
return 0;
}


int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) {
*gpucount = nvmlh->nvml_gpucount;
return 0;
}

int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) {
*gpucount = nvmlh->cuda_gpucount;
return 0;
}

int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh,
int cudaindex,
char *namebuf,
int bufsize) {
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;

if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS)
return -1;

return 0;
}


int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh,
int cudaindex, unsigned int *tempC) {
wrap_nvmlReturn_t rc;
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;

rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC);
if (rc != WRAPNVML_SUCCESS) {
return -1;
}

return 0;
}


int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh,
int cudaindex, unsigned int *fanpcnt) {
wrap_nvmlReturn_t rc;
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;

rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt);
if (rc != WRAPNVML_SUCCESS) {
return -1;
}

return 0;
}


int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh,
int cudaindex,
unsigned int *milliwatts) {
int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
return -1;

if (nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts) != WRAPNVML_SUCCESS)
return -1;

return 0;
}


#if defined(__cplusplus)
}
#endif


Loading