diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h index 48f939cfb3..e5f141a1cc 100644 --- a/ethminer/MinerAux.h +++ b/ethminer/MinerAux.h @@ -292,6 +292,18 @@ class MinerCLI BOOST_THROW_EXCEPTION(BadArgument()); } } + else if (arg == "--cl-kernel" && i + 1 < argc) + { + try + { + m_openclSelectedKernel = stol(argv[++i]); + } + catch (...) + { + cerr << "Bad " << arg << " option: " << argv[i] << endl; + BOOST_THROW_EXCEPTION(BadArgument()); + } + } #endif #if ETH_ETHASHCL || ETH_ETHASHCUDA else if ((arg == "--cl-global-work" || arg == "--cuda-grid-size") && i + 1 < argc) @@ -501,7 +513,9 @@ class MinerCLI m_miningThreads = m_openclDeviceCount; } + CLMiner::setCLKernel(m_openclSelectedKernel); CLMiner::setThreadsPerHash(m_openclThreadsPerHash); + if (!CLMiner::configureGPU( m_localWorkSize, m_globalWorkSizeMultiplier, @@ -601,11 +615,17 @@ class MinerCLI << " sequential - load DAG on GPUs one after another. Use this when the miner crashes during DAG generation" << endl << " single - generate DAG on device n, then copy to other devices" << endl #if ETH_ETHASHCL + << " OpenCL configuration:" << endl + << " --cl-kernel Use a different OpenCL kernel (default: use stable kernel)" << endl + << " 0: stable kernel" << endl + << " 1: unstable kernel" << endl +// << " 2: experimental kernel" << endl << " --cl-local-work Set the OpenCL local work size. Default is " << CLMiner::c_defaultLocalWorkSize << endl << " --cl-global-work Set the OpenCL global work size as a multiple of the local work size. Default is " << CLMiner::c_defaultGlobalWorkSizeMultiplier << " * " << CLMiner::c_defaultLocalWorkSize << endl << " --cl-parallel-hash <1 2 ..8> Define how many threads to associate per hash. Default=8" << endl #endif #if ETH_ETHASHCUDA + << " CUDA configuration:" << endl << " --cuda-block-size Set the CUDA block work size. Default is " << toString(ethash_cuda_miner::c_defaultBlockSize) << endl << " --cuda-grid-size Set the CUDA grid size. Default is " << toString(ethash_cuda_miner::c_defaultGridSize) << endl << " --cuda-streams Set the number of CUDA streams. Default is " << toString(ethash_cuda_miner::c_defaultNumStreams) << endl @@ -618,6 +638,7 @@ class MinerCLI << " --cuda-parallel-hash <1 2 ..8> Define how many hashes to calculate in a kernel, can be scaled to achieve better performance. Default=4" << endl #endif #if API_CORE + << " API core configuration:" << endl << " --api-port Set the api port, the miner should listen to. Use 0 to disable. Default=0, use negative numbers to run in readonly mode. for example -3333." << endl #endif ; @@ -856,30 +877,22 @@ class MinerCLI } this_thread::sleep_for(chrono::milliseconds(_recheckPeriod)); } - if (EthashAux::eval(solution.seedHash, solution.headerHash, solution.nonce).value < solution.boundary) - { - bool ok = prpc->eth_submitWork("0x" + toHex(solution.nonce), "0x" + toString(solution.headerHash), "0x" + toString(solution.mixHash)); - if (ok) { - cnote << "Solution found; Submitted to" << _remote << "..."; - cnote << " Nonce:" << solution.nonce; - cnote << " headerHash:" << solution.headerHash.hex(); - cnote << " mixHash:" << solution.mixHash.hex(); - cnote << EthLime << "Accepted." << EthReset; - f.acceptedSolution(false); - } - else { - cwarn << "Solution found; Submitted to" << _remote << "..."; - cwarn << " Nonce:" << solution.nonce; - cwarn << " headerHash:" << solution.headerHash.hex(); - cwarn << " mixHash:" << solution.mixHash.hex(); - cwarn << "Not accepted."; - f.rejectedSolution(false); - } - //exit(0); + bool ok = prpc->eth_submitWork("0x" + toHex(solution.nonce), "0x" + toString(solution.headerHash), "0x" + toString(solution.mixHash)); + if (ok) { + cnote << "Solution found; Submitted to" << _remote; + cnote << " Nonce:" << solution.nonce; + cnote << " headerHash:" << solution.headerHash.hex(); + cnote << " mixHash:" << solution.mixHash.hex(); + cnote << EthLime << " Accepted." << EthReset; + f.acceptedSolution(solution.stale); } else { - f.failedSolution(); - cwarn << "FAILURE: GPU gave incorrect result!"; + cwarn << "Solution found; Submitted to" << _remote; + cwarn << " Nonce:" << solution.nonce; + cwarn << " headerHash:" << solution.headerHash.hex(); + cwarn << " mixHash:" << solution.mixHash.hex(); + cwarn << EthYellow << " Rejected." << EthReset; + f.rejectedSolution(solution.stale); } } catch (jsonrpc::JsonRpcException&) @@ -1056,6 +1069,7 @@ class MinerCLI unsigned m_miningThreads = UINT_MAX; bool m_shouldListDevices = false; #if ETH_ETHASHCL + unsigned m_openclSelectedKernel = 0; ///< A numeric value for the selected OpenCL kernel unsigned m_openclDeviceCount = 0; unsigned m_openclDevices[16]; unsigned m_openclThreadsPerHash = 8; diff --git a/libethash-cl/CLMiner.cpp b/libethash-cl/CLMiner.cpp index 625d381449..a8f9594e50 100644 --- a/libethash-cl/CLMiner.cpp +++ b/libethash-cl/CLMiner.cpp @@ -5,7 +5,8 @@ #include "CLMiner.h" #include -#include "CLMiner_kernel.h" +#include "CLMiner_kernel_stable.h" +#include "CLMiner_kernel_unstable.h" using namespace dev; using namespace eth; @@ -18,6 +19,7 @@ namespace eth unsigned CLMiner::s_workgroupSize = CLMiner::c_defaultLocalWorkSize; unsigned CLMiner::s_initialGlobalWorkSize = CLMiner::c_defaultGlobalWorkSizeMultiplier * CLMiner::c_defaultLocalWorkSize; unsigned CLMiner::s_threadsPerHash = 8; +CLKernelName CLMiner::s_clKernelName = CLMiner::c_defaultKernelName; constexpr size_t c_maxSearchResults = 1; @@ -30,6 +32,172 @@ struct CLChannel: public LogChannel #define cllog clog(CLChannel) #define ETHCL_LOG(_contents) cllog << _contents +/** + * Returns the name of a numerical cl_int error + * Takes constants from CL/cl.h and returns them in a readable format + */ +static const char *strClError(cl_int err) { + + switch (err) { + case CL_SUCCESS: + return "CL_SUCCESS"; + case CL_DEVICE_NOT_FOUND: + return "CL_DEVICE_NOT_FOUND"; + case CL_DEVICE_NOT_AVAILABLE: + return "CL_DEVICE_NOT_AVAILABLE"; + case CL_COMPILER_NOT_AVAILABLE: + return "CL_COMPILER_NOT_AVAILABLE"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + case CL_OUT_OF_RESOURCES: + return "CL_OUT_OF_RESOURCES"; + case CL_OUT_OF_HOST_MEMORY: + return "CL_OUT_OF_HOST_MEMORY"; + case CL_PROFILING_INFO_NOT_AVAILABLE: + return "CL_PROFILING_INFO_NOT_AVAILABLE"; + case CL_MEM_COPY_OVERLAP: + return "CL_MEM_COPY_OVERLAP"; + case CL_IMAGE_FORMAT_MISMATCH: + return "CL_IMAGE_FORMAT_MISMATCH"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: + return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; + case CL_BUILD_PROGRAM_FAILURE: + return "CL_BUILD_PROGRAM_FAILURE"; + case CL_MAP_FAILURE: + return "CL_MAP_FAILURE"; + case CL_MISALIGNED_SUB_BUFFER_OFFSET: + return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: + return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; + +#ifdef CL_VERSION_1_2 + case CL_COMPILE_PROGRAM_FAILURE: + return "CL_COMPILE_PROGRAM_FAILURE"; + case CL_LINKER_NOT_AVAILABLE: + return "CL_LINKER_NOT_AVAILABLE"; + case CL_LINK_PROGRAM_FAILURE: + return "CL_LINK_PROGRAM_FAILURE"; + case CL_DEVICE_PARTITION_FAILED: + return "CL_DEVICE_PARTITION_FAILED"; + case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: + return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; +#endif // CL_VERSION_1_2 + + case CL_INVALID_VALUE: + return "CL_INVALID_VALUE"; + case CL_INVALID_DEVICE_TYPE: + return "CL_INVALID_DEVICE_TYPE"; + case CL_INVALID_PLATFORM: + return "CL_INVALID_PLATFORM"; + case CL_INVALID_DEVICE: + return "CL_INVALID_DEVICE"; + case CL_INVALID_CONTEXT: + return "CL_INVALID_CONTEXT"; + case CL_INVALID_QUEUE_PROPERTIES: + return "CL_INVALID_QUEUE_PROPERTIES"; + case CL_INVALID_COMMAND_QUEUE: + return "CL_INVALID_COMMAND_QUEUE"; + case CL_INVALID_HOST_PTR: + return "CL_INVALID_HOST_PTR"; + case CL_INVALID_MEM_OBJECT: + return "CL_INVALID_MEM_OBJECT"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: + return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; + case CL_INVALID_IMAGE_SIZE: + return "CL_INVALID_IMAGE_SIZE"; + case CL_INVALID_SAMPLER: + return "CL_INVALID_SAMPLER"; + case CL_INVALID_BINARY: + return "CL_INVALID_BINARY"; + case CL_INVALID_BUILD_OPTIONS: + return "CL_INVALID_BUILD_OPTIONS"; + case CL_INVALID_PROGRAM: + return "CL_INVALID_PROGRAM"; + case CL_INVALID_PROGRAM_EXECUTABLE: + return "CL_INVALID_PROGRAM_EXECUTABLE"; + case CL_INVALID_KERNEL_NAME: + return "CL_INVALID_KERNEL_NAME"; + case CL_INVALID_KERNEL_DEFINITION: + return "CL_INVALID_KERNEL_DEFINITION"; + case CL_INVALID_KERNEL: + return "CL_INVALID_KERNEL"; + case CL_INVALID_ARG_INDEX: + return "CL_INVALID_ARG_INDEX"; + case CL_INVALID_ARG_VALUE: + return "CL_INVALID_ARG_VALUE"; + case CL_INVALID_ARG_SIZE: + return "CL_INVALID_ARG_SIZE"; + case CL_INVALID_KERNEL_ARGS: + return "CL_INVALID_KERNEL_ARGS"; + case CL_INVALID_WORK_DIMENSION: + return "CL_INVALID_WORK_DIMENSION"; + case CL_INVALID_WORK_GROUP_SIZE: + return "CL_INVALID_WORK_GROUP_SIZE"; + case CL_INVALID_WORK_ITEM_SIZE: + return "CL_INVALID_WORK_ITEM_SIZE"; + case CL_INVALID_GLOBAL_OFFSET: + return "CL_INVALID_GLOBAL_OFFSET"; + case CL_INVALID_EVENT_WAIT_LIST: + return "CL_INVALID_EVENT_WAIT_LIST"; + case CL_INVALID_EVENT: + return "CL_INVALID_EVENT"; + case CL_INVALID_OPERATION: + return "CL_INVALID_OPERATION"; + case CL_INVALID_GL_OBJECT: + return "CL_INVALID_GL_OBJECT"; + case CL_INVALID_BUFFER_SIZE: + return "CL_INVALID_BUFFER_SIZE"; + case CL_INVALID_MIP_LEVEL: + return "CL_INVALID_MIP_LEVEL"; + case CL_INVALID_GLOBAL_WORK_SIZE: + return "CL_INVALID_GLOBAL_WORK_SIZE"; + case CL_INVALID_PROPERTY: + return "CL_INVALID_PROPERTY"; + +#ifdef CL_VERSION_1_2 + case CL_INVALID_IMAGE_DESCRIPTOR: + return "CL_INVALID_IMAGE_DESCRIPTOR"; + case CL_INVALID_COMPILER_OPTIONS: + return "CL_INVALID_COMPILER_OPTIONS"; + case CL_INVALID_LINKER_OPTIONS: + return "CL_INVALID_LINKER_OPTIONS"; + case CL_INVALID_DEVICE_PARTITION_COUNT: + return "CL_INVALID_DEVICE_PARTITION_COUNT"; +#endif // CL_VERSION_1_2 + +#ifdef CL_VERSION_2_0 + case CL_INVALID_PIPE_SIZE: + return "CL_INVALID_PIPE_SIZE"; + case CL_INVALID_DEVICE_QUEUE: + return "CL_INVALID_DEVICE_QUEUE"; +#endif // CL_VERSION_2_0 + +#ifdef CL_VERSION_2_2 + case CL_INVALID_SPEC_ID: + return "CL_INVALID_SPEC_ID"; + case CL_MAX_SIZE_RESTRICTION_EXCEEDED: + return "CL_MAX_SIZE_RESTRICTION_EXCEEDED"; +#endif // CL_VERSION_2_2 + } + + return "Unknown CL error encountered"; +} + +/** + * Prints cl::Errors in a uniform way + * @param msg text prepending the error message + * @param clerr cl:Error object + * + * Prints errors in the format: + * msg: what(), string err() (numeric err()) + */ +static std::string ethCLErrorHelper(const char *msg, cl::Error const &clerr) { + std::ostringstream osstream; + osstream << msg << ": " << clerr.what() << ": " << strClError(clerr.err()) + << " (" << clerr.err() << ")"; + return osstream.str(); +} + namespace { @@ -103,9 +271,11 @@ void CLMiner::report(uint64_t _nonce, WorkPackage const& _w) // TODO: Why re-evaluating? Result r = EthashAux::eval(_w.seed, _w.header, _nonce); if (r.value < _w.boundary) - farm.submitProof(Solution{_nonce, r.mixHash, _w.header, _w.seed, _w.boundary}); - else - cwarn << "Invalid solution"; + farm.submitProof(Solution{_nonce, r.mixHash, _w.header, _w.seed, _w.boundary, _w.job, false}); + else { + farm.failedSolution(); + cwarn << "FAILURE: GPU gave incorrect result!"; + } } void CLMiner::kickOff() @@ -229,7 +399,7 @@ void CLMiner::workLoop() } catch (cl::Error const& _e) { - cwarn << "OpenCL Error:" << _e.what() << _e.err(); + cwarn << ethCLErrorHelper("OpenCL Error", _e); } } @@ -464,10 +634,26 @@ bool CLMiner::init(const h256& seed) uint32_t lightSize64 = (unsigned)(light->data().size() / sizeof(node)); // patch source code - // note: CLMiner_kernel is simply ethash_cl_miner_kernel.cl compiled + // note: The kernels here are simply compiled version of the respective .cl kernels // into a byte array by bin2h.cmake. There is no need to load the file by hand in runtime + // See libethash-cl/CMakeLists.txt: add_custom_command() // TODO: Just use C++ raw string literal. - string code(CLMiner_kernel, CLMiner_kernel + sizeof(CLMiner_kernel)); + string code; + + if ( s_clKernelName == CLKernelName::Unstable ) { + cllog << "OpenCL kernel: Unstable kernel"; + code = string(CLMiner_kernel_unstable, CLMiner_kernel_unstable + sizeof(CLMiner_kernel_unstable)); + } + else { //if(s_clKernelName == CLKernelName::Stable) + cllog << "OpenCL kernel: Stable kernel"; + + //CLMiner_kernel_stable.cl will do a #undef THREADS_PER_HASH + if(s_threadsPerHash != 8) { + cwarn << "The current stable OpenCL kernel only supports exactly 8 threads. Thread parameter will be ignored."; + } + + code = string(CLMiner_kernel_stable, CLMiner_kernel_stable + sizeof(CLMiner_kernel_stable)); + } addDefinition(code, "GROUP_SIZE", m_workgroupSize); addDefinition(code, "DAG_SIZE", dagSize128); addDefinition(code, "LIGHT_SIZE", lightSize64); @@ -506,7 +692,7 @@ bool CLMiner::init(const h256& seed) } catch (cl::Error const& err) { - cwarn << "Creating DAG buffer failed:" << err.what() << err.err(); + cwarn << ethCLErrorHelper("Creating DAG buffer failed", err); return false; } // create buffer for header @@ -543,7 +729,7 @@ bool CLMiner::init(const h256& seed) } catch (cl::Error const& err) { - cwarn << err.what() << "(" << err.err() << ")"; + cwarn << ethCLErrorHelper("OpenCL init failed", err); return false; } return true; diff --git a/libethash-cl/CLMiner.h b/libethash-cl/CLMiner.h index 27064a947b..9b8c26ee33 100644 --- a/libethash-cl/CLMiner.h +++ b/libethash-cl/CLMiner.h @@ -41,6 +41,11 @@ namespace dev namespace eth { +enum CLKernelName { + Stable, + Unstable, +}; + class CLMiner: public Miner { public: @@ -50,6 +55,9 @@ class CLMiner: public Miner /// Default value of the global work size as a multiplier of the local work size static const unsigned c_defaultGlobalWorkSizeMultiplier = 8192; + /// Default value of the kernel is the original one + static const CLKernelName c_defaultKernelName = CLKernelName::Stable; + CLMiner(FarmFace& _farm, unsigned _index); ~CLMiner(); @@ -73,6 +81,7 @@ class CLMiner: public Miner s_devices[i] = _devices[i]; } } + static void setCLKernel(unsigned _clKernel) { s_clKernelName = _clKernel == 1 ? CLKernelName::Unstable : CLKernelName::Stable; } HwMonitor hwmon() override; protected: void kickOff() override; @@ -98,6 +107,7 @@ class CLMiner: public Miner static unsigned s_platformId; static unsigned s_numInstances; static unsigned s_threadsPerHash; + static CLKernelName s_clKernelName; static int s_devices[16]; /// The local work size for the search diff --git a/libethash-cl/CLMiner_kernel_stable.cl b/libethash-cl/CLMiner_kernel_stable.cl new file mode 100644 index 0000000000..a6a301e278 --- /dev/null +++ b/libethash-cl/CLMiner_kernel_stable.cl @@ -0,0 +1,414 @@ +#define OPENCL_PLATFORM_UNKNOWN 0 +#define OPENCL_PLATFORM_NVIDIA 1 +#define OPENCL_PLATFORM_AMD 2 +#define OPENCL_PLATFORM_CLOVER 3 + +#ifndef ACCESSES +#define ACCESSES 64 +#endif + +#ifndef GROUP_SIZE +#define GROUP_SIZE 128 +#endif + +#ifndef MAX_OUTPUTS +#define MAX_OUTPUTS 63U +#endif + +#ifndef PLATFORM +#define PLATFORM OPENCL_PLATFORM_AMD +#endif + +#ifndef DAG_SIZE +#define DAG_SIZE 8388593 +#endif + +#ifndef LIGHT_SIZE +#define LIGHT_SIZE 262139 +#endif + +#define ETHASH_DATASET_PARENTS 256 +#define NODE_WORDS (64/4) + +//this kernel supports only exactly 8 threads +//overwrite whatever is incoming +#ifdef THREADS_PER_HASH +#undef THREADS_PER_HASH +#endif + +#define THREADS_PER_HASH (128 / 16) +#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH) +#define FNV_PRIME 0x01000193 + +__constant uint2 const Keccak_f1600_RC[24] = { + (uint2)(0x00000001, 0x00000000), + (uint2)(0x00008082, 0x00000000), + (uint2)(0x0000808a, 0x80000000), + (uint2)(0x80008000, 0x80000000), + (uint2)(0x0000808b, 0x00000000), + (uint2)(0x80000001, 0x00000000), + (uint2)(0x80008081, 0x80000000), + (uint2)(0x00008009, 0x80000000), + (uint2)(0x0000008a, 0x00000000), + (uint2)(0x00000088, 0x00000000), + (uint2)(0x80008009, 0x00000000), + (uint2)(0x8000000a, 0x00000000), + (uint2)(0x8000808b, 0x00000000), + (uint2)(0x0000008b, 0x80000000), + (uint2)(0x00008089, 0x80000000), + (uint2)(0x00008003, 0x80000000), + (uint2)(0x00008002, 0x80000000), + (uint2)(0x00000080, 0x80000000), + (uint2)(0x0000800a, 0x00000000), + (uint2)(0x8000000a, 0x80000000), + (uint2)(0x80008081, 0x80000000), + (uint2)(0x00008080, 0x80000000), + (uint2)(0x80000001, 0x00000000), + (uint2)(0x80008008, 0x80000000), +}; + +#ifdef cl_clang_storage_class_specifiers +#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable +#endif + +#if PLATFORM == OPENCL_PLATFORM_NVIDIA && COMPUTE >= 35 +static uint2 ROL2(const uint2 a, const int offset) { + uint2 result; + if (offset >= 32) { + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset)); + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset)); + } + else { + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset)); + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset)); + } + return result; +} +#elif PLATFORM == OPENCL_PLATFORM_AMD +#pragma OPENCL EXTENSION cl_amd_media_ops : enable +static uint2 ROL2(const uint2 vv, const int r) +{ + if (r <= 32) + { + return amd_bitalign((vv).xy, (vv).yx, 32 - r); + } + else + { + return amd_bitalign((vv).yx, (vv).xy, 64 - r); + } +} +#else +static uint2 ROL2(const uint2 v, const int n) +{ + uint2 result; + if (n <= 32) + { + result.y = ((v.y << (n)) | (v.x >> (32 - n))); + result.x = ((v.x << (n)) | (v.y >> (32 - n))); + } + else + { + result.y = ((v.x << (n - 32)) | (v.y >> (64 - n))); + result.x = ((v.y << (n - 32)) | (v.x >> (64 - n))); + } + return result; +} +#endif + +static void chi(uint2 * a, const uint n, const uint2 * t) +{ + a[n+0] = bitselect(t[n + 0] ^ t[n + 2], t[n + 0], t[n + 1]); + a[n+1] = bitselect(t[n + 1] ^ t[n + 3], t[n + 1], t[n + 2]); + a[n+2] = bitselect(t[n + 2] ^ t[n + 4], t[n + 2], t[n + 3]); + a[n+3] = bitselect(t[n + 3] ^ t[n + 0], t[n + 3], t[n + 4]); + a[n+4] = bitselect(t[n + 4] ^ t[n + 1], t[n + 4], t[n + 0]); +} + +static void keccak_f1600_round(uint2* a, uint r) +{ + uint2 t[25]; + uint2 u; + + // Theta + t[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]; + t[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]; + t[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]; + t[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]; + t[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]; + u = t[4] ^ ROL2(t[1], 1); + a[0] ^= u; + a[5] ^= u; + a[10] ^= u; + a[15] ^= u; + a[20] ^= u; + u = t[0] ^ ROL2(t[2], 1); + a[1] ^= u; + a[6] ^= u; + a[11] ^= u; + a[16] ^= u; + a[21] ^= u; + u = t[1] ^ ROL2(t[3], 1); + a[2] ^= u; + a[7] ^= u; + a[12] ^= u; + a[17] ^= u; + a[22] ^= u; + u = t[2] ^ ROL2(t[4], 1); + a[3] ^= u; + a[8] ^= u; + a[13] ^= u; + a[18] ^= u; + a[23] ^= u; + u = t[3] ^ ROL2(t[0], 1); + a[4] ^= u; + a[9] ^= u; + a[14] ^= u; + a[19] ^= u; + a[24] ^= u; + + // Rho Pi + + t[0] = a[0]; + t[10] = ROL2(a[1], 1); + t[20] = ROL2(a[2], 62); + t[5] = ROL2(a[3], 28); + t[15] = ROL2(a[4], 27); + + t[16] = ROL2(a[5], 36); + t[1] = ROL2(a[6], 44); + t[11] = ROL2(a[7], 6); + t[21] = ROL2(a[8], 55); + t[6] = ROL2(a[9], 20); + + t[7] = ROL2(a[10], 3); + t[17] = ROL2(a[11], 10); + t[2] = ROL2(a[12], 43); + t[12] = ROL2(a[13], 25); + t[22] = ROL2(a[14], 39); + + t[23] = ROL2(a[15], 41); + t[8] = ROL2(a[16], 45); + t[18] = ROL2(a[17], 15); + t[3] = ROL2(a[18], 21); + t[13] = ROL2(a[19], 8); + + t[14] = ROL2(a[20], 18); + t[24] = ROL2(a[21], 2); + t[9] = ROL2(a[22], 61); + t[19] = ROL2(a[23], 56); + t[4] = ROL2(a[24], 14); + + // Chi + chi(a, 0, t); + + // Iota + a[0] ^= Keccak_f1600_RC[r]; + + chi(a, 5, t); + chi(a, 10, t); + chi(a, 15, t); + chi(a, 20, t); +} + +static void keccak_f1600_no_absorb(uint2* a, uint out_size, uint isolate) +{ + // Originally I unrolled the first and last rounds to interface + // better with surrounding code, however I haven't done this + // without causing the AMD compiler to blow up the VGPR usage. + + + //uint o = 25; + for (uint r = 0; r < 24;) + { + // This dynamic branch stops the AMD compiler unrolling the loop + // and additionally saves about 33% of the VGPRs, enough to gain another + // wavefront. Ideally we'd get 4 in flight, but 3 is the best I can + // massage out of the compiler. It doesn't really seem to matter how + // much we try and help the compiler save VGPRs because it seems to throw + // that information away, hence the implementation of keccak here + // doesn't bother. + if (isolate) + { + keccak_f1600_round(a, r++); + //if (r == 23) o = out_size; + } + } + + + // final round optimised for digest size + //keccak_f1600_round(a, 23, out_size); +} + +#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; } + +static uint fnv(uint x, uint y) +{ + return x * FNV_PRIME ^ y; +} + +static uint4 fnv4(uint4 x, uint4 y) +{ + return x * FNV_PRIME ^ y; +} + +static uint fnv_reduce(uint4 v) +{ + return fnv(fnv(fnv(v.x, v.y), v.z), v.w); +} + +typedef struct +{ + ulong ulongs[32 / sizeof(ulong)]; +} hash32_t; + +typedef union { + uint words[64 / sizeof(uint)]; + uint2 uint2s[64 / sizeof(uint2)]; + uint4 uint4s[64 / sizeof(uint4)]; +} hash64_t; + +typedef union { + uint words[200 / sizeof(uint)]; + uint2 uint2s[200 / sizeof(uint2)]; + uint4 uint4s[200 / sizeof(uint4)]; +} hash200_t; + +typedef struct +{ + uint4 uint4s[128 / sizeof(uint4)]; +} hash128_t; + +typedef union { + uint4 uint4s[4]; + ulong ulongs[8]; + uint uints[16]; +} compute_hash_share; + +#if PLATFORM != OPENCL_PLATFORM_NVIDIA // use maxrregs on nv +__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) +#endif +__kernel void ethash_search( + __global volatile uint* restrict g_output, + __constant hash32_t const* g_header, + __global hash128_t const* g_dag, + ulong start_nonce, + ulong target, + uint isolate + ) +{ + __local compute_hash_share share[HASHES_PER_LOOP]; + + uint const gid = get_global_id(0); + + // Compute one init hash per work item. + + // sha3_512(header .. nonce) + ulong state[25]; + copy(state, g_header->ulongs, 4); + state[4] = start_nonce + gid; + + for (uint i = 6; i != 25; ++i) + { + state[i] = 0; + } + state[5] = 0x0000000000000001; + state[8] = 0x8000000000000000; + + keccak_f1600_no_absorb((uint2*)state, 8, isolate); + + // Threads work together in this phase in groups of 8. + uint const thread_id = gid & 7; + uint const hash_id = (gid % GROUP_SIZE) >> 3; + + for (int i = 0; i < THREADS_PER_HASH; i++) + { + // share init with other threads + if (i == thread_id) + copy(share[hash_id].ulongs, state, 8); + + barrier(CLK_LOCAL_MEM_FENCE); + + uint4 mix = share[hash_id].uint4s[thread_id & 3]; + barrier(CLK_LOCAL_MEM_FENCE); + + __local uint *share0 = share[hash_id].uints; + + // share init0 + if (thread_id == 0) + *share0 = mix.x; + barrier(CLK_LOCAL_MEM_FENCE); + uint init0 = *share0; + + for (uint a = 0; a < ACCESSES; a += 4) + { + bool update_share = thread_id == ((a >> 2) & (THREADS_PER_HASH - 1)); + + for (uint i = 0; i != 4; ++i) + { + if (update_share) + { + *share0 = fnv(init0 ^ (a + i), ((uint *)&mix)[i]) % DAG_SIZE; + } + barrier(CLK_LOCAL_MEM_FENCE); + + mix = fnv4(mix, g_dag[*share0].uint4s[thread_id]); + } + } + + share[hash_id].uints[thread_id] = fnv_reduce(mix); + barrier(CLK_LOCAL_MEM_FENCE); + + if (i == thread_id) + copy(state + 8, share[hash_id].ulongs, 4); + + barrier(CLK_LOCAL_MEM_FENCE); + } + + for (uint i = 13; i != 25; ++i) + { + state[i] = 0; + } + state[12] = 0x0000000000000001; + state[16] = 0x8000000000000000; + + // keccak_256(keccak_512(header..nonce) .. mix); + keccak_f1600_no_absorb((uint2*)state, 1, isolate); + + if (as_ulong(as_uchar8(state[0]).s76543210) < target) + { + uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1); + g_output[slot] = gid; + } +} + +static void SHA3_512(uint2* s, uint isolate) +{ + for (uint i = 8; i != 25; ++i) + { + s[i] = (uint2){ 0, 0 }; + } + s[8].x = 0x00000001; + s[8].y = 0x80000000; + keccak_f1600_no_absorb(s, 8, isolate); +} + +__kernel void ethash_calculate_dag_item(uint start, __global hash64_t const* g_light, __global hash64_t * g_dag, uint isolate) +{ + uint const node_index = start + get_global_id(0); + if (node_index > DAG_SIZE * 2) return; + + hash200_t dag_node; + copy(dag_node.uint4s, g_light[node_index % LIGHT_SIZE].uint4s, 4); + dag_node.words[0] ^= node_index; + SHA3_512(dag_node.uint2s, isolate); + + for (uint i = 0; i != ETHASH_DATASET_PARENTS; ++i) { + uint parent_index = fnv(node_index ^ i, dag_node.words[i % NODE_WORDS]) % LIGHT_SIZE; + + for (uint w = 0; w != 4; ++w) { + dag_node.uint4s[w] = fnv4(dag_node.uint4s[w], g_light[parent_index].uint4s[w]); + } + } + SHA3_512(dag_node.uint2s, isolate); + copy(g_dag[node_index].uint4s, dag_node.uint4s, 4); +} diff --git a/libethash-cl/CLMiner_kernel.cl b/libethash-cl/CLMiner_kernel_unstable.cl similarity index 99% rename from libethash-cl/CLMiner_kernel.cl rename to libethash-cl/CLMiner_kernel_unstable.cl index c473f447b7..7c3bef9c6f 100644 --- a/libethash-cl/CLMiner_kernel.cl +++ b/libethash-cl/CLMiner_kernel_unstable.cl @@ -36,6 +36,10 @@ #endif #define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH) +#ifdef cl_clang_storage_class_specifiers + #pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable +#endif + // Check for valid THREADS_PER_HASH param #if THREADS_PER_HASH == 1 #define LN_THREAD_PER_HASH 0 diff --git a/libethash-cl/CMakeLists.txt b/libethash-cl/CMakeLists.txt index 55e58e9215..be3ee2e902 100644 --- a/libethash-cl/CMakeLists.txt +++ b/libethash-cl/CMakeLists.txt @@ -1,21 +1,36 @@ # A custom command and target to turn the OpenCL kernel into a byte array header # The normal build depends on it properly and if the kernel file is changed, then # a rebuild of libethash-cl should be triggered + +#TODO: clean up the copy&pasting here +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_stable.h + COMMAND ${CMAKE_COMMAND} ARGS + -DBIN2H_SOURCE_FILE="${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel_stable.cl" + -DBIN2H_VARIABLE_NAME=CLMiner_kernel_stable + -DBIN2H_HEADER_FILE="${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_stable.h" + -P "${CMAKE_CURRENT_SOURCE_DIR}/bin2h.cmake" + COMMENT "Generating OpenCL Kernel Byte Array" + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel_stable.cl +) +add_custom_target(clbin2h_stable DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_stable.h ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel_stable.cl) + add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel.h + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_unstable.h COMMAND ${CMAKE_COMMAND} ARGS - -DBIN2H_SOURCE_FILE="${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel.cl" - -DBIN2H_VARIABLE_NAME=CLMiner_kernel - -DBIN2H_HEADER_FILE="${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel.h" + -DBIN2H_SOURCE_FILE="${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel_unstable.cl" + -DBIN2H_VARIABLE_NAME=CLMiner_kernel_unstable + -DBIN2H_HEADER_FILE="${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_unstable.h" -P "${CMAKE_CURRENT_SOURCE_DIR}/bin2h.cmake" COMMENT "Generating OpenCL Kernel Byte Array" - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel.cl + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel_unstable.cl ) -add_custom_target(clbin2h DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel.cl) +add_custom_target(clbin2h_unstable DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_unstable.h ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel_unstable.cl) set(SOURCES CLMiner.h CLMiner.cpp - ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel.h + ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_stable.h + ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_unstable.h ) if(APPLE) diff --git a/libethash-cuda/CUDAMiner.cpp b/libethash-cuda/CUDAMiner.cpp index d563144f46..058a937eb7 100644 --- a/libethash-cuda/CUDAMiner.cpp +++ b/libethash-cuda/CUDAMiner.cpp @@ -14,12 +14,6 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License along with cpp-ethereum. If not, see . */ -/** @file CUDAMiner.cpp -* @author Gav Wood -* @date 2014 -* -* Determines the PoW algorithm. -*/ #include "CUDAMiner.h" @@ -59,18 +53,26 @@ namespace eth m_aborted = m_abort = false; } + bool isStale() + { + return m_abort; + } + + protected: - virtual bool found(uint64_t const* _nonces) override + void found(uint64_t const* _nonces, uint32_t count) override { - m_owner.report(_nonces[0]); - return m_owner.shouldStop(); + for (uint32_t i = 0; i < count; i++) + m_owner.report(_nonces[i]); } - virtual bool searched(uint64_t _startNonce, uint32_t _count) override + void searched(uint32_t _count) override { - (void) _startNonce; // FIXME: unusued arg. - UniqueGuard l(x_all); m_owner.addHashCount(_count); + } + + bool shouldStop() override + { if (m_abort || m_owner.shouldStop()) return (m_aborted = true); return false; @@ -84,21 +86,19 @@ namespace eth }; } } -unsigned CUDAMiner::s_platformId = 0; -unsigned CUDAMiner::s_deviceId = 0; unsigned CUDAMiner::s_numInstances = 0; int CUDAMiner::s_devices[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; CUDAMiner::CUDAMiner(FarmFace& _farm, unsigned _index) : Miner("CUDA", _farm, _index), - m_hook(new EthashCUDAHook(*this)) // FIXME! + m_hook(new EthashCUDAHook(*this)), + m_miner(getNumDevices()) {} CUDAMiner::~CUDAMiner() { stopWorking(); pause(); - delete m_miner; delete m_hook; } @@ -108,7 +108,13 @@ void CUDAMiner::report(uint64_t _nonce) WorkPackage w = work(); // Copy work package to avoid repeated mutex lock. Result r = EthashAux::eval(w.seed, w.header, _nonce); if (r.value < w.boundary) - farm.submitProof(Solution{_nonce, r.mixHash, w.header, w.seed, w.boundary}); + farm.submitProof(Solution{_nonce, r.mixHash, w.header, w.seed, w.boundary, w.job, m_hook->isStale()}); + else + { + farm.failedSolution(); + cwarn << "FAILURE: GPU gave incorrect result!"; + + } } void CUDAMiner::kickOff() @@ -123,17 +129,13 @@ bool CUDAMiner::init(const h256& seed) unsigned device = s_devices[index] > -1 ? s_devices[index] : index; cnote << "Initialising miner..."; - m_minerSeed = seed; - - if(!m_miner) - m_miner = new ethash_cuda_miner; EthashAux::LightType light; light = EthashAux::light(seed); bytesConstRef lightData = light->data(); - m_miner->init(light->light, lightData.data(), lightData.size(), - device, (s_dagLoadMode == DAG_LOAD_MODE_SINGLE), s_dagInHostMemory); + m_miner.init(getNumDevices(), light->light, lightData.data(), lightData.size(), + device, (s_dagLoadMode == DAG_LOAD_MODE_SINGLE), s_dagInHostMemory, s_dagCreateDevice); s_dagLoadIndex++; if (s_dagLoadMode == DAG_LOAD_MODE_SINGLE) @@ -143,15 +145,13 @@ bool CUDAMiner::init(const h256& seed) // all devices have loaded DAG, we can free now delete[] s_dagInHostMemory; s_dagInHostMemory = NULL; - cout << "Freeing DAG from host" << endl; + cnote << "Freeing DAG from host"; } } return true; } catch (std::runtime_error const& _e) { - delete m_miner; - m_miner = nullptr; cwarn << "Error CUDA mining: " << _e.what(); return false; } @@ -168,7 +168,7 @@ void CUDAMiner::workLoop() { const WorkPackage w = work(); - if(!m_miner || current.header != w.header || current.seed != w.seed) + if (current.header != w.header || current.seed != w.seed) { if(!w || w.header == h256()) { @@ -178,7 +178,7 @@ void CUDAMiner::workLoop() } //cnote << "set work; seed: " << "#" + w.seed.hex().substr(0, 8) + ", target: " << "#" + w.boundary.hex().substr(0, 12); - if (!m_miner || current.seed != w.seed) + if (current.seed != w.seed) { if(!init(w.seed)) break; @@ -189,7 +189,7 @@ void CUDAMiner::workLoop() uint64_t startN = current.startNonce; if (current.exSizeBits >= 0) startN = current.startNonce | ((uint64_t)index << (64 - 4 - current.exSizeBits)); // this can support up to 16 devices - m_miner->search(current.header.data(), upper64OfBoundary, *m_hook, (current.exSizeBits >= 0), startN); + m_miner.search(current.header.data(), upper64OfBoundary, *m_hook, (current.exSizeBits >= 0), startN); // Check if we should stop. if (shouldStop()) @@ -200,8 +200,6 @@ void CUDAMiner::workLoop() } catch (std::runtime_error const& _e) { - delete m_miner; - m_miner = nullptr; cwarn << "Error CUDA mining: " << _e.what(); } } @@ -211,28 +209,51 @@ void CUDAMiner::pause() m_hook->abort(); } -std::string CUDAMiner::platformInfo() -{ - return ethash_cuda_miner::platform_info(s_deviceId); -} - unsigned CUDAMiner::getNumDevices() { - return ethash_cuda_miner::getNumDevices(); + int deviceCount = -1; + cudaError_t err = cudaGetDeviceCount(&deviceCount); + if (err == cudaSuccess) + return deviceCount; + + if (err == cudaErrorInsufficientDriver) + { + int driverVersion = -1; + cudaDriverGetVersion(&driverVersion); + if (driverVersion == 0) + throw std::runtime_error{"No CUDA driver found"}; + throw std::runtime_error{"Insufficient CUDA driver: " + std::to_string(driverVersion)}; + } + + throw std::runtime_error{cudaGetErrorString(err)}; } void CUDAMiner::listDevices() { - return ethash_cuda_miner::listDevices(); + try + { + string outString = "\nListing CUDA devices.\nFORMAT: [deviceID] deviceName\n"; + int numDevices = getNumDevices(); + for (int i = 0; i < numDevices; ++i) + { + cudaDeviceProp props; + CUDA_SAFE_CALL(cudaGetDeviceProperties(&props, i)); + + outString += "[" + to_string(i) + "] " + string(props.name) + "\n"; + outString += "\tCompute version: " + to_string(props.major) + "." + to_string(props.minor) + "\n"; + outString += "\tcudaDeviceProp::totalGlobalMem: " + to_string(props.totalGlobalMem) + "\n"; + } + std::cout << outString; + } + catch(std::runtime_error const& err) + { + cwarn << "CUDA error: " << err.what(); + } } HwMonitor CUDAMiner::hwmon() { - HwMonitor hw; - if (m_miner) { - hw = m_miner->hwmon(); - } - return hw; + return m_miner.hwmon(); } bool CUDAMiner::configureGPU( @@ -250,6 +271,7 @@ bool CUDAMiner::configureGPU( _blockSize = ((_blockSize + 7) / 8) * 8; if (!ethash_cuda_miner::configureGPU( + getNumDevices(), s_devices, _blockSize, _gridSize, diff --git a/libethash-cuda/CUDAMiner.h b/libethash-cuda/CUDAMiner.h index 3e0fce5fd2..780e4a51cb 100644 --- a/libethash-cuda/CUDAMiner.h +++ b/libethash-cuda/CUDAMiner.h @@ -39,13 +39,12 @@ class EthashCUDAHook; public: CUDAMiner(FarmFace& _farm, unsigned _index); - ~CUDAMiner(); + ~CUDAMiner() override; static unsigned instances() { return s_numInstances > 0 ? s_numInstances : 1; } - static std::string platformInfo(); static unsigned getNumDevices(); static void listDevices(); static void setParallelHash(unsigned _parallelHash); @@ -62,7 +61,7 @@ class EthashCUDAHook; { s_numInstances = std::min(_instances, getNumDevices()); } - static void setDevices(unsigned * _devices, unsigned _selectedDeviceCount) + static void setDevices(const unsigned* _devices, unsigned _selectedDeviceCount) { for (unsigned i = 0; i < _selectedDeviceCount; i++) { @@ -76,16 +75,12 @@ class EthashCUDAHook; private: void workLoop() override; void report(uint64_t _nonce); - void initDevice(WorkPackage w); bool init(const h256& seed); EthashCUDAHook* m_hook = nullptr; - ethash_cuda_miner* m_miner = nullptr; + ethash_cuda_miner m_miner; - h256 m_minerSeed; ///< Last seed in m_miner - static unsigned s_platformId; - static unsigned s_deviceId; static unsigned s_numInstances; static int s_devices[16]; diff --git a/libethash-cuda/ethash_cuda_miner.cpp b/libethash-cuda/ethash_cuda_miner.cpp index 162287c590..f6b61b76cb 100644 --- a/libethash-cuda/ethash_cuda_miner.cpp +++ b/libethash-cuda/ethash_cuda_miner.cpp @@ -61,61 +61,11 @@ unsigned const ethash_cuda_miner::c_defaultNumStreams = 2; ethash_cuda_miner::search_hook::~search_hook() {} -ethash_cuda_miner::ethash_cuda_miner() -{ - m_light.resize(getNumDevices()); -} - -std::string ethash_cuda_miner::platform_info(unsigned _deviceId) -{ - int runtime_version; - int device_count; - - device_count = getNumDevices(); - - if (device_count == 0) - return std::string(); - - CUDA_SAFE_CALL(cudaRuntimeGetVersion(&runtime_version)); - - // use selected default device - int device_num = std::min((int)_deviceId, device_count - 1); - cudaDeviceProp device_props; - - CUDA_SAFE_CALL(cudaGetDeviceProperties(&device_props, device_num)); - - char platform[5]; - int version_major = runtime_version / 1000; - int version_minor = (runtime_version - (version_major * 1000)) / 10; - sprintf(platform, "%d.%d", version_major, version_minor); - - char compute[5]; - sprintf(compute, "%d.%d", device_props.major, device_props.minor); - - return "{ \"platform\": \"CUDA " + std::string(platform) + "\", \"device\": \"" + std::string(device_props.name) + "\", \"version\": \"Compute " + std::string(compute) + "\" }"; -} - -int ethash_cuda_miner::getNumDevices() -{ - int deviceCount = -1; - cudaError_t err = cudaGetDeviceCount(&deviceCount); - if (err == cudaSuccess) - return deviceCount; - - if (err == cudaErrorInsufficientDriver) - { - int driverVersion = -1; - cudaDriverGetVersion(&driverVersion); - if (driverVersion == 0) - throw std::runtime_error{"No CUDA driver found"}; - throw std::runtime_error{"Insufficient CUDA driver: " + std::to_string(driverVersion)}; - } - - throw std::runtime_error{cudaGetErrorString(err)}; -} +ethash_cuda_miner::ethash_cuda_miner(size_t numDevices) : m_light(numDevices) {} bool ethash_cuda_miner::configureGPU( - int * _devices, + size_t numDevices, + const int* _devices, unsigned _blockSize, unsigned _gridSize, unsigned _numStreams, @@ -134,10 +84,9 @@ bool ethash_cuda_miner::configureGPU( // by default let's only consider the DAG of the first epoch uint64_t dagSize = ethash_get_datasize(_currentBlock); - int devicesCount = getNumDevices(); + int devicesCount = static_cast(numDevices); for (int i = 0; i < devicesCount; i++) { - if (_devices[i] != -1) { int deviceId = min(devicesCount - 1, _devices[i]); @@ -173,45 +122,15 @@ unsigned ethash_cuda_miner::s_gridSize = ethash_cuda_miner::c_defaultGridSize; unsigned ethash_cuda_miner::s_numStreams = ethash_cuda_miner::c_defaultNumStreams; unsigned ethash_cuda_miner::s_scheduleFlag = 0; -void ethash_cuda_miner::listDevices() +bool ethash_cuda_miner::init(size_t numDevices, ethash_light_t _light, uint8_t const* _lightData, uint64_t _lightSize, unsigned _deviceId, bool _cpyToHost, uint8_t* &hostDAG, unsigned dagCreateDevice) { try { - string outString = "\nListing CUDA devices.\nFORMAT: [deviceID] deviceName\n"; - int numDevices = getNumDevices(); - for (int i = 0; i < numDevices; ++i) - { - cudaDeviceProp props; - CUDA_SAFE_CALL(cudaGetDeviceProperties(&props, i)); - - outString += "[" + to_string(i) + "] " + string(props.name) + "\n"; - outString += "\tCompute version: " + to_string(props.major) + "." + to_string(props.minor) + "\n"; - outString += "\tcudaDeviceProp::totalGlobalMem: " + to_string(props.totalGlobalMem) + "\n"; - } - std::cout << outString; - } - catch(std::runtime_error const& err) - { - cwarn << "CUDA error: " << err.what(); - } -} - -void ethash_cuda_miner::finish() -{ - CUDA_SAFE_CALL(cudaDeviceReset()); -} - -bool ethash_cuda_miner::init(ethash_light_t _light, uint8_t const* _lightData, uint64_t _lightSize, unsigned _deviceId, bool _cpyToHost, uint8_t* &hostDAG) -{ - try - { - unsigned device_count = getNumDevices(); - - if (device_count == 0) + if (numDevices == 0) return false; // use selected device - m_device_num = _deviceId < device_count -1 ? _deviceId : device_count - 1; + m_device_num = _deviceId < numDevices -1 ? _deviceId : numDevices - 1; nvmlh = wrap_nvml_create(); cudaDeviceProp device_props; @@ -232,6 +151,12 @@ bool ethash_cuda_miner::init(ethash_light_t _light, uint8_t const* _lightData, u cudalog << "Set Device to current"; if(dagSize128 != m_dag_size || !m_dag) { + //Check whether the current device has sufficient memory everytime we recreate the dag + if (device_props.totalGlobalMem < dagSize) + { + cudalog << "CUDA device " << string(device_props.name) << " has insufficient GPU memory." << device_props.totalGlobalMem << " bytes of memory found < " << dagSize << " bytes of memory required"; + return false; + } //We need to reset the device and recreate the dag cudalog << "Resetting device"; CUDA_SAFE_CALL(cudaDeviceReset()); @@ -278,7 +203,7 @@ bool ethash_cuda_miner::init(ethash_light_t _light, uint8_t const* _lightData, u if (!hostDAG) { - if(m_device_num == 0 || !_cpyToHost){ //if !cpyToHost -> All devices shall generate their DAG + if((m_device_num == dagCreateDevice) || !_cpyToHost){ //if !cpyToHost -> All devices shall generate their DAG cudalog << "Generating DAG for GPU #" << m_device_num << " with dagSize: " << dagSize <<" gridSize: " << s_gridSize << " &m_streams[0]: " << &m_streams[0]; ethash_generate_dag(dagSize, s_gridSize, s_blockSize, m_streams[0], m_device_num); @@ -374,16 +299,21 @@ void ethash_cuda_miner::search(uint8_t const* header, uint64_t target, search_ho { CUDA_SAFE_CALL(cudaStreamSynchronize(stream)); found_count = buffer[0]; - if (found_count) + if (found_count) { buffer[0] = 0; - for (unsigned int j = 0; j < found_count; j++) - nonces[j] = nonce_base + buffer[j + 1]; + if (found_count > (SEARCH_RESULT_BUFFER_SIZE - 1)) + found_count = SEARCH_RESULT_BUFFER_SIZE - 1; + for (unsigned int j = 0; j < found_count; j++) + nonces[j] = nonce_base + buffer[j + 1]; + } } run_ethash_search(s_gridSize, s_blockSize, m_sharedBytes, stream, buffer, m_current_nonce, m_parallelHash); if (m_current_index >= s_numStreams) { - exit = found_count && hook.found(nonces); - exit |= hook.searched(nonce_base, batch_size); + if (found_count) + hook.found(nonces, found_count); + hook.searched(batch_size); + exit = hook.shouldStop(); } } } diff --git a/libethash-cuda/ethash_cuda_miner.h b/libethash-cuda/ethash_cuda_miner.h index e924e8de60..dbcbe53f5a 100644 --- a/libethash-cuda/ethash_cuda_miner.h +++ b/libethash-cuda/ethash_cuda_miner.h @@ -17,29 +17,27 @@ class ethash_cuda_miner virtual ~search_hook(); // always a virtual destructor for a class with virtuals. // reports progress, return true to abort - virtual bool found(uint64_t const* nonces) = 0; - virtual bool searched(uint64_t start_nonce, uint32_t count) = 0; + virtual void found(uint64_t const* nonces, uint32_t count) = 0; + virtual void searched(uint32_t count) = 0; + virtual bool shouldStop() = 0; }; public: - ethash_cuda_miner(); + explicit ethash_cuda_miner(size_t numDevices); - static std::string platform_info(unsigned _deviceId = 0); - static int getNumDevices(); - static void listDevices(); static bool configureGPU( - int * _devices, + size_t numDevices, + const int* _devices, unsigned _blockSize, unsigned _gridSize, unsigned _numStreams, unsigned _scheduleFlag, uint64_t _currentBlock ); - static void setParallelHash(unsigned _parallelHash); + static void setParallelHash(unsigned _parallelHash); - bool init(ethash_light_t _light, uint8_t const* _lightData, uint64_t _lightSize, unsigned _deviceId, bool _cpyToHost, uint8_t * &hostDAG); + bool init(size_t numDevices, ethash_light_t _light, uint8_t const* _lightData, uint64_t _lightSize, unsigned _deviceId, bool _cpyToHost, uint8_t * &hostDAG, unsigned dagCreateDevice); - void finish(); void search(uint8_t const* header, uint64_t target, search_hook& hook, bool _ethStratum, uint64_t _startN); dev::eth::HwMonitor hwmon(); @@ -81,5 +79,5 @@ class ethash_cuda_miner static unsigned m_parallelHash; - wrap_nvml_handle *nvmlh = NULL; + wrap_nvml_handle *nvmlh = nullptr; }; diff --git a/libethash-cuda/ethash_cuda_miner_kernel.cu b/libethash-cuda/ethash_cuda_miner_kernel.cu index 5ae6f89649..03ff2ce698 100644 --- a/libethash-cuda/ethash_cuda_miner_kernel.cu +++ b/libethash-cuda/ethash_cuda_miner_kernel.cu @@ -31,7 +31,8 @@ ethash_search( uint32_t const gid = blockIdx.x * blockDim.x + threadIdx.x; uint64_t hash = compute_hash<_PARALLEL_HASH>(start_nonce + gid); if (cuda_swab64(hash) > d_target) return; - uint32_t index = atomicInc(const_cast(g_output), SEARCH_RESULT_BUFFER_SIZE - 1) + 1; + uint32_t index = atomicInc(const_cast(g_output), 0xffffffff) + 1; + if (index > SEARCH_RESULT_BUFFER_SIZE -1) return; g_output[index] = gid; } @@ -49,11 +50,7 @@ void run_ethash_search( { case 1: ethash_search <1> <<>>(g_output, start_nonce); break; case 2: ethash_search <2> <<>>(g_output, start_nonce); break; - case 3: ethash_search <3> <<>>(g_output, start_nonce); break; case 4: ethash_search <4> <<>>(g_output, start_nonce); break; - case 5: ethash_search <5> <<>>(g_output, start_nonce); break; - case 6: ethash_search <6> <<>>(g_output, start_nonce); break; - case 7: ethash_search <7> <<>>(g_output, start_nonce); break; case 8: ethash_search <8> <<>>(g_output, start_nonce); break; default: ethash_search <4> <<>>(g_output, start_nonce); break; } @@ -153,9 +150,7 @@ void ethash_generate_dag( { ethash_calculate_dag_item <<>>(i * blocks * threads); CUDA_SAFE_CALL(cudaDeviceSynchronize()); - printf("CUDA#%d: %.0f%%\n",device, 100.0f * (float)i / (float)fullRuns); } - //printf("GPU#%d 100%%\n"); CUDA_SAFE_CALL(cudaGetLastError()); } diff --git a/libethcore/EthashAux.h b/libethcore/EthashAux.h index 909c223e31..145f294f41 100644 --- a/libethcore/EthashAux.h +++ b/libethcore/EthashAux.h @@ -39,6 +39,8 @@ struct Solution h256 headerHash; h256 seedHash; h256 boundary; + h256 job; + bool stale; }; struct Result @@ -95,6 +97,7 @@ struct WorkPackage h256 boundary; h256 header; ///< When h256() means "pause until notified a new work package is available". h256 seed; + h256 job; uint64_t startNonce = 0; int exSizeBits = -1; diff --git a/libethcore/Miner.h b/libethcore/Miner.h index ac04bce8be..e4fdca42a4 100644 --- a/libethcore/Miner.h +++ b/libethcore/Miner.h @@ -155,6 +155,7 @@ class FarmFace * @return true iff the solution was good (implying that mining should be . */ virtual bool submitProof(Solution const& _p) = 0; + virtual void failedSolution() = 0; }; /** diff --git a/libstratum/EthStratumClient.cpp b/libstratum/EthStratumClient.cpp index d2b1ec0dd4..7fd5eeb07e 100644 --- a/libstratum/EthStratumClient.cpp +++ b/libstratum/EthStratumClient.cpp @@ -28,7 +28,8 @@ static void diffToTarget(uint32_t *target, double diff) EthStratumClient::EthStratumClient(Farm* f, MinerType m, string const & host, string const & port, string const & user, string const & pass, int const & retries, int const & worktimeout, int const & protocol, string const & email) - : m_socket(m_io_service) + : m_socket(m_io_service), + m_worktimer(m_io_service) { m_minerType = m; m_primary.host = host; @@ -50,7 +51,6 @@ EthStratumClient::EthStratumClient(Farm* f, MinerType m, string const & host, st m_submit_hashrate_id = h256::random().hex(); p_farm = f; - p_worktimer = nullptr; connect(); } @@ -100,10 +100,7 @@ void EthStratumClient::connect() void EthStratumClient::reconnect() { - if (p_worktimer) { - p_worktimer->cancel(); - p_worktimer = nullptr; - } + m_worktimer.cancel(); m_io_service.reset(); //m_socket.close(); // leads to crashes on Linux @@ -399,23 +396,13 @@ void EthStratumClient::processReponse(Json::Value& responseObject) if (sHeaderHash != "" && sSeedHash != "") { - - h256 seedHash = h256(sSeedHash); - - m_previous.header = m_current.header; - m_previous.seed = m_current.seed; - m_previous.boundary = m_current.boundary; - m_previous.startNonce = m_current.startNonce; - m_previous.exSizeBits = m_previous.exSizeBits; - m_previousJob = m_job; - m_current.header = h256(sHeaderHash); - m_current.seed = seedHash; + m_current.seed = h256(sSeedHash); m_current.boundary = h256(); diffToTarget((uint32_t*)m_current.boundary.data(), m_nextWorkDifficulty); m_current.startNonce = ethash_swap_u64(*((uint64_t*)m_extraNonce.data())); m_current.exSizeBits = m_extraNonceHexSize * 4; - m_job = job; + m_current.job = h256(job); p_farm->setWork(m_current); cnote << "Received new job #" + job.substr(0, 8) @@ -437,35 +424,22 @@ void EthStratumClient::processReponse(Json::Value& responseObject) if (sHeaderHash != "" && sSeedHash != "" && sShareTarget != "") { - - h256 seedHash = h256(sSeedHash); h256 headerHash = h256(sHeaderHash); if (headerHash != m_current.header) { - //x_current.lock(); - if (p_worktimer) - p_worktimer->cancel(); - - m_previous.header = m_current.header; - m_previous.seed = m_current.seed; - m_previous.boundary = m_current.boundary; - m_previousJob = m_job; + m_worktimer.cancel(); + m_worktimer.expires_from_now(boost::posix_time::seconds(m_worktimeout)); m_current.header = h256(sHeaderHash); - m_current.seed = seedHash; + m_current.seed = h256(sSeedHash); m_current.boundary = h256(sShareTarget); - m_job = job; + m_current.job = h256(job); p_farm->setWork(m_current); cnote << "Received new job #" + job.substr(0, 8) << " seed: " << "#" + m_current.seed.hex().substr(0, 32) << " target: " << "#" + m_current.boundary.hex().substr(0, 24); - - //x_current.unlock(); - p_worktimer = new boost::asio::deadline_timer(m_io_service, boost::posix_time::seconds(m_worktimeout)); - p_worktimer->async_wait(boost::bind(&EthStratumClient::work_timeout_handler, this, boost::asio::placeholders::error)); - } } } @@ -519,12 +493,6 @@ bool EthStratumClient::submitHashrate(string const & rate) { } bool EthStratumClient::submit(Solution solution) { - x_current.lock(); - WorkPackage tempWork(m_current); - string temp_job = m_job; - WorkPackage tempPreviousWork(m_previous); - string temp_previous_job = m_previousJob; - x_current.unlock(); string minernonce; string nonceHex = toHex(solution.nonce); @@ -532,65 +500,37 @@ bool EthStratumClient::submit(Solution solution) { minernonce = nonceHex.substr(m_extraNonceHexSize, 16 - m_extraNonceHexSize); } - if (EthashAux::eval(tempWork.seed, tempWork.header, solution.nonce).value < tempWork.boundary) - { - string json; - - switch (m_protocol) { - case STRATUM_PROTOCOL_STRATUM: - json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_job + "\",\"0x" + nonceHex + "\",\"0x" + tempWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n"; - break; - case STRATUM_PROTOCOL_ETHPROXY: - json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + nonceHex + "\",\"0x" + tempWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n"; - break; - case STRATUM_PROTOCOL_ETHEREUMSTRATUM: - json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_job + "\",\"" + minernonce + "\"]}\n"; - break; - } - - std::ostream os(&m_requestBuffer); - os << json; - m_stale = false; - async_write(m_socket, m_requestBuffer, - boost::bind(&EthStratumClient::handleResponse, this, - boost::asio::placeholders::error)); - cnote << "Solution found; Submitted to" << p_active->host; - if (m_protocol != STRATUM_PROTOCOL_ETHEREUMSTRATUM) { - cnote << "Nonce:" << "0x" + nonceHex; - } - return true; - } - else if (EthashAux::eval(tempPreviousWork.seed, tempPreviousWork.header, solution.nonce).value < tempPreviousWork.boundary) - { - string json; + string json; - switch (m_protocol) { + switch (m_protocol) { case STRATUM_PROTOCOL_STRATUM: - json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_previous_job + "\",\"0x" + nonceHex + "\",\"0x" + tempPreviousWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n"; + json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + solution.job.hex() + "\",\"0x" + nonceHex + "\",\"0x" + solution.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n"; break; case STRATUM_PROTOCOL_ETHPROXY: - json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + nonceHex + "\",\"0x" + tempPreviousWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n"; + json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + nonceHex + "\",\"0x" + solution.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n"; break; case STRATUM_PROTOCOL_ETHEREUMSTRATUM: - json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_previous_job + "\",\"" + minernonce + "\"]}\n"; + json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + solution.job.hex() + "\",\"" + minernonce + "\"]}\n"; break; - } + } - std::ostream os(&m_requestBuffer); - os << json; - m_stale = true; - async_write(m_socket, m_requestBuffer, - boost::bind(&EthStratumClient::handleResponse, this, - boost::asio::placeholders::error)); - cwarn << "Submitted stale solution."; - return true; + std::ostream os(&m_requestBuffer); + os << json; + m_stale = solution.stale; + async_write(m_socket, m_requestBuffer, + boost::bind(&EthStratumClient::handleResponse, this, + boost::asio::placeholders::error)); + if (m_stale) + { + cwarn << "Stale solution found. Submitted to" << p_active->host; } - else { - m_stale = false; - cwarn << "FAILURE: GPU gave incorrect result!"; - p_farm->failedSolution(); + else + { + cnote << "Solution found; Submitted to" << p_active->host; } - - return false; + if (m_protocol != STRATUM_PROTOCOL_ETHEREUMSTRATUM) { + cnote << "Nonce:" << "0x" + nonceHex; + } + return true; } diff --git a/libstratum/EthStratumClient.h b/libstratum/EthStratumClient.h index e9a44b6a27..6928fe457f 100644 --- a/libstratum/EthStratumClient.h +++ b/libstratum/EthStratumClient.h @@ -65,15 +65,10 @@ class EthStratumClient int m_pending; Farm* p_farm; - std::mutex x_current; WorkPackage m_current; - WorkPackage m_previous; bool m_stale = false; - string m_job; - string m_previousJob; - std::thread m_serviceThread; ///< The IO service thread. boost::asio::io_service m_io_service; boost::asio::ip::tcp::socket m_socket; @@ -81,7 +76,7 @@ class EthStratumClient boost::asio::streambuf m_requestBuffer; boost::asio::streambuf m_responseBuffer; - boost::asio::deadline_timer * p_worktimer; + boost::asio::deadline_timer m_worktimer; int m_protocol; string m_email; @@ -94,4 +89,4 @@ class EthStratumClient string m_submit_hashrate_id; void processExtranonce(std::string& enonce); -}; \ No newline at end of file +}; diff --git a/libstratum/EthStratumClientV2.cpp b/libstratum/EthStratumClientV2.cpp index ef3df8db86..f7654899b5 100644 --- a/libstratum/EthStratumClientV2.cpp +++ b/libstratum/EthStratumClientV2.cpp @@ -344,22 +344,13 @@ void EthStratumClientV2::processReponse(Json::Value& responseObject) if (sHeaderHash != "" && sSeedHash != "") { - h256 seedHash = h256(sSeedHash); - - m_previous.header = m_current.header; - m_previous.seed = m_current.seed; - m_previous.boundary = m_current.boundary; - m_previous.startNonce = m_current.startNonce; - m_previous.exSizeBits = m_previous.exSizeBits; - m_previousJob = m_job; - m_current.header = h256(sHeaderHash); - m_current.seed = seedHash; + m_current.seed = h256(sSeedHash); m_current.boundary = h256(); diffToTarget((uint32_t*)m_current.boundary.data(), m_nextWorkDifficulty); m_current.startNonce = ethash_swap_u64(*((uint64_t*)m_extraNonce.data())); m_current.exSizeBits = m_extraNonceHexSize * 4; - m_job = job; + m_current.job = h256(job); p_farm->setWork(m_current); cnote << "Received new job #" + job.substr(0, 8) @@ -382,29 +373,16 @@ void EthStratumClientV2::processReponse(Json::Value& responseObject) if (sHeaderHash != "" && sSeedHash != "" && sShareTarget != "") { - h256 seedHash = h256(sSeedHash); h256 headerHash = h256(sHeaderHash); if (headerHash != m_current.header) { - //x_current.lock(); - //if (p_worktimer) - // p_worktimer->cancel(); - - m_previous.header = m_current.header; - m_previous.seed = m_current.seed; - m_previous.boundary = m_current.boundary; - m_previousJob = m_job; - m_current.header = h256(sHeaderHash); - m_current.seed = seedHash; + m_current.seed = h256(sSeedHash); m_current.boundary = h256(sShareTarget); - m_job = job; + m_current.job = h256(job); p_farm->setWork(m_current); - //x_current.unlock(); - //p_worktimer = new boost::asio::deadline_timer(m_io_service, boost::posix_time::seconds(m_worktimeout)); - //p_worktimer->async_wait(boost::bind(&EthStratumClientV2::work_timeout_handler, this, boost::asio::placeholders::error)); } cnote << "Received new job #" + job.substr(0, 8) << " seed: " << "#" + m_current.seed.hex().substr(0, 32) @@ -459,12 +437,6 @@ bool EthStratumClientV2::submitHashrate(string const & rate) { } bool EthStratumClientV2::submit(Solution solution) { - x_current.lock(); - WorkPackage tempWork(m_current); - string temp_job = m_job; - WorkPackage tempPreviousWork(m_previous); - string temp_previous_job = m_previousJob; - x_current.unlock(); string minernonce; string nonceHex = toHex(solution.nonce); @@ -472,56 +444,33 @@ bool EthStratumClientV2::submit(Solution solution) { minernonce = nonceHex.substr(m_extraNonceHexSize, 16 - m_extraNonceHexSize); } - if (EthashAux::eval(tempWork.seed, tempWork.header, solution.nonce).value < tempWork.boundary) + string json; + switch (m_protocol) { + case STRATUM_PROTOCOL_STRATUM: + json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + solution.job.hex() + "\",\"0x" + nonceHex + "\",\"0x" + solution.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n"; + break; + case STRATUM_PROTOCOL_ETHPROXY: + json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + nonceHex + "\",\"0x" + solution.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n"; + break; + case STRATUM_PROTOCOL_ETHEREUMSTRATUM: + json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + solution.job.hex() + "\",\"" + minernonce + "\"]}\n"; + break; + } + std::ostream os(&m_requestBuffer); + os << json; + m_stale = solution.stale; + write(m_socket, m_requestBuffer); + if (m_stale) { - string json; - switch (m_protocol) { - case STRATUM_PROTOCOL_STRATUM: - json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_job + "\",\"0x" + nonceHex + "\",\"0x" + tempWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n"; - break; - case STRATUM_PROTOCOL_ETHPROXY: - json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + nonceHex + "\",\"0x" + tempWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n"; - break; - case STRATUM_PROTOCOL_ETHEREUMSTRATUM: - json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_job + "\",\"" + minernonce + "\"]}\n"; - break; - } - std::ostream os(&m_requestBuffer); - os << json; - m_stale = false; - write(m_socket, m_requestBuffer); - cnote << "Solution found; Submitted to" << p_active->host; - if (m_protocol != STRATUM_PROTOCOL_ETHEREUMSTRATUM) { - cnote << "Nonce:" << "0x" + nonceHex; - } - return true; + cwarn << "Stale solution found; Submitted to" << p_active->host; } - else if (EthashAux::eval(tempPreviousWork.seed, tempPreviousWork.header, solution.nonce).value < tempPreviousWork.boundary) + else { - string json; - switch (m_protocol) { - case STRATUM_PROTOCOL_STRATUM: - json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_previous_job + "\",\"0x" + nonceHex + "\",\"0x" + tempPreviousWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n"; - break; - case STRATUM_PROTOCOL_ETHPROXY: - json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + nonceHex + "\",\"0x" + tempPreviousWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n"; - break; - case STRATUM_PROTOCOL_ETHEREUMSTRATUM: - json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_previous_job + "\",\"" + minernonce + "\"]}\n"; - break; - } std::ostream os(&m_requestBuffer); - os << json; - m_stale = true; - write(m_socket, m_requestBuffer); - cwarn << "Submitted stale solution."; - return true; + cnote << "Solution found; Submitted to" << p_active->host; } - else { - m_stale = false; - cwarn << "FAILURE: GPU gave incorrect result!"; - p_farm->failedSolution(); + if (m_protocol != STRATUM_PROTOCOL_ETHEREUMSTRATUM) { + cnote << "Nonce:" << "0x" + nonceHex; } - - return false; + return true; } diff --git a/libstratum/EthStratumClientV2.h b/libstratum/EthStratumClientV2.h index 8902805747..f308fa1569 100644 --- a/libstratum/EthStratumClientV2.h +++ b/libstratum/EthStratumClientV2.h @@ -64,15 +64,10 @@ class EthStratumClientV2 : public Worker string m_response; Farm* p_farm; - mutex x_current; WorkPackage m_current; - WorkPackage m_previous; bool m_stale = false; - string m_job; - string m_previousJob; - boost::asio::io_service m_io_service; boost::asio::ip::tcp::socket m_socket; @@ -92,4 +87,4 @@ class EthStratumClientV2 : public Worker string m_submit_hashrate_id; void processExtranonce(std::string& enonce); -}; \ No newline at end of file +};