diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h
index 48f939cfb3..e5f141a1cc 100644
--- a/ethminer/MinerAux.h
+++ b/ethminer/MinerAux.h
@@ -292,6 +292,18 @@ class MinerCLI
 				BOOST_THROW_EXCEPTION(BadArgument());
 			}
 		}
+		else if (arg == "--cl-kernel" && i + 1 < argc)
+		{
+			try
+			{
+				m_openclSelectedKernel = stol(argv[++i]);
+			}
+			catch (...)
+			{
+				cerr << "Bad " << arg << " option: " << argv[i] << endl;
+				BOOST_THROW_EXCEPTION(BadArgument());
+			}
+		}
 #endif
 #if ETH_ETHASHCL || ETH_ETHASHCUDA
 		else if ((arg == "--cl-global-work" || arg == "--cuda-grid-size")  && i + 1 < argc)
@@ -501,7 +513,9 @@ class MinerCLI
 				m_miningThreads = m_openclDeviceCount;
 			}
 			
+			CLMiner::setCLKernel(m_openclSelectedKernel);
 			CLMiner::setThreadsPerHash(m_openclThreadsPerHash);
+
 			if (!CLMiner::configureGPU(
 					m_localWorkSize,
 					m_globalWorkSizeMultiplier,
@@ -601,11 +615,17 @@ class MinerCLI
 			<< "        sequential  - load DAG on GPUs one after another. Use this when the miner crashes during DAG generation" << endl
 			<< "        single <n>  - generate DAG on device n, then copy to other devices" << endl
 #if ETH_ETHASHCL
+			<< " OpenCL configuration:" << endl
+			<< "    --cl-kernel <n>  Use a different OpenCL kernel (default: use stable kernel)" << endl
+			<< "        0: stable kernel" << endl
+			<< "        1: unstable kernel" << endl
+//			<< "        2: experimental kernel" << endl
 			<< "    --cl-local-work Set the OpenCL local work size. Default is " << CLMiner::c_defaultLocalWorkSize << endl
 			<< "    --cl-global-work Set the OpenCL global work size as a multiple of the local work size. Default is " << CLMiner::c_defaultGlobalWorkSizeMultiplier << " * " << CLMiner::c_defaultLocalWorkSize << endl
 			<< "    --cl-parallel-hash <1 2 ..8> Define how many threads to associate per hash. Default=8" << endl
 #endif
 #if ETH_ETHASHCUDA
+			<< " CUDA configuration:" << endl
 			<< "    --cuda-block-size Set the CUDA block work size. Default is " << toString(ethash_cuda_miner::c_defaultBlockSize) << endl
 			<< "    --cuda-grid-size Set the CUDA grid size. Default is " << toString(ethash_cuda_miner::c_defaultGridSize) << endl
 			<< "    --cuda-streams Set the number of CUDA streams. Default is " << toString(ethash_cuda_miner::c_defaultNumStreams) << endl
@@ -618,6 +638,7 @@ class MinerCLI
 			<< "    --cuda-parallel-hash <1 2 ..8> Define how many hashes to calculate in a kernel, can be scaled to achieve better performance. Default=4" << endl
 #endif
 #if API_CORE
+			<< " API core configuration:" << endl
 			<< "    --api-port Set the api port, the miner should listen to. Use 0 to disable. Default=0, use negative numbers to run in readonly mode. for example -3333." << endl
 #endif
 			;
@@ -856,30 +877,22 @@ class MinerCLI
 					}
 					this_thread::sleep_for(chrono::milliseconds(_recheckPeriod));
 				}
-				if (EthashAux::eval(solution.seedHash, solution.headerHash, solution.nonce).value < solution.boundary)
-				{
-					bool ok = prpc->eth_submitWork("0x" + toHex(solution.nonce), "0x" + toString(solution.headerHash), "0x" + toString(solution.mixHash));
-					if (ok) {
-						cnote << "Solution found; Submitted to" << _remote << "...";
-						cnote << "  Nonce:" << solution.nonce;
-						cnote << "  headerHash:" << solution.headerHash.hex();
-						cnote << "  mixHash:" << solution.mixHash.hex();
-						cnote << EthLime << "Accepted." << EthReset;
-						f.acceptedSolution(false);
-					}
-					else {
-						cwarn << "Solution found; Submitted to" << _remote << "...";
-						cwarn << "  Nonce:" << solution.nonce;
-						cwarn << "  headerHash:" << solution.headerHash.hex();
-						cwarn << "  mixHash:" << solution.mixHash.hex();
-						cwarn << "Not accepted.";
-						f.rejectedSolution(false);
-					}
-					//exit(0);
+				bool ok = prpc->eth_submitWork("0x" + toHex(solution.nonce), "0x" + toString(solution.headerHash), "0x" + toString(solution.mixHash));
+				if (ok) {
+					cnote << "Solution found; Submitted to" << _remote;
+					cnote << "  Nonce:" << solution.nonce;
+					cnote << "  headerHash:" << solution.headerHash.hex();
+					cnote << "  mixHash:" << solution.mixHash.hex();
+					cnote << EthLime << " Accepted." << EthReset;
+					f.acceptedSolution(solution.stale);
 				}
 				else {
-					f.failedSolution();
-					cwarn << "FAILURE: GPU gave incorrect result!";
+					cwarn << "Solution found; Submitted to" << _remote;
+					cwarn << "  Nonce:" << solution.nonce;
+					cwarn << "  headerHash:" << solution.headerHash.hex();
+					cwarn << "  mixHash:" << solution.mixHash.hex();
+					cwarn << EthYellow << " Rejected." << EthReset;
+					f.rejectedSolution(solution.stale);
 				}
 			}
 			catch (jsonrpc::JsonRpcException&)
@@ -1056,6 +1069,7 @@ class MinerCLI
 	unsigned m_miningThreads = UINT_MAX;
 	bool m_shouldListDevices = false;
 #if ETH_ETHASHCL
+	unsigned m_openclSelectedKernel = 0;  ///< A numeric value for the selected OpenCL kernel
 	unsigned m_openclDeviceCount = 0;
 	unsigned m_openclDevices[16];
 	unsigned m_openclThreadsPerHash = 8;
diff --git a/libethash-cl/CLMiner.cpp b/libethash-cl/CLMiner.cpp
index 625d381449..a8f9594e50 100644
--- a/libethash-cl/CLMiner.cpp
+++ b/libethash-cl/CLMiner.cpp
@@ -5,7 +5,8 @@
 
 #include "CLMiner.h"
 #include <libethash/internal.h>
-#include "CLMiner_kernel.h"
+#include "CLMiner_kernel_stable.h"
+#include "CLMiner_kernel_unstable.h"
 
 using namespace dev;
 using namespace eth;
@@ -18,6 +19,7 @@ namespace eth
 unsigned CLMiner::s_workgroupSize = CLMiner::c_defaultLocalWorkSize;
 unsigned CLMiner::s_initialGlobalWorkSize = CLMiner::c_defaultGlobalWorkSizeMultiplier * CLMiner::c_defaultLocalWorkSize;
 unsigned CLMiner::s_threadsPerHash = 8;
+CLKernelName CLMiner::s_clKernelName = CLMiner::c_defaultKernelName;
 
 constexpr size_t c_maxSearchResults = 1;
 
@@ -30,6 +32,172 @@ struct CLChannel: public LogChannel
 #define cllog clog(CLChannel)
 #define ETHCL_LOG(_contents) cllog << _contents
 
+/**
+ * Returns the name of a numerical cl_int error
+ * Takes constants from CL/cl.h and returns them in a readable format
+ */
+static const char *strClError(cl_int err) {
+
+	switch (err) {
+	case CL_SUCCESS:
+		return "CL_SUCCESS";
+	case CL_DEVICE_NOT_FOUND:
+		return "CL_DEVICE_NOT_FOUND";
+	case CL_DEVICE_NOT_AVAILABLE:
+		return "CL_DEVICE_NOT_AVAILABLE";
+	case CL_COMPILER_NOT_AVAILABLE:
+		return "CL_COMPILER_NOT_AVAILABLE";
+	case CL_MEM_OBJECT_ALLOCATION_FAILURE:
+		return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
+	case CL_OUT_OF_RESOURCES:
+		return "CL_OUT_OF_RESOURCES";
+	case CL_OUT_OF_HOST_MEMORY:
+		return "CL_OUT_OF_HOST_MEMORY";
+	case CL_PROFILING_INFO_NOT_AVAILABLE:
+		return "CL_PROFILING_INFO_NOT_AVAILABLE";
+	case CL_MEM_COPY_OVERLAP:
+		return "CL_MEM_COPY_OVERLAP";
+	case CL_IMAGE_FORMAT_MISMATCH:
+		return "CL_IMAGE_FORMAT_MISMATCH";
+	case CL_IMAGE_FORMAT_NOT_SUPPORTED:
+		return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
+	case CL_BUILD_PROGRAM_FAILURE:
+		return "CL_BUILD_PROGRAM_FAILURE";
+	case CL_MAP_FAILURE:
+		return "CL_MAP_FAILURE";
+	case CL_MISALIGNED_SUB_BUFFER_OFFSET:
+		return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
+	case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
+		return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
+
+#ifdef CL_VERSION_1_2
+	case CL_COMPILE_PROGRAM_FAILURE:
+		return "CL_COMPILE_PROGRAM_FAILURE";
+	case CL_LINKER_NOT_AVAILABLE:
+		return "CL_LINKER_NOT_AVAILABLE";
+	case CL_LINK_PROGRAM_FAILURE:
+		return "CL_LINK_PROGRAM_FAILURE";
+	case CL_DEVICE_PARTITION_FAILED:
+		return "CL_DEVICE_PARTITION_FAILED";
+	case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
+		return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
+#endif // CL_VERSION_1_2
+
+	case CL_INVALID_VALUE:
+		return "CL_INVALID_VALUE";
+	case CL_INVALID_DEVICE_TYPE:
+		return "CL_INVALID_DEVICE_TYPE";
+	case CL_INVALID_PLATFORM:
+		return "CL_INVALID_PLATFORM";
+	case CL_INVALID_DEVICE:
+		return "CL_INVALID_DEVICE";
+	case CL_INVALID_CONTEXT:
+		return "CL_INVALID_CONTEXT";
+	case CL_INVALID_QUEUE_PROPERTIES:
+		return "CL_INVALID_QUEUE_PROPERTIES";
+	case CL_INVALID_COMMAND_QUEUE:
+		return "CL_INVALID_COMMAND_QUEUE";
+	case CL_INVALID_HOST_PTR:
+		return "CL_INVALID_HOST_PTR";
+	case CL_INVALID_MEM_OBJECT:
+		return "CL_INVALID_MEM_OBJECT";
+	case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
+		return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
+	case CL_INVALID_IMAGE_SIZE:
+		return "CL_INVALID_IMAGE_SIZE";
+	case CL_INVALID_SAMPLER:
+		return "CL_INVALID_SAMPLER";
+	case CL_INVALID_BINARY:
+		return "CL_INVALID_BINARY";
+	case CL_INVALID_BUILD_OPTIONS:
+		return "CL_INVALID_BUILD_OPTIONS";
+	case CL_INVALID_PROGRAM:
+		return "CL_INVALID_PROGRAM";
+	case CL_INVALID_PROGRAM_EXECUTABLE:
+		return "CL_INVALID_PROGRAM_EXECUTABLE";
+	case CL_INVALID_KERNEL_NAME:
+		return "CL_INVALID_KERNEL_NAME";
+	case CL_INVALID_KERNEL_DEFINITION:
+		return "CL_INVALID_KERNEL_DEFINITION";
+	case CL_INVALID_KERNEL:
+		return "CL_INVALID_KERNEL";
+	case CL_INVALID_ARG_INDEX:
+		return "CL_INVALID_ARG_INDEX";
+	case CL_INVALID_ARG_VALUE:
+		return "CL_INVALID_ARG_VALUE";
+	case CL_INVALID_ARG_SIZE:
+		return "CL_INVALID_ARG_SIZE";
+	case CL_INVALID_KERNEL_ARGS:
+		return "CL_INVALID_KERNEL_ARGS";
+	case CL_INVALID_WORK_DIMENSION:
+		return "CL_INVALID_WORK_DIMENSION";
+	case CL_INVALID_WORK_GROUP_SIZE:
+		return "CL_INVALID_WORK_GROUP_SIZE";
+	case CL_INVALID_WORK_ITEM_SIZE:
+		return "CL_INVALID_WORK_ITEM_SIZE";
+	case CL_INVALID_GLOBAL_OFFSET:
+		return "CL_INVALID_GLOBAL_OFFSET";
+	case CL_INVALID_EVENT_WAIT_LIST:
+		return "CL_INVALID_EVENT_WAIT_LIST";
+	case CL_INVALID_EVENT:
+		return "CL_INVALID_EVENT";
+	case CL_INVALID_OPERATION:
+		return "CL_INVALID_OPERATION";
+	case CL_INVALID_GL_OBJECT:
+		return "CL_INVALID_GL_OBJECT";
+	case CL_INVALID_BUFFER_SIZE:
+		return "CL_INVALID_BUFFER_SIZE";
+	case CL_INVALID_MIP_LEVEL:
+		return "CL_INVALID_MIP_LEVEL";
+	case CL_INVALID_GLOBAL_WORK_SIZE:
+		return "CL_INVALID_GLOBAL_WORK_SIZE";
+	case CL_INVALID_PROPERTY:
+		return "CL_INVALID_PROPERTY";
+
+#ifdef CL_VERSION_1_2
+	case CL_INVALID_IMAGE_DESCRIPTOR:
+		return "CL_INVALID_IMAGE_DESCRIPTOR";
+	case CL_INVALID_COMPILER_OPTIONS:
+		return "CL_INVALID_COMPILER_OPTIONS";
+	case CL_INVALID_LINKER_OPTIONS:
+		return "CL_INVALID_LINKER_OPTIONS";
+	case CL_INVALID_DEVICE_PARTITION_COUNT:
+		return "CL_INVALID_DEVICE_PARTITION_COUNT";
+#endif // CL_VERSION_1_2
+
+#ifdef CL_VERSION_2_0
+	case CL_INVALID_PIPE_SIZE:
+		return "CL_INVALID_PIPE_SIZE";
+	case CL_INVALID_DEVICE_QUEUE:
+		return "CL_INVALID_DEVICE_QUEUE";
+#endif // CL_VERSION_2_0
+
+#ifdef CL_VERSION_2_2
+	case CL_INVALID_SPEC_ID:
+		return "CL_INVALID_SPEC_ID";
+	case CL_MAX_SIZE_RESTRICTION_EXCEEDED:
+		return "CL_MAX_SIZE_RESTRICTION_EXCEEDED";
+#endif // CL_VERSION_2_2
+	}
+
+	return "Unknown CL error encountered";
+}
+
+/**
+ * Prints cl::Errors in a uniform way
+ * @param msg text prepending the error message
+ * @param clerr cl:Error object
+ *
+ * Prints errors in the format:
+ *      msg: what(), string err() (numeric err())
+ */
+static std::string ethCLErrorHelper(const char *msg, cl::Error const &clerr) {
+	std::ostringstream osstream;
+	osstream << msg << ": " << clerr.what() << ": " << strClError(clerr.err())
+	         << " (" << clerr.err() << ")";
+	return osstream.str();
+}
+
 namespace
 {
 
@@ -103,9 +271,11 @@ void CLMiner::report(uint64_t _nonce, WorkPackage const& _w)
 	// TODO: Why re-evaluating?
 	Result r = EthashAux::eval(_w.seed, _w.header, _nonce);
 	if (r.value < _w.boundary)
-		farm.submitProof(Solution{_nonce, r.mixHash, _w.header, _w.seed, _w.boundary});
-	else
-		cwarn << "Invalid solution";
+		farm.submitProof(Solution{_nonce, r.mixHash, _w.header, _w.seed, _w.boundary, _w.job, false});
+	else {
+		farm.failedSolution();
+		cwarn << "FAILURE: GPU gave incorrect result!";
+	}
 }
 
 void CLMiner::kickOff()
@@ -229,7 +399,7 @@ void CLMiner::workLoop()
 	}
 	catch (cl::Error const& _e)
 	{
-		cwarn << "OpenCL Error:" << _e.what() << _e.err();
+		cwarn << ethCLErrorHelper("OpenCL Error", _e);
 	}
 }
 
@@ -464,10 +634,26 @@ bool CLMiner::init(const h256& seed)
 		uint32_t lightSize64 = (unsigned)(light->data().size() / sizeof(node));
 
 		// patch source code
-		// note: CLMiner_kernel is simply ethash_cl_miner_kernel.cl compiled
+		// note: The kernels here are simply compiled version of the respective .cl kernels
 		// into a byte array by bin2h.cmake. There is no need to load the file by hand in runtime
+		// See libethash-cl/CMakeLists.txt: add_custom_command()
 		// TODO: Just use C++ raw string literal.
-		string code(CLMiner_kernel, CLMiner_kernel + sizeof(CLMiner_kernel));
+		string code;
+
+		if ( s_clKernelName == CLKernelName::Unstable ) {
+			cllog << "OpenCL kernel: Unstable kernel";
+			code = string(CLMiner_kernel_unstable, CLMiner_kernel_unstable + sizeof(CLMiner_kernel_unstable));
+		}
+		else { //if(s_clKernelName == CLKernelName::Stable)
+			cllog << "OpenCL kernel: Stable kernel";
+
+			//CLMiner_kernel_stable.cl will do a #undef THREADS_PER_HASH
+			if(s_threadsPerHash != 8) {
+				cwarn << "The current stable OpenCL kernel only supports exactly 8 threads. Thread parameter will be ignored.";
+			}
+
+			code = string(CLMiner_kernel_stable, CLMiner_kernel_stable + sizeof(CLMiner_kernel_stable));
+		}
 		addDefinition(code, "GROUP_SIZE", m_workgroupSize);
 		addDefinition(code, "DAG_SIZE", dagSize128);
 		addDefinition(code, "LIGHT_SIZE", lightSize64);
@@ -506,7 +692,7 @@ bool CLMiner::init(const h256& seed)
 		}
 		catch (cl::Error const& err)
 		{
-			cwarn << "Creating DAG buffer failed:" << err.what() << err.err();
+			cwarn << ethCLErrorHelper("Creating DAG buffer failed", err);
 			return false;
 		}
 		// create buffer for header
@@ -543,7 +729,7 @@ bool CLMiner::init(const h256& seed)
 	}
 	catch (cl::Error const& err)
 	{
-		cwarn << err.what() << "(" << err.err() << ")";
+		cwarn << ethCLErrorHelper("OpenCL init failed", err);
 		return false;
 	}
 	return true;
diff --git a/libethash-cl/CLMiner.h b/libethash-cl/CLMiner.h
index 27064a947b..9b8c26ee33 100644
--- a/libethash-cl/CLMiner.h
+++ b/libethash-cl/CLMiner.h
@@ -41,6 +41,11 @@ namespace dev
 namespace eth
 {
 
+enum CLKernelName {
+	Stable,
+	Unstable,
+};
+
 class CLMiner: public Miner
 {
 public:
@@ -50,6 +55,9 @@ class CLMiner: public Miner
 	/// Default value of the global work size as a multiplier of the local work size
 	static const unsigned c_defaultGlobalWorkSizeMultiplier = 8192;
 
+	/// Default value of the kernel is the original one
+	static const CLKernelName c_defaultKernelName = CLKernelName::Stable;
+
 	CLMiner(FarmFace& _farm, unsigned _index);
 	~CLMiner();
 
@@ -73,6 +81,7 @@ class CLMiner: public Miner
 			s_devices[i] = _devices[i];
 		}
 	}
+	static void setCLKernel(unsigned _clKernel) { s_clKernelName = _clKernel == 1 ? CLKernelName::Unstable : CLKernelName::Stable; }
 	HwMonitor hwmon() override;
 protected:
 	void kickOff() override;
@@ -98,6 +107,7 @@ class CLMiner: public Miner
 	static unsigned s_platformId;
 	static unsigned s_numInstances;
 	static unsigned s_threadsPerHash;
+	static CLKernelName s_clKernelName;
 	static int s_devices[16];
 
 	/// The local work size for the search
diff --git a/libethash-cl/CLMiner_kernel_stable.cl b/libethash-cl/CLMiner_kernel_stable.cl
new file mode 100644
index 0000000000..a6a301e278
--- /dev/null
+++ b/libethash-cl/CLMiner_kernel_stable.cl
@@ -0,0 +1,414 @@
+#define OPENCL_PLATFORM_UNKNOWN 0
+#define OPENCL_PLATFORM_NVIDIA  1
+#define OPENCL_PLATFORM_AMD     2
+#define OPENCL_PLATFORM_CLOVER  3
+
+#ifndef ACCESSES
+#define ACCESSES 64
+#endif
+
+#ifndef GROUP_SIZE
+#define GROUP_SIZE 128
+#endif
+
+#ifndef MAX_OUTPUTS
+#define MAX_OUTPUTS 63U
+#endif
+
+#ifndef PLATFORM
+#define PLATFORM OPENCL_PLATFORM_AMD
+#endif
+
+#ifndef DAG_SIZE
+#define DAG_SIZE 8388593
+#endif
+
+#ifndef LIGHT_SIZE
+#define LIGHT_SIZE 262139
+#endif
+
+#define ETHASH_DATASET_PARENTS 256
+#define NODE_WORDS (64/4)
+
+//this kernel supports only exactly 8 threads
+//overwrite whatever is incoming
+#ifdef THREADS_PER_HASH
+#undef THREADS_PER_HASH
+#endif
+
+#define THREADS_PER_HASH (128 / 16)
+#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH)
+#define FNV_PRIME	0x01000193
+
+__constant uint2 const Keccak_f1600_RC[24] = {
+	(uint2)(0x00000001, 0x00000000),
+	(uint2)(0x00008082, 0x00000000),
+	(uint2)(0x0000808a, 0x80000000),
+	(uint2)(0x80008000, 0x80000000),
+	(uint2)(0x0000808b, 0x00000000),
+	(uint2)(0x80000001, 0x00000000),
+	(uint2)(0x80008081, 0x80000000),
+	(uint2)(0x00008009, 0x80000000),
+	(uint2)(0x0000008a, 0x00000000),
+	(uint2)(0x00000088, 0x00000000),
+	(uint2)(0x80008009, 0x00000000),
+	(uint2)(0x8000000a, 0x00000000),
+	(uint2)(0x8000808b, 0x00000000),
+	(uint2)(0x0000008b, 0x80000000),
+	(uint2)(0x00008089, 0x80000000),
+	(uint2)(0x00008003, 0x80000000),
+	(uint2)(0x00008002, 0x80000000),
+	(uint2)(0x00000080, 0x80000000),
+	(uint2)(0x0000800a, 0x00000000),
+	(uint2)(0x8000000a, 0x80000000),
+	(uint2)(0x80008081, 0x80000000),
+	(uint2)(0x00008080, 0x80000000),
+	(uint2)(0x80000001, 0x00000000),
+	(uint2)(0x80008008, 0x80000000),
+};
+
+#ifdef cl_clang_storage_class_specifiers
+#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
+#endif
+
+#if PLATFORM == OPENCL_PLATFORM_NVIDIA && COMPUTE >= 35
+static uint2 ROL2(const uint2 a, const int offset) {
+	uint2 result;
+	if (offset >= 32) {
+		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset));
+		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset));
+	}
+	else {
+		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset));
+		asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset));
+	}
+	return result;
+}
+#elif PLATFORM == OPENCL_PLATFORM_AMD
+#pragma OPENCL EXTENSION cl_amd_media_ops : enable
+static uint2 ROL2(const uint2 vv, const int r)
+{
+	if (r <= 32)
+	{
+		return amd_bitalign((vv).xy, (vv).yx, 32 - r);
+	}
+	else
+	{
+		return amd_bitalign((vv).yx, (vv).xy, 64 - r);
+	}
+}
+#else
+static uint2 ROL2(const uint2 v, const int n)
+{
+	uint2 result;
+	if (n <= 32)
+	{
+		result.y = ((v.y << (n)) | (v.x >> (32 - n)));
+		result.x = ((v.x << (n)) | (v.y >> (32 - n)));
+	}
+	else
+	{
+		result.y = ((v.x << (n - 32)) | (v.y >> (64 - n)));
+		result.x = ((v.y << (n - 32)) | (v.x >> (64 - n)));
+	}
+	return result;
+}
+#endif
+
+static void chi(uint2 * a, const uint n, const uint2 * t)
+{
+	a[n+0] = bitselect(t[n + 0] ^ t[n + 2], t[n + 0], t[n + 1]);
+	a[n+1] = bitselect(t[n + 1] ^ t[n + 3], t[n + 1], t[n + 2]);
+	a[n+2] = bitselect(t[n + 2] ^ t[n + 4], t[n + 2], t[n + 3]);
+	a[n+3] = bitselect(t[n + 3] ^ t[n + 0], t[n + 3], t[n + 4]);
+	a[n+4] = bitselect(t[n + 4] ^ t[n + 1], t[n + 4], t[n + 0]);
+}
+
+static void keccak_f1600_round(uint2* a, uint r)
+{
+	uint2 t[25];
+	uint2 u;
+
+	// Theta
+	t[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20];
+	t[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21];
+	t[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22];
+	t[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23];
+	t[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24];
+	u = t[4] ^ ROL2(t[1], 1);
+	a[0] ^= u;
+	a[5] ^= u;
+	a[10] ^= u;
+	a[15] ^= u;
+	a[20] ^= u;
+	u = t[0] ^ ROL2(t[2], 1);
+	a[1] ^= u;
+	a[6] ^= u;
+	a[11] ^= u;
+	a[16] ^= u;
+	a[21] ^= u;
+	u = t[1] ^ ROL2(t[3], 1);
+	a[2] ^= u;
+	a[7] ^= u;
+	a[12] ^= u;
+	a[17] ^= u;
+	a[22] ^= u;
+	u = t[2] ^ ROL2(t[4], 1);
+	a[3] ^= u;
+	a[8] ^= u;
+	a[13] ^= u;
+	a[18] ^= u;
+	a[23] ^= u;
+	u = t[3] ^ ROL2(t[0], 1);
+	a[4] ^= u;
+	a[9] ^= u;
+	a[14] ^= u;
+	a[19] ^= u;
+	a[24] ^= u;
+
+	// Rho Pi
+
+	t[0]  = a[0];
+	t[10] = ROL2(a[1], 1);
+	t[20] = ROL2(a[2], 62);
+	t[5]  = ROL2(a[3], 28);
+	t[15] = ROL2(a[4], 27);
+	
+	t[16] = ROL2(a[5], 36);
+	t[1]  = ROL2(a[6], 44);
+	t[11] = ROL2(a[7], 6);
+	t[21] = ROL2(a[8], 55);
+	t[6]  = ROL2(a[9], 20);
+	
+	t[7]  = ROL2(a[10], 3);
+	t[17] = ROL2(a[11], 10);
+	t[2]  = ROL2(a[12], 43);
+	t[12] = ROL2(a[13], 25);
+	t[22] = ROL2(a[14], 39);
+	
+	t[23] = ROL2(a[15], 41);
+	t[8]  = ROL2(a[16], 45);
+	t[18] = ROL2(a[17], 15);
+	t[3]  = ROL2(a[18], 21);
+	t[13] = ROL2(a[19], 8);
+	
+	t[14] = ROL2(a[20], 18);
+	t[24] = ROL2(a[21], 2);
+	t[9]  = ROL2(a[22], 61);
+	t[19] = ROL2(a[23], 56);
+	t[4]  = ROL2(a[24], 14);
+
+	// Chi
+	chi(a, 0, t);
+
+	// Iota
+	a[0] ^= Keccak_f1600_RC[r];
+
+	chi(a, 5, t);
+	chi(a, 10, t);
+	chi(a, 15, t);
+	chi(a, 20, t);
+}
+
+static void keccak_f1600_no_absorb(uint2* a, uint out_size, uint isolate)
+{
+	// Originally I unrolled the first and last rounds to interface
+	// better with surrounding code, however I haven't done this
+	// without causing the AMD compiler to blow up the VGPR usage.
+
+	
+	//uint o = 25;
+	for (uint r = 0; r < 24;)
+	{
+		// This dynamic branch stops the AMD compiler unrolling the loop
+		// and additionally saves about 33% of the VGPRs, enough to gain another
+		// wavefront. Ideally we'd get 4 in flight, but 3 is the best I can
+		// massage out of the compiler. It doesn't really seem to matter how
+		// much we try and help the compiler save VGPRs because it seems to throw
+		// that information away, hence the implementation of keccak here
+		// doesn't bother.
+		if (isolate)
+		{
+			keccak_f1600_round(a, r++);
+			//if (r == 23) o = out_size;
+		}
+	} 
+	
+
+	// final round optimised for digest size
+	//keccak_f1600_round(a, 23, out_size);
+}
+
+#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; }
+
+static uint fnv(uint x, uint y)
+{
+	return x * FNV_PRIME ^ y;
+}
+
+static uint4 fnv4(uint4 x, uint4 y)
+{
+	return x * FNV_PRIME ^ y;
+}
+
+static uint fnv_reduce(uint4 v)
+{
+	return fnv(fnv(fnv(v.x, v.y), v.z), v.w);
+}
+
+typedef struct
+{
+	ulong ulongs[32 / sizeof(ulong)];
+} hash32_t;
+
+typedef union {
+	uint	 words[64 / sizeof(uint)];
+	uint2	 uint2s[64 / sizeof(uint2)];
+	uint4	 uint4s[64 / sizeof(uint4)];
+} hash64_t;
+
+typedef union {
+	uint	 words[200 / sizeof(uint)];
+	uint2	 uint2s[200 / sizeof(uint2)];
+	uint4	 uint4s[200 / sizeof(uint4)];
+} hash200_t;
+
+typedef struct
+{
+	uint4 uint4s[128 / sizeof(uint4)];
+} hash128_t;
+
+typedef union {
+	uint4 uint4s[4];
+	ulong ulongs[8];
+	uint  uints[16];
+} compute_hash_share;
+
+#if PLATFORM != OPENCL_PLATFORM_NVIDIA // use maxrregs on nv
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+#endif
+__kernel void ethash_search(
+	__global volatile uint* restrict g_output,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	ulong start_nonce,
+	ulong target,
+	uint isolate
+	)
+{
+	__local compute_hash_share share[HASHES_PER_LOOP];
+
+	uint const gid = get_global_id(0);
+
+	// Compute one init hash per work item.
+
+	// sha3_512(header .. nonce)
+	ulong state[25];
+	copy(state, g_header->ulongs, 4);
+	state[4] = start_nonce + gid;
+
+	for (uint i = 6; i != 25; ++i)
+	{
+		state[i] = 0;
+	}
+	state[5] = 0x0000000000000001;
+	state[8] = 0x8000000000000000;
+
+	keccak_f1600_no_absorb((uint2*)state, 8, isolate);
+	
+	// Threads work together in this phase in groups of 8.
+	uint const thread_id = gid & 7;
+	uint const hash_id = (gid % GROUP_SIZE) >> 3;
+
+	for (int i = 0; i < THREADS_PER_HASH; i++)
+	{
+		// share init with other threads
+		if (i == thread_id)
+			copy(share[hash_id].ulongs, state, 8);
+
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		uint4 mix = share[hash_id].uint4s[thread_id & 3];
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		__local uint *share0 = share[hash_id].uints;
+
+		// share init0
+		if (thread_id == 0)
+			*share0 = mix.x;
+		barrier(CLK_LOCAL_MEM_FENCE);
+		uint init0 = *share0;
+
+		for (uint a = 0; a < ACCESSES; a += 4)
+		{
+			bool update_share = thread_id == ((a >> 2) & (THREADS_PER_HASH - 1));
+
+			for (uint i = 0; i != 4; ++i)
+			{
+				if (update_share)
+				{
+					*share0 = fnv(init0 ^ (a + i), ((uint *)&mix)[i]) % DAG_SIZE;
+				}
+				barrier(CLK_LOCAL_MEM_FENCE);
+
+				mix = fnv4(mix, g_dag[*share0].uint4s[thread_id]);
+			}
+		}
+
+		share[hash_id].uints[thread_id] = fnv_reduce(mix);
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		if (i == thread_id)
+			copy(state + 8, share[hash_id].ulongs, 4);
+
+		barrier(CLK_LOCAL_MEM_FENCE);
+	}
+
+	for (uint i = 13; i != 25; ++i)
+	{
+		state[i] = 0;
+	}
+	state[12] = 0x0000000000000001;
+	state[16] = 0x8000000000000000;
+
+	// keccak_256(keccak_512(header..nonce) .. mix);
+	keccak_f1600_no_absorb((uint2*)state, 1, isolate);
+
+	if (as_ulong(as_uchar8(state[0]).s76543210) < target)
+	{
+		uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
+		g_output[slot] = gid;
+	}
+}
+
+static void SHA3_512(uint2* s, uint isolate)
+{
+	for (uint i = 8; i != 25; ++i)
+	{
+		s[i] = (uint2){ 0, 0 };
+	}
+	s[8].x = 0x00000001;
+	s[8].y = 0x80000000;
+	keccak_f1600_no_absorb(s, 8, isolate);
+}
+
+__kernel void ethash_calculate_dag_item(uint start, __global hash64_t const* g_light, __global hash64_t * g_dag, uint isolate)
+{
+	uint const node_index = start + get_global_id(0);
+	if (node_index > DAG_SIZE * 2) return;
+
+	hash200_t dag_node;
+	copy(dag_node.uint4s, g_light[node_index % LIGHT_SIZE].uint4s, 4);
+	dag_node.words[0] ^= node_index;
+	SHA3_512(dag_node.uint2s, isolate);
+
+	for (uint i = 0; i != ETHASH_DATASET_PARENTS; ++i) {
+		uint parent_index = fnv(node_index ^ i, dag_node.words[i % NODE_WORDS]) % LIGHT_SIZE;
+
+		for (uint w = 0; w != 4; ++w) {
+			dag_node.uint4s[w] = fnv4(dag_node.uint4s[w], g_light[parent_index].uint4s[w]);
+		}
+	}
+	SHA3_512(dag_node.uint2s, isolate);
+	copy(g_dag[node_index].uint4s, dag_node.uint4s, 4);
+}
diff --git a/libethash-cl/CLMiner_kernel.cl b/libethash-cl/CLMiner_kernel_unstable.cl
similarity index 99%
rename from libethash-cl/CLMiner_kernel.cl
rename to libethash-cl/CLMiner_kernel_unstable.cl
index c473f447b7..7c3bef9c6f 100644
--- a/libethash-cl/CLMiner_kernel.cl
+++ b/libethash-cl/CLMiner_kernel_unstable.cl
@@ -36,6 +36,10 @@
 #endif
 #define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH)
 
+#ifdef cl_clang_storage_class_specifiers
+    #pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
+#endif
+
 // Check for valid THREADS_PER_HASH param
 #if THREADS_PER_HASH == 1
     #define LN_THREAD_PER_HASH  0
diff --git a/libethash-cl/CMakeLists.txt b/libethash-cl/CMakeLists.txt
index 55e58e9215..be3ee2e902 100644
--- a/libethash-cl/CMakeLists.txt
+++ b/libethash-cl/CMakeLists.txt
@@ -1,21 +1,36 @@
 # A custom command and target to turn the OpenCL kernel into a byte array header
 # The normal build depends on it properly and if the kernel file is changed, then
 # a rebuild of libethash-cl should be triggered
+
+#TODO: clean up the copy&pasting here
+add_custom_command(
+	OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_stable.h
+	COMMAND ${CMAKE_COMMAND} ARGS
+	-DBIN2H_SOURCE_FILE="${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel_stable.cl"
+	-DBIN2H_VARIABLE_NAME=CLMiner_kernel_stable
+	-DBIN2H_HEADER_FILE="${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_stable.h"
+	-P "${CMAKE_CURRENT_SOURCE_DIR}/bin2h.cmake"
+	COMMENT "Generating OpenCL Kernel Byte Array"
+	DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel_stable.cl
+)
+add_custom_target(clbin2h_stable DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_stable.h ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel_stable.cl)
+
 add_custom_command(
-	OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel.h
+	OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_unstable.h
 	COMMAND ${CMAKE_COMMAND} ARGS
-	-DBIN2H_SOURCE_FILE="${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel.cl"
-	-DBIN2H_VARIABLE_NAME=CLMiner_kernel
-	-DBIN2H_HEADER_FILE="${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel.h"
+	-DBIN2H_SOURCE_FILE="${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel_unstable.cl"
+	-DBIN2H_VARIABLE_NAME=CLMiner_kernel_unstable
+	-DBIN2H_HEADER_FILE="${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_unstable.h"
 	-P "${CMAKE_CURRENT_SOURCE_DIR}/bin2h.cmake"
 	COMMENT "Generating OpenCL Kernel Byte Array"
-	DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel.cl
+	DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel_unstable.cl
 )
-add_custom_target(clbin2h DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel.cl)
+add_custom_target(clbin2h_unstable DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_unstable.h ${CMAKE_CURRENT_SOURCE_DIR}/CLMiner_kernel_unstable.cl)
 
 set(SOURCES
 	CLMiner.h CLMiner.cpp
-	${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel.h
+	${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_stable.h
+	${CMAKE_CURRENT_BINARY_DIR}/CLMiner_kernel_unstable.h
 )
 
 if(APPLE)
diff --git a/libethash-cuda/CUDAMiner.cpp b/libethash-cuda/CUDAMiner.cpp
index d563144f46..058a937eb7 100644
--- a/libethash-cuda/CUDAMiner.cpp
+++ b/libethash-cuda/CUDAMiner.cpp
@@ -14,12 +14,6 @@ GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
 */
-/** @file CUDAMiner.cpp
-* @author Gav Wood <i@gavwood.com>
-* @date 2014
-*
-* Determines the PoW algorithm.
-*/
 
 #include "CUDAMiner.h"
 
@@ -59,18 +53,26 @@ namespace eth
 			m_aborted = m_abort = false;
 		}
 
+		bool isStale()
+		{
+			return m_abort;
+		}
+
+
 	protected:
-		virtual bool found(uint64_t const* _nonces) override
+		void found(uint64_t const* _nonces, uint32_t count) override
 		{
-			m_owner.report(_nonces[0]);
-			return m_owner.shouldStop();
+			for (uint32_t i = 0; i < count; i++)
+				m_owner.report(_nonces[i]);
 		}
 
-		virtual bool searched(uint64_t _startNonce, uint32_t _count) override
+		void searched(uint32_t _count) override
 		{
-			(void) _startNonce;  // FIXME: unusued arg.
-			UniqueGuard l(x_all);
 			m_owner.addHashCount(_count);
+		}
+
+		bool shouldStop() override
+		{
 			if (m_abort || m_owner.shouldStop())
 				return (m_aborted = true);
 			return false;
@@ -84,21 +86,19 @@ namespace eth
 	};
 }
 }
-unsigned CUDAMiner::s_platformId = 0;
-unsigned CUDAMiner::s_deviceId = 0;
 unsigned CUDAMiner::s_numInstances = 0;
 int CUDAMiner::s_devices[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };
 
 CUDAMiner::CUDAMiner(FarmFace& _farm, unsigned _index) :
 	Miner("CUDA", _farm, _index),
-	m_hook(new EthashCUDAHook(*this))  // FIXME!
+	m_hook(new EthashCUDAHook(*this)),
+	m_miner(getNumDevices())
 {}
 
 CUDAMiner::~CUDAMiner()
 {
 	stopWorking();
 	pause();
-	delete m_miner;
 	delete m_hook;
 }
 
@@ -108,7 +108,13 @@ void CUDAMiner::report(uint64_t _nonce)
 	WorkPackage w = work();  // Copy work package to avoid repeated mutex lock.
 	Result r = EthashAux::eval(w.seed, w.header, _nonce);
 	if (r.value < w.boundary)
-		farm.submitProof(Solution{_nonce, r.mixHash, w.header, w.seed, w.boundary});
+		farm.submitProof(Solution{_nonce, r.mixHash, w.header, w.seed, w.boundary, w.job, m_hook->isStale()});
+	else
+	{
+		farm.failedSolution();
+		cwarn << "FAILURE: GPU gave incorrect result!";
+
+	}
 }
 
 void CUDAMiner::kickOff()
@@ -123,17 +129,13 @@ bool CUDAMiner::init(const h256& seed)
 		unsigned device = s_devices[index] > -1 ? s_devices[index] : index;
 
 		cnote << "Initialising miner...";
-		m_minerSeed = seed;
-
-		if(!m_miner)
-			m_miner = new ethash_cuda_miner;
 
 		EthashAux::LightType light;
 		light = EthashAux::light(seed);
 		bytesConstRef lightData = light->data();
 
-		m_miner->init(light->light, lightData.data(), lightData.size(), 
-			device, (s_dagLoadMode == DAG_LOAD_MODE_SINGLE), s_dagInHostMemory);
+		m_miner.init(getNumDevices(), light->light, lightData.data(), lightData.size(), 
+			device, (s_dagLoadMode == DAG_LOAD_MODE_SINGLE), s_dagInHostMemory, s_dagCreateDevice);
 		s_dagLoadIndex++;
     
 		if (s_dagLoadMode == DAG_LOAD_MODE_SINGLE)
@@ -143,15 +145,13 @@ bool CUDAMiner::init(const h256& seed)
 				// all devices have loaded DAG, we can free now
 				delete[] s_dagInHostMemory;
 				s_dagInHostMemory = NULL;
-				cout << "Freeing DAG from host" << endl;
+				cnote << "Freeing DAG from host";
 			}
 		}
 		return true;
 	}
 	catch (std::runtime_error const& _e)
 	{
-		delete m_miner;
-		m_miner = nullptr;
 		cwarn << "Error CUDA mining: " << _e.what();
 		return false;
 	}
@@ -168,7 +168,7 @@ void CUDAMiner::workLoop()
 		{
 			const WorkPackage w = work();
 			
-			if(!m_miner || current.header != w.header || current.seed != w.seed)
+			if (current.header != w.header || current.seed != w.seed)
 			{
 				if(!w || w.header == h256())
 				{
@@ -178,7 +178,7 @@ void CUDAMiner::workLoop()
 				}
 				
 				//cnote << "set work; seed: " << "#" + w.seed.hex().substr(0, 8) + ", target: " << "#" + w.boundary.hex().substr(0, 12);
-				if (!m_miner || current.seed != w.seed)
+				if (current.seed != w.seed)
 				{
 					if(!init(w.seed))
 						break;
@@ -189,7 +189,7 @@ void CUDAMiner::workLoop()
 			uint64_t startN = current.startNonce;
 			if (current.exSizeBits >= 0) 
 				startN = current.startNonce | ((uint64_t)index << (64 - 4 - current.exSizeBits)); // this can support up to 16 devices
-			m_miner->search(current.header.data(), upper64OfBoundary, *m_hook, (current.exSizeBits >= 0), startN);
+			m_miner.search(current.header.data(), upper64OfBoundary, *m_hook, (current.exSizeBits >= 0), startN);
 
 			// Check if we should stop.
 			if (shouldStop())
@@ -200,8 +200,6 @@ void CUDAMiner::workLoop()
 	}
 	catch (std::runtime_error const& _e)
 	{
-		delete m_miner;
-		m_miner = nullptr;
 		cwarn << "Error CUDA mining: " << _e.what();
 	}
 }
@@ -211,28 +209,51 @@ void CUDAMiner::pause()
 	m_hook->abort();
 }
 
-std::string CUDAMiner::platformInfo()
-{
-	return ethash_cuda_miner::platform_info(s_deviceId);
-}
-
 unsigned CUDAMiner::getNumDevices()
 {
-	return ethash_cuda_miner::getNumDevices();
+	int deviceCount = -1;
+	cudaError_t err = cudaGetDeviceCount(&deviceCount);
+	if (err == cudaSuccess)
+		return deviceCount;
+
+	if (err == cudaErrorInsufficientDriver)
+	{
+		int driverVersion = -1;
+		cudaDriverGetVersion(&driverVersion);
+		if (driverVersion == 0)
+			throw std::runtime_error{"No CUDA driver found"};
+		throw std::runtime_error{"Insufficient CUDA driver: " + std::to_string(driverVersion)};
+	}
+
+	throw std::runtime_error{cudaGetErrorString(err)};
 }
 
 void CUDAMiner::listDevices()
 {
-	return ethash_cuda_miner::listDevices();
+	try
+	{
+		string outString = "\nListing CUDA devices.\nFORMAT: [deviceID] deviceName\n";
+		int numDevices = getNumDevices();
+		for (int i = 0; i < numDevices; ++i)
+		{
+			cudaDeviceProp props;
+			CUDA_SAFE_CALL(cudaGetDeviceProperties(&props, i));
+
+			outString += "[" + to_string(i) + "] " + string(props.name) + "\n";
+			outString += "\tCompute version: " + to_string(props.major) + "." + to_string(props.minor) + "\n";
+			outString += "\tcudaDeviceProp::totalGlobalMem: " + to_string(props.totalGlobalMem) + "\n";
+		}
+		std::cout << outString;
+	}
+	catch(std::runtime_error const& err)
+	{
+		cwarn << "CUDA error: " << err.what();
+	}
 }
 
 HwMonitor CUDAMiner::hwmon()
 {
-	HwMonitor hw;
-	if (m_miner) {
-		hw = m_miner->hwmon();
-	}
-	return hw;
+	return m_miner.hwmon();
 }
 
 bool CUDAMiner::configureGPU(
@@ -250,6 +271,7 @@ bool CUDAMiner::configureGPU(
 	_blockSize = ((_blockSize + 7) / 8) * 8;
 
 	if (!ethash_cuda_miner::configureGPU(
+		getNumDevices(),
 		s_devices,
 		_blockSize,
 		_gridSize,
diff --git a/libethash-cuda/CUDAMiner.h b/libethash-cuda/CUDAMiner.h
index 3e0fce5fd2..780e4a51cb 100644
--- a/libethash-cuda/CUDAMiner.h
+++ b/libethash-cuda/CUDAMiner.h
@@ -39,13 +39,12 @@ class EthashCUDAHook;
 
 	public:
 		CUDAMiner(FarmFace& _farm, unsigned _index);
-		~CUDAMiner();
+		~CUDAMiner() override;
 
 		static unsigned instances() 
 		{ 
 			return s_numInstances > 0 ? s_numInstances : 1; 
 		}
-		static std::string platformInfo();
 		static unsigned getNumDevices();
 		static void listDevices();
 		static void setParallelHash(unsigned _parallelHash);
@@ -62,7 +61,7 @@ class EthashCUDAHook;
 		{ 
 			s_numInstances = std::min<unsigned>(_instances, getNumDevices());
 		}
-		static void setDevices(unsigned * _devices, unsigned _selectedDeviceCount) 
+		static void setDevices(const unsigned* _devices, unsigned _selectedDeviceCount)
 		{
 			for (unsigned i = 0; i < _selectedDeviceCount; i++) 
 			{
@@ -76,16 +75,12 @@ class EthashCUDAHook;
 	private:
 		void workLoop() override;
 		void report(uint64_t _nonce);
-		void initDevice(WorkPackage w);
 
 		bool init(const h256& seed);
 
 		EthashCUDAHook* m_hook = nullptr;
-		ethash_cuda_miner* m_miner = nullptr;
+		ethash_cuda_miner m_miner;
 
-		h256 m_minerSeed;		///< Last seed in m_miner
-		static unsigned s_platformId;
-		static unsigned s_deviceId;
 		static unsigned s_numInstances;
 		static int s_devices[16];
 
diff --git a/libethash-cuda/ethash_cuda_miner.cpp b/libethash-cuda/ethash_cuda_miner.cpp
index 162287c590..f6b61b76cb 100644
--- a/libethash-cuda/ethash_cuda_miner.cpp
+++ b/libethash-cuda/ethash_cuda_miner.cpp
@@ -61,61 +61,11 @@ unsigned const ethash_cuda_miner::c_defaultNumStreams = 2;
 
 ethash_cuda_miner::search_hook::~search_hook() {}
 
-ethash_cuda_miner::ethash_cuda_miner()
-{
-	m_light.resize(getNumDevices());
-}
-
-std::string ethash_cuda_miner::platform_info(unsigned _deviceId)
-{
-	int runtime_version;
-	int device_count;
-
-	device_count = getNumDevices();
-
-	if (device_count == 0)
-		return std::string();
-
-	CUDA_SAFE_CALL(cudaRuntimeGetVersion(&runtime_version));
-
-	// use selected default device
-	int device_num = std::min<int>((int)_deviceId, device_count - 1);
-	cudaDeviceProp device_props;
-
-	CUDA_SAFE_CALL(cudaGetDeviceProperties(&device_props, device_num));
-
-	char platform[5];
-	int version_major = runtime_version / 1000;
-	int version_minor = (runtime_version - (version_major * 1000)) / 10;
-	sprintf(platform, "%d.%d", version_major, version_minor);
-
-	char compute[5];
-	sprintf(compute, "%d.%d", device_props.major, device_props.minor);
-
-	return "{ \"platform\": \"CUDA " + std::string(platform) + "\", \"device\": \"" + std::string(device_props.name) + "\", \"version\": \"Compute " + std::string(compute) + "\" }";
-}
-
-int ethash_cuda_miner::getNumDevices()
-{
-	int deviceCount = -1;
-	cudaError_t err = cudaGetDeviceCount(&deviceCount);
-	if (err == cudaSuccess)
-		return deviceCount;
-
-	if (err == cudaErrorInsufficientDriver)
-	{
-		int driverVersion = -1;
-		cudaDriverGetVersion(&driverVersion);
-		if (driverVersion == 0)
-			throw std::runtime_error{"No CUDA driver found"};
-		throw std::runtime_error{"Insufficient CUDA driver: " + std::to_string(driverVersion)};
-	}
-
-	throw std::runtime_error{cudaGetErrorString(err)};
-}
+ethash_cuda_miner::ethash_cuda_miner(size_t numDevices) : m_light(numDevices) {}
 
 bool ethash_cuda_miner::configureGPU(
-	int *	 _devices,
+	size_t numDevices,
+	const int* _devices,
 	unsigned _blockSize,
 	unsigned _gridSize,
 	unsigned _numStreams,
@@ -134,10 +84,9 @@ bool ethash_cuda_miner::configureGPU(
 
 		// by default let's only consider the DAG of the first epoch
 		uint64_t dagSize = ethash_get_datasize(_currentBlock);
-		int devicesCount = getNumDevices();
+		int devicesCount = static_cast<int>(numDevices);
 		for (int i = 0; i < devicesCount; i++)
 		{
-			
 			if (_devices[i] != -1)
 			{
 				int deviceId = min(devicesCount - 1, _devices[i]);
@@ -173,45 +122,15 @@ unsigned ethash_cuda_miner::s_gridSize = ethash_cuda_miner::c_defaultGridSize;
 unsigned ethash_cuda_miner::s_numStreams = ethash_cuda_miner::c_defaultNumStreams;
 unsigned ethash_cuda_miner::s_scheduleFlag = 0;
 
-void ethash_cuda_miner::listDevices()
+bool ethash_cuda_miner::init(size_t numDevices, ethash_light_t _light, uint8_t const* _lightData, uint64_t _lightSize, unsigned _deviceId, bool _cpyToHost, uint8_t* &hostDAG, unsigned dagCreateDevice)
 {
 	try
 	{
-		string outString = "\nListing CUDA devices.\nFORMAT: [deviceID] deviceName\n";
-		int numDevices = getNumDevices();
-		for (int i = 0; i < numDevices; ++i)
-		{
-			cudaDeviceProp props;
-			CUDA_SAFE_CALL(cudaGetDeviceProperties(&props, i));
-
-			outString += "[" + to_string(i) + "] " + string(props.name) + "\n";
-			outString += "\tCompute version: " + to_string(props.major) + "." + to_string(props.minor) + "\n";
-			outString += "\tcudaDeviceProp::totalGlobalMem: " + to_string(props.totalGlobalMem) + "\n";
-		}
-		std::cout << outString;
-	}
-	catch(std::runtime_error const& err)
-	{
-		cwarn << "CUDA error: " << err.what();
-	}
-}
-
-void ethash_cuda_miner::finish()
-{
-	CUDA_SAFE_CALL(cudaDeviceReset());
-}
-
-bool ethash_cuda_miner::init(ethash_light_t _light, uint8_t const* _lightData, uint64_t _lightSize, unsigned _deviceId, bool _cpyToHost, uint8_t* &hostDAG)
-{
-	try
-	{
-		unsigned device_count = getNumDevices();
-
-		if (device_count == 0)
+		if (numDevices == 0)
 			return false;
 
 		// use selected device
-		m_device_num = _deviceId < device_count -1 ? _deviceId : device_count - 1;
+		m_device_num = _deviceId < numDevices -1 ? _deviceId : numDevices - 1;
 		nvmlh = wrap_nvml_create();
 
 		cudaDeviceProp device_props;
@@ -232,6 +151,12 @@ bool ethash_cuda_miner::init(ethash_light_t _light, uint8_t const* _lightData, u
 		cudalog << "Set Device to current";
 		if(dagSize128 != m_dag_size || !m_dag)
 		{
+			//Check whether the current device has sufficient memory everytime we recreate the dag
+			if (device_props.totalGlobalMem < dagSize)
+			{
+				cudalog <<  "CUDA device " << string(device_props.name) << " has insufficient GPU memory." << device_props.totalGlobalMem << " bytes of memory found < " << dagSize << " bytes of memory required";
+				return false;
+			}
 			//We need to reset the device and recreate the dag  
 			cudalog << "Resetting device";
 			CUDA_SAFE_CALL(cudaDeviceReset());
@@ -278,7 +203,7 @@ bool ethash_cuda_miner::init(ethash_light_t _light, uint8_t const* _lightData, u
 
 			if (!hostDAG)
 			{
-				if(m_device_num == 0 || !_cpyToHost){ //if !cpyToHost -> All devices shall generate their DAG
+				if((m_device_num == dagCreateDevice) || !_cpyToHost){ //if !cpyToHost -> All devices shall generate their DAG
 					cudalog << "Generating DAG for GPU #" << m_device_num << " with dagSize: " 
 							<< dagSize <<" gridSize: " << s_gridSize << " &m_streams[0]: " << &m_streams[0];
 					ethash_generate_dag(dagSize, s_gridSize, s_blockSize, m_streams[0], m_device_num);
@@ -374,16 +299,21 @@ void ethash_cuda_miner::search(uint8_t const* header, uint64_t target, search_ho
 		{
 			CUDA_SAFE_CALL(cudaStreamSynchronize(stream));
 			found_count = buffer[0];
-			if (found_count)
+			if (found_count) {
 				buffer[0] = 0;
-			for (unsigned int j = 0; j < found_count; j++)
-				nonces[j] = nonce_base + buffer[j + 1];
+				if (found_count > (SEARCH_RESULT_BUFFER_SIZE - 1))
+					found_count = SEARCH_RESULT_BUFFER_SIZE - 1;
+				for (unsigned int j = 0; j < found_count; j++)
+					nonces[j] = nonce_base + buffer[j + 1];
+			}
 		}
 		run_ethash_search(s_gridSize, s_blockSize, m_sharedBytes, stream, buffer, m_current_nonce, m_parallelHash);
 		if (m_current_index >= s_numStreams)
 		{
-			exit = found_count && hook.found(nonces);
-			exit |= hook.searched(nonce_base, batch_size);
+			if (found_count)
+				hook.found(nonces, found_count);
+			hook.searched(batch_size);
+			exit = hook.shouldStop();
 		}
 	}
 }
diff --git a/libethash-cuda/ethash_cuda_miner.h b/libethash-cuda/ethash_cuda_miner.h
index e924e8de60..dbcbe53f5a 100644
--- a/libethash-cuda/ethash_cuda_miner.h
+++ b/libethash-cuda/ethash_cuda_miner.h
@@ -17,29 +17,27 @@ class ethash_cuda_miner
 		virtual ~search_hook(); // always a virtual destructor for a class with virtuals.
 
 		// reports progress, return true to abort
-		virtual bool found(uint64_t const* nonces) = 0;
-		virtual bool searched(uint64_t start_nonce, uint32_t count) = 0;
+		virtual void found(uint64_t const* nonces, uint32_t count) = 0;
+		virtual void searched(uint32_t count) = 0;
+		virtual bool shouldStop() = 0;
 	};
 
 public:
-	ethash_cuda_miner();
+	explicit ethash_cuda_miner(size_t numDevices);
 
-	static std::string platform_info(unsigned _deviceId = 0);
-	static int getNumDevices();
-	static void listDevices();
 	static bool configureGPU(
-		int *	 _devices,
+		size_t numDevices,
+		const int* _devices,
 		unsigned _blockSize,
 		unsigned _gridSize,
 		unsigned _numStreams,
 		unsigned _scheduleFlag,
 		uint64_t _currentBlock
 		);
-        static void setParallelHash(unsigned _parallelHash);
+	static void setParallelHash(unsigned _parallelHash);
 
-	bool init(ethash_light_t _light, uint8_t const* _lightData, uint64_t _lightSize, unsigned _deviceId, bool _cpyToHost, uint8_t * &hostDAG);
+	bool init(size_t numDevices, ethash_light_t _light, uint8_t const* _lightData, uint64_t _lightSize, unsigned _deviceId, bool _cpyToHost, uint8_t * &hostDAG, unsigned dagCreateDevice);
 
-	void finish();
 	void search(uint8_t const* header, uint64_t target, search_hook& hook, bool _ethStratum, uint64_t _startN);
 	dev::eth::HwMonitor hwmon();
 
@@ -81,5 +79,5 @@ class ethash_cuda_miner
 
 	static unsigned m_parallelHash;
 
-	wrap_nvml_handle *nvmlh = NULL;
+	wrap_nvml_handle *nvmlh = nullptr;
 };
diff --git a/libethash-cuda/ethash_cuda_miner_kernel.cu b/libethash-cuda/ethash_cuda_miner_kernel.cu
index 5ae6f89649..03ff2ce698 100644
--- a/libethash-cuda/ethash_cuda_miner_kernel.cu
+++ b/libethash-cuda/ethash_cuda_miner_kernel.cu
@@ -31,7 +31,8 @@ ethash_search(
 	uint32_t const gid = blockIdx.x * blockDim.x + threadIdx.x;	
         uint64_t hash = compute_hash<_PARALLEL_HASH>(start_nonce + gid);
 	if (cuda_swab64(hash) > d_target) return;
-	uint32_t index = atomicInc(const_cast<uint32_t*>(g_output), SEARCH_RESULT_BUFFER_SIZE - 1) + 1;
+	uint32_t index = atomicInc(const_cast<uint32_t*>(g_output), 0xffffffff) + 1;
+	if (index > SEARCH_RESULT_BUFFER_SIZE -1) return;
 	g_output[index] = gid;
 }
 
@@ -49,11 +50,7 @@ void run_ethash_search(
 	{
 		case 1: ethash_search <1> <<<blocks, threads, sharedbytes, stream >>>(g_output, start_nonce); break;
 		case 2: ethash_search <2> <<<blocks, threads, sharedbytes, stream >>>(g_output, start_nonce); break;
-		case 3: ethash_search <3> <<<blocks, threads, sharedbytes, stream >>>(g_output, start_nonce); break;
 		case 4: ethash_search <4> <<<blocks, threads, sharedbytes, stream >>>(g_output, start_nonce); break;
-		case 5: ethash_search <5> <<<blocks, threads, sharedbytes, stream >>>(g_output, start_nonce); break;
-		case 6: ethash_search <6> <<<blocks, threads, sharedbytes, stream >>>(g_output, start_nonce); break;
-		case 7: ethash_search <7> <<<blocks, threads, sharedbytes, stream >>>(g_output, start_nonce); break;
 		case 8: ethash_search <8> <<<blocks, threads, sharedbytes, stream >>>(g_output, start_nonce); break;
 		default: ethash_search <4> <<<blocks, threads, sharedbytes, stream >>>(g_output, start_nonce); break;
 	}
@@ -153,9 +150,7 @@ void ethash_generate_dag(
 	{
 		ethash_calculate_dag_item <<<blocks, threads, 0, stream >>>(i * blocks * threads);
 		CUDA_SAFE_CALL(cudaDeviceSynchronize());
-		printf("CUDA#%d: %.0f%%\n",device, 100.0f * (float)i / (float)fullRuns);
 	}
-	//printf("GPU#%d 100%%\n");
 	CUDA_SAFE_CALL(cudaGetLastError());
 }
 
diff --git a/libethcore/EthashAux.h b/libethcore/EthashAux.h
index 909c223e31..145f294f41 100644
--- a/libethcore/EthashAux.h
+++ b/libethcore/EthashAux.h
@@ -39,6 +39,8 @@ struct Solution
 	h256 headerHash;
 	h256 seedHash;
 	h256 boundary;
+	h256 job;
+	bool stale;
 };
 
 struct Result
@@ -95,6 +97,7 @@ struct WorkPackage
 	h256 boundary;
 	h256 header;	///< When h256() means "pause until notified a new work package is available".
 	h256 seed;
+	h256 job;
 
 	uint64_t startNonce = 0;
 	int exSizeBits = -1;
diff --git a/libethcore/Miner.h b/libethcore/Miner.h
index ac04bce8be..e4fdca42a4 100644
--- a/libethcore/Miner.h
+++ b/libethcore/Miner.h
@@ -155,6 +155,7 @@ class FarmFace
 	 * @return true iff the solution was good (implying that mining should be .
 	 */
 	virtual bool submitProof(Solution const& _p) = 0;
+	virtual void failedSolution() = 0;
 };
 
 /**
diff --git a/libstratum/EthStratumClient.cpp b/libstratum/EthStratumClient.cpp
index d2b1ec0dd4..7fd5eeb07e 100644
--- a/libstratum/EthStratumClient.cpp
+++ b/libstratum/EthStratumClient.cpp
@@ -28,7 +28,8 @@ static void diffToTarget(uint32_t *target, double diff)
 
 
 EthStratumClient::EthStratumClient(Farm* f, MinerType m, string const & host, string const & port, string const & user, string const & pass, int const & retries, int const & worktimeout, int const & protocol, string const & email)
-	: m_socket(m_io_service)
+        :       m_socket(m_io_service),
+	        m_worktimer(m_io_service)
 {
 	m_minerType = m;
 	m_primary.host = host;
@@ -50,7 +51,6 @@ EthStratumClient::EthStratumClient(Farm* f, MinerType m, string const & host, st
 	m_submit_hashrate_id = h256::random().hex();
 	
 	p_farm = f;
-	p_worktimer = nullptr;
 	connect();
 }
 
@@ -100,10 +100,7 @@ void EthStratumClient::connect()
 
 void EthStratumClient::reconnect()
 {
-	if (p_worktimer) {
-		p_worktimer->cancel();
-		p_worktimer = nullptr;
-	}
+	m_worktimer.cancel();
 
 	m_io_service.reset();
 	//m_socket.close(); // leads to crashes on Linux
@@ -399,23 +396,13 @@ void EthStratumClient::processReponse(Json::Value& responseObject)
 
 					if (sHeaderHash != "" && sSeedHash != "")
 					{
-
-						h256 seedHash = h256(sSeedHash);
-
-						m_previous.header = m_current.header;
-						m_previous.seed = m_current.seed;
-						m_previous.boundary = m_current.boundary;
-						m_previous.startNonce = m_current.startNonce;
-						m_previous.exSizeBits = m_previous.exSizeBits;
-						m_previousJob = m_job;
-
 						m_current.header = h256(sHeaderHash);
-						m_current.seed = seedHash;
+						m_current.seed = h256(sSeedHash);
 						m_current.boundary = h256();
 						diffToTarget((uint32_t*)m_current.boundary.data(), m_nextWorkDifficulty);
 						m_current.startNonce = ethash_swap_u64(*((uint64_t*)m_extraNonce.data()));
 						m_current.exSizeBits = m_extraNonceHexSize * 4;
-						m_job = job;
+						m_current.job = h256(job);
 
 						p_farm->setWork(m_current);
 						cnote << "Received new job #" + job.substr(0, 8)
@@ -437,35 +424,22 @@ void EthStratumClient::processReponse(Json::Value& responseObject)
 
 					if (sHeaderHash != "" && sSeedHash != "" && sShareTarget != "")
 					{
-
-						h256 seedHash = h256(sSeedHash);
 						h256 headerHash = h256(sHeaderHash);
 
 						if (headerHash != m_current.header)
 						{
-							//x_current.lock();
-							if (p_worktimer)
-								p_worktimer->cancel();
-
-							m_previous.header = m_current.header;
-							m_previous.seed = m_current.seed;
-							m_previous.boundary = m_current.boundary;
-							m_previousJob = m_job;
+							m_worktimer.cancel();
+                                                        m_worktimer.expires_from_now(boost::posix_time::seconds(m_worktimeout));
 
 							m_current.header = h256(sHeaderHash);
-							m_current.seed = seedHash;
+							m_current.seed = h256(sSeedHash);
 							m_current.boundary = h256(sShareTarget);
-							m_job = job;
+							m_current.job = h256(job);
 
 							p_farm->setWork(m_current);
 							cnote << "Received new job #" + job.substr(0, 8)
 								<< " seed: " << "#" + m_current.seed.hex().substr(0, 32)
 								<< " target: " << "#" + m_current.boundary.hex().substr(0, 24);
-
-							//x_current.unlock();
-							p_worktimer = new boost::asio::deadline_timer(m_io_service, boost::posix_time::seconds(m_worktimeout));
-							p_worktimer->async_wait(boost::bind(&EthStratumClient::work_timeout_handler, this, boost::asio::placeholders::error));
-
 						}
 					}
 				}
@@ -519,12 +493,6 @@ bool EthStratumClient::submitHashrate(string const & rate) {
 }
 
 bool EthStratumClient::submit(Solution solution) {
-	x_current.lock();
-	WorkPackage tempWork(m_current);
-	string temp_job = m_job;
-	WorkPackage tempPreviousWork(m_previous);
-	string temp_previous_job = m_previousJob;
-	x_current.unlock();
 
 	string minernonce;
 	string nonceHex = toHex(solution.nonce);
@@ -532,65 +500,37 @@ bool EthStratumClient::submit(Solution solution) {
 		minernonce = nonceHex.substr(m_extraNonceHexSize, 16 - m_extraNonceHexSize);
 	}
 
-	if (EthashAux::eval(tempWork.seed, tempWork.header, solution.nonce).value < tempWork.boundary)
-	{
-		string json;
-
-		switch (m_protocol) {
-			case STRATUM_PROTOCOL_STRATUM:
-				json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_job + "\",\"0x" + nonceHex + "\",\"0x" + tempWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
-				break;
-			case STRATUM_PROTOCOL_ETHPROXY:
-				json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + nonceHex + "\",\"0x" + tempWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
-				break;
-			case STRATUM_PROTOCOL_ETHEREUMSTRATUM:
-				json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_job + "\",\"" + minernonce + "\"]}\n";
-				break;
-		}
-
-		std::ostream os(&m_requestBuffer);
-		os << json;
-		m_stale = false;
-		async_write(m_socket, m_requestBuffer,
-			boost::bind(&EthStratumClient::handleResponse, this,
-			boost::asio::placeholders::error));
-		cnote << "Solution found; Submitted to" << p_active->host;
-		if (m_protocol != STRATUM_PROTOCOL_ETHEREUMSTRATUM) {
-			cnote << "Nonce:" << "0x" + nonceHex;
-		}
-		return true;
-	}
-	else if (EthashAux::eval(tempPreviousWork.seed, tempPreviousWork.header, solution.nonce).value < tempPreviousWork.boundary)
-	{
-		string json;
+	string json;
 
-		switch (m_protocol) {
+	switch (m_protocol) {
 		case STRATUM_PROTOCOL_STRATUM:
-			json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_previous_job + "\",\"0x" + nonceHex + "\",\"0x" + tempPreviousWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
+			json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + solution.job.hex() + "\",\"0x" + nonceHex + "\",\"0x" + solution.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
 			break;
 		case STRATUM_PROTOCOL_ETHPROXY:
-			json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + nonceHex + "\",\"0x" + tempPreviousWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
+			json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + nonceHex + "\",\"0x" + solution.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
 			break;
 		case STRATUM_PROTOCOL_ETHEREUMSTRATUM:
-			json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_previous_job + "\",\"" + minernonce + "\"]}\n";
+			json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + solution.job.hex() + "\",\"" + minernonce + "\"]}\n";
 			break;
-		}
+	}
 
-		std::ostream os(&m_requestBuffer);
-		os << json;
-		m_stale = true;
-		async_write(m_socket, m_requestBuffer,
-			boost::bind(&EthStratumClient::handleResponse, this,
-			boost::asio::placeholders::error));
-		cwarn << "Submitted stale solution.";
-		return true;
+	std::ostream os(&m_requestBuffer);
+	os << json;
+	m_stale = solution.stale;
+	async_write(m_socket, m_requestBuffer,
+		boost::bind(&EthStratumClient::handleResponse, this,
+		boost::asio::placeholders::error));
+	if (m_stale)
+	{
+		cwarn << "Stale solution found. Submitted to" << p_active->host;
 	}
-	else {
-		m_stale = false;
-		cwarn << "FAILURE: GPU gave incorrect result!";
-		p_farm->failedSolution();
+	else
+	{
+		cnote << "Solution found; Submitted to" << p_active->host;
 	}
-
-	return false;
+	if (m_protocol != STRATUM_PROTOCOL_ETHEREUMSTRATUM) {
+		cnote << "Nonce:" << "0x" + nonceHex;
+	}
+	return true;
 }
 
diff --git a/libstratum/EthStratumClient.h b/libstratum/EthStratumClient.h
index e9a44b6a27..6928fe457f 100644
--- a/libstratum/EthStratumClient.h
+++ b/libstratum/EthStratumClient.h
@@ -65,15 +65,10 @@ class EthStratumClient
 	int m_pending;
 
 	Farm* p_farm;
-	std::mutex x_current;
 	WorkPackage m_current;
-	WorkPackage m_previous;
 
 	bool m_stale = false;
 
-	string m_job;
-	string m_previousJob;
-
 	std::thread m_serviceThread;  ///< The IO service thread.
 	boost::asio::io_service m_io_service;
 	boost::asio::ip::tcp::socket m_socket;
@@ -81,7 +76,7 @@ class EthStratumClient
 	boost::asio::streambuf m_requestBuffer;
 	boost::asio::streambuf m_responseBuffer;
 
-	boost::asio::deadline_timer * p_worktimer;
+	boost::asio::deadline_timer m_worktimer;
 
 	int m_protocol;
 	string m_email;
@@ -94,4 +89,4 @@ class EthStratumClient
 	string m_submit_hashrate_id;
 
 	void processExtranonce(std::string& enonce);
-};
\ No newline at end of file
+};
diff --git a/libstratum/EthStratumClientV2.cpp b/libstratum/EthStratumClientV2.cpp
index ef3df8db86..f7654899b5 100644
--- a/libstratum/EthStratumClientV2.cpp
+++ b/libstratum/EthStratumClientV2.cpp
@@ -344,22 +344,13 @@ void EthStratumClientV2::processReponse(Json::Value& responseObject)
 
 					if (sHeaderHash != "" && sSeedHash != "")
 					{
-						h256 seedHash = h256(sSeedHash);
-
-						m_previous.header = m_current.header;
-						m_previous.seed = m_current.seed;
-						m_previous.boundary = m_current.boundary;
-						m_previous.startNonce = m_current.startNonce;
-						m_previous.exSizeBits = m_previous.exSizeBits;
-						m_previousJob = m_job;
-
 						m_current.header = h256(sHeaderHash);
-						m_current.seed = seedHash;
+						m_current.seed = h256(sSeedHash);
 						m_current.boundary = h256();
 						diffToTarget((uint32_t*)m_current.boundary.data(), m_nextWorkDifficulty);
 						m_current.startNonce = ethash_swap_u64(*((uint64_t*)m_extraNonce.data()));
 						m_current.exSizeBits = m_extraNonceHexSize * 4;
-						m_job = job;
+						m_current.job = h256(job);
 
 						p_farm->setWork(m_current);
 						cnote << "Received new job #" + job.substr(0, 8)
@@ -382,29 +373,16 @@ void EthStratumClientV2::processReponse(Json::Value& responseObject)
 					if (sHeaderHash != "" && sSeedHash != "" && sShareTarget != "")
 					{
 
-						h256 seedHash = h256(sSeedHash);
 						h256 headerHash = h256(sHeaderHash);
 
 						if (headerHash != m_current.header)
 						{
-							//x_current.lock();
-							//if (p_worktimer)
-							//	p_worktimer->cancel();
-
-							m_previous.header = m_current.header;
-							m_previous.seed = m_current.seed;
-							m_previous.boundary = m_current.boundary;
-							m_previousJob = m_job;
-
 							m_current.header = h256(sHeaderHash);
-							m_current.seed = seedHash;
+							m_current.seed = h256(sSeedHash);
 							m_current.boundary = h256(sShareTarget);
-							m_job = job;
+							m_current.job = h256(job);
 
 							p_farm->setWork(m_current);
-							//x_current.unlock();
-							//p_worktimer = new boost::asio::deadline_timer(m_io_service, boost::posix_time::seconds(m_worktimeout));
-							//p_worktimer->async_wait(boost::bind(&EthStratumClientV2::work_timeout_handler, this, boost::asio::placeholders::error));
 						}
 						cnote << "Received new job #" + job.substr(0, 8)
 							<< " seed: " << "#" + m_current.seed.hex().substr(0, 32)
@@ -459,12 +437,6 @@ bool EthStratumClientV2::submitHashrate(string const & rate) {
 }
 
 bool EthStratumClientV2::submit(Solution solution) {
-	x_current.lock();
-	WorkPackage tempWork(m_current);
-	string temp_job = m_job;
-	WorkPackage tempPreviousWork(m_previous);
-	string temp_previous_job = m_previousJob;
-	x_current.unlock();
 
 	string minernonce;
 	string nonceHex = toHex(solution.nonce);
@@ -472,56 +444,33 @@ bool EthStratumClientV2::submit(Solution solution) {
 		minernonce = nonceHex.substr(m_extraNonceHexSize, 16 - m_extraNonceHexSize);
 	}
 
-	if (EthashAux::eval(tempWork.seed, tempWork.header, solution.nonce).value < tempWork.boundary)
+	string json;
+	switch (m_protocol) {
+	case STRATUM_PROTOCOL_STRATUM:
+		json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + solution.job.hex() + "\",\"0x" + nonceHex + "\",\"0x" + solution.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
+		break;
+	case STRATUM_PROTOCOL_ETHPROXY:
+		json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + nonceHex + "\",\"0x" + solution.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
+		break;
+	case STRATUM_PROTOCOL_ETHEREUMSTRATUM:
+		json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + solution.job.hex() + "\",\"" + minernonce + "\"]}\n";
+		break;
+	}
+	std::ostream os(&m_requestBuffer);
+	os << json;
+	m_stale = solution.stale;
+	write(m_socket, m_requestBuffer);
+	if (m_stale)
 	{
-		string json;
-		switch (m_protocol) {
-		case STRATUM_PROTOCOL_STRATUM:
-			json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_job + "\",\"0x" + nonceHex + "\",\"0x" + tempWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
-			break;
-		case STRATUM_PROTOCOL_ETHPROXY:
-			json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + nonceHex + "\",\"0x" + tempWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
-			break;
-		case STRATUM_PROTOCOL_ETHEREUMSTRATUM:
-			json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_job + "\",\"" + minernonce + "\"]}\n";
-			break;
-		}
-		std::ostream os(&m_requestBuffer);
-		os << json;
-		m_stale = false;
-		write(m_socket, m_requestBuffer);
-		cnote << "Solution found; Submitted to" << p_active->host;
-		if (m_protocol != STRATUM_PROTOCOL_ETHEREUMSTRATUM) {
-			cnote << "Nonce:" << "0x" + nonceHex;
-		}
-		return true;
+		cwarn << "Stale solution found; Submitted to" << p_active->host;
 	}
-	else if (EthashAux::eval(tempPreviousWork.seed, tempPreviousWork.header, solution.nonce).value < tempPreviousWork.boundary)
+	else
 	{
-		string json;
-		switch (m_protocol) {
-		case STRATUM_PROTOCOL_STRATUM:
-			json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_previous_job + "\",\"0x" + nonceHex + "\",\"0x" + tempPreviousWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
-			break;
-		case STRATUM_PROTOCOL_ETHPROXY:
-			json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + nonceHex + "\",\"0x" + tempPreviousWork.header.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
-			break;
-		case STRATUM_PROTOCOL_ETHEREUMSTRATUM:
-			json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_previous_job + "\",\"" + minernonce + "\"]}\n";
-			break;
-		}		std::ostream os(&m_requestBuffer);
-		os << json;
-		m_stale = true;
-		write(m_socket, m_requestBuffer);
-		cwarn << "Submitted stale solution.";
-		return true;
+		cnote << "Solution found; Submitted to" << p_active->host;
 	}
-	else {
-		m_stale = false;
-		cwarn << "FAILURE: GPU gave incorrect result!";
-		p_farm->failedSolution();
+	if (m_protocol != STRATUM_PROTOCOL_ETHEREUMSTRATUM) {
+		cnote << "Nonce:" << "0x" + nonceHex;
 	}
-
-	return false;
+	return true;
 }
 
diff --git a/libstratum/EthStratumClientV2.h b/libstratum/EthStratumClientV2.h
index 8902805747..f308fa1569 100644
--- a/libstratum/EthStratumClientV2.h
+++ b/libstratum/EthStratumClientV2.h
@@ -64,15 +64,10 @@ class EthStratumClientV2 : public Worker
 	string m_response;
 
 	Farm* p_farm;
-	mutex x_current;
 	WorkPackage m_current;
-	WorkPackage m_previous;
 
 	bool m_stale = false;
 
-	string m_job;
-	string m_previousJob;
-
 	boost::asio::io_service m_io_service;
 	boost::asio::ip::tcp::socket m_socket;
 
@@ -92,4 +87,4 @@ class EthStratumClientV2 : public Worker
 	string m_submit_hashrate_id;
 
 	void processExtranonce(std::string& enonce);
-};
\ No newline at end of file
+};