diff --git a/CMakeLists.txt b/CMakeLists.txt
index cdb5e60828..6a2a07a7f2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,6 +31,7 @@ option(ETHDBUS "Build with D-Bus support" OFF)
 option(APICORE "Build with API Server support" ON)
 option(BINKERN "Install AMD binary kernels" ON)
 option(DEVBUILD "Log developer metrics" OFF)
+option(USE_SYS_OPENCL "Build with system OpenCL" OFF)
 
 # propagates CMake configuration options to the compiler
 function(configureProject)
@@ -55,6 +56,9 @@ function(configureProject)
     if (DEVBUILD)
         add_definitions(-DDEV_BUILD)
     endif()
+    if (USE_SYS_OPENCL)
+        add_definitions(-DUSE_SYS_OPENCL)
+    endif()
 endfunction()
 
 hunter_add_package(Boost COMPONENTS system filesystem thread)
@@ -68,6 +72,10 @@ find_package(ethash CONFIG REQUIRED)
 
 configureProject()
 
+if(APPLE)
+    set(USE_SYS_OPENCL ON)
+endif()
+
 message("----------------------------------------------------------------------------")
 message("-- CMake ${CMAKE_VERSION}")
 message("-- Build ${CMAKE_BUILD_TYPE} / ${CMAKE_SYSTEM_NAME}")
@@ -79,6 +87,7 @@ message("-- ETHDBUS          Build D-Bus components                       ${ETHD
 message("-- APICORE          Build API Server components                  ${APICORE}")
 message("-- BINKERN          Install AMD binary kernels                   ${BINKERN}")
 message("-- DEVBUILD         Build with dev logging                       ${DEVBUILD}")
+message("-- USE_SYS_OPENCL   Build with system OpenCL                     ${USE_SYS_OPENCL}")
 message("----------------------------------------------------------------------------")
 message("")
 
@@ -87,7 +96,6 @@ if(UNIX AND NOT APPLE)
     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libstdc++")
 endif()
 
-
 cable_add_buildinfo_library(PROJECT_NAME ${PROJECT_NAME})
 
 add_subdirectory(libdevcore)
diff --git a/README.md b/README.md
index 7ae2fabb26..bf50c25cb6 100644
--- a/README.md
+++ b/README.md
@@ -92,7 +92,7 @@ Ordered alphabetically. [Contributors statistics since 2015-08-20].
 | EoD                   | [@EoD](https://github.com/EoD)                               |     |
 | Genoil                | [@Genoil](https://github.com/Genoil)                         |     |
 | goobur                | [@goobur](https://github.com/goobur)                         |     |
-| Marius van der Wijden | [@MariusVanDerWijden](https://github.com/MariusVanDerWijden) | ETH: 0x57d22b967c9dc64e5577f37edf1514c2d8985099 |
+| Marius van der Wijden | [@MariusVanDerWijden](https://github.com/MariusVanDerWijden) |     |
 | Paweł Bylica          | [@chfast](https://github.com/chfast)                         |     |
 | Philipp Andreas       | [@smurfy](https://github.com/smurfy)                         |     |
 | Stefan Oberhumer      | [@StefanOberhumer](https://github.com/StefanOberhumer)       |     |
@@ -131,9 +131,9 @@ Because of the GDDR5X memory, which can't be fully utilized for ETH mining (yet)
 
 Only GCN 1.0 GPUs (78x0, 79x0, 270, 280), but in a different way. You'll see that on each new epoch (30K blocks), the hashrate will go down a little bit.
 
-### Can I still mine ETH with my 2GB GPU?
+### Can I still mine ETH with my 4GB GPU?
 
-Not really, your VRAM must be above the DAG size (Currently about 2.15 GB.) to get best performance. Without it severe hash loss will occur.
+Not really. Your VRAM must be larger than the DAG size (currently about 4.023 GB) to get the best performance; otherwise severe hashrate loss will occur.
 
 ### What are the optimal launch parameters?
 
diff --git a/docs/BUILD.md b/docs/BUILD.md
index 44649083eb..32f9ceff7b 100644
--- a/docs/BUILD.md
+++ b/docs/BUILD.md
@@ -43,6 +43,8 @@ you have to install the OpenGL libraries. E.g. on Ubuntu run:
 sudo apt-get install mesa-common-dev
 ```
 
+If you want to use a locally installed [ROCm-OpenCL](https://rocmdocs.amd.com/en/latest/) package, pass the build flag `-DUSE_SYS_OPENCL=ON` when configuring with CMake.
+
 ### macOS
 
 1. GCC version >= TBF
@@ -143,6 +145,7 @@ cmake .. -DETHASHCUDA=ON -DETHASHCL=OFF
 * `-DAPICORE=ON` - enable API Server, `ON` by default.
 * `-DBINKERN=ON` - install AMD binary kernels, `ON` by default.
 * `-DETHDBUS=ON` - enable D-Bus support, `OFF` by default.
+* `-DUSE_SYS_OPENCL=ON` - use the system OpenCL library, `OFF` by default except on macOS, where it is always enabled. Set it to use a locally installed **ROCm-OpenCL** package.
 
 ## Disable Hunter
 
diff --git a/docs/POOL_EXAMPLES_ETH.md b/docs/POOL_EXAMPLES_ETH.md
index 7c58615ebb..ed56446e4f 100644
--- a/docs/POOL_EXAMPLES_ETH.md
+++ b/docs/POOL_EXAMPLES_ETH.md
@@ -67,7 +67,7 @@ The above samples produce the very same result.
 -P stratum://account%%2e1234.Worker:password@eu1.ethermine.org:4444
 ```  
 
-## Secure socket comunications for stratum only
+## Secure socket communications for stratum only
 
 Ethminer supports secure socket communications (where pool implements and offers it) to avoid the risk of a [man-in-the-middle attack](https://en.wikipedia.org/wiki/Man-in-the-middle_attack)
 To enable it simply replace tcp with either:
@@ -104,7 +104,7 @@ Here you can find a collection of samples to connect to most commonly used ethas
 * Stratum connection is **always to be preferred** over **getwork** when pool offers it due to its better network latency.
 * If possible the samples use a protocol which supports reporting of hashrate (`--report-hashrate`) if pool supports this.
 
-**Check for updates in the pool connection settings visiting the pools homepage.**
+**Check for updates to the pool connection settings by visiting the pool's homepage.**
 
 ## Variables
 
@@ -252,6 +252,11 @@ HINTS:
 
 ### nanopool.org
 
+Notice ⚠
+
+* Use "%40" for the @-sign in your email address
+* Use "\%2e" for the .-sign on Linux in ETH_WALLET.WORKERNAME
+
 With email:
 
 ```
@@ -272,9 +277,6 @@ Without email:
 -P stratum1+tcp://ETH_WALLET.WORKERNAME@eth-us-west1.nanopool.org:9999
 ```
 
-HINTS:
-
-* Use "%40" for the @-sign in your email address
 
 ### nicehash.com
 
diff --git a/libapicore/ApiServer.cpp b/libapicore/ApiServer.cpp
index 2e6934b78c..473e8a0797 100644
--- a/libapicore/ApiServer.cpp
+++ b/libapicore/ApiServer.cpp
@@ -1169,10 +1169,17 @@ std::string ApiConnection::getHttpMinerStatDetail()
 
         _ret << "<td class=right>" << dev::getFormattedHashes(hashrate) << "</td>";
 
-        _ret << "<td class=right>" << device["mining"]["shares"][0].asString() << "</td>";
+        
+        string solString = "A" + device["mining"]["shares"][0].asString() + 
+                           ":R" + device["mining"]["shares"][1].asString() +
+                           ":F" + device["mining"]["shares"][2].asString();
+        _ret << "<td class=right>" << solString << "</td>";
         _ret << "<td class=right>" << device["hardware"]["sensors"][0].asString() << "</td>";
         _ret << "<td class=right>" << device["hardware"]["sensors"][1].asString() << "</td>";
-        _ret << "<td class=right>" << device["hardware"]["sensors"][2].asString() << "</td>";
+
+        stringstream powerStream; // Round the power to 2 decimal places to remove floating point garbage
+        powerStream << fixed << setprecision(2) << device["hardware"]["sensors"][2].asDouble();
+        _ret << "<td class=right>" << powerStream.str() << "</td>";
 
         _ret << "</tr>";  // Close row
     }
diff --git a/libethash-cl/CLMiner.cpp b/libethash-cl/CLMiner.cpp
index 38a2b5ce4b..62d1dc231d 100644
--- a/libethash-cl/CLMiner.cpp
+++ b/libethash-cl/CLMiner.cpp
@@ -768,7 +768,7 @@ bool CLMiner::initEpoch_internal()
         addDefinition(code, "WORKSIZE", m_settings.localWorkSize);
         addDefinition(code, "ACCESSES", 64);
         addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults);
-        addDefinition(code, "PLATFORM", m_deviceDescriptor.clPlatformId);
+        addDefinition(code, "PLATFORM", static_cast<unsigned>(m_deviceDescriptor.clPlatformType));
         addDefinition(code, "COMPUTE", computeCapability);
 
         if (m_deviceDescriptor.clPlatformType == ClPlatformTypeEnum::Clover)
diff --git a/libethash-cl/CMakeLists.txt b/libethash-cl/CMakeLists.txt
index 7aabf04454..f190b97ba6 100644
--- a/libethash-cl/CMakeLists.txt
+++ b/libethash-cl/CMakeLists.txt
@@ -20,8 +20,8 @@ set(SOURCES
 	${CMAKE_CURRENT_BINARY_DIR}/ethash.h
 )
 
-if(APPLE)
-	# On macOS use system OpenCL library.
+if(USE_SYS_OPENCL)
+	# Use the system OpenCL library (always on macOS; opt-in elsewhere, e.g. for ROCm-OpenCL).
 	find_package(OpenCL REQUIRED)
 else()
 	hunter_add_package(OpenCL)
diff --git a/libethash-cl/kernels/cl/ethash.cl b/libethash-cl/kernels/cl/ethash.cl
index ce4586b676..d9614e9cbe 100644
--- a/libethash-cl/kernels/cl/ethash.cl
+++ b/libethash-cl/kernels/cl/ethash.cl
@@ -15,7 +15,11 @@
 // You should have received a copy of the GNU General Public License
 // along with Gateless Gate Sharp.  If not, see <http://www.gnu.org/licenses/>.
 
-
+#define OPENCL_PLATFORM_UNKNOWN 0
+#define OPENCL_PLATFORM_AMD     1
+#define OPENCL_PLATFORM_CLOVER  2
+#define OPENCL_PLATFORM_NVIDIA  3
+#define OPENCL_PLATFORM_INTEL   4
 
 #if (defined(__Tahiti__) || defined(__Pitcairn__) || defined(__Capeverde__) || defined(__Oland__) || defined(__Hainan__))
 #define LEGACY
@@ -26,6 +30,22 @@
 #endif
 
 #if defined(cl_amd_media_ops)
+#if PLATFORM == OPENCL_PLATFORM_CLOVER
+/*
+ * Mesa defines cl_amd_media_ops but does not define amd_bitalign(); provide it via inline asm.
+ * https://github.com/openwall/john/issues/3454#issuecomment-436899959
+ */
+uint2 amd_bitalign(uint2 src0, uint2 src1, uint2 src2)
+{
+    uint2 dst;
+    __asm("v_alignbit_b32 %0, %2, %3, %4\n"
+          "v_alignbit_b32 %1, %5, %6, %7"
+          : "=v" (dst.x), "=v" (dst.y)
+          : "v" (src0.x), "v" (src1.x), "v" (src2.x),
+            "v" (src0.y), "v" (src1.y), "v" (src2.y));
+    return dst;
+}
+#endif
 #pragma OPENCL EXTENSION cl_amd_media_ops : enable
 #elif defined(cl_nv_pragma_unroll)
 uint amd_bitalign(uint src0, uint src1, uint src2)
@@ -246,7 +266,7 @@ struct SearchResults {
 
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
 __kernel void search(
-    __global struct SearchResults* restrict g_output,
+    __global volatile struct SearchResults* restrict g_output,
     __constant uint2 const* g_header,
     __global ulong8 const* _g_dag0,
     __global ulong8 const* _g_dag1,
@@ -424,23 +444,37 @@ static void SHA3_512(uint2 *s)
 __kernel void GenerateDAG(uint start, __global const uint16 *_Cache, __global uint16 *_DAG0, __global uint16 *_DAG1, uint light_size)
 {
     __global const Node *Cache = (__global const Node *) _Cache;
-    uint NodeIdx = start + get_global_id(0);
+    const uint gid = get_global_id(0);
+    uint NodeIdx = start + gid;
+    const uint thread_id = gid & 3;
+
+    __local Node sharebuf[WORKSIZE];
+    __local uint indexbuf[WORKSIZE];
+    __local Node *dagNode = sharebuf + (get_local_id(0) / 4) * 4;
+    __local uint *indexes = indexbuf + (get_local_id(0) / 4) * 4;
+    __global const Node *parentNode;
 
     Node DAGNode = Cache[NodeIdx % light_size];
 
     DAGNode.dwords[0] ^= NodeIdx;
     SHA3_512(DAGNode.qwords);
 
+    dagNode[thread_id] = DAGNode;
+    barrier(CLK_LOCAL_MEM_FENCE);
     for (uint i = 0; i < 256; ++i) {
-        uint ParentIdx = fnv(NodeIdx ^ i, DAGNode.dwords[i & 15]) % light_size;
-        __global const Node *ParentNode = Cache + ParentIdx;
+        uint ParentIdx = fnv(NodeIdx ^ i, dagNode[thread_id].dwords[i & 15]) % light_size;
+        indexes[thread_id] = ParentIdx;
+        barrier(CLK_LOCAL_MEM_FENCE);
 
-#pragma unroll
-        for (uint x = 0; x < 4; ++x) {
-                DAGNode.dqwords[x] *= (uint4)(FNV_PRIME);
-                DAGNode.dqwords[x] ^= ParentNode->dqwords[x];
+        for (uint t = 0; t < 4; ++t) {
+            uint parentIndex = indexes[t];
+            parentNode = Cache + parentIndex;
+
+            dagNode[t].dqwords[thread_id] = fnv(dagNode[t].dqwords[thread_id], parentNode->dqwords[thread_id]);
+            barrier(CLK_LOCAL_MEM_FENCE);
         }
     }
+    DAGNode = dagNode[thread_id];
 
     SHA3_512(DAGNode.qwords);
 
diff --git a/libethash-cuda/CMakeLists.txt b/libethash-cuda/CMakeLists.txt
index 72eb897e69..2c27ad2ede 100644
--- a/libethash-cuda/CMakeLists.txt
+++ b/libethash-cuda/CMakeLists.txt
@@ -28,6 +28,14 @@ else()
 	if(NOT CUDA_VERSION VERSION_LESS 10.0)
 		list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_75,code=sm_75")
 	endif()
+	if(NOT CUDA_VERSION VERSION_LESS 11.0)
+		# NVIDIA A100 and NVIDIA DGX-A100
+		list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_80,code=sm_80")
+	endif()
+	if(NOT CUDA_VERSION VERSION_LESS 11.1)
+		# Tesla GA10x cards, RTX Ampere – RTX 3080/3090, RTX A6000, RTX A40
+		list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_86,code=sm_86")
+	endif()
 endif()
 
 file(GLOB sources "*.cpp" "*.cu")
diff --git a/libethash-cuda/ethash_cuda_miner_kernel.cu b/libethash-cuda/ethash_cuda_miner_kernel.cu
index 1561d79390..4da35d7a88 100644
--- a/libethash-cuda/ethash_cuda_miner_kernel.cu
+++ b/libethash-cuda/ethash_cuda_miner_kernel.cu
@@ -52,11 +52,13 @@ __global__ void ethash_calculate_dag_item(uint32_t start)
     uint32_t const node_index = start + blockIdx.x * blockDim.x + threadIdx.x;
     if (((node_index >> 1) & (~1)) >= d_dag_size)
         return;
-
-    hash128_t dag_node;
+    union {
+       hash128_t dag_node;
+       uint2 dag_node_mem[25];
+    };
     copy(dag_node.uint4s, d_light[node_index % d_light_size].uint4s, 4);
     dag_node.words[0] ^= node_index;
-    SHA3_512(dag_node.uint2s);
+    SHA3_512(dag_node_mem);
 
     const int thread_id = threadIdx.x & 3;
 
@@ -78,7 +80,7 @@ __global__ void ethash_calculate_dag_item(uint32_t start)
             }
         }
     }
-    SHA3_512(dag_node.uint2s);
+    SHA3_512(dag_node_mem);
     hash64_t* dag_nodes = (hash64_t*)d_dag;
     copy(dag_nodes[node_index].uint4s, dag_node.uint4s, 4);
 }
diff --git a/libethcore/Miner.h b/libethcore/Miner.h
index 082f203b39..2f3b98f212 100644
--- a/libethcore/Miner.h
+++ b/libethcore/Miner.h
@@ -147,7 +147,7 @@ struct HwSensorsType
     {
         string _ret = to_string(tempC) + "C " + to_string(fanP) + "%";
         if (powerW)
-            _ret.append(boost::str(boost::format("%f") % powerW));
+            _ret.append(" " + boost::str(boost::format("%0.2f") % powerW) + "W");
         return _ret;
     };
 };
diff --git a/libpoolprotocols/PoolManager.cpp b/libpoolprotocols/PoolManager.cpp
index 12a5584f6d..d9192ad6f2 100644
--- a/libpoolprotocols/PoolManager.cpp
+++ b/libpoolprotocols/PoolManager.cpp
@@ -182,7 +182,7 @@ void PoolManager::setClientHandlers()
             // If epoch is valued in workpackage take it
             if (wp.epoch == -1)
             {
-                if (m_currentWp.block > 0)
+                if (m_currentWp.block >= 0)
                     m_currentWp.epoch = m_currentWp.block / 30000;
                 else
                     m_currentWp.epoch = ethash::find_epoch_number(