Skip to content

Commit

Permalink
Merge pull request #230 from fancyIX/feature/#228
Browse files Browse the repository at this point in the history
Feature/#228
  • Loading branch information
fancyIX authored Mar 23, 2021
2 parents 8e7448a + 057802e commit 0426ac7
Show file tree
Hide file tree
Showing 13 changed files with 865 additions and 9 deletions.
44 changes: 39 additions & 5 deletions algorithm.c
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,39 @@ static cl_int queue_sph_kernel(struct __clState *clState, struct _dev_blk_ctx *b
return status;
}

static cl_int queue_groestlcoin_kernel_f(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel = &clState->kernel;
unsigned int num = 0;
cl_ulong le_target;
cl_int status = 0;

clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, 32, NULL, &status);
if (status != CL_SUCCESS && !clState->buffer1) {
applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer1), decrease TC or increase LG", status);
return NULL;
}

flip80(clState->cldata, blk->work->data);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->buffer1, true, 0, 32, blk->work->device_target, 0, NULL, NULL);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);

CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(clState->buffer1);

return status;
}

static cl_int queue_groestlcoin_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
if (!(clState->prebuilt)) {
return queue_sph_kernel(clState, blk, threads);
} else {
return queue_groestlcoin_kernel_f(clState, blk, threads);
}
}

static cl_int queue_darkcoin_mod_kernel(struct __clState *clState, struct _dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
cl_kernel *kernel;
Expand Down Expand Up @@ -2532,11 +2565,12 @@ static algorithm_settings_t algos[] = {
{ "mtp_vega" , ALGO_MTP , "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 1, 0,0, mtp_regenhash , NULL, NULL, queue_mtp_kernel , gen_hash, NULL },

// kernels starting from this will have difficulty calculated by using fuguecoin algorithm
#define A_FUGUE(a, b, c) \
{ a, ALGO_FUGUE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, NULL, queue_sph_kernel, c, NULL }
A_FUGUE("fuguecoin", fuguecoin_regenhash, sha256),
A_FUGUE("groestlcoin", groestlcoin_regenhash, sha256),
A_FUGUE("diamond", groestlcoin_regenhash, gen_hash),
#define A_FUGUE(a, b, c, qf) \
{ a, ALGO_FUGUE, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, b, NULL, NULL, qf, c, NULL }
A_FUGUE("fuguecoin", fuguecoin_regenhash, sha256, queue_sph_kernel),
A_FUGUE("groestlcoin", groestlcoin_regenhash, sha256, queue_groestlcoin_kernel),
A_FUGUE("groestlcoin_navi", groestlcoin_regenhash, sha256, queue_groestlcoin_kernel_f),
A_FUGUE("diamond", groestlcoin_regenhash, gen_hash, queue_sph_kernel),
#undef A_FUGUE

{ "whirlcoin", ALGO_WHIRL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 3, 8 * 16 * 4194304, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, whirlcoin_regenhash, NULL, NULL, queue_whirlcoin_kernel, sha256, NULL },
Expand Down
Binary file added bin-kernel/groestlcoinBaffingw256l4.bin
Binary file not shown.
Binary file added bin-kernel/groestlcoinBaffingw256l8.bin
Binary file not shown.
Binary file added bin-kernel/groestlcoinEllesmeregw256l4.bin
Binary file not shown.
Binary file added bin-kernel/groestlcoinEllesmeregw256l8.bin
Binary file not shown.
Binary file added bin-kernel/groestlcoingfx900gw256l4.bin
Binary file not shown.
Binary file added bin-kernel/groestlcoingfx900gw256l8.bin
Binary file not shown.
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
m4_define([v_maj], [0])
m4_define([v_min], [7])
m4_define([v_mic], [3])
m4_define([v_mic], [4])
m4_define([v_rev], [0])
##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
m4_ifdef([v_rev], [m4_define([v_ver], [v_maj.v_min.v_mic-v_rev])], [m4_define([v_ver], [v_maj.v_min.v_mic])])
Expand Down
8 changes: 8 additions & 0 deletions driver-opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1420,6 +1420,9 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,

set_threads_hashes(clState->vwidth, clState->compute_shaders, &hashes, globalThreads, localThreads[0],
&gpu->intensity, &gpu->xintensity, &gpu->rawintensity, &gpu->algorithm, &gpu->throughput);
if (((strcmp(gpu->algorithm.name, "groestlcoin") == 0) && clState->prebuilt) || strcmp(gpu->algorithm.name, "groestlcoin_navi") == 0) {
hashes /= 4;
}
if (hashes > gpu->max_hashes)
gpu->max_hashes = hashes;

Expand All @@ -1429,8 +1432,13 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
return -1;
}

size_t temp_goffset = 0;
if (clState->goffset)
p_global_work_offset = (size_t *)&work->blk.nonce;
if (((strcmp(gpu->algorithm.name, "groestlcoin") == 0) && clState->prebuilt) || strcmp(gpu->algorithm.name, "groestlcoin_navi") == 0) {
temp_goffset = work->blk.nonce * 4;
p_global_work_offset = &temp_goffset;
}

if (gpu->algorithm.type != ALGO_MTP && gpu->algorithm.type != ALGO_YESCRYPT_NAVI && gpu->algorithm.type != ALGO_YESCRYPT
&& gpu->algorithm.type != ALGO_NEOSCRYPT && gpu->algorithm.type != ALGO_NEOSCRYPT_XAYA
Expand Down
124 changes: 124 additions & 0 deletions kernel/groestlcoin_navi.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/*
* GroestlCoin kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2021 fancyIX
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author fancyIX
*/

#ifndef GROESTLCOIN_CL
#define GROESTLCOIN_CL

#ifdef __gfx900__
#else

#define SWAP32(a) (as_uint(as_uchar4(a).wzyx))

#include "groestlf.cl"


__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global unsigned char* block, __global uint* output, __global uint* pTarget)
{
uint gid = get_global_id(0);
uint nounce = gid >> 2;
uint thread = gid - get_global_offset(0);

uint paddedInput[8] = {0};

#pragma unroll 8
for(int k=0;k<8;k++) {
uint idx = 4 * k + (get_local_id(0) & 0x3);
if (idx < 20) {
paddedInput[k] = ((__global uint *) (block))[idx];
}
if (idx == 20) {
paddedInput[k] = 0x80;
}
if (idx == 31) {
paddedInput[k] = 0x01000000;
}
}

if ((get_local_id(0) % 4) == 3)
paddedInput[4] = SWAP32(nounce); // 4*4+3 = 19

uint msgBitsliced[8];
to_bitslice_quad(paddedInput, msgBitsliced);

uint state[8];
for (int round=0; round<2; round++)
{
groestl512_progressMessage_quad(state, msgBitsliced);

if (round < 1)
{
// Verkettung zweier Runden inclusive Padding.
msgBitsliced[ 0] = __byte_perm(state[ 0], 0x00800100, 0x04030401 + ((get_local_id(0)%4)==3)*0x02000000);
msgBitsliced[ 1] = __byte_perm(state[ 1], 0x00800100, 0x04030401);
msgBitsliced[ 2] = __byte_perm(state[ 2], 0x00800100, 0x04030401);
msgBitsliced[ 3] = __byte_perm(state[ 3], 0x00800100, 0x04030401);
msgBitsliced[ 4] = __byte_perm(state[ 4], 0x00800100, 0x04030401);
msgBitsliced[ 5] = __byte_perm(state[ 5], 0x00800100, 0x04030401);
msgBitsliced[ 6] = __byte_perm(state[ 6], 0x00800100, 0x04030401);
msgBitsliced[ 7] = __byte_perm(state[ 7], 0x00800100, 0x04030401 + ((get_local_id(0)%4)==0)*0x00000100);
}
}

// Nur der erste von jeweils 4 Threads bekommt das Ergebns-Hash
uint out_state[16];
from_bitslice_quad(state, out_state);

if ((get_local_id(0) & 0x3) == 0)
{
int i, position = -1;
bool rc = true;

#pragma unroll 8
for (i = 7; i >= 0; i--) {
if (out_state[i] > pTarget[i]) {
if(position < i) {
position = i;
rc = false;
}
}
if (out_state[i] < pTarget[i]) {
if(position < i) {
position = i;
rc = true;
}
}
}

if(rc)
output[output[0xFF]++] = (nounce);
}

barrier(CLK_GLOBAL_MEM_FENCE);
}

#endif
#endif // GROESTLCOIN_CL
120 changes: 120 additions & 0 deletions kernel/groestlcoinf.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* GroestlCoin kernel implementation.
*
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2021 fancyIX
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ===========================(LICENSE END)=============================
*
* @author fancyIX
*/

#ifndef GROESTLCOIN_CL
#define GROESTLCOIN_CL

#define SWAP32(a) (as_uint(as_uchar4(a).wzyx))

#include "groestlf.cl"


__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global unsigned char* block, __global uint* output, __global uint* pTarget)
{
uint gid = get_global_id(0);
uint nounce = gid >> 2;
uint thread = gid - get_global_offset(0);

uint paddedInput[8] = {0};

#pragma unroll 8
for(int k=0;k<8;k++) {
uint idx = 4 * k + (get_local_id(0) & 0x3);
if (idx < 20) {
paddedInput[k] = ((__global uint *) (block))[idx];
}
if (idx == 20) {
paddedInput[k] = 0x80;
}
if (idx == 31) {
paddedInput[k] = 0x01000000;
}
}

if ((get_local_id(0) % 4) == 3)
paddedInput[4] = SWAP32(nounce); // 4*4+3 = 19

uint msgBitsliced[8];
to_bitslice_quad(paddedInput, msgBitsliced);

uint state[8];
for (int round=0; round<2; round++)
{
groestl512_progressMessage_quad(state, msgBitsliced);

if (round < 1)
{
// Verkettung zweier Runden inclusive Padding.
msgBitsliced[ 0] = __byte_perm(state[ 0], 0x00800100, 0x04030401 + ((get_local_id(0)%4)==3)*0x02000000);
msgBitsliced[ 1] = __byte_perm(state[ 1], 0x00800100, 0x04030401);
msgBitsliced[ 2] = __byte_perm(state[ 2], 0x00800100, 0x04030401);
msgBitsliced[ 3] = __byte_perm(state[ 3], 0x00800100, 0x04030401);
msgBitsliced[ 4] = __byte_perm(state[ 4], 0x00800100, 0x04030401);
msgBitsliced[ 5] = __byte_perm(state[ 5], 0x00800100, 0x04030401);
msgBitsliced[ 6] = __byte_perm(state[ 6], 0x00800100, 0x04030401);
msgBitsliced[ 7] = __byte_perm(state[ 7], 0x00800100, 0x04030401 + ((get_local_id(0)%4)==0)*0x00000100);
}
}

// Nur der erste von jeweils 4 Threads bekommt das Ergebns-Hash
uint out_state[16];
from_bitslice_quad(state, out_state);

if ((get_local_id(0) & 0x3) == 0)
{
int i, position = -1;
bool rc = true;

#pragma unroll 8
for (i = 7; i >= 0; i--) {
if (out_state[i] > pTarget[i]) {
if(position < i) {
position = i;
rc = false;
}
}
if (out_state[i] < pTarget[i]) {
if(position < i) {
position = i;
rc = true;
}
}
}

if(rc)
output[output[0xFF]++] = (nounce);
}

barrier(CLK_GLOBAL_MEM_FENCE);
}

#endif // GROESTLCOIN_CL
Loading

0 comments on commit 0426ac7

Please sign in to comment.