Skip to content

Commit

Permalink
Improve performance a bit
Browse files (browse the repository at this point in the history)
  • Loading branch information
fancyIX committed Nov 29, 2021
1 parent 255113a commit 924bdbd
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 21 deletions.
4 changes: 2 additions & 2 deletions algorithm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1862,7 +1862,7 @@ static cl_int queue_heavyhash_kernel(_clState *clState, dev_blk_ctx *blk, __mayb
uint32_t edata[20];
uint32_t seed[8];

uint16_t matrix[64][64];
uint32_t matrix[64][64];
struct xoshiro_state state;

memcpy(edata, clState->cldata, 80);
Expand All @@ -1875,7 +1875,7 @@ static cl_int queue_heavyhash_kernel(_clState *clState, dev_blk_ctx *blk, __mayb

generate_matrix(matrix, &state);

status = clEnqueueWriteBuffer(clState->commandQueue, clState->padbuffer8, true, 0, 64 * 64 * 2, matrix, 0, NULL, NULL);
status = clEnqueueWriteBuffer(clState->commandQueue, clState->padbuffer8, true, 0, 64 * 64 * 4, matrix, 0, NULL, NULL);

CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->padbuffer8);
Expand Down
20 changes: 10 additions & 10 deletions algorithm/heavyhash-gate.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ static inline uint64_t xoshiro_gen(struct xoshiro_state *state) {
return result;
}

static int compute_rank(const uint16_t A[64][64])
static int compute_rank(const uint32_t A[64][64])
{
double B[64][64];
for (int i = 0; i < 64; ++i){
Expand Down Expand Up @@ -104,12 +104,12 @@ static int compute_rank(const uint16_t A[64][64])
return rank;
}

static inline bool is_full_rank(const uint16_t matrix[64][64])
static inline bool is_full_rank(const uint32_t matrix[64][64])
{
return compute_rank(matrix) == 64;
}

void generate_matrix(uint16_t matrix[64][64], struct xoshiro_state *state) {
void generate_matrix(uint32_t matrix[64][64], struct xoshiro_state *state) {
do {
for (int i = 0; i < 64; ++i) {
for (int j = 0; j < 64; j += 16) {
Expand All @@ -122,14 +122,14 @@ void generate_matrix(uint16_t matrix[64][64], struct xoshiro_state *state) {
} while (!is_full_rank(matrix));
}

void heavyhash(const uint16_t matrix[64][64], uint8_t* pdata, size_t pdata_len, uint8_t* output)
void heavyhash(const uint32_t matrix[64][64], uint8_t* pdata, size_t pdata_len, uint8_t* output)
{
uint8_t hash_first[32] __attribute__((aligned(64)));
uint8_t hash_second[32] __attribute__((aligned(64)));
uint8_t hash_xored[32] __attribute__((aligned(64)));

uint16_t vector[64] __attribute__((aligned(64)));
uint16_t product[64] __attribute__((aligned(64)));
uint32_t vector[64] __attribute__((aligned(64)));
uint32_t product[64] __attribute__((aligned(64)));

kt_sha3_256((uint8_t*) hash_first, 32, pdata, pdata_len);

Expand All @@ -139,7 +139,7 @@ void heavyhash(const uint16_t matrix[64][64], uint8_t* pdata, size_t pdata_len,
}

for (int i = 0; i < 64; ++i) {
uint16_t sum = 0;
uint32_t sum = 0;
for (int j = 0; j < 64; ++j) {
sum += matrix[i][j] * vector[j];
}
Expand Down Expand Up @@ -167,7 +167,7 @@ int heavyhash_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t

uint32_t seed[8];

uint16_t matrix[64][64];
uint32_t matrix[64][64];
struct xoshiro_state state;

kt_sha3_256((uint8_t *)seed, 32, (uint8_t *)(data+1), 32);
Expand Down Expand Up @@ -200,7 +200,7 @@ void heavyhash_regenhash(struct work *work)

uint32_t seed[8];

uint16_t matrix[64][64];
uint32_t matrix[64][64];
struct xoshiro_state state;

kt_sha3_256((uint8_t *)seed, 32, (uint8_t*) (data+1), 32);
Expand Down Expand Up @@ -228,7 +228,7 @@ bool scanhash_heavyhash(struct thr_info *thr, const unsigned char *pmidstate,

const uint32_t first_nonce = ((uint32_t *)pdata)[19];

uint16_t matrix[64][64] __attribute__((aligned(64)));
uint32_t matrix[64][64] __attribute__((aligned(64)));
struct xoshiro_state state;

mm128_bswap32_80( edata, pdata );
Expand Down
2 changes: 1 addition & 1 deletion algorithm/heavyhash-gate.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ struct xoshiro_state {
};

extern uint64_t le64dec(const void *pp);
extern void generate_matrix(uint16_t matrix[64][64], struct xoshiro_state *state);
extern void generate_matrix(uint32_t matrix[64][64], struct xoshiro_state *state);
extern int heavyhash_test(unsigned char *pdata, const unsigned char *ptarget,
uint32_t nonce);
extern void heavyhash_regenhash(struct work *work);
Expand Down
39 changes: 33 additions & 6 deletions kernel/heavyhash.cl
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,16 @@ typedef union {


__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global uint *header, __global ushort* matrix, __global uint* output, const ulong target)
__kernel void search(__global uint *header, __global uint* gmatrix, __global uint* output, const ulong target)
{
__local ulong2 matrix[1024];

uint tid = get_local_id(0);
__global ulong2 *cp = (__global ulong2 *) gmatrix;
for (int i = 0; i < (1024 / WORKSIZE); i++) {
matrix[tid + i * WORKSIZE] = cp[tid + i * WORKSIZE];
}

uint gid = get_global_id(0);
hash_t hash;

Expand All @@ -51,8 +59,8 @@ __kernel void search(__global uint *header, __global ushort* matrix, __global ui
uchar hash_second[32];
uchar hash_xored[32];

ushort vector[64];
ushort product[64];
uint vector[64];
uint product[64];

((uchar *) pdata)[80] = 0x06;
((uchar *) pdata)[135] = 0x80;
Expand All @@ -69,9 +77,28 @@ __kernel void search(__global uint *header, __global ushort* matrix, __global ui
}

for (int i = 0; i < 64; ++i) {
ushort sum = 0;
for (int j = 0; j < 64; ++j) {
sum += matrix[i * 64 + j] * vector[j];
uint sum = 0;
for (int k = 0; k < 4; k++) {
ulong2 buf0 = matrix[i * 16 + k * 4 + 0];
ulong2 buf1 = matrix[i * 16 + k * 4 + 1];
ulong2 buf2 = matrix[i * 16 + k * 4 + 2];
ulong2 buf3 = matrix[i * 16 + k * 4 + 3];
uint *m0 = (uint *)&buf0;
for (int j = 0; j < 4; j++) {
sum += m0[j] * vector[(k * 4 + 0) * 4 + j];
}
uint *m1 = (uint *)&buf1;
for (int j = 0; j < 4; j++) {
sum += m1[j] * vector[(k * 4 + 1) * 4 + j];
}
uint *m2 = (uint *)&buf2;
for (int j = 0; j < 4; j++) {
sum += m2[j] * vector[(k * 4 + 2) * 4 + j];
}
uint *m3 = (uint *)&buf3;
for (int j = 0; j < 4; j++) {
sum += m3[j] * vector[(k * 4 + 3) * 4 + j];
}
}
product[i] = (sum >> 10);
}
Expand Down
2 changes: 1 addition & 1 deletion ocl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1025,7 +1025,7 @@ if (algorithm->type == ALGO_YESCRYPT || algorithm->type == ALGO_YESCRYPT_NAVI) {
readbufsize = 80;
}
else if (algorithm->type == ALGO_HEAVYHASH) {
bufsize = 64 * 64 * 2;
bufsize = 64 * 64 * 4;
readbufsize = 80;
}
else {
Expand Down
2 changes: 1 addition & 1 deletion util.c
Original file line number Diff line number Diff line change
Expand Up @@ -2527,7 +2527,7 @@ bool subscribe_extranonce(struct pool *pool)
res_val = json_object_get(val, "result");
err_val = json_object_get(val, "error");

if (!res_val || json_is_false(res_val) || (err_val && !json_is_null(err_val))) {
if (!res_val || json_is_false(res_val) || (err_val && !json_is_null(err_val) && !json_is_false(err_val))) {
char *ss;

if (err_val) {
Expand Down

0 comments on commit 924bdbd

Please sign in to comment.