Skip to content

Commit

Permalink
lyra2z and phi2 support
Browse files Browse the repository at this point in the history
  • Loading branch information
fancyIX committed Oct 13, 2021
1 parent bf0271e commit 603d0e6
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 95 deletions.
14 changes: 12 additions & 2 deletions driver-opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1460,11 +1460,16 @@ if (gpu->algorithm.type != ALGO_MTP && gpu->algorithm.type != ALGO_YESCRYPT_NAVI

for (i = 0; i < clState->n_extra_kernels; i++) {
if ((gpu->algorithm.type == ALGO_PHI2 || gpu->algorithm.type == ALGO_PHI2_NAVI) && i == 1) {
if (clState->prebuilt || gpu->algorithm.type == ALGO_PHI2_NAVI) {
if (clState->prebuilt) {
const size_t off2[] = { 0, 0, *p_global_work_offset };
const size_t gws[] = { 4, 4, globalThreads[0] };
const size_t expand[] = { 4, 4, 16 };
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 3, off2, gws, expand, 0, NULL, NULL); // lyra 4w monolithic
} else if (gpu->algorithm.type == ALGO_PHI2_NAVI) {
const size_t off2[] = { 0, 0, *p_global_work_offset };
const size_t gws[] = { 4, 2, globalThreads[0] * 2 };
const size_t expand[] = { 4, 2, 16 };
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 3, off2, gws, expand, 0, NULL, NULL); // lyra 4w monolithic
} else {
const size_t off2[] = { 0, *p_global_work_offset };
const size_t gws[] = { 4, globalThreads[0] * 2 };
Expand All @@ -1480,11 +1485,16 @@ if (gpu->algorithm.type != ALGO_MTP && gpu->algorithm.type != ALGO_YESCRYPT_NAVI
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 1, p_global_work_offset,
globalThreads2, localThreads2, 0, NULL, NULL);
} else if ((gpu->algorithm.type == ALGO_LYRA2Z || gpu->algorithm.type == ALGO_LYRA2Z_NAVI) && i == 1) {
if (clState->prebuilt || gpu->algorithm.type == ALGO_LYRA2Z_NAVI) {
if (clState->prebuilt) {
const size_t off2[] = { 0, 0, *p_global_work_offset };
const size_t gws[] = { 4, 4, globalThreads[0] / 2 };
const size_t expand[] = { 4, 4, 16 };
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 3, off2, gws, expand, 0, NULL, NULL); // lyra 4w monolithic
} else if(gpu->algorithm.type == ALGO_LYRA2Z_NAVI) {
const size_t off2[] = { 0, 0, *p_global_work_offset };
const size_t gws[] = { 4, 2, globalThreads[0] };
const size_t expand[] = { 4, 2, 16 };
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->extra_kernels[i], 3, off2, gws, expand, 0, NULL, NULL); // lyra 4w monolithic
} else {
const size_t off2[] = { 0, *p_global_work_offset };
const size_t gws[] = { 4, globalThreads[0] };
Expand Down
6 changes: 6 additions & 0 deletions kernel/allium_navi.cl
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ __kernel void search3(__global uchar* sharedDataBuf)

const int player = get_local_id(1) & 1;

uint zero = 0;
uint state[4];
uint si[3];
uint sII[3];
Expand Down Expand Up @@ -384,6 +385,8 @@ __kernel void search3(__global uchar* sharedDataBuf)
state_xor_modify(modify, 5, 0, mindex, state, notepad);
state_xor_modify(modify, 6, 0, mindex, state, notepad);
state_xor_modify(modify, 7, 0, mindex, state, notepad);

zero = 1;
/**/

//-------------------------------------
Expand Down Expand Up @@ -559,6 +562,7 @@ __kernel void search7(__global uchar* sharedDataBuf)

const int player = get_local_id(1) & 1;

uint zero = 0;
uint state[4];
uint si[3];
uint sII[3];
Expand Down Expand Up @@ -654,6 +658,8 @@ __kernel void search7(__global uchar* sharedDataBuf)
state_xor_modify(modify, 5, 0, mindex, state, notepad);
state_xor_modify(modify, 6, 0, mindex, state, notepad);
state_xor_modify(modify, 7, 0, mindex, state, notepad);

zero = 1;
/**/

//-------------------------------------
Expand Down
76 changes: 39 additions & 37 deletions kernel/lyra2Z_navi.cl
Original file line number Diff line number Diff line change
Expand Up @@ -276,17 +276,17 @@ __kernel void search1(__global uint* hashes, __global uchar* sharedDataBuf)
__attribute__((amdgpu_waves_per_eu(1,1)))
__attribute__((amdgpu_num_vgpr(256)))
__attribute__((amdgpu_num_sgpr(100)))
__attribute__((reqd_work_group_size(4, 4, 16)))
__attribute__((reqd_work_group_size(4, 2, 16)))
__kernel void search2(__global uchar* sharedDataBuf)
{
uint gid = get_global_id(2);
__global lyraState_t *lyraState = (__global lyraState_t *)(sharedDataBuf + ((8 * 4 * 4 * 2) * (gid-get_global_offset(2))));
__global lyraState_t *lyraState2 = (__global lyraState_t *)(sharedDataBuf + ((8 * 4 * 4) + (8 * 4 * 4 * 2) * (gid-get_global_offset(2))));
__global lyraState_t *lyraState = (__global lyraState_t *)(sharedDataBuf + ((8 * 4 * 4) * (gid-get_global_offset(2))));

uint notepad[192];

const int player = get_local_id(1);
const int player = get_local_id(1) & 1;

uint zero = 0;
uint state[4];
uint si[3];
uint sII[3];
Expand All @@ -299,26 +299,26 @@ uint gid = get_global_id(2);
uint ss3;
uint ss;
uint carry;
const uint mindex = (LOCAL_LINEAR & 1) == 0 ? 0 : 1;
const uint mindex = (player & 1) == 0 ? 0 : 1;

//-------------------------------------
// Load Lyra state
if (LOCAL_LINEAR == 0) state[0] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 0]));
if (LOCAL_LINEAR == 0) state[1] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 1]));
if (LOCAL_LINEAR == 0) state[2] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 2]));
if (LOCAL_LINEAR == 0) state[3] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 3]));
if (LOCAL_LINEAR == 1) state[0] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 0 + 1]));
if (LOCAL_LINEAR == 1) state[1] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 1 + 1]));
if (LOCAL_LINEAR == 1) state[2] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 2 + 1]));
if (LOCAL_LINEAR == 1) state[3] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 3 + 1]));
if (LOCAL_LINEAR == 2) state[0] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 0]));
if (LOCAL_LINEAR == 2) state[1] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 1]));
if (LOCAL_LINEAR == 2) state[2] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 2]));
if (LOCAL_LINEAR == 2) state[3] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 3]));
if (LOCAL_LINEAR == 3) state[0] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 0 + 1]));
if (LOCAL_LINEAR == 3) state[1] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 1 + 1]));
if (LOCAL_LINEAR == 3) state[2] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 2 + 1]));
if (LOCAL_LINEAR == 3) state[3] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 3 + 1]));
if (LOCAL_LINEAR == 0) state[0] = ((uint)(lyraState->h4[2 * 0 + 2 * 4 * 0 + player]));
if (LOCAL_LINEAR == 0) state[1] = ((uint)(lyraState->h4[2 * 0 + 2 * 4 * 1 + player]));
if (LOCAL_LINEAR == 0) state[2] = ((uint)(lyraState->h4[2 * 0 + 2 * 4 * 2 + player]));
if (LOCAL_LINEAR == 0) state[3] = ((uint)(lyraState->h4[2 * 0 + 2 * 4 * 3 + player]));
if (LOCAL_LINEAR == 1) state[0] = ((uint)(lyraState->h4[2 * 1 + 2 * 4 * 0 + player]));
if (LOCAL_LINEAR == 1) state[1] = ((uint)(lyraState->h4[2 * 1 + 2 * 4 * 1 + player]));
if (LOCAL_LINEAR == 1) state[2] = ((uint)(lyraState->h4[2 * 1 + 2 * 4 * 2 + player]));
if (LOCAL_LINEAR == 1) state[3] = ((uint)(lyraState->h4[2 * 1 + 2 * 4 * 3 + player]));
if (LOCAL_LINEAR == 2) state[0] = ((uint)(lyraState->h4[2 * 2 + 2 * 4 * 0 + player]));
if (LOCAL_LINEAR == 2) state[1] = ((uint)(lyraState->h4[2 * 2 + 2 * 4 * 1 + player]));
if (LOCAL_LINEAR == 2) state[2] = ((uint)(lyraState->h4[2 * 2 + 2 * 4 * 2 + player]));
if (LOCAL_LINEAR == 2) state[3] = ((uint)(lyraState->h4[2 * 2 + 2 * 4 * 3 + player]));
if (LOCAL_LINEAR == 3) state[0] = ((uint)(lyraState->h4[2 * 3 + 2 * 4 * 0 + player]));
if (LOCAL_LINEAR == 3) state[1] = ((uint)(lyraState->h4[2 * 3 + 2 * 4 * 1 + player]));
if (LOCAL_LINEAR == 3) state[2] = ((uint)(lyraState->h4[2 * 3 + 2 * 4 * 2 + player]));
if (LOCAL_LINEAR == 3) state[3] = ((uint)(lyraState->h4[2 * 3 + 2 * 4 * 3 + player]));

write_state(notepad, state, 0, 7);
round_lyra_4way_sw(state);
Expand Down Expand Up @@ -490,24 +490,26 @@ uint gid = get_global_id(2);
state_xor_modify(modify, 6, 0, mindex, state, notepad);
state_xor_modify(modify, 7, 0, mindex, state, notepad);

zero = 1;

//-------------------------------------
// save lyra state
if (LOCAL_LINEAR == 0) lyraState->h4[2 * player + 2 * 4 * 0] = state[0];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * player + 2 * 4 * 1] = state[1];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * player + 2 * 4 * 2] = state[2];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * player + 2 * 4 * 3] = state[3];
if (LOCAL_LINEAR == 2) lyraState2->h4[2 * player + 2 * 4 * 0] = state[0];
if (LOCAL_LINEAR == 2) lyraState2->h4[2 * player + 2 * 4 * 1] = state[1];
if (LOCAL_LINEAR == 2) lyraState2->h4[2 * player + 2 * 4 * 2] = state[2];
if (LOCAL_LINEAR == 2) lyraState2->h4[2 * player + 2 * 4 * 3] = state[3];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * player + 2 * 4 * 0 + 1] = state[0];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * player + 2 * 4 * 1 + 1] = state[1];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * player + 2 * 4 * 2 + 1] = state[2];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * player + 2 * 4 * 3 + 1] = state[3];
if (LOCAL_LINEAR == 3) lyraState2->h4[2 * player + 2 * 4 * 0 + 1] = state[0];
if (LOCAL_LINEAR == 3) lyraState2->h4[2 * player + 2 * 4 * 1 + 1] = state[1];
if (LOCAL_LINEAR == 3) lyraState2->h4[2 * player + 2 * 4 * 2 + 1] = state[2];
if (LOCAL_LINEAR == 3) lyraState2->h4[2 * player + 2 * 4 * 3 + 1] = state[3];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * 0 + 2 * 4 * 0 + player] = state[0];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * 0 + 2 * 4 * 1 + player] = state[1];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * 0 + 2 * 4 * 2 + player] = state[2];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * 0 + 2 * 4 * 3 + player] = state[3];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * 1 + 2 * 4 * 0 + player] = state[0];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * 1 + 2 * 4 * 1 + player] = state[1];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * 1 + 2 * 4 * 2 + player] = state[2];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * 1 + 2 * 4 * 3 + player] = state[3];
if (LOCAL_LINEAR == 2) lyraState->h4[2 * 2 + 2 * 4 * 0 + player] = state[0];
if (LOCAL_LINEAR == 2) lyraState->h4[2 * 2 + 2 * 4 * 1 + player] = state[1];
if (LOCAL_LINEAR == 2) lyraState->h4[2 * 2 + 2 * 4 * 2 + player] = state[2];
if (LOCAL_LINEAR == 2) lyraState->h4[2 * 2 + 2 * 4 * 3 + player] = state[3];
if (LOCAL_LINEAR == 3) lyraState->h4[2 * 3 + 2 * 4 * 0 + player] = state[0];
if (LOCAL_LINEAR == 3) lyraState->h4[2 * 3 + 2 * 4 * 1 + player] = state[1];
if (LOCAL_LINEAR == 3) lyraState->h4[2 * 3 + 2 * 4 * 2 + player] = state[2];
if (LOCAL_LINEAR == 3) lyraState->h4[2 * 3 + 2 * 4 * 3 + player] = state[3];

barrier(CLK_GLOBAL_MEM_FENCE);
}
Expand Down
22 changes: 3 additions & 19 deletions kernel/lyra2mdzf2_navi.cl
Original file line number Diff line number Diff line change
Expand Up @@ -100,20 +100,18 @@
__asm ( \
"v_add_co_u32 %[daa], vcc_lo, %[bb], %[aa]\n" \
"s_lshl_b32 vcc_lo, vcc_lo, 4\n" \
"s_and_b32 vcc_lo, vcc_lo, 0xF0F0F0F0\n" \
"v_add_co_ci_u32 %[daa], vcc_lo, 0, %[daa], vcc_lo\n" \
"v_add_co_ci_u32_dpp %[daa], vcc_lo, %[z], %[daa], vcc_lo quad_perm:[0,1,2,3] bank_mask:0xa\n" \
: [daa] "=v" (a) \
: [aa] "0" (a), \
[bb] "v" (b) \
[bb] "v" (b), \
[z] "v" (zero) \
: "vcc");

#define SWAP32_DPP(s) \
ss = s; \
{ \
__asm ( \
"s_nop 1\n" \
"v_mov_b32_dpp %[p], %[pp] dpp8:[4,5,6,7,0,1,2,3]\n" \
"s_nop 1" \
: [p] "=v" (s) \
: [pp] "v" (ss)); \
}
Expand All @@ -122,9 +120,7 @@
ss = s; \
{ \
__asm ( \
"s_nop 1\n" \
"v_mov_b32_dpp %[dpp], %[pp] dpp8:[4,5,6,7,0,1,2,3]\n" \
"s_nop 1\n" \
"v_alignbyte_b32 %[dp], %[dpp], %[p], 3" \
: [dpp] "=v" (ss), \
[dp] "=v" (s) \
Expand All @@ -136,9 +132,7 @@
ss = s; \
{ \
__asm ( \
"s_nop 1\n" \
"v_mov_b32_dpp %[dpp], %[pp] dpp8:[4,5,6,7,0,1,2,3]\n" \
"s_nop 1\n" \
"v_alignbyte_b32 %[dp], %[dpp], %[p], 2" \
: [dpp] "=v" (ss), \
[dp] "=v" (s) \
Expand All @@ -150,9 +144,7 @@
ss = s; \
{ \
__asm ( \
"s_nop 1\n" \
"v_mov_b32_dpp %[dpp], %[pp] dpp8:[4,5,6,7,0,1,2,3]\n" \
"s_nop 1\n" \
"v_alignbit_b32 %[dp], %[p], %[dpp], 31" \
: [dpp] "=v" (ss), \
[dp] "=v" (s) \
Expand All @@ -170,11 +162,9 @@

#define shflldpp(state) \
__asm ( \
"s_nop 1\n" \
"v_mov_b32_dpp %[dp10], %[p10] quad_perm:[1,2,3,0]\n" \
"v_mov_b32_dpp %[dp20], %[p20] quad_perm:[2,3,0,1]\n" \
"v_mov_b32_dpp %[dp30], %[p30] quad_perm:[3,0,1,2]\n" \
"s_nop 1" \
: [dp10] "=v" (state[1]), \
[dp20] "=v" (state[2]), \
[dp30] "=v" (state[3]) \
Expand All @@ -184,11 +174,9 @@

#define shflrdpp(state) \
__asm ( \
"s_nop 1\n" \
"v_mov_b32_dpp %[dp10], %[p10] quad_perm:[3,0,1,2]\n" \
"v_mov_b32_dpp %[dp20], %[p20] quad_perm:[2,3,0,1]\n" \
"v_mov_b32_dpp %[dp30], %[p30] quad_perm:[1,2,3,0]\n" \
"s_nop 1" \
: [dp10] "=v" (state[1]), \
[dp20] "=v" (state[2]), \
[dp30] "=v" (state[3]) \
Expand All @@ -208,11 +196,9 @@
s1 = state[1]; \
s2 = state[2]; \
__asm ( \
"s_nop 1\n" \
"v_mov_b32_dpp %[dp10], %[p10] quad_perm:[3,0,1,2]\n" \
"v_mov_b32_dpp %[dp20], %[p20] quad_perm:[3,0,1,2]\n" \
"v_mov_b32_dpp %[dp30], %[p30] quad_perm:[3,0,1,2]\n" \
"s_nop 1" \
: [dp10] "=v" (s0), \
[dp20] "=v" (s1), \
[dp30] "=v" (s2) \
Expand Down Expand Up @@ -334,9 +320,7 @@
#define broadcast_zero(s) \
p0 = (s[0] & 7); \
__asm ( \
"s_nop 0\n" \
"v_mov_b32_dpp %[dp0], %[p0] dpp8:[0,0,0,0,0,0,0,0]\n" \
"s_nop 0" \
: [dp0] "=v" (p0) \
: [p0] "0" (p0)); \
if ((get_local_id(0) & 2) == 0) modify = p0; \
Expand Down
77 changes: 40 additions & 37 deletions kernel/phi2_navi.cl
Original file line number Diff line number Diff line change
Expand Up @@ -343,17 +343,17 @@ __kernel void search1(__global uint* hashes, __global uchar* sharedDataBuf)
__attribute__((amdgpu_waves_per_eu(1,1)))
__attribute__((amdgpu_num_vgpr(256)))
__attribute__((amdgpu_num_sgpr(200)))
__attribute__((reqd_work_group_size(4, 4, 16)))
__attribute__((reqd_work_group_size(4, 2, 16)))
__kernel void search2(__global uchar* sharedDataBuf)
{
uint gid = get_global_id(2);
__global lyraState_t *lyraState = (__global lyraState_t *)(sharedDataBuf + ((8 * 4 * 4 * 2) * (gid-get_global_offset(2))));
__global lyraState_t *lyraState2 = (__global lyraState_t *)(sharedDataBuf + ((8 * 4 * 4) + (8 * 4 * 4 * 2) * (gid-get_global_offset(2))));
__global lyraState_t *lyraState = (__global lyraState_t *)(sharedDataBuf + ((8 * 4 * 4) * (gid-get_global_offset(2))));

uint notepad[192];

const int player = get_local_id(1) % 4;
const int player = get_local_id(1) & 1;

uint zero = 0;
uint state[4];
uint si[3];
uint sII[3];
Expand All @@ -366,26 +366,27 @@ __kernel void search2(__global uchar* sharedDataBuf)
uint ss3;
uint ss;
uint carry;
const uint mindex = (LOCAL_LINEAR & 1) == 0 ? 0 : 1;
const uint mindex = (player & 1) == 0 ? 0 : 1;

//-------------------------------------
// Load Lyra state
if (LOCAL_LINEAR == 0) state[0] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 0]));
if (LOCAL_LINEAR == 0) state[1] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 1]));
if (LOCAL_LINEAR == 0) state[2] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 2]));
if (LOCAL_LINEAR == 0) state[3] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 3]));
if (LOCAL_LINEAR == 1) state[0] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 0 + 1]));
if (LOCAL_LINEAR == 1) state[1] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 1 + 1]));
if (LOCAL_LINEAR == 1) state[2] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 2 + 1]));
if (LOCAL_LINEAR == 1) state[3] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 3 + 1]));
if (LOCAL_LINEAR == 2) state[0] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 0]));
if (LOCAL_LINEAR == 2) state[1] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 1]));
if (LOCAL_LINEAR == 2) state[2] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 2]));
if (LOCAL_LINEAR == 2) state[3] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 3]));
if (LOCAL_LINEAR == 3) state[0] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 0 + 1]));
if (LOCAL_LINEAR == 3) state[1] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 1 + 1]));
if (LOCAL_LINEAR == 3) state[2] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 2 + 1]));
if (LOCAL_LINEAR == 3) state[3] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 3 + 1]));
if (LOCAL_LINEAR == 0) state[0] = ((uint)(lyraState->h4[2 * 0 + 2 * 4 * 0 + player]));
if (LOCAL_LINEAR == 0) state[1] = ((uint)(lyraState->h4[2 * 0 + 2 * 4 * 1 + player]));
if (LOCAL_LINEAR == 0) state[2] = ((uint)(lyraState->h4[2 * 0 + 2 * 4 * 2 + player]));
if (LOCAL_LINEAR == 0) state[3] = ((uint)(lyraState->h4[2 * 0 + 2 * 4 * 3 + player]));
if (LOCAL_LINEAR == 1) state[0] = ((uint)(lyraState->h4[2 * 1 + 2 * 4 * 0 + player]));
if (LOCAL_LINEAR == 1) state[1] = ((uint)(lyraState->h4[2 * 1 + 2 * 4 * 1 + player]));
if (LOCAL_LINEAR == 1) state[2] = ((uint)(lyraState->h4[2 * 1 + 2 * 4 * 2 + player]));
if (LOCAL_LINEAR == 1) state[3] = ((uint)(lyraState->h4[2 * 1 + 2 * 4 * 3 + player]));
if (LOCAL_LINEAR == 2) state[0] = ((uint)(lyraState->h4[2 * 2 + 2 * 4 * 0 + player]));
if (LOCAL_LINEAR == 2) state[1] = ((uint)(lyraState->h4[2 * 2 + 2 * 4 * 1 + player]));
if (LOCAL_LINEAR == 2) state[2] = ((uint)(lyraState->h4[2 * 2 + 2 * 4 * 2 + player]));
if (LOCAL_LINEAR == 2) state[3] = ((uint)(lyraState->h4[2 * 2 + 2 * 4 * 3 + player]));
if (LOCAL_LINEAR == 3) state[0] = ((uint)(lyraState->h4[2 * 3 + 2 * 4 * 0 + player]));
if (LOCAL_LINEAR == 3) state[1] = ((uint)(lyraState->h4[2 * 3 + 2 * 4 * 1 + player]));
if (LOCAL_LINEAR == 3) state[2] = ((uint)(lyraState->h4[2 * 3 + 2 * 4 * 2 + player]));
if (LOCAL_LINEAR == 3) state[3] = ((uint)(lyraState->h4[2 * 3 + 2 * 4 * 3 + player]));


write_state(notepad, state, 0, 7);
round_lyra_4way_sw(state);
Expand Down Expand Up @@ -449,26 +450,28 @@ __kernel void search2(__global uchar* sharedDataBuf)
state_xor_modify(modify, 5, 0, mindex, state, notepad);
state_xor_modify(modify, 6, 0, mindex, state, notepad);
state_xor_modify(modify, 7, 0, mindex, state, notepad);

zero = 1;
/**/

//-------------------------------------
// save lyra state
if (LOCAL_LINEAR == 0) lyraState->h4[2 * player + 2 * 4 * 0] = state[0];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * player + 2 * 4 * 1] = state[1];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * player + 2 * 4 * 2] = state[2];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * player + 2 * 4 * 3] = state[3];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * player + 2 * 4 * 0 + 1] = state[0];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * player + 2 * 4 * 1 + 1] = state[1];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * player + 2 * 4 * 2 + 1] = state[2];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * player + 2 * 4 * 3 + 1] = state[3];
if (LOCAL_LINEAR == 2) lyraState2->h4[2 * player + 2 * 4 * 0] = state[0];
if (LOCAL_LINEAR == 2) lyraState2->h4[2 * player + 2 * 4 * 1] = state[1];
if (LOCAL_LINEAR == 2) lyraState2->h4[2 * player + 2 * 4 * 2] = state[2];
if (LOCAL_LINEAR == 2) lyraState2->h4[2 * player + 2 * 4 * 3] = state[3];
if (LOCAL_LINEAR == 3) lyraState2->h4[2 * player + 2 * 4 * 0 + 1] = state[0];
if (LOCAL_LINEAR == 3) lyraState2->h4[2 * player + 2 * 4 * 1 + 1] = state[1];
if (LOCAL_LINEAR == 3) lyraState2->h4[2 * player + 2 * 4 * 2 + 1] = state[2];
if (LOCAL_LINEAR == 3) lyraState2->h4[2 * player + 2 * 4 * 3 + 1] = state[3];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * 0 + 2 * 4 * 0 + player] = state[0];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * 0 + 2 * 4 * 1 + player] = state[1];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * 0 + 2 * 4 * 2 + player] = state[2];
if (LOCAL_LINEAR == 0) lyraState->h4[2 * 0 + 2 * 4 * 3 + player] = state[3];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * 1 + 2 * 4 * 0 + player] = state[0];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * 1 + 2 * 4 * 1 + player] = state[1];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * 1 + 2 * 4 * 2 + player] = state[2];
if (LOCAL_LINEAR == 1) lyraState->h4[2 * 1 + 2 * 4 * 3 + player] = state[3];
if (LOCAL_LINEAR == 2) lyraState->h4[2 * 2 + 2 * 4 * 0 + player] = state[0];
if (LOCAL_LINEAR == 2) lyraState->h4[2 * 2 + 2 * 4 * 1 + player] = state[1];
if (LOCAL_LINEAR == 2) lyraState->h4[2 * 2 + 2 * 4 * 2 + player] = state[2];
if (LOCAL_LINEAR == 2) lyraState->h4[2 * 2 + 2 * 4 * 3 + player] = state[3];
if (LOCAL_LINEAR == 3) lyraState->h4[2 * 3 + 2 * 4 * 0 + player] = state[0];
if (LOCAL_LINEAR == 3) lyraState->h4[2 * 3 + 2 * 4 * 1 + player] = state[1];
if (LOCAL_LINEAR == 3) lyraState->h4[2 * 3 + 2 * 4 * 2 + player] = state[2];
if (LOCAL_LINEAR == 3) lyraState->h4[2 * 3 + 2 * 4 * 3 + player] = state[3];

barrier(CLK_GLOBAL_MEM_FENCE);
}
Expand Down

0 comments on commit 603d0e6

Please sign in to comment.