Skip to content

Commit af5132a

Browse files
authored
Merge pull request #74 from fancyIX/feature/#72
Fix #72
2 parents: 941fac1 + 7a9edeb; commit: af5132a

File tree

3 files changed

+341
-259
lines changed

3 files changed

+341
-259
lines changed

kernel/lyra2Zf.cl

Lines changed: 48 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -108,33 +108,43 @@ uint gid = get_global_id(2);
108108
__global lyraState_t *lyraState = (__global lyraState_t *)(sharedDataBuf + ((8 * 4 * 4 * 2) * (gid-get_global_offset(2))));
109109
__global lyraState_t *lyraState2 = (__global lyraState_t *)(sharedDataBuf + ((8 * 4 * 4) + (8 * 4 * 4 * 2) * (gid-get_global_offset(2))));
110110

111-
ulong notepad[192 / 3 + 192 / 6];
111+
uint notepad[192];
112112

113113
const int player = get_local_id(1);
114114

115-
ulong state[4];
116-
ulong si[3];
117-
ulong sII[3];
118-
uint2 s0;
119-
uint2 s1;
120-
uint2 s2;
121-
uint2 s3;
122-
long ss0;
123-
uint2 ss1;
124-
uint2 ss3;
115+
uint state[4];
116+
uint si[3];
117+
uint sII[3];
118+
uint s0;
119+
uint s1;
120+
uint s2;
121+
uint s3;
122+
int ss0;
123+
uint ss1;
124+
uint ss3;
125+
uint ss;
126+
uint carry;
125127
const uint mindex = (LOCAL_LINEAR & 1) == 0 ? 0 : 1;
126-
128+
SETSGPR100101;
127129
//-------------------------------------
128130
// Load Lyra state
129-
if (LOCAL_LINEAR == 0) state[0] = ((ulong)(lyraState->h8[player + 4 * 0]));
130-
if (LOCAL_LINEAR == 1) state[0] = ((ulong)(lyraState->h8[player + 4 * 1]));
131-
if (LOCAL_LINEAR == 0 || LOCAL_LINEAR == 1) state[2] = ((ulong)(lyraState->h8[player + 4 * 2]));
132-
if (LOCAL_LINEAR == 0 || LOCAL_LINEAR == 1) state[3] = ((ulong)(lyraState->h8[player + 4 * 3]));
133-
if (LOCAL_LINEAR == 2) state[0] = ((ulong)(lyraState2->h8[player + 4 * 0]));
134-
if (LOCAL_LINEAR == 3) state[0] = ((ulong)(lyraState2->h8[player + 4 * 1]));
135-
if (LOCAL_LINEAR == 2 || LOCAL_LINEAR == 3) state[2] = ((ulong)(lyraState2->h8[player + 4 * 2]));
136-
if (LOCAL_LINEAR == 2 || LOCAL_LINEAR == 3) state[3] = ((ulong)(lyraState2->h8[player + 4 * 3]));
137-
131+
if (LOCAL_LINEAR == 0) state[0] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 0]));
132+
if (LOCAL_LINEAR == 0) state[1] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 1]));
133+
if (LOCAL_LINEAR == 0) state[2] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 2]));
134+
if (LOCAL_LINEAR == 0) state[3] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 3]));
135+
if (LOCAL_LINEAR == 1) state[0] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 0 + 1]));
136+
if (LOCAL_LINEAR == 1) state[1] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 1 + 1]));
137+
if (LOCAL_LINEAR == 1) state[2] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 2 + 1]));
138+
if (LOCAL_LINEAR == 1) state[3] = ((uint)(lyraState->h4[2 * player + 2 * 4 * 3 + 1]));
139+
if (LOCAL_LINEAR == 2) state[0] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 0]));
140+
if (LOCAL_LINEAR == 2) state[1] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 1]));
141+
if (LOCAL_LINEAR == 2) state[2] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 2]));
142+
if (LOCAL_LINEAR == 2) state[3] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 3]));
143+
if (LOCAL_LINEAR == 3) state[0] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 0 + 1]));
144+
if (LOCAL_LINEAR == 3) state[1] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 1 + 1]));
145+
if (LOCAL_LINEAR == 3) state[2] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 2 + 1]));
146+
if (LOCAL_LINEAR == 3) state[3] = ((uint)(lyraState2->h4[2 * player + 2 * 4 * 3 + 1]));
147+
138148
write_state(notepad, state, 0, 7);
139149
round_lyra_4way_sw(state);
140150
write_state(notepad, state, 0, 6);
@@ -304,21 +314,25 @@ uint gid = get_global_id(2);
304314
state_xor_modify(modify, 5, 0, mindex, state, notepad);
305315
state_xor_modify(modify, 6, 0, mindex, state, notepad);
306316
state_xor_modify(modify, 7, 0, mindex, state, notepad);
307-
//-------------------------------------
308-
// save lyra state
309-
pull_state(state);
310317

311-
//-------------------------------------
318+
//-------------------------------------
312319
// save lyra state
313-
barrier(CLK_LOCAL_MEM_FENCE);
314-
if (LOCAL_LINEAR == 0) lyraState->h8[player + 4 * 0] = state[0];
315-
if (LOCAL_LINEAR == 0) lyraState->h8[player + 4 * 1] = state[1];
316-
if (LOCAL_LINEAR == 0) lyraState->h8[player + 4 * 2] = state[2];
317-
if (LOCAL_LINEAR == 0) lyraState->h8[player + 4 * 3] = state[3];
318-
if (LOCAL_LINEAR == 2) lyraState2->h8[player + 4 * 0] = state[0];
319-
if (LOCAL_LINEAR == 2) lyraState2->h8[player + 4 * 1] = state[1];
320-
if (LOCAL_LINEAR == 2) lyraState2->h8[player + 4 * 2] = state[2];
321-
if (LOCAL_LINEAR == 2) lyraState2->h8[player + 4 * 3] = state[3];
320+
if (LOCAL_LINEAR == 0) lyraState->h4[2 * player + 2 * 4 * 0] = state[0];
321+
if (LOCAL_LINEAR == 0) lyraState->h4[2 * player + 2 * 4 * 1] = state[1];
322+
if (LOCAL_LINEAR == 0) lyraState->h4[2 * player + 2 * 4 * 2] = state[2];
323+
if (LOCAL_LINEAR == 0) lyraState->h4[2 * player + 2 * 4 * 3] = state[3];
324+
if (LOCAL_LINEAR == 2) lyraState2->h4[2 * player + 2 * 4 * 0] = state[0];
325+
if (LOCAL_LINEAR == 2) lyraState2->h4[2 * player + 2 * 4 * 1] = state[1];
326+
if (LOCAL_LINEAR == 2) lyraState2->h4[2 * player + 2 * 4 * 2] = state[2];
327+
if (LOCAL_LINEAR == 2) lyraState2->h4[2 * player + 2 * 4 * 3] = state[3];
328+
if (LOCAL_LINEAR == 1) lyraState->h4[2 * player + 2 * 4 * 0 + 1] = state[0];
329+
if (LOCAL_LINEAR == 1) lyraState->h4[2 * player + 2 * 4 * 1 + 1] = state[1];
330+
if (LOCAL_LINEAR == 1) lyraState->h4[2 * player + 2 * 4 * 2 + 1] = state[2];
331+
if (LOCAL_LINEAR == 1) lyraState->h4[2 * player + 2 * 4 * 3 + 1] = state[3];
332+
if (LOCAL_LINEAR == 3) lyraState2->h4[2 * player + 2 * 4 * 0 + 1] = state[0];
333+
if (LOCAL_LINEAR == 3) lyraState2->h4[2 * player + 2 * 4 * 1 + 1] = state[1];
334+
if (LOCAL_LINEAR == 3) lyraState2->h4[2 * player + 2 * 4 * 2 + 1] = state[2];
335+
if (LOCAL_LINEAR == 3) lyraState2->h4[2 * player + 2 * 4 * 3 + 1] = state[3];
322336

323337
barrier(CLK_GLOBAL_MEM_FENCE);
324338
}

0 commit comments

Comments
 (0)