@@ -108,33 +108,43 @@ uint gid = get_global_id(2);
108
108
__global lyraState_t * lyraState = (__global lyraState_t * )(sharedDataBuf + ((8 * 4 * 4 * 2 ) * (gid - get_global_offset (2 ))));
109
109
__global lyraState_t * lyraState2 = (__global lyraState_t * )(sharedDataBuf + ((8 * 4 * 4 ) + (8 * 4 * 4 * 2 ) * (gid - get_global_offset (2 ))));
110
110
111
- ulong notepad [192 / 3 + 192 / 6 ];
111
+ uint notepad [192 ];
112
112
113
113
const int player = get_local_id (1 );
114
114
115
- ulong state [4 ];
116
- ulong si [3 ];
117
- ulong sII [3 ];
118
- uint2 s0 ;
119
- uint2 s1 ;
120
- uint2 s2 ;
121
- uint2 s3 ;
122
- long ss0 ;
123
- uint2 ss1 ;
124
- uint2 ss3 ;
115
+ uint state [4 ];
116
+ uint si [3 ];
117
+ uint sII [3 ];
118
+ uint s0 ;
119
+ uint s1 ;
120
+ uint s2 ;
121
+ uint s3 ;
122
+ int ss0 ;
123
+ uint ss1 ;
124
+ uint ss3 ;
125
+ uint ss ;
126
+ uint carry ;
125
127
const uint mindex = (LOCAL_LINEAR & 1 ) == 0 ? 0 : 1 ;
126
-
128
+ SETSGPR100101 ;
127
129
//-------------------------------------
128
130
// Load Lyra state
129
- if (LOCAL_LINEAR == 0 ) state [0 ] = ((ulong )(lyraState -> h8 [player + 4 * 0 ]));
130
- if (LOCAL_LINEAR == 1 ) state [0 ] = ((ulong )(lyraState -> h8 [player + 4 * 1 ]));
131
- if (LOCAL_LINEAR == 0 || LOCAL_LINEAR == 1 ) state [2 ] = ((ulong )(lyraState -> h8 [player + 4 * 2 ]));
132
- if (LOCAL_LINEAR == 0 || LOCAL_LINEAR == 1 ) state [3 ] = ((ulong )(lyraState -> h8 [player + 4 * 3 ]));
133
- if (LOCAL_LINEAR == 2 ) state [0 ] = ((ulong )(lyraState2 -> h8 [player + 4 * 0 ]));
134
- if (LOCAL_LINEAR == 3 ) state [0 ] = ((ulong )(lyraState2 -> h8 [player + 4 * 1 ]));
135
- if (LOCAL_LINEAR == 2 || LOCAL_LINEAR == 3 ) state [2 ] = ((ulong )(lyraState2 -> h8 [player + 4 * 2 ]));
136
- if (LOCAL_LINEAR == 2 || LOCAL_LINEAR == 3 ) state [3 ] = ((ulong )(lyraState2 -> h8 [player + 4 * 3 ]));
137
-
131
+ if (LOCAL_LINEAR == 0 ) state [0 ] = ((uint )(lyraState -> h4 [2 * player + 2 * 4 * 0 ]));
132
+ if (LOCAL_LINEAR == 0 ) state [1 ] = ((uint )(lyraState -> h4 [2 * player + 2 * 4 * 1 ]));
133
+ if (LOCAL_LINEAR == 0 ) state [2 ] = ((uint )(lyraState -> h4 [2 * player + 2 * 4 * 2 ]));
134
+ if (LOCAL_LINEAR == 0 ) state [3 ] = ((uint )(lyraState -> h4 [2 * player + 2 * 4 * 3 ]));
135
+ if (LOCAL_LINEAR == 1 ) state [0 ] = ((uint )(lyraState -> h4 [2 * player + 2 * 4 * 0 + 1 ]));
136
+ if (LOCAL_LINEAR == 1 ) state [1 ] = ((uint )(lyraState -> h4 [2 * player + 2 * 4 * 1 + 1 ]));
137
+ if (LOCAL_LINEAR == 1 ) state [2 ] = ((uint )(lyraState -> h4 [2 * player + 2 * 4 * 2 + 1 ]));
138
+ if (LOCAL_LINEAR == 1 ) state [3 ] = ((uint )(lyraState -> h4 [2 * player + 2 * 4 * 3 + 1 ]));
139
+ if (LOCAL_LINEAR == 2 ) state [0 ] = ((uint )(lyraState2 -> h4 [2 * player + 2 * 4 * 0 ]));
140
+ if (LOCAL_LINEAR == 2 ) state [1 ] = ((uint )(lyraState2 -> h4 [2 * player + 2 * 4 * 1 ]));
141
+ if (LOCAL_LINEAR == 2 ) state [2 ] = ((uint )(lyraState2 -> h4 [2 * player + 2 * 4 * 2 ]));
142
+ if (LOCAL_LINEAR == 2 ) state [3 ] = ((uint )(lyraState2 -> h4 [2 * player + 2 * 4 * 3 ]));
143
+ if (LOCAL_LINEAR == 3 ) state [0 ] = ((uint )(lyraState2 -> h4 [2 * player + 2 * 4 * 0 + 1 ]));
144
+ if (LOCAL_LINEAR == 3 ) state [1 ] = ((uint )(lyraState2 -> h4 [2 * player + 2 * 4 * 1 + 1 ]));
145
+ if (LOCAL_LINEAR == 3 ) state [2 ] = ((uint )(lyraState2 -> h4 [2 * player + 2 * 4 * 2 + 1 ]));
146
+ if (LOCAL_LINEAR == 3 ) state [3 ] = ((uint )(lyraState2 -> h4 [2 * player + 2 * 4 * 3 + 1 ]));
147
+
138
148
write_state (notepad , state , 0 , 7 );
139
149
round_lyra_4way_sw (state );
140
150
write_state (notepad , state , 0 , 6 );
@@ -304,21 +314,25 @@ uint gid = get_global_id(2);
304
314
state_xor_modify (modify , 5 , 0 , mindex , state , notepad );
305
315
state_xor_modify (modify , 6 , 0 , mindex , state , notepad );
306
316
state_xor_modify (modify , 7 , 0 , mindex , state , notepad );
307
- //-------------------------------------
308
- // save lyra state
309
- pull_state (state );
310
317
311
- //-------------------------------------
318
+ //-------------------------------------
312
319
// save lyra state
313
- barrier (CLK_LOCAL_MEM_FENCE );
314
- if (LOCAL_LINEAR == 0 ) lyraState -> h8 [player + 4 * 0 ] = state [0 ];
315
- if (LOCAL_LINEAR == 0 ) lyraState -> h8 [player + 4 * 1 ] = state [1 ];
316
- if (LOCAL_LINEAR == 0 ) lyraState -> h8 [player + 4 * 2 ] = state [2 ];
317
- if (LOCAL_LINEAR == 0 ) lyraState -> h8 [player + 4 * 3 ] = state [3 ];
318
- if (LOCAL_LINEAR == 2 ) lyraState2 -> h8 [player + 4 * 0 ] = state [0 ];
319
- if (LOCAL_LINEAR == 2 ) lyraState2 -> h8 [player + 4 * 1 ] = state [1 ];
320
- if (LOCAL_LINEAR == 2 ) lyraState2 -> h8 [player + 4 * 2 ] = state [2 ];
321
- if (LOCAL_LINEAR == 2 ) lyraState2 -> h8 [player + 4 * 3 ] = state [3 ];
320
+ if (LOCAL_LINEAR == 0 ) lyraState -> h4 [2 * player + 2 * 4 * 0 ] = state [0 ];
321
+ if (LOCAL_LINEAR == 0 ) lyraState -> h4 [2 * player + 2 * 4 * 1 ] = state [1 ];
322
+ if (LOCAL_LINEAR == 0 ) lyraState -> h4 [2 * player + 2 * 4 * 2 ] = state [2 ];
323
+ if (LOCAL_LINEAR == 0 ) lyraState -> h4 [2 * player + 2 * 4 * 3 ] = state [3 ];
324
+ if (LOCAL_LINEAR == 2 ) lyraState2 -> h4 [2 * player + 2 * 4 * 0 ] = state [0 ];
325
+ if (LOCAL_LINEAR == 2 ) lyraState2 -> h4 [2 * player + 2 * 4 * 1 ] = state [1 ];
326
+ if (LOCAL_LINEAR == 2 ) lyraState2 -> h4 [2 * player + 2 * 4 * 2 ] = state [2 ];
327
+ if (LOCAL_LINEAR == 2 ) lyraState2 -> h4 [2 * player + 2 * 4 * 3 ] = state [3 ];
328
+ if (LOCAL_LINEAR == 1 ) lyraState -> h4 [2 * player + 2 * 4 * 0 + 1 ] = state [0 ];
329
+ if (LOCAL_LINEAR == 1 ) lyraState -> h4 [2 * player + 2 * 4 * 1 + 1 ] = state [1 ];
330
+ if (LOCAL_LINEAR == 1 ) lyraState -> h4 [2 * player + 2 * 4 * 2 + 1 ] = state [2 ];
331
+ if (LOCAL_LINEAR == 1 ) lyraState -> h4 [2 * player + 2 * 4 * 3 + 1 ] = state [3 ];
332
+ if (LOCAL_LINEAR == 3 ) lyraState2 -> h4 [2 * player + 2 * 4 * 0 + 1 ] = state [0 ];
333
+ if (LOCAL_LINEAR == 3 ) lyraState2 -> h4 [2 * player + 2 * 4 * 1 + 1 ] = state [1 ];
334
+ if (LOCAL_LINEAR == 3 ) lyraState2 -> h4 [2 * player + 2 * 4 * 2 + 1 ] = state [2 ];
335
+ if (LOCAL_LINEAR == 3 ) lyraState2 -> h4 [2 * player + 2 * 4 * 3 + 1 ] = state [3 ];
322
336
323
337
barrier (CLK_GLOBAL_MEM_FENCE );
324
338
}
0 commit comments