@@ -120,31 +120,26 @@ float dot_cl(const float *vecAdata, const float *vecXdata, unsigned int dim1) {
120
120
121
121
size_t dim1_size = sizeof (float ) * dim1;
122
122
123
- opencl::Buffer inputA (cl_context_ref.context_inst_ , dim1_size, true ,
124
- nullptr );
125
-
126
- opencl::Buffer inputX (cl_context_ref.context_inst_ , dim1_size, true ,
127
- nullptr );
128
-
129
- opencl::Buffer dotResult (cl_context_ref.context_inst_ , sizeof (float ), true ,
130
- &cl_ret);
131
-
132
- result = inputA.WriteData (cl_context_ref.command_queue_inst_ , vecAdata);
123
+ result = clbuffInstance.getInBufferA ()->WriteDataRegion (
124
+ cl_context_ref.command_queue_inst_ , dim1_size, vecAdata);
133
125
if (!result) {
134
126
break ;
135
127
}
136
128
137
- result = inputX.WriteData (cl_context_ref.command_queue_inst_ , vecXdata);
129
+ result = clbuffInstance.getInBufferB ()->WriteDataRegion (
130
+ cl_context_ref.command_queue_inst_ , dim1_size, vecXdata);
138
131
if (!result) {
139
132
break ;
140
133
}
141
134
142
- result = kernel_dot_ptr->SetKernelArguments (0 , &inputA, sizeof (cl_mem));
135
+ result = kernel_dot_ptr->SetKernelArguments (
136
+ 0 , clbuffInstance.getInBufferA (), sizeof (cl_mem));
143
137
if (!result) {
144
138
break ;
145
139
}
146
140
147
- result = kernel_dot_ptr->SetKernelArguments (1 , &inputX, sizeof (cl_mem));
141
+ result = kernel_dot_ptr->SetKernelArguments (
142
+ 1 , clbuffInstance.getInBufferB (), sizeof (cl_mem));
148
143
if (!result) {
149
144
break ;
150
145
}
@@ -154,7 +149,8 @@ float dot_cl(const float *vecAdata, const float *vecXdata, unsigned int dim1) {
154
149
break ;
155
150
}
156
151
157
- result = kernel_dot_ptr->SetKernelArguments (3 , &dotResult, sizeof (cl_mem));
152
+ result = kernel_dot_ptr->SetKernelArguments (
153
+ 3 , clbuffInstance.getOutBufferA (), sizeof (cl_mem));
158
154
if (!result) {
159
155
break ;
160
156
}
@@ -168,7 +164,8 @@ float dot_cl(const float *vecAdata, const float *vecXdata, unsigned int dim1) {
168
164
break ;
169
165
}
170
166
171
- result = dotResult.ReadData (cl_context_ref.command_queue_inst_ , &cl_ret);
167
+ result = clbuffInstance.getOutBufferA ()->ReadDataRegion (
168
+ cl_context_ref.command_queue_inst_ , sizeof (float ), &cl_ret);
172
169
if (!result) {
173
170
break ;
174
171
}
@@ -213,41 +210,38 @@ void sgemm_cl(bool TransA, bool TransB, const float *A, const float *B,
213
210
size_t k_n_size = K * N * sizeof (float );
214
211
size_t m_n_size = M * N * sizeof (float );
215
212
216
- opencl::Buffer inputA (cl_context_ref.context_inst_ , m_k_size, true ,
217
- nullptr );
218
-
219
- opencl::Buffer inputB (cl_context_ref.context_inst_ , k_n_size, true ,
220
- nullptr );
221
-
222
- opencl::Buffer inOutC (cl_context_ref.context_inst_ , m_n_size, true ,
223
- nullptr );
224
-
225
- result = inputA.WriteData (cl_context_ref.command_queue_inst_ , A);
213
+ result = clbuffInstance.getInBufferA ()->WriteDataRegion (
214
+ cl_context_ref.command_queue_inst_ , m_k_size, A);
226
215
if (!result) {
227
216
break ;
228
217
}
229
218
230
- result = inputB.WriteData (cl_context_ref.command_queue_inst_ , B);
219
+ result = clbuffInstance.getInBufferB ()->WriteDataRegion (
220
+ cl_context_ref.command_queue_inst_ , k_n_size, B);
231
221
if (!result) {
232
222
break ;
233
223
}
234
224
235
- result = inOutC.WriteData (cl_context_ref.command_queue_inst_ , C);
225
+ result = clbuffInstance.getOutBufferA ()->WriteDataRegion (
226
+ cl_context_ref.command_queue_inst_ , m_n_size, C);
236
227
if (!result) {
237
228
break ;
238
229
}
239
230
240
- result = kernel_sgemm_ptr->SetKernelArguments (0 , &inputA, sizeof (cl_mem));
231
+ result = kernel_sgemm_ptr->SetKernelArguments (
232
+ 0 , clbuffInstance.getInBufferA (), sizeof (cl_mem));
241
233
if (!result) {
242
234
break ;
243
235
}
244
236
245
- result = kernel_sgemm_ptr->SetKernelArguments (1 , &inputB, sizeof (cl_mem));
237
+ result = kernel_sgemm_ptr->SetKernelArguments (
238
+ 1 , clbuffInstance.getInBufferB (), sizeof (cl_mem));
246
239
if (!result) {
247
240
break ;
248
241
}
249
242
250
- result = kernel_sgemm_ptr->SetKernelArguments (2 , &inOutC, sizeof (cl_mem));
243
+ result = kernel_sgemm_ptr->SetKernelArguments (
244
+ 2 , clbuffInstance.getOutBufferA (), sizeof (cl_mem));
251
245
if (!result) {
252
246
break ;
253
247
}
@@ -281,7 +275,8 @@ void sgemm_cl(bool TransA, bool TransB, const float *A, const float *B,
281
275
break ;
282
276
}
283
277
284
- result = inOutC.ReadData (cl_context_ref.command_queue_inst_ , C);
278
+ result = clbuffInstance.getOutBufferA ()->ReadDataRegion (
279
+ cl_context_ref.command_queue_inst_ , m_n_size, C);
285
280
if (!result) {
286
281
break ;
287
282
}
@@ -372,14 +367,14 @@ void sscal_cl(float *X, const unsigned int N, const float alpha) {
372
367
373
368
size_t x_size = N * sizeof (float );
374
369
375
- opencl::Buffer inputX (cl_context_ref.context_inst_ , x_size, false , nullptr );
376
-
377
- result = inputX.WriteData (cl_context_ref.command_queue_inst_ , X);
370
+ result = clbuffInstance.getOutBufferA ()->WriteDataRegion (
371
+ cl_context_ref.command_queue_inst_ , x_size, X);
378
372
if (!result) {
379
373
break ;
380
374
}
381
375
382
- result = kernel_ptr->SetKernelArguments (0 , &inputX, sizeof (cl_mem));
376
+ result = kernel_ptr->SetKernelArguments (0 , clbuffInstance.getOutBufferA (),
377
+ sizeof (cl_mem));
383
378
if (!result) {
384
379
break ;
385
380
}
@@ -398,7 +393,8 @@ void sscal_cl(float *X, const unsigned int N, const float alpha) {
398
393
break ;
399
394
}
400
395
401
- result = inputX.ReadData (cl_context_ref.command_queue_inst_ , X);
396
+ result = clbuffInstance.getOutBufferA ()->ReadDataRegion (
397
+ cl_context_ref.command_queue_inst_ , x_size, X);
402
398
if (!result) {
403
399
break ;
404
400
}
@@ -439,30 +435,26 @@ void transpose_cl_axis(const float *in, float *res,
439
435
size_t dim_size = sizeof (float ) * input_batch_size * input_height *
440
436
input_width * input_channels;
441
437
442
- opencl::Buffer inputA (cl_context_ref.context_inst_ , dim_size, true ,
443
- nullptr );
444
-
445
- opencl::Buffer inOutRes (cl_context_ref.context_inst_ , dim_size, true ,
446
- nullptr );
447
-
448
- result = inputA.WriteData (cl_context_ref.command_queue_inst_ , in);
438
+ result = clbuffInstance.getInBufferA ()->WriteDataRegion (
439
+ cl_context_ref.command_queue_inst_ , dim_size, in);
449
440
if (!result) {
450
441
break ;
451
442
}
452
443
453
- result = inOutRes.WriteData (cl_context_ref.command_queue_inst_ , res);
444
+ result = clbuffInstance.getOutBufferA ()->WriteDataRegion (
445
+ cl_context_ref.command_queue_inst_ , dim_size, res);
454
446
if (!result) {
455
447
break ;
456
448
}
457
449
458
- result =
459
- kernel_transpose_ptr-> SetKernelArguments ( 0 , &inputA , sizeof (cl_mem));
450
+ result = kernel_transpose_ptr-> SetKernelArguments (
451
+ 0 , clbuffInstance. getInBufferA () , sizeof (cl_mem));
460
452
if (!result) {
461
453
break ;
462
454
}
463
455
464
- result =
465
- kernel_transpose_ptr-> SetKernelArguments ( 1 , &inOutRes , sizeof (cl_mem));
456
+ result = kernel_transpose_ptr-> SetKernelArguments (
457
+ 1 , clbuffInstance. getOutBufferA () , sizeof (cl_mem));
466
458
if (!result) {
467
459
break ;
468
460
}
@@ -503,7 +495,8 @@ void transpose_cl_axis(const float *in, float *res,
503
495
break ;
504
496
}
505
497
506
- result = inOutRes.ReadData (cl_context_ref.command_queue_inst_ , res);
498
+ result = clbuffInstance.getOutBufferA ()->ReadDataRegion (
499
+ cl_context_ref.command_queue_inst_ , dim_size, res);
507
500
if (!result) {
508
501
break ;
509
502
}
0 commit comments