@@ -209,158 +209,133 @@ inline Col<complex<T1>> Gnufft<T1>::operator/(const Col<complex<T1>> &d) const {
209
209
Col<CxT1> temp (reinterpret_cast <CxT1 *>(pGridData), n1, false , true );
210
210
return temp; // Return a vector of type T1
211
211
}
212
- /*
212
+
213
213
template <typename T1>
214
- Col<complex<T1>> Gnufft<T1>::trimmedForwardOp(
215
- const Col<complex<T1>> &d,
216
- const Col<complex<T1>> &tempInterp) const // Don't change these arguments
217
- {
218
- // Let's trim the operations to avoid data overhead and transfers
219
- // Basically if we know that the data points are zero, they have no impact
220
- // on the transform
214
+ inline Col<complex<T1>> Gnufft<T1>::forwardSpatialInterp(const Col<complex<T1>> &d) const {
221
215
uword dataLength = this ->n2 ;
222
- uvec dataMaskTrimmed = find(abs(tempInterp) > 0);
223
-
224
- Col<T1> kxTrimmed = kx.elem(dataMaskTrimmed);
225
- Col<T1> kyTrimmed = ky.elem(dataMaskTrimmed);
226
216
227
- Col<T1> kzTrimmed = kz.elem(dataMaskTrimmed);
228
- uword dataLengthTrimmed = kxTrimmed.n_rows;
229
- // std::cout << "Length of DataMaskTrimmed = " << dataLengthTrimmed <<
230
- // std::endl;
231
- // cout << "Separating real and imaginary data." << endl;
232
-
233
- Col<T1> realData = real(d).eval();
234
- Col<T1> imagData = imag(d).eval();
235
-
236
- // Now we grab the data out of armadillo with the memptr() function
237
- // This returns a pointer of the type of the elements of the
238
- // array/vector/matrix/cube (3d matrix)
239
- // Armadillo uses column major like MATLAB and Fortran, but different from
240
- // 2D C++ arrays which are row major.
241
-
242
- T1 *realDataPtr = realData.memptr();
243
- T1 *imagDataPtr = imagData.memptr();
244
-
245
- // cout << "Allocating memory for transformed data." << endl;
246
- Col<T1> realXformedDataTrimmed(dataLengthTrimmed);
247
- Col<T1> imagXformedDataTrimmed(dataLengthTrimmed);
217
+ const T1 *dataPtr = reinterpret_cast <const T1 *>(d.memptr ());
248
218
249
- // realXformedData.zeros();
250
- // imagXformedData.zeros();
251
- // cout << "Grabbing pointers for new memory." << endl;
219
+ parameters<T1> params;
220
+ params.sync = 0 ;
221
+ params.binsize = 128 ;
222
+ params.useLUT = 1 ;
223
+ params.kernelWidth = kernelWidth;
224
+ params.gridOS = gridOS;
225
+ params.imageSize [0 ] = Nx; // gridSize is gridOS times larger than imageSize.
226
+ params.imageSize [1 ] = Ny;
227
+ params.imageSize [2 ] = Nz;
228
+ params.gridSize [0 ] = std::ceil (gridOS * (T1)Nx);
229
+ params.gridSize [1 ] = std::ceil (gridOS * (T1)Ny);
230
+ if (params.gridSize [0 ] % 2 ) // 3D case, gridOS is adjusted on the z dimension:
231
+ params.gridSize [0 ] += 1 ; // That why we need to make sure here that the xy
232
+ if (params.gridSize [1 ] % 2 ) // dimensions have even sizes.
233
+ params.gridSize [1 ] += 1 ;
234
+ params.gridSize [2 ] = (Nz == 1 ) ? Nz : (std::ceil (gridOS * (T1)Nz)); // 2D or 3D
235
+ params.numSamples = dataLength;
236
+
237
+ unsigned int n = params.numSamples ;
238
+
239
+ memcpy (pGridData_os, dataPtr, sizeof (T1) * 2 * gridNumElems);
240
+ #pragma acc update device(pGridData_os[0:2 * gridNumElems])
252
241
253
- T1 *realXformedDataPtr = realXformedDataTrimmed.memptr();
254
- T1 *imagXformedDataPtr = imagXformedDataTrimmed.memptr();
242
+ // #pragma acc parallel loop present(pSamples [0:2 * n])
243
+ for (int ii = 0 ; ii < 2 * n; ii++) {
244
+ pSamples[ii] = (T1)0.0 ;
245
+ }
246
+ #pragma acc update device(pSamples [0:2 * n])
247
+
248
+ if (Nz == 1 ) {
249
+ gridding_forward_2D (n, params, kx,
250
+ ky, beta, pSamples, LUT,
251
+ sizeLUT, pGridData_os);
252
+ } else {
253
+ gridding_forward_3D (n, params, kx,
254
+ ky, kz, beta, pSamples, LUT,
255
+ sizeLUT, pGridData_os);
256
+ }
255
257
256
- // Process data here, like calling a brute force transform, dft...
257
- // I assume you create the pointers to the arrays where the transformed data
258
- // will be stored
259
- // realXformedDataPtr and imagXformedDataPtr and they are of type float*
258
+ #pragma acc update host(pSamples [0:2 * n])
260
259
261
- // cout << "About to call the forward gridding routine." << endl;
262
- cufftHandle *nPlan = const_cast<cufftHandle *>(&plan);
263
- computeFd_CPU_Grid<T1>(dataLengthTrimmed, kxTrimmed,
264
- kyTrimmed, kzTrimmed, realDataPtr,
265
- imagDataPtr, Nx, Ny, Nz, gridOS, realXformedDataPtr,
266
- imagXformedDataPtr, kernelWidth, beta, LUT, sizeLUT,
267
- stream, nPlan, pGridData, pGridData_d, pGridData_os,
268
- pGridData_os_d, pSamples);
260
+ Col<CxT1> temp (reinterpret_cast <CxT1 *>(pSamples), n, false , true );
261
+ return temp; // Return a vector of type T1
269
262
270
- // To return data, we need to put our data back into Armadillo objects
271
- // We are telling the object how long it is because it will copy the data
272
- // back into managed memory
273
- // realXformedData(realXformedDataPtr, dataLength);
274
- // imagXformedData(imagXformedDataPtr, dataLength);
275
-
276
- // We can free the realDataXformPtr and imagDataXformPtr at this point and
277
- // Armadillo will manage armadillo object memory as things change size or go
278
- // out of scope and need to be destroyed
279
- Col<complex<T1>> XformedDataTrimmed(dataLengthTrimmed);
280
- Col<complex<T1>> XformedData(dataLength);
281
- XformedDataTrimmed.set_real(realXformedDataTrimmed);
282
- XformedDataTrimmed.set_imag(imagXformedDataTrimmed);
283
- XformedData.elem(dataMaskTrimmed) = XformedDataTrimmed;
284
-
285
- return conv_to<Col<complex<T1>>>::from(
286
- XformedData); // Return a vector of type T1
287
263
}
288
264
289
- // Adjoint transform operation
290
265
template <typename T1>
291
- Col<complex<T1>>
292
- Gnufft<T1>::trimmedAdjointOp(const Col<complex<T1>> &d,
293
- const Col<complex<T1>> &tempInterp) const {
294
-
295
- // uword dataLength = n2;
296
- // Let's trim the operations to avoid data overhead and transfers
297
- // Basically if we know that the data points are zero, they have no impact
298
- // on the transform
266
+ inline Col<complex<T1>> Gnufft<T1>::adjointSpatialInterp(const Col<complex<T1>> &d) const {
299
267
300
268
uword dataLength = this ->n2 ;
301
- uvec dataMaskTrimmed = find(abs(tempInterp) > 0);
302
- uword dataLengthTrimmed = dataMaskTrimmed.n_rows;
303
- // std::cout << "Length of DataMaskTrimmed = " << dataLengthTrimmed <<
304
- // std::endl;
305
269
306
- Col<complex<T1>> dTrimmed = d.elem(dataMaskTrimmed);
307
- Col<T1> kxTrimmed = kx.elem(dataMaskTrimmed);
308
- Col<T1> kyTrimmed = ky.elem(dataMaskTrimmed);
309
- Col<T1> kzTrimmed = kz.elem(dataMaskTrimmed);
270
+ const T1 *dataPtr = reinterpret_cast <const T1 *>(d.memptr ());
310
271
311
- Col<T1> realData = real(dTrimmed).eval();
312
- Col<T1> imagData = imag(dTrimmed).eval();
272
+ parameters<T1> params;
273
+ params.sync = 0 ;
274
+ params.binsize = 128 ;
275
+ params.useLUT = 1 ;
276
+ params.kernelWidth = kernelWidth;
277
+ params.gridOS = gridOS;
278
+ params.imageSize [0 ] = Nx; // gridSize is gridOS times larger than imageSize.
279
+ params.imageSize [1 ] = Ny;
280
+ params.imageSize [2 ] = Nz;
281
+ params.gridSize [0 ] = std::ceil (gridOS * (T1)Nx);
282
+ params.gridSize [1 ] = std::ceil (gridOS * (T1)Ny);
283
+ if (params.gridSize [0 ] % 2 ) // 3D case, gridOS is adjusted on the z dimension:
284
+ params.gridSize [0 ] += 1 ; // That why we need to make sure here that the xy
285
+ if (params.gridSize [1 ] % 2 ) // dimensions have even sizes.
286
+ params.gridSize [1 ] += 1 ;
287
+ params.gridSize [2 ] = (Nz == 1 ) ? Nz : (std::ceil (gridOS * (T1)Nz)); // 2D or 3D
288
+ params.numSamples = dataLength;
289
+
290
+ unsigned int n = params.numSamples ;
291
+
292
+ ReconstructionSample<T1>* samples; // Input Data
293
+ // allocate samples
294
+ samples = (ReconstructionSample<T1>*)malloc (
295
+ params.numSamples * sizeof (ReconstructionSample<T1>));
296
+
297
+ if (samples == NULL ) {
298
+ printf (" ERROR: Unable to allocate memory for input data\n " );
299
+ exit (1 );
300
+ }
313
301
314
- T1 *realDataPtr = realData.memptr();
315
- T1 *imagDataPtr = imagData.memptr();
302
+ //
303
+ for ( int i = 0 ; i < params. numSamples ; i++) {
316
304
317
- Col<T1> realXformedData(n1);
318
- Col<T1> imagXformedData(n1);
305
+ samples[i].kX = kx[i];
306
+ samples[i].kY = ky[i];
307
+ samples[i].kZ = kz[i];
319
308
320
- // realXformedData.zeros() ;
321
- // imagXformedData.zeros() ;
309
+ samples[i]. real = dataPtr[ 2 * i] ;
310
+ samples[i]. imag = dataPtr[ 2 * i + 1 ] ;
322
311
323
- T1 *realXformedDataPtr = realXformedData.memptr();
324
- T1 *imagXformedDataPtr = imagXformedData.memptr();
325
- // Process data here, like calling a brute force transform, dft...
326
- // I assume you create the pointers to the arrays where the transformed data
327
- // will be stored
328
- // realXformedDataPtr and imagXformedDataPtr and they are of type float*
312
+ samples[i].sdc = (T1)1.0 ;
313
+ // samples[i].t = t[i];
314
+ }
329
315
330
- // T2 gridOS = 2.0;
331
- cufftHandle *nPlan = const_cast<cufftHandle *>(&plan);
332
- computeFH_CPU_Grid<T1>(dataLengthTrimmed, kxTrimmed,
333
- kyTrimmed, kzTrimmed, realDataPtr,
334
- imagDataPtr, Nx, Ny, Nz, gridOS, realXformedDataPtr,
335
- imagXformedDataPtr, kernelWidth, beta, LUT, sizeLUT,
336
- stream, nPlan, pGridData, pGridData_d, pGridData_os,
337
- pGridData_os_d);
338
-
339
- //iftCpu<T2>(realXformedDataPtr,imagXformedDataPtr,
340
- // realDataPtr, imagDataPtr, kx.memptr(),
341
- // ky.memptr(), kz.memptr(),
342
- // ix.memptr(), iy.memptr(), iz.memptr(),
343
- // FM.memptr(), t.memptr(),
344
- // this->n2, this->n1
345
- // );
346
-
347
- // realXformedData(realXformedDataPtr, dataLength);
348
- // imagXformedData(imagXformedDataPtr, dataLength);
349
-
350
- // We can free the realDataXformPtr and imagDataXformPtr at this point and
351
- // Armadillo will manage armadillo object memory as things change size or go
352
- // out of scope and need to be destroyed
353
-
354
- Col<complex<T1>> XformedData(n1);
355
- XformedData.set_real(realXformedData);
356
- XformedData.set_imag(imagXformedData);
357
- // XformedData.elem(dataMaskTrimmed) = XformedDataTrimmed;
358
- // savemat("/shared/mrfil-data/data/PowerGridTest/64_64_16_4coils/ggrid.mat","img",XformedData);
359
-
360
- return conv_to<Col<complex<T1>>>::from(
361
- XformedData); // Return a vector of type T1
316
+ #pragma acc enter data copyin(samples [0:n])
317
+
318
+ // #pragma acc parallel loop
319
+ for (int i = 0 ; i < gridNumElems; i++) {
320
+ pGridData_os[2 * i] = (T1)0.0 ;
321
+ pGridData_os[2 * i + 1 ] = (T1)0.0 ;
322
+ }
323
+
324
+ #pragma acc update device(pGridData_os[0:2*gridNumElems])
325
+
326
+ if (Nz == 1 ) {
327
+ gridding_adjoint_2D (n, params, beta, samples,
328
+ LUT, sizeLUT, pGridData_os);
329
+ } else {
330
+ gridding_adjoint_3D (n, params, beta, samples,
331
+ LUT, sizeLUT, pGridData_os);
332
+ }
333
+
334
+ #pragma acc update host(pGridData_os[0:2*gridNumElems])
335
+ Col<CxT1> temp (reinterpret_cast <CxT1 *>(pGridData_os), gridNumElems, false , true );
336
+ return temp; // Return a vector of type T1
362
337
}
363
- */
338
+
364
339
// Explicit Instantiation
365
340
template class Gnufft <float >;
366
341
template class Gnufft <double >;
0 commit comments