Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
JJPPeters committed Jul 20, 2021
2 parents 1025601 + 83f2361 commit b70d8a8
Show file tree
Hide file tree
Showing 55 changed files with 1,948 additions and 753 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
cmake-build-*
.idea
.autosave
.autosave

scripts/out*
4 changes: 2 additions & 2 deletions docs/guide/input.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ The first line contains the number of atoms and the second line is reserved for

### Extra information

To add any more information, clTEM will look at the comments section as a sort of header section for the columns. This has defaults and multiple options to try and easily support as much as possible. They keywords that are looked for are `A`, `x`, `y`, `z`, `occ`, `u`, `ux`, `uy` and `uz`. Any other terms will be ignored, so comments can be added (just make sure they don't contain any of these terms separated by spaces).
To add any more information, clTEM will look at the comments section as a sort of header section for the columns. This has defaults and multiple options to try and easily support as much as possible. They keywords that are looked for are `A`, `x`, `y`, `z`, `occ`, `u`, `u1`, `u2` and `u3`. Any other terms will be ignored, so comments can be added (just make sure they don't contain any of these terms separated by spaces).

`A`, `x`, `y`, `z` do not need to be stated, if none of them are found, it is assumed that they form columns 1-4 in the order shown. Other terms are then treated as columns 5 onwards, in the order they are written in the comments section.

Expand All @@ -59,7 +59,7 @@ Many structures require occupancies for the same sites. If the column `occ` is d

#### Thermal vibrations

Thermal vibrations are defined by the headers `u`, `ux`, `uy` and `uz` that define the isotropic vibration and then the vibration specific to each cartesian axis. `u` is set first, then any of `ux`, `uy` or `uz` will override the relevant component of `u`. `u` is defined here as the mean square displacement of the atom, <u²>, and will have the units of Ų (or nm² if the `nm` tag has been set).
Thermal vibrations are defined by the headers `u`, `u1`, `u2` and `u3` that define the isotropic vibration and then the vibration specific to each cartesian axis. `u` is set first, then any of `u1`, `u2` or `u3` will override the relevant component of `u`. `u` is defined here as the mean square displacement of the atom, <u²>, and will have the units of Ų (or nm² if the `nm` tag has been set).

#### Example file

Expand Down
108 changes: 57 additions & 51 deletions kernels/transmission_potentials_full_3d_d.cl
Original file line number Diff line number Diff line change
Expand Up @@ -134,52 +134,54 @@ double peng(__constant double* params, int i_lim, int ZNum, double rad) {
}

__kernel void transmission_potentials_full_3d_d( __global double2* potential,
__global const double* restrict pos_x,
__global const double* restrict pos_y,
__global const double* restrict pos_z,
__global const int* restrict atomic_num,
__constant double* params,
unsigned int param_selector,
unsigned int param_i_count,
__global const int* restrict block_start_pos,
unsigned int width,
unsigned int height,
int current_slice,
int total_slices,
double z,
double dz,
double pixel_scale,
int blocks_x,
int blocks_y,
double max_x,
double min_x,
double max_y,
double min_y,
int block_load_x,
int block_load_y,
int slice_load_z,
double sigma,
double startx,
double starty,
double slice_shift_x,
double slice_shift_y,
int integrals)
__global const double* restrict pos_x,
__global const double* restrict pos_y,
__global const double* restrict pos_z,
__global const int* restrict atomic_num,
__constant double* params,
unsigned int param_selector,
unsigned int param_i_count,
__global const int* restrict block_start_pos,
unsigned int width,
unsigned int height,
int current_slice,
int total_slices,
double dz,
double pixel_scale,
int blocks_x,
int blocks_y,
double max_x,
double min_x,
double max_y,
double min_y,
int block_load_x,
int block_load_y,
int slice_load_z,
double sigma,
double startx,
double starty,
double current_z,
double slice_shift_x,
double slice_shift_y,
int integrals)
{
int xid = get_global_id(0);
int yid = get_global_id(1);
int lid = get_local_id(0) + get_local_size(0)*get_local_id(1);
int id = xid + width * yid;
int topz = current_slice - slice_load_z;
int bottomz = current_slice + slice_load_z;
double sumz = 0.0;
int gx = get_group_id(0);
int gy = get_group_id(1);
double int_r = native_recip(integrals);

int topz = current_slice - slice_load_z;
int bottomz = current_slice + slice_load_z;
double int_r = native_recip(integrals);
double sub_slice_thickness = dz * int_r;

if(topz < 0 )
topz = 0;
if(bottomz >= total_slices )
bottomz = total_slices-1;
bottomz = total_slices - 1;

__local double atx[256];
__local double aty[256];
Expand All @@ -192,11 +194,11 @@ __kernel void transmission_potentials_full_3d_d( __global double2* potential,
double group_size_y = get_local_size(1) * pixel_scale;

// get the start and end position of the current workgroup
double group_start_x = startx + gx * group_size_x;
double group_end_x = startx + (gx + 1) * group_size_x;
double group_start_x = startx + gx * group_size_x;
double group_end_x = group_start_x + group_size_x;

double group_start_y = starty + gy * group_size_y;
double group_end_y = starty + (gy + 1) * group_size_y;
double group_start_y = starty + gy * group_size_y;
double group_end_y = group_start_y + group_size_y;

// get the reciprocal of the full range (for efficiency)
double recip_range_x = native_recip(max_x - min_x);
Expand All @@ -210,8 +212,8 @@ __kernel void transmission_potentials_full_3d_d( __global double2* potential,
for(int k = topz; k <= bottomz; k++) {
for (int j = startj ; j <= endj; j++) {
//Need list of atoms to load, so we can load in sequence
int start = block_start_pos[k*blocks_x*blocks_y + blocks_x*j + starti];
int end = block_start_pos[k*blocks_x*blocks_y + blocks_x*j + endi + 1];
int start = block_start_pos[k*blocks_x*blocks_y + blocks_x*j + starti ];
int end = block_start_pos[k*blocks_x*blocks_y + blocks_x*j + endi + 1];

int gid = start + lid;

Expand All @@ -224,7 +226,8 @@ __kernel void transmission_potentials_full_3d_d( __global double2* potential,

barrier(CLK_LOCAL_MEM_FENCE);

double p2=0.0;
double p2 = 0.0;

for (int l = 0; l < end-start; l++) {
// calculate the radius from the current position in space (i.e. pixel?)
double im_pos_x = startx + xid * pixel_scale;
Expand All @@ -233,7 +236,7 @@ __kernel void transmission_potentials_full_3d_d( __global double2* potential,
double im_pos_y = starty + yid * pixel_scale;
double rad_y = im_pos_y - aty[l];

for (int h = 0; h < integrals; h++) {
for (int h = 0; h <= integrals; h++) {
// not sure how the integrals work here (integrals = integrals)
// I think we are generating multiple subslices for each slice (nut not propagating through them,
// just building our single slice potential from them
Expand All @@ -244,8 +247,9 @@ __kernel void transmission_potentials_full_3d_d( __global double2* potential,

double xyrad2 = rad_x*rad_x + rad_y*rad_y;

// z is the slice position, h is the 'sub' integral, dz is the slice thickness and int_r is 1/integrals
double im_pos_z = z - h * dz * int_r;
// current_z is the slice position, h is the 'sub' integral, dz is the slice thickness and int_r is 1/integrals
// so basically this gets our exact z position...
double im_pos_z = current_z - h * dz * int_r;
double rad_z = im_pos_z - atz[l];

double rad = native_sqrt(xyrad2 + rad_z*rad_z);
Expand All @@ -258,27 +262,29 @@ __kernel void transmission_potentials_full_3d_d( __global double2* potential,

if(xyrad2 <= 64.0 && rad_z <= 3.0) {
double p1;

if (param_selector == 0)
p1 = kirkland(params, param_i_count, atZ[l], rad);
else if (param_selector == 1)
p1 = peng(params, param_i_count, atZ[l], rad);
else if (param_selector == 2)
p1 = lobato(params, param_i_count, atZ[l], rad);

// why make sure h!=0 when we can just remove it from the loop?
// surely h == 0 will be in the previous slice??
// because p1 is used in the next iteration (why it is set to p2)
sumz += (h!=0) * (p1+p2)*0.5;
// Q: why make sure h!=0 when we can just remove it from the loop?
// A: because p1 is used in the next iteration (why it is set to p2)
// note that the sub slice thickness is included in the final sin/cos
sumz += (h != 0) * (p1 + p2) * 0.5;
p2 = p1;
}
}
}
}

barrier(CLK_LOCAL_MEM_FENCE);
}
}

if(xid < width && yid < height) {
potential[id].x = native_cos((dz * int_r) * sigma * sumz);
potential[id].y = native_sin((dz * int_r) * sigma * sumz);
potential[id].x = native_cos(sub_slice_thickness * sigma * sumz);
potential[id].y = native_sin(sub_slice_thickness * sigma * sumz);
}
}
Loading

0 comments on commit b70d8a8

Please sign in to comment.