-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
234b501
commit 557262f
Showing
1 changed file
with
168 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
%%cu | ||
|
||
/* | ||
* CUDA version:1.2(multiple threads per block) | ||
*/ | ||
|
||
|
||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <time.h> | ||
|
||
|
||
// ***Modulo for negative numbers*** (useful only for the device - GPU. It is used in Ising function) | ||
__device__ int mod(int a, int b){ | ||
int r = a % b; | ||
return r < 0 ? r + b : r; | ||
} | ||
|
||
|
||
|
||
// the mathematical formula that will be run on the GPU | ||
__global__ void Ising(int* older, int* newer, int n){ | ||
|
||
//***inputs*** | ||
// 'older' is the 2D square lattice that contains the initial info for the dipoles' moments | ||
// 'newer' is the 2D square lattice that will be used for the model's procedure(a simple exchange) | ||
// 'n' is the dimension of the square lattices | ||
//***output*** | ||
// 'none': There is no output. The function does some calculations only. | ||
|
||
int j = blockIdx.x * blockDim.x + threadIdx.x; | ||
int i = blockIdx.y * blockDim.y + threadIdx.y; | ||
int sum; | ||
// build the new phase of the material(or just create the formula that describes our model) | ||
if ( i < n && j < n ){ | ||
sum = older[n * i + j] + older[n * i + mod(j - 1, n)] + older[n * i + (j + 1) % n] + older[mod(i - 1, n) * n + j] + older[((i + 1) % n) * n + j]; | ||
if(sum > 0){ | ||
newer[n * i + j] = 1; | ||
} else { | ||
newer[n * i + j] = -1; | ||
} | ||
} | ||
__syncthreads(); | ||
} | ||
|
||
|
||
|
||
// swap implementation | ||
void swap(int **x, int **y){ | ||
int *temp = *x; | ||
*x = *y; | ||
*y = temp; | ||
} | ||
|
||
|
||
|
||
int main(void){ | ||
|
||
printf("\n"); | ||
printf("Let's begin"); | ||
printf("\n"); | ||
|
||
//***___CPU variables___*** | ||
int k = 10; // k iterations of the formula | ||
int n = 8; // the dimensions of the square 2D lattice | ||
int *G1, *G2; // the 2D square lattices stored in a form of an array | ||
|
||
// 1st 2D square lattice initialization | ||
G1 = (int *)malloc(n * n * sizeof(int)); | ||
// filling the 2D square lattice(array) with -1 or 1 | ||
for(int i = 0; i < n * n; i++){ | ||
int random = (rand() % 2); // random numbers between 0 and 1 | ||
if(random == 1){ // if random is 1 the G(i,j) is 1 | ||
G1[i] = 1; | ||
} else { // if random is 0 the G(i,j) is -1 | ||
G1[i] = -1; | ||
} | ||
} | ||
|
||
// 2nd 2D square lattice initialization | ||
G2 = (int *)malloc(n * n * sizeof(int)); | ||
|
||
//***___GPU variables___*** | ||
int *CUDAG1, *CUDAG2; // the 2D square lattices(arrays) that will exist in the GPU | ||
|
||
cudaMalloc((void**)&CUDAG1, n * n * sizeof(int)); | ||
|
||
cudaMalloc((void**)&CUDAG2, n * n * sizeof(int)); | ||
|
||
|
||
|
||
|
||
printf("\n"); | ||
printf("Let's start the procedure!!!"); | ||
printf("\n"); | ||
|
||
//***___CUDA parameters___*** | ||
int m = 4; // for gpu blocks -> m*m blocks | ||
dim3 dimGrid(m, m); // grid size / number of blocks | ||
printf("m = : %d block_dimension \n", m); | ||
int t = 2; // t*tpb(t*t threads per block) | ||
dim3 dimBlock(t, t); // Block size / number of threads | ||
printf("t = : %d threads dimension\n", t); | ||
|
||
|
||
// initial state | ||
printf("The initial state of the ferromagnetic substance is: \n"); | ||
for(int i = 0; i < n; i++){ | ||
for(int j = 0; j < n; j++){ | ||
printf(" %d ",G1[i * n + j]); | ||
} | ||
printf("\n"); | ||
} | ||
|
||
//***___start stopwatch___*** | ||
clock_t begin = clock(); | ||
|
||
//***___k iterations___*** | ||
for(int i = 0 ; i < k ; i++){ | ||
printf("*****____ %d iteration: ____***** \n", i); | ||
cudaMemcpy(CUDAG1, G1, n*n*sizeof(int), cudaMemcpyHostToDevice); | ||
cudaMemcpy(CUDAG2, G2, n*n*sizeof(int), cudaMemcpyHostToDevice); | ||
Ising<<<dimGrid, dimBlock>>>(CUDAG1, CUDAG2, n); | ||
cudaMemcpy(G1, CUDAG1, n*n*sizeof(int), cudaMemcpyDeviceToHost); | ||
cudaMemcpy(G2, CUDAG2, n*n*sizeof(int), cudaMemcpyDeviceToHost); | ||
swap(&G1, &G2); | ||
for(int l = 0; l < n; l++){ | ||
for(int j = 0; j < n; j++){ | ||
printf(" %d ",G1[l * n + j]); | ||
} | ||
printf("\n"); | ||
} | ||
} | ||
|
||
// stop stopwatch and print time | ||
clock_t end = clock(); | ||
|
||
// The execution time | ||
double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; | ||
|
||
printf("\n"); | ||
printf("End of the procedure!!!"); | ||
printf("\n"); | ||
|
||
// print the execution time | ||
printf("The time spent for execution was: %f \n", time_spent); | ||
|
||
|
||
// print the finished state of the moments(the G1 array, because it holds the results after the last swap) | ||
|
||
/* | ||
printf("The final state of the ferromagnetic substance is: \n"); | ||
for(int i = 0; i < n; i++){ | ||
for(int j = 0; j < n; j++){ | ||
printf(" %d ",G1[i * n + j]); | ||
} | ||
printf("\n"); | ||
} | ||
*/ | ||
|
||
// free the memory, do not need it anymore | ||
cudaFree(CUDAG1); | ||
cudaFree(CUDAG2); | ||
free(G1); | ||
free(G2); | ||
|
||
return 0; | ||
} |