Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
jagrutivichare committed Dec 15, 2017
0 parents commit 86e1e21
Show file tree
Hide file tree
Showing 13 changed files with 2,573 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
195 changes: 195 additions & 0 deletions CUDA.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
#include <stdio.h>
#include <sys/time.h>

#define SIZE 1024


/*
 * Element-wise integer addition: c[i] = a[i] + b[i] for every 0 <= i < n.
 *
 * c, a, b : pointers dereferenced on the device; each must address at least
 *           n ints of device-accessible memory.
 * n       : number of elements to process; n <= 0 processes nothing.
 *
 * Launch layout: any 1D grid. Each thread starts at its global index and
 * advances by the total number of launched threads, so a single-block launch
 * (e.g. Add<<<1, n>>>) touches exactly elements 0..n-1, and a multi-block
 * launch covers arrays larger than one block instead of every block
 * recomputing the first blockDim.x elements.
 */
__global__ void Add(int *c, int *a, int *b, int n){
    /* 64-bit index so that blockIdx.x * blockDim.x and i += stride cannot overflow. */
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride) {
        c[i] = a[i] + b[i];
    }
}

/*
 * Element-wise single-precision addition: c[i] = a[i] + b[i] for every
 * index i with i < n.
 *
 * c, a, b : pointers dereferenced on the device; each must address at least
 *           n floats of device-accessible memory.
 * n       : element count. The parameter is float-typed (kept for caller
 *           compatibility), so the bound check is a floating-point
 *           comparison; counts above 2^24 are not exactly representable.
 *
 * Launch layout: any 1D grid. Each thread starts at its global index and
 * advances by the total number of launched threads, so a single-block launch
 * (e.g. Add_f<<<1, n>>>) touches exactly elements 0..n-1, and a multi-block
 * launch covers arrays larger than one block.
 */
__global__ void Add_f(float *c, float *a, float *b, float n){
    /* 64-bit index so that blockIdx.x * blockDim.x and i += stride cannot overflow. */
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride) {
        c[i] = a[i] + b[i];
    }
}
/********************* measure host-to-device ("read") and device-to-host ("write") copy bandwidth ****************************/
/* Reports a failed CUDA runtime call on stderr; returns true when status is cudaSuccess. */
static bool bandwidth_ok(cudaError_t status, const char *what){
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "bandwidth: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

/* Microseconds elapsed between two gettimeofday() samples. */
static double bandwidth_elapsed_us(const struct timeval *start, const struct timeval *end){
    return 1e6 * (double)(end->tv_sec - start->tv_sec)
         + (double)(end->tv_usec - start->tv_usec);
}

/*
 * Runs the copy benchmark for three buffers of `count` ints each: times
 * repeated host-to-device copies ("read"), fills d_c on the device with the
 * Add kernel, then times repeated device-to-host copies ("write").
 * All host and device buffers are allocated and released inside this call.
 * Returns 0 on success, 1 if any allocation or CUDA call failed.
 */
static int bandwidth_run(size_t count){
    const int repeats = 1000000;      /* copies per timed section */
    const size_t max_block = 1024;    /* threads per block used for the Add launches */
    const size_t bytes = count * sizeof(int);

    int *a = (int *)malloc(bytes);
    int *b = (int *)malloc(bytes);
    int *c = (int *)malloc(bytes);
    int *d_a = NULL, *d_b = NULL, *d_c = NULL;
    struct timeval start_read, end_read;
    struct timeval start_write, end_write;

    bool ok = (a != NULL) && (b != NULL) && (c != NULL);
    if (!ok) {
        fprintf(stderr, "bandwidth: host allocation of %zu bytes failed\n", bytes);
    }

    ok = ok && bandwidth_ok(cudaMalloc(&d_a, bytes), "cudaMalloc(d_a)")
            && bandwidth_ok(cudaMalloc(&d_b, bytes), "cudaMalloc(d_b)")
            && bandwidth_ok(cudaMalloc(&d_c, bytes), "cudaMalloc(d_c)");

    if (ok) {
        for (size_t j = 0; j < count; ++j) {
            a[j] = (int)j;
            b[j] = (int)j;
            c[j] = 0;
        }

        /* cudaMemcpy blocks the host, so wall-clock timing brackets the copies. */
        gettimeofday(&start_read, NULL);
        for (int rep = 0; ok && rep < repeats; ++rep) {
            ok = bandwidth_ok(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_a <- a)")
              && bandwidth_ok(cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_b <- b)")
              && bandwidth_ok(cudaMemcpy(d_c, c, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_c <- c)");
        }
        gettimeofday(&end_read, NULL);
    }

    if (ok) {
        /* bytes per microsecond == 10^6 bytes per second == MB/s */
        const double us = bandwidth_elapsed_us(&start_read, &end_read);
        const double read_bandwidth = (us > 0.0) ? ((double)bytes * 3.0 * repeats) / us : 0.0;
        printf ("with the size of %zu bytes, the read_bandwidth is %lf MBps\n ", bytes, read_bandwidth);

        /*
         * Compute d_c = d_a + d_b on the device. One single-block launch per
         * chunk of at most max_block elements, with the pointers offset to the
         * chunk: this keeps every launch within the per-block thread limit and
         * is correct even when the kernel indexes by threadIdx.x alone.
         */
        for (size_t offset = 0; offset < count; offset += max_block) {
            const size_t chunk = (count - offset < max_block) ? (count - offset) : max_block;
            Add<<< 1, (unsigned int)chunk >>>(d_c + offset, d_a + offset, d_b + offset, (int)chunk);
        }
        /* Launch errors surface via cudaGetLastError; the sync keeps kernel time out of the write timing. */
        ok = bandwidth_ok(cudaGetLastError(), "Add launch")
          && bandwidth_ok(cudaDeviceSynchronize(), "Add execution");
    }

    if (ok) {
        gettimeofday(&start_write, NULL);
        for (int rep = 0; ok && rep < repeats; ++rep) {
            ok = bandwidth_ok(cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(c <- d_c)");
        }
        gettimeofday(&end_write, NULL);
    }

    if (ok) {
        const double us = bandwidth_elapsed_us(&start_write, &end_write);
        const double write_bandwidth = (us > 0.0) ? ((double)bytes * repeats) / us : 0.0;
        printf ("with the size of %zu bytes, the write_bandwidth is %lf MBps\n ", bytes, write_bandwidth);
    }

    /* free(NULL) and cudaFree(NULL) are no-ops, so partial allocations are released safely. */
    free(a);
    free(b);
    free(c);
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    return ok ? 0 : 1;
}

/*
 * Measures host<->device copy bandwidth for buffers of 1, 1024 and
 * 1024*1024 ints and prints the results (in MB/s, 10^6 bytes per second)
 * to stdout.
 *
 * Returns 0 when every size was measured, 1 as soon as one size fails
 * (diagnostics go to stderr).
 */
int bandwidth(){
    for (size_t count = 1; count <= 1024 * 1024; count *= 1024) {
        if (bandwidth_run(count) != 0) {
            return 1;
        }
    }
    return 0;
}

/******************* calculate FLOPS and IOPS **********************/
/* Reports a failed CUDA runtime call on stderr; returns true when status is cudaSuccess. */
static bool operations_ok(cudaError_t status, const char *what){
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "Operations: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

/* Seconds elapsed between two gettimeofday() samples. */
static double operations_elapsed_s(const struct timeval *start, const struct timeval *end){
    return (double)(end->tv_sec - start->tv_sec)
         + (double)(end->tv_usec - start->tv_usec) / 1e6;
}

/*
 * Measures integer and single-precision addition throughput on the GPU.
 *
 * Uploads SIZE-element int and float vectors, launches Add and Add_f
 * `launches` times each as one block of SIZE threads, and prints the rates
 * as GIOPS / GFLOPS (SIZE * launches additions divided by elapsed seconds,
 * divided by 1e9).
 *
 * Kernel launches are asynchronous, so each timed section ends with
 * cudaDeviceSynchronize(); without it only the launch overhead is timed.
 *
 * Returns 0 on success, 1 if a host allocation or CUDA call failed
 * (diagnostics go to stderr).
 */
int Operations(){
    const int launches = 1000;   /* repetitions used to lengthen each timed section */
    const size_t int_bytes = SIZE * sizeof(int);
    const size_t float_bytes = SIZE * sizeof(float);

    /* Host-side operands and results. */
    int *a = (int *)malloc(int_bytes);
    int *b = (int *)malloc(int_bytes);
    int *c = (int *)malloc(int_bytes);
    float *a_f = (float *)malloc(float_bytes);
    float *b_f = (float *)malloc(float_bytes);
    float *c_f = (float *)malloc(float_bytes);

    /* Device-side mirrors; NULL until successfully allocated so cleanup is always safe. */
    int *d_a = NULL, *d_b = NULL, *d_c = NULL;
    float *d_a_f = NULL, *d_b_f = NULL, *d_c_f = NULL;

    struct timeval start_seq, end_seq;
    struct timeval start_f, end_f;

    bool ok = a && b && c && a_f && b_f && c_f;
    if (!ok) {
        fprintf(stderr, "Operations: host allocation failed\n");
    }

    ok = ok && operations_ok(cudaMalloc(&d_a, int_bytes), "cudaMalloc(d_a)")
            && operations_ok(cudaMalloc(&d_b, int_bytes), "cudaMalloc(d_b)")
            && operations_ok(cudaMalloc(&d_c, int_bytes), "cudaMalloc(d_c)")
            && operations_ok(cudaMalloc(&d_a_f, float_bytes), "cudaMalloc(d_a_f)")
            && operations_ok(cudaMalloc(&d_b_f, float_bytes), "cudaMalloc(d_b_f)")
            && operations_ok(cudaMalloc(&d_c_f, float_bytes), "cudaMalloc(d_c_f)");

    if (ok) {
        for (int i = 0; i < SIZE; ++i) {
            a[i] = i;
            b[i] = i;
            c[i] = 0;
            a_f[i] = (float)i;
            b_f[i] = (float)i;
            c_f[i] = 0.0f;
        }
    }

    ok = ok && operations_ok(cudaMemcpy(d_a, a, int_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_a)")
            && operations_ok(cudaMemcpy(d_b, b, int_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_b)")
            && operations_ok(cudaMemcpy(d_c, c, int_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_c)")
            && operations_ok(cudaMemcpy(d_a_f, a_f, float_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_a_f)")
            && operations_ok(cudaMemcpy(d_b_f, b_f, float_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_b_f)")
            && operations_ok(cudaMemcpy(d_c_f, c_f, float_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_c_f)");

    if (ok) {
        /* Integer section: d_c = d_a + d_b. Kernels take device pointers, output first. */
        gettimeofday(&start_seq, NULL);
        for (int n = 0; n < launches; n++) {
            Add<<< 1, SIZE >>>(d_c, d_a, d_b, SIZE);
        }
        ok = operations_ok(cudaGetLastError(), "Add launch")
          && operations_ok(cudaDeviceSynchronize(), "Add execution");
        gettimeofday(&end_seq, NULL);
    }

    if (ok) {
        /* Float section: d_c_f = d_a_f + d_b_f. */
        gettimeofday(&start_f, NULL);
        for (int n = 0; n < launches; n++) {
            Add_f<<< 1, SIZE >>>(d_c_f, d_a_f, d_b_f, SIZE);
        }
        ok = operations_ok(cudaGetLastError(), "Add_f launch")
          && operations_ok(cudaDeviceSynchronize(), "Add_f execution");
        gettimeofday(&end_f, NULL);
    }

    if (ok) {
        const double total_ops = (double)SIZE * launches;
        const double int_s = operations_elapsed_s(&start_seq, &end_seq);
        const double float_s = operations_elapsed_s(&start_f, &end_f);

        /* rate = size * loop / interval, scaled to giga-operations per second */
        const double IOPS = (int_s > 0.0) ? total_ops / int_s / 1e9 : 0.0;
        printf("the IOPS is %30f GIOPS\n", IOPS);

        const double FLOPS = (float_s > 0.0) ? total_ops / float_s / 1e9 : 0.0;
        printf("the FLOPS is %30f GFLOPS\n", FLOPS);
    }

    /* free(NULL) and cudaFree(NULL) are no-ops, so partial allocations are released safely. */
    free(a);
    free(b);
    free(c);
    free(a_f);
    free(b_f);
    free(c_f);

    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    cudaFree(d_a_f);
    cudaFree(d_b_f);
    cudaFree(d_c_f);

    return ok ? 0 : 1;
}

/*
 * Program entry point: runs the copy-bandwidth benchmark, then the
 * IOPS/FLOPS benchmark. Both always run; the process exit status is 0 only
 * when both report success (return 0), and 1 otherwise.
 */
int main(void){
    const int bandwidth_status = bandwidth();
    const int operations_status = Operations();
    return (bandwidth_status != 0 || operations_status != 0) ? 1 : 0;
}
139 changes: 139 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# The benchmark targets share their names with the binaries their recipes
# create (cpu, cpu2, memory, disk) and have no prerequisites. Declaring them
# phony makes `make` rebuild and rerun the benchmarks every time instead of
# reporting "up to date" once those files exist.
.PHONY: all cpu cpu2 memory disk
all: cpu cpu2 memory disk
# cpu: compiles cpu_benchmark.c with gcc (-mavx2, linked against pthread)
# into ./cpu, then runs it eight times: four runs with -f and four with -i,
# each set using -t 1, 2, 4 and 8.
# NOTE(review): presumably -f / -i select floating-point vs integer
# operations, -o is the operation count and -t the thread count -- confirm
# against cpu_benchmark.c.
# The -f runs with -t 4 and -t 8 pass a 10x larger -o value than the others.
cpu:
$(info ********************** CPU Benchmark **********************)
gcc -mavx2 cpu_benchmark.c -o cpu -lpthread
./cpu -f -o 1000000000 -t 1
./cpu -f -o 1000000000 -t 2
./cpu -f -o 10000000000 -t 4
./cpu -f -o 10000000000 -t 8
./cpu -i -o 1000000000 -t 1
./cpu -i -o 1000000000 -t 2
./cpu -i -o 1000000000 -t 4
./cpu -i -o 1000000000 -t 8
# cpu2: compiles cpu_benchmark_2.c with gcc (-mavx2, linked against pthread)
# into ./cpu2, then runs it once with -f and once with -i.
# NOTE(review): presumably -f / -i select floating-point vs integer mode --
# confirm against cpu_benchmark_2.c.
cpu2:
$(info ***************** CPU Benchmark- Part f *******************)
gcc -mavx2 cpu_benchmark_2.c -o cpu2 -lpthread
./cpu2 -f
./cpu2 -i
# memory: compiles memory_benchmark.c with gcc (linked against pthread) into
# ./memory, then runs it for each of the modes -w, -s and -r with -b values
# 8, 8192, 8388608 and 83886080, each at -t 1, 2, 4 and 8.
# NOTE(review): presumably -w / -s / -r select the access pattern being
# measured, -o is the number of operations, -b the block size in bytes and
# -t the thread count -- confirm against memory_benchmark.c.
# The -o value shrinks as -b grows; for "-w -b 8" it is 1000000 at -t 1 and
# -t 2 but 100000 at -t 4 and -t 8.
memory:
$(info ******************* Memory Benchmark **********************)
gcc memory_benchmark.c -o memory -lpthread
./memory -w -o 1000000 -b 8 -t 1
./memory -w -o 1000000 -b 8 -t 2
./memory -w -o 100000 -b 8 -t 4
./memory -w -o 100000 -b 8 -t 8

./memory -w -o 1000 -b 8192 -t 1
./memory -w -o 1000 -b 8192 -t 2
./memory -w -o 1000 -b 8192 -t 4
./memory -w -o 1000 -b 8192 -t 8

./memory -w -o 1 -b 8388608 -t 1
./memory -w -o 1 -b 8388608 -t 2
./memory -w -o 1 -b 8388608 -t 4
./memory -w -o 1 -b 8388608 -t 8

./memory -w -o 1 -b 83886080 -t 1
./memory -w -o 1 -b 83886080 -t 2
./memory -w -o 1 -b 83886080 -t 4
./memory -w -o 1 -b 83886080 -t 8
./memory -s -o 100000 -b 8 -t 1
./memory -s -o 100000 -b 8 -t 2
./memory -s -o 100000 -b 8 -t 4
./memory -s -o 100000 -b 8 -t 8

./memory -s -o 1000 -b 8192 -t 1
./memory -s -o 1000 -b 8192 -t 2
./memory -s -o 1000 -b 8192 -t 4
./memory -s -o 1000 -b 8192 -t 8

./memory -s -o 1 -b 8388608 -t 1
./memory -s -o 1 -b 8388608 -t 2
./memory -s -o 1 -b 8388608 -t 4
./memory -s -o 1 -b 8388608 -t 8

./memory -s -o 1 -b 83886080 -t 1
./memory -s -o 1 -b 83886080 -t 2
./memory -s -o 1 -b 83886080 -t 4
./memory -s -o 1 -b 83886080 -t 8

./memory -r -o 100000 -b 8 -t 1
./memory -r -o 100000 -b 8 -t 2
./memory -r -o 100000 -b 8 -t 4
./memory -r -o 100000 -b 8 -t 8

./memory -r -o 1000 -b 8192 -t 1
./memory -r -o 1000 -b 8192 -t 2
./memory -r -o 1000 -b 8192 -t 4
./memory -r -o 1000 -b 8192 -t 8

./memory -r -o 1 -b 8388608 -t 1
./memory -r -o 1 -b 8388608 -t 2
./memory -r -o 1 -b 8388608 -t 4
./memory -r -o 1 -b 8388608 -t 8

./memory -r -o 1 -b 83886080 -t 1
./memory -r -o 1 -b 83886080 -t 2
./memory -r -o 1 -b 83886080 -t 4
./memory -r -o 1 -b 83886080 -t 8
# disk: compiles disk_benchmark.c with gcc (linked against pthread) into
# ./disk, then runs it for each of the modes -w, -s and -r with -b values
# 8, 8192, 8388608 and 83886080, each at -t 1, 2, 4 and 8.
# NOTE(review): presumably -w / -s / -r select the access pattern being
# measured, -o is the number of operations, -b the block size in bytes and
# -t the thread count -- confirm against disk_benchmark.c.
# The -o values are not uniform within a group (e.g. "-w -b 8388608" uses
# 100000, 100, 10, 10 across -t 1, 2, 4, 8), so per-thread-count results are
# not measured over the same amount of work.
disk:
$(info ****************** disk Benchmark *********************)
gcc disk_benchmark.c -o disk -lpthread
./disk -w -o 10000000 -b 8 -t 1
./disk -w -o 10000000 -b 8 -t 2
./disk -w -o 10000000 -b 8 -t 4
./disk -w -o 10000000 -b 8 -t 8

./disk -w -o 10000 -b 8192 -t 1
./disk -w -o 10000 -b 8192 -t 2
./disk -w -o 10000 -b 8192 -t 4
./disk -w -o 10000 -b 8192 -t 8

./disk -w -o 100000 -b 8388608 -t 1
./disk -w -o 100 -b 8388608 -t 2
./disk -w -o 10 -b 8388608 -t 4
./disk -w -o 10 -b 8388608 -t 8

./disk -w -o 10000 -b 83886080 -t 1
./disk -w -o 10 -b 83886080 -t 2
./disk -w -o 1 -b 83886080 -t 4
./disk -w -o 1 -b 83886080 -t 8
./disk -s -o 100000 -b 8 -t 1
./disk -s -o 100000 -b 8 -t 2
./disk -s -o 100000 -b 8 -t 4
./disk -s -o 100000 -b 8 -t 8

./disk -s -o 100000 -b 8192 -t 1
./disk -s -o 100000 -b 8192 -t 2
./disk -s -o 10000 -b 8192 -t 4
./disk -s -o 10000 -b 8192 -t 8

./disk -s -o 100 -b 8388608 -t 1
./disk -s -o 100 -b 8388608 -t 2
./disk -s -o 10 -b 8388608 -t 4
./disk -s -o 10 -b 8388608 -t 8

./disk -s -o 10000 -b 83886080 -t 1
./disk -s -o 10 -b 83886080 -t 2
./disk -s -o 1 -b 83886080 -t 4
./disk -s -o 1 -b 83886080 -t 8

./disk -r -o 100000 -b 8 -t 1
./disk -r -o 100000 -b 8 -t 2
./disk -r -o 100000 -b 8 -t 4
./disk -r -o 100000 -b 8 -t 8

./disk -r -o 10000 -b 8192 -t 1
./disk -r -o 1000 -b 8192 -t 2
./disk -r -o 1000 -b 8192 -t 4
./disk -r -o 1000 -b 8192 -t 8

./disk -r -o 100 -b 8388608 -t 1
./disk -r -o 100 -b 8388608 -t 2
./disk -r -o 10 -b 8388608 -t 4
./disk -r -o 10 -b 8388608 -t 8

./disk -r -o 10 -b 83886080 -t 1
./disk -r -o 10 -b 83886080 -t 2
./disk -r -o 1 -b 83886080 -t 4
./disk -r -o 1 -b 83886080 -t 8
Binary file added Manual.pdf
Binary file not shown.
Binary file added Performance.pdf
Binary file not shown.
Loading

0 comments on commit 86e1e21

Please sign in to comment.