-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 86e1e21
Showing
13 changed files
with
2,573 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Auto detect text files and perform LF normalization | ||
* text=auto |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,195 @@ | ||
#include <stdio.h> | ||
#include <sys/time.h> | ||
|
||
#define SIZE 1024 | ||
|
||
|
||
__global__ void Add(int *c, int *a, int *b, int n){ | ||
int i = threadIdx.x; | ||
|
||
if (i < n) { | ||
c[i] = a[i] + b[i]; | ||
} | ||
} | ||
|
||
__global__ void Add_f(float *c, float *a, float *b, float n){ | ||
int i = threadIdx.x; | ||
|
||
if (i < n) { | ||
c[i] = a[i] + b[i]; | ||
} | ||
} | ||
/********************* calculate read and write bandwidth****************************/ | ||
int bandwidth(){ | ||
int *a, *b, *c; | ||
int *d_a, *d_b, *d_c; | ||
for (int i=1 ; i <= 1024*1024; i = i * 1024) { | ||
double size_0 = i; | ||
|
||
// Allocate memory block(in CPU), returns a pointer | ||
a = (int *)malloc(size_0*sizeof(int)); | ||
b = (int *)malloc(size_0*sizeof(int)); | ||
c = (int *)malloc(size_0*sizeof(int)); | ||
|
||
// Allocate memory block(in GPU), returns a pointer | ||
cudaMalloc( &d_a, size_0*sizeof(int)); | ||
cudaMalloc( &d_b, size_0*sizeof(int)); | ||
cudaMalloc( &d_c, size_0*sizeof(int)); | ||
|
||
|
||
// structure for calculate the interval | ||
struct timeval start_read, end_read; | ||
struct timeval start_write, end_write; | ||
for (int j = 0; j < size_0; ++j) { | ||
a[j] = j; | ||
b[j] = j; | ||
c[j] = 0; | ||
} | ||
|
||
gettimeofday(&start_read, NULL); | ||
for (int i = 0; i < 1000000; i++) { | ||
cudaMemcpy( d_a, a, size_0*sizeof(int), cudaMemcpyHostToDevice ); // copy memory from CPU to GPU | ||
cudaMemcpy( d_b, b, size_0*sizeof(int), cudaMemcpyHostToDevice ); | ||
cudaMemcpy( d_c, c, size_0*sizeof(int), cudaMemcpyHostToDevice ); | ||
} | ||
gettimeofday(&end_read, NULL); | ||
|
||
// read_bandwidth = the size of memory copied from CPU to GPU / interval | ||
float read_bandwidth = ((float)size_0*sizeof(int)*3*1000000)/(1000.0*(end_read.tv_sec-start_read.tv_sec)+(end_read.tv_usec-start_read.tv_usec)/1000.0); | ||
// (((size_0/(1024*1024))*3)*(1e6))/ (1000*1000*(end_read.tv_sec - start_read.tv_sec) + (end_read.tv_usec - start_read.tv_usec)); | ||
printf ("with the size of %lf bytes, the read_bandwidth is %lf MBps\n ", size_0, read_bandwidth/1000.0); | ||
|
||
Add<<< 1, size_0 >>>(a, b, c, size_0); | ||
|
||
gettimeofday(&start_write, NULL); | ||
for (int i = 0; i < 1000000; i++) { | ||
cudaMemcpy( c, d_c, size_0*sizeof(int), cudaMemcpyDeviceToHost ); // copy memory from GPU to CPU | ||
} | ||
gettimeofday(&end_write, NULL); | ||
|
||
// read_bandwidth = the size of memory copied from GPU to CPU / interval | ||
float write_bandwidth = ((float)size_0*sizeof(int)*1000000)/ ((float)(end_write.tv_sec - start_write.tv_sec)*1000.0*1000.0 +(float)(end_write.tv_usec - start_write.tv_usec)); | ||
printf ("with the size of %lf bytes, the write_bandwidth is %lf MBps\n ", size_0, write_bandwidth); | ||
} | ||
|
||
// free memory | ||
|
||
free(a); | ||
free(b); | ||
free(c); | ||
|
||
cudaFree(d_a); | ||
cudaFree(d_b); | ||
cudaFree(d_c); | ||
|
||
return 0; | ||
} | ||
|
||
/******************* claculate FLOPS and IOPS **********************/ | ||
int Operations(){ | ||
int *a, *b, *c; | ||
int *d_a, *d_b, *d_c; | ||
float *a_f, *b_f, *c_f; | ||
float *d_a_f, *d_b_f, *d_c_f; | ||
|
||
// declare interger and float variable(pointer) for calculation | ||
|
||
a = (int *)malloc(SIZE*sizeof(int)); | ||
b = (int *)malloc(SIZE*sizeof(int)); | ||
c = (int *)malloc(SIZE*sizeof(int)); | ||
a_f = (float *)malloc(SIZE*sizeof(float)); | ||
b_f = (float *)malloc(SIZE*sizeof(float)); | ||
c_f = (float *)malloc(SIZE*sizeof(float)); | ||
|
||
cudaMalloc( &d_a, SIZE*sizeof(int)); | ||
cudaMalloc( &d_b, SIZE*sizeof(int)); | ||
cudaMalloc( &d_c, SIZE*sizeof(int)); | ||
cudaMalloc( &d_a_f, SIZE*sizeof(float)); | ||
cudaMalloc( &d_b_f, SIZE*sizeof(float)); | ||
cudaMalloc( &d_c_f, SIZE*sizeof(float)); | ||
|
||
struct timeval start_seq, end_seq; | ||
struct timeval start_f, end_f; | ||
|
||
int i = 0; | ||
for (i = 0; i < SIZE; ++i) { | ||
a[i] = i; | ||
b[i] = i; | ||
c[i] = 0; | ||
} | ||
|
||
for (i = 0; i < SIZE; ++i) { | ||
a_f[i] = i * 1.0; | ||
b_f[i] = i * 1.0; | ||
c_f[i] = 0.0; | ||
} | ||
|
||
|
||
cudaMemcpy( d_a, a, SIZE*sizeof(int), cudaMemcpyHostToDevice ); | ||
cudaMemcpy( d_b, b, SIZE*sizeof(int), cudaMemcpyHostToDevice ); | ||
cudaMemcpy( d_c, c, SIZE*sizeof(int), cudaMemcpyHostToDevice ); | ||
cudaMemcpy( d_a_f, a_f, SIZE*sizeof(float), cudaMemcpyHostToDevice ); | ||
cudaMemcpy( d_b_f, b_f, SIZE*sizeof(float), cudaMemcpyHostToDevice ); | ||
cudaMemcpy( d_c_f, c_f, SIZE*sizeof(float), cudaMemcpyHostToDevice ); | ||
|
||
gettimeofday(&start_seq, NULL); | ||
|
||
|
||
int n = 0; | ||
// loop here is used to extend the operating time of CPU | ||
for (n = 0; n < 1000; n++) { | ||
Add<<< 1, SIZE >>>(a, b, c, SIZE); // calculate a[ ] + b[ ] (interger) | ||
} | ||
|
||
gettimeofday(&end_seq, NULL); | ||
|
||
gettimeofday(&start_f, NULL); | ||
|
||
for (n = 0; n < 1000; n++) { | ||
Add_f<<< 1, SIZE >>>(a_f, b_f, c_f, SIZE); // calculate a[ ] + b[ ] (float) | ||
} | ||
|
||
gettimeofday(&end_f, NULL); | ||
|
||
// IOPS = size * loop / interval | ||
float IOPS = ((SIZE*1000)/ ((1000.0 * (end_seq.tv_sec - start_seq.tv_sec) + (end_seq.tv_usec - start_seq.tv_usec) / 1000.0)/1000)/1e9); | ||
|
||
printf("the IOPS is %30f GIOPS\n", IOPS); | ||
|
||
// FLOPS = size * loop / interval | ||
float FLOPS = ((SIZE*1000)/ ((1000.0 * (end_f.tv_sec - start_f.tv_sec) + (end_f.tv_usec - start_f.tv_usec) / 1000.0)/1000)/1e9); | ||
|
||
printf("the FLOPS is %30f GFLOPS\n", FLOPS); | ||
|
||
//cudaMemcpy( c, d_c, SIZE*sizeof(int), cudaMemcpyDeviceToHost ); | ||
|
||
//for (int i = 0; i < 10; ++i) { | ||
//printf("c[%d] = %d\n", i, c[i]); | ||
//} | ||
|
||
|
||
// free memory | ||
|
||
free(a); | ||
free(b); | ||
free(c); | ||
free(a_f); | ||
free(b_f); | ||
free(c_f); | ||
|
||
cudaFree(d_a); | ||
cudaFree(d_b); | ||
cudaFree(d_c); | ||
|
||
cudaFree(d_a_f); | ||
cudaFree(d_b_f); | ||
cudaFree(d_c_f); | ||
|
||
return 0; | ||
} | ||
|
||
int main(void){ | ||
bandwidth(); | ||
Operations(); | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
all: cpu cpu2 memory disk | ||
cpu: | ||
$(info ********************** CPU Benchmark **********************) | ||
gcc -mavx2 cpu_benchmark.c -o cpu -lpthread | ||
./cpu -f -o 1000000000 -t 1 | ||
./cpu -f -o 1000000000 -t 2 | ||
./cpu -f -o 10000000000 -t 4 | ||
./cpu -f -o 10000000000 -t 8 | ||
./cpu -i -o 1000000000 -t 1 | ||
./cpu -i -o 1000000000 -t 2 | ||
./cpu -i -o 1000000000 -t 4 | ||
./cpu -i -o 1000000000 -t 8 | ||
cpu2: | ||
$(info ***************** CPU Benchmark- Part f *******************) | ||
gcc -mavx2 cpu_benchmark_2.c -o cpu2 -lpthread | ||
./cpu2 -f | ||
./cpu2 -i | ||
memory: | ||
$(info ******************* Memory Benchmark **********************) | ||
gcc memory_benchmark.c -o memory -lpthread | ||
./memory -w -o 1000000 -b 8 -t 1 | ||
./memory -w -o 1000000 -b 8 -t 2 | ||
./memory -w -o 100000 -b 8 -t 4 | ||
./memory -w -o 100000 -b 8 -t 8 | ||
|
||
./memory -w -o 1000 -b 8192 -t 1 | ||
./memory -w -o 1000 -b 8192 -t 2 | ||
./memory -w -o 1000 -b 8192 -t 4 | ||
./memory -w -o 1000 -b 8192 -t 8 | ||
|
||
./memory -w -o 1 -b 8388608 -t 1 | ||
./memory -w -o 1 -b 8388608 -t 2 | ||
./memory -w -o 1 -b 8388608 -t 4 | ||
./memory -w -o 1 -b 8388608 -t 8 | ||
|
||
./memory -w -o 1 -b 83886080 -t 1 | ||
./memory -w -o 1 -b 83886080 -t 2 | ||
./memory -w -o 1 -b 83886080 -t 4 | ||
./memory -w -o 1 -b 83886080 -t 8 | ||
./memory -s -o 100000 -b 8 -t 1 | ||
./memory -s -o 100000 -b 8 -t 2 | ||
./memory -s -o 100000 -b 8 -t 4 | ||
./memory -s -o 100000 -b 8 -t 8 | ||
|
||
./memory -s -o 1000 -b 8192 -t 1 | ||
./memory -s -o 1000 -b 8192 -t 2 | ||
./memory -s -o 1000 -b 8192 -t 4 | ||
./memory -s -o 1000 -b 8192 -t 8 | ||
|
||
./memory -s -o 1 -b 8388608 -t 1 | ||
./memory -s -o 1 -b 8388608 -t 2 | ||
./memory -s -o 1 -b 8388608 -t 4 | ||
./memory -s -o 1 -b 8388608 -t 8 | ||
|
||
./memory -s -o 1 -b 83886080 -t 1 | ||
./memory -s -o 1 -b 83886080 -t 2 | ||
./memory -s -o 1 -b 83886080 -t 4 | ||
./memory -s -o 1 -b 83886080 -t 8 | ||
|
||
./memory -r -o 100000 -b 8 -t 1 | ||
./memory -r -o 100000 -b 8 -t 2 | ||
./memory -r -o 100000 -b 8 -t 4 | ||
./memory -r -o 100000 -b 8 -t 8 | ||
|
||
./memory -r -o 1000 -b 8192 -t 1 | ||
./memory -r -o 1000 -b 8192 -t 2 | ||
./memory -r -o 1000 -b 8192 -t 4 | ||
./memory -r -o 1000 -b 8192 -t 8 | ||
|
||
./memory -r -o 1 -b 8388608 -t 1 | ||
./memory -r -o 1 -b 8388608 -t 2 | ||
./memory -r -o 1 -b 8388608 -t 4 | ||
./memory -r -o 1 -b 8388608 -t 8 | ||
|
||
./memory -r -o 1 -b 83886080 -t 1 | ||
./memory -r -o 1 -b 83886080 -t 2 | ||
./memory -r -o 1 -b 83886080 -t 4 | ||
./memory -r -o 1 -b 83886080 -t 8 | ||
disk: | ||
$(info ****************** disk Benchmark *********************) | ||
gcc disk_benchmark.c -o disk -lpthread | ||
./disk -w -o 10000000 -b 8 -t 1 | ||
./disk -w -o 10000000 -b 8 -t 2 | ||
./disk -w -o 10000000 -b 8 -t 4 | ||
./disk -w -o 10000000 -b 8 -t 8 | ||
|
||
./disk -w -o 10000 -b 8192 -t 1 | ||
./disk -w -o 10000 -b 8192 -t 2 | ||
./disk -w -o 10000 -b 8192 -t 4 | ||
./disk -w -o 10000 -b 8192 -t 8 | ||
|
||
./disk -w -o 100000 -b 8388608 -t 1 | ||
./disk -w -o 100 -b 8388608 -t 2 | ||
./disk -w -o 10 -b 8388608 -t 4 | ||
./disk -w -o 10 -b 8388608 -t 8 | ||
|
||
./disk -w -o 10000 -b 83886080 -t 1 | ||
./disk -w -o 10 -b 83886080 -t 2 | ||
./disk -w -o 1 -b 83886080 -t 4 | ||
./disk -w -o 1 -b 83886080 -t 8 | ||
./disk -s -o 100000 -b 8 -t 1 | ||
./disk -s -o 100000 -b 8 -t 2 | ||
./disk -s -o 100000 -b 8 -t 4 | ||
./disk -s -o 100000 -b 8 -t 8 | ||
|
||
./disk -s -o 100000 -b 8192 -t 1 | ||
./disk -s -o 100000 -b 8192 -t 2 | ||
./disk -s -o 10000 -b 8192 -t 4 | ||
./disk -s -o 10000 -b 8192 -t 8 | ||
|
||
./disk -s -o 100 -b 8388608 -t 1 | ||
./disk -s -o 100 -b 8388608 -t 2 | ||
./disk -s -o 10 -b 8388608 -t 4 | ||
./disk -s -o 10 -b 8388608 -t 8 | ||
|
||
./disk -s -o 10000 -b 83886080 -t 1 | ||
./disk -s -o 10 -b 83886080 -t 2 | ||
./disk -s -o 1 -b 83886080 -t 4 | ||
./disk -s -o 1 -b 83886080 -t 8 | ||
|
||
./disk -r -o 100000 -b 8 -t 1 | ||
./disk -r -o 100000 -b 8 -t 2 | ||
./disk -r -o 100000 -b 8 -t 4 | ||
./disk -r -o 100000 -b 8 -t 8 | ||
|
||
./disk -r -o 10000 -b 8192 -t 1 | ||
./disk -r -o 1000 -b 8192 -t 2 | ||
./disk -r -o 1000 -b 8192 -t 4 | ||
./disk -r -o 1000 -b 8192 -t 8 | ||
|
||
./disk -r -o 100 -b 8388608 -t 1 | ||
./disk -r -o 100 -b 8388608 -t 2 | ||
./disk -r -o 10 -b 8388608 -t 4 | ||
./disk -r -o 10 -b 8388608 -t 8 | ||
|
||
./disk -r -o 10 -b 83886080 -t 1 | ||
./disk -r -o 10 -b 83886080 -t 2 | ||
./disk -r -o 1 -b 83886080 -t 4 | ||
./disk -r -o 1 -b 83886080 -t 8 |
Binary file not shown.
Binary file not shown.
Oops, something went wrong.