-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathndt.h
135 lines (109 loc) · 4.49 KB
/
ndt.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/*************************************************************************
LIBRARY: NDT-NEW DEVELOPER TOOLS
FILE: ndt.h
AUTHOR: Zehuan Wang
DATE: 12/20/2012
Header file of NDT
**************************************************************************
**************************************************************************
ROUTINES:
ndt_gpu_timer_start()
ndt_gpu_timer_end()
ndt_cpu_timer_start()
ndt_cpu_timer_end()
ndt_timer_print()
ndt_error_print()
ndt_block_reduce_min()
*************************************************************************/
#include <time.h>
#include <stdio.h>
#include <assert.h>
#include <sys/time.h>
/************************************************************************
#cat: NDT_ERR - debugging aid. Waits for the device with
#cat: cudaDeviceSynchronize(), then reads (and clears) the last CUDA
#cat: runtime error with cudaGetLastError() and prints
#cat: "<source line>,<error string>" to stdout.
Note:
The line is printed unconditionally, i.e. also when no error occurred.
The macro expands to a brace-enclosed block, so it may be written with
or without a trailing semicolon.
***********************************************************************/
#define NDT_ERR { \
    cudaDeviceSynchronize(); \
    printf("%d,%s\n", __LINE__, cudaGetErrorString(cudaGetLastError())); \
}
/************************************************************************
#cat: ndt_error - status codes returned by the NDT routines that do not
#cat: return a cudaError_t.
Values:
ndt_success            - 0
ndt_input_null_pointer - 1 (presumably reported when a required pointer
                         argument is NULL - confirm in the implementation)
Note:
The definition is guarded so that including this header more than once
in the same translation unit does not redefine the enum.
***********************************************************************/
#ifndef NDT_ERROR_ENUM_DEFINED
#define NDT_ERROR_ENUM_DEFINED
enum ndt_error {
    ndt_success,
    ndt_input_null_pointer
};
#endif /* NDT_ERROR_ENUM_DEFINED */
/************************************************************************
#cat: ndt_gpu_timer_start - initializes the start and end cudaEvent pair
#cat: used to time a kernel running on the GPU.
#cat: The caller declares two cudaEvent_t variables beforehand and passes
#cat: their addresses to this function.
Output:
ndt_o_pcudaEvent_start - pointer to the caller's start event
ndt_o_pcudaEvent_end - pointer to the caller's end event
Input:
--
Return:
cudaError_t status reported by the CUDA runtime
Note:
Only the prototype is visible in this header. NOTE(review): whether the
start event is also recorded by this call is not visible here - confirm
in the implementation.
***********************************************************************/
extern cudaError_t ndt_gpu_timer_start(cudaEvent_t *ndt_o_pcudaEvent_start, cudaEvent_t *ndt_o_pcudaEvent_end);
/***********************************************************************
#cat: ndt_gpu_timer_end - finishes timing a kernel run on the GPU.
#cat: ndt_gpu_timer_start must be called first to initialize the start
#cat: and end events; those events are then passed to this function,
#cat: which outputs the elapsed time in ms.
Output:
ndt_o_pf_time - pointer to a float that receives the elapsed time (ms)
Input:
ndt_i_cudaEvent_start - the start event (passed by value)
ndt_i_cudaEvent_end - the end event (passed by value)
Return:
cudaError_t status reported by the CUDA runtime
Note:
NOTE(review): whether the events are destroyed by this call is not
visible from this header - confirm in the implementation.
**********************************************************************/
extern cudaError_t ndt_gpu_timer_end(cudaEvent_t ndt_i_cudaEvent_start, cudaEvent_t ndt_i_cudaEvent_end, float* ndt_o_pf_time);
/***********************************************************************
#cat: ndt_cpu_timer_start - records the start time of a host-code section.
#cat: The caller declares a long long variable beforehand and passes its
#cat: address to this function.
Output:
ndt_o_pf_start - pointer to the long long that receives the start time
                 (NOTE(review): the unit of the stored value is not
                 visible from this header - confirm in the implementation)
Input:
--
Return:
ndt_error status code
**********************************************************************/
extern ndt_error ndt_cpu_timer_start(long long* ndt_o_pf_start);
/***********************************************************************
#cat: ndt_cpu_timer_end - records the end time of a host-code section.
#cat: ndt_cpu_timer_start must be called first; the value it produced is
#cat: passed to this function. The caller declares a float beforehand to
#cat: receive the elapsed time in ms.
Output:
ndt_o_pf_time - pointer to a float that receives the elapsed time (ms)
Input:
ndt_i_f_start - the start time produced by ndt_cpu_timer_start
                (a long long, passed by value)
Return:
ndt_error status code
**********************************************************************/
extern ndt_error ndt_cpu_timer_end(long long ndt_i_f_start, float* ndt_o_pf_time);
/**********************************************************************
#cat: ndt_timer_print - prints the time cost in ms.
Output:
--
Input:
ndt_i_pf_time - elapsed time in ms (a float passed by value, e.g. the
                value produced by ndt_gpu_timer_end or ndt_cpu_timer_end)
Return:
--
*********************************************************************/
extern void ndt_timer_print(float ndt_i_pf_time);
/**********************************************************************
#cat: ndt_error_print - prints the error message for an ndt_error code.
Output:
--
Input:
ndt_i_error - status code returned by an ndt function
Return:
--
Note:
NOTE(review): the exact text and output stream are not visible from
this header - confirm in the implementation.
*********************************************************************/
extern void ndt_error_print(ndt_error ndt_i_error);
/**********************************************************************
#cat: ndt_block_reduce_min - a device function that finds the minimum
#cat: value in an array. Each block reduces one array.
#cat: Requirements stated by the author: enough shared memory
#cat: (ndt_i_i_length*sizeof(T)) and enough threads
#cat: (blockDim == ndt_i_i_length).
Output:
--
Input:
ndt_i_pt_array - input array; the element type T is a template parameter
ndt_i_i_length - length of the input array
Return:
The min value
Note:
NOTE(review): this header only declares the template; its definition is
not in this file. Code that instantiates it needs the definition (or an
explicit instantiation) to be available, and calling a __device__
function defined in another translation unit requires relocatable
device code - confirm how the project builds this.
*********************************************************************/
template<typename T>
__device__ T ndt_block_reduce_min(T *ndt_i_pt_array, const int ndt_i_i_length);