-
Notifications
You must be signed in to change notification settings - Fork 11
/
cuPrintf.cuh
130 lines (115 loc) · 5.48 KB
/
cuPrintf.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*
* Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#ifndef CUPRINTF_H
#define CUPRINTF_H
/*
* This is the header file supporting cuPrintf.cu and defining both
* the host and device-side interfaces. See that file for some more
* explanation and sample use code. See also below for details of the
* host-side interfaces.
*
* Quick sample code:
*
#include "cuPrintf.cu"
__global__ void testKernel(int val)
{
cuPrintf("Value is: %d\n", val);
}
int main()
{
cudaPrintfInit();
testKernel<<< 2, 3 >>>(10);
cudaPrintfDisplay(stdout, true);
cudaPrintfEnd();
return 0;
}
*/
///////////////////////////////////////////////////////////////////////////////
// DEVICE SIDE
// External function definitions for device-side code
// Abuse of templates to simulate varargs
__device__ int cuPrintf(const char *fmt);
template <typename T1> __device__ int cuPrintf(const char *fmt, T1 arg1);
template <typename T1, typename T2> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2);
template <typename T1, typename T2, typename T3> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3);
template <typename T1, typename T2, typename T3, typename T4> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4);
template <typename T1, typename T2, typename T3, typename T4, typename T5> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, typename T10> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10);
//
// cuPrintfRestrict
//
// Called to restrict output to a given thread/block. Pass
// the constant CUPRINTF_UNRESTRICTED to unrestrict output
// for thread/block IDs. Note you can therefore allow
// "all printfs from block 3" or "printfs from thread 2
// on all blocks", or "printfs only from block 1, thread 5".
//
// Arguments:
// threadid - Thread ID to allow printfs from
// blockid - Block ID to allow printfs from
//
// NOTE: Restrictions last between invocations of
// kernels unless cudaPrintfInit() is called again.
//
#define CUPRINTF_UNRESTRICTED -1
__device__ void cuPrintfRestrict(int threadid, int blockid);
///////////////////////////////////////////////////////////////////////////////
// HOST SIDE
// External function definitions for host-side code
//
// cudaPrintfInit
//
// Call this once to initialise the printf system. If the output
// file or buffer size needs to be changed, call cudaPrintfEnd()
// before re-calling cudaPrintfInit().
//
// The default size for the buffer is 1 megabyte. For CUDA
// architecture 1.1 and above, the buffer is filled linearly and
// is completely used; however for architecture 1.0, the buffer
// is divided into as many segments are there are threads, even
// if some threads do not call cuPrintf().
//
// Arguments:
// bufferLen - Length, in bytes, of total space to reserve
// (in device global memory) for output.
//
// Returns:
// cudaSuccess if all is well.
//
extern "C" cudaError_t cudaPrintfInit(size_t bufferLen=1048576); // 1-meg - that's enough for 4096 printfs by all threads put together
//
// cudaPrintfEnd
//
// Cleans up all memories allocated by cudaPrintfInit().
// Call this at exit, or before calling cudaPrintfInit() again.
//
extern "C" void cudaPrintfEnd();
//
// cudaPrintfDisplay
//
// Dumps the contents of the output buffer to the specified
// file pointer. If the output pointer is not specified,
// the default "stdout" is used.
//
// Arguments:
// outputFP - A file pointer to an output stream.
// showThreadID - If "true", output strings are prefixed
// by "[blockid, threadid] " at output.
//
// Returns:
// cudaSuccess if all is well.
//
extern "C" cudaError_t cudaPrintfDisplay(void *outputFP=NULL, bool showThreadID=false);
#endif // CUPRINTF_H