-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexample.py
37 lines (31 loc) · 907 Bytes
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import torch
import debug_print  # NOTE(review): project-local extension; presumably prints
                    # device tensors in a CUDA-graph-safe way — confirm its API.

# Demo script for debug_print.print_tensor:
#  1. eager-mode prints, including non-contiguous slices,
#  2. prints captured inside a CUDA graph and emitted on replay.
# Requires a CUDA device (everything is moved to device 0).

# --- Eager-mode prints: whole tensors and strided/sliced views. ---
x = torch.rand(3, 4, 5).to(0)
debug_print.print_tensor(x)
debug_print.print_tensor(x[..., 0:3])  # non-contiguous slice along last dim
x = torch.arange(3 * 4 * 5, dtype=torch.int32).view(3, 4, 5).to(0)
debug_print.print_tensor(x[..., 0])
debug_print.print_tensor(x[0:1, 1:3, 0:4])

# --- CUDA graph capture of a matmul chain interleaved with prints. ---
# Capture must happen on a non-default stream; warm up the same ops on that
# stream first so cuBLAS allocates its workspaces before capture begins.
s = torch.cuda.Stream()
s.wait_stream(torch.cuda.current_stream())
x = torch.empty(2, 2).half().to(0)  # static input buffers for the graph
y = torch.empty(2, 2).half().to(0)
with torch.cuda.stream(s):
    for _ in range(3):  # warmup iterations (loop index unused)
        z = x @ y
        z1 = z @ y
        z2 = z1 @ y

g = torch.cuda.CUDAGraph()
with torch.cuda.graph(g, stream=s):
    debug_print.print_tensor(x)
    debug_print.print_tensor(y, print_ptr=True)  # also print the data pointer
    z = x @ y
    debug_print.print_tensor(z)
    z1 = z @ y
    debug_print.print_tensor(z1[..., 0])  # sliced view inside the capture
    z2 = z1 @ y
    debug_print.print_tensor(z2)

# Refill the static input buffers in place, then replay the graph: the
# captured prints should now show the updated values.
x.copy_(torch.randn(2, 2))
y.copy_(torch.ones(2, 2))
print("start replay...")
g.replay()