-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
54 lines (45 loc) · 1.33 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from math import prod
from tinygrad import Tensor, dtypes
# Returns x_train, y_train, x_test, y_test (or just x, y when train_test_ratio is None).
def make_dataset(train_test_ratio=0.3, mod=113):
    """Build a shuffled modular-addition dataset over every pair ``(a, b)``.

    Each of the ``mod * mod`` pairs with ``0 <= a, b < mod`` appears exactly
    once. An input row is ``[a, b, mod]`` (the constant third column is named
    ``equals`` below, presumably acting as an "=" token) and its target is
    ``(a + b) % mod``. Inputs and targets are cast to float before returning.

    Args:
        train_test_ratio: Fraction of the shuffled rows placed in the training
            split; the remainder forms the test split. Pass ``None`` to get
            the whole shuffled dataset without splitting.
        mod: Modulus of the addition task.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` when ``train_test_ratio`` is not
        ``None``, otherwise ``(x, y)``. Inputs have shape ``(rows, 3)`` and
        targets have shape ``(rows, 1)``.
    """
    ds_len = mod * mod
    residues = Tensor.arange(mod, dtype=dtypes.int)

    # First operand cycles fastest: [[0, 1, ..., mod-1, 0, 1, ..., mod-1, ...]]
    a = residues.repeat((mod, 1)).flatten(0, -1).unsqueeze(0)
    # Second operand changes every `mod` entries: [[0, 0, ..., 1, 1, ..., mod-1]]
    b = residues.unsqueeze(-1).repeat((1, mod)).flatten(0, -1).unsqueeze(0)
    # Constant row filled with `mod`: [[mod, mod, ..., mod]]
    equals = Tensor.full((ds_len,), mod).unsqueeze(0)

    # Integer modulo is exact; no float division/floor round trip is needed.
    targets = (a + b) % mod

    # Stack the three rows and transpose so each dataset row is [a, b, mod].
    ds = a.cat(b, equals, dim=0).T

    # A true permutation: every example is used exactly once, so the train and
    # test splits are disjoint. (Sampling indices with randint would draw with
    # replacement, duplicating some pairs and dropping others.)
    indices = Tensor.randperm(ds_len)

    ds_shuffled = ds[indices].cast(dtypes.float)
    targets_shuffled = targets[:, indices].cast(dtypes.float).reshape(ds_len, 1)

    if train_test_ratio is None:
        return ds_shuffled, targets_shuffled

    train_cutoff = int(train_test_ratio * ds_len)
    return (
        ds_shuffled[:train_cutoff],
        targets_shuffled[:train_cutoff],
        ds_shuffled[train_cutoff:],
        targets_shuffled[train_cutoff:],
    )