forked from ml-explore/mlx-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataset.py
54 lines (41 loc) · 1.59 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Copyright © 2023-2024 Apple Inc.
from mlx.data.datasets import load_mnist
def mnist(batch_size, img_size, root=None):
    """Build batched MNIST training and test streams using mlx-data.

    Args:
        batch_size: Number of samples per batch, passed to ``.batch()``.
        img_size: Indexable ``(H, W)`` pair; ``img_size[0]`` is used as the
            resize height and ``img_size[1]`` as the resize width.
        root: Forwarded unchanged to ``load_mnist`` as its ``root`` argument.

    Returns:
        A ``(tr_iter, test_iter)`` tuple of streams. In both, the ``"image"``
        entry is resized, cast to ``float32`` and divided by 255, and samples
        are grouped into batches of ``batch_size``. The training stream is
        additionally shuffled before streaming and prefetched.
    """
    # Load train and test sets using mlx-data.
    tr = load_mnist(root=root, train=True)
    test = load_mnist(root=root, train=False)

    height, width = img_size[0], img_size[1]

    def normalize(x):
        # Cast first so the division is done in float32; intended to map
        # 8-bit pixel values into [0, 1].
        return x.astype("float32") / 255.0

    def preprocess(stream):
        # Shared per-sample pipeline so train and test cannot drift apart.
        return (
            stream.image_resize("image", h=height, w=width)
            .key_transform("image", normalize)
            .batch(batch_size)
        )

    # Only the training stream is shuffled and prefetched; the test stream
    # keeps the dataset order and is consumed without prefetching.
    tr_iter = preprocess(tr.shuffle().to_stream()).prefetch(4, 4)
    test_iter = preprocess(test.to_stream())

    return tr_iter, test_iter
if __name__ == "__main__":
    # Smoke test: pull one batch from each stream and check that the image
    # and label entries have the expected shapes.
    batch_size = 32
    img_size = (64, 64)  # (H, W)

    tr_iter, test_iter = mnist(batch_size=batch_size, img_size=img_size)

    # Images are expected channels-last with a single channel.
    B, H, W, C = batch_size, img_size[0], img_size[1], 1
    print(f"Batch size: {B}, Channels: {C}, Height: {H}, Width: {W}")

    batch_tr_iter = next(tr_iter)
    assert batch_tr_iter["image"].shape == (B, H, W, C), "Wrong training set size"
    assert batch_tr_iter["label"].shape == (batch_size,), "Wrong training set size"

    # Messages below name the test set so a failure points at the right stream.
    batch_test_iter = next(test_iter)
    assert batch_test_iter["image"].shape == (B, H, W, C), "Wrong test set size"
    assert batch_test_iter["label"].shape == (batch_size,), "Wrong test set size"