-
Notifications
You must be signed in to change notification settings - Fork 23
/
Copy pathcacheline.js
80 lines (68 loc) · 4.16 KB
/
cacheline.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import { run, bench, boxplot } from 'mitata';
// https://lemire.me/blog/2023/12/12/measuring-the-size-of-the-cache-line-empirically/
// Strided-copy benchmark: for every stride value, walk two 32 MiB byte buffers
// and copy one byte per step from the first into the second. All stride
// variants are registered inside one boxplot group.
boxplot(() => {
  bench('stride copy ($stride)', function* (state) {
    const byteLength = 32 * 1024 * 1024; // 32 MiB per buffer
    const step = state.get('stride'); // value of the 'stride' argument for this run

    // Both buffers are created here, ahead of the yielded callback, so the
    // callback itself contains only the copy loop.
    const source = new Uint8Array(byteLength);
    const target = new Uint8Array(byteLength);

    yield () => {
      // Touch exactly one byte every `step` bytes; larger steps mean fewer
      // loads/stores over the same address range.
      for (let offset = 0; offset < byteLength; offset += step) {
        target[offset] = source[offset];
      }
    };
  }).range('stride', 16, 512, 2); // strides 16..512, multiplier 2 (recorded run: 16, 32, 64, 128, 256, 512)
});

// Execute every registered benchmark and print the report.
await run();
/*
clk: ~3.34 GHz
cpu: Apple M2 Pro
runtime: bun 1.1.42 (arm64-darwin)
benchmark avg (min … max) p75 p99 (min … top 1%)
------------------------------------------- -------------------------------
stride copy (16) 1.14 ms/iter 1.16 ms ▇▆█▇▄
(1.07 ms … 1.88 ms) 1.31 ms ██████▇▇▅▃▅▄▅▆▃▂▂▂▂▁▁
3.74 ipc ( 35.60% stalls) 65.31% L1 data cache
3.96M cycles 14.81M instructions 28.53% retired LD/ST ( 4.22M)
stride copy (32) 1.14 ms/iter 1.17 ms ▄█▇▅▂
(1.07 ms … 1.49 ms) 1.31 ms ▅█████▇▅▅▅▄▃▅▅▃▄▃▂▁▂▂
2.67 ipc ( 27.41% stalls) 60.91% L1 data cache
3.96M cycles 10.59M instructions 19.96% retired LD/ST ( 2.11M)
stride copy (64) 1.13 ms/iter 1.15 ms █▆▆▅
(1.06 ms … 1.31 ms) 1.28 ms ▅██████▇▅▄▃▄▃▅▄▂▄▂▂▂▂
1.35 ipc ( 51.30% stalls) 10.00% L1 data cache
3.93M cycles 5.30M instructions 19.92% retired LD/ST ( 1.06M)
stride copy (128) 1.69 ms/iter 1.71 ms █▅
(1.61 ms … 2.09 ms) 1.86 ms ▂▂▇▇███▆▆▆▄▂▄▁▂▂▁▁▁▁▁
0.46 ipc ( 36.19% stalls) 1.63% L1 data cache
5.89M cycles 2.69M instructions 19.71% retired LD/ST (530.48k)
stride copy (256) 706.60 µs/iter 716.54 µs ▂▆█▆▄
(669.17 µs … 1.16 ms) 804.33 µs ▂█████▇█▇▄▅▃▂▂▂▁▂▂▂▁▁
0.54 ipc ( 35.69% stalls) 1.80% L1 data cache
2.46M cycles 1.34M instructions 19.72% retired LD/ST (263.77k)
stride copy (512) 291.77 µs/iter 294.21 µs █▅
(281.71 µs … 755.79 µs) 337.75 µs ▄██▆▆▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁
0.66 ipc ( 24.48% stalls) 0.70% L1 data cache
1.02M cycles 672.34k instructions 19.59% retired LD/ST (131.69k)
┌ ┐
╷┌┬┐ ╷
stride copy (16) ├┤│├───┤
╵└┴┘ ╵
╷┌┬┐ ╷
stride copy (32) ├┤│├───┤
╵└┴┘ ╵
╷┌┬┐ ╷
stride copy (64) ├┤│├──┤
╵└┴┘ ╵
╷┌┬┐ ╷
stride copy (128) ├┤│├───┤
╵└┴┘ ╵
╷┬ ╷
stride copy (256) ├│──┤
╵┴ ╵
┬ ╷
stride copy (512) │─┤
┴ ╵
└ ┘
281.71 µs 1.07 ms 1.86 ms
*/