-
Notifications
You must be signed in to change notification settings - Fork 1
/
v6.html
454 lines (395 loc) · 23.3 KB
/
v6.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
<!DOCTYPE HTML>
<html lang="en" class="sidebar-visible no-js">
<head>
<!-- Book generated using mdBook -->
<meta charset="UTF-8">
<title>v6 - Comparing parallel Rust and C++</title>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta name="description" content="">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#ffffff" />
<link rel="shortcut icon" href="favicon.png">
<link rel="stylesheet" href="css/variables.css">
<link rel="stylesheet" href="css/general.css">
<link rel="stylesheet" href="css/chrome.css">
<link rel="stylesheet" href="css/print.css" media="print">
<!-- Fonts -->
<link rel="stylesheet" href="FontAwesome/css/font-awesome.css">
<link href="https://fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet" type="text/css">
<link href="https://fonts.googleapis.com/css?family=Source+Code+Pro:500" rel="stylesheet" type="text/css">
<!-- Highlight.js Stylesheets -->
<link rel="stylesheet" href="highlight.css">
<link rel="stylesheet" href="tomorrow-night.css">
<link rel="stylesheet" href="ayu-highlight.css">
<!-- Custom theme stylesheets -->
</head>
<body class="light">
<!-- Provide site root to javascript -->
<script type="text/javascript">
// Page-level globals for the scripts on this page: the site root path
// and the theme name used when no stored theme is found.
var path_to_root = "",
    default_theme = "light";
</script>
<!-- Work around some values being stored in localStorage wrapped in quotes -->
<script type="text/javascript">
// Strip one pair of wrapping double quotes from the stored theme and sidebar
// values. localStorage.getItem returns null for a missing key, so each value
// is null-checked on its own: previously a missing theme made
// `theme.startsWith` throw, and the shared catch silently skipped the
// sidebar fix-up as well.
try {
    var theme = localStorage.getItem('mdbook-theme');
    var sidebar = localStorage.getItem('mdbook-sidebar');

    if (theme !== null && theme.startsWith('"') && theme.endsWith('"')) {
        localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
    }

    if (sidebar !== null && sidebar.startsWith('"') && sidebar.endsWith('"')) {
        localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
    }
} catch (e) {
    // Best effort only: storage may be unavailable, in which case nothing is rewritten.
}
</script>
<!-- Set the theme before any content is loaded, prevents flash -->
<script type="text/javascript">
// Resolve the theme name (stored value, else default_theme) and stamp it on
// <body> and <html> right away.
var theme;
try {
    theme = localStorage.getItem('mdbook-theme');
} catch (e) {
    // Storage could not be read; theme keeps whatever value it already had.
}
// `== null` matches exactly null and undefined.
if (theme == null) {
    theme = default_theme;
}
document.body.className = theme;
document.querySelector('html').className = theme + ' js';
</script>
<!-- Hide / unhide sidebar before it is displayed -->
<script type="text/javascript">
// Decide the sidebar state and reflect it as a "sidebar-<state>" class on <html>.
var html = document.querySelector('html');
var sidebar = 'hidden';
if (document.body.clientWidth >= 1080) {
    // At this width or wider, use the stored state, falling back to 'visible'
    // when the stored value is empty or missing.
    try {
        sidebar = localStorage.getItem('mdbook-sidebar');
    } catch (e) {
        // Storage could not be read; sidebar keeps its current value.
    }
    if (!sidebar) {
        sidebar = 'visible';
    }
}
html.classList.remove('sidebar-visible');
html.classList.add("sidebar-" + sidebar);
</script>
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
<div class="sidebar-scrollbox">
<ol class="chapter"><li class="affix"><a href="introduction.html">Introduction</a></li><li class="affix"><a href="cpp_abi.html">Calling Rust functions from C++</a></li><li class="affix"><a href="v0.html">v0</a></li><li class="affix"><a href="v1.html">v1</a></li><li class="affix"><a href="v2.html">v2</a></li><li class="affix"><a href="v3.html">v3</a></li><li class="affix"><a href="v4.html">v4</a></li><li class="affix"><a href="v5.html">v5</a></li><li class="affix"><a href="v6.html" class="active">v6</a></li><li class="affix"><a href="v7.html">v7</a></li><li class="affix"><a href="results.html">Results</a></li><li class="affix"><a href="references.html">Additional reading</a></li></ol>
</div>
<div id="sidebar-resize-handle" class="sidebar-resize-handle"></div>
</nav>
<div id="page-wrapper" class="page-wrapper">
<div class="page">
<div id="menu-bar" class="menu-bar">
<div id="menu-bar-sticky-container">
<div class="left-buttons">
<button id="sidebar-toggle" class="icon-button" type="button" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
<i class="fa fa-bars"></i>
</button>
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
<i class="fa fa-paint-brush"></i>
</button>
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
<li role="none"><button role="menuitem" class="theme" id="light">Light (default)</button></li>
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
</ul>
</div>
<h1 class="menu-title">Comparing parallel Rust and C++</h1>
<div class="right-buttons">
<a href="print.html" title="Print this book" aria-label="Print this book">
<i id="print-button" class="fa fa-print"></i>
</a>
</div>
</div>
</div>
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
<script type="text/javascript">
// Mirror the sidebar state (global `sidebar`) into ARIA attributes on the
// toggle button and the sidebar, and into the tab order of the sidebar links.
(function () {
    var isShown = sidebar === 'visible';

    document.getElementById('sidebar-toggle').setAttribute('aria-expanded', isShown);
    document.getElementById('sidebar').setAttribute('aria-hidden', !isShown);

    var links = document.querySelectorAll('#sidebar a');
    for (var i = 0; i < links.length; i++) {
        links[i].setAttribute('tabIndex', isShown ? 0 : -1);
    }
})();
</script>
<div id="content" class="content">
<main>
<h1><a class="header" href="#software-prefetching" id="software-prefetching">Software prefetching</a></h1>
<p><a href="https://github.com/parallel-rust-cpp/shortcut-comparison/blob/8cdab059d22eb8f30e1408c2fbf0ae666fa231d9/src/rust/v6_prefetch/src/lib.rs">Full source</a></p>
<p>In this version we will attempt to take advantage of vacant CPU execution ports by inserting prefetch instructions to reduce average memory access latency in the performance-critical loop.</p>
<p>The motivation behind this idea is explained in the <a href="http://ppc.cs.aalto.fi/ch2/v5asm">reference materials</a>.
Note that <code>vpermpd</code> and <code>vpermilpd</code> use the same execution ports as <code>vperm2f128</code> and <code>vpermilps</code>, so the reasoning also holds for <code>clang</code> and <code>rustc</code>.</p>
<h2><a class="header" href="#implementation" id="implementation">Implementation</a></h2>
<p>We won't be making many changes from <a href="v5.html"><code>v5</code></a> since we only want to insert two <code>prefetcht0</code> instructions in the innermost loop.
<code>prefetcht0</code> uses the strongest locality hint T0, which requests the data to be loaded into all cache levels.
The instruction is provided in the same Intel intrinsics crate we have been using for inserting SIMD instructions, where it is defined as <a href="https://doc.rust-lang.org/1.37.0/core/arch/x86_64/fn._mm_prefetch.html"><code>_mm_prefetch</code></a>.
Since we will be using it only for prefetching addresses containing <code>f32x8</code>s, we might as well wrap it into a helper function and put it in our SIMD helper module:</p>
<pre><code class="language-rust no_run noplaypen">#[inline]
/// Passes the address `p.offset(offset)` to `_mm_prefetch` together with the
/// `_MM_HINT_T0` locality hint.
///
/// `offset` is counted in `f32x8` elements, since `p` is a `*const f32x8`.
/// Nothing is read through the pointer in this function; it is only cast to
/// `*const i8` and handed to the intrinsic.
pub fn prefetch(p: *const f32x8, offset: isize) {
// NOTE(review): the standard library documents `pointer::offset` as requiring
// the resulting address to stay within (or one past the end of) the same
// allocation, even if it is never dereferenced; `wrapping_offset` has no such
// requirement. Callers pass `col + PREFETCH_LENGTH` - confirm this stays in bounds.
unsafe { _mm_prefetch(p.offset(offset) as *const i8, _MM_HINT_T0) }
}
</code></pre>
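<p>The function takes two arguments: a pointer to an <code>f32x8</code> and an offset counted in <code>f32x8</code> elements. It requests a cache line fetch, using locality hint T0, for the address that lies <code>offset</code> elements past the pointer.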
In C, <code>p.offset(offset)</code> would basically be equal to <code>p + offset</code>.
We need the <code>unsafe</code> expression both for using the <code>_mm_prefetch</code> intrinsic and for <code>p.offset</code>, but we shouldn't have to worry about memory safety so much here since we only need the offset address; the pointer will not be dereferenced.</p>
<p>Now that we have our prefetch-helper, we can add it to our <a href="v5.html"><code>v5</code></a> implementation.
First, we get a pair of <code>f32x8</code> pointers to the current row pair <code>vd_row</code> and <code>vt_row</code>:</p>
<pre><code class="language-rust no_run noplaypen"> // Everything is exactly as in v5, but we add some prefetch instructions in the innermost loop
let step_row_block = |(r_row_block, vd_row): (&mut [f32], &[f32x8])| {
// Create const raw pointers for specifying addresses to prefetch
let vd_row_ptr = vd_row.as_ptr();
const PREFETCH_LENGTH: usize = 20;
for (j, vt_row) in vt.chunks_exact(n).enumerate() {
let vt_row_ptr = vt_row.as_ptr();
</code></pre>
<p><code>PREFETCH_LENGTH = 20</code> is the number of <code>f32x8</code> addresses we want to look ahead, and it was chosen empirically in the reference implementation.</p>
<p>We'll insert two prefetch hints for addresses 20 elements ahead of <code>d0</code> and <code>t0</code> at the beginning of the innermost loop:</p>
<pre><code class="language-rust no_run noplaypen"> let mut tmp = [simd::f32x8_infty(); simd::f32x8_LENGTH];
for (col, (&d0, &t0)) in vd_row.iter().zip(vt_row).enumerate() {
// Insert prefetch hints for fetching the cache line containing
// the memory address 20 addresses ahead of the current column
simd::prefetch(vd_row_ptr, (col + PREFETCH_LENGTH) as isize);
simd::prefetch(vt_row_ptr, (col + PREFETCH_LENGTH) as isize);
let d2 = simd::swap(d0, 2);
let d4 = simd::swap(d0, 4);
let d6 = simd::swap(d4, 2);
let t1 = simd::swap(t0, 1);
tmp[0] = simd::min(tmp[0], simd::add(d0, t0));
tmp[1] = simd::min(tmp[1], simd::add(d0, t1));
tmp[2] = simd::min(tmp[2], simd::add(d2, t0));
tmp[3] = simd::min(tmp[3], simd::add(d2, t1));
tmp[4] = simd::min(tmp[4], simd::add(d4, t0));
tmp[5] = simd::min(tmp[5], simd::add(d4, t1));
tmp[6] = simd::min(tmp[6], simd::add(d6, t0));
tmp[7] = simd::min(tmp[7], simd::add(d6, t1));
}
</code></pre>
<p>That's about it, let's run the benchmarks.
C++ version available <a href="https://github.com/parallel-rust-cpp/shortcut-comparison/blob/8cdab059d22eb8f30e1408c2fbf0ae666fa231d9/src/cpp/v6_prefetch/step.cpp">here</a>.</p>
<table><thead><tr><th align="left">Implementation</th><th align="left">Compiler</th><th align="left">Time (s)</th><th align="left">IPC</th></tr></thead><tbody>
<tr><td align="left">C++ <code>v6</code></td><td align="left"><code>gcc 7.4.0-1ubuntu1</code></td><td align="left">2.10</td><td align="left">3.20</td></tr>
<tr><td align="left">C++ <code>v6</code></td><td align="left"><code>clang 6.0.0-1ubuntu2</code></td><td align="left">2.33</td><td align="left">2.25</td></tr>
<tr><td align="left">Rust <code>v6</code></td><td align="left"><code>rustc 1.38.0-nightly</code></td><td align="left">2.67</td><td align="left">2.77</td></tr>
</tbody></table>
<p>Something is not right: the Rust implementation became slower than the <a href="v5.html">previous</a> version.</p>
<p>Let's look at the assembly.</p>
<h3><a class="header" href="#gcc" id="gcc"><code>gcc</code></a></h3>
<pre><code class="language-x86asm">LOOP:
vmovaps ymm2,YMMWORD PTR [rdx-0x280]
vmovaps ymm3,YMMWORD PTR [rax-0x280]
prefetcht0 BYTE PTR [rax]
add rax,0x20
prefetcht0 BYTE PTR [rdx]
add rdx,0x20
vpermilps ymm0,ymm2,0xb1
vperm2f128 ymm13,ymm3,ymm3,0x1
vpermilps ymm14,ymm3,0x4e
vaddps ymm15,ymm3,ymm2
vaddps ymm3,ymm3,ymm0
vpermilps ymm1,ymm13,0x4e
vminps ymm7,ymm7,ymm3
vaddps ymm3,ymm2,ymm14
vaddps ymm14,ymm0,ymm14
vminps ymm11,ymm11,ymm15
vminps ymm10,ymm10,ymm3
vaddps ymm3,ymm2,ymm13
vaddps ymm13,ymm0,ymm13
vaddps ymm2,ymm2,ymm1
vaddps ymm0,ymm0,ymm1
vminps ymm6,ymm6,ymm14
vminps ymm9,ymm9,ymm3
vminps ymm5,ymm5,ymm13
vminps ymm8,ymm8,ymm2
vminps ymm4,ymm4,ymm0
cmp rax,rcx
jne LOOP
</code></pre>
<p>There are two prefetch-hints <code>prefetcht0</code>, placed <code>0x280</code> bytes ahead of the current loop indexes in registers <code>rdx</code> and <code>rax</code>.
This equals 20 <code>f32x8</code> vectors, because each <code>f32x8</code> is 32 bytes and <code>0x280/32 = 20</code>, as we wanted.</p>
<h3><a class="header" href="#clang" id="clang"><code>clang</code></a></h3>
<pre><code class="language-x86asm">LOOP:
prefetcht0 BYTE PTR [rcx+rdi*1]
prefetcht0 BYTE PTR [rax+rdi*1]
vmovapd ymm9,YMMWORD PTR [rcx+rdi*1-0x280]
vmovaps ymm10,YMMWORD PTR [rax+rdi*1-0x280]
vpermpd ymm11,ymm9,0x4e
vpermilpd ymm12,ymm9,0x5
vpermilpd ymm13,ymm11,0x5
vpermilps ymm14,ymm10,0xb1
vaddps ymm15,ymm9,ymm10
vminps ymm8,ymm8,ymm15
vaddps ymm9,ymm9,ymm14
vminps ymm4,ymm4,ymm9
vaddps ymm9,ymm12,ymm10
vminps ymm7,ymm7,ymm9
vaddps ymm9,ymm12,ymm14
vminps ymm3,ymm3,ymm9
vaddps ymm9,ymm11,ymm10
vminps ymm6,ymm6,ymm9
vaddps ymm9,ymm11,ymm14
vminps ymm2,ymm2,ymm9
vaddps ymm9,ymm10,ymm13
vminps ymm5,ymm5,ymm9
vaddps ymm9,ymm13,ymm14
vminps ymm1,ymm1,ymm9
add rdi,0x20
add rdx,0xffffffffffffffff
jne LOOP
</code></pre>
<h3><a class="header" href="#rustc" id="rustc"><code>rustc</code></a></h3>
<pre><code class="language-x86asm">LOOP:
inc rbx
vmovapd ymm9,YMMWORD PTR [r11+rsi*1-0x280]
vmovaps ymm10,YMMWORD PTR [rcx+rsi*1]
prefetcht0 BYTE PTR [r11+rsi*1]
prefetcht0 BYTE PTR [rdx+rsi*1]
vpermilpd ymm11,ymm9,0x5
vpermpd ymm12,ymm9,0x4e
vpermpd ymm13,ymm9,0x1b
vpermilps ymm14,ymm10,0xb1
vaddps ymm15,ymm9,ymm10
vminps ymm8,ymm8,ymm15
vmovaps YMMWORD PTR [rsp+0xc0],ymm8
vaddps ymm9,ymm9,ymm14
vminps ymm7,ymm7,ymm9
vmovaps YMMWORD PTR [rsp+0xe0],ymm7
vaddps ymm9,ymm11,ymm10
vminps ymm6,ymm6,ymm9
vmovaps YMMWORD PTR [rsp+0x100],ymm6
vaddps ymm9,ymm11,ymm14
vminps ymm5,ymm5,ymm9
vmovaps YMMWORD PTR [rsp+0x120],ymm5
vaddps ymm9,ymm12,ymm10
vminps ymm4,ymm4,ymm9
vmovaps YMMWORD PTR [rsp+0x140],ymm4
vaddps ymm9,ymm12,ymm14
vminps ymm3,ymm3,ymm9
vmovaps YMMWORD PTR [rsp+0x160],ymm3
vaddps ymm9,ymm10,ymm13
vminps ymm2,ymm2,ymm9
vmovaps YMMWORD PTR [rsp+0x180],ymm2
vaddps ymm9,ymm13,ymm14
vminps ymm1,ymm1,ymm9
vmovaps YMMWORD PTR [rsp+0x1a0],ymm1
add rsi,0x20
cmp rbx,rax
jb LOOP
</code></pre>
<p>We can see two prefetch instructions with locality hint T0, but for some reason there is also pretty bad register spilling.
This behaviour seems a bit odd, since the only thing we changed in the inner loop from <a href="v5.html"><code>v5</code></a> was to add two prefetch instructions.
Also, we can see that after writing a register into memory, the same register is not used anywhere in the loop during that iteration.</p>
<p>Recall how we faced the same issue in <a href="v4.html"><code>v4</code></a>, which we solved by unrolling the <code>tmp</code> results array into separate, mutable variables.
This seemed to encourage the compiler to keep the temporary results in registers for the duration of the loop, so let's do the same here.</p>
<h2><a class="header" href="#full-step_row_block-implementation" id="full-step_row_block-implementation">Full <code>step_row_block</code> implementation</a></h2>
<pre><code class="language-rust no_run noplaypen"> // Everything is mostly as in v5,
// but we add some prefetch instructions in the innermost loop,
// and unroll the tmp results array to avoid register spilling
// Closure applied to each (r_row_block, vd_row) pair produced by the zipped
// parallel iterators at the end of this listing.
let step_row_block = |(r_row_block, vd_row): (&mut [f32], &[f32x8])| {
// Create const raw pointers for specifying addresses to prefetch
let vd_row_ptr = vd_row.as_ptr();
// Look-ahead distance, in f32x8 elements, added to the column index for both prefetch hints
const PREFETCH_LENGTH: usize = 20;
for (j, vt_row) in vt.chunks_exact(n).enumerate() {
let vt_row_ptr = vt_row.as_ptr();
// Eight separate accumulators instead of one array, each starting from simd::f32x8_infty()
let mut tmp0 = simd::f32x8_infty();
let mut tmp1 = simd::f32x8_infty();
let mut tmp2 = simd::f32x8_infty();
let mut tmp3 = simd::f32x8_infty();
let mut tmp4 = simd::f32x8_infty();
let mut tmp5 = simd::f32x8_infty();
let mut tmp6 = simd::f32x8_infty();
let mut tmp7 = simd::f32x8_infty();
for (col, (&d0, &t0)) in vd_row.iter().zip(vt_row).enumerate() {
// Insert prefetch hints for fetching the cache line containing
// the memory address 20 addresses ahead of the current column
simd::prefetch(vd_row_ptr, (col + PREFETCH_LENGTH) as isize);
simd::prefetch(vt_row_ptr, (col + PREFETCH_LENGTH) as isize);
// Rearranged copies of d0 and t0, produced by simd::swap
let d2 = simd::swap(d0, 2);
let d4 = simd::swap(d0, 4);
let d6 = simd::swap(d4, 2);
let t1 = simd::swap(t0, 1);
// Each accumulator keeps the element-wise minimum of itself and one d/t sum
tmp0 = simd::min(tmp0, simd::add(d0, t0));
tmp1 = simd::min(tmp1, simd::add(d0, t1));
tmp2 = simd::min(tmp2, simd::add(d2, t0));
tmp3 = simd::min(tmp3, simd::add(d2, t1));
tmp4 = simd::min(tmp4, simd::add(d4, t0));
tmp5 = simd::min(tmp5, simd::add(d4, t1));
tmp6 = simd::min(tmp6, simd::add(d6, t0));
tmp7 = simd::min(tmp7, simd::add(d6, t1));
}
// Gather the accumulators into an array; the odd-numbered ones go through simd::swap(_, 1) first
let tmp = [
tmp0, simd::swap(tmp1, 1),
tmp2, simd::swap(tmp3, 1),
tmp4, simd::swap(tmp5, 1),
tmp6, simd::swap(tmp7, 1),
];
// Copy the results into the output rows of this block
for (tmp_i, r_row) in r_row_block.chunks_exact_mut(n).enumerate() {
for tmp_j in 0..simd::f32x8_LENGTH {
let res_j = j * simd::f32x8_LENGTH + tmp_j;
// Only columns below n are written
if res_j < n {
// tmp_i ^ tmp_j picks the accumulator that supplies lane tmp_j for output row tmp_i
let v = tmp[tmp_i ^ tmp_j];
let vi = tmp_j as u8;
r_row[res_j] = simd::extract(v, vi);
}
}
}
}
};
// Pair each chunk of simd::f32x8_LENGTH * n elements of r with one chunk of n
// elements of vd and run the closure on every pair
r.par_chunks_mut(simd::f32x8_LENGTH * n)
.zip(vd.par_chunks(n))
.for_each(step_row_block);
</code></pre>
<h3><a class="header" href="#rustc-without-spilling" id="rustc-without-spilling"><code>rustc</code> without spilling</a></h3>
<pre><code class="language-x86asm">LOOP:
inc rbx
vmovapd ymm9,YMMWORD PTR [r11+rsi*1-0x280]
vmovaps ymm10,YMMWORD PTR [rcx+rsi*1]
prefetcht0 BYTE PTR [r11+rsi*1]
prefetcht0 BYTE PTR [rdx+rsi*1]
vpermilpd ymm11,ymm9,0x5
vpermpd ymm12,ymm9,0x4e
vpermpd ymm13,ymm9,0x1b
vpermilps ymm14,ymm10,0xb1
vaddps ymm15,ymm9,ymm10
vminps ymm4,ymm4,ymm15
vaddps ymm9,ymm9,ymm14
vminps ymm8,ymm8,ymm9
vaddps ymm9,ymm11,ymm10
vminps ymm3,ymm3,ymm9
vaddps ymm9,ymm11,ymm14
vminps ymm7,ymm7,ymm9
vaddps ymm9,ymm12,ymm10
vminps ymm2,ymm2,ymm9
vaddps ymm9,ymm12,ymm14
vminps ymm6,ymm6,ymm9
vaddps ymm9,ymm10,ymm13
vminps ymm1,ymm1,ymm9
vaddps ymm9,ymm13,ymm14
vminps ymm5,ymm5,ymm9
add rsi,0x20
cmp rbx,rax
jb LOOP
</code></pre>
<h2><a class="header" href="#benchmark" id="benchmark">Benchmark</a></h2>
<table><thead><tr><th align="left">Implementation</th><th align="left">Compiler</th><th align="left">Time (s)</th><th align="left">IPC</th></tr></thead><tbody>
<tr><td align="left">C++ <code>v6</code></td><td align="left"><code>gcc 7.4.0-1ubuntu1</code></td><td align="left">2.10</td><td align="left">3.20</td></tr>
<tr><td align="left">C++ <code>v6</code></td><td align="left"><code>clang 6.0.0-1ubuntu2</code></td><td align="left">2.33</td><td align="left">2.25</td></tr>
<tr><td align="left">Rust <code>v6</code></td><td align="left"><code>rustc 1.38.0-nightly</code></td><td align="left">2.16</td><td align="left">3.23</td></tr>
</tbody></table>
</main>
<nav class="nav-wrapper" aria-label="Page navigation">
<!-- Mobile navigation buttons -->
<a rel="prev" href="v5.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next" href="v7.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
<div style="clear: both"></div>
</nav>
</div>
</div>
<nav class="nav-wide-wrapper" aria-label="Page navigation">
<a href="v5.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a href="v7.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
</nav>
</div>
<script src="clipboard.min.js" type="text/javascript" charset="utf-8"></script>
<script src="highlight.js" type="text/javascript" charset="utf-8"></script>
<script src="book.js" type="text/javascript" charset="utf-8"></script>
<!-- Custom JS scripts -->
</body>
</html>