-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathnrm2.S
55 lines (55 loc) · 1.12 KB
/
nrm2.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
nrm2:                                   # @nrm2
# -----------------------------------------------------------------------
# nrm2 -- square root of the sum of squares of an array of doubles.
#   Presumably: double nrm2(size_t n, const double *x)   (TODO confirm
#   against the C prototype; only the register usage is visible here.)
#
# Target: RV64 with the D and V extensions (fmv.d.x, e64 vector ops).
# In:     a0 = element count n (compared as unsigned)
#         a1 = address of the first 8-byte element
# Out:    fa0 = sqrt(x[0]^2 + ... + x[n-1]^2); n == 0 yields sqrt(+0.0)
# Clobb:  a0-a5, ft0, fa0, v8-v11, vl, vtype
#
# The sum is accumulated naively (no scaling), and the vector path adds
# the terms in a different order than the scalar path.
# -----------------------------------------------------------------------
        fmv.d.x  fa0, zero              # running sum = +0.0
        beqz     a0, .Lnrm2_sqrt        # n == 0: go straight to the square root
        li       a2, 0                  # a2 = elements consumed by the vector loop
        li       a3, 8
        bltu     a0, a3, .Lnrm2_tail    # fewer than 8 elements: scalar loop only

        # The unrolled loop below assumes each vector load covers exactly
        # 4 doubles (two loads 32 bytes apart, 8 doubles per iteration).
        # vsetivli grants fewer than 4 when VLEN < 256 bits, which would
        # make the loop silently skip elements, so verify the granted vl
        # and fall back to the scalar loop (a2 = 0, fa0 = +0.0) otherwise.
        vsetivli a3, 4, e64, m1, ta, mu # request vl = 4; a3 = vl actually granted
        li       a4, 4
        bne      a3, a4, .Lnrm2_tail

        andi     a2, a0, -8             # a2 = n rounded down to a multiple of 8
        fmv.d.x  ft0, zero
        fneg.d   ft0, ft0               # ft0 = -0.0, the additive identity
        vfmv.v.f v8, ft0                # v8 = {-0.0, -0.0, -0.0, -0.0}
        # vmv.s.x treats every element but 0 as tail, so switch to
        # tail-undisturbed to keep the -0.0 lanes of v9 intact.
        vsetvli  zero, zero, e64, m1, tu, mu
        vmv.v.v  v9, v8
        vmv.s.x  v9, zero               # v9 = {+0.0, -0.0, -0.0, -0.0}
        vsetvli  zero, zero, e64, m1, ta, mu # loop-invariant: set once, not per iteration
        mv       a3, a2                 # a3 = elements left for the vector loop
        mv       a4, a1                 # a4 = read cursor

.Lnrm2_vec_loop:                        # invariant: a3 > 0 and a multiple of 8
        addi     a5, a4, 32             # a5 = address of the second group of 4
        vle64.v  v10, (a4)
        vle64.v  v11, (a5)
        vfmacc.vv v9, v10, v10          # v9 += x[i..i+3]^2
        vfmacc.vv v8, v11, v11          # v8 += x[i+4..i+7]^2
        addi     a3, a3, -8
        addi     a4, a4, 64
        bnez     a3, .Lnrm2_vec_loop

        vfadd.vv v8, v8, v9             # merge the two partial accumulators
        vfmv.s.f v9, ft0                # reduction seed = -0.0
        vfredosum.vs v8, v8, v9         # v8[0] = seed + v8[0] + ... + v8[3]
        vfmv.f.s fa0, v8                # fa0 = sum over the first a2 elements
        beq      a2, a0, .Lnrm2_sqrt    # n was a multiple of 8: no tail left

.Lnrm2_tail:                            # here: a2 elements done, fa0 = their sum
        sub      a0, a0, a2             # a0 = remaining element count (> 0)
        slli     a2, a2, 3              # elements -> bytes
        add      a1, a1, a2             # a1 = address of the first remaining element

.Lnrm2_tail_loop:
        fld      ft0, 0(a1)
        fmadd.d  fa0, ft0, ft0, fa0     # sum += x[i] * x[i]
        addi     a0, a0, -1
        addi     a1, a1, 8
        bnez     a0, .Lnrm2_tail_loop

.Lnrm2_sqrt:
        fsqrt.d  ft0, fa0
        feq.d    a0, ft0, ft0           # a0 = 0 only when the result is NaN
        beqz     a0, .Lnrm2_sqrt_libm
        fmv.d    fa0, ft0
        ret

.Lnrm2_sqrt_libm:
        # NaN result: hand the untouched sum in fa0 to the external sqrt
        # (presumably so the library can do its own error reporting --
        # TODO confirm).
        tail     sqrt
                                        # -- End function