@@ -31,23 +31,114 @@ bool is_fusible_insn(ph2_ir_t *ph2_ir)
31
31
}
32
32
}
33
33
34
- void insn_fusion (ph2_ir_t * ph2_ir )
34
+ bool insn_fusion (ph2_ir_t * ph2_ir )
35
35
{
36
36
ph2_ir_t * next = ph2_ir -> next ;
37
37
if (!next )
38
- return ;
38
+ return false ;
39
39
40
40
if (next -> op == OP_assign ) {
41
- /* eliminate {ALU rn, rs1, rs2; mv rd, rn;} */
42
- if (!is_fusible_insn (ph2_ir ))
43
- return ;
44
- if (ph2_ir -> dest == next -> src0 ) {
41
+ if (is_fusible_insn (ph2_ir ) && ph2_ir -> dest == next -> src0 ) {
42
+ /* eliminates:
43
+ * {ALU rn, rs1, rs2; mv rd, rn;}
44
+ * reduces to:
45
+ * {ALU rd, rs1, rs2;}
46
+ */
45
47
ph2_ir -> dest = next -> dest ;
46
48
ph2_ir -> next = next -> next ;
47
- return ;
49
+ return true ;
48
50
}
49
51
}
50
- /* other insn fusions */
52
+
53
+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 ) {
54
+ if (next -> op == OP_add &&
55
+ (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
56
+ /* eliminates:
57
+ * {li rn, 0; add rd, rs1, rn;} or
58
+ * {li rn, 0; add rd, rn, rs1;}
59
+ * reduces to:
60
+ * {mv rd, rs1;}, based on identity property of addition
61
+ */
62
+ /* Determine the non-zero source operand */
63
+ int non_zero_src =
64
+ (ph2_ir -> dest == next -> src0 ) ? next -> src1 : next -> src0 ;
65
+
66
+ /* Transform instruction sequence from addition with zero to move */
67
+ ph2_ir -> op = OP_assign ;
68
+ ph2_ir -> src0 = non_zero_src ;
69
+ ph2_ir -> dest = next -> dest ;
70
+ ph2_ir -> next = next -> next ;
71
+ return true;
72
+ }
73
+
74
+ if (next -> op == OP_sub ) {
75
+ if (ph2_ir -> dest == next -> src1 ) {
76
+ /* eliminates:
77
+ * {li rn, 0; sub rd, rs1, rn;}
78
+ * reduces to:
79
+ * {mv rd, rs1;}
80
+ */
81
+ ph2_ir -> op = OP_assign ;
82
+ ph2_ir -> src0 = next -> src0 ;
83
+ ph2_ir -> dest = next -> dest ;
84
+ ph2_ir -> next = next -> next ;
85
+ return true;
86
+ }
87
+
88
+ if (ph2_ir -> dest == next -> src0 ) {
89
+ /* eliminates:
90
+ * {li rn, 0; sub rd, rn, rs1;}
91
+ * reduces to:
92
+ * {negate rd, rs1;}
93
+ */
94
+ ph2_ir -> op = OP_negate ;
95
+ ph2_ir -> src0 = next -> src1 ;
96
+ ph2_ir -> dest = next -> dest ;
97
+ ph2_ir -> next = next -> next ;
98
+ return true;
99
+ }
100
+ }
101
+
102
+ if (next -> op == OP_mul &&
103
+ (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
104
+ /* eliminates:
105
+ * {li rn, 0; mul rd, rs1, rn;} or
106
+ * {li rn, 0; mul rd, rn, rs1;}
107
+ * reduces to:
108
+ * {li rd, 0}, based on zero property of multiplication
109
+ */
110
+ ph2_ir -> op = OP_load_constant ;
111
+ ph2_ir -> src0 = 0 ;
112
+ ph2_ir -> dest = next -> dest ;
113
+ ph2_ir -> next = next -> next ;
114
+ return true;
115
+ }
116
+ }
117
+
118
+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 1 ) {
119
+ if (next -> op == OP_mul &&
120
+ (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
121
+ /* eliminates:
122
+ * {li rn, 1; mul rd, rs1, rn;} or
123
+ * {li rn, 1; mul rd, rn, rs1;}
124
+ * reduces to:
125
+ * {li rd, rs1}, based on identity property of multiplication
126
+ */
127
+ ph2_ir -> op = OP_assign ;
128
+ ph2_ir -> src0 = ph2_ir -> dest == next -> src0 ? next -> src1 : next -> src0 ;
129
+ ph2_ir -> dest = next -> dest ;
130
+ ph2_ir -> next = next -> next ;
131
+ return true;
132
+ }
133
+ }
134
+
135
+ /* Other instruction fusion should be done here, and for any success fusion,
136
+ * it should return true. This meant to allow peephole optimization to do
137
+ * multiple passes over the IR list to maximize optimization as much as
138
+ * possbile.
139
+ */
140
+
141
+ return false;
51
142
}
52
143
53
144
/* FIXME: release detached basic blocks */
0 commit comments