Skip to content

Commit 6196ab1

Browse files
authored
Merge pull request #183 from ChAoSUnItY/feat/optimization
Eliminate arithmetic instructions based on properties
2 parents 24bffd4 + 52a81ab commit 6196ab1

File tree

1 file changed

+99
-8
lines changed

1 file changed

+99
-8
lines changed

src/peephole.c

+99-8
Original file line numberDiff line numberDiff line change
@@ -31,23 +31,114 @@ bool is_fusible_insn(ph2_ir_t *ph2_ir)
3131
}
3232
}
3333

34-
void insn_fusion(ph2_ir_t *ph2_ir)
34+
bool insn_fusion(ph2_ir_t *ph2_ir)
3535
{
3636
ph2_ir_t *next = ph2_ir->next;
3737
if (!next)
38-
return;
38+
return false;
3939

4040
if (next->op == OP_assign) {
41-
/* eliminate {ALU rn, rs1, rs2; mv rd, rn;} */
42-
if (!is_fusible_insn(ph2_ir))
43-
return;
44-
if (ph2_ir->dest == next->src0) {
41+
if (is_fusible_insn(ph2_ir) && ph2_ir->dest == next->src0) {
42+
/* eliminates:
43+
* {ALU rn, rs1, rs2; mv rd, rn;}
44+
* reduces to:
45+
* {ALU rd, rs1, rs2;}
46+
*/
4547
ph2_ir->dest = next->dest;
4648
ph2_ir->next = next->next;
47-
return;
49+
return true;
4850
}
4951
}
50-
/* other insn fusions */
52+
53+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0) {
54+
if (next->op == OP_add &&
55+
(ph2_ir->dest == next->src0 || ph2_ir->dest == next->src1)) {
56+
/* eliminates:
57+
* {li rn, 0; add rd, rs1, rn;} or
58+
* {li rn, 0; add rd, rn, rs1;}
59+
* reduces to:
60+
* {mv rd, rs1;}, based on identity property of addition
61+
*/
62+
/* Determine the non-zero source operand */
63+
int non_zero_src =
64+
(ph2_ir->dest == next->src0) ? next->src1 : next->src0;
65+
66+
/* Transform instruction sequence from addition with zero to move */
67+
ph2_ir->op = OP_assign;
68+
ph2_ir->src0 = non_zero_src;
69+
ph2_ir->dest = next->dest;
70+
ph2_ir->next = next->next;
71+
return true;
72+
}
73+
74+
if (next->op == OP_sub) {
75+
if (ph2_ir->dest == next->src1) {
76+
/* eliminates:
77+
* {li rn, 0; sub rd, rs1, rn;}
78+
* reduces to:
79+
* {mv rd, rs1;}
80+
*/
81+
ph2_ir->op = OP_assign;
82+
ph2_ir->src0 = next->src0;
83+
ph2_ir->dest = next->dest;
84+
ph2_ir->next = next->next;
85+
return true;
86+
}
87+
88+
if (ph2_ir->dest == next->src0) {
89+
/* eliminates:
90+
* {li rn, 0; sub rd, rn, rs1;}
91+
* reduces to:
92+
* {negate rd, rs1;}
93+
*/
94+
ph2_ir->op = OP_negate;
95+
ph2_ir->src0 = next->src1;
96+
ph2_ir->dest = next->dest;
97+
ph2_ir->next = next->next;
98+
return true;
99+
}
100+
}
101+
102+
if (next->op == OP_mul &&
103+
(ph2_ir->dest == next->src0 || ph2_ir->dest == next->src1)) {
104+
/* eliminates:
105+
* {li rn, 0; mul rd, rs1, rn;} or
106+
* {li rn, 0; mul rd, rn, rs1;}
107+
* reduces to:
108+
* {li rd, 0}, based on zero property of multiplication
109+
*/
110+
ph2_ir->op = OP_load_constant;
111+
ph2_ir->src0 = 0;
112+
ph2_ir->dest = next->dest;
113+
ph2_ir->next = next->next;
114+
return true;
115+
}
116+
}
117+
118+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 1) {
119+
if (next->op == OP_mul &&
120+
(ph2_ir->dest == next->src0 || ph2_ir->dest == next->src1)) {
121+
/* eliminates:
122+
* {li rn, 1; mul rd, rs1, rn;} or
123+
* {li rn, 1; mul rd, rn, rs1;}
124+
* reduces to:
125+
* {mv rd, rs1;}, based on identity property of multiplication
126+
*/
127+
ph2_ir->op = OP_assign;
128+
ph2_ir->src0 = ph2_ir->dest == next->src0 ? next->src1 : next->src0;
129+
ph2_ir->dest = next->dest;
130+
ph2_ir->next = next->next;
131+
return true;
132+
}
133+
}
134+
135+
/* Other instruction fusions should be done here, and for any successful fusion,
136+
* it should return true. This is meant to allow peephole optimization to do
137+
* multiple passes over the IR list to maximize optimization as much as
138+
* possible.
139+
*/
140+
141+
return false;
51142
}
52143

53144
/* FIXME: release detached basic blocks */

0 commit comments

Comments
 (0)