Skip to content

Commit cdc7bfe

Browse files
jameshu15869schoolclaby2
authored
Cranelift: Optimize out redundant select + icmp instructions (#12135)
* Implement simple icmp + select optimization This optimizes the following: a = select x, k1, k2 b = icmp eq a, k1 to b = x We shouldn't trigger this optimization when k1 == k2 because constant propagation should optimize that case. * Optimize icmp eq and ne for select-on-icmp (#2) * Optimize icmp eq and ne for select-on-icmp When we have a select followed by an icmp and the inner condition to the select is also an icmp, we can remove the select + icmp and directly use the inner condition. We negated the values of the inner_condition via bxor x 1. * Add trailing newline * feat: avoid relying on icmp inner_cond --------- Co-authored-by: school <school@schools-MacBook-Air.local> Co-authored-by: Edward Wibowo <wibow9770@gmail.com> * test: add extra test --------- Co-authored-by: school <school@schools-MacBook-Air.local> Co-authored-by: Edward Wibowo <wibow9770@gmail.com>
1 parent 69ef9af commit cdc7bfe

File tree

3 files changed

+257
-0
lines changed

3 files changed

+257
-0
lines changed

cranelift/codegen/src/opts/icmp.isle

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,3 +376,37 @@
376376
(simplify (band ty (ult ty x y) (ne ty (iconst_s _ -1) x)))
377377
(ult ty x y))
378378

379+
;; icmp on select with two constant inputs compared with one of the two
380+
;; constants can directly use the inner select condition.
381+
;; See: https://github.com/bytecodealliance/wasmtime/issues/11578
382+
;; `eq (select c, k1, k2), k1` with `k1 != k2` holds exactly when `c` is
;; truthy, so it becomes `c != 0`. The zero constant is built with the type of
;; `c` itself: a select's condition type is independent of its result type, and
;; using the result type here would compare operands of different types.
(rule (simplify (eq _
                    (select _ inner_cond @ (value_type cond_ty)
                            (iconst_u _ k1)
                            (iconst_u _ k2))
                    (iconst_u _ k1)))
      (if-let false (u64_eq k1 k2))
      (ne cond_ty inner_cond (iconst_u cond_ty 0)))
389+
390+
;; `eq (select c, k1, k2), k2` with `k1 != k2` holds exactly when `c` is
;; zero, so it becomes `c == 0`. The zero constant is built with the type of
;; `c` itself: a select's condition type is independent of its result type, and
;; using the result type here would compare operands of different types.
(rule (simplify (eq _
                    (select _ inner_cond @ (value_type cond_ty)
                            (iconst_u _ k1)
                            (iconst_u _ k2))
                    (iconst_u _ k2)))
      (if-let false (u64_eq k1 k2))
      (eq cond_ty inner_cond (iconst_u cond_ty 0)))
397+
398+
;; `ne (select c, k1, k2), k1` with `k1 != k2` holds exactly when `c` is
;; zero, so it becomes `c == 0`. The zero constant is built with the type of
;; `c` itself: a select's condition type is independent of its result type, and
;; using the result type here would compare operands of different types.
(rule (simplify (ne _
                    (select _ inner_cond @ (value_type cond_ty)
                            (iconst_u _ k1)
                            (iconst_u _ k2))
                    (iconst_u _ k1)))
      (if-let false (u64_eq k1 k2))
      (eq cond_ty inner_cond (iconst_u cond_ty 0)))
405+
406+
;; `ne (select c, k1, k2), k2` with `k1 != k2` holds exactly when `c` is
;; truthy, so it becomes `c != 0`. The zero constant is built with the type of
;; `c` itself: a select's condition type is independent of its result type, and
;; using the result type here would compare operands of different types.
(rule (simplify (ne _
                    (select _ inner_cond @ (value_type cond_ty)
                            (iconst_u _ k1)
                            (iconst_u _ k2))
                    (iconst_u _ k2)))
      (if-let false (u64_eq k1 k2))
      (ne cond_ty inner_cond (iconst_u cond_ty 0)))
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
test optimize
2+
set opt_level=speed
3+
target x86_64
4+
target aarch64
5+
6+
; Comparing the select result against its "true" arm (6) is the same as the
; select condition, so the optimized body is expected to return the inner
; compare v4 directly.
function %eq_k1(i64) -> i8 {
block0(v2: i64):
    v14 = iconst.i64 -562949953421310
    v3 = band v2, v14
    v16 = iconst.i64 0
    v4 = icmp eq v3, v16
    v5 = iconst.i64 6
    v6 = iconst.i64 7
    v7 = select v4, v5, v6
    v8 = icmp eq v7, v5
    return v8
}

; check: function %eq_k1(i64) -> i8 fast {
; check: block0(v2: i64):
; nextln: v14 = iconst.i64 -562949953421310
; nextln: v3 = band v2, v14
; nextln: v16 = iconst.i64 0
; nextln: v4 = icmp eq v3, v16
; nextln: return v4
; nextln: }
27+
28+
; Comparing the select result against its "false" arm (7) is the negation of
; the select condition, so the optimized body is expected to return an
; inverted compare (ne instead of eq) on the masked input.
;
; The number of the freshly created compare depends on how many intermediate
; values the rewrite allocates, so it is captured in a filecheck variable
; rather than hard-coded. Trailing value annotations are left out of the
; patterns, as in %eq_k1, so the match does not depend on printer spacing.
function %eq_k2(i64) -> i8 {
block0(v2: i64):
    v14 = iconst.i64 -562949953421310
    v3 = band v2, v14
    v16 = iconst.i64 0
    v4 = icmp eq v3, v16
    v5 = iconst.i64 6
    v6 = iconst.i64 7
    v7 = select v4, v5, v6
    v8 = icmp eq v7, v6
    return v8
}

; check: function %eq_k2(i64) -> i8 fast {
; check: block0(v2: i64):
; nextln: v14 = iconst.i64 -562949953421310
; nextln: v3 = band v2, v14
; nextln: v16 = iconst.i64 0
; nextln: $(res=v\d+) = icmp ne v3, v16
; nextln: return $res
; nextln: }
49+
50+
; A not-equal compare of the select result against its "true" arm (6) is the
; negation of the select condition, so the optimized body is expected to
; return an inverted compare (ne instead of eq) on the masked input.
;
; The number of the freshly created compare depends on how many intermediate
; values the rewrite allocates, so it is captured in a filecheck variable
; rather than hard-coded. Trailing value annotations are left out of the
; patterns, as in %eq_k1, so the match does not depend on printer spacing.
function %ne_k1(i64) -> i8 {
block0(v2: i64):
    v14 = iconst.i64 -562949953421310
    v3 = band v2, v14
    v16 = iconst.i64 0
    v4 = icmp eq v3, v16
    v5 = iconst.i64 6
    v6 = iconst.i64 7
    v7 = select v4, v5, v6
    v8 = icmp ne v7, v5
    return v8
}

; check: function %ne_k1(i64) -> i8 fast {
; check: block0(v2: i64):
; nextln: v14 = iconst.i64 -562949953421310
; nextln: v3 = band v2, v14
; nextln: v16 = iconst.i64 0
; nextln: $(res=v\d+) = icmp ne v3, v16
; nextln: return $res
; nextln: }
71+
72+
; A not-equal compare of the select result against its "false" arm (7) is the
; same as the select condition, so the optimized body is expected to return
; the inner compare v4 directly.
function %ne_k2(i64) -> i8 {
block0(v2: i64):
    v14 = iconst.i64 -562949953421310
    v3 = band v2, v14
    v16 = iconst.i64 0
    v4 = icmp eq v3, v16
    v5 = iconst.i64 6
    v6 = iconst.i64 7
    v7 = select v4, v5, v6
    v8 = icmp ne v7, v6
    return v8
}

; check: function %ne_k2(i64) -> i8 fast {
; check: block0(v2: i64):
; nextln: v14 = iconst.i64 -562949953421310
; nextln: v3 = band v2, v14
; nextln: v16 = iconst.i64 0
; nextln: v4 = icmp eq v3, v16
; nextln: return v4
; nextln: }
93+
94+
; When both select arms are the same constant the select+icmp rules are
; guarded off (k1 == k2). The expected output below is a constant 1, i.e. the
; whole computation is folded by other simplifications.
function %constant_propagated(i64) -> i8 {
block0(v2: i64):
    v14 = iconst.i64 -562949953421310
    v3 = band v2, v14
    v16 = iconst.i64 0
    v4 = icmp eq v3, v16
    v5 = iconst.i64 6
    v6 = select v4, v5, v5
    v7 = icmp eq v6, v5
    return v7
}

; check: function %constant_propagated(i64) -> i8 fast {
; check: block0(v2: i64):
; nextln: v17 = iconst.i8 1
; nextln: return v17
; nextln: }
112+
113+
; The select condition here is a plain function argument rather than the
; result of a compare, so the rewrite cannot forward an existing compare and
; is expected to materialize an explicit test of v0 against zero.
;
; The trailing value annotation is left out of the compare pattern, as in the
; other tests in this file, so the match does not depend on printer spacing.
function %a(i64) -> i8 {
block0(v0: i64):
    v1 = iconst.i64 6
    v3 = iconst.i64 7
    v4 = select v0, v1, v3
    v5 = icmp eq v4, v1
    return v5
}

; check: function %a(i64) -> i8 fast {
; check: block0(v0: i64):
; nextln: v6 = iconst.i64 0
; nextln: v7 = icmp ne v0, v6
; nextln: return v7
; nextln: }
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
test interpret
2+
test run
3+
set opt_level=none
4+
target x86_64
5+
target aarch64
6+
set opt_level=speed
7+
target x86_64
8+
target aarch64
9+
10+
; Returns 1 when the select picks its "true" arm (6), i.e. when
; (v2 & 0xfffe_0000_0000_0002) == 0. Inputs 0 and 1 leave the masked value
; zero; inputs 2 and -1 make it nonzero, covering the other select arm.
function %eq_k1(i64) -> i8 {
block0(v2: i64):
    v14 = iconst.i64 -562949953421310
    v3 = band v2, v14
    v16 = iconst.i64 0
    v4 = icmp eq v3, v16
    v5 = iconst.i64 6
    v6 = iconst.i64 7
    v7 = select v4, v5, v6
    v8 = icmp eq v7, v5
    return v8
}

; run: %eq_k1(0) == 1
; run: %eq_k1(1) == 1
; run: %eq_k1(2) == 0
; run: %eq_k1(-1) == 0
24+
25+
; Returns 1 when the select picks its "false" arm (7), i.e. when
; (v2 & 0xfffe_0000_0000_0002) != 0. Inputs 0 and 1 leave the masked value
; zero; inputs 2 and -1 make it nonzero, covering the other select arm.
function %eq_k2(i64) -> i8 {
block0(v2: i64):
    v14 = iconst.i64 -562949953421310
    v3 = band v2, v14
    v16 = iconst.i64 0
    v4 = icmp eq v3, v16
    v5 = iconst.i64 6
    v6 = iconst.i64 7
    v7 = select v4, v5, v6
    v8 = icmp eq v7, v6
    return v8
}

; run: %eq_k2(0) == 0
; run: %eq_k2(1) == 0
; run: %eq_k2(2) == 1
; run: %eq_k2(-1) == 1
39+
40+
; Returns 1 when the select result differs from its "true" arm (6), i.e. when
; (v2 & 0xfffe_0000_0000_0002) != 0. Inputs 0 and 1 leave the masked value
; zero; inputs 2 and -1 make it nonzero, covering the other select arm.
function %ne_k1(i64) -> i8 {
block0(v2: i64):
    v14 = iconst.i64 -562949953421310
    v3 = band v2, v14
    v16 = iconst.i64 0
    v4 = icmp eq v3, v16
    v5 = iconst.i64 6
    v6 = iconst.i64 7
    v7 = select v4, v5, v6
    v8 = icmp ne v7, v5
    return v8
}

; run: %ne_k1(0) == 0
; run: %ne_k1(1) == 0
; run: %ne_k1(2) == 1
; run: %ne_k1(-1) == 1
54+
55+
; Returns 1 when the select result differs from its "false" arm (7), i.e. when
; (v2 & 0xfffe_0000_0000_0002) == 0. Inputs 0 and 1 leave the masked value
; zero; inputs 2 and -1 make it nonzero, covering the other select arm.
function %ne_k2(i64) -> i8 {
block0(v2: i64):
    v14 = iconst.i64 -562949953421310
    v3 = band v2, v14
    v16 = iconst.i64 0
    v4 = icmp eq v3, v16
    v5 = iconst.i64 6
    v6 = iconst.i64 7
    v7 = select v4, v5, v6
    v8 = icmp ne v7, v6
    return v8
}

; run: %ne_k2(0) == 1
; run: %ne_k2(1) == 1
; run: %ne_k2(2) == 0
; run: %ne_k2(-1) == 0
69+
70+
; Both select arms are the same constant, so the result equals that constant
; regardless of the condition and the function returns 1 for every input.
; Input 0 makes the condition true and input 2 makes it false.
function %constant_propagated(i64) -> i8 {
block0(v2: i64):
    v14 = iconst.i64 -562949953421310
    v3 = band v2, v14
    v16 = iconst.i64 0
    v4 = icmp eq v3, v16
    v5 = iconst.i64 6
    v6 = select v4, v5, v5
    v7 = icmp eq v6, v5
    return v7
}

; run: %constant_propagated(0) == 1
; run: %constant_propagated(2) == 1
84+
85+
; The select condition is the raw i64 argument instead of a compare result.
; Any nonzero argument selects 6 and so compares equal to 6; zero selects 7.
function %non_icmp_inner(i64) -> i8 {
block0(v0: i64):
    v1 = iconst.i64 6
    v3 = iconst.i64 7
    v4 = select v0, v1, v3
    v5 = icmp eq v4, v1
    return v5
}

; run: %non_icmp_inner(0) == 0
; run: %non_icmp_inner(1) == 1
; run: %non_icmp_inner(5) == 1

0 commit comments

Comments
 (0)