Skip to content

Commit 1f29def

Browse files
committed
perf(OptimizeInstructions): gate LSB→ctz fold on shrinkLevel >= 1
Per #8562 review (kripken, MaxGraey): the `(if (i32.and X 1) ...)` and `eqz(and X 1)` → `i32.ctz X` rewrites save one instruction (a byte) but TZCNT can cost 1-2 cycles more than AND on common JIT VMs (Agner Fog tables), and JIT-less interpreters (wasm3, smart-contract runtimes) lack a fast path for ctz at all. The byte-saving is unambiguously the win we want under shrink modes; under speed modes the AND form stays. Restrict both folds to `getPassOptions().shrinkLevel >= 1` — fires under -Os and -Oz, no-ops everywhere else. Test rewritten with two RUN lines (DEFAULT + SHRINK prefixes) so both directions are asserted: the fold suppresses cleanly under the default --optimize-instructions invocation, and fires as before when --shrink-level=1 is added.
1 parent 3bacb80 commit 1f29def

2 files changed

Lines changed: 164 additions & 47 deletions

File tree

src/passes/OptimizeInstructions.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1195,8 +1195,15 @@ struct OptimizeInstructions
11951195
}
11961196
// (i32.and X 1) as if-else condition => (i32.ctz X) with swapped arms,
11971197
// since ctz(X) == 0 iff LSB(X) == 1 (saves one instruction).
1198+
//
1199+
// Gated on shrinkLevel >= 1 (i.e. -Os or -Oz) only: TZCNT can cost
1200+
// 1-2 cycles more than AND on common JIT VMs (per Agner Fog's
1201+
// tables), and JIT-less interpreters (wasm3, smart-contract
1202+
// runtimes) lack a fast path for ctz at all. The byte-saving is the
1203+
// win we want under shrink modes; under speed modes the AND form
1204+
// stays. See WebAssembly/binaryen#8562.
11981205
if (auto* binary = curr->condition->dynCast<Binary>()) {
1199-
if (binary->op == AndInt32) {
1206+
if (binary->op == AndInt32 && getPassOptions().shrinkLevel >= 1) {
12001207
Expression* other = nullptr;
12011208
if (auto* c = binary->right->dynCast<Const>()) {
12021209
if (c->value.geti32() == 1) {
@@ -3138,7 +3145,10 @@ struct OptimizeInstructions
31383145
}
31393146
// eqz(and X 1) ==> ctz X in boolean context:
31403147
// both are truthy iff LSB(X) == 0, saving one instruction.
3141-
if (binary->op == AndInt32) {
3148+
// Gated on shrinkLevel >= 1 (-Os, -Oz) — see the matching
3149+
// comment in visitIf and WebAssembly/binaryen#8562.
3150+
if (binary->op == AndInt32 &&
3151+
getPassOptions().shrinkLevel >= 1) {
31423152
Expression* other = nullptr;
31433153
if (auto* c = binary->right->dynCast<Const>()) {
31443154
if (c->value.geti32() == 1) {
Lines changed: 152 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,80 @@
11
;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
2-
;; RUN: wasm-opt %s --optimize-instructions -S -o - | filecheck %s
2+
;; RUN: wasm-opt %s --optimize-instructions -S -o - | filecheck %s --check-prefix=DEFAULT
3+
;; RUN: wasm-opt %s --shrink-level=1 --optimize-instructions -S -o - | filecheck %s --check-prefix=SHRINK
34

4-
;; Test that (if (i32.and X (i32.const 1)) T E) is optimized to
5-
;; (if (i32.ctz X) E T), and (br_if N V (i32.eqz (i32.and X 1))) to
6-
;; (br_if N V (i32.ctz X)), saving one instruction in each case.
5+
;; Test the LSB→ctz fold: under shrink modes (-Os, -Oz, equivalent to
6+
;; --shrink-level >= 1) `(if (i32.and X 1) T E)` becomes
7+
;; `(if (i32.ctz X) E T)`, and `(br_if N V (i32.eqz (i32.and X 1)))`
8+
;; becomes `(br_if N V (i32.ctz X))` — one instruction fewer, but
9+
;; potentially 1-2 cycles slower on JIT VMs and unconditionally slower
10+
;; on JIT-less interpreters. The fold is therefore suppressed under
11+
;; default and speed-optimised modes, and only fires when the user has
12+
;; opted into shrinking. See WebAssembly/binaryen#8562.
713

814
(module
9-
;; CHECK: (func $lsb-if (param $x i32) (result i32)
10-
;; CHECK-NEXT: (if (result i32)
11-
;; CHECK-NEXT: (i32.ctz
12-
;; CHECK-NEXT: (local.get $x)
13-
;; CHECK-NEXT: )
14-
;; CHECK-NEXT: (then
15-
;; CHECK-NEXT: (i32.const 0)
16-
;; CHECK-NEXT: )
17-
;; CHECK-NEXT: (else
18-
;; CHECK-NEXT: (i32.const 1)
19-
;; CHECK-NEXT: )
20-
;; CHECK-NEXT: )
21-
;; CHECK-NEXT: )
15+
;; DEFAULT: (func $lsb-if (param $x i32) (result i32)
16+
;; DEFAULT-NEXT: (if (result i32)
17+
;; DEFAULT-NEXT: (i32.and
18+
;; DEFAULT-NEXT: (local.get $x)
19+
;; DEFAULT-NEXT: (i32.const 1)
20+
;; DEFAULT-NEXT: )
21+
;; DEFAULT-NEXT: (then
22+
;; DEFAULT-NEXT: (i32.const 1)
23+
;; DEFAULT-NEXT: )
24+
;; DEFAULT-NEXT: (else
25+
;; DEFAULT-NEXT: (i32.const 0)
26+
;; DEFAULT-NEXT: )
27+
;; DEFAULT-NEXT: )
28+
;; DEFAULT-NEXT: )
29+
;; SHRINK: (func $lsb-if (param $x i32) (result i32)
30+
;; SHRINK-NEXT: (if (result i32)
31+
;; SHRINK-NEXT: (i32.ctz
32+
;; SHRINK-NEXT: (local.get $x)
33+
;; SHRINK-NEXT: )
34+
;; SHRINK-NEXT: (then
35+
;; SHRINK-NEXT: (i32.const 0)
36+
;; SHRINK-NEXT: )
37+
;; SHRINK-NEXT: (else
38+
;; SHRINK-NEXT: (i32.const 1)
39+
;; SHRINK-NEXT: )
40+
;; SHRINK-NEXT: )
41+
;; SHRINK-NEXT: )
2242
(func $lsb-if (param $x i32) (result i32)
2343
;; if LSB is set, return 1; else return 0
24-
;; optimizes to: if ctz(x) != 0, return 0; else return 1
2544
(if (result i32)
2645
(i32.and (local.get $x) (i32.const 1))
2746
(then (i32.const 1))
2847
(else (i32.const 0))
2948
)
3049
)
3150

32-
;; CHECK: (func $lsb-if-const-left (param $x i32) (result i32)
33-
;; CHECK-NEXT: (if (result i32)
34-
;; CHECK-NEXT: (i32.ctz
35-
;; CHECK-NEXT: (local.get $x)
36-
;; CHECK-NEXT: )
37-
;; CHECK-NEXT: (then
38-
;; CHECK-NEXT: (i32.const 0)
39-
;; CHECK-NEXT: )
40-
;; CHECK-NEXT: (else
41-
;; CHECK-NEXT: (i32.const 1)
42-
;; CHECK-NEXT: )
43-
;; CHECK-NEXT: )
44-
;; CHECK-NEXT: )
51+
;; DEFAULT: (func $lsb-if-const-left (param $x i32) (result i32)
52+
;; DEFAULT-NEXT: (if (result i32)
53+
;; DEFAULT-NEXT: (i32.and
54+
;; DEFAULT-NEXT: (local.get $x)
55+
;; DEFAULT-NEXT: (i32.const 1)
56+
;; DEFAULT-NEXT: )
57+
;; DEFAULT-NEXT: (then
58+
;; DEFAULT-NEXT: (i32.const 1)
59+
;; DEFAULT-NEXT: )
60+
;; DEFAULT-NEXT: (else
61+
;; DEFAULT-NEXT: (i32.const 0)
62+
;; DEFAULT-NEXT: )
63+
;; DEFAULT-NEXT: )
64+
;; DEFAULT-NEXT: )
65+
;; SHRINK: (func $lsb-if-const-left (param $x i32) (result i32)
66+
;; SHRINK-NEXT: (if (result i32)
67+
;; SHRINK-NEXT: (i32.ctz
68+
;; SHRINK-NEXT: (local.get $x)
69+
;; SHRINK-NEXT: )
70+
;; SHRINK-NEXT: (then
71+
;; SHRINK-NEXT: (i32.const 0)
72+
;; SHRINK-NEXT: )
73+
;; SHRINK-NEXT: (else
74+
;; SHRINK-NEXT: (i32.const 1)
75+
;; SHRINK-NEXT: )
76+
;; SHRINK-NEXT: )
77+
;; SHRINK-NEXT: )
4578
(func $lsb-if-const-left (param $x i32) (result i32)
4679
;; same but constant on the left
4780
(if (result i32)
@@ -51,21 +84,37 @@
5184
)
5285
)
5386

54-
;; CHECK: (func $lsb-brif (param $x i32) (result i32)
55-
;; CHECK-NEXT: (block $done (result i32)
56-
;; CHECK-NEXT: (drop
57-
;; CHECK-NEXT: (br_if $done
58-
;; CHECK-NEXT: (i32.const 99)
59-
;; CHECK-NEXT: (i32.ctz
60-
;; CHECK-NEXT: (local.get $x)
61-
;; CHECK-NEXT: )
62-
;; CHECK-NEXT: )
63-
;; CHECK-NEXT: )
64-
;; CHECK-NEXT: (i32.const 42)
65-
;; CHECK-NEXT: )
66-
;; CHECK-NEXT: )
87+
;; DEFAULT: (func $lsb-brif (param $x i32) (result i32)
88+
;; DEFAULT-NEXT: (block $done (result i32)
89+
;; DEFAULT-NEXT: (drop
90+
;; DEFAULT-NEXT: (br_if $done
91+
;; DEFAULT-NEXT: (i32.const 99)
92+
;; DEFAULT-NEXT: (i32.eqz
93+
;; DEFAULT-NEXT: (i32.and
94+
;; DEFAULT-NEXT: (local.get $x)
95+
;; DEFAULT-NEXT: (i32.const 1)
96+
;; DEFAULT-NEXT: )
97+
;; DEFAULT-NEXT: )
98+
;; DEFAULT-NEXT: )
99+
;; DEFAULT-NEXT: )
100+
;; DEFAULT-NEXT: (i32.const 42)
101+
;; DEFAULT-NEXT: )
102+
;; DEFAULT-NEXT: )
103+
;; SHRINK: (func $lsb-brif (param $x i32) (result i32)
104+
;; SHRINK-NEXT: (block $done (result i32)
105+
;; SHRINK-NEXT: (drop
106+
;; SHRINK-NEXT: (br_if $done
107+
;; SHRINK-NEXT: (i32.const 99)
108+
;; SHRINK-NEXT: (i32.ctz
109+
;; SHRINK-NEXT: (local.get $x)
110+
;; SHRINK-NEXT: )
111+
;; SHRINK-NEXT: )
112+
;; SHRINK-NEXT: )
113+
;; SHRINK-NEXT: (i32.const 42)
114+
;; SHRINK-NEXT: )
115+
;; SHRINK-NEXT: )
67116
(func $lsb-brif (param $x i32) (result i32)
68-
;; br_if (eqz (and X 1)) => br_if (ctz X): the typical is_skewed/is_scalar pattern
117+
;; br_if (eqz (and X 1)): the typical is_skewed/is_scalar pattern
69118
(block $done (result i32)
70119
(drop
71120
(br_if $done
@@ -76,4 +125,62 @@
76125
(i32.const 42)
77126
)
78127
)
128+
129+
;; DEFAULT: (func $lsb-select (param $x i32) (param $a i32) (param $b i32) (result i32)
130+
;; DEFAULT-NEXT: (select
131+
;; DEFAULT-NEXT: (local.get $b)
132+
;; DEFAULT-NEXT: (local.get $a)
133+
;; DEFAULT-NEXT: (i32.and
134+
;; DEFAULT-NEXT: (local.get $x)
135+
;; DEFAULT-NEXT: (i32.const 1)
136+
;; DEFAULT-NEXT: )
137+
;; DEFAULT-NEXT: )
138+
;; DEFAULT-NEXT: )
139+
;; SHRINK: (func $lsb-select (param $x i32) (param $a i32) (param $b i32) (result i32)
140+
;; SHRINK-NEXT: (select
141+
;; SHRINK-NEXT: (local.get $a)
142+
;; SHRINK-NEXT: (local.get $b)
143+
;; SHRINK-NEXT: (i32.ctz
144+
;; SHRINK-NEXT: (local.get $x)
145+
;; SHRINK-NEXT: )
146+
;; SHRINK-NEXT: )
147+
;; SHRINK-NEXT: )
148+
(func $lsb-select (param $x i32) (param $a i32) (param $b i32) (result i32)
149+
;; select with the eqz-and-1 boolean condition.
150+
;; Non-constant arms keep the select itself in the IR — otherwise
151+
;; an unrelated `select c1 c0 P` simplification would eat it.
152+
(select
153+
(local.get $a)
154+
(local.get $b)
155+
(i32.eqz (i32.and (local.get $x) (i32.const 1)))
156+
)
157+
)
158+
159+
;; DEFAULT: (func $lsb-select-const-left (param $x i32) (param $a i32) (param $b i32) (result i32)
160+
;; DEFAULT-NEXT: (select
161+
;; DEFAULT-NEXT: (local.get $b)
162+
;; DEFAULT-NEXT: (local.get $a)
163+
;; DEFAULT-NEXT: (i32.and
164+
;; DEFAULT-NEXT: (local.get $x)
165+
;; DEFAULT-NEXT: (i32.const 1)
166+
;; DEFAULT-NEXT: )
167+
;; DEFAULT-NEXT: )
168+
;; DEFAULT-NEXT: )
169+
;; SHRINK: (func $lsb-select-const-left (param $x i32) (param $a i32) (param $b i32) (result i32)
170+
;; SHRINK-NEXT: (select
171+
;; SHRINK-NEXT: (local.get $a)
172+
;; SHRINK-NEXT: (local.get $b)
173+
;; SHRINK-NEXT: (i32.ctz
174+
;; SHRINK-NEXT: (local.get $x)
175+
;; SHRINK-NEXT: )
176+
;; SHRINK-NEXT: )
177+
;; SHRINK-NEXT: )
178+
(func $lsb-select-const-left (param $x i32) (param $a i32) (param $b i32) (result i32)
179+
;; same but with the constant on the left of the AND.
180+
(select
181+
(local.get $a)
182+
(local.get $b)
183+
(i32.eqz (i32.and (i32.const 1) (local.get $x)))
184+
)
185+
)
79186
)

0 commit comments

Comments
 (0)