Skip to content

Commit 684d5a0

Browse files
feat: lazy memory allocation, fix instruction rewrite order
Signed-off-by: Henry <mail@henrygressmann.de>
1 parent 143c7dd commit 684d5a0

15 files changed

Lines changed: 552 additions & 57 deletions

File tree

crates/parser/src/lib.rs

Lines changed: 24 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,30 @@ pub use tinywasm_types::TinyWasmModule;
4343

4444
/// Parser optimization and lowering options.
4545
#[non_exhaustive]
46-
#[derive(Debug, Clone, Default)]
47-
pub struct ParserOptions {}
46+
#[derive(Debug, Clone)]
47+
pub struct ParserOptions {
48+
/// Whether to optimize local memory allocation by skipping allocation of unused local memories.
49+
pub optimize_local_memory_allocation: bool,
50+
}
51+
52+
impl Default for ParserOptions {
53+
fn default() -> Self {
54+
Self { optimize_local_memory_allocation: true }
55+
}
56+
}
57+
58+
impl ParserOptions {
59+
/// Enable or disable the optimization that skips allocating unused local memories.
60+
pub const fn with_local_memory_allocation_optimization(mut self, enabled: bool) -> Self {
61+
self.optimize_local_memory_allocation = enabled;
62+
self
63+
}
64+
65+
/// Returns whether unused local memory allocation optimization is enabled.
66+
pub const fn optimize_local_memory_allocation(&self) -> bool {
67+
self.optimize_local_memory_allocation
68+
}
69+
}
4870

4971
/// A WebAssembly parser
5072
#[derive(Debug, Default)]

crates/parser/src/module.rs

Lines changed: 66 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -167,7 +167,7 @@ impl ModuleReader {
167167
Ok(())
168168
}
169169

170-
pub(crate) fn into_module(self, _options: &ParserOptions) -> Result<TinyWasmModule> {
170+
pub(crate) fn into_module(self, options: &ParserOptions) -> Result<TinyWasmModule> {
171171
if !self.end_reached {
172172
return Err(ParseError::EndNotReached);
173173
}
@@ -176,28 +176,73 @@ impl ModuleReader {
176176
return Err(ParseError::Other("Code and code type address count mismatch".to_string()));
177177
}
178178

179-
let imported_func_count = self.imports.iter().filter(|i| matches!(&i.kind, ImportKind::Function(_))).count();
180-
let funcs = self.code.into_iter().zip(self.code_type_addrs).enumerate().map(
181-
|(func_idx, ((instructions, mut data, locals), ty_idx))| {
182-
let ty = self.func_types.get(ty_idx as usize).expect("No func type for func, this is a bug").clone();
183-
let params = ValueCounts::from_iter(ty.params());
184-
let self_func = (imported_func_count + func_idx) as u32;
185-
let instructions = optimize::optimize_instructions(instructions, &mut data, self_func);
186-
WasmFunction { instructions: ArcSlice::from(instructions), data, locals, params, ty }
187-
},
188-
);
179+
let Self {
180+
start_func,
181+
func_types,
182+
code_type_addrs,
183+
exports,
184+
code,
185+
globals,
186+
table_types,
187+
memory_types,
188+
imports,
189+
data,
190+
elements,
191+
..
192+
} = self;
193+
194+
let imported_func_count = imports.iter().filter(|i| matches!(&i.kind, ImportKind::Function(_))).count();
195+
let imported_memory_count = imports.iter().filter(|i| matches!(&i.kind, ImportKind::Memory(_))).count() as u32;
196+
let has_local_memory_export =
197+
exports.iter().any(|export| export.kind == ExternalKind::Memory && export.index >= imported_memory_count);
198+
let has_active_data_segment_on_local_memory = data.iter().any(|data| match &data.kind {
199+
DataKind::Active { mem, .. } => *mem >= imported_memory_count,
200+
DataKind::Passive => false,
201+
});
202+
let optimize_local_memory_allocation = options.optimize_local_memory_allocation();
203+
let mut local_memory_allocation = if memory_types.is_empty() {
204+
LocalMemoryAllocation::Skip
205+
} else if !optimize_local_memory_allocation || has_active_data_segment_on_local_memory {
206+
LocalMemoryAllocation::Eager
207+
} else if has_local_memory_export {
208+
LocalMemoryAllocation::Lazy
209+
} else {
210+
LocalMemoryAllocation::Skip
211+
};
212+
let mut funcs = Vec::with_capacity(code.len());
213+
214+
for (func_idx, ((instructions, mut data, locals), ty_idx)) in code.into_iter().zip(code_type_addrs).enumerate()
215+
{
216+
let ty = func_types.get(ty_idx as usize).expect("No func type for func, this is a bug").clone();
217+
let params = ValueCounts::from_iter(ty.params());
218+
let self_func = (imported_func_count + func_idx) as u32;
219+
let optimized = optimize::optimize_instructions(
220+
instructions,
221+
&mut data,
222+
self_func,
223+
imported_memory_count,
224+
optimize_local_memory_allocation && local_memory_allocation != LocalMemoryAllocation::Eager,
225+
);
226+
227+
if optimized.uses_local_memory {
228+
local_memory_allocation = LocalMemoryAllocation::Eager;
229+
}
230+
231+
funcs.push(WasmFunction { instructions: ArcSlice::from(optimized.instructions), data, locals, params, ty });
232+
}
189233

190234
Ok(TinyWasmModule {
191-
funcs: funcs.collect(),
192-
func_types: self.func_types.into(),
193-
globals: self.globals.into(),
194-
table_types: self.table_types.into(),
195-
imports: self.imports.into(),
196-
start_func: self.start_func,
197-
data: self.data.into(),
198-
exports: self.exports.into(),
199-
elements: self.elements.into(),
200-
memory_types: self.memory_types.into(),
235+
funcs: funcs.into(),
236+
func_types: func_types.into(),
237+
globals: globals.into(),
238+
table_types: table_types.into(),
239+
imports: imports.into(),
240+
start_func,
241+
data: data.into(),
242+
exports: exports.into(),
243+
elements: elements.into(),
244+
memory_types: memory_types.into(),
245+
local_memory_allocation,
201246
})
202247
}
203248
}

crates/parser/src/optimize.rs

Lines changed: 48 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -2,18 +2,32 @@ use crate::macros::optimize::*;
22
use alloc::vec::Vec;
33
use tinywasm_types::{CmpOp, Instruction, WasmFunctionData};
44

5+
pub(crate) struct OptimizeResult {
6+
pub(crate) instructions: Vec<Instruction>,
7+
pub(crate) uses_local_memory: bool,
8+
}
9+
510
pub(crate) fn optimize_instructions(
611
mut instructions: Vec<Instruction>,
712
function_data: &mut WasmFunctionData,
813
self_func_addr: u32,
9-
) -> Vec<Instruction> {
10-
rewrite(&mut instructions, self_func_addr);
14+
imported_memory_count: u32,
15+
track_local_memory_usage: bool,
16+
) -> OptimizeResult {
17+
let uses_local_memory = rewrite(&mut instructions, self_func_addr, imported_memory_count, track_local_memory_usage);
1118
remove_nop(&mut instructions, function_data);
12-
instructions
19+
OptimizeResult { instructions, uses_local_memory }
1320
}
1421

15-
fn rewrite(instrs: &mut [Instruction], self_func_addr: u32) {
22+
fn rewrite(
23+
instrs: &mut [Instruction],
24+
self_func_addr: u32,
25+
imported_memory_count: u32,
26+
track_local_memory_usage: bool,
27+
) -> bool {
1628
use Instruction::*;
29+
let mut uses_local_memory = false;
30+
1731
for i in 0..instrs.len() {
1832
match instrs[i] {
1933
LocalCopy32(a, b) if a == b => instrs[i] = Nop,
@@ -22,14 +36,14 @@ fn rewrite(instrs: &mut [Instruction], self_func_addr: u32) {
2236
Call(addr) if addr == self_func_addr => instrs[i] = CallSelf,
2337
ReturnCall(addr) if addr == self_func_addr => instrs[i] = ReturnCallSelf,
2438
I32Add => {
25-
rewrite!(instrs, i, [I32Const(c)] => AddConst32(c));
2639
rewrite!(instrs, i, [LocalGet32(a), LocalGet32(b)] => AddLocalLocal32(a, b));
2740
rewrite!(instrs, i, [LocalGet32(local), I32Const(c)] => [ Nop, LocalGet32(local), AddConst32(c)]);
41+
rewrite!(instrs, i, [I32Const(c)] => AddConst32(c));
2842
}
2943
I64Add => {
30-
rewrite!(instrs, i, [I64Const(c)] => AddConst64(c));
3144
rewrite!(instrs, i, [LocalGet64(a), LocalGet64(b)] => AddLocalLocal64(a, b));
3245
rewrite!(instrs, i, [LocalGet64(local), I64Const(c)] => [ Nop, LocalGet64(local), AddConst64(c)]);
46+
rewrite!(instrs, i, [I64Const(c)] => AddConst64(c));
3347
}
3448
I64Rotl => rewrite!(instrs, i, [I64Xor, I64Const(c)] => XorRotlConst64(c)),
3549
I32Store(memarg) => {
@@ -69,6 +83,7 @@ fn rewrite(instrs: &mut [Instruction], self_func_addr: u32) {
6983
rewrite!(instrs, i, [LocalGet32(src)] => if src == dst { Nop } else { LocalCopy32(src, dst) });
7084
rewrite!(instrs, i, [I32Const(c)] => SetLocalConst32(dst, c));
7185
rewrite!(instrs, i, [F32Const(c)] => SetLocalConst32(dst, i32::from_ne_bytes(c.to_bits().to_ne_bytes())));
86+
rewrite!(instrs, i, [AddLocalLocal32(a, b)] => AddLocalLocalSet32(a, b, dst));
7287
rewrite!(instrs, i, [LocalGet32(src), AddConst32(c)] if (src == dst) => AddLocalConst32(dst, c));
7388
rewrite!(instrs, i, [LoadLocal32(memarg, addr)] if (let Ok(dst) = u8::try_from(dst)) => LoadLocalSet32(memarg, addr, dst));
7489
rewrite!(instrs, i,
@@ -81,6 +96,7 @@ fn rewrite(instrs: &mut [Instruction], self_func_addr: u32) {
8196
rewrite!(instrs, i, [LocalGet64(src)] => if src == dst { Nop } else { LocalCopy64(src, dst) });
8297
rewrite!(instrs, i, [I64Const(c)] => SetLocalConst64(dst, c));
8398
rewrite!(instrs, i, [F64Const(c)] => SetLocalConst64(dst, i64::from_ne_bytes(c.to_bits().to_ne_bytes())));
99+
rewrite!(instrs, i, [AddLocalLocal64(a, b)] => AddLocalLocalSet64(a, b, dst));
84100
rewrite!(instrs, i,
85101
[LocalGet64(src), AddConst64(c)] if (src == dst) =>
86102
AddLocalConst64(dst, c)
@@ -130,62 +146,68 @@ fn rewrite(instrs: &mut [Instruction], self_func_addr: u32) {
130146
replace!(instrs, i, 1 => [Nop, JumpIfNonZero(ip)]);
131147
continue;
132148
});
133-
rewrite!(instrs, i, [cmp, I32Const(imm)] if (let Some(op) = cmp_op(cmp)) =>
134-
JumpCmpStackConst32 { target_ip: ip, imm, op: inverse_cmp_op(op) }
135-
);
136-
rewrite!(instrs, i, [cmp, I64Const(imm)] if (let Some(op) = cmp_op_64(cmp)) =>
137-
JumpCmpStackConst64 { target_ip: ip, imm, op: inverse_cmp_op(op) }
138-
);
139149
rewrite!(instrs, i,
140-
[LocalGet32(local), cmp, I32Const(imm)] if (let Some(op) = cmp_op(cmp)) =>
150+
[LocalGet32(local), I32Const(imm), cmp] if (let Some(op) = cmp_op(cmp)) =>
141151
JumpCmpLocalConst32 { target_ip: ip, local, imm, op: inverse_cmp_op(op) }
142152
);
143153
rewrite!(instrs, i,
144-
[LocalGet64(local), cmp, I64Const(imm)] if
154+
[LocalGet64(local), I64Const(imm), cmp] if
145155
(let Some(op) = cmp_op_64(cmp) && let Ok(imm) = i32::try_from(imm)) =>
146156
JumpCmpLocalConst64 { target_ip: ip, local, imm, op: inverse_cmp_op(op) }
147157
);
148158
rewrite!(instrs, i,
149-
[LocalGet32(left), cmp, LocalGet32(right)] if (let Some(op) = cmp_op(cmp)) =>
159+
[LocalGet32(left), LocalGet32(right), cmp] if (let Some(op) = cmp_op(cmp)) =>
150160
JumpCmpLocalLocal32 { target_ip: ip, left, right, op: inverse_cmp_op(op) }
151161
);
152162
rewrite!(instrs, i,
153-
[LocalGet64(left), cmp, LocalGet64(right)] if (let Some(op) = cmp_op_64(cmp)) =>
163+
[LocalGet64(left), LocalGet64(right), cmp] if (let Some(op) = cmp_op_64(cmp)) =>
154164
JumpCmpLocalLocal64 { target_ip: ip, left, right, op: inverse_cmp_op(op) }
155165
);
166+
rewrite!(instrs, i, [I32Const(imm), cmp] if (let Some(op) = cmp_op(cmp)) =>
167+
JumpCmpStackConst32 { target_ip: ip, imm, op: inverse_cmp_op(op) }
168+
);
169+
rewrite!(instrs, i, [I64Const(imm), cmp] if (let Some(op) = cmp_op_64(cmp)) =>
170+
JumpCmpStackConst64 { target_ip: ip, imm, op: inverse_cmp_op(op) }
171+
);
156172
}
157173
JumpIfNonZero(ip) => {
158174
rewrite!(instrs, i, [I32Eqz] => {
159175
replace!(instrs, i, 1 => [Nop, JumpIfZero(ip)]);
160176
continue;
161177
});
162-
rewrite!(instrs, i, [cmp, I32Const(imm)] if (let Some(op) = cmp_op(cmp)) =>
163-
JumpCmpStackConst32 { target_ip: ip, imm, op }
164-
);
165-
rewrite!(instrs, i, [cmp, I64Const(imm)] if (let Some(op) = cmp_op_64(cmp)) =>
166-
JumpCmpStackConst64 { target_ip: ip, imm, op }
167-
);
168178
rewrite!(instrs, i,
169-
[LocalGet32(local), cmp, I32Const(imm)] if (let Some(op) = cmp_op(cmp)) =>
179+
[LocalGet32(local), I32Const(imm), cmp] if (let Some(op) = cmp_op(cmp)) =>
170180
JumpCmpLocalConst32 { target_ip: ip, local, imm, op }
171181
);
172182
rewrite!(instrs, i,
173-
[LocalGet64(local), cmp, I64Const(imm)] if
183+
[LocalGet64(local), I64Const(imm), cmp] if
174184
(let Some(op) = cmp_op_64(cmp) && let Ok(imm) = i32::try_from(imm)) =>
175185
JumpCmpLocalConst64 { target_ip: ip, local, imm, op }
176186
);
177187
rewrite!(instrs, i,
178-
[LocalGet32(left), cmp, LocalGet32(right)] if (let Some(op) = cmp_op(cmp)) =>
188+
[LocalGet32(left), LocalGet32(right), cmp] if (let Some(op) = cmp_op(cmp)) =>
179189
JumpCmpLocalLocal32 { target_ip: ip, left, right, op }
180190
);
181191
rewrite!(instrs, i,
182-
[LocalGet64(left), cmp, LocalGet64(right)] if (let Some(op) = cmp_op_64(cmp)) =>
192+
[LocalGet64(left), LocalGet64(right), cmp] if (let Some(op) = cmp_op_64(cmp)) =>
183193
JumpCmpLocalLocal64 { target_ip: ip, left, right, op }
184194
);
195+
rewrite!(instrs, i, [I32Const(imm), cmp] if (let Some(op) = cmp_op(cmp)) =>
196+
JumpCmpStackConst32 { target_ip: ip, imm, op }
197+
);
198+
rewrite!(instrs, i, [I64Const(imm), cmp] if (let Some(op) = cmp_op_64(cmp)) =>
199+
JumpCmpStackConst64 { target_ip: ip, imm, op }
200+
);
185201
}
186202
_ => {}
187203
}
204+
205+
if track_local_memory_usage {
206+
uses_local_memory |= instrs[i].memory_addr().is_some_and(|mem| mem >= imported_memory_count);
207+
}
188208
}
209+
210+
uses_local_memory
189211
}
190212

191213
fn cmp_op(instr: Instruction) -> Option<CmpOp> {

crates/tinywasm/src/engine.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
use alloc::sync::Arc;
22

33
/// Memory backend types and traits.
4-
pub use crate::store::{LinearMemory, MemoryBackend, PagedMemory, VecMemory};
4+
pub use crate::store::{LazyLinearMemory, LinearMemory, MemoryBackend, PagedMemory, VecMemory};
55

66
/// Global configuration for the WebAssembly interpreter
77
///

crates/tinywasm/src/instance.rs

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,13 @@ impl ModuleInstance {
133133

134134
addrs.funcs.extend(store.init_funcs(&module.0.funcs, idx));
135135
addrs.tables.extend(store.init_tables(&module.0.table_types, idx));
136-
addrs.memories.extend(store.init_memories(&module.0.memory_types, idx)?);
136+
match module.0.local_memory_allocation {
137+
LocalMemoryAllocation::Skip => {}
138+
LocalMemoryAllocation::Lazy => {
139+
addrs.memories.extend(store.init_lazy_memories(&module.0.memory_types, idx)?)
140+
}
141+
LocalMemoryAllocation::Eager => addrs.memories.extend(store.init_memories(&module.0.memory_types, idx)?),
142+
}
137143
let global_addrs = store.init_globals(addrs.globals, &module.0.globals, &addrs.funcs, idx)?;
138144
let (elem_addrs, elem_trapped) =
139145
store.init_elements(&addrs.tables, &addrs.funcs, &global_addrs, &module.0.elements, idx)?;

crates/tinywasm/src/interpreter/executor.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -172,6 +172,8 @@ impl<'store, const BUDGETED: bool> Executor<'store, BUDGETED> {
172172
LocalCopy128(from, to) => self.store.value_stack.local_set(&self.cf, *to, self.store.value_stack.local_get::<Value128>(&self.cf, *from)),
173173
AddLocalLocal32(a, b) => self.store.value_stack.push(self.store.value_stack.local_get::<i32>(&self.cf, *a).wrapping_add(self.store.value_stack.local_get::<i32>(&self.cf, *b)))?,
174174
AddLocalLocal64(a, b) => self.store.value_stack.push(self.store.value_stack.local_get::<i64>(&self.cf, *a).wrapping_add(self.store.value_stack.local_get::<i64>(&self.cf, *b)))?,
175+
AddLocalLocalSet32(a, b, dst) => self.store.value_stack.local_set::<i32>(&self.cf, *dst, self.store.value_stack.local_get::<i32>(&self.cf, *a).wrapping_add(self.store.value_stack.local_get::<i32>(&self.cf, *b))),
176+
AddLocalLocalSet64(a, b, dst) => self.store.value_stack.local_set::<i64>(&self.cf, *dst, self.store.value_stack.local_get::<i64>(&self.cf, *a).wrapping_add(self.store.value_stack.local_get::<i64>(&self.cf, *b))),
175177
AddConst32(c) => stack_op!(unary i32, |v| v.wrapping_add(*c)),
176178
AddConst64(c) => stack_op!(unary i64, |v| v.wrapping_add(*c)),
177179
AddLocalConst32(local_index, c) => self.store.value_stack.local_update::<Value32>(&self.cf, *local_index, |local| local.wrapping_add(*c as u32)),

crates/tinywasm/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ use interpreter::InterpreterRuntime;
115115

116116
/// Global configuration for the WebAssembly interpreter
117117
pub mod engine;
118-
pub use engine::{Engine, LinearMemory, MemoryBackend, PagedMemory, VecMemory};
118+
pub use engine::{Engine, LazyLinearMemory, LinearMemory, MemoryBackend, PagedMemory, VecMemory};
119119

120120
#[cfg(feature = "parser")]
121121
/// Re-export of [`tinywasm_parser`]. Requires `parser` feature.

crates/tinywasm/src/store/memory/instance.rs

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,22 @@ impl MemoryInstance {
4747
Ok(Self { kind, inner: storage, page_count: kind.page_count_initial() as usize })
4848
}
4949

50+
pub(crate) fn new_lazy(kind: MemoryType, backend: &MemoryBackend) -> Result<Self> {
51+
assert!(kind.page_count_initial() <= kind.page_count_max());
52+
53+
let initial_len = usize::try_from(kind.initial_size())
54+
.map_err(|_| Error::UnsupportedFeature("memory size exceeds the host address space"))?;
55+
56+
crate::log::debug!(
57+
"initializing lazy memory with {} pages of {} bytes",
58+
kind.page_count_initial(),
59+
kind.page_size()
60+
);
61+
62+
let storage = backend.create_lazy(kind, initial_len)?;
63+
Ok(Self { kind, inner: storage, page_count: kind.page_count_initial() as usize })
64+
}
65+
5066
pub(crate) const fn is_64bit(&self) -> bool {
5167
matches!(self.kind.arch(), MemoryArch::I64)
5268
}

0 commit comments

Comments
 (0)