Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
75 commits
Select commit Hold shift + click to select a range
d2f732d
Add FAST_CALL instruction
matthew-mojira Oct 6, 2025
b6e1ca1
Update tag for fast_call
matthew-mojira Oct 6, 2025
e562b30
Separate more fast call behavior
matthew-mojira Oct 8, 2025
d6a175e
Add CallProperty to distinguish between tail call and fast call
matthew-mojira Oct 14, 2025
d3f6b14
Add stubs/initialization for fast call entries and no-op FAST_CALL in…
matthew-mojira Jan 5, 2026
2ca070a
Clean up unused dispatch function and specify goals
matthew-mojira Jan 5, 2026
3ad8de7
Work on SPC accepting fast mode and emitting dispatch sequence
matthew-mojira Jan 5, 2026
52adc24
macro dispatch, add fast_target_code correctly, better stub caller di…
matthew-mojira Jan 14, 2026
5372c3b
Fix vsp in fast spc
matthew-mojira Jan 15, 2026
fa843b0
Replace CALL with FAST_CALL when function is exported with "fast:" pr…
matthew-mojira Jan 15, 2026
951f0d0
AOT compile fast functions declared in export name
matthew-mojira Jan 21, 2026
864c5e9
Add fast prologue/epilogue stubs
matthew-mojira Jan 21, 2026
25860eb
Add small frame in fast call to save vfp (for local variables)
matthew-mojira Jan 22, 2026
f248373
Add test files/programs
matthew-mojira Jan 23, 2026
1e7c7d6
Add logic for specialized FAST_CALL bytecodes
matthew-mojira Jan 23, 2026
ab607a4
Add more instructions
matthew-mojira Mar 11, 2026
bc5bbf4
Remove use of fast frame
matthew-mojira Mar 18, 2026
2ff8147
Add FAST_CALL 0-47 for each unused bytecode, and minor merge fixes
matthew-mojira Apr 1, 2026
79f2d61
Patch the dispatch table with fast-compiled functions (and use them) …
matthew-mojira Apr 1, 2026
514265b
Cleaned up some stuff
matthew-mojira Apr 2, 2026
3384f9c
Remove unused stubs and deps
matthew-mojira Apr 2, 2026
71e8de9
Build fast SPC exec env at the same time as other exec envs, remove dup
matthew-mojira Apr 2, 2026
10d2905
save/restore caller IVars in SPC and across frame reconstruction
matthew-mojira Apr 2, 2026
d78cb6c
Complete stack reconstruction around fast compilation
matthew-mojira Apr 3, 2026
291368a
Remove FAST_CALL47 and opcode mapped to 0xFF
matthew-mojira Apr 3, 2026
7721d47
Remove more FAST_CALLs contradicting stack switching
matthew-mojira Apr 3, 2026
5059eba
Move fast_target_code and fast_call_idx (conflict in internal offset!…
matthew-mojira Apr 3, 2026
5a6e994
fixup! save/restore caller IVars in SPC and across frame reconstruction
matthew-mojira Apr 5, 2026
55cbdc7
fixup! Add CallProperty to distinguish between tail call and fast call
matthew-mojira Apr 5, 2026
c7a5be4
fixup! Add CallProperty to distinguish between tail call and fast call
matthew-mojira Apr 5, 2026
4be96a8
Add options for fast functions
matthew-mojira Apr 5, 2026
0196dc6
Fast call tests
matthew-mojira Apr 5, 2026
e9a929c
Fix regular inlining
matthew-mojira Apr 5, 2026
f02acf0
Add .wat from tests and more fastcall tests
matthew-mojira Apr 6, 2026
673fd79
Fix multi_return test (div incorrect)
matthew-mojira Apr 8, 2026
f63aa5d
Add stackframe test
matthew-mojira Apr 8, 2026
68ee6b5
Save interpreter r_ip into the frame (to protect against div)
matthew-mojira Apr 8, 2026
4e5d8cf
Fix test expectation
matthew-mojira Apr 8, 2026
b6029b9
Validate interpreter's pc in a fast function's outcall
matthew-mojira Apr 8, 2026
0c6e2a7
Add more tests
matthew-mojira Apr 9, 2026
a4286b0
Update some register stuff
matthew-mojira Apr 9, 2026
bce391e
Claude's junk
matthew-mojira Apr 15, 2026
0a67c6e
Revert "Claude's junk"
matthew-mojira Apr 15, 2026
6551d78
Streamline compiler to help fix bug
matthew-mojira Apr 17, 2026
bc3ce6f
Fix saving of vfp
matthew-mojira Apr 20, 2026
fef448c
More vfp fix progress
matthew-mojira Apr 20, 2026
4fc6efb
Fix bug
matthew-mojira Apr 20, 2026
567ec28
Fix test expectation
matthew-mojira Apr 20, 2026
0f0662c
Merge remote-tracking branch 'origin/master' into fast-handlers
matthew-mojira Apr 20, 2026
ce77d92
Merge remote-tracking branch 'origin/master' into fast-handlers
matthew-mojira Apr 20, 2026
10d1d20
Remove too early of Instance.functions
matthew-mojira Apr 20, 2026
532442a
Fix non-fast reconstruction
matthew-mojira Apr 20, 2026
d1d33e8
Remove unnecessary else if block
matthew-mojira Apr 20, 2026
1bcbf64
Streamline code to detect fast handlers in hardware traps
matthew-mojira Apr 20, 2026
5125dbf
Merge remote-tracking branch 'origin/master' into fast-handlers
matthew-mojira Apr 21, 2026
f5e8234
Remove `int` directory
matthew-mojira Apr 21, 2026
b232bfc
Refactor SPC
matthew-mojira Apr 21, 2026
81632a3
Revert "Add CallProperty to distinguish between tail call and fast call"
matthew-mojira Apr 21, 2026
8cb0d81
Refactor and clean up code
matthew-mojira Apr 21, 2026
335cf48
Remove printing fast function
matthew-mojira Apr 21, 2026
c5a3f8d
Remove (lazy) fastCompile function
matthew-mojira Apr 21, 2026
1e118f7
Add guards around FastIntTuning.useFastFunctions
matthew-mojira Apr 21, 2026
b675571
Deduplicate Mmap.reserve
matthew-mojira Apr 21, 2026
527b70d
Remove FAST_SPC_EXEC_ENV
matthew-mojira Apr 21, 2026
f8cabad
Rename _reserve to reserve_
matthew-mojira Apr 21, 2026
235f068
Add more tests from CLAUDE
matthew-mojira Apr 21, 2026
1913f13
Always reconstruct frames across runtime calls in fast handler
matthew-mojira Apr 21, 2026
7d3da8b
Streamline SPC register saving (should just be sent to MacroAssembler…
matthew-mojira Apr 22, 2026
ee9403c
Revert "Streamline SPC register saving (should just be sent to MacroA…
matthew-mojira Apr 22, 2026
ffe898e
Merge remote-tracking branch 'origin/master' into fast-handlers
matthew-mojira Apr 22, 2026
4375145
Add wave.new_func tests to fastcall
matthew-mojira Apr 22, 2026
8430acd
Optimize replaceCall
matthew-mojira Apr 22, 2026
db48ba9
Print out patches to dispatch table
matthew-mojira Apr 22, 2026
9c7e4c0
Eagerly allocate fast handler slots
matthew-mojira Apr 22, 2026
677dde8
Save r_stp during reconstruction
matthew-mojira Apr 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added fast_call.wasm
Binary file not shown.
10 changes: 10 additions & 0 deletions fast_call.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
;; Direct-call fixture: main calls $f and hands the result to an imported host function.
(module
;; Host import "wizeng"."puti": takes one i32, returns nothing.
;; NOTE(review): presumably prints the integer — confirm against the host implementation.
(import "wizeng" "puti" (func $puti (param i32)))
;; $f: no parameters; returns the constant 10.
(func $f (result i32)
i32.const 10)
;; main: pushes $f's result, passes it to $puti, then returns 0.
(func (export "main") (result i32)
call $f
call $puti
i32.const 0
)
)
Binary file added fast_call2.wasm
Binary file not shown.
7 changes: 7 additions & 0 deletions fast_call2.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
;; Smallest call fixture: main's return value is whatever $f returns.
(module
;; $f: no parameters; returns the constant 10.
(func $f (result i32)
i32.const 10)
;; main: calls $f and returns its result (10) unchanged.
(func (export "main") (result i32)
call $f
)
)
Binary file added fast_call_export.wasm
Binary file not shown.
10 changes: 10 additions & 0 deletions fast_call_export.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
;; The export name carries the fast-function marker ("fast:"); the binary itself is not modified ahead of time.

(module
;; $fast: exported under the name "fast:foo"; returns the constant 2.
(func $fast (export "fast:foo") (result i32)
(i32.const 2)
)
;; main: calls $fast directly and returns its result (2).
(func (export "main") (result i32)
(call $fast)
)
)
Binary file added fast_call_nop.wasm
Binary file not shown.
10 changes: 10 additions & 0 deletions fast_call_nop.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
;; Fixture that calls an empty function; main's result does not depend on the callee.
(module
;; $f: empty function; not called anywhere in this module.
;; NOTE(review): presumably kept so that $g is function index 1 rather than 0 — confirm intent.
(func $f)
;; $g: empty function (no parameters, no results); the call target used by main.
(func $g)
;; main: pushes and immediately drops an i64 constant, calls $g, then returns 0.
(func (export "main") (result i32)
i64.const 11
drop
call $g
i32.const 0
)
)
Binary file added fast_call_param.wasm
Binary file not shown.
18 changes: 18 additions & 0 deletions fast_call_param.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
;; Fixture for a call with one argument and a branching callee.
(module
;; Host import "wizeng"."puti": takes one i32, returns nothing.
;; NOTE(review): presumably prints the integer — confirm against the host implementation.
(import "wizeng" "puti" (func $puti (param i32)))
;; $f: returns 999 when its i32 parameter is non-zero, and -216 when it is zero.
(func $f (param i32) (result i32)
local.get 0
if (result i32)
i32.const 999
else
i32.const -216
end
)
;; main: calls $f with 1 and then with 0, passing each result to $puti
;; (so $puti receives 999, then -216), and finally returns 0.
(func (export "main") (result i32)
(call $f (i32.const 1))
call $puti
(call $f (i32.const 0))
call $puti
i32.const 0
)
)
Binary file added slow_call.wasm
Binary file not shown.
Binary file added slow_call_nop.wasm
Binary file not shown.
7 changes: 7 additions & 0 deletions slow_call_nop.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
;; Fixture that calls an empty function and returns a constant.
(module
;; $f: empty function (no parameters, no results).
(func $f)
;; main: calls $f, then returns 0.
(func (export "main") (result i32)
call $f
i32.const 0
)
)
42 changes: 42 additions & 0 deletions src/engine/BytecodeIterator.v3
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,48 @@ class BytecodeIterator {
RESUME_THROW => v.visit_RESUME_THROW(read_CONT(), read_TAG(), read_HANDLERS());
RESUME_THROW_REF => v.visit_RESUME_THROW_REF(read_CONT(), read_HANDLERS());
SWITCH => v.visit_SWITCH(read_CONT(), read_TAG());

// replacing CALL with FAST_CALL does not touch the operand so that the original function can still be recovered
FAST_CALL0 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL1 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL2 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL3 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL4 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL5 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL6 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL7 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL8 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL9 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL10 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL11 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL12 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL13 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL14 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL15 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL16 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL17 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL18 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL19 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL20 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL21 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL22 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL23 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL24 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL25 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL26 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL27 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL28 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL29 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL30 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL31 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL32 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL33 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL34 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL35 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL36 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL37 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL38 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
FAST_CALL39 => v.visit_FAST_CALL(Opcodes.fastCallToIndex(opcode), read_FUNC());
}
}
def trace(out: StringBuilder, module: Module, tracer: InstrTracer) {
Expand Down
46 changes: 46 additions & 0 deletions src/engine/CodeValidator.v3
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,53 @@ class CodeValidator(extensions: Extension.set, limits: Limits, module: Module, e
var func = parser.readFuncRef();
if (func == null) return;
checkSignature(func.sig);

// fast call: if function is exported with fast name, replace the bytecode with FAST_CALL
if (FastIntTuning.useFastFunctions && func.fast_call_idx >= 0) {
if (Trace.validation) Trace.OUT.put1(" replacing with FAST_CALL%d\n", func.fast_call_idx);
this.func.replaceCall(opcode_pos, func.fast_call_idx);
}
}
FAST_CALL0 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL1 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL2 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL3 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL4 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL5 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL6 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL7 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL8 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL9 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL10 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL11 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL12 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL13 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL14 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL15 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL16 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL17 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL18 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL19 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL20 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL21 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL22 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL23 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL24 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL25 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL26 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL27 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL28 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL29 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL30 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL31 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL32 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL33 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL34 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL35 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL36 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL37 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL38 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
FAST_CALL39 => if (FastIntTuning.useFastFunctions) System.error("validation error", "trying to validate FAST_CALL internal opcode");
CALL_INDIRECT => {
var sig = parser.readSigRef();
var table = parser.readTableRef();
Expand Down
2 changes: 2 additions & 0 deletions src/engine/EngineOptions.v3
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ component EngineOptions {
var extensions: Extension.set = Extensions.getDefaults();
def DEFAULT_STACK_SIZE = 512u * 1024u;
def STACK_SIZE = group.newSizeOption("stack-size", DEFAULT_STACK_SIZE, "Initial stack size in bytes for Wasm execution stacks.");
def FAST_FUNCTIONS = group.newBoolOption("fast-functions", false, "Treat functions exported with `fast:` in the name as fast functions.")
.onSet(fun v => void(FastIntTuning.useFastFunctions = v));
def X_ = OptionsRegistry.addParseFunc(parse);

def parse(arg: string, err: ErrorGen) -> bool {
Expand Down
10 changes: 10 additions & 0 deletions src/engine/Module.v3
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class Module(filename: string) {
def exports = Vector<(string, Decl)>.new();
def elems = Vector<ElemDecl>.new();
def data = Vector<DataDecl>.new();
def fast_funcs = Vector<FuncDecl>.new();
def custom_sections = Vector<CustomSection>.new();
var probes: Array<Array<Probe>>;
var dyn_probes: Vector<(int, int, Probe)>;
Expand Down Expand Up @@ -143,6 +144,8 @@ class FuncDecl(sig_index: int) extends Decl {
var target_code: TargetCode;
var tierup_trigger: int = int.max;
var handlers = FuncHandlerInfo.new();
var fast_target_code: TargetCode;
var fast_call_idx: int = -1;

def render(names: NameSection, buf: StringBuilder) -> StringBuilder {
var name = if (names != null, names.getFuncName(func_index));
Expand All @@ -154,6 +157,7 @@ class FuncDecl(sig_index: int) extends Decl {
var tc: TargetCode;
var tr: TargetCode;
target_code = tc; // reset target code as well
fast_target_code = tc;
sidetable = Sidetables.NO_SIDETABLE;
cbd_sidetable = null;
}
Expand All @@ -168,6 +172,11 @@ class FuncDecl(sig_index: int) extends Decl {
if (cur_bytecode == orig_bytecode) return;
cur_bytecode[pc] = orig_bytecode[pc];
}
// Overwrites the opcode byte at {pc} in the current bytecode with the FAST_CALL
// opcode selected by {idx}. Only that single byte is written.
def replaceCall(pc: int, idx: int) {
	// While both fields still refer to the same array, repoint {orig_bytecode} at a
	// duplicate so the in-place store below leaves the original bytes recoverable.
	if (cur_bytecode == orig_bytecode) orig_bytecode = Arrays.dup(orig_bytecode);
	var fast_call = Opcodes.indexToFastCall(idx);
	cur_bytecode[pc] = byte.!(fast_call.code);
}
def reset() -> this {
if (cur_bytecode == orig_bytecode) return;
ArrayUtil.copyInto(cur_bytecode, 0, orig_bytecode, 0, orig_bytecode.length);
Expand All @@ -183,6 +192,7 @@ class FuncDecl(sig_index: int) extends Decl {
n.sidetable = this.sidetable;
n.num_locals = this.num_locals;
n.target_code = this.target_code;
n.fast_target_code = this.fast_target_code;
return n;
}
def findExHandler(instance: Instance, tag: Tag, throw_pc: int) -> ExHandler {
Expand Down
Loading
Loading