Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/jit/zjit.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ stackprof path/to/zjit_exits_{pid}.dump

Using `--zjit-dump-hir-iongraph` will dump all compiled functions into a directory named `/tmp/zjit-iongraph-{PROCESS_PID}`. Each file will be named `func_{ZJIT_FUNC_NAME}.json`. In order to use them in the Iongraph viewer, you'll need to use `jq` to collate them to a single file. An example invocation of `jq` is shown below for reference.

`jq --slurp --null-input '.functions=inputs | .version=2' /tmp/zjit-iongraph-{PROCESS_PID}/func*.json > ~/Downloads/ion.json`
`jq --slurp --null-input '.functions=inputs | .version=1' /tmp/zjit-iongraph-{PROCESS_PID}/func*.json > ~/Downloads/ion.json`

From there, you can use https://mozilla-spidermonkey.github.io/iongraph/ to view your trace.

Expand Down
1,404 changes: 214 additions & 1,190 deletions prism/prism.c

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion shape.h
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ ROBJECT_FIELDS_COUNT(VALUE obj)
static inline uint32_t
RBASIC_FIELDS_COUNT(VALUE obj)
{
return RSHAPE(rb_obj_shape_id(obj))->next_field_index;
return RSHAPE(RBASIC_SHAPE_ID(obj))->next_field_index;
}

static inline bool
Expand Down
15 changes: 15 additions & 0 deletions test/ruby/test_zjit.rb
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,21 @@ def test = [1, 2].map(&:to_s)
}
end

def test_send_variadic_with_block
assert_compiles '[[1, "a"], [2, "b"], [3, "c"]]', %q{
A = [1, 2, 3]
B = ["a", "b", "c"]

def test
result = []
A.zip(B) { |x, y| result << [x, y] }
result
end

test; test
}, call_threshold: 2
end

def test_send_splat
assert_runs '[1, 2]', %q{
def test(a, b) = [a, b]
Expand Down
66 changes: 47 additions & 19 deletions zjit/src/codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -425,14 +425,15 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
&Insn::GuardLess { left, right, state } => gen_guard_less(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)),
&Insn::GuardGreaterEq { left, right, state } => gen_guard_greater_eq(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)),
Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))),
Insn::CCall { cfunc, args, name, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, *name, opnds!(args)),
// Give up CCallWithFrame for 7+ args since asm.ccall() doesn't support it.
Insn::CCallWithFrame { cd, state, args, .. } if args.len() > C_ARG_OPNDS.len() =>
Insn::CCall { cfunc, recv, args, name, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, *name, opnd!(recv), opnds!(args)),
// Give up CCallWithFrame for 7+ args since asm.ccall() supports at most 6 args (recv + args).
// There's no test case for this because no core cfuncs have this many parameters. But C extensions could have such methods.
Insn::CCallWithFrame { cd, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() =>
gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs),
Insn::CCallWithFrame { cfunc, name, args, cme, state, blockiseq, .. } =>
gen_ccall_with_frame(jit, asm, *cfunc, *name, opnds!(args), *cme, *blockiseq, &function.frame_state(*state)),
Insn::CCallVariadic { cfunc, recv, args, name, cme, state, return_type: _, elidable: _ } => {
gen_ccall_variadic(jit, asm, *cfunc, *name, opnd!(recv), opnds!(args), *cme, &function.frame_state(*state))
Insn::CCallWithFrame { cfunc, recv, name, args, cme, state, blockiseq, .. } =>
gen_ccall_with_frame(jit, asm, *cfunc, *name, opnd!(recv), opnds!(args), *cme, *blockiseq, &function.frame_state(*state)),
Insn::CCallVariadic { cfunc, recv, name, args, cme, state, blockiseq, return_type: _, elidable: _ } => {
gen_ccall_variadic(jit, asm, *cfunc, *name, opnd!(recv), opnds!(args), *cme, *blockiseq, &function.frame_state(*state))
}
Insn::GetIvar { self_val, id, ic, state: _ } => gen_getivar(jit, asm, opnd!(self_val), *id, *ic),
Insn::SetGlobal { id, val, state } => no_output!(gen_setglobal(jit, asm, *id, opnd!(val), &function.frame_state(*state))),
Expand Down Expand Up @@ -766,6 +767,7 @@ fn gen_ccall_with_frame(
asm: &mut Assembler,
cfunc: *const u8,
name: ID,
recv: Opnd,
args: Vec<Opnd>,
cme: *const rb_callable_method_entry_t,
blockiseq: Option<IseqPtr>,
Expand All @@ -774,7 +776,8 @@ fn gen_ccall_with_frame(
gen_incr_counter(asm, Counter::non_variadic_cfunc_optimized_send_count);
gen_stack_overflow_check(jit, asm, state, state.stack_size());

let caller_stack_size = state.stack_size() - args.len();
let args_with_recv_len = args.len() + 1;
let caller_stack_size = state.stack().len() - args_with_recv_len;

// Can't use gen_prepare_non_leaf_call() because we need to adjust the SP
// to account for the receiver and arguments (and block arguments if any)
Expand All @@ -794,8 +797,8 @@ fn gen_ccall_with_frame(
VM_BLOCK_HANDLER_NONE.into()
};

gen_push_frame(asm, args.len(), state, ControlFrame {
recv: args[0],
gen_push_frame(asm, args_with_recv_len, state, ControlFrame {
recv,
iseq: None,
cme,
frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL,
Expand All @@ -813,8 +816,10 @@ fn gen_ccall_with_frame(
asm.mov(CFP, new_cfp);
asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP);

let mut cfunc_args = vec![recv];
cfunc_args.extend(args);
asm.count_call_to(&name.contents_lossy());
let result = asm.ccall(cfunc, args);
let result = asm.ccall(cfunc, cfunc_args);

asm_comment!(asm, "pop C frame");
let new_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into());
Expand All @@ -830,9 +835,11 @@ fn gen_ccall_with_frame(

/// Lowering for [`Insn::CCall`]. This is a low-level raw call that doesn't know
/// anything about the callee, so handling for e.g. GC safety is dealt with elsewhere.
fn gen_ccall(asm: &mut Assembler, cfunc: *const u8, name: ID, args: Vec<Opnd>) -> lir::Opnd {
fn gen_ccall(asm: &mut Assembler, cfunc: *const u8, name: ID, recv: Opnd, args: Vec<Opnd>) -> lir::Opnd {
let mut cfunc_args = vec![recv];
cfunc_args.extend(args);
asm.count_call_to(&name.contents_lossy());
asm.ccall(cfunc, args)
asm.ccall(cfunc, cfunc_args)
}

/// Generate code for a variadic C function call
Expand All @@ -845,26 +852,47 @@ fn gen_ccall_variadic(
recv: Opnd,
args: Vec<Opnd>,
cme: *const rb_callable_method_entry_t,
blockiseq: Option<IseqPtr>,
state: &FrameState,
) -> lir::Opnd {
gen_incr_counter(asm, Counter::variadic_cfunc_optimized_send_count);
gen_stack_overflow_check(jit, asm, state, state.stack_size());

gen_prepare_non_leaf_call(jit, asm, state);
let args_with_recv_len = args.len() + 1;

let stack_growth = state.stack_size();
gen_stack_overflow_check(jit, asm, state, stack_growth);
// Compute the caller's stack size after consuming recv and args.
// state.stack() includes recv + args, so subtract both.
let caller_stack_size = state.stack_size() - args_with_recv_len;

gen_push_frame(asm, args.len(), state, ControlFrame {
// Can't use gen_prepare_non_leaf_call() because we need to adjust the SP
// to account for the receiver and arguments (like gen_ccall_with_frame does)
gen_prepare_call_with_gc(asm, state, false);
gen_save_sp(asm, caller_stack_size);
gen_spill_stack(jit, asm, state);
gen_spill_locals(jit, asm, state);

let block_handler_specval = if let Some(block_iseq) = blockiseq {
// Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
// VM_CFP_TO_CAPTURED_BLOCK then turns &cfp->self into a block handler.
// rb_captured_block->code.iseq aliases with cfp->block_code.
asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into());
let cfp_self_addr = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF));
asm.or(cfp_self_addr, Opnd::Imm(1))
} else {
VM_BLOCK_HANDLER_NONE.into()
};

gen_push_frame(asm, args_with_recv_len, state, ControlFrame {
recv,
iseq: None,
cme,
frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL,
specval: VM_BLOCK_HANDLER_NONE.into(),
specval: block_handler_specval,
pc: PC_POISON,
});

asm_comment!(asm, "switch to new SP register");
let sp_offset = (state.stack().len() - args.len() + VM_ENV_DATA_SIZE.to_usize()) * SIZEOF_VALUE;
let sp_offset = (caller_stack_size + VM_ENV_DATA_SIZE.to_usize()) * SIZEOF_VALUE;
let new_sp = asm.add(SP, sp_offset.into());
asm.mov(SP, new_sp);

Expand Down
3 changes: 2 additions & 1 deletion zjit/src/cruby_methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,8 @@ fn inline_string_eq(fun: &mut hir::Function, block: hir::BlockId, recv: hir::Ins
// TODO(max): Make StringEqual its own opcode so that we can later constant-fold StringEqual(a, a) => true
let result = fun.push_insn(block, hir::Insn::CCall {
cfunc: rb_yarv_str_eql_internal as *const u8,
args: vec![recv, other],
recv,
args: vec![other],
name: ID!(string_eq),
return_type,
elidable,
Expand Down
Loading