From 5ca71364ff6cf4a9c445cb2701b1eb4770e79579 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Tue, 22 Jul 2025 12:10:45 -0700 Subject: [PATCH 01/18] Avoid GC while operands in inconsistent state compile_data_calloc2 may run GC (though because it allocates from an arena this is rare in practice). When this happened when resizing operands there was a risk of seeing the insn in an inconsistent state. To solve this we need to make any allocations before we start modifying the instructions. This refactors the code to use a new insn_replace_with_operands() function that allocates the new operands array before modifying the instruction object. Co-authored-by: Aaron Patterson --- compile.c | 97 +++++++++++++++++++++++-------------------------------- 1 file changed, 40 insertions(+), 57 deletions(-) diff --git a/compile.c b/compile.c index 8d5cb45904c6e7..bda18c1c424ab6 100644 --- a/compile.c +++ b/compile.c @@ -1440,6 +1440,30 @@ new_insn_body(rb_iseq_t *iseq, int line_no, int node_id, enum ruby_vminsn_type i return new_insn_core(iseq, line_no, node_id, insn_id, argc, operands); } +static INSN * +insn_replace_with_operands(rb_iseq_t *iseq, INSN *iobj, enum ruby_vminsn_type insn_id, int argc, ...) 
+{ + VALUE *operands = 0; + va_list argv; + if (argc > 0) { + int i; + va_start(argv, argc); + operands = compile_data_alloc2(iseq, sizeof(VALUE), argc); + for (i = 0; i < argc; i++) { + VALUE v = va_arg(argv, VALUE); + operands[i] = v; + } + va_end(argv); + } + + iobj->insn_id = insn_id; + iobj->operand_size = argc; + iobj->operands = operands; + iseq_insn_each_markable_object(iobj, iseq_insn_each_object_write_barrier, (VALUE)iseq); + + return iobj; +} + static const struct rb_callinfo * new_callinfo(rb_iseq_t *iseq, ID mid, int argc, unsigned int flag, struct rb_callinfo_kwarg *kw_arg, int has_blockiseq) { @@ -3439,11 +3463,7 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal VALUE ary = iobj->operands[0]; rb_obj_reveal(ary, rb_cArray); - iobj->insn_id = BIN(opt_ary_freeze); - iobj->operand_size = 2; - iobj->operands = compile_data_calloc2(iseq, iobj->operand_size, sizeof(VALUE)); - iobj->operands[0] = ary; - iobj->operands[1] = (VALUE)ci; + insn_replace_with_operands(iseq, iobj, BIN(opt_ary_freeze), 2, ary, (VALUE)ci); ELEM_REMOVE(next); } } @@ -3465,11 +3485,7 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal VALUE hash = iobj->operands[0]; rb_obj_reveal(hash, rb_cHash); - iobj->insn_id = BIN(opt_hash_freeze); - iobj->operand_size = 2; - iobj->operands = compile_data_calloc2(iseq, iobj->operand_size, sizeof(VALUE)); - iobj->operands[0] = hash; - iobj->operands[1] = (VALUE)ci; + insn_replace_with_operands(iseq, iobj, BIN(opt_hash_freeze), 2, hash, (VALUE)ci); ELEM_REMOVE(next); } } @@ -3488,11 +3504,7 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal const rb_iseq_t *blockiseq = (rb_iseq_t *)OPERAND_AT(next, 1); if (vm_ci_simple(ci) && vm_ci_argc(ci) == 0 && blockiseq == NULL && vm_ci_mid(ci) == idFreeze) { - iobj->insn_id = BIN(opt_ary_freeze); - iobj->operand_size = 2; - iobj->operands = compile_data_calloc2(iseq, iobj->operand_size, sizeof(VALUE)); - 
RB_OBJ_WRITE(iseq, &iobj->operands[0], rb_cArray_empty_frozen); - iobj->operands[1] = (VALUE)ci; + insn_replace_with_operands(iseq, iobj, BIN(opt_ary_freeze), 2, rb_cArray_empty_frozen, (VALUE)ci); ELEM_REMOVE(next); } } @@ -3511,11 +3523,7 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal const rb_iseq_t *blockiseq = (rb_iseq_t *)OPERAND_AT(next, 1); if (vm_ci_simple(ci) && vm_ci_argc(ci) == 0 && blockiseq == NULL && vm_ci_mid(ci) == idFreeze) { - iobj->insn_id = BIN(opt_hash_freeze); - iobj->operand_size = 2; - iobj->operands = compile_data_calloc2(iseq, iobj->operand_size, sizeof(VALUE)); - RB_OBJ_WRITE(iseq, &iobj->operands[0], rb_cHash_empty_frozen); - iobj->operands[1] = (VALUE)ci; + insn_replace_with_operands(iseq, iobj, BIN(opt_hash_freeze), 2, rb_cHash_empty_frozen, (VALUE)ci); ELEM_REMOVE(next); } } @@ -4109,17 +4117,16 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal static int insn_set_specialized_instruction(rb_iseq_t *iseq, INSN *iobj, int insn_id) { - iobj->insn_id = insn_id; - iobj->operand_size = insn_len(insn_id) - 1; - iobj->insn_info.events |= RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN; - if (insn_id == BIN(opt_neq)) { VALUE original_ci = iobj->operands[0]; - iobj->operand_size = 2; - iobj->operands = compile_data_calloc2(iseq, iobj->operand_size, sizeof(VALUE)); - iobj->operands[0] = (VALUE)new_callinfo(iseq, idEq, 1, 0, NULL, FALSE); - iobj->operands[1] = original_ci; + VALUE new_ci = (VALUE)new_callinfo(iseq, idEq, 1, 0, NULL, FALSE); + insn_replace_with_operands(iseq, iobj, insn_id, 2, new_ci, original_ci); } + else { + iobj->insn_id = insn_id; + iobj->operand_size = insn_len(insn_id) - 1; + } + iobj->insn_info.events |= RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN; return COMPILE_OK; } @@ -4151,12 +4158,7 @@ iseq_specialized_instruction(rb_iseq_t *iseq, INSN *iobj) if (method != INT2FIX(0)) { VALUE num = iobj->operands[0]; - int operand_len = insn_len(BIN(opt_newarray_send)) - 1; 
- iobj->insn_id = BIN(opt_newarray_send); - iobj->operands = compile_data_calloc2(iseq, operand_len, sizeof(VALUE)); - iobj->operands[0] = num; - iobj->operands[1] = method; - iobj->operand_size = operand_len; + insn_replace_with_operands(iseq, iobj, BIN(opt_newarray_send), 2, num, method); ELEM_REMOVE(&niobj->link); return COMPILE_OK; } @@ -4168,12 +4170,7 @@ iseq_specialized_instruction(rb_iseq_t *iseq, INSN *iobj) const struct rb_callinfo *ci = (struct rb_callinfo *)OPERAND_AT((INSN *)niobj->link.next, 0); if (vm_ci_simple(ci) && vm_ci_argc(ci) == 1 && vm_ci_mid(ci) == idPack) { VALUE num = iobj->operands[0]; - int operand_len = insn_len(BIN(opt_newarray_send)) - 1; - iobj->insn_id = BIN(opt_newarray_send); - iobj->operands = compile_data_calloc2(iseq, operand_len, sizeof(VALUE)); - iobj->operands[0] = FIXNUM_INC(num, 1); - iobj->operands[1] = INT2FIX(VM_OPT_NEWARRAY_SEND_PACK); - iobj->operand_size = operand_len; + insn_replace_with_operands(iseq, iobj, BIN(opt_newarray_send), 2, FIXNUM_INC(num, 1), INT2FIX(VM_OPT_NEWARRAY_SEND_PACK)); ELEM_REMOVE(&iobj->link); ELEM_REMOVE(niobj->link.next); ELEM_INSERT_NEXT(&niobj->link, &iobj->link); @@ -4191,12 +4188,7 @@ iseq_specialized_instruction(rb_iseq_t *iseq, INSN *iobj) if (vm_ci_mid(ci) == idPack && vm_ci_argc(ci) == 2 && (kwarg && kwarg->keyword_len == 1 && kwarg->keywords[0] == rb_id2sym(idBuffer))) { VALUE num = iobj->operands[0]; - int operand_len = insn_len(BIN(opt_newarray_send)) - 1; - iobj->insn_id = BIN(opt_newarray_send); - iobj->operands = compile_data_calloc2(iseq, operand_len, sizeof(VALUE)); - iobj->operands[0] = FIXNUM_INC(num, 2); - iobj->operands[1] = INT2FIX(VM_OPT_NEWARRAY_SEND_PACK_BUFFER); - iobj->operand_size = operand_len; + insn_replace_with_operands(iseq, iobj, BIN(opt_newarray_send), 2, FIXNUM_INC(num, 2), INT2FIX(VM_OPT_NEWARRAY_SEND_PACK_BUFFER)); // Remove the "send" insn. ELEM_REMOVE((niobj->link.next)->next); // Remove the modified insn from its original "newarray" position... 
@@ -4230,11 +4222,7 @@ iseq_specialized_instruction(rb_iseq_t *iseq, INSN *iobj) if (vm_ci_simple(ci) && vm_ci_argc(ci) == 1 && vm_ci_mid(ci) == idIncludeP) { VALUE num = iobj->operands[0]; INSN *sendins = (INSN *)sendobj; - sendins->insn_id = BIN(opt_newarray_send); - sendins->operand_size = insn_len(sendins->insn_id) - 1; - sendins->operands = compile_data_calloc2(iseq, sendins->operand_size, sizeof(VALUE)); - sendins->operands[0] = FIXNUM_INC(num, 1); - sendins->operands[1] = INT2FIX(VM_OPT_NEWARRAY_SEND_INCLUDE_P); + insn_replace_with_operands(iseq, sendins, BIN(opt_newarray_send), 2, FIXNUM_INC(num, 1), INT2FIX(VM_OPT_NEWARRAY_SEND_INCLUDE_P)); // Remove the original "newarray" insn. ELEM_REMOVE(&iobj->link); return COMPILE_OK; @@ -4272,12 +4260,7 @@ iseq_specialized_instruction(rb_iseq_t *iseq, INSN *iobj) rb_obj_reveal(ary, rb_cArray); INSN *sendins = (INSN *)sendobj; - sendins->insn_id = BIN(opt_duparray_send); - sendins->operand_size = insn_len(sendins->insn_id) - 1;; - sendins->operands = compile_data_calloc2(iseq, sendins->operand_size, sizeof(VALUE)); - sendins->operands[0] = ary; - sendins->operands[1] = rb_id2sym(idIncludeP); - sendins->operands[2] = INT2FIX(1); + insn_replace_with_operands(iseq, sendins, BIN(opt_duparray_send), 3, ary, rb_id2sym(idIncludeP), INT2FIX(1)); // Remove the duparray insn. ELEM_REMOVE(&iobj->link); From ff428b4dd0c5f0a07abbd8f8520d8d1e4bff8d66 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Fri, 25 Jul 2025 22:09:51 -0400 Subject: [PATCH 02/18] ZJIT: Keep a frame pointer and use it for memory params Previously, ZJIT miscompiled the following because of native SP interference. def a(n1,n2,n3,n4,n5,n6,n7,n8) = [n8] a(0,0,0,0,0,0,0, :ok) Commented problematic disassembly: ; call rb_ary_new_capa mov x0, #1 mov x16, #0x1278 movk x16, #0x4bc, lsl #16 movk x16, #1, lsl #32 blr x16 ; call rb_ary_push mov x1, x0 str x1, [sp, #-0x10]! 
; c_push() from alloc_regs() mov x0, x1 ; arg0, the array ldur x1, [sp] ; meant to be arg1=n8, but sp just moved! mov x16, #0x3968 movk x16, #0x4bc, lsl #16 movk x16, #1, lsl #32 blr x16 Since the frame pointer stays constant in the body of the function, static offsets based on it don't run the risk of being invalidated by SP movements. Pass the registers to preserve through Insn::FrameSetup. This allows ARM to use STP and waste no gaps between EC, SP, and CFP. x86 now preserves and restores RBP since we use it as the frame pointer. Since all arches now have a frame pointer, remove offset based SP movement in the epilogue and restore registers using the frame pointer. --- test/ruby/test_zjit.rb | 7 ++ zjit/src/asm/arm64/opnd.rs | 3 + zjit/src/backend/arm64/mod.rs | 117 ++++++++++++++++++++++++++++++--- zjit/src/backend/lir.rs | 41 ++++++------ zjit/src/backend/x86_64/mod.rs | 76 ++++++++++++++++----- zjit/src/codegen.rs | 87 +++++++----------------- 6 files changed, 220 insertions(+), 111 deletions(-) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 0dcdb8e4cb8275..fc085d2e93c9a4 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -819,6 +819,13 @@ def a(n1,n2,n3,n4,n5,n6,n7,n8) = self } end + def test_spilled_param_new_arary + assert_compiles '[:ok]', %q{ + def a(n1,n2,n3,n4,n5,n6,n7,n8) = [n8] + a(0,0,0,0,0,0,0, :ok) + } + end + def test_opt_aref_with assert_compiles ':ok', %q{ def aref_with(hash) = hash["key"] diff --git a/zjit/src/asm/arm64/opnd.rs b/zjit/src/asm/arm64/opnd.rs index 28422b747652d8..a77958f7e6eeec 100644 --- a/zjit/src/asm/arm64/opnd.rs +++ b/zjit/src/asm/arm64/opnd.rs @@ -119,6 +119,9 @@ pub const X20_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 20 }; pub const X21_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 21 }; pub const X22_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 22 }; +// frame pointer (base pointer) +pub const X29_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 29 }; + // link register pub const 
X30_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 30 }; diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index 88ccad8e091ed1..42dc31c90fd5cc 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -29,6 +29,7 @@ pub const C_ARG_OPNDS: [Opnd; 6] = [ pub const C_RET_REG: Reg = X0_REG; pub const C_RET_OPND: Opnd = Opnd::Reg(X0_REG); pub const NATIVE_STACK_PTR: Opnd = Opnd::Reg(XZR_REG); +pub const NATIVE_BASE_PTR: Opnd = Opnd::Reg(X29_REG); // These constants define the way we work with Arm64's stack pointer. The stack // pointer always needs to be aligned to a 16-byte boundary. @@ -911,18 +912,54 @@ impl Assembler cb.write_byte(0); } }, - Insn::FrameSetup => { + &Insn::FrameSetup { preserved, mut slot_count } => { + const { assert!(SIZEOF_VALUE == 8, "alignment logic relies on SIZEOF_VALUE == 8"); } + // Preserve X29 and set up frame record stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16)); - - // X29 (frame_pointer) = SP mov(cb, X29, C_SP_REG); - }, - Insn::FrameTeardown => { + + for regs in preserved.chunks(2) { + // For the body, store pairs and move SP + if let [reg0, reg1] = regs { + stp_pre(cb, reg1.into(), reg0.into(), A64Opnd::new_mem(128, C_SP_REG, -16)); + } else if let [reg] = regs { + // For overhang, store but don't move SP. Combine movement with + // movement for slots below. + stur(cb, reg.into(), A64Opnd::new_mem(64, C_SP_REG, -8)); + slot_count += 1; + } else { + unreachable!("chunks(2)"); + } + } + // Align slot_count + if slot_count % 2 == 1 { + slot_count += 1 + } + if slot_count > 0 { + let slot_offset = (slot_count * SIZEOF_VALUE) as u64; + // Bail when asked to reserve too many slots in one instruction. + ShiftedImmediate::try_from(slot_offset).ok()?; + sub(cb, C_SP_REG, C_SP_REG, A64Opnd::new_uimm(slot_offset)); + } + } + Insn::FrameTeardown { preserved } => { + // Restore preserved registers below frame pointer. 
+ let mut base_offset = 0; + for regs in preserved.chunks(2) { + if let [reg0, reg1] = regs { + base_offset -= 16; + ldp(cb, reg1.into(), reg0.into(), A64Opnd::new_mem(128, X29, base_offset)); + } else if let [reg] = regs { + ldur(cb, reg.into(), A64Opnd::new_mem(64, X29, base_offset - 8)); + } else { + unreachable!("chunks(2)"); + } + } + // SP = X29 (frame pointer) mov(cb, C_SP_REG, X29); - ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); - }, + } Insn::Add { left, right, out } => { // Usually, we issue ADDS, so you could branch on overflow, but ADDS with // out=31 refers to out=XZR, which discards the sum. So, instead of ADDS @@ -1482,11 +1519,73 @@ mod tests { fn test_emit_frame() { let (mut asm, mut cb) = setup_asm(); - asm.frame_setup(); - asm.frame_teardown(); + asm.frame_setup(&[], 0); + asm.frame_teardown(&[]); asm.compile_with_num_regs(&mut cb, 0); } + #[test] + fn frame_setup_and_teardown() { + const THREE_REGS: &'static [Opnd] = &[Opnd::Reg(X19_REG), Opnd::Reg(X20_REG), Opnd::Reg(X21_REG)]; + // Test 3 preserved regs (odd), odd slot_count + { + let (mut asm, mut cb) = setup_asm(); + asm.frame_setup(THREE_REGS, 3); + asm.frame_teardown(THREE_REGS); + asm.compile_with_num_regs(&mut cb, 0); + assert_disasm!(cb, "fd7bbfa9fd030091f44fbfa9f5831ff8ff8300d1b44f7fa9b5835ef8bf030091fd7bc1a8", " + 0x0: stp x29, x30, [sp, #-0x10]! + 0x4: mov x29, sp + 0x8: stp x20, x19, [sp, #-0x10]! + 0xc: stur x21, [sp, #-8] + 0x10: sub sp, sp, #0x20 + 0x14: ldp x20, x19, [x29, #-0x10] + 0x18: ldur x21, [x29, #-0x18] + 0x1c: mov sp, x29 + 0x20: ldp x29, x30, [sp], #0x10 + "); + } + + // Test 3 preserved regs (odd), even slot_count + { + let (mut asm, mut cb) = setup_asm(); + asm.frame_setup(THREE_REGS, 4); + asm.frame_teardown(THREE_REGS); + asm.compile_with_num_regs(&mut cb, 0); + assert_disasm!(cb, "fd7bbfa9fd030091f44fbfa9f5831ff8ffc300d1b44f7fa9b5835ef8bf030091fd7bc1a8", " + 0x0: stp x29, x30, [sp, #-0x10]! + 0x4: mov x29, sp + 0x8: stp x20, x19, [sp, #-0x10]! 
+ 0xc: stur x21, [sp, #-8] + 0x10: sub sp, sp, #0x30 + 0x14: ldp x20, x19, [x29, #-0x10] + 0x18: ldur x21, [x29, #-0x18] + 0x1c: mov sp, x29 + 0x20: ldp x29, x30, [sp], #0x10 + "); + } + + // Test 4 preserved regs (even), odd slot_count + { + static FOUR_REGS: &'static [Opnd] = &[Opnd::Reg(X19_REG), Opnd::Reg(X20_REG), Opnd::Reg(X21_REG), Opnd::Reg(X22_REG)]; + let (mut asm, mut cb) = setup_asm(); + asm.frame_setup(FOUR_REGS, 3); + asm.frame_teardown(FOUR_REGS); + asm.compile_with_num_regs(&mut cb, 0); + assert_disasm!(cb, "fd7bbfa9fd030091f44fbfa9f657bfa9ff8300d1b44f7fa9b6577ea9bf030091fd7bc1a8", " + 0x0: stp x29, x30, [sp, #-0x10]! + 0x4: mov x29, sp + 0x8: stp x20, x19, [sp, #-0x10]! + 0xc: stp x22, x21, [sp, #-0x10]! + 0x10: sub sp, sp, #0x20 + 0x14: ldp x20, x19, [x29, #-0x10] + 0x18: ldp x22, x21, [x29, #-0x20] + 0x1c: mov sp, x29 + 0x20: ldp x29, x30, [sp], #0x10 + "); + } + } + #[test] fn test_emit_je_fits_into_bcond() { let (mut asm, mut cb) = setup_asm(); diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 7bac210bee6689..36e783bd4e658a 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -12,10 +12,12 @@ use crate::asm::{CodeBlock, Label}; pub use crate::backend::current::{ Reg, EC, CFP, SP, - NATIVE_STACK_PTR, + NATIVE_STACK_PTR, NATIVE_BASE_PTR, C_ARG_OPNDS, C_RET_REG, C_RET_OPND, }; +pub static JIT_PRESERVED_REGS: &'static [Opnd] = &[CFP, SP, EC]; + // Memory operand base #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum MemBase @@ -291,8 +293,6 @@ pub enum Target context: Option, /// We use this to enrich asm comments. reason: SideExitReason, - /// The number of bytes we need to adjust the C stack pointer by. - c_stack_bytes: usize, /// Some if the side exit should write this label. We use it for patch points. label: Option