From 15883e808ba17e7dfe05a3e09b60b5c426bf0763 Mon Sep 17 00:00:00 2001 From: Karl Meakin Date: Sun, 15 Sep 2024 21:32:56 +0100 Subject: [PATCH] tail recursive interpreter loop Copyright (c) 2024, Arm Limited. Signed-off-by: Karl Meakin --- pulley/Cargo.toml | 3 +- pulley/src/decode.rs | 69 +++- pulley/src/interp.rs | 28 +- pulley/src/interp/interp_loop.rs | 656 +++++++++++++++++++++++++++++++ pulley/src/lib.rs | 2 + 5 files changed, 735 insertions(+), 23 deletions(-) create mode 100644 pulley/src/interp/interp_loop.rs diff --git a/pulley/Cargo.toml b/pulley/Cargo.toml index 0c3677641a1b..56728537e5f6 100644 --- a/pulley/Cargo.toml +++ b/pulley/Cargo.toml @@ -27,7 +27,8 @@ arbitrary = ["dep:arbitrary", "arbitrary/derive", "std", "cranelift-bitset/arbit encode = [] decode = [] disas = ["decode"] -interp = ["decode"] +interp = ["decode", "encode"] +tail_calls = ["interp"] [package.metadata.docs.rs] all-features = true diff --git a/pulley/src/decode.rs b/pulley/src/decode.rs index 12ad48aa7448..77fb5782f6af 100644 --- a/pulley/src/decode.rs +++ b/pulley/src/decode.rs @@ -260,7 +260,8 @@ impl BytecodeStream for UnsafeBytecodeStream { /// Anything that can be decoded from a bytecode stream, e.g. opcodes, /// immediates, registers, etc... -trait Decode: Sized { +pub trait Decode: Sized { + /// Decode this type from the given bytecode stream. 
fn decode(bytecode: &mut T) -> Result where T: BytecodeStream; @@ -377,6 +378,32 @@ impl Decode for PcRelOffset { } } +impl Decode for Opcode { + fn decode(bytecode: &mut T) -> Result + where + T: BytecodeStream, + { + let byte = u8::decode(bytecode)?; + match Opcode::new(byte) { + Some(v) => Ok(v), + None => Err(bytecode.invalid_opcode(byte)), + } + } +} + +impl Decode for ExtendedOpcode { + fn decode(bytecode: &mut T) -> Result + where + T: BytecodeStream, + { + let word = u16::decode(bytecode)?; + match ExtendedOpcode::new(word) { + Some(v) => Ok(v), + None => Err(bytecode.invalid_extended_opcode(word)), + } + } +} + impl Decode for BinaryOperands { fn decode(bytecode: &mut T) -> Result where @@ -655,3 +682,43 @@ macro_rules! define_extended_decoder { }; } for_each_extended_op!(define_extended_decoder); + +fn unwrap_uninhabited(res: Result) -> T { + match res { + Ok(ok) => ok, + } +} + +#[allow(missing_docs)] +pub mod operands { + use super::*; + + macro_rules! define_operands_decoder { + ( + $( + $( #[$attr:meta] )* + $snake_name:ident = $name:ident $( { + $( + $( #[$field_attr:meta] )* + $field:ident : $field_ty:ty + ),* + } )? ; + )* + ) => { + $( + #[allow(unused_variables)] + pub fn $snake_name(pc: &mut UnsafeBytecodeStream) -> ($($($field_ty,)*)?) { + ($($(unwrap_uninhabited(<$field_ty>::decode(pc)),)*)?) 
+ } + )* + }; + } + + for_each_op!(define_operands_decoder); + + pub fn extended(pc: &mut UnsafeBytecodeStream) -> (ExtendedOpcode,) { + (unwrap_uninhabited(ExtendedOpcode::decode(pc)),) + } + + for_each_extended_op!(define_operands_decoder); +} diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 40cdfed4139f..1a83e1b2a9db 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -4,6 +4,7 @@ use crate::decode::*; use crate::imms::*; use crate::regs::*; use crate::ExtendedOpcode; +use crate::Opcode; use alloc::string::ToString; use alloc::{vec, vec::Vec}; use core::fmt; @@ -13,6 +14,8 @@ use core::ops::{Index, IndexMut}; use core::ptr::{self, NonNull}; use sptr::Strict; +mod interp_loop; + const DEFAULT_STACK_SIZE: usize = 1 << 20; // 1 MiB /// A virtual machine for interpreting Pulley bytecode. @@ -121,27 +124,10 @@ impl Vm { } unsafe fn run(&mut self, pc: NonNull) -> Result<(), NonNull> { - let mut visitor = InterpreterVisitor { - state: &mut self.state, - pc: UnsafeBytecodeStream::new(pc), - }; - - loop { - let cf = self.decoder.decode_one(&mut visitor).unwrap(); - - // Really wish we had `feature(explicit_tail_calls)`... - match cf { - ControlFlow::Continue(()) => continue, - - // Out-of-line slow paths marked `cold` and `inline(never)` to - // improve codegen. 
- ControlFlow::Break(Done::Trap) => { - let pc = visitor.pc.as_ptr(); - return self.trap(pc); - } - ControlFlow::Break(Done::ReturnToHost) => return self.return_to_host(), - ControlFlow::Break(Done::HostCall) => return self.host_call(), - } + match interp_loop::interpreter_loop(self, UnsafeBytecodeStream::new(pc)) { + Done::ReturnToHost => self.return_to_host(), + Done::Trap => self.trap(pc), + Done::HostCall => self.host_call(), } } diff --git a/pulley/src/interp/interp_loop.rs b/pulley/src/interp/interp_loop.rs new file mode 100644 index 000000000000..53d6abac8934 --- /dev/null +++ b/pulley/src/interp/interp_loop.rs @@ -0,0 +1,656 @@ +use super::*; + +#[derive(Copy, Clone)] +pub struct OpcodeHandler { + /// The type of non-tail-recursive opcode handlers: return + /// `ControlFlow::Continue` with the next handler to call, or + /// `ControlFlow::Break` with the reason to stop. + #[cfg(not(feature = "tail_calls"))] + fun: fn(&mut MachineState, &mut UnsafeBytecodeStream) -> ControlFlow, + + /// The type of tail-recursive opcode handlers: instead of returning + /// `ControlFlow::Continue`, tail call the next handler directly; so + /// `ControlFlow::Continue` is uninhabited. + #[cfg(feature = "tail_calls")] + fun: fn(&mut MachineState, &mut UnsafeBytecodeStream) -> ControlFlow, +} + +#[cfg(not(feature = "tail_calls"))] +pub fn interpreter_loop(vm: &mut Vm, mut pc: UnsafeBytecodeStream) -> Done { + let opcode = Opcode::decode(&mut pc).unwrap(); + let mut handler = OPCODE_HANDLER_TABLE[opcode as usize]; + + // As tight as we can get the interpreter loop without tail calls: while + // the handlers keep returning the next handler to call, call it. 
+ loop { + match (handler.fun)(&mut vm.state, &mut pc) { + ControlFlow::Continue(next_handler) => handler = next_handler, + ControlFlow::Break(done) => return done, + } + } +} + +#[cfg(feature = "tail_calls")] +pub fn interpreter_loop(vm: &mut Vm, mut pc: UnsafeBytecodeStream) -> Done { + let opcode = Opcode::decode(&mut pc).unwrap(); + let handler = OPCODE_HANDLER_TABLE[opcode as usize]; + + // The ideal interpreter loop: a bunch of opcode handlers tail calling + // each other! + match (handler.fun)(&mut vm.state, &mut pc) { + ControlFlow::Break(done) => done, + } +} + +/// Wrap the business logic of each handler with the boilerplate of decoding +/// operands and dispatching to next handler/exiting the loop. +macro_rules! define_opcode_handlers { + ( + $( + fn $name:ident ( + $state:ident : &mut MachineState, + $pc:ident : &mut UnsafeBytecodeStream$(,)? + $($field:ident : $field_ty:ty),* + ) $body:block + )* + ) => { + $( + #[cfg(not(feature = "tail_calls"))] + pub fn $name($state: &mut MachineState, $pc: &mut UnsafeBytecodeStream) -> ControlFlow { + let ($($field,)*) = crate::decode::operands::$name($pc); + match $body { + ControlFlow::Continue(()) => { + // Decode the next handler and return it so that + // `interpreter_loop` can call it. + let next_opcode = Opcode::decode($pc).unwrap(); + let next_handler = OPCODE_HANDLER_TABLE[next_opcode as usize]; + ControlFlow::Continue(next_handler) + } + ControlFlow::Break(done) => ControlFlow::Break(done), + } + } + + #[cfg(feature = "tail_calls")] + pub fn $name($state: &mut MachineState, $pc: &mut UnsafeBytecodeStream) -> ControlFlow { + let ($($field,)*) = crate::decode::operands::$name($pc); + match $body { + ControlFlow::Continue(()) => { + // Decode the next handler and call it directly in 
+ // tail position instead of returning it. + let next_opcode = Opcode::decode($pc).unwrap(); + let next_handler = OPCODE_HANDLER_TABLE[next_opcode as usize]; + + // FIXME: ICE + // become (next_handler.fun)($state, $pc); + return (next_handler.fun)($state, $pc); + } + ControlFlow::Break(done) => ControlFlow::Break(done), + } + } + )* + }; +} + +/// Define the table of opcode handlers. +macro_rules! opcode_handler_table_entry { + ( + $( + $( #[$attr:meta] )* + $snake_name:ident = $name:ident $( { + $( + $( #[$field_attr:meta] )* + $field:ident : $field_ty:ty + ),* + } )? ; + )* + ) => {[ $(OpcodeHandler { fun: $snake_name },)* OpcodeHandler { fun: extended }]}; +} + +/// Add one to account for `ExtendedOp`. +const NUM_OPCODES: usize = Opcode::MAX as usize + 1; +static OPCODE_HANDLER_TABLE: [OpcodeHandler; NUM_OPCODES] = + for_each_op!(opcode_handler_table_entry); + +#[inline] +fn pc_rel_jump(pc: &mut UnsafeBytecodeStream, offset: PcRelOffset, inst_size: isize) { + let offset = isize::try_from(i32::from(offset)).unwrap(); + *pc = unsafe { pc.offset(offset - inst_size) }; +} + +define_opcode_handlers! 
{ + fn ret(state: &mut MachineState, pc: &mut UnsafeBytecodeStream) { + if state[XReg::lr] == XRegVal::HOST_RETURN_ADDR { + ControlFlow::Break(Done::ReturnToHost) + } else { + let return_addr = state[XReg::lr].get_ptr(); + *pc = unsafe { UnsafeBytecodeStream::new(NonNull::new_unchecked(return_addr)) }; + ControlFlow::Continue(()) + } + } + + fn call(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, offset: PcRelOffset) { + let return_addr = pc.as_ptr(); + state[XReg::lr].set_ptr(return_addr.as_ptr()); + pc_rel_jump(pc, offset, 5); + ControlFlow::Continue(()) + } + + fn jump(_state: &mut MachineState, pc: &mut UnsafeBytecodeStream, offset: PcRelOffset) { + pc_rel_jump(pc, offset, 5); + ControlFlow::Continue(()) + } + + fn br_if(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, cond: XReg, offset: PcRelOffset) { + let cond = state[cond].get_u64(); + if cond != 0 { + pc_rel_jump(pc, offset, 6) + } + ControlFlow::Continue(()) + } + + fn br_if_not(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, cond: XReg, offset: PcRelOffset) { + let cond = state[cond].get_u64(); + if cond == 0 { + pc_rel_jump(pc, offset, 6) + } + ControlFlow::Continue(()) + } + + fn br_if_xeq32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, a: XReg, b: XReg, offset: PcRelOffset) { + let a = state[a].get_u32(); + let b = state[b].get_u32(); + if a == b { + pc_rel_jump(pc, offset, 7) + } + ControlFlow::Continue(()) + } + + fn br_if_xneq32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, a: XReg, b: XReg, offset: PcRelOffset) { + let a = state[a].get_u32(); + let b = state[b].get_u32(); + if a != b { + pc_rel_jump(pc, offset, 7) + } + ControlFlow::Continue(()) + } + + fn br_if_xslt32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, a: XReg, b: XReg, offset: PcRelOffset) { + let a = state[a].get_i32(); + let b = state[b].get_i32(); + if a < b { + pc_rel_jump(pc, offset, 7) + } + ControlFlow::Continue(()) + } + + fn br_if_xslteq32(state: &mut MachineState, 
pc: &mut UnsafeBytecodeStream, a: XReg, b: XReg, offset: PcRelOffset) { + let a = state[a].get_i32(); + let b = state[b].get_i32(); + if a <= b { + pc_rel_jump(pc, offset, 7) + } + ControlFlow::Continue(()) + } + + fn br_if_xult32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, a: XReg, b: XReg, offset: PcRelOffset) { + let a = state[a].get_u32(); + let b = state[b].get_u32(); + if a < b { + pc_rel_jump(pc, offset, 7) + } + ControlFlow::Continue(()) + } + + fn br_if_xulteq32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, a: XReg, b: XReg, offset: PcRelOffset) { + let a = state[a].get_u32(); + let b = state[b].get_u32(); + if a <= b { + pc_rel_jump(pc, offset, 7) + } + ControlFlow::Continue(()) + } + + fn br_if_xeq64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, a: XReg, b: XReg, offset: PcRelOffset) { + let a = state[a].get_u64(); + let b = state[b].get_u64(); + if a == b { + pc_rel_jump(pc, offset, 7) + } + ControlFlow::Continue(()) + } + + fn br_if_xneq64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, a: XReg, b: XReg, offset: PcRelOffset) { + let a = state[a].get_u64(); + let b = state[b].get_u64(); + if a != b { + pc_rel_jump(pc, offset, 7) + } + ControlFlow::Continue(()) + } + + fn br_if_xslt64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, a: XReg, b: XReg, offset: PcRelOffset) { + let a = state[a].get_i64(); + let b = state[b].get_i64(); + if a < b { + pc_rel_jump(pc, offset, 7) + } + ControlFlow::Continue(()) + } + + fn br_if_xslteq64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, a: XReg, b: XReg, offset: PcRelOffset) { + let a = state[a].get_i64(); + let b = state[b].get_i64(); + if a <= b { + pc_rel_jump(pc, offset, 7) + } + ControlFlow::Continue(()) + } + + fn br_if_xult64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, a: XReg, b: XReg, offset: PcRelOffset) { + let a = state[a].get_u64(); + let b = state[b].get_u64(); + if a < b { + pc_rel_jump(pc, offset, 7) + } + 
ControlFlow::Continue(()) + } + + fn br_if_xulteq64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, a: XReg, b: XReg, offset: PcRelOffset) { + let a = state[a].get_u64(); + let b = state[b].get_u64(); + if a <= b { + pc_rel_jump(pc, offset, 7) + } + ControlFlow::Continue(()) + } + + fn xmov(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, src: XReg) { + let val = state[src]; + state[dst] = val; + ControlFlow::Continue(()) + } + + fn fmov(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: FReg, src: FReg) { + let val = state[src]; + state[dst] = val; + ControlFlow::Continue(()) + } + + fn vmov(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: VReg, src: VReg) { + let val = state[src]; + state[dst] = val; + ControlFlow::Continue(()) + } + + fn xconst8(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, imm: i8) { + state[dst].set_i64(i64::from(imm)); + ControlFlow::Continue(()) + } + + fn xconst16(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, imm: i16) { + state[dst].set_i64(i64::from(imm)); + ControlFlow::Continue(()) + } + + fn xconst32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, imm: i32) { + state[dst].set_i64(i64::from(imm)); + ControlFlow::Continue(()) + } + + fn xconst64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, imm: i64) { + state[dst].set_i64(imm); + ControlFlow::Continue(()) + } + + fn xadd32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_u32(); + let b = state[operands.src2].get_u32(); + state[operands.dst].set_u32(a.wrapping_add(b)); + ControlFlow::Continue(()) + } + + fn xadd64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_u64(); + let b = state[operands.src2].get_u64(); + state[operands.dst].set_u64(a.wrapping_add(b)); + ControlFlow::Continue(()) + } + + fn xeq64(state: 
&mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_u64(); + let b = state[operands.src2].get_u64(); + state[operands.dst].set_u64(u64::from(a == b)); + ControlFlow::Continue(()) + } + + fn xneq64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_u64(); + let b = state[operands.src2].get_u64(); + state[operands.dst].set_u64(u64::from(a != b)); + ControlFlow::Continue(()) + } + + fn xslt64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_i64(); + let b = state[operands.src2].get_i64(); + state[operands.dst].set_u64(u64::from(a < b)); + ControlFlow::Continue(()) + } + + fn xslteq64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_i64(); + let b = state[operands.src2].get_i64(); + state[operands.dst].set_u64(u64::from(a <= b)); + ControlFlow::Continue(()) + } + + fn xult64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_u64(); + let b = state[operands.src2].get_u64(); + state[operands.dst].set_u64(u64::from(a < b)); + ControlFlow::Continue(()) + } + + fn xulteq64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_u64(); + let b = state[operands.src2].get_u64(); + state[operands.dst].set_u64(u64::from(a <= b)); + ControlFlow::Continue(()) + } + + fn xeq32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_u32(); + let b = state[operands.src2].get_u32(); + state[operands.dst].set_u64(u64::from(a == b)); + ControlFlow::Continue(()) + } + + fn xneq32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_u32(); + let b = 
state[operands.src2].get_u32(); + state[operands.dst].set_u64(u64::from(a != b)); + ControlFlow::Continue(()) + } + + fn xslt32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_i32(); + let b = state[operands.src2].get_i32(); + state[operands.dst].set_u64(u64::from(a < b)); + ControlFlow::Continue(()) + } + + fn xslteq32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_i32(); + let b = state[operands.src2].get_i32(); + state[operands.dst].set_u64(u64::from(a <= b)); + ControlFlow::Continue(()) + } + + fn xult32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_u32(); + let b = state[operands.src2].get_u32(); + state[operands.dst].set_u64(u64::from(a < b)); + ControlFlow::Continue(()) + } + + fn xulteq32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, operands: BinaryOperands) { + let a = state[operands.src1].get_u32(); + let b = state[operands.src2].get_u32(); + state[operands.dst].set_u64(u64::from(a <= b)); + ControlFlow::Continue(()) + } + + fn load32_u(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, ptr: XReg) { + let ptr = state[ptr].get_ptr::(); + let val = unsafe { ptr::read_unaligned(ptr) }; + state[dst].set_u64(u64::from(val)); + ControlFlow::Continue(()) + } + + fn load32_s(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, ptr: XReg) { + let ptr = state[ptr].get_ptr::(); + let val = unsafe { ptr::read_unaligned(ptr) }; + state[dst].set_i64(i64::from(val)); + ControlFlow::Continue(()) + } + + fn load64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, ptr: XReg) { + let ptr = state[ptr].get_ptr::(); + let val = unsafe { ptr::read_unaligned(ptr) }; + state[dst].set_u64(val); + ControlFlow::Continue(()) + } + + fn load32_u_offset8(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, 
dst: XReg, ptr: XReg, offset: i8) { + let val = unsafe { + state[ptr] + .get_ptr::() + .byte_offset(offset.into()) + .read_unaligned() + }; + state[dst].set_u64(u64::from(val)); + ControlFlow::Continue(()) + } + + fn load32_s_offset8(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, ptr: XReg, offset: i8) { + let val = unsafe { + state[ptr] + .get_ptr::() + .byte_offset(offset.into()) + .read_unaligned() + }; + state[dst].set_i64(i64::from(val)); + ControlFlow::Continue(()) + } + + fn load32_u_offset64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, ptr: XReg, offset: i64) { + let val = unsafe { + state[ptr] + .get_ptr::() + .byte_offset(offset as isize) + .read_unaligned() + }; + state[dst].set_u64(u64::from(val)); + ControlFlow::Continue(()) + } + + fn load32_s_offset64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, ptr: XReg, offset: i64) { + let val = unsafe { + state[ptr] + .get_ptr::() + .byte_offset(offset as isize) + .read_unaligned() + }; + state[dst].set_i64(i64::from(val)); + ControlFlow::Continue(()) + } + + fn load64_offset8(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, ptr: XReg, offset: i8) { + let val = unsafe { + state[ptr] + .get_ptr::() + .byte_offset(offset.into()) + .read_unaligned() + }; + state[dst].set_u64(val); + ControlFlow::Continue(()) + } + + fn load64_offset64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, ptr: XReg, offset: i64) { + let val = unsafe { + state[ptr] + .get_ptr::() + .byte_offset(offset as isize) + .read_unaligned() + }; + state[dst].set_u64(val); + ControlFlow::Continue(()) + } + + fn store32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, ptr: XReg, src: XReg) { + let ptr = state[ptr].get_ptr::(); + let val = state[src].get_u32(); + unsafe { + ptr::write_unaligned(ptr, val); + } + ControlFlow::Continue(()) + } + + fn store64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, ptr: XReg, src: XReg) { + let 
ptr = state[ptr].get_ptr::(); + let val = state[src].get_u64(); + unsafe { + ptr::write_unaligned(ptr, val); + } + ControlFlow::Continue(()) + } + + fn store32_offset8(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, ptr: XReg, offset: i8, src: XReg) { + let val = state[src].get_u32(); + unsafe { + state[ptr] + .get_ptr::() + .byte_offset(offset.into()) + .write_unaligned(val); + } + ControlFlow::Continue(()) + } + + fn store64_offset8(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, ptr: XReg, offset: i8, src: XReg) { + let val = state[src].get_u64(); + unsafe { + state[ptr] + .get_ptr::() + .byte_offset(offset.into()) + .write_unaligned(val); + } + ControlFlow::Continue(()) + } + + fn store32_offset64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, ptr: XReg, offset: i64, src: XReg) { + let val = state[src].get_u32(); + unsafe { + state[ptr] + .get_ptr::() + .byte_offset(offset as isize) + .write_unaligned(val); + } + ControlFlow::Continue(()) + } + + fn store64_offset64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, ptr: XReg, offset: i64, src: XReg) { + let val = state[src].get_u64(); + unsafe { + state[ptr] + .get_ptr::() + .byte_offset(offset as isize) + .write_unaligned(val); + } + ControlFlow::Continue(()) + } + + fn xpush32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, src: XReg) { + state.push(state[src].get_u32()); + ControlFlow::Continue(()) + } + + fn xpush32_many(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, srcs: RegSet) { + for src in srcs { + state.push(state[src].get_u32()); + } + ControlFlow::Continue(()) + } + + fn xpush64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, src: XReg) { + state.push(state[src].get_u64()); + ControlFlow::Continue(()) + } + + fn xpush64_many(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, srcs: RegSet) { + for src in srcs { + state.push(state[src].get_u64()); + } + ControlFlow::Continue(()) + } + + fn xpop32(state: &mut MachineState, pc: &mut 
UnsafeBytecodeStream, dst: XReg) { + let val = state.pop(); + state[dst].set_u32(val); + ControlFlow::Continue(()) + } + + fn xpop32_many(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dsts: RegSet) { + for dst in dsts.into_iter().rev() { + let val = state.pop(); + state[dst].set_u32(val); + } + ControlFlow::Continue(()) + } + + fn xpop64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg) { + let val = state.pop(); + state[dst].set_u64(val); + ControlFlow::Continue(()) + } + + fn xpop64_many(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dsts: RegSet) { + for dst in dsts.into_iter().rev() { + let val = state.pop(); + state[dst].set_u64(val); + } + ControlFlow::Continue(()) + } + + fn push_frame(state: &mut MachineState, pc: &mut UnsafeBytecodeStream) { + state.push(state[XReg::lr].get_ptr::()); + state.push(state[XReg::fp].get_ptr::()); + state[XReg::fp] = state[XReg::sp]; + ControlFlow::Continue(()) + } + + fn pop_frame(state: &mut MachineState, pc: &mut UnsafeBytecodeStream) { + state[XReg::sp] = state[XReg::fp]; + let fp = state.pop(); + let lr = state.pop(); + state[XReg::fp].set_ptr::(fp); + state[XReg::lr].set_ptr::(lr); + ControlFlow::Continue(()) + } + + fn bitcast_int_from_float_32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, src: FReg) { + let val = state[src].get_f32(); + state[dst].set_u64(u32::from_ne_bytes(val.to_ne_bytes()).into()); + ControlFlow::Continue(()) + } + + fn bitcast_int_from_float_64(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: XReg, src: FReg) { + let val = state[src].get_f64(); + state[dst].set_u64(u64::from_ne_bytes(val.to_ne_bytes())); + ControlFlow::Continue(()) + } + + fn bitcast_float_from_int_32(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, dst: FReg, src: XReg) { + let val = state[src].get_u32(); + state[dst].set_f32(f32::from_ne_bytes(val.to_ne_bytes())); + ControlFlow::Continue(()) + } + + fn bitcast_float_from_int_64(state: &mut 
MachineState, pc: &mut UnsafeBytecodeStream, dst: FReg, src: XReg) { + let val = state[src].get_u64(); + state[dst].set_f64(f64::from_ne_bytes(val.to_ne_bytes())); + ControlFlow::Continue(()) + } + + fn extended(state: &mut MachineState, pc: &mut UnsafeBytecodeStream, opcode: ExtendedOpcode) { + match opcode { + ExtendedOpcode::Nop => ControlFlow::Continue(()), + ExtendedOpcode::Trap => ControlFlow::Break(Done::Trap), + ExtendedOpcode::GetSp => { + let (dst,) = crate::decode::operands::get_sp(pc); + let sp = state[XReg::sp].get_u64(); + state[dst].set_u64(sp); + ControlFlow::Continue(()) + } + } + } +} diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index f792b83df727..83772af3f6e5 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -1,6 +1,8 @@ //! The pulley bytecode for fast interpreters. #![cfg_attr(docsrs, feature(doc_auto_cfg))] +#![cfg_attr(feature = "tail_calls", feature(explicit_tail_calls))] +#![cfg_attr(feature = "tail_calls", allow(incomplete_features, unstable_features))] #![deny(missing_docs)] #![no_std]