From 24c05179d0cae55b2e57acdb63859a1d25e344db Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 12 Jul 2023 18:11:13 +0100 Subject: [PATCH] Add `regalloc2-tool` (#148) This PR has 2 components: - A `SerializableFunction` which wraps a `Function` and `MachineEnv`. This type can be serialized and deserialized with Serde, and is enabled by the "enable-serde" feature. - A `regalloc2-tool` binary which reads a bincode-encoded `SerializableFunction` and then runs the register allocator and checker on it. This is a useful tool for debugging register allocation failures and to investigate cases of poor register allocation. --- Cargo.toml | 3 + deny.toml | 4 +- regalloc2-tool/Cargo.toml | 18 +++ regalloc2-tool/src/main.rs | 95 +++++++++++ src/index.rs | 4 + src/lib.rs | 3 + src/serialize.rs | 311 +++++++++++++++++++++++++++++++++++++ 7 files changed, 436 insertions(+), 2 deletions(-) create mode 100644 regalloc2-tool/Cargo.toml create mode 100644 regalloc2-tool/src/main.rs create mode 100644 src/serialize.rs diff --git a/Cargo.toml b/Cargo.toml index 4b4c27ec..35fbc4d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,6 @@ +[workspace] +members = ["regalloc2-tool"] + [package] name = "regalloc2" version = "0.9.1" diff --git a/deny.toml b/deny.toml index 586d47cc..42238753 100644 --- a/deny.toml +++ b/deny.toml @@ -10,8 +10,7 @@ targets = [ vulnerability = "deny" unmaintained = "deny" yanked = "deny" -ignore = [ -] +ignore = [] # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html [licenses] @@ -19,6 +18,7 @@ allow = [ "Apache-2.0 WITH LLVM-exception", "Apache-2.0", "MIT", + "Unicode-DFS-2016", ] # https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html diff --git a/regalloc2-tool/Cargo.toml b/regalloc2-tool/Cargo.toml new file mode 100644 index 00000000..4bc7418a --- /dev/null +++ b/regalloc2-tool/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "regalloc2-tool" +authors = [ + "Chris Fallin ", + "Mozilla SpiderMonkey Developers", +] +version 
= "0.0.0"
+edition = "2021"
+publish = false
+license = "Apache-2.0 WITH LLVM-exception"
+description = "Tool for testing regalloc2"
+repository = "https://github.com/bytecodealliance/regalloc2"
+
+[dependencies]
+bincode = "1.3.3"
+clap = { version = "4.3.11", features = ["derive"] }
+pretty_env_logger = "0.5.0"
+regalloc2 = { path = "..", features = ["trace-log", "enable-serde"] }
diff --git a/regalloc2-tool/src/main.rs b/regalloc2-tool/src/main.rs
new file mode 100644
index 00000000..e6d31734
--- /dev/null
+++ b/regalloc2-tool/src/main.rs
@@ -0,0 +1,95 @@
+use std::path::PathBuf;
+
+use clap::Parser;
+use regalloc2::{
+    checker::Checker, serialize::SerializableFunction, Block, Edit, Function, InstOrEdit, Output,
+    RegallocOptions,
+};
+
+#[derive(Parser)]
+/// Tool for testing regalloc2.
+struct Args {
+    /// Print the input function and the result of register allocation.
+    #[clap(short = 'v')]
+    verbose: bool,
+
+    /// Input file containing a bincode-encoded SerializableFunction.
+    input: PathBuf,
+}
+
+/// Reads a bincode-encoded `SerializableFunction` from the input path, runs
+/// the register allocator on it and then runs the checker on the result.
+///
+/// Panics if the file cannot be read or decoded, if allocation fails, or if
+/// the checker reports an error.
+fn main() {
+    pretty_env_logger::init();
+    let args = Args::parse();
+
+    let input = std::fs::read(&args.input).expect("could not read input file");
+    let function: SerializableFunction =
+        bincode::deserialize(&input).expect("could not deserialize input file");
+
+    if args.verbose {
+        println!("Input function: {function:?}");
+    }
+
+    let options = RegallocOptions {
+        verbose_log: true,
+        validate_ssa: true,
+    };
+    let output = match regalloc2::run(&function, function.machine_env(), &options) {
+        Ok(output) => output,
+        Err(e) => {
+            panic!("Register allocation failed: {e:#?}");
+        }
+    };
+
+    if args.verbose {
+        print_output(&function, &output);
+    }
+
+    let mut checker = Checker::new(&function, function.machine_env());
+    checker.prepare(&output);
+    if let Err(e) = checker.run() {
+        panic!("Register allocation checker failed: {e:#?}");
+    }
+}
+
+/// Prints the allocation result to stdout.
+///
+/// For every block this prints a header with its successor and predecessor
+/// indices, followed by each item yielded by `Output::block_insts_and_edits`:
+/// instructions are shown with every operand paired with its allocation, and
+/// move edits are shown as `move to <- from`.
+fn print_output(func: &SerializableFunction, output: &Output) {
+    println!("Register allocation result: {{");
+    for i in 0..func.num_blocks() {
+        let block = Block::new(i);
+        let succs = func
+            .block_succs(block)
+            .iter()
+            .map(|b| b.index())
+            .collect::<Vec<_>>();
+        let preds = func
+            .block_preds(block)
+            .iter()
+            .map(|b| b.index())
+            .collect::<Vec<_>>();
+        println!(" block{}: # succs:{:?} preds:{:?}", i, succs, preds);
+        for inst_or_edit in output.block_insts_and_edits(func, block) {
+            match inst_or_edit {
+                InstOrEdit::Inst(inst) => {
+                    // Same three-way classification that `SerializableFunction`
+                    // stores per instruction: return, branch, or anything else.
+                    let op = if func.is_ret(inst) {
+                        "ret"
+                    } else if func.is_branch(inst) {
+                        "branch"
+                    } else {
+                        "op"
+                    };
+                    let ops: Vec<_> = func
+                        .inst_operands(inst)
+                        .iter()
+                        .zip(output.inst_allocs(inst))
+                        .map(|(op, alloc)| format!("{op} => {alloc}"))
+                        .collect();
+                    let ops = ops.join(", ");
+                    println!(" inst{}: {op} {ops}", inst.index());
+                }
+                InstOrEdit::Edit(Edit::Move { from, to }) => {
+                    println!(" edit: move {to} <- {from}");
+                }
+            }
+        }
+    }
+    println!("}}");
+}
diff --git a/src/index.rs b/src/index.rs
index 3cb04205..df16df0b 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -137,6 +137,10 @@ define_index!(Inst);
 define_index!(Block);
 
 #[derive(Clone, Copy, Debug)]
+#[cfg_attr(
+    feature = "enable-serde",
+    derive(::serde::Serialize, ::serde::Deserialize)
+)]
 pub struct InstRange(Inst, Inst, bool);
 
 impl InstRange {
diff --git a/src/lib.rs b/src/lib.rs
index 4185530b..c89410c9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -58,6 +58,9 @@ pub mod checker;
 #[cfg(feature = "fuzzing")]
 pub mod fuzzing;
 
+#[cfg(feature = "enable-serde")]
+pub mod serialize;
+
 #[cfg(feature = "enable-serde")]
 use serde::{Deserialize, Serialize};
 
diff --git a/src/serialize.rs b/src/serialize.rs
new file mode 100644
index 00000000..a74a60ec
--- /dev/null
+++ b/src/serialize.rs
@@ -0,0 +1,311 @@
+use core::fmt;
+
+use alloc::{format, string::ToString, vec::Vec};
+use serde::{Deserialize, Serialize};
+
+use crate::{Block, Function, Inst, InstRange, MachineEnv, Operand, PRegSet, RegClass, VReg};
+
+/// Coarse instruction kind recorded for each instruction: a return, a branch,
+/// or any other operation.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
+enum InstOpcode {
+    Op,
+    Ret,
+    Branch,
+}
+
+impl fmt::Display for InstOpcode {
+    /// Writes the lowercase mnemonic: `op`, `ret` or `branch`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            InstOpcode::Op => f.write_str("op"),
+            InstOpcode::Ret => f.write_str("ret"),
+            InstOpcode::Branch => f.write_str("branch"),
+        }
+    }
+}
+
+/// Per-instruction data captured from a `Function`: its kind, operands,
+/// clobber set, and whether it requires references on the stack.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+struct InstData {
+    op: InstOpcode,
+    operands: Vec<Operand>,
+    clobbers: PRegSet,
+    is_safepoint: bool,
+}
+
+/// A wrapper around a `Function` and `MachineEnv` that can be serialized and
+/// deserialized.
+///
+/// The serialized form of this structure is not stable: it is intended to be
+/// deserialized with the exact same version of regalloc2 as the one that it
+/// was created with.
+#[derive(Serialize, Deserialize)]
+pub struct SerializableFunction {
+    machine_env: MachineEnv,
+    entry_block: Block,
+    // Indexed by instruction index.
+    insts: Vec<InstData>,
+    // The following five vectors are indexed by block index.
+    blocks: Vec<InstRange>,
+    block_preds: Vec<Vec<Block>>,
+    block_succs: Vec<Vec<Block>>,
+    block_params_in: Vec<Vec<VReg>>,
+    // Second index is the successor index within the block.
+    block_params_out: Vec<Vec<Vec<VReg>>>,
+    num_vregs: usize,
+    reftype_vregs: Vec<VReg>,
+    debug_value_labels: Vec<(VReg, Inst, Inst, u32)>,
+    // Indexed by `RegClass as usize`.
+    spillslot_size: Vec<usize>,
+    multi_spillslot_named_by_last_slot: bool,
+    allow_multiple_vreg_defs: bool,
+}
+
+impl SerializableFunction {
+    /// Creates a new `SerializableFunction` from an arbitrary `Function` and
+    /// `MachineEnv`.
+    pub fn new(func: &impl Function, machine_env: MachineEnv) -> Self {
+        // Fresh iterator over every block handle of `func`; each per-block
+        // table below walks the blocks in index order.
+        let all_blocks = || (0..func.num_blocks()).map(Block::new);
+
+        let entry_block = func.entry_block();
+
+        // Snapshot each instruction: kind, operands, clobbers, safepoint flag.
+        let insts = (0..func.num_insts())
+            .map(Inst::new)
+            .map(|inst| {
+                // A return takes precedence over a branch; guards are
+                // evaluated lazily, top to bottom.
+                let op = match () {
+                    _ if func.is_ret(inst) => InstOpcode::Ret,
+                    _ if func.is_branch(inst) => InstOpcode::Branch,
+                    _ => InstOpcode::Op,
+                };
+                let operands = func.inst_operands(inst).to_vec();
+                let clobbers = func.inst_clobbers(inst);
+                let is_safepoint = func.requires_refs_on_stack(inst);
+                InstData {
+                    op,
+                    operands,
+                    clobbers,
+                    is_safepoint,
+                }
+            })
+            .collect();
+
+        let blocks = all_blocks().map(|block| func.block_insns(block)).collect();
+        let block_preds = all_blocks()
+            .map(|block| func.block_preds(block).to_vec())
+            .collect();
+        let block_succs = all_blocks()
+            .map(|block| func.block_succs(block).to_vec())
+            .collect();
+        let block_params_in = all_blocks()
+            .map(|block| func.block_params(block).to_vec())
+            .collect();
+
+        // Outgoing block parameters are queried on the last instruction of
+        // each block, once per successor index.
+        let block_params_out = all_blocks()
+            .map(|block| {
+                let last_inst = func.block_insns(block).last();
+                let succ_count = func.block_succs(block).len();
+                (0..succ_count)
+                    .map(|succ_idx| func.branch_blockparams(block, last_inst, succ_idx).to_vec())
+                    .collect()
+            })
+            .collect();
+
+        let num_vregs = func.num_vregs();
+        let reftype_vregs = func.reftype_vregs().to_vec();
+        let debug_value_labels = func.debug_value_labels().to_vec();
+
+        // One entry per register class, in the order Int, Float, Vector.
+        let int_slot = func.spillslot_size(RegClass::Int);
+        let float_slot = func.spillslot_size(RegClass::Float);
+        let vector_slot = func.spillslot_size(RegClass::Vector);
+        let spillslot_size = [int_slot, float_slot, vector_slot].to_vec();
+
+        let multi_spillslot_named_by_last_slot = func.multi_spillslot_named_by_last_slot();
+        let allow_multiple_vreg_defs = func.allow_multiple_vreg_defs();
+
+        Self {
+            machine_env,
+            entry_block,
+            insts,
+            blocks,
+            block_preds,
+            block_succs,
+            block_params_in,
+            block_params_out,
+            num_vregs,
+            reftype_vregs,
+            debug_value_labels,
+            spillslot_size,
+            multi_spillslot_named_by_last_slot,
+            allow_multiple_vreg_defs,
+        }
+    }
+
+    /// Returns the `MachineEnv` associated with this function.
+    pub fn machine_env(&self) -> &MachineEnv {
+        &self.machine_env
+    }
+}
+
+/// Serves every `Function` query straight from the stored tables, so a
+/// `SerializableFunction` can be handed to the allocator and checker.
+///
+/// Lookups index the stored vectors directly and panic on an out-of-range
+/// instruction, block or successor index.
+impl Function for SerializableFunction {
+    fn num_insts(&self) -> usize {
+        self.insts.len()
+    }
+
+    fn num_blocks(&self) -> usize {
+        self.blocks.len()
+    }
+
+    fn entry_block(&self) -> Block {
+        self.entry_block
+    }
+
+    fn block_insns(&self, block: Block) -> InstRange {
+        self.blocks[block.index()]
+    }
+
+    fn block_succs(&self, block: Block) -> &[Block] {
+        &self.block_succs[block.index()]
+    }
+
+    fn block_preds(&self, block: Block) -> &[Block] {
+        &self.block_preds[block.index()]
+    }
+
+    fn block_params(&self, block: Block) -> &[VReg] {
+        &self.block_params_in[block.index()]
+    }
+
+    fn is_ret(&self, insn: Inst) -> bool {
+        self.insts[insn.index()].op == InstOpcode::Ret
+    }
+
+    fn is_branch(&self, insn: Inst) -> bool {
+        self.insts[insn.index()].op == InstOpcode::Branch
+    }
+
+    // The instruction argument is ignored: outgoing parameters are stored per
+    // (block, successor index).
+    fn branch_blockparams(&self, block: Block, _: Inst, succ: usize) -> &[VReg] {
+        &self.block_params_out[block.index()][succ]
+    }
+
+    fn requires_refs_on_stack(&self, insn: Inst) -> bool {
+        self.insts[insn.index()].is_safepoint
+    }
+
+    fn inst_operands(&self, insn: Inst) -> &[Operand] {
+        &self.insts[insn.index()].operands
+    }
+
+    fn inst_clobbers(&self, insn: Inst) -> PRegSet {
+        self.insts[insn.index()].clobbers
+    }
+
+    fn num_vregs(&self) -> usize {
+        self.num_vregs
+    }
+
+    fn reftype_vregs(&self) -> &[VReg] {
+        &self.reftype_vregs
+    }
+
+    fn debug_value_labels(&self) -> &[(VReg, Inst, Inst, u32)] {
+        &self.debug_value_labels
+    }
+
+    fn spillslot_size(&self, regclass: RegClass) -> usize {
+        self.spillslot_size[regclass as usize]
+    }
+
+    fn multi_spillslot_named_by_last_slot(&self) -> bool {
+        self.multi_spillslot_named_by_last_slot
+    }
+
+    fn allow_multiple_vreg_defs(&self) -> bool {
+        self.allow_multiple_vreg_defs
+    }
+}
+
+/// Human-readable dump of the function: the machine environment and global
+/// settings first, then every block with its parameters, successors,
+/// predecessors and instructions.
+impl fmt::Debug for SerializableFunction {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        writeln!(f, "{{")?;
+        writeln!(f, " machine_env: {:#?}", self.machine_env())?;
+        writeln!(
+            f,
+            " spillslot_size(Int): {}",
+            self.spillslot_size(RegClass::Int)
+        )?;
+        writeln!(
+            f,
+            " spillslot_size(Float): {}",
+            self.spillslot_size(RegClass::Float)
+        )?;
+        writeln!(
+            f,
+            " spillslot_size(Vector): {}",
+            self.spillslot_size(RegClass::Vector)
+        )?;
+        writeln!(
+            f,
+            " multi_spillslot_named_by_last_slot: {}",
+            self.multi_spillslot_named_by_last_slot()
+        )?;
+        writeln!(
+            f,
+            " allow_multiple_vreg_defs: {}",
+            self.allow_multiple_vreg_defs()
+        )?;
+        for vreg in self.reftype_vregs() {
+            writeln!(f, " REF: {}", vreg)?;
+        }
+        for (i, blockrange) in self.blocks.iter().enumerate() {
+            let succs = self.block_succs[i]
+                .iter()
+                .map(|b| b.index())
+                .collect::<Vec<_>>();
+            let preds = self.block_preds[i]
+                .iter()
+                .map(|b| b.index())
+                .collect::<Vec<_>>();
+            let params_in = self.block_params_in[i]
+                .iter()
+                .map(|v| format!("v{}", v.vreg()))
+                .collect::<Vec<_>>()
+                .join(", ");
+            // One `blockN(args)` entry per successor, printed under every
+            // branch instruction of this block.
+            let params_out = self.block_params_out[i]
+                .iter()
+                .enumerate()
+                .map(|(succ_idx, vec)| {
+                    let succ = self.block_succs[i][succ_idx];
+                    let params = vec
+                        .iter()
+                        .map(|v| format!("v{}", v.vreg()))
+                        .collect::<Vec<_>>()
+                        .join(", ");
+                    format!("block{}({})", succ.index(), params)
+                })
+                .collect::<Vec<_>>()
+                .join(", ");
+            writeln!(
+                f,
+                " block{i}({params_in}): # succs:{succs:?} preds:{preds:?}",
+            )?;
+            for inst in blockrange.iter() {
+                if self.requires_refs_on_stack(inst) {
+                    writeln!(f, " -- SAFEPOINT --")?;
+                }
+                let ops: Vec<_> = self
+                    .inst_operands(inst)
+                    .iter()
+                    .map(|op| op.to_string())
+                    .collect();
+                let ops = ops.join(", ");
+                // Each clobber carries its own leading ", ", so an empty set
+                // yields an empty string and a non-empty set yields
+                // ", Clobber: a, Clobber: b".
+                let clobbers = self
+                    .inst_clobbers(inst)
+                    .into_iter()
+                    .map(|preg| format!(", Clobber: {preg}"))
+                    .collect::<Vec<_>>()
+                    .concat();
+                writeln!(
+                    f,
+                    " inst{}: {} {ops}{clobbers}",
+                    inst.index(),
+                    self.insts[inst.index()].op,
+                )?;
+                if let InstOpcode::Branch = self.insts[inst.index()].op {
+                    writeln!(f, " params: {}", params_out)?;
+                }
+            }
+        }
+        writeln!(f, "}}")
+    }
+}